twitter: initial talon module, processing data from Talon android app
This commit is contained in:
parent
f8e73134b3
commit
afdf9d4334
4 changed files with 121 additions and 0 deletions
|
@ -124,3 +124,8 @@ class fbmessenger:
|
||||||
export_db: PathIsh
|
export_db: PathIsh
|
||||||
class android:
|
class android:
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
|
|
||||||
|
class twitter:
|
||||||
|
class talon:
|
||||||
|
export_path: Paths
|
||||||
|
|
0
my/twitter/archive.py
Executable file → Normal file
0
my/twitter/archive.py
Executable file → Normal file
113
my/twitter/talon.py
Normal file
113
my/twitter/talon.py
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
"""
|
||||||
|
Twitter data from Talon app database (in =/data/data/com.klinker.android.twitter_l/databases/=)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Iterator, Sequence, Optional, Dict
|
||||||
|
|
||||||
|
|
||||||
|
from my.config import twitter as user_config
|
||||||
|
|
||||||
|
|
||||||
|
from ..core import Paths
|
||||||
|
@dataclass
class config(user_config.talon):
    # Module settings; inherits from the `talon` section of the user's
    # `my.config.twitter` class.

    # path[s]/glob to the exported sqlite databases
    export_path: Paths
|
||||||
|
|
||||||
|
|
||||||
|
from ..core import get_files
|
||||||
|
from pathlib import Path
|
||||||
|
def inputs() -> Sequence[Path]:
    """Return the files matched by ``config.export_path``, as resolved by ``get_files``."""
    export_path = config.export_path
    return get_files(export_path)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(unsafe_hash=True)
class Tweet:
    """A single tweet as read from a Talon database row.

    ``unsafe_hash=True`` generates ``__hash__`` from the fields, so every
    field value (including ``urls``) has to be hashable for hashing to work.
    """

    id_str: str
    # TODO figure out if utc
    created_at: datetime
    screen_name: str
    text: str
    # keep this a tuple (not a list) so the generated __hash__ works
    urls: Sequence[str]

    @property
    def permalink(self) -> str:
        """Canonical twitter.com URL of this tweet."""
        return f'https://twitter.com/{self.screen_name}/status/{self.id_str}'
|
||||||
|
|
||||||
|
|
||||||
|
# meh... just wrappers to tell apart tweets from favorites...
|
||||||
|
@dataclass(unsafe_hash=True)
class _IsTweet:
    # Tag type: wraps a Tweet so it can be told apart from a favorite.
    tweet: Tweet
|
||||||
|
@dataclass(unsafe_hash=True)
class _IsFavorire:
    # Tag type: wraps a Tweet so it can be told apart from a regular tweet.
    tweet: Tweet
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Union
|
||||||
|
from ..core.error import Res
|
||||||
|
from ..core.dataset import connect_readonly
|
||||||
|
Entity = Union[_IsTweet, _IsFavorire]
|
||||||
|
def _entities() -> Iterator[Res[Entity]]:
    """Yield the entities (or errors) of every input file, one file after another."""
    for db_path in inputs():
        yield from _process_one(db_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _process_one(f: Path) -> Iterator[Res[Entity]]:
    """Yield the entities stored in one Talon database file.

    The handler is picked by the file's basename. A file with an unrecognised
    name produces a single ``RuntimeError`` value (yielded, not raised) and
    nothing else.
    """
    handlers = {
        'user_tweets.db': _process_user_tweets,
        'favorite_tweets.db': _process_favorite_tweets,
    }
    handler = handlers.get(f.name)
    if handler is None:
        # Report the full path: several exports may share the same basename.
        yield RuntimeError(f"Couldn't find handler for {f}")
        return
    with connect_readonly(f) as db:
        yield from handler(db)
|
||||||
|
|
||||||
|
|
||||||
|
def _process_user_tweets(db) -> Iterator[Res[Entity]]:
    """Yield an ``_IsTweet`` per row of the ``lists`` table, ordered by ``time``.

    A row that fails to parse is yielded as the exception instead of aborting
    the iteration.
    """
    # dunno why it's called 'lists'
    rows = db['lists'].all(order_by='time')
    for row in rows:
        try:
            yield _IsTweet(_parse_tweet(row))
        except Exception as err:
            yield err
|
||||||
|
|
||||||
|
|
||||||
|
def _process_favorite_tweets(db) -> Iterator[Res[Entity]]:
    """Yield an ``_IsFavorire`` per row of ``favorite_tweets``, ordered by ``time``.

    A row that fails to parse is yielded as the exception instead of aborting
    the iteration.
    """
    rows = db['favorite_tweets'].all(order_by='time')
    for row in rows:
        try:
            yield _IsFavorire(_parse_tweet(row))
        except Exception as err:
            yield err
|
||||||
|
|
||||||
|
def _parse_tweet(row) -> Tweet:
    """Build a ``Tweet`` from one database row.

    Reads the ``tweet_id``, ``time``, ``screen_name``, ``text`` and
    ``other_url`` columns. A missing (``None``) or empty ``other_url`` gives an
    empty ``urls`` tuple rather than an error.
    """
    # TODO row['retweeter'] if not empty, would be user's name and means retweet?
    # screen name would be the actual tweet's author

    # todo hmm text sometimes is trimmed with ellipsis? at least urls
    # presumably other_url can be NULL when the tweet has no links; treat as empty
    raw_urls = row['other_url'] or ''
    urls = tuple(u for u in raw_urls.split(' ') if u.strip())

    return Tweet(
        id_str=str(row['tweet_id']),
        # 'time' is divided by 1000 before conversion, i.e. treated as milliseconds.
        # NOTE(review): the result is a naive datetime in local time — confirm tz.
        created_at=datetime.fromtimestamp(row['time'] / 1000),
        screen_name=row['screen_name'],
        text=row['text'],
        urls=urls,
    )
|
||||||
|
|
||||||
|
|
||||||
|
from more_itertools import unique_everseen
|
||||||
|
def tweets() -> Iterator[Res[Tweet]]:
    """Yield deduplicated user tweets; errors are passed through as values."""
    for item in unique_everseen(_entities()):
        if isinstance(item, Exception):
            yield item
            continue
        if isinstance(item, _IsTweet):
            yield item.tweet
|
||||||
|
|
||||||
|
def likes() -> Iterator[Res[Tweet]]:
    """Yield deduplicated favorited tweets; errors are passed through as values."""
    for item in unique_everseen(_entities()):
        if isinstance(item, Exception):
            yield item
            continue
        if isinstance(item, _IsFavorire):
            yield item.tweet
|
||||||
|
|
|
@ -8,6 +8,8 @@ from ..core.common import Paths
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from my.config import twint as user_config
|
from my.config import twint as user_config
|
||||||
|
|
||||||
|
# TODO move to twitter.twint config structure
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class twint(user_config):
|
class twint(user_config):
|
||||||
export_path: Paths # path[s]/glob to the twint Sqlite database
|
export_path: Paths # path[s]/glob to the twint Sqlite database
|
||||||
|
@ -63,6 +65,7 @@ class Tweet(NamedTuple):
|
||||||
return []
|
return []
|
||||||
return ustr.split(',')
|
return ustr.split(',')
|
||||||
|
|
||||||
|
# TODO move to common
|
||||||
@property
|
@property
|
||||||
def permalink(self) -> str:
|
def permalink(self) -> str:
|
||||||
return f'https://twitter.com/{self.screen_name}/status/{self.id_str}'
|
return f'https://twitter.com/{self.screen_name}/status/{self.id_str}'
|
||||||
|
|
Loading…
Add table
Reference in a new issue