twitter: initial talon module, processing data from Talon android app

This commit is contained in:
Dima Gerasimov 2022-02-07 09:58:50 +00:00 committed by karlicoss
parent f8e73134b3
commit afdf9d4334
4 changed files with 121 additions and 0 deletions

View file

@ -124,3 +124,8 @@ class fbmessenger:
export_db: PathIsh export_db: PathIsh
class android: class android:
export_path: Paths export_path: Paths
class twitter:
class talon:
export_path: Paths

0
my/twitter/archive.py Executable file → Normal file
View file

113
my/twitter/talon.py Normal file
View file

@ -0,0 +1,113 @@
"""
Twitter data from Talon app database (in =/data/data/com.klinker.android.twitter_l/databases/=)
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from typing import Iterator, Sequence, Optional, Dict
from my.config import twitter as user_config
from ..core import Paths
@dataclass
class config(user_config.talon):
    """Settings of this module; extends the ``talon`` section of the user's twitter config."""

    # path[s]/glob to the exported sqlite databases
    export_path: Paths
from ..core import get_files
from pathlib import Path
def inputs() -> Sequence[Path]:
    """Resolve ``config.export_path`` into concrete paths via ``get_files``."""
    export_path = config.export_path
    return get_files(export_path)
@dataclass(unsafe_hash=True)
class Tweet:
    """One tweet as read from a Talon database row.

    Hashable (``unsafe_hash=True``) so instances can be deduplicated; this only
    works while every field value, including ``urls``, is itself hashable.
    """

    id_str: str

    # TODO figure out if utc
    created_at: datetime

    screen_name: str
    text: str

    # urls attached to the tweet
    urls: Sequence[str]
# meh... just wrappers to tell apart tweets from favorites...
@dataclass(unsafe_hash=True)
class _IsTweet:
    """Marks the wrapped tweet as coming from the user's own tweets."""

    tweet: Tweet
@dataclass(unsafe_hash=True)
class _IsFavorire:
    """Marks the wrapped tweet as coming from the user's favorites."""

    tweet: Tweet
from typing import Union
from ..core.error import Res
from ..core.dataset import connect_readonly
# Either of the two wrapper kinds produced by the per-database handlers in this module.
Entity = Union[_IsTweet, _IsFavorire]
def _entities() -> Iterator[Res[Entity]]:
    """Yield everything ``_process_one`` produces, for each path from ``inputs()`` in turn."""
    for db_path in inputs():
        yield from _process_one(db_path)
def _process_one(f: Path) -> Iterator[Res[Entity]]:
    """Yield the entities stored in a single database file.

    The handler is picked by the file's name. A file whose name has no
    registered handler produces exactly one ``RuntimeError`` (yielded, not
    raised) and is otherwise skipped.
    """
    handlers = {
        'user_tweets.db': _process_user_tweets,
        'favorite_tweets.db': _process_favorite_tweets,
    }
    handler = handlers.get(f.name)
    if handler is None:
        # Report the full path: the bare file name alone would not tell
        # which of the inputs was skipped.
        yield RuntimeError(f"Couldn't find handler for {f}")
        return
    with connect_readonly(f) as db:
        yield from handler(db)
def _process_user_tweets(db) -> Iterator[Res[Entity]]:
    """Yield an ``_IsTweet`` per row of the ``lists`` table, ordered by ``time``.

    A row that fails to parse is yielded as the exception it raised, and
    iteration continues with the next row.
    """
    # dunno why it's called 'lists'
    table = db['lists']
    for row in table.all(order_by='time'):
        try:
            tweet = _parse_tweet(row)
            yield _IsTweet(tweet)
        except Exception as err:
            yield err
def _process_favorite_tweets(db) -> Iterator[Res[Entity]]:
    """Yield an ``_IsFavorire`` per row of the ``favorite_tweets`` table, ordered by ``time``.

    A row that fails to parse is yielded as the exception it raised, and
    iteration continues with the next row.
    """
    table = db['favorite_tweets']
    for row in table.all(order_by='time'):
        try:
            tweet = _parse_tweet(row)
            yield _IsFavorire(tweet)
        except Exception as err:
            yield err
def _parse_tweet(row) -> Tweet:
    """Build a ``Tweet`` from one database row.

    ``row`` is indexed by column name; the columns read are ``tweet_id``,
    ``time``, ``screen_name``, ``text`` and ``other_url``.
    """
    # TODO row['retweeter'] if not empty, would be user's name and means retweet?
    # screen name would be the actual tweet's author

    # Presumably a NULL column comes back as None; treat that the same as
    # "no urls" instead of failing the whole tweet on .split()
    raw_urls = row['other_url'] or ''
    return Tweet(
        id_str=str(row['tweet_id']),
        # 'time' is divided by 1000, i.e. handled as milliseconds since the epoch.
        # NOTE: fromtimestamp() without tz yields a naive datetime in local time
        created_at=datetime.fromtimestamp(row['time'] / 1000),
        screen_name=row['screen_name'],
        text=row['text'],
        # todo hmm text sometimes is trimmed with ellipsis? at least urls
        urls=tuple(u for u in raw_urls.split(' ') if u.strip()),
    )
from more_itertools import unique_everseen
def tweets() -> Iterator[Res[Tweet]]:
    """Yield the user's own tweets, deduplicated, with errors passed through.

    Entities wrapped as anything other than ``_IsTweet`` are skipped.
    """
    for item in unique_everseen(_entities()):
        if isinstance(item, Exception):
            yield item
            continue
        if isinstance(item, _IsTweet):
            yield item.tweet
def likes() -> Iterator[Res[Tweet]]:
    """Yield the user's favorited tweets, deduplicated, with errors passed through.

    Entities wrapped as anything other than ``_IsFavorire`` are skipped.
    """
    for item in unique_everseen(_entities()):
        if isinstance(item, Exception):
            yield item
            continue
        if isinstance(item, _IsFavorire):
            yield item.tweet

View file

@ -8,6 +8,8 @@ from ..core.common import Paths
from dataclasses import dataclass from dataclasses import dataclass
from my.config import twint as user_config from my.config import twint as user_config
# TODO move to twitter.twint config structure
@dataclass @dataclass
class twint(user_config): class twint(user_config):
export_path: Paths # path[s]/glob to the twint Sqlite database export_path: Paths # path[s]/glob to the twint Sqlite database
@ -63,6 +65,7 @@ class Tweet(NamedTuple):
return [] return []
return ustr.split(',') return ustr.split(',')
# TODO move to common
@property @property
def permalink(self) -> str: def permalink(self) -> str:
return f'https://twitter.com/{self.screen_name}/status/{self.id_str}' return f'https://twitter.com/{self.screen_name}/status/{self.id_str}'