From afdf9d43340ec56ab3ba49ee5fe44103d661d54b Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Mon, 7 Feb 2022 09:58:50 +0000
Subject: [PATCH] twitter: initial talon module, processing data from Talon android app

---
 my/config.py          |   5 ++
 my/twitter/archive.py |   0
 my/twitter/talon.py   | 113 ++++++++++++++++++++++++++++++++++++++++++
 my/twitter/twint.py   |   3 ++
 4 files changed, 121 insertions(+)
 mode change 100755 => 100644 my/twitter/archive.py
 create mode 100644 my/twitter/talon.py

diff --git a/my/config.py b/my/config.py
index aef90bc..7201a84 100644
--- a/my/config.py
+++ b/my/config.py
@@ -124,3 +124,8 @@ class fbmessenger:
         export_db: PathIsh
     class android:
         export_path: Paths
+
+
+class twitter:
+    class talon:
+        export_path: Paths
diff --git a/my/twitter/archive.py b/my/twitter/archive.py
old mode 100755
new mode 100644
diff --git a/my/twitter/talon.py b/my/twitter/talon.py
new file mode 100644
index 0000000..a369f69
--- /dev/null
+++ b/my/twitter/talon.py
@@ -0,0 +1,113 @@
+"""
+Twitter data from Talon app database (in =/data/data/com.klinker.android.twitter_l/databases/=)
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Iterator, Sequence, Optional, Dict
+
+
+from my.config import twitter as user_config
+
+
+from ..core import Paths
+@dataclass
+class config(user_config.talon):
+    # path[s]/glob to the exported sqlite databases
+    export_path: Paths
+
+
+from ..core import get_files
+from pathlib import Path
+def inputs() -> Sequence[Path]:  # files that get_files resolves from config.export_path
+    return get_files(config.export_path)
+
+
+# One tweet row from a Talon database; unsafe_hash=True generates __hash__ so tweets can be deduplicated.
+@dataclass(unsafe_hash=True)
+class Tweet:
+    id_str: str  # the 'tweet_id' column, stringified
+    # TODO figure out if utc (currently a naive datetime built with datetime.fromtimestamp)
+    created_at: datetime
+    screen_name: str
+    text: str
+    urls: Sequence[str]  # non-blank, space-separated pieces of the 'other_url' column
+
+
+# meh... just wrappers to tell apart tweets from favorites...
+@dataclass(unsafe_hash=True)
+class _IsTweet:
+    tweet: Tweet
+@dataclass(unsafe_hash=True)
+class _IsFavorite:
+    tweet: Tweet
+
+
+from typing import Union
+from ..core.error import Res
+from ..core.dataset import connect_readonly
+Entity = Union[_IsTweet, _IsFavorite]
+def _entities() -> Iterator[Res[Entity]]:  # walks inputs() in order, chaining each file's results
+    for f in inputs():
+        yield from _process_one(f)
+
+# Pick the handler by file name; an unknown name yields a RuntimeError value instead of raising.
+def _process_one(f: Path) -> Iterator[Res[Entity]]:
+    handlers = {
+        'user_tweets.db'    : _process_user_tweets,
+        'favorite_tweets.db': _process_favorite_tweets,
+    }
+    fname = f.name
+    handler = handlers.get(fname)
+    if handler is None:
+        yield RuntimeError(f"Couldn't find handler for {fname}")
+        return
+    with connect_readonly(f) as db:
+        yield from handler(db)
+
+# Rows of the 'lists' table ordered by 'time'; a row that fails to parse is yielded as its exception.
+def _process_user_tweets(db) -> Iterator[Res[Entity]]:
+    # dunno why it's called 'lists'
+    for r in db['lists'].all(order_by='time'):
+        try:
+            yield _IsTweet(_parse_tweet(r))
+        except Exception as e:
+            yield e
+
+# Same error handling as the user tweets reader, for the 'favorite_tweets' table, tagged as favorites.
+def _process_favorite_tweets(db) -> Iterator[Res[Entity]]:
+    for r in db['favorite_tweets'].all(order_by='time'):
+        try:
+            yield _IsFavorite(_parse_tweet(r))
+        except Exception as e:
+            yield e
+
+def _parse_tweet(row) -> Tweet:  # reads 'tweet_id', 'time', 'screen_name', 'text', 'other_url' from row
+    # TODO row['retweeter'] if not empty, would be user's name and means retweet?
+    # screen name would be the actual tweet's author
+    return Tweet(
+        id_str=str(row['tweet_id']),
+        created_at=datetime.fromtimestamp(row['time'] / 1000),  # 'time' is treated as epoch milliseconds; result is naive local time
+        screen_name=row['screen_name'],
+        text=row['text'],
+        # todo hmm text sometimes is trimmed with ellipsis? at least urls
+        urls=tuple(u for u in row['other_url'].split(' ') if u.strip()),
+    )
+
+# Public API. unique_everseen drops repeated entities (presumably overlapping exports -- TODO confirm).
+from more_itertools import unique_everseen
+def tweets() -> Iterator[Res[Tweet]]:  # entities tagged _IsTweet, unwrapped; errors are passed through
+    for x in unique_everseen(_entities()):
+        if isinstance(x, Exception):
+            yield x
+        elif isinstance(x, _IsTweet):
+            yield x.tweet
+
+def likes() -> Iterator[Res[Tweet]]:  # entities tagged _IsFavorite, unwrapped; errors are passed through
+    for x in unique_everseen(_entities()):
+        if isinstance(x, Exception):
+            yield x
+        elif isinstance(x, _IsFavorite):
+            yield x.tweet
+
diff --git a/my/twitter/twint.py b/my/twitter/twint.py
index 6f70ce3..c8d426e 100644
--- a/my/twitter/twint.py
+++ b/my/twitter/twint.py
@@ -8,6 +8,8 @@ from ..core.common import Paths
 from dataclasses import dataclass
 from my.config import twint as user_config
 
+# TODO move to twitter.twint config structure
+
 @dataclass
 class twint(user_config):
     export_path: Paths # path[s]/glob to the twint Sqlite database
@@ -63,6 +65,7 @@ class Tweet(NamedTuple):
             return []
         return ustr.split(',')
 
+    # TODO move to common
     @property
     def permalink(self) -> str:
         return f'https://twitter.com/{self.screen_name}/status/{self.id_str}'