# Source: HPI/my/twitter/talon.py (164 lines, 4.9 KiB, Python)

"""
Twitter data from Talon app database (in =/data/data/com.klinker.android.twitter_l/databases/=)
"""
from __future__ import annotations
import re
import sqlite3
from abc import abstractmethod
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator, Sequence, Union
from my.core import Paths, Res, datetime_aware, get_files
from my.core.common import unique_everseen
from my.core.sqlite import sqlite_connection
from .common import TweetId, permalink
class config:
    """Settings this module expects the user's configuration to provide."""

    # NOTE: the class does not derive from ABC, so the abstract marker is not
    # enforced at instantiation time; the error only surfaces when the
    # property is actually read on an object that does not override it.
    @property
    @abstractmethod
    def export_path(self) -> Paths:
        """Location(s) of the exported Talon database files (consumed by ``inputs``)."""
        raise NotImplementedError()
def make_config() -> config:
    """Build the configuration object used by this module.

    The user's ``my.config.twitter.talon`` class is placed before ``config``
    in the bases, so attributes defined by the user take precedence over the
    placeholders declared on ``config``.
    """
    # resolved at call time rather than at module import time
    from my.config import twitter as user_config

    class combined_config(user_config.talon, config):
        ...

    merged = combined_config()
    return merged
def inputs() -> Sequence[Path]:
    """Resolve the configured ``export_path`` into database files via ``get_files``."""
    cfg = make_config()
    return get_files(cfg.export_path)
@dataclass(unsafe_hash=True)
class Tweet:
    """A single tweet as reconstructed from a row of a Talon database."""

    # tweet id, stored as a string
    id_str: TweetId
    # timezone-aware creation time
    created_at: datetime_aware
    # handle the tweet is attributed to (the retweeter for retweets, see _parse_tweet)
    screen_name: str
    # tweet text
    text: str
    # URLs taken from the row's 'other_url' column
    urls: Sequence[str]

    @property
    def permalink(self) -> str:
        """Link to this tweet, as built by the shared ``permalink`` helper."""
        author, tweet_id = self.screen_name, self.id_str
        return permalink(screen_name=author, id=tweet_id)
# Thin wrappers whose only job is to tell the user's tweets apart from favorites.
@dataclass(unsafe_hash=True)
class _IsTweet:
    """Tags the wrapped tweet as coming from ``user_tweets.db``."""

    tweet: Tweet


# NOTE: the name is misspelled ('Favorire'); it is kept as is because other
# code in this module refers to it.
@dataclass(unsafe_hash=True)
class _IsFavorire:
    """Tags the wrapped tweet as coming from ``favorite_tweets.db``."""

    tweet: Tweet


# Everything the per-database readers can emit, errors aside.
Entity = Union[_IsTweet, _IsFavorire]
def _entities() -> Iterator[Res[Entity]]:
    """Stream entities (or errors) from every input database, one file after another."""
    for db_path in inputs():
        # each file is fully drained before moving on to the next one
        yield from _process_one(db_path)
def _process_one(f: Path) -> Iterator[Res[Entity]]:
    """Yield the entities stored in a single Talon database file.

    The file name decides which table reader is used. A file with an
    unrecognised name produces one ``RuntimeError`` item (yielded, not
    raised) and nothing else.
    """
    fname = f.name
    if fname == 'user_tweets.db':
        handler = _process_user_tweets
    elif fname == 'favorite_tweets.db':
        handler = _process_favorite_tweets
    else:
        yield RuntimeError(f"Could not find handler for {fname}")
        return

    # rows have to be addressable by column name, which _parse_tweet relies on
    with sqlite_connection(f, immutable=True, row_factory='row') as db:
        yield from handler(db)
def _process_user_tweets(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
    """Yield an ``_IsTweet`` for each row of the ``lists`` table, ordered by ``time``.

    A row that fails to parse is yielded as the exception it raised, so one
    bad row does not abort the iteration.
    """
    # the table holding the user's tweets is called 'lists' for some reason
    rows = db.execute('SELECT * FROM lists ORDER BY time')
    for row in rows:
        try:
            entity = _IsTweet(_parse_tweet(row))
        except Exception as e:
            yield e
        else:
            yield entity
def _process_favorite_tweets(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
    """Yield an ``_IsFavorire`` for each row of ``favorite_tweets``, ordered by ``time``.

    A row that fails to parse is yielded as the exception it raised, so one
    bad row does not abort the iteration.
    """
    rows = db.execute('SELECT * FROM favorite_tweets ORDER BY time')
    for row in rows:
        try:
            entity = _IsFavorire(_parse_tweet(row))
        except Exception as e:
            yield e
        else:
            yield entity
def _expand_truncated_urls(text: str, urls: Sequence[str]) -> str:
    """Replace URLs that were collapsed with a trailing '...' in *text* by their full form."""
    if not urls:
        return text
    ellipsis = '...'
    # might have something collapsed
    # e.g. deepmind.com/blog/article/Comp...
    # NOTE: need one character of lookahead to split on the ellipsis.. hence ?=
    pattern = r'(?:^|\s)([\S]+)' + re.escape(ellipsis) + r'(?=\s|$)'
    for short in re.findall(pattern, text):
        # the first full URL containing the visible prefix wins
        full = next((u for u in urls if short in u), None)
        if full is not None:
            text = text.replace(short + ellipsis, full)
    return text


def _parse_tweet(row: sqlite3.Row) -> Tweet:
    """Convert one row of a Talon tweets table into a ``Tweet``.

    Reads the ``time``, ``text``, ``other_url``, ``screen_name``, ``retweeter``
    and ``tweet_id`` columns. URLs shortened with an ellipsis inside the text
    are expanded from ``other_url`` where a match can be found. For retweets
    the text is prefixed with ``RT @<original author>:`` and the tweet is
    attributed to the retweeter.

    Raises whatever the row access / conversions raise (e.g. a missing
    column); callers in this module turn that into a yielded error.
    """
    # ok so looks like it's tz aware..
    # https://github.com/klinker24/talon-for-twitter-android/blob/c3b0612717ba3ea93c0cae6d907d7d86d640069e/app/src/main/java/com/klinker/android/twitter_l/data/sq_lite/FavoriteTweetsDataSource.java#L95
    # uses https://docs.oracle.com/javase/7/docs/api/java/util/Date.html#getTime()
    # and it's created here, so looks like it's properly parsed from the api
    # https://github.com/Twitter4J/Twitter4J/blob/8376fade8d557896bb9319fb46e39a55b134b166/twitter4j-core/src/internal-json/java/twitter4j/ParseUtil.java#L69-L79
    # 'time' is in milliseconds, hence the division
    created_at = datetime.fromtimestamp(row['time'] / 1000, tz=timezone.utc)

    text = row['text']
    # Split once, on any whitespace, and use the same list both for expanding
    # the text and for the resulting Tweet.urls, so the two can never disagree
    # (and no entry carries stray tabs/newlines). A NULL column means no urls.
    urls = tuple((row['other_url'] or '').split())
    # try expanding URLs.. sadly there are no positions in the db
    text = _expand_truncated_urls(text, urls)

    screen_name = row['screen_name']
    # considering id_str is referring to the retweeter's tweet (rather than the original tweet)
    # makes sense for the permalink to contain the retweeter as well
    # also makes it more compatible to twitter archive
    # a bit sad to lose structured information about RT, but then again we could always just parse it..
    retweeter = row['retweeter']
    if retweeter:  # empty string or NULL both mean "not a retweet"
        text = f'RT @{screen_name}: {text}'
        screen_name = retweeter

    return Tweet(
        id_str=str(row['tweet_id']),
        created_at=created_at,
        screen_name=screen_name,
        text=text,
        # todo hmm text sometimes is trimmed with ellipsis? at least urls
        urls=urls,
    )
def tweets() -> Iterator[Res[Tweet]]:
    """Yield the tweets tagged ``_IsTweet``, passing errors through unchanged.

    Favorites are skipped here; see ``likes`` for those.
    """
    # The function object itself is handed over (not its result);
    # unique_everseen presumably calls it and filters out repeated items --
    # confirm against my.core.common.
    for item in unique_everseen(_entities):
        if isinstance(item, Exception):
            yield item
            continue
        if isinstance(item, _IsTweet):
            yield item.tweet
def likes() -> Iterator[Res[Tweet]]:
    """Yield the tweets tagged ``_IsFavorire``, passing errors through unchanged.

    The user's own tweets are skipped here; see ``tweets`` for those.
    """
    # The function object itself is handed over (not its result);
    # unique_everseen presumably calls it and filters out repeated items --
    # confirm against my.core.common.
    for item in unique_everseen(_entities):
        if isinstance(item, Exception):
            yield item
            continue
        if isinstance(item, _IsFavorire):
            yield item.tweet
# TODO maybe should combine all public iterators into a stats()