HPI/my/twitter/twint.py

118 lines
2.8 KiB
Python

"""
Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export.
"""
REQUIRES = ['dataset']
from ..core.common import Paths
from ..core.error import Res
from dataclasses import dataclass
from my.config import twint as user_config
# TODO move to twitter.twint config structure
@dataclass
class twint(user_config):
export_path: Paths # path[s]/glob to the twint Sqlite database
####
from ..core.cfg import make_config
config = make_config(twint)
from datetime import datetime
from typing import NamedTuple, Iterator, List
from pathlib import Path
from ..core.common import get_files, LazyLogger, Json, datetime_aware
from ..core.time import localize_with_abbr
log = LazyLogger(__name__)
def get_db_path() -> Path:
return max(get_files(config.export_path))
from .common import TweetId, permalink
class Tweet(NamedTuple):
row: Json
@property
def id_str(self) -> TweetId:
return self.row['id_str']
@property
def created_at(self) -> datetime_aware:
seconds = self.row['created_at'] / 1000
tz_abbr = self.row['timezone']
naive = datetime.fromtimestamp(seconds)
return localize_with_abbr(naive, abbr=tz_abbr)
@property
def screen_name(self) -> str:
return self.row['screen_name']
@property
def text(self) -> str:
return self.row['tweet']
@property
def urls(self) -> List[str]:
ustr = self.row['urls']
if len(ustr) == 0:
return []
return ustr.split(',')
@property
def permalink(self) -> str:
return permalink(screen_name=self.screen_name, id=self.id_str)
# TODO urls
def __repr__(self):
return f'Tweet(id_str={self.id_str}, created_at={self.created_at}, text={self.text})'
# https://github.com/twintproject/twint/issues/196
# ugh. so it dumps everything in tweet table, and there is no good way to tell between fav/original tweet.
# it might result in some tweets missing from the timeline if you happened to like them...
# not sure what to do with it
# alternatively, could ask the user to run separate databases for tweets and favs?
# TODO think about it
_QUERY = '''
SELECT T.*
FROM tweets as T
LEFT JOIN favorites as F
ON T.id_str = F.tweet_id
WHERE {where}
ORDER BY T.created_at
'''
def _get_db():
from ..core.dataset import connect_readonly
db_path = get_db_path()
return connect_readonly(db_path)
def tweets() -> Iterator[Res[Tweet]]:
db = _get_db()
res = db.query(_QUERY.format(where='F.tweet_id IS NULL'))
yield from map(Tweet, res)
def likes() -> Iterator[Res[Tweet]]:
db = _get_db()
res = db.query(_QUERY.format(where='F.tweet_id IS NOT NULL'))
yield from map(Tweet, res)
from ..core import stat, Stats
def stats() -> Stats:
return {
**stat(tweets),
**stat(likes),
}