# HPI/my/twitter/twint.py

"""
Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export.
"""

REQUIRES = ['dataset']

from ..core.common import Paths
from dataclasses import dataclass
from my.config import twint as user_config

# TODO move to twitter.twint config structure

@dataclass
class twint(user_config):
    """
    Configuration schema for this module.

    Subclasses the ``twint`` object imported from ``my.config`` so that
    user-provided attributes are inherited; the only attribute declared here
    is ``export_path``.
    """
    export_path: Paths # path[s]/glob to the twint Sqlite database

####

from ..core.cfg import make_config
# Module-level configuration object built from the ``twint`` dataclass above.
# NOTE(review): presumably make_config instantiates it from the user's my.config values -- confirm in core.cfg
config = make_config(twint)


from datetime import datetime
from typing import NamedTuple, Iterable, List
from pathlib import Path

from ..core.common import get_files, LazyLogger, Json
from ..core.time import abbr_to_timezone

# Module-level logger, named after this module.
log = LazyLogger(__name__)


def get_db_path() -> Path:
    """
    Return the path of the twint database to read.

    Every path matched by ``config.export_path`` is collected via ``get_files``
    and the greatest one (by ``Path`` ordering) is picked.

    Raises:
        ValueError: if ``config.export_path`` does not match any file.
    """
    # default=None lets us tell "nothing matched" apart from a real path and
    # report it clearly, instead of max()'s generic "empty sequence" error
    newest = max(get_files(config.export_path), default=None)
    if newest is None:
        raise ValueError(f'no twint database found for export_path={config.export_path!r}')
    return newest


class Tweet(NamedTuple):
    """
    Thin read-only view over a single database row describing a tweet.

    Every accessor reads directly from ``row``; nothing is cached or validated.
    """
    row: Json

    @property
    def id_str(self) -> str:
        """Value of the row's ``id_str`` column."""
        return self.row['id_str']

    @property
    def created_at(self) -> datetime:
        """
        Creation time, built from the ``created_at`` and ``timezone`` columns.

        The result is expressed in whatever timezone ``abbr_to_timezone``
        returns for the row's ``timezone`` value.
        """
        # the stored value is divided by 1000 before being used as a POSIX timestamp
        # NOTE(review): presumably twint stores milliseconds -- confirm against the twint schema
        seconds = self.row['created_at'] / 1000
        tz_abbr = self.row['timezone']
        tz = abbr_to_timezone(tz_abbr)
        return datetime.fromtimestamp(seconds, tz=tz)

    # TODO permalink -- take user into account?
    @property
    def screen_name(self) -> str:
        """Value of the row's ``screen_name`` column."""
        return self.row['screen_name']

    @property
    def text(self) -> str:
        """Tweet body, taken from the row's ``tweet`` column."""
        return self.row['tweet']

    @property
    def urls(self) -> List[str]:
        """
        URLs attached to the tweet, split out of the comma-separated ``urls`` column.

        Returns an empty list when the column is empty or NULL.
        """
        ustr = self.row['urls']
        # truthiness check covers both '' and None (a NULL column);
        # without it ''.split(',') would yield [''] and len(None) would raise
        if not ustr:
            return []
        return ustr.split(',')

    # TODO move to common
    @property
    def permalink(self) -> str:
        """Link to the tweet, assembled from ``screen_name`` and ``id_str``."""
        return f'https://twitter.com/{self.screen_name}/status/{self.id_str}'


    # TODO urls
    def __repr__(self) -> str:
        """Compact representation showing the id, creation time and text only."""
        return f'Tweet(id_str={self.id_str}, created_at={self.created_at}, text={self.text})'

# https://github.com/twintproject/twint/issues/196
# ugh. so it dumps everything in tweet table, and there is no good way to tell between fav/original tweet.
# it might result in some tweets missing from the timeline if you happened to like them...
# not sure what to do with it
# alternatively, could ask the user to run separate databases for tweets and favs?
# TODO think about it

# Query template shared by tweets() and likes().
# Selects every column of the tweets table, LEFT JOINed against favorites on
# tweets.id_str = favorites.tweet_id, ordered by tweets.created_at.
# The {where} placeholder is filled in with str.format: filtering on
# F.tweet_id being NULL / NOT NULL separates rows without / with a matching favorite.
_QUERY = '''
SELECT T.*
FROM      tweets    as T
LEFT JOIN favorites as F
ON    T.id_str = F.tweet_id
WHERE {where}
ORDER BY T.created_at
'''

def _get_db():
    """Open the database selected by get_db_path() through connect_readonly."""
    # imported lazily, inside the function, as in the rest of this module
    from ..core.dataset import connect_readonly
    return connect_readonly(get_db_path())


def tweets() -> Iterable[Tweet]:
    """
    Lazily yield rows of the tweets table that have no matching favorites entry,
    in the order defined by the query (created_at).
    """
    database = _get_db()
    sql = _QUERY.format(where='F.tweet_id IS NULL')
    for row in database.query(sql):
        yield Tweet(row)


def likes() -> Iterable[Tweet]:
    """
    Lazily yield rows of the tweets table that do have a matching favorites entry,
    in the order defined by the query (created_at).
    """
    database = _get_db()
    rows = database.query(_QUERY.format(where='F.tweet_id IS NOT NULL'))
    yield from (Tweet(row) for row in rows)


def stats():
    """Merge the results of ``stat`` for tweets and likes into a single dict."""
    from ..core import stat
    merged = {}
    # same order as before: tweets first, then likes (later keys win on clash)
    for provider in (tweets, likes):
        merged.update(stat(provider))
    return merged