Adapt takeout and twitter configs to the new pattern

Works fairly well so far?
This commit is contained in:
Dima Gerasimov 2020-05-10 15:56:57 +01:00
parent 8cbbafae1d
commit e92ca215e3
4 changed files with 45 additions and 23 deletions

View file

@ -1,5 +1,5 @@
from functools import lru_cache
from datetime import datetime
from datetime import datetime, tzinfo
import pytz # type: ignore
@ -11,6 +11,7 @@ tz_lookup = {
tz_lookup['UTC'] = pytz.utc # ugh. otherwise it's Zulu...
# TODO dammit, lru_cache interferes with mypy?
@lru_cache(None)
def abbr_to_timezone(abbr: str):
def abbr_to_timezone(abbr: str) -> tzinfo:
return tz_lookup[abbr]

View file

@ -1,22 +1,17 @@
from typing import NamedTuple
from dataclasses import dataclass
from ...core.common import Paths
class google(NamedTuple):
from my.config import google as user_config
@dataclass
class google(user_config):
'''
Expects [[https://takeout.google.com][Google Takeout]] data.
'''
takeout_path: Paths # path/paths/glob for the takeout zips
###
from my.config import google as user_config
###
# TODO: generalize the thing from my.reddit
# i.e. config = make_config(google, user_config)
# reveal_type(config) should be 'google'
config = google(**{k: v for k, v in vars(user_config).items() if k in google._fields})
###
from ...core.cfg import make_config
config = make_config(google)
from pathlib import Path
from typing import Optional, Iterable

View file

@ -1,6 +1,22 @@
"""
Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
"""
from dataclasses import dataclass
from ..core.common import Paths
from my.config import twitter as user_config
@dataclass
class twitter(user_config):
export_path: Paths # path[s]/glob to the twitter archive takeout
###
from ..core.cfg import make_config
config = make_config(twitter)
from datetime import datetime
from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple
from pathlib import Path
@ -13,14 +29,13 @@ import pytz
from ..common import PathIsh, get_files, LazyLogger, Json
from ..kython import kompress
from my.config import twitter as config
logger = LazyLogger(__name__)
def _get_export() -> Path:
return max(get_files(config.export_path, '*.zip'))
return max(get_files(config.export_path))
Tid = str

View file

@ -1,24 +1,35 @@
"""
Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export.
Twitter data (tweets and favorites).
"""
from ..core.common import Paths
from dataclasses import dataclass
from my.config import twint as user_config
@dataclass
class twitter(user_config):
'''
Uses [[https://github.com/twintproject/twint][Twint]] data export.
'''
export_path: Paths # path[s]/glob to twint SQLite database
from ..core.cfg import make_config
config = make_config(twitter)
from datetime import datetime
from typing import NamedTuple, Iterable, List
from pathlib import Path
from ..common import PathIsh, get_files, LazyLogger, Json
from ..core.common import get_files, LazyLogger, Json
from ..core.time import abbr_to_timezone
from my.config import twint as config
log = LazyLogger(__name__)
def get_db_path() -> Path:
# TODO don't like the hardcoded extension; maybe the config should decide?
# or maybe the glob should only apply to directories?
return max(get_files(config.export_path, glob='*.db'))
return max(get_files(config.export_path))
class Tweet(NamedTuple):