Adapt takeout and twitter configs to the new pattern

Works fairly well so far?
This commit is contained in:
Dima Gerasimov 2020-05-10 15:56:57 +01:00
parent 8cbbafae1d
commit e92ca215e3
4 changed files with 45 additions and 23 deletions

View file

@ -1,5 +1,5 @@
from functools import lru_cache from functools import lru_cache
from datetime import datetime from datetime import datetime, tzinfo
import pytz # type: ignore import pytz # type: ignore
@ -11,6 +11,7 @@ tz_lookup = {
tz_lookup['UTC'] = pytz.utc # ugh. otherwise it's Zulu... tz_lookup['UTC'] = pytz.utc # ugh. otherwise it's Zulu...
# TODO dammit, lru_cache interferes with mypy?
@lru_cache(None) @lru_cache(None)
def abbr_to_timezone(abbr: str): def abbr_to_timezone(abbr: str) -> tzinfo:
return tz_lookup[abbr] return tz_lookup[abbr]

View file

@ -1,22 +1,17 @@
from typing import NamedTuple from dataclasses import dataclass
from ...core.common import Paths from ...core.common import Paths
class google(NamedTuple): from my.config import google as user_config
@dataclass
class google(user_config):
''' '''
Expects [[https://takeout.google.com][Google Takeout]] data. Expects [[https://takeout.google.com][Google Takeout]] data.
''' '''
takeout_path: Paths # path/paths/glob for the takeout zips takeout_path: Paths # path/paths/glob for the takeout zips
### ###
from my.config import google as user_config from ...core.cfg import make_config
config = make_config(google)
###
# TODO: generalize the thing from my.reddit
# i.e. config = make_config(google, user_config)
# reveal_type(config) should be 'google'
config = google(**{k: v for k, v in vars(user_config).items() if k in google._fields})
###
from pathlib import Path from pathlib import Path
from typing import Optional, Iterable from typing import Optional, Iterable

View file

@ -1,6 +1,22 @@
""" """
Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]]) Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
""" """
from dataclasses import dataclass
from ..core.common import Paths
from my.config import twitter as user_config
@dataclass
class twitter(user_config):
export_path: Paths # path[s]/glob to the twitter archive takeout
###
from ..core.cfg import make_config
config = make_config(twitter)
from datetime import datetime from datetime import datetime
from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple
from pathlib import Path from pathlib import Path
@ -13,14 +29,13 @@ import pytz
from ..common import PathIsh, get_files, LazyLogger, Json from ..common import PathIsh, get_files, LazyLogger, Json
from ..kython import kompress from ..kython import kompress
from my.config import twitter as config
logger = LazyLogger(__name__) logger = LazyLogger(__name__)
def _get_export() -> Path: def _get_export() -> Path:
return max(get_files(config.export_path, '*.zip')) return max(get_files(config.export_path))
Tid = str Tid = str

View file

@ -1,24 +1,35 @@
""" """
Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export. Twitter data (tweets and favorites).
""" """
from ..core.common import Paths
from dataclasses import dataclass
from my.config import twint as user_config
@dataclass
class twitter(user_config):
'''
Uses [[https://github.com/twintproject/twint][Twint]] data export.
'''
export_path: Paths # path[s]/glob to twint Sqlite database
from ..core.cfg import make_config
config = make_config(twitter)
from datetime import datetime from datetime import datetime
from typing import NamedTuple, Iterable, List from typing import NamedTuple, Iterable, List
from pathlib import Path from pathlib import Path
from ..common import PathIsh, get_files, LazyLogger, Json from ..core.common import get_files, LazyLogger, Json
from ..core.time import abbr_to_timezone from ..core.time import abbr_to_timezone
from my.config import twint as config
log = LazyLogger(__name__) log = LazyLogger(__name__)
def get_db_path() -> Path: def get_db_path() -> Path:
# TODO don't like the hardcoded extension. maybe, config should decide? return max(get_files(config.export_path))
# or, glob only applies to directories?
return max(get_files(config.export_path, glob='*.db'))
class Tweet(NamedTuple): class Tweet(NamedTuple):