diff --git a/doc/MODULES.org b/doc/MODULES.org index 763bebd..a30e814 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -25,6 +25,8 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][ - [[#mylastfm][my.lastfm]] - [[#myreadingpolar][my.reading.polar]] - [[#myinstapaper][my.instapaper]] + - [[#mygithubgdpr][my.github.gdpr]] + - [[#mygithubghexport][my.github.ghexport]] :END: * Intro @@ -74,6 +76,8 @@ modules = [ ('lastfm' , 'my.lastfm' ), ('polar' , 'my.reading.polar' ), ('instapaper' , 'my.instapaper' ), + ('github' , 'my.github.gdpr' ), + ('github' , 'my.github.ghexport' ), ] def indent(s, spaces=4): @@ -227,3 +231,31 @@ for cls, p in modules: # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport instapexport: Optional[PathIsh] = None #+end_src +** [[file:../my/github/gdpr.py][my.github.gdpr]] + + Github data (uses [[https://github.com/settings/admin][official GDPR export]]) + + #+begin_src python + class github: + gdpr_dir: PathIsh # path to unpacked GDPR archive + #+end_src +** [[file:../my/github/ghexport.py][my.github.ghexport]] + + Github data: events, comments, etc. (API data) + + #+begin_src python + class github: + ''' + Uses [[https://github.com/karlicoss/ghexport][ghexport]] outputs. + ''' + # path[s]/glob to the exported JSON data + export_path: Paths + + # path to a local clone of ghexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/ghexport + ghexport : Optional[PathIsh] = None + + # path to a cache directory + # if omitted, will use /tmp + cache_dir: Optional[PathIsh] = None + #+end_src diff --git a/my/github/all.py b/my/github/all.py index 61dcef3..f885dde 100644 --- a/my/github/all.py +++ b/my/github/all.py @@ -1,3 +1,7 @@ +""" +Unified Github data (merged from GDPR export and periodic API updates) +""" + from . 
import gdpr, ghexport from .common import merge_events, Results diff --git a/my/github/gdpr.py b/my/github/gdpr.py index b1504e9..cc813a8 100644 --- a/my/github/gdpr.py +++ b/my/github/gdpr.py @@ -1,3 +1,7 @@ +""" +Github data (uses [[https://github.com/settings/admin][official GDPR export]]) +""" + from datetime import datetime import json from typing import Iterable, Dict, Any @@ -7,14 +11,25 @@ from ..core import get_files from .common import Event, parse_dt -from my.config import github as config +# TODO later, use a separate user config? (github_gdpr) +from my.config import github as user_config + +from dataclasses import dataclass +from ..core import PathIsh + +@dataclass +class github(user_config): + gdpr_dir: PathIsh # path to unpacked GDPR archive + +### + + +from ..core.cfg import make_config +config = make_config(github) def events() -> Iterable[Res[Event]]: - """ - Parses events from GDPR export (https://github.com/settings/admin) - """ - # TODO allow using archive here? + # TODO FIXME allow using archive here? files = get_files(config.gdpr_dir, glob='*.json') handler_map = { 'schema' : None, diff --git a/my/github/ghexport.py b/my/github/ghexport.py index 2a7c239..30fd76c 100644 --- a/my/github/ghexport.py +++ b/my/github/ghexport.py @@ -1,5 +1,61 @@ +""" +Github data: events, comments, etc. (API data) +""" +from dataclasses import dataclass +from typing import Optional + +from ..core import Paths, PathIsh + +from my.config import github as user_config + + +@dataclass +class github(user_config): + ''' + Uses [[https://github.com/karlicoss/ghexport][ghexport]] outputs. 
+ ''' + # path[s]/glob to the exported JSON data + export_path: Paths + + # path to a local clone of ghexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/ghexport + ghexport : Optional[PathIsh] = None + + # path to a cache directory + # if omitted, will use /tmp + cache_dir: Optional[PathIsh] = None + + @property + def dal_module(self): + rpath = self.ghexport + if rpath is not None: + from ..core.common import import_dir + return import_dir(rpath, '.dal') + else: + import my.config.repos.ghexport.dal as dal + return dal +### + +# TODO perhaps using /tmp in case of None isn't ideal... maybe it should be treated as if cache is off + +from ..core.cfg import make_config, Attrs +def migration(attrs: Attrs) -> Attrs: + if 'export_dir' in attrs: # legacy name + attrs['export_path'] = attrs['export_dir'] + return attrs +config = make_config(github, migration=migration) + + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + import my.config.repos.ghexport.dal as dal +else: + dal = config.dal_module + +############################ + from pathlib import Path -from typing import Tuple, Optional, Iterable, Dict, Sequence +from typing import Tuple, Iterable, Dict, Sequence from ..core import get_files from ..core.common import mcachew @@ -7,18 +63,15 @@ from ..kython.kompress import CPath from .common import Event, parse_dt, Results -from my.config import github as config -import my.config.repos.ghexport.dal as ghexport - def inputs() -> Sequence[Path]: - return get_files(config.export_dir) + return get_files(config.export_path) -def _dal(): +def _dal() -> dal.DAL: sources = inputs() sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg? - return ghexport.DAL(sources) + return dal.DAL(sources) # TODO hmm. not good, need to be lazier?... 
diff --git a/my/twitter/archive.py b/my/twitter/archive.py index 031701f..c44272c 100755 --- a/my/twitter/archive.py +++ b/my/twitter/archive.py @@ -18,9 +18,8 @@ except ImportError as e: from dataclasses import dataclass -from ..core.common import Paths +from ..core import Paths -# TODO perhaps rename to twitter_archive? dunno @dataclass class twitter_archive(user_config): export_path: Paths # path[s]/glob to the twitter archive takeout diff --git a/my/twitter/twint.py b/my/twitter/twint.py index 0c45a0d..3a2b327 100644 --- a/my/twitter/twint.py +++ b/my/twitter/twint.py @@ -14,6 +14,7 @@ from my.config import twint as user_config class twint(user_config): export_path: Paths # path[s]/glob to the twint Sqlite database +#### from ..core.cfg import make_config config = make_config(twint)