github: add config templates + docs

- ghexport: use export_path (export_dir is still supported)
This commit is contained in:
Dima Gerasimov 2020-06-01 23:33:34 +01:00
parent ca39187c63
commit a267aeec5b
6 changed files with 118 additions and 14 deletions

View file

@ -25,6 +25,8 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][
- [[#mylastfm][my.lastfm]]
- [[#myreadingpolar][my.reading.polar]]
- [[#myinstapaper][my.instapaper]]
- [[#mygithubgdpr][my.github.gdpr]]
- [[#mygithubghexport][my.github.ghexport]]
:END:
* Intro
@ -74,6 +76,8 @@ modules = [
('lastfm' , 'my.lastfm' ),
('polar' , 'my.reading.polar' ),
('instapaper' , 'my.instapaper' ),
('github' , 'my.github.gdpr' ),
('github' , 'my.github.ghexport' ),
]
def indent(s, spaces=4):
@ -227,3 +231,31 @@ for cls, p in modules:
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport
instapexport: Optional[PathIsh] = None
#+end_src
** [[file:../my/github/gdpr.py][my.github.gdpr]]
Github data (uses [[https://github.com/settings/admin][official GDPR export]])
#+begin_src python
class github:
gdpr_dir: PathIsh # path to unpacked GDPR archive
#+end_src
** [[file:../my/github/ghexport.py][my.github.ghexport]]
Github data: events, comments, etc. (API data)
#+begin_src python
class github:
'''
Uses [[https://github.com/karlicoss/ghexport][ghexport]] outputs.
'''
# path[s]/glob to the exported JSON data
export_path: Paths
# path to a local clone of ghexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/ghexport
ghexport : Optional[PathIsh] = None
# path to a cache directory
# if omitted, will use /tmp
cache_dir: Optional[PathIsh] = None
#+end_src

View file

@ -1,3 +1,7 @@
"""
Unified Github data (merged from GDPR export and periodic API updates)
"""
from . import gdpr, ghexport
from .common import merge_events, Results

View file

@ -1,3 +1,7 @@
"""
Github data (uses [[https://github.com/settings/admin][official GDPR export]])
"""
from datetime import datetime
import json
from typing import Iterable, Dict, Any
@ -7,14 +11,25 @@ from ..core import get_files
from .common import Event, parse_dt
from my.config import github as config
# TODO later, use a separate user config? (github_gdpr)
from my.config import github as user_config
from dataclasses import dataclass
from ..core import PathIsh
@dataclass
class github(user_config):
    '''
    Settings for the GDPR-export based module; inherits from `user_config`
    (imported from my.config as `github`).
    '''
    gdpr_dir: PathIsh # path to unpacked GDPR archive
###
from ..core.cfg import make_config
config = make_config(github)
def events() -> Iterable[Res[Event]]:
"""
Parses events from GDPR export (https://github.com/settings/admin)
"""
# TODO allow using archive here?
# TODO FIXME allow using archive here?
files = get_files(config.gdpr_dir, glob='*.json')
handler_map = {
'schema' : None,

View file

@ -1,5 +1,61 @@
"""
Github data: events, comments, etc. (API data)
"""
from dataclasses import dataclass
from typing import Optional
from ..core import Paths, PathIsh
from my.config import github as user_config
@dataclass
class github(user_config):
    '''
    Uses [[https://github.com/karlicoss/ghexport][ghexport]] outputs.
    '''
    # path[s]/glob to the exported JSON data
    export_path: Paths

    # path to a local clone of ghexport
    # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/ghexport
    ghexport : Optional[PathIsh] = None

    # path to a cache directory
    # if omitted, will use /tmp
    cache_dir: Optional[PathIsh] = None

    @property
    def dal_module(self):
        '''
        Return the ghexport `dal` module.

        When `ghexport` is set, the module is loaded from that directory via
        `import_dir`; otherwise it is imported from `my.config.repos.ghexport`.
        '''
        rpath = self.ghexport
        if rpath is not None:
            # `core` is reached through the parent package everywhere else in this
            # module (`from ..core import ...`), so the import needs two dots;
            # a single dot would look for a `core` subpackage next to this file.
            from ..core.common import import_dir
            return import_dir(rpath, '.dal')
        else:
            import my.config.repos.ghexport.dal as dal
            return dal
###
# TODO perhaps using /tmp in case of None isn't ideal... maybe it should be treated as if cache is off
from ..core.cfg import make_config, Attrs
def migration(attrs: Attrs) -> Attrs:
    '''
    Map the legacy `export_dir` setting onto `export_path`.

    The mapping is updated in place and the same object is returned;
    the legacy key itself is left untouched.
    '''
    legacy_key = 'export_dir'
    if legacy_key not in attrs:
        # nothing to migrate
        return attrs
    attrs['export_path'] = attrs[legacy_key]
    return attrs
config = make_config(github, migration=migration)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import my.config.repos.ghexport.dal as dal
else:
dal = config.dal_module
############################
from pathlib import Path
from typing import Tuple, Optional, Iterable, Dict, Sequence
from typing import Tuple, Iterable, Dict, Sequence
from ..core import get_files
from ..core.common import mcachew
@ -7,18 +63,15 @@ from ..kython.kompress import CPath
from .common import Event, parse_dt, Results
from my.config import github as config
import my.config.repos.ghexport.dal as ghexport
def inputs() -> Sequence[Path]:
return get_files(config.export_dir)
return get_files(config.export_path)
def _dal():
def _dal() -> dal.DAL:
sources = inputs()
sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
return ghexport.DAL(sources)
return dal.DAL(sources)
# TODO hmm. not good, need to be lazier?...

View file

@ -18,9 +18,8 @@ except ImportError as e:
from dataclasses import dataclass
from ..core.common import Paths
from ..core import Paths
# TODO perhaps rename to twitter_archive? dunno
@dataclass
class twitter_archive(user_config):
    '''
    Settings for the Twitter archive module; inherits from `user_config`.
    '''
    export_path: Paths # path[s]/glob to the twitter archive takeout

View file

@ -14,6 +14,7 @@ from my.config import twint as user_config
class twint(user_config):
export_path: Paths # path[s]/glob to the twint Sqlite database
####
from ..core.cfg import make_config
config = make_config(twint)