github: add config templates + docs
- ghexport: use export_path (export_dir is still supported)
This commit is contained in:
parent
ca39187c63
commit
a267aeec5b
6 changed files with 118 additions and 14 deletions
|
@ -25,6 +25,8 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][
|
||||||
- [[#mylastfm][my.lastfm]]
|
- [[#mylastfm][my.lastfm]]
|
||||||
- [[#myreadingpolar][my.reading.polar]]
|
- [[#myreadingpolar][my.reading.polar]]
|
||||||
- [[#myinstapaper][my.instapaper]]
|
- [[#myinstapaper][my.instapaper]]
|
||||||
|
- [[#mygithubgdpr][my.github.gdpr]]
|
||||||
|
- [[#mygithubghexport][my.github.ghexport]]
|
||||||
:END:
|
:END:
|
||||||
|
|
||||||
* Intro
|
* Intro
|
||||||
|
@ -74,6 +76,8 @@ modules = [
|
||||||
('lastfm' , 'my.lastfm' ),
|
('lastfm' , 'my.lastfm' ),
|
||||||
('polar' , 'my.reading.polar' ),
|
('polar' , 'my.reading.polar' ),
|
||||||
('instapaper' , 'my.instapaper' ),
|
('instapaper' , 'my.instapaper' ),
|
||||||
|
('github' , 'my.github.gdpr' ),
|
||||||
|
('github' , 'my.github.ghexport' ),
|
||||||
]
|
]
|
||||||
|
|
||||||
def indent(s, spaces=4):
|
def indent(s, spaces=4):
|
||||||
|
@ -227,3 +231,31 @@ for cls, p in modules:
|
||||||
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport
|
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport
|
||||||
instapexport: Optional[PathIsh] = None
|
instapexport: Optional[PathIsh] = None
|
||||||
#+end_src
|
#+end_src
|
||||||
|
** [[file:../my/github/gdpr.py][my.github.gdpr]]
|
||||||
|
|
||||||
|
Github data (uses [[https://github.com/settings/admin][official GDPR export]])
|
||||||
|
|
||||||
|
#+begin_src python
|
||||||
|
class github:
|
||||||
|
gdpr_dir: PathIsh # path to unpacked GDPR archive
|
||||||
|
#+end_src
|
||||||
|
** [[file:../my/github/ghexport.py][my.github.ghexport]]
|
||||||
|
|
||||||
|
Github data: events, comments, etc. (API data)
|
||||||
|
|
||||||
|
#+begin_src python
|
||||||
|
class github:
|
||||||
|
'''
|
||||||
|
Uses [[https://github.com/karlicoss/ghexport][ghexport]] outputs.
|
||||||
|
'''
|
||||||
|
# path[s]/glob to the exported JSON data
|
||||||
|
export_path: Paths
|
||||||
|
|
||||||
|
# path to a local clone of ghexport
|
||||||
|
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/ghexport
|
||||||
|
ghexport : Optional[PathIsh] = None
|
||||||
|
|
||||||
|
# path to a cache directory
|
||||||
|
# if omitted, will use /tmp
|
||||||
|
cache_dir: Optional[PathIsh] = None
|
||||||
|
#+end_src
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
"""
|
||||||
|
Unified Github data (merged from GDPR export and periodic API updates)
|
||||||
|
"""
|
||||||
|
|
||||||
from . import gdpr, ghexport
|
from . import gdpr, ghexport
|
||||||
|
|
||||||
from .common import merge_events, Results
|
from .common import merge_events, Results
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
"""
|
||||||
|
Github data (uses [[https://github.com/settings/admin][official GDPR export]])
|
||||||
|
"""
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
from typing import Iterable, Dict, Any
|
from typing import Iterable, Dict, Any
|
||||||
|
@ -7,14 +11,25 @@ from ..core import get_files
|
||||||
|
|
||||||
from .common import Event, parse_dt
|
from .common import Event, parse_dt
|
||||||
|
|
||||||
from my.config import github as config
|
# TODO later, use a separate user config? (github_gdpr)
|
||||||
|
from my.config import github as user_config
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from ..core import PathIsh
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class github(user_config):
|
||||||
|
gdpr_dir: PathIsh # path to unpacked GDPR archive
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
|
|
||||||
|
from ..core.cfg import make_config
|
||||||
|
config = make_config(github)
|
||||||
|
|
||||||
|
|
||||||
def events() -> Iterable[Res[Event]]:
|
def events() -> Iterable[Res[Event]]:
|
||||||
"""
|
# TODO FIXME allow using archive here?
|
||||||
Parses events from GDPR export (https://github.com/settings/admin)
|
|
||||||
"""
|
|
||||||
# TODO allow using archive here?
|
|
||||||
files = get_files(config.gdpr_dir, glob='*.json')
|
files = get_files(config.gdpr_dir, glob='*.json')
|
||||||
handler_map = {
|
handler_map = {
|
||||||
'schema' : None,
|
'schema' : None,
|
||||||
|
|
|
@ -1,5 +1,61 @@
|
||||||
|
"""
|
||||||
|
Github data: events, comments, etc. (API data)
|
||||||
|
"""
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from ..core import Paths, PathIsh
|
||||||
|
|
||||||
|
from my.config import github as user_config
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class github(user_config):
    '''
    Uses [[https://github.com/karlicoss/ghexport][ghexport]] outputs.
    '''
    # path[s]/glob to the exported JSON data
    export_path: Paths

    # path to a local clone of ghexport
    # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/ghexport
    ghexport : Optional[PathIsh] = None

    # path to a cache directory
    # if omitted, will use /tmp
    cache_dir: Optional[PathIsh] = None

    @property
    def dal_module(self):
        # Resolves the ghexport 'dal' module used to read the exported data:
        # - if 'ghexport' is set, it is loaded via import_dir from that path
        #   (presumably a local checkout of the ghexport repository -- see the field comment above)
        # - otherwise, falls back to the my.config.repos.ghexport.dal module
        rpath = self.ghexport
        if rpath is not None:
            # this module sits inside the my/github/ subpackage, so the shared 'core' package
            # is one level up -- hence '..core' (same as the other imports of core in this file),
            # a single dot would look for a non-existent my.github.core package
            from ..core.common import import_dir
            return import_dir(rpath, '.dal')
        else:
            import my.config.repos.ghexport.dal as dal
            return dal
|
||||||
|
###
|
||||||
|
|
||||||
|
# TODO perhaps using /tmp in case of None isn't ideal... maybe it should be treated as if cache is off
|
||||||
|
|
||||||
|
from ..core.cfg import make_config, Attrs
|
||||||
|
def migration(attrs: Attrs) -> Attrs:
    # Backwards compatibility shim for user configs: the setting used to be
    # called 'export_dir'; when that legacy name is present, its value is
    # copied under the current name 'export_path'.
    # The passed attrs are updated in place and the same object is returned.
    legacy_key = 'export_dir'
    if legacy_key not in attrs:
        # nothing to migrate
        return attrs
    attrs['export_path'] = attrs[legacy_key]
    return attrs
|
||||||
|
config = make_config(github, migration=migration)
|
||||||
|
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import my.config.repos.ghexport.dal as dal
|
||||||
|
else:
|
||||||
|
dal = config.dal_module
|
||||||
|
|
||||||
|
############################
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Tuple, Optional, Iterable, Dict, Sequence
|
from typing import Tuple, Iterable, Dict, Sequence
|
||||||
|
|
||||||
from ..core import get_files
|
from ..core import get_files
|
||||||
from ..core.common import mcachew
|
from ..core.common import mcachew
|
||||||
|
@ -7,18 +63,15 @@ from ..kython.kompress import CPath
|
||||||
|
|
||||||
from .common import Event, parse_dt, Results
|
from .common import Event, parse_dt, Results
|
||||||
|
|
||||||
from my.config import github as config
|
|
||||||
import my.config.repos.ghexport.dal as ghexport
|
|
||||||
|
|
||||||
|
|
||||||
def inputs() -> Sequence[Path]:
|
def inputs() -> Sequence[Path]:
|
||||||
return get_files(config.export_dir)
|
return get_files(config.export_path)
|
||||||
|
|
||||||
|
|
||||||
def _dal():
|
def _dal() -> dal.DAL:
|
||||||
sources = inputs()
|
sources = inputs()
|
||||||
sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
|
sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
|
||||||
return ghexport.DAL(sources)
|
return dal.DAL(sources)
|
||||||
|
|
||||||
|
|
||||||
# TODO hmm. not good, need to be lazier?...
|
# TODO hmm. not good, need to be lazier?...
|
||||||
|
|
|
@ -18,9 +18,8 @@ except ImportError as e:
|
||||||
|
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from ..core.common import Paths
|
from ..core import Paths
|
||||||
|
|
||||||
# TODO perhaps rename to twitter_archive? dunno
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class twitter_archive(user_config):
|
class twitter_archive(user_config):
|
||||||
export_path: Paths # path[s]/glob to the twitter archive takeout
|
export_path: Paths # path[s]/glob to the twitter archive takeout
|
||||||
|
|
|
@ -14,6 +14,7 @@ from my.config import twint as user_config
|
||||||
class twint(user_config):
|
class twint(user_config):
|
||||||
export_path: Paths # path[s]/glob to the twint Sqlite database
|
export_path: Paths # path[s]/glob to the twint Sqlite database
|
||||||
|
|
||||||
|
####
|
||||||
|
|
||||||
from ..core.cfg import make_config
|
from ..core.cfg import make_config
|
||||||
config = make_config(twint)
|
config = make_config(twint)
|
||||||
|
|
Loading…
Add table
Reference in a new issue