github: start moving to a proper arbitrated module
This commit is contained in:
parent
67cf4d0c04
commit
d7aff1be3f
5 changed files with 27 additions and 13 deletions
|
@ -116,6 +116,7 @@ from ..kython.klogging import setup_logger, LazyLogger
|
|||
|
||||
Paths = Union[Sequence[PathIsh], PathIsh]
|
||||
|
||||
# TODO support '' for emtpy path
|
||||
DEFAULT_GLOB = '*'
|
||||
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
|
||||
"""
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
"""
|
||||
Github events and their metadata: comments/issues/pull requests
|
||||
"""
|
||||
from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar, Set
|
||||
from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterable, TypeVar, Set
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
|
@ -10,7 +10,7 @@ import pytz
|
|||
from ..kython.klogging import LazyLogger
|
||||
from ..kython.kompress import CPath
|
||||
from ..common import get_files, mcachew
|
||||
from ..error import Res
|
||||
from ..core.error import Res, sort_res_by
|
||||
|
||||
from my.config import github as config
|
||||
import my.config.repos.ghexport.dal as ghexport
|
||||
|
@ -197,7 +197,7 @@ def _parse_event(d: Dict) -> Event:
|
|||
)
|
||||
|
||||
|
||||
def iter_gdpr_events() -> Iterator[Res[Event]]:
|
||||
def iter_gdpr_events() -> Iterable[Res[Event]]:
|
||||
"""
|
||||
Parses events from GDPR export (https://github.com/settings/admin)
|
||||
"""
|
||||
|
@ -240,12 +240,12 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:
|
|||
|
||||
# TODO hmm. not good, need to be lazier?...
|
||||
@mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
|
||||
def iter_backup_events(dal=_dal()) -> Iterator[Event]:
|
||||
def iter_backup_events(dal=_dal()) -> Iterable[Event]:
|
||||
for d in dal.events():
|
||||
yield _parse_event(d)
|
||||
|
||||
|
||||
def iter_events() -> Iterator[Res[Event]]:
|
||||
def events() -> Iterable[Res[Event]]:
|
||||
from itertools import chain
|
||||
emitted: Set[Tuple[datetime, str]] = set()
|
||||
for e in chain(iter_gdpr_events(), iter_backup_events()):
|
||||
|
@ -260,13 +260,16 @@ def iter_events() -> Iterator[Res[Event]]:
|
|||
logger.debug('ignoring %s: %s', key, e)
|
||||
continue
|
||||
yield e
|
||||
emitted.add(key)
|
||||
emitted.add(key) # todo more_itertools
|
||||
|
||||
|
||||
def get_events():
|
||||
return sorted(iter_events(), key=lambda e: e.dt)
|
||||
def get_events() -> Iterable[Res[Event]]:
|
||||
return sort_res_by(events(), key=lambda e: e.dt)
|
||||
|
||||
# TODO mm. ok, not much point in deserializing as github.Event as it's basically a fancy dict wrapper?
|
||||
# from github.Event import Event as GEvent # type: ignore
|
||||
# # see https://github.com/PyGithub/PyGithub/blob/master/github/GithubObject.py::GithubObject.__init__
|
||||
# e = GEvent(None, None, raw_event, True)
|
||||
|
||||
# todo deprecate
|
||||
iter_events = events
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
from datetime import datetime
|
||||
from typing import NamedTuple, List
|
||||
from typing import NamedTuple, List, Iterable
|
||||
|
||||
from ..google.takeout.html import read_html
|
||||
from ..google.takeout.paths import get_last_takeout
|
||||
|
@ -16,7 +16,7 @@ class Watched(NamedTuple):
|
|||
return f'{self.url}-{self.when.isoformat()}'
|
||||
|
||||
|
||||
def get_watched():
|
||||
def watched() -> Iterable[Watched]:
|
||||
# TODO need to use a glob? to make up for old takouts that didn't start with Takeout/
|
||||
path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
|
||||
# TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
|
||||
|
@ -30,6 +30,10 @@ def get_watched():
|
|||
return list(sorted(watches, key=lambda e: e.when))
|
||||
|
||||
|
||||
# todo deprecate
|
||||
get_watched = watched
|
||||
|
||||
|
||||
def main():
|
||||
# TODO shit. a LOT of watches...
|
||||
for w in get_watched():
|
||||
|
|
|
@ -1,5 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
from my.coding.github import get_events
|
||||
from more_itertools import ilen
|
||||
|
||||
from my.coding.github import get_events, iter_gdpr_events
|
||||
|
||||
|
||||
def test_gdpr():
|
||||
assert ilen(iter_gdpr_events()) > 100
|
||||
|
||||
|
||||
def test():
|
||||
events = get_events()
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# TODO move elsewhere?
|
||||
|
||||
# these tests would only make sense with some existing data? although some of them would work for everyone..
|
||||
# not sure what's a good way of handling this..
|
||||
|
||||
|
@ -7,7 +6,7 @@ from my.media.youtube import get_watched, Watched
|
|||
|
||||
|
||||
def test():
|
||||
watched = get_watched()
|
||||
watched = list(get_watched())
|
||||
assert len(watched) > 1000
|
||||
|
||||
from datetime import datetime
|
||||
|
|
Loading…
Add table
Reference in a new issue