github: start moving to a proper arbitrated module

Dima Gerasimov 2020-06-01 22:10:29 +01:00
parent 67cf4d0c04
commit d7aff1be3f
5 changed files with 27 additions and 13 deletions

View file

@@ -116,6 +116,7 @@ from ..kython.klogging import setup_logger, LazyLogger
 Paths = Union[Sequence[PathIsh], PathIsh]

 # TODO support '' for emtpy path
+DEFAULT_GLOB = '*'
 def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
     """

View file

@@ -1,7 +1,7 @@
 """
 Github events and their metadata: comments/issues/pull requests
 """
-from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar, Set
+from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterable, TypeVar, Set
 from datetime import datetime
 import json
@@ -10,7 +10,7 @@ import pytz
 from ..kython.klogging import LazyLogger
 from ..kython.kompress import CPath
 from ..common import get_files, mcachew
-from ..error import Res
+from ..core.error import Res, sort_res_by

 from my.config import github as config
 import my.config.repos.ghexport.dal as ghexport
@@ -197,7 +197,7 @@ def _parse_event(d: Dict) -> Event:
     )

-def iter_gdpr_events() -> Iterator[Res[Event]]:
+def iter_gdpr_events() -> Iterable[Res[Event]]:
     """
     Parses events from GDPR export (https://github.com/settings/admin)
     """
@@ -240,12 +240,12 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:
 # TODO hmm. not good, need to be lazier?...
 @mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
-def iter_backup_events(dal=_dal()) -> Iterator[Event]:
+def iter_backup_events(dal=_dal()) -> Iterable[Event]:
     for d in dal.events():
         yield _parse_event(d)


-def iter_events() -> Iterator[Res[Event]]:
+def events() -> Iterable[Res[Event]]:
     from itertools import chain
     emitted: Set[Tuple[datetime, str]] = set()
     for e in chain(iter_gdpr_events(), iter_backup_events()):
@@ -260,13 +260,16 @@ def iter_events() -> Iterator[Res[Event]]:
             logger.debug('ignoring %s: %s', key, e)
             continue
         yield e
-        emitted.add(key)
+        emitted.add(key) # todo more_itertools


-def get_events():
-    return sorted(iter_events(), key=lambda e: e.dt)
+def get_events() -> Iterable[Res[Event]]:
+    return sort_res_by(events(), key=lambda e: e.dt)


 # TODO mm. ok, not much point in deserializing as github.Event as it's basically a fancy dict wrapper?
 # from github.Event import Event as GEvent # type: ignore
 # # see https://github.com/PyGithub/PyGithub/blob/master/github/GithubObject.py::GithubObject.__init__
 # e = GEvent(None, None, raw_event, True)
+
+# todo deprecate
+iter_events = events
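
get_events can no longer use plain sorted(): the stream now mixes Events with Exceptions, and exceptions have no .dt to key on, hence sort_res_by. Below is a simplified reimplementation of the idea, not the actual my.core.error code (which is smarter about where the errors end up):

```python
from typing import Callable, Iterable, List, TypeVar, Union

T = TypeVar('T')

def sort_res_by_sketch(items: Iterable[Union[T, Exception]],
                       key: Callable[[T], object]) -> List[Union[T, Exception]]:
    # sort the successful values by key; exceptions can't be keyed,
    # so tack them on at the end
    good = [x for x in items if not isinstance(x, Exception)]
    bad = [x for x in items if isinstance(x, Exception)]
    return sorted(good, key=key) + bad  # type: ignore[type-var]
```

The `iter_events = events` alias at the end is the matching deprecation shim, so existing callers keep working while the new name settles in.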

View file

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 from datetime import datetime
-from typing import NamedTuple, List
+from typing import NamedTuple, List, Iterable

 from ..google.takeout.html import read_html
 from ..google.takeout.paths import get_last_takeout
@@ -16,7 +16,7 @@ class Watched(NamedTuple):
         return f'{self.url}-{self.when.isoformat()}'


-def get_watched():
+def watched() -> Iterable[Watched]:
     # TODO need to use a glob? to make up for old takouts that didn't start with Takeout/
     path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
     # TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
@@ -30,6 +30,10 @@ def get_watched():
     return list(sorted(watches, key=lambda e: e.when))

+# todo deprecate
+get_watched = watched
+

 def main():
     # TODO shit. a LOT of watches...
     for w in get_watched():
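
The youtube provider gets the same rename treatment: watched() is the real function and get_watched survives as a deprecation alias. A usage sketch (assuming a configured takeout; per the eid property above, Watched carries at least url and when):

```python
from my.media.youtube import watched

# print the ten most recent watches
for w in sorted(watched(), key=lambda w: w.when)[-10:]:
    print(w.when.isoformat(), w.url)
```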

View file

@@ -1,5 +1,12 @@
 #!/usr/bin/env python3
-from my.coding.github import get_events
+from more_itertools import ilen
+
+from my.coding.github import get_events, iter_gdpr_events
+
+
+def test_gdpr():
+    assert ilen(iter_gdpr_events()) > 100
+

 def test():
     events = get_events()
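
more_itertools.ilen counts an iterable by consuming it, which suits the lazy Iterable-returning API without materializing a list just to measure it. For illustration:

```python
from more_itertools import ilen

# counts lazily; no intermediate list is built
assert ilen(x for x in range(1000) if x % 7 == 0) == 143
```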

View file

@@ -1,5 +1,4 @@
-# TODO move elsewhere?
 # these tests would only make sense with some existing data? although some of them would work for everyone..
 # not sure what's a good way of handling this..
@@ -7,7 +6,7 @@ from my.media.youtube import get_watched, Watched

 def test():
-    watched = get_watched()
+    watched = list(get_watched())
     assert len(watched) > 1000

     from datetime import datetime
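
Wrapping get_watched() in list() future-proofs the test: len() needs a sized container, so it would break the moment the provider becomes a true generator. A tiny illustration:

```python
def gen():
    yield from range(3)

g = gen()
# len(g) would raise TypeError: object of type 'generator' has no len()
assert len(list(g)) == 3
```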