diff --git a/my/core/common.py b/my/core/common.py index 64e7b23..74aac5e 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -116,6 +116,7 @@ from ..kython.klogging import setup_logger, LazyLogger Paths = Union[Sequence[PathIsh], PathIsh] +# TODO support '' for empty path DEFAULT_GLOB = '*' def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]: """ diff --git a/my/coding/github.py b/my/github/common.py similarity index 95% rename from my/coding/github.py rename to my/github/common.py index 3f5dd63..1f05a19 100644 --- a/my/coding/github.py +++ b/my/github/common.py @@ -1,7 +1,7 @@ """ Github events and their metadata: comments/issues/pull requests """ -from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar, Set +from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterable, TypeVar, Set from datetime import datetime import json @@ -10,7 +10,7 @@ import pytz from ..kython.klogging import LazyLogger from ..kython.kompress import CPath from ..common import get_files, mcachew -from ..error import Res +from ..core.error import Res, sort_res_by from my.config import github as config import my.config.repos.ghexport.dal as ghexport @@ -197,7 +197,7 @@ def _parse_event(d: Dict) -> Event: ) -def iter_gdpr_events() -> Iterator[Res[Event]]: +def iter_gdpr_events() -> Iterable[Res[Event]]: """ Parses events from GDPR export (https://github.com/settings/admin) """ @@ -240,12 +240,12 @@ def iter_gdpr_events() -> Iterator[Res[Event]]: # TODO hmm. not good, need to be lazier?... 
@mcachew(config.cache_dir, hashf=lambda dal: dal.sources) -def iter_backup_events(dal=_dal()) -> Iterator[Event]: +def iter_backup_events(dal=_dal()) -> Iterable[Event]: for d in dal.events(): yield _parse_event(d) -def iter_events() -> Iterator[Res[Event]]: +def events() -> Iterable[Res[Event]]: from itertools import chain emitted: Set[Tuple[datetime, str]] = set() for e in chain(iter_gdpr_events(), iter_backup_events()): @@ -260,13 +260,16 @@ def iter_events() -> Iterator[Res[Event]]: logger.debug('ignoring %s: %s', key, e) continue yield e - emitted.add(key) + emitted.add(key) # todo more_itertools -def get_events(): - return sorted(iter_events(), key=lambda e: e.dt) +def get_events() -> Iterable[Res[Event]]: + return sort_res_by(events(), key=lambda e: e.dt) # TODO mm. ok, not much point in deserializing as github.Event as it's basically a fancy dict wrapper? # from github.Event import Event as GEvent # type: ignore # # see https://github.com/PyGithub/PyGithub/blob/master/github/GithubObject.py::GithubObject.__init__ # e = GEvent(None, None, raw_event, True) + +# todo deprecate +iter_events = events diff --git a/my/media/youtube.py b/my/media/youtube.py index ffe2740..faeb09a 100755 --- a/my/media/youtube.py +++ b/my/media/youtube.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 from datetime import datetime -from typing import NamedTuple, List +from typing import NamedTuple, List, Iterable from ..google.takeout.html import read_html from ..google.takeout.paths import get_last_takeout @@ -16,7 +16,7 @@ class Watched(NamedTuple): return f'{self.url}-{self.when.isoformat()}' -def get_watched(): +def watched() -> Iterable[Watched]: # TODO need to use a glob? to make up for old takouts that didn't start with Takeout/ path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? 
so enough to use the last # TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json @@ -30,6 +30,10 @@ def get_watched(): return list(sorted(watches, key=lambda e: e.when)) +# todo deprecate +get_watched = watched + + def main(): # TODO shit. a LOT of watches... for w in get_watched(): diff --git a/tests/github.py b/tests/github.py index d296096..a007a42 100644 --- a/tests/github.py +++ b/tests/github.py @@ -1,5 +1,12 @@ #!/usr/bin/env python3 -from my.coding.github import get_events +from more_itertools import ilen + +from my.coding.github import get_events, iter_gdpr_events + + +def test_gdpr(): + assert ilen(iter_gdpr_events()) > 100 + def test(): events = get_events() diff --git a/tests/youtube.py b/tests/youtube.py index 104f2d8..b8c1aa8 100644 --- a/tests/youtube.py +++ b/tests/youtube.py @@ -1,5 +1,4 @@ # TODO move elsewhere? - # these tests would only make sense with some existing data? although some of them would work for everyone.. # not sure what's a good way of handling this.. @@ -7,7 +6,7 @@ from my.media.youtube import get_watched, Watched def test(): - watched = get_watched() + watched = list(get_watched()) assert len(watched) > 1000 from datetime import datetime