From 1cc4eb5d8dbcb6a7cf0ba09386895b7e32fe18e1 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 4 Jun 2020 22:19:34 +0100 Subject: [PATCH] core: add helper for computing stats; use it in modules --- my/books/kobo.py | 10 ++++++++++ my/core/__init__.py | 5 ++++- my/core/__main__.py | 4 ++++ my/core/common.py | 27 +++++++++++++++++++++++++++ my/github/gdpr.py | 7 +++++++ my/github/ghexport.py | 9 ++++++++- my/photos/__init__.py | 3 ++- my/twitter/archive.py | 8 ++++++++ my/twitter/twint.py | 8 ++++++++ tests/github.py | 1 + 10 files changed, 79 insertions(+), 3 deletions(-) diff --git a/my/books/kobo.py b/my/books/kobo.py index e5603a0..9b27642 100644 --- a/my/books/kobo.py +++ b/my/books/kobo.py @@ -10,6 +10,16 @@ from my.config.repos.kobuddy.src.kobuddy import Highlight, set_databases, get_hi set_databases(config.export_dir) + +def stats(): + from ..core import stat + return { + **stat(get_highlights), + } + +## TODO hmm. not sure if all this really belongs here?... perhaps orger? + + # TODO maybe type over T? _Predicate = Callable[[str], bool] Predicatish = Union[str, _Predicate] diff --git a/my/core/__init__.py b/my/core/__init__.py index 678df85..63c14ae 100644 --- a/my/core/__init__.py +++ b/my/core/__init__.py @@ -1,5 +1,8 @@ # this file only keeps the most common & critical types/utility functions from .common import PathIsh, Paths, Json -from .common import get_files, LazyLogger +from .common import get_files +from .common import LazyLogger from .common import warn_if_empty +from .common import stat + from .cfg import make_config diff --git a/my/core/__main__.py b/my/core/__main__.py index 1159907..afb8d65 100644 --- a/my/core/__main__.py +++ b/my/core/__main__.py @@ -133,6 +133,10 @@ def modules_check(args): stats = getattr(mod, 'stats', None) if stats is None: continue + from . import common + common.QUICK_STATS = True + # todo make it a cmdline option.. + try: res = stats() except Exception as ee: diff --git a/my/core/common.py b/my/core/common.py index 324ae26..bc1e2e2 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -338,3 +338,30 @@ def warn_if_empty(f): res = f(*args, **kwargs) return _warn_iterable(res, f=f) return wrapped # type: ignore + + +# hacky hook to speed up for 'hpi doctor' +# todo think about something better +QUICK_STATS = False + + +C = TypeVar('C') +# todo not sure about return type... +def stat(func: Callable[[], Iterable[C]]) -> Dict[str, Any]: + from more_itertools import ilen, take, first + + it = iter(func()) + res: Any + if QUICK_STATS: + initial = take(100, it) + res = len(initial) + if first(it, None) is not None: # todo can actually be none... + # haven't exhausted + res = f'{res}+' + else: + res = ilen(it) + + + return { + func.__name__: res, + } diff --git a/my/github/gdpr.py b/my/github/gdpr.py index cc813a8..c9808f3 100644 --- a/my/github/gdpr.py +++ b/my/github/gdpr.py @@ -66,6 +66,13 @@ def events() -> Iterable[Res[Event]]: yield e +def stats(): + from ..core import stat + return { + **stat(events), + } + + # TODO typing.TypedDict could be handy here.. def _parse_common(d: Dict) -> Dict: url = d['url'] diff --git a/my/github/ghexport.py b/my/github/ghexport.py index 30fd76c..4156628 100644 --- a/my/github/ghexport.py +++ b/my/github/ghexport.py @@ -29,7 +29,7 @@ class github(user_config): def dal_module(self): rpath = self.ghexport if rpath is not None: - from .core.common import import_dir + from ..core.common import import_dir return import_dir(rpath, '.dal') else: import my.config.repos.ghexport.dal as dal @@ -81,6 +81,13 @@ def events(dal=_dal()) -> Results: yield _parse_event(d) +def stats(): + from ..core import stat + return { + **stat(events), + } + + # TODO hmm. need some sort of abstract syntax for this... # TODO split further, title too def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]: diff --git a/my/photos/__init__.py b/my/photos/__init__.py index c11fe4c..abb6bb9 100644 --- a/my/photos/__init__.py +++ b/my/photos/__init__.py @@ -123,7 +123,8 @@ def _candidates() -> Iterable[str]: '.', *config.paths, ], stdout=PIPE) as p: - for line in p.stdout: + out = p.stdout; assert out is not None + for line in out: path = line.decode('utf8').rstrip('\n') mime = fastermime(path) tp = mime.split('/')[0] diff --git a/my/twitter/archive.py b/my/twitter/archive.py index c44272c..f3550d3 100755 --- a/my/twitter/archive.py +++ b/my/twitter/archive.py @@ -183,3 +183,11 @@ def tweets() -> Iterable[Tweet]: def likes() -> Iterable[Like]: for inp in inputs(): yield from ZipExport(inp).likes() + + +def stats(): + from ..core import stat + return { + **stat(tweets), + **stat(likes), + } diff --git a/my/twitter/twint.py b/my/twitter/twint.py index 3a2b327..1b5c73e 100644 --- a/my/twitter/twint.py +++ b/my/twitter/twint.py @@ -108,3 +108,11 @@ def likes() -> Iterable[Tweet]: db = _get_db() res = db.query(_QUERY.format(where='F.tweet_id IS NOT NULL')) yield from map(Tweet, res) + + +def stats(): + from ..core import stat + return { + **stat(tweets), + **stat(likes), + } diff --git a/tests/github.py b/tests/github.py index 5817756..f4ca4b5 100644 --- a/tests/github.py +++ b/tests/github.py @@ -3,6 +3,7 @@ from more_itertools import ilen from my.coding.github import get_events +# todo test against stats? not sure.. maybe both def test_gdpr(): import my.github.gdpr as gdpr