From 1cc4eb5d8dbcb6a7cf0ba09386895b7e32fe18e1 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 4 Jun 2020 22:19:34 +0100 Subject: [PATCH 1/3] core: add helper for computing stats; use it in modules --- my/books/kobo.py | 10 ++++++++++ my/core/__init__.py | 5 ++++- my/core/__main__.py | 4 ++++ my/core/common.py | 27 +++++++++++++++++++++++++++ my/github/gdpr.py | 7 +++++++ my/github/ghexport.py | 9 ++++++++- my/photos/__init__.py | 3 ++- my/twitter/archive.py | 8 ++++++++ my/twitter/twint.py | 8 ++++++++ tests/github.py | 1 + 10 files changed, 79 insertions(+), 3 deletions(-) diff --git a/my/books/kobo.py b/my/books/kobo.py index e5603a0..9b27642 100644 --- a/my/books/kobo.py +++ b/my/books/kobo.py @@ -10,6 +10,16 @@ from my.config.repos.kobuddy.src.kobuddy import Highlight, set_databases, get_hi set_databases(config.export_dir) + +def stats(): + from ..core import stat + return { + **stat(get_highlights), + } + +## TODO hmm. not sure if all this really belongs here?... perhaps orger? + + # TODO maybe type over T? _Predicate = Callable[[str], bool] Predicatish = Union[str, _Predicate] diff --git a/my/core/__init__.py b/my/core/__init__.py index 678df85..63c14ae 100644 --- a/my/core/__init__.py +++ b/my/core/__init__.py @@ -1,5 +1,8 @@ # this file only keeps the most common & critical types/utility functions from .common import PathIsh, Paths, Json -from .common import get_files, LazyLogger +from .common import get_files +from .common import LazyLogger from .common import warn_if_empty +from .common import stat + from .cfg import make_config diff --git a/my/core/__main__.py b/my/core/__main__.py index 1159907..afb8d65 100644 --- a/my/core/__main__.py +++ b/my/core/__main__.py @@ -133,6 +133,10 @@ def modules_check(args): stats = getattr(mod, 'stats', None) if stats is None: continue + from . import common + common.QUICK_STATS = True + # todo make it a cmdline option.. + try: res = stats() except Exception as ee: diff --git a/my/core/common.py b/my/core/common.py index 324ae26..bc1e2e2 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -338,3 +338,30 @@ def warn_if_empty(f): res = f(*args, **kwargs) return _warn_iterable(res, f=f) return wrapped # type: ignore + + +# hacky hook to speed up for 'hpi doctor' +# todo think about something better +QUICK_STATS = False + + +C = TypeVar('C') +# todo not sure about return type... +def stat(func: Callable[[], Iterable[C]]) -> Dict[str, Any]: + from more_itertools import ilen, take, first + + it = iter(func()) + res: Any + if QUICK_STATS: + initial = take(100, it) + res = len(initial) + if first(it, None) is not None: # todo can actually be none... + # haven't exhausted + res = f'{res}+' + else: + res = ilen(it) + + + return { + func.__name__: res, + } diff --git a/my/github/gdpr.py b/my/github/gdpr.py index cc813a8..c9808f3 100644 --- a/my/github/gdpr.py +++ b/my/github/gdpr.py @@ -66,6 +66,13 @@ def events() -> Iterable[Res[Event]]: yield e +def stats(): + from ..core import stat + return { + **stat(events), + } + + # TODO typing.TypedDict could be handy here.. def _parse_common(d: Dict) -> Dict: url = d['url'] diff --git a/my/github/ghexport.py b/my/github/ghexport.py index 30fd76c..4156628 100644 --- a/my/github/ghexport.py +++ b/my/github/ghexport.py @@ -29,7 +29,7 @@ class github(user_config): def dal_module(self): rpath = self.ghexport if rpath is not None: - from .core.common import import_dir + from ..core.common import import_dir return import_dir(rpath, '.dal') else: import my.config.repos.ghexport.dal as dal @@ -81,6 +81,13 @@ def events(dal=_dal()) -> Results: yield _parse_event(d) +def stats(): + from ..core import stat + return { + **stat(events), + } + + # TODO hmm. need some sort of abstract syntax for this... # TODO split further, title too def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]: diff --git a/my/photos/__init__.py b/my/photos/__init__.py index c11fe4c..abb6bb9 100644 --- a/my/photos/__init__.py +++ b/my/photos/__init__.py @@ -123,7 +123,8 @@ def _candidates() -> Iterable[str]: '.', *config.paths, ], stdout=PIPE) as p: - for line in p.stdout: + out = p.stdout; assert out is not None + for line in out: path = line.decode('utf8').rstrip('\n') mime = fastermime(path) tp = mime.split('/')[0] diff --git a/my/twitter/archive.py b/my/twitter/archive.py index c44272c..f3550d3 100755 --- a/my/twitter/archive.py +++ b/my/twitter/archive.py @@ -183,3 +183,11 @@ def tweets() -> Iterable[Tweet]: def likes() -> Iterable[Like]: for inp in inputs(): yield from ZipExport(inp).likes() + + +def stats(): + from ..core import stat + return { + **stat(tweets), + **stat(likes), + } diff --git a/my/twitter/twint.py b/my/twitter/twint.py index 3a2b327..1b5c73e 100644 --- a/my/twitter/twint.py +++ b/my/twitter/twint.py @@ -108,3 +108,11 @@ def likes() -> Iterable[Tweet]: db = _get_db() res = db.query(_QUERY.format(where='F.tweet_id IS NOT NULL')) yield from map(Tweet, res) + + +def stats(): + from ..core import stat + return { + **stat(tweets), + **stat(likes), + } diff --git a/tests/github.py b/tests/github.py index 5817756..f4ca4b5 100644 --- a/tests/github.py +++ b/tests/github.py @@ -3,6 +3,7 @@ from more_itertools import ilen from my.coding.github import get_events +# todo test against stats? not sure.. maybe both def test_gdpr(): import my.github.gdpr as gdpr From db852b3927ebf9aac5ff11b35197e38fb71cf9ee Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 4 Jun 2020 22:20:48 +0100 Subject: [PATCH 2/3] kobo: move away from my.books --- my/{books => }/kobo.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename my/{books => }/kobo.py (100%) diff --git a/my/books/kobo.py b/my/kobo.py similarity index 100% rename from my/books/kobo.py rename to my/kobo.py From 821eb47c936385a42785af43be9a02d3a4640997 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 4 Jun 2020 22:43:10 +0100 Subject: [PATCH 3/3] kobo: BREAKING changes. Use kobuddy module directly, rename export_dir to export_path. Hopefully this makes a lot of sense in the first place, and not that many users, so deserves breaking. --- doc/MODULES.org | 14 ++++++++++++++ my/books/__init__.py | 0 my/books/kobo.py | 5 +++++ my/coding/github.py | 3 ++- my/kobo.py | 35 ++++++++++++++++++++++++++++------- my/kython/kompress.py | 3 ++- 6 files changed, 51 insertions(+), 9 deletions(-) delete mode 100644 my/books/__init__.py create mode 100644 my/books/kobo.py diff --git a/doc/MODULES.org b/doc/MODULES.org index 4b33143..5011cae 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -27,6 +27,7 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][ - [[#myinstapaper][my.instapaper]] - [[#mygithubgdpr][my.github.gdpr]] - [[#mygithubghexport][my.github.ghexport]] + - [[#mykobo][my.kobo]] :END: * Intro @@ -80,6 +81,7 @@ modules = [ ('instapaper' , 'my.instapaper' ), ('github' , 'my.github.gdpr' ), ('github' , 'my.github.ghexport' ), + ('kobo' , 'my.kobo' ), ] def indent(s, spaces=4): @@ -261,3 +263,15 @@ for cls, p in modules: # if omitted, will use /tmp cache_dir: Optional[PathIsh] = None #+end_src +** [[file:../my/kobo.py][my.kobo]] + + [[https://uk.kobobooks.com/products/kobo-aura-one][Kobo]] e-ink reader: annotations and reading stats + + #+begin_src python + class kobo: + ''' + Uses [[https://github.com/karlicoss/kobuddy#as-a-backup-tool][kobuddy]] outputs. + ''' + # path[s]/glob to the exported databases + export_path: Paths + #+end_src diff --git a/my/books/__init__.py b/my/books/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/my/books/kobo.py b/my/books/kobo.py new file mode 100644 index 0000000..10b0352 --- /dev/null +++ b/my/books/kobo.py @@ -0,0 +1,5 @@ +import warnings + +warnings.warn('my.books.kobo is deprecated! Please use my.kobo instead!') + +from ..kobo import * diff --git a/my/coding/github.py b/my/coding/github.py index e1e0d77..9358b04 100644 --- a/my/coding/github.py +++ b/my/coding/github.py @@ -1,6 +1,7 @@ import warnings -warnings.warn('my.coding.github is deprecated! Please use my.github.all instead!', DeprecationWarning) +warnings.warn('my.coding.github is deprecated! Please use my.github.all instead!') +# todo why aren't DeprecationWarning shown by default?? from ..github.all import events, get_events diff --git a/my/kobo.py b/my/kobo.py index 9b27642..51a86f1 100644 --- a/my/kobo.py +++ b/my/kobo.py @@ -1,18 +1,38 @@ """ [[https://uk.kobobooks.com/products/kobo-aura-one][Kobo]] e-ink reader: annotations and reading stats """ -from typing import Callable, Union, List -from my.config import kobo as config -from my.config.repos.kobuddy.src.kobuddy import * -# hmm, explicit imports make pylint a bit happier.. -from my.config.repos.kobuddy.src.kobuddy import Highlight, set_databases, get_highlights +# TODO require installing kobuddy, need to upload to pypi as well? +from dataclasses import dataclass +from .core import Paths +from my.config import kobo as user_config +@dataclass +class kobo(user_config): + ''' + Uses [[https://github.com/karlicoss/kobuddy#as-a-backup-tool][kobuddy]] outputs. + ''' + # path[s]/glob to the exported databases + export_path: Paths + + +from .core.cfg import make_config +config = make_config(kobo) + +from .core import get_files +import kobuddy +# todo not sure about this glob.. +kobuddy.DATABASES = list(get_files(config.export_path, glob='*.sqlite')) + +######################### + +# hmm, explicit imports make pylint a bit happier? +from kobuddy import Highlight, get_highlights +from kobuddy import * -set_databases(config.export_dir) def stats(): - from ..core import stat + from .core import stat return { **stat(get_highlights), } @@ -20,6 +40,7 @@ def stats(): ## TODO hmm. not sure if all this really belongs here?... perhaps orger? +from typing import Callable, Union, List # TODO maybe type over T? _Predicate = Callable[[str], bool] Predicatish = Union[str, _Predicate] diff --git a/my/kython/kompress.py b/my/kython/kompress.py index 057627a..df49bcc 100644 --- a/my/kython/kompress.py +++ b/my/kython/kompress.py @@ -43,7 +43,8 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]: ifile.seekable = lambda: False # type: ignore ifile.read1 = ifile.read # type: ignore # TODO pass all kwargs here?? - return io.TextIOWrapper(ifile, encoding=encoding) + # todo 'expected "BinaryIO"'?? + return io.TextIOWrapper(ifile, encoding=encoding) # type: ignore[arg-type] elif suf in {'.lz4'}: import lz4.frame # type: ignore return lz4.frame.open(str(pp), mode, *args, **kwargs)