From ddea816a49f5da79fd6332e7f6b879b1955838af Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 4 Dec 2020 16:49:42 +0000 Subject: [PATCH] my.stackexchange: use proper pip package, add stat + 'anonymous' mode for stat() function --- my/config.py | 3 +++ my/core/common.py | 12 +++++++++--- my/pocket.py | 6 +++--- my/stackexchange.py | 11 ----------- my/stackexchange/stexport.py | 28 ++++++++++++++++++++++++++++ tox.ini | 10 +++++++--- 6 files changed, 50 insertions(+), 20 deletions(-) delete mode 100644 my/stackexchange.py create mode 100644 my/stackexchange/stexport.py diff --git a/my/config.py b/my/config.py index 9e02564..52841a9 100644 --- a/my/config.py +++ b/my/config.py @@ -42,6 +42,9 @@ class exercise: class bluemaestro: export_path: Paths = '' +class stackexchange: + export_path: Paths = '' + class google: takeout_path: Paths = '' diff --git a/my/core/common.py b/my/core/common.py index 79e06da..80d76da 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -386,8 +386,14 @@ QUICK_STATS = False C = TypeVar('C') Stats = Dict[str, Any] # todo not sure about return type... -def stat(func: Callable[[], Iterable[C]]) -> Stats: - fr = func() +def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats: + if callable(func): + fr = func() + fname = func.__name__ + else: + # meh. means it's just a list.. not sure how to generate a name then + fr = func + fname = f'unnamed_{id(fr)}' tname = type(fr).__name__ if tname == 'DataFrame': # dynamic, because pandas is an optional dependency.. @@ -399,7 +405,7 @@ def stat(func: Callable[[], Iterable[C]]) -> Stats: else: res = _stat_iterable(fr) return { - func.__name__: res, + fname: res, } diff --git a/my/pocket.py b/my/pocket.py index faba1d7..ff80967 100644 --- a/my/pocket.py +++ b/my/pocket.py @@ -51,12 +51,12 @@ def articles() -> Iterable[Article]: yield from _dal().articles() -def stats(): +from .core import stat, Stats +def stats() -> Stats: from itertools import chain from more_itertools import ilen - # todo make stats more defensive? return { - 'articles' : ilen(articles()), + **stat(articles), 'highlights': ilen(chain.from_iterable(a.highlights for a in articles())), } diff --git a/my/stackexchange.py b/my/stackexchange.py deleted file mode 100644 index 314546a..0000000 --- a/my/stackexchange.py +++ /dev/null @@ -1,11 +0,0 @@ -''' -Stackexchange data -''' - -import my.config.repos.stexport.model as stexport -from my.config import stackexchange as config - - -def get_data(): - sources = [max(config.export_dir.glob('*.json'))] - return stexport.Model(sources).site_model('stackoverflow') diff --git a/my/stackexchange/stexport.py b/my/stackexchange/stexport.py new file mode 100644 index 0000000..7150b1e --- /dev/null +++ b/my/stackexchange/stexport.py @@ -0,0 +1,28 @@ +''' +Stackexchange data +''' +REQUIRES = [ + 'git+https://github.com/karlicoss/stexport', +] + +# TODO use GDPR? + +from stexport import dal +from my.config import stackexchange as config + + +# todo lru cache? +def _dal() -> dal.DAL: + from ..core import get_files + inputs = get_files(config.export_path) + return dal.DAL(inputs) + + +def site(name: str) -> dal.SiteDAL: + return _dal().site_dal(name) + + +from ..core import stat, Stats +def stats() -> Stats: + s = site('stackoverflow') + return stat(s.questions) diff --git a/tox.ini b/tox.ini index 7531cfd..e0872a7 100644 --- a/tox.ini +++ b/tox.ini @@ -47,25 +47,29 @@ whitelist_externals = bash commands = pip install -e .[testing] .[optional] pip install orgparse + pip install git+https://github.com/karlicoss/endoexport pip install git+https://github.com/karlicoss/ghexport pip install git+https://github.com/karlicoss/hypexport pip install git+https://github.com/karlicoss/instapexport pip install git+https://github.com/karlicoss/pockexport pip install git+https://github.com/karlicoss/rexport - pip install git+https://github.com/karlicoss/endoexport + pip install git+https://github.com/karlicoss/stexport # ugh fuck. soo... need to reset HOME, otherwise user's site-packages are somehow leaking into mypy's path... # see https://github.com/python/mypy/blob/f6fb60ef69738cbfe2dfe56c747eca8f03735d8e/mypy/modulefinder.py#L487 # this is particularly annoying when user's config is leaking and mypy isn't running against the repository config # maybe this issue... https://github.com/tox-dev/tox/issues/838 # and also since it's Tox, we can't just set an env variable for a single command, have to spawn a subshell. jeez. + # TODO fuck. -p my.github isn't checking the subpackages?? wtf... bash -c 'HOME= \ - python3 -m mypy -p my.github.ghexport \ + python3 -m mypy \ + -p my.endomondo \ + -p my.github.ghexport \ -p my.hypothesis \ -p my.instapaper \ -p my.pocket \ -p my.reddit \ - -p my.endomondo \ + -p my.stackexchange.stexport \ -p my.body.exercise.cardio \ -p my.body.exercise.cross_trainer \ -p my.bluemaestro \