my.stackexchange: use proper pip package, add stat

+ 'anonymous' mode for stat() function
This commit is contained in:
Dima Gerasimov 2020-12-04 16:49:42 +00:00 committed by karlicoss
parent 9d39892e75
commit ddea816a49
6 changed files with 50 additions and 20 deletions

View file

@ -42,6 +42,9 @@ class exercise:
class bluemaestro: class bluemaestro:
export_path: Paths = '' export_path: Paths = ''
class stackexchange:
export_path: Paths = ''
class google: class google:
takeout_path: Paths = '' takeout_path: Paths = ''

View file

@ -386,8 +386,14 @@ QUICK_STATS = False
C = TypeVar('C') C = TypeVar('C')
Stats = Dict[str, Any] Stats = Dict[str, Any]
# todo not sure about return type... # todo not sure about return type...
def stat(func: Callable[[], Iterable[C]]) -> Stats: def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats:
fr = func() if callable(func):
fr = func()
fname = func.__name__
else:
# meh. means it's just a list.. not sure how to generate a name then
fr = func
fname = f'unnamed_{id(fr)}'
tname = type(fr).__name__ tname = type(fr).__name__
if tname == 'DataFrame': if tname == 'DataFrame':
# dynamic, because pandas is an optional dependency.. # dynamic, because pandas is an optional dependency..
@ -399,7 +405,7 @@ def stat(func: Callable[[], Iterable[C]]) -> Stats:
else: else:
res = _stat_iterable(fr) res = _stat_iterable(fr)
return { return {
func.__name__: res, fname: res,
} }

View file

@ -51,12 +51,12 @@ def articles() -> Iterable[Article]:
yield from _dal().articles() yield from _dal().articles()
def stats(): from .core import stat, Stats
def stats() -> Stats:
from itertools import chain from itertools import chain
from more_itertools import ilen from more_itertools import ilen
# todo make stats more defensive?
return { return {
'articles' : ilen(articles()), **stat(articles),
'highlights': ilen(chain.from_iterable(a.highlights for a in articles())), 'highlights': ilen(chain.from_iterable(a.highlights for a in articles())),
} }

View file

@ -1,11 +0,0 @@
'''
Stackexchange data
'''
import my.config.repos.stexport.model as stexport
from my.config import stackexchange as config
def get_data():
    """Return the 'stackoverflow' site model built from a single export file.

    The file used is the maximum path among the ``*.json`` entries found in
    ``config.export_dir``.
    """
    chosen = max(config.export_dir.glob('*.json'))
    model = stexport.Model([chosen])
    return model.site_model('stackoverflow')

View file

@ -0,0 +1,28 @@
'''
Stackexchange data
'''
REQUIRES = [
'git+https://github.com/karlicoss/stexport',
]
# TODO use GDPR?
from stexport import dal
from my.config import stackexchange as config
# todo lru cache?
def _dal() -> dal.DAL:
    """Construct a ``dal.DAL`` over the files resolved from ``config.export_path``."""
    # Imported lazily, as in the original, rather than at module import time.
    from ..core import get_files
    return dal.DAL(get_files(config.export_path))
def site(name: str) -> dal.SiteDAL:
    """Return the site-specific DAL for the site called ``name``."""
    whole = _dal()
    return whole.site_dal(name)
from ..core import stat, Stats
def stats() -> Stats:
    """Return ``stat`` output for the questions of the 'stackoverflow' site."""
    return stat(site('stackoverflow').questions)

10
tox.ini
View file

@ -47,25 +47,29 @@ whitelist_externals = bash
commands = commands =
pip install -e .[testing] .[optional] pip install -e .[testing] .[optional]
pip install orgparse pip install orgparse
pip install git+https://github.com/karlicoss/endoexport
pip install git+https://github.com/karlicoss/ghexport pip install git+https://github.com/karlicoss/ghexport
pip install git+https://github.com/karlicoss/hypexport pip install git+https://github.com/karlicoss/hypexport
pip install git+https://github.com/karlicoss/instapexport pip install git+https://github.com/karlicoss/instapexport
pip install git+https://github.com/karlicoss/pockexport pip install git+https://github.com/karlicoss/pockexport
pip install git+https://github.com/karlicoss/rexport pip install git+https://github.com/karlicoss/rexport
pip install git+https://github.com/karlicoss/endoexport pip install git+https://github.com/karlicoss/stexport
# ugh fuck. soo... need to reset HOME, otherwise user's site-packages are somehow leaking into mypy's path... # ugh fuck. soo... need to reset HOME, otherwise user's site-packages are somehow leaking into mypy's path...
# see https://github.com/python/mypy/blob/f6fb60ef69738cbfe2dfe56c747eca8f03735d8e/mypy/modulefinder.py#L487 # see https://github.com/python/mypy/blob/f6fb60ef69738cbfe2dfe56c747eca8f03735d8e/mypy/modulefinder.py#L487
# this is particularly annoying when user's config is leaking and mypy isn't running against the repository config # this is particularly annoying when user's config is leaking and mypy isn't running against the repository config
# maybe this issue... https://github.com/tox-dev/tox/issues/838 # maybe this issue... https://github.com/tox-dev/tox/issues/838
# and also since it's Tox, we can't just set an env variable for a single command, have to spawn a subshell. jeez. # and also since it's Tox, we can't just set an env variable for a single command, have to spawn a subshell. jeez.
# TODO fuck. -p my.github isn't checking the subpackages?? wtf...
bash -c 'HOME= \ bash -c 'HOME= \
python3 -m mypy -p my.github.ghexport \ python3 -m mypy \
-p my.endomondo \
-p my.github.ghexport \
-p my.hypothesis \ -p my.hypothesis \
-p my.instapaper \ -p my.instapaper \
-p my.pocket \ -p my.pocket \
-p my.reddit \ -p my.reddit \
-p my.endomondo \ -p my.stackexchange.stexport \
-p my.body.exercise.cardio \ -p my.body.exercise.cardio \
-p my.body.exercise.cross_trainer \ -p my.body.exercise.cross_trainer \
-p my.bluemaestro \ -p my.bluemaestro \