my.stackexchange: use proper pip package, add stat

+ 'anonymous' mode for stat() function
This commit is contained in:
Dima Gerasimov 2020-12-04 16:49:42 +00:00 committed by karlicoss
parent 9d39892e75
commit ddea816a49
6 changed files with 50 additions and 20 deletions

View file

@ -42,6 +42,9 @@ class exercise:
# Settings namespace for bluemaestro; export_path defaults to an empty string.
# NOTE(review): presumably overridden by the user's own config -- confirm.
class bluemaestro:
export_path: Paths = ''
# Settings namespace for the stackexchange module, which reads
# `export_path` from it to locate its input files. Defaults to an empty string.
class stackexchange:
export_path: Paths = ''
class google:
takeout_path: Paths = ''

View file

@ -386,8 +386,14 @@ QUICK_STATS = False
C = TypeVar('C')
Stats = Dict[str, Any]
# todo not sure about return type...
def stat(func: Callable[[], Iterable[C]]) -> Stats:
fr = func()
def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats:
if callable(func):
fr = func()
fname = func.__name__
else:
# meh. means it's just a list.. not sure how to generate a name then
fr = func
fname = f'unnamed_{id(fr)}'
tname = type(fr).__name__
if tname == 'DataFrame':
# dynamic, because pandas is an optional dependency..
@ -399,7 +405,7 @@ def stat(func: Callable[[], Iterable[C]]) -> Stats:
else:
res = _stat_iterable(fr)
return {
func.__name__: res,
fname: res,
}

View file

@ -51,12 +51,12 @@ def articles() -> Iterable[Article]:
yield from _dal().articles()
def stats():
from .core import stat, Stats
def stats() -> Stats:
from itertools import chain
from more_itertools import ilen
# todo make stats more defensive?
return {
'articles' : ilen(articles()),
**stat(articles),
'highlights': ilen(chain.from_iterable(a.highlights for a in articles())),
}

View file

@ -1,11 +0,0 @@
'''
Stackexchange data
'''
import my.config.repos.stexport.model as stexport
from my.config import stackexchange as config
def get_data():
    '''
    Build the 'stackoverflow' site model from a single JSON export.

    Only the greatest ``*.json`` entry under ``config.export_dir`` is used
    (presumably the most recent export, if file names sort by date -- confirm).
    '''
    latest = max(config.export_dir.glob('*.json'))
    model = stexport.Model([latest])
    return model.site_model('stackoverflow')

View file

@ -0,0 +1,28 @@
'''
Stackexchange data
'''
# External requirements of this module: stexport (imported below), referenced
# by its git repository URL.
# NOTE(review): presumably read by dependency-installation tooling -- confirm.
REQUIRES = [
'git+https://github.com/karlicoss/stexport',
]
# TODO use GDPR?
from stexport import dal
from my.config import stackexchange as config
# todo lru cache?
def _dal() -> dal.DAL:
    '''
    Create a DAL over the files that ``get_files`` resolves from
    ``config.export_path``. A new DAL is built on every call.
    '''
    from ..core import get_files
    return dal.DAL(get_files(config.export_path))
def site(name: str) -> dal.SiteDAL:
    '''
    Return the site-specific DAL for the site called ``name``
    (for example 'stackoverflow').
    '''
    whole = _dal()
    return whole.site_dal(name)
from ..core import stat, Stats
def stats() -> Stats:
    '''
    Return ``stat`` output for the questions of the 'stackoverflow' site.
    '''
    stackoverflow = site('stackoverflow')
    return stat(stackoverflow.questions)