my.stackexchange: use proper pip package, add stat
+ 'anonymous' mode for stat() function
This commit is contained in:
parent
9d39892e75
commit
ddea816a49
6 changed files with 50 additions and 20 deletions
|
@ -42,6 +42,9 @@ class exercise:
|
|||
class bluemaestro:
    # Stub setting: defaults to an empty string.
    # NOTE(review): presumably overridden with real export location(s) in the
    # user's own config -- confirm against the module that reads it.
    export_path: Paths = ''
|
||||
|
||||
class stackexchange:
    # Stub setting: defaults to an empty string.
    # Read by my/stackexchange/stexport.py, which passes it to
    # get_files(config.export_path) to resolve the export files.
    export_path: Paths = ''
|
||||
|
||||
class google:
|
||||
takeout_path: Paths = ''
|
||||
|
||||
|
|
|
@ -386,8 +386,14 @@ QUICK_STATS = False
|
|||
C = TypeVar('C')
|
||||
Stats = Dict[str, Any]
|
||||
# todo not sure about return type...
|
||||
def stat(func: Callable[[], Iterable[C]]) -> Stats:
|
||||
fr = func()
|
||||
def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats:
|
||||
if callable(func):
|
||||
fr = func()
|
||||
fname = func.__name__
|
||||
else:
|
||||
# meh. means it's just a list.. not sure how to generate a name then
|
||||
fr = func
|
||||
fname = f'unnamed_{id(fr)}'
|
||||
tname = type(fr).__name__
|
||||
if tname == 'DataFrame':
|
||||
# dynamic, because pandas is an optional dependency..
|
||||
|
@ -399,7 +405,7 @@ def stat(func: Callable[[], Iterable[C]]) -> Stats:
|
|||
else:
|
||||
res = _stat_iterable(fr)
|
||||
return {
|
||||
func.__name__: res,
|
||||
fname: res,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -51,12 +51,12 @@ def articles() -> Iterable[Article]:
|
|||
yield from _dal().articles()
|
||||
|
||||
|
||||
def stats():
|
||||
from .core import stat, Stats
|
||||
def stats() -> Stats:
|
||||
from itertools import chain
|
||||
from more_itertools import ilen
|
||||
# todo make stats more defensive?
|
||||
return {
|
||||
'articles' : ilen(articles()),
|
||||
**stat(articles),
|
||||
'highlights': ilen(chain.from_iterable(a.highlights for a in articles())),
|
||||
}
|
||||
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
'''
|
||||
Stackexchange data
|
||||
'''
|
||||
|
||||
import my.config.repos.stexport.model as stexport
|
||||
from my.config import stackexchange as config
|
||||
|
||||
|
||||
def get_data():
    """Return the 'stackoverflow' site model built from a single JSON export.

    Only one file is used: the greatest-sorting ``*.json`` path under
    ``config.export_dir`` (presumably the most recent export, if file names
    sort chronologically -- TODO confirm).

    Raises ValueError (from ``max``) when the directory has no ``*.json`` files.
    """
    candidates = config.export_dir.glob('*.json')
    chosen = max(candidates)
    model = stexport.Model([chosen])
    return model.site_model('stackoverflow')
|
28
my/stackexchange/stexport.py
Normal file
28
my/stackexchange/stexport.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
'''
|
||||
Stackexchange data
|
||||
'''
|
||||
REQUIRES = [
|
||||
'git+https://github.com/karlicoss/stexport',
|
||||
]
|
||||
|
||||
# TODO use GDPR?
|
||||
|
||||
from stexport import dal
|
||||
from my.config import stackexchange as config
|
||||
|
||||
|
||||
# todo lru cache?
|
||||
def _dal() -> dal.DAL:
    """Construct a ``dal.DAL`` over the files resolved from ``config.export_path``.

    A new DAL is built on every call (nothing is cached here).
    """
    # Kept as a function-level import, exactly like the original.
    from ..core import get_files

    return dal.DAL(get_files(config.export_path))
|
||||
|
||||
|
||||
def site(name: str) -> dal.SiteDAL:
    """Return the per-site DAL for ``name`` (e.g. ``'stackoverflow'``).

    Delegates to ``site_dal`` on a freshly built ``_dal()``.
    """
    export_dal = _dal()
    return export_dal.site_dal(name)
|
||||
|
||||
|
||||
from ..core import stat, Stats
|
||||
def stats() -> Stats:
    """Return ``stat`` output for the 'stackoverflow' site's ``questions``.

    NOTE(review): ``questions`` is passed without being called; if it is a
    plain iterable rather than a callable, ``stat`` cannot derive a function
    name and labels the entry ``unnamed_<id>`` -- confirm this is intended.
    """
    stackoverflow = site('stackoverflow')
    return stat(stackoverflow.questions)
|
Loading…
Add table
Add a link
Reference in a new issue