core: add helper for computing stats; use it in modules

This commit is contained in:
Dima Gerasimov 2020-06-04 22:19:34 +01:00
parent a94b64c273
commit 1cc4eb5d8d
10 changed files with 79 additions and 3 deletions

View file

@ -10,6 +10,16 @@ from my.config.repos.kobuddy.src.kobuddy import Highlight, set_databases, get_hi
set_databases(config.export_dir) set_databases(config.export_dir)
def stats():
    """Return the statistics dict for this module.

    The result is whatever the core ``stat`` helper reports for
    ``get_highlights``, copied into a fresh dict.
    """
    # imported lazily, as in the rest of the package's stats() hooks
    from ..core import stat

    summary = {}
    summary.update(stat(get_highlights))
    return summary
## TODO hmm. not sure if all this really belongs here?... perhaps orger?
# TODO maybe type over T? # TODO maybe type over T?
_Predicate = Callable[[str], bool] _Predicate = Callable[[str], bool]
Predicatish = Union[str, _Predicate] Predicatish = Union[str, _Predicate]

View file

@ -1,5 +1,8 @@
# this file only keeps the most common & critical types/utility functions # this file only keeps the most common & critical types/utility functions
from .common import PathIsh, Paths, Json from .common import PathIsh, Paths, Json
from .common import get_files, LazyLogger from .common import get_files
from .common import LazyLogger
from .common import warn_if_empty from .common import warn_if_empty
from .common import stat
from .cfg import make_config from .cfg import make_config

View file

@ -133,6 +133,10 @@ def modules_check(args):
stats = getattr(mod, 'stats', None) stats = getattr(mod, 'stats', None)
if stats is None: if stats is None:
continue continue
from . import common
common.QUICK_STATS = True
# todo make it a cmdline option..
try: try:
res = stats() res = stats()
except Exception as ee: except Exception as ee:

View file

@ -338,3 +338,30 @@ def warn_if_empty(f):
res = f(*args, **kwargs) res = f(*args, **kwargs)
return _warn_iterable(res, f=f) return _warn_iterable(res, f=f)
return wrapped # type: ignore return wrapped # type: ignore
# Hacky hook to speed up 'hpi doctor': when True, stat() only looks at the
# first items of an iterable instead of consuming all of it.
# todo think about something better
QUICK_STATS = False


C = TypeVar('C')


# todo not sure about return type...
def stat(func: Callable[[], Iterable[C]]) -> Dict[str, Any]:
    """Count the items produced by ``func`` and report them under its name.

    Args:
        func: zero-argument callable returning an iterable.

    Returns:
        A single-entry dict ``{func.__name__: count}``. Normally ``count`` is
        the exact number of items (an ``int``). When the module-level
        ``QUICK_STATS`` flag is set, at most 100 items are counted; if the
        iterable has more than that, ``count`` is the string ``'100+'``.
    """
    # stdlib only, imported locally so the helper adds no module-level deps
    from itertools import islice

    quick_limit = 100
    it = iter(func())
    res: Any
    if QUICK_STATS:
        res = len(list(islice(it, quick_limit)))
        # Probe for one more item using a private sentinel: the items
        # themselves may legitimately be None, so None can't signal exhaustion.
        exhausted = object()
        if next(it, exhausted) is not exhausted:
            # haven't exhausted
            res = f'{res}+'
    else:
        res = sum(1 for _ in it)
    return {
        func.__name__: res,
    }

View file

@ -66,6 +66,13 @@ def events() -> Iterable[Res[Event]]:
yield e yield e
def stats():
    """Return the statistics dict for this module.

    Delegates to the core ``stat`` helper applied to ``events`` and returns
    its entries in a new dict.
    """
    from ..core import stat

    event_stats = stat(events)
    return dict(event_stats)
# TODO typing.TypedDict could be handy here.. # TODO typing.TypedDict could be handy here..
def _parse_common(d: Dict) -> Dict: def _parse_common(d: Dict) -> Dict:
url = d['url'] url = d['url']

View file

@ -29,7 +29,7 @@ class github(user_config):
def dal_module(self): def dal_module(self):
rpath = self.ghexport rpath = self.ghexport
if rpath is not None: if rpath is not None:
from .core.common import import_dir from ..core.common import import_dir
return import_dir(rpath, '.dal') return import_dir(rpath, '.dal')
else: else:
import my.config.repos.ghexport.dal as dal import my.config.repos.ghexport.dal as dal
@ -81,6 +81,13 @@ def events(dal=_dal()) -> Results:
yield _parse_event(d) yield _parse_event(d)
def stats():
    """Return the statistics dict for this module.

    The only entry source is the core ``stat`` helper applied to ``events``.
    """
    from ..core import stat

    collected = {}
    for key, value in stat(events).items():
        collected[key] = value
    return collected
# TODO hmm. need some sort of abstract syntax for this... # TODO hmm. need some sort of abstract syntax for this...
# TODO split further, title too # TODO split further, title too
def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]: def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:

View file

@ -123,7 +123,8 @@ def _candidates() -> Iterable[str]:
'.', '.',
*config.paths, *config.paths,
], stdout=PIPE) as p: ], stdout=PIPE) as p:
for line in p.stdout: out = p.stdout; assert out is not None
for line in out:
path = line.decode('utf8').rstrip('\n') path = line.decode('utf8').rstrip('\n')
mime = fastermime(path) mime = fastermime(path)
tp = mime.split('/')[0] tp = mime.split('/')[0]

View file

@ -183,3 +183,11 @@ def tweets() -> Iterable[Tweet]:
def likes() -> Iterable[Like]: def likes() -> Iterable[Like]:
for inp in inputs(): for inp in inputs():
yield from ZipExport(inp).likes() yield from ZipExport(inp).likes()
def stats():
    """Return the statistics dict for this module.

    Merges what the core ``stat`` helper reports for ``tweets`` and then for
    ``likes`` (in that order) into one dict.
    """
    from ..core import stat

    merged = {}
    # same evaluation and merge order as the original dict display
    for source in (tweets, likes):
        merged.update(stat(source))
    return merged

View file

@ -108,3 +108,11 @@ def likes() -> Iterable[Tweet]:
db = _get_db() db = _get_db()
res = db.query(_QUERY.format(where='F.tweet_id IS NOT NULL')) res = db.query(_QUERY.format(where='F.tweet_id IS NOT NULL'))
yield from map(Tweet, res) yield from map(Tweet, res)
def stats():
    """Return the statistics dict for this module.

    Combines the core ``stat`` helper's output for ``tweets`` with its output
    for ``likes``; entries from ``likes`` are applied last.
    """
    from ..core import stat

    combined = dict(stat(tweets))
    combined.update(stat(likes))
    return combined

View file

@ -3,6 +3,7 @@ from more_itertools import ilen
from my.coding.github import get_events from my.coding.github import get_events
# todo test against stats? not sure.. maybe both
def test_gdpr(): def test_gdpr():
import my.github.gdpr as gdpr import my.github.gdpr as gdpr