core: add DataFrame support to stat

This commit is contained in:
Dima Gerasimov 2020-10-09 22:49:26 +01:00 committed by karlicoss
parent 209cffb476
commit 6a1a006202

View file

@ -363,6 +363,23 @@ C = TypeVar('C')
Stats = Dict[str, Any] Stats = Dict[str, Any]
# todo not sure about return type... # todo not sure about return type...
def stat(func: Callable[[], Iterable[C]]) -> Stats:
    """Compute summary statistics for the data returned by ``func``.

    ``func`` is called exactly once, with no arguments. If its result is a
    DataFrame (recognised by class name, so subclasses are covered too), the
    summary holds the column dtypes and the number of rows. Any other result
    is delegated to ``_stat_iterable``.

    Returns:
        A single-entry dict mapping ``func.__name__`` to the summary.
    """
    fr = func()
    # Detect DataFrames dynamically by class name, because pandas is an
    # optional dependency and must not be imported here. Walking the MRO
    # (instead of only looking at the concrete type) also recognises
    # subclasses of DataFrame, which would otherwise be treated as plain
    # iterables.
    is_dataframe = any(klass.__name__ == 'DataFrame' for klass in type(fr).__mro__)
    res: Any
    if is_dataframe:
        df = cast(Any, fr)  # todo ugh, not sure how to annotate properly
        res = {
            'dtypes': df.dtypes.to_dict(),
            'rows': len(df),
        }
    else:
        res = _stat_iterable(fr)
    return {
        func.__name__: res,
    }
def _stat_iterable(it: Iterable[C]) -> Any:
from more_itertools import ilen, take, first from more_itertools import ilen, take, first
# todo not sure if there is something in more_itertools to compute this? # todo not sure if there is something in more_itertools to compute this?
@ -370,23 +387,23 @@ def stat(func: Callable[[], Iterable[C]]) -> Stats:
last = None last = None
def funcit(): def funcit():
nonlocal errors, last nonlocal errors, last
for x in func(): for x in it:
if isinstance(x, Exception): if isinstance(x, Exception):
errors += 1 errors += 1
else: else:
last = x last = x
yield x yield x
it = iter(funcit()) eit = funcit()
count: Any count: Any
if QUICK_STATS: if QUICK_STATS:
initial = take(100, it) initial = take(100, eit)
count = len(initial) count = len(initial)
if first(it, None) is not None: # todo can actually be none... if first(eit, None) is not None: # todo can actually be none...
# haven't exhausted # haven't exhausted
count = f'{count}+' count = f'{count}+'
else: else:
count = ilen(it) count = ilen(eit)
res = { res = {
'count': count, 'count': count,
@ -399,10 +416,7 @@ def stat(func: Callable[[], Iterable[C]]) -> Stats:
dt = guess_datetime(last) dt = guess_datetime(last)
if dt is not None: if dt is not None:
res['last'] = dt res['last'] = dt
return res
return {
func.__name__: res,
}
# experimental, not sure about it.. # experimental, not sure about it..