core: add DataFrame support to stat
This commit is contained in:
parent
209cffb476
commit
6a1a006202
1 changed files with 23 additions and 9 deletions
|
@ -363,6 +363,23 @@ C = TypeVar('C')
|
||||||
Stats = Dict[str, Any]
|
Stats = Dict[str, Any]
|
||||||
# todo not sure about return type...
|
# todo not sure about return type...
|
||||||
def stat(func: Callable[[], Iterable[C]]) -> Stats:
|
def stat(func: Callable[[], Iterable[C]]) -> Stats:
|
||||||
|
fr = func()
|
||||||
|
tname = type(fr).__name__
|
||||||
|
if tname == 'DataFrame':
|
||||||
|
# dynamic, because pandas is an optional dependency..
|
||||||
|
df = cast(Any, fr) # todo ugh, not sure how to annotate properly
|
||||||
|
res = dict(
|
||||||
|
dtypes=df.dtypes.to_dict(),
|
||||||
|
rows=len(df),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
res = _stat_iterable(fr)
|
||||||
|
return {
|
||||||
|
func.__name__: res,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _stat_iterable(it: Iterable[C]) -> Any:
|
||||||
from more_itertools import ilen, take, first
|
from more_itertools import ilen, take, first
|
||||||
|
|
||||||
# todo not sure if there is something in more_itertools to compute this?
|
# todo not sure if there is something in more_itertools to compute this?
|
||||||
|
@ -370,23 +387,23 @@ def stat(func: Callable[[], Iterable[C]]) -> Stats:
|
||||||
last = None
|
last = None
|
||||||
def funcit():
|
def funcit():
|
||||||
nonlocal errors, last
|
nonlocal errors, last
|
||||||
for x in func():
|
for x in it:
|
||||||
if isinstance(x, Exception):
|
if isinstance(x, Exception):
|
||||||
errors += 1
|
errors += 1
|
||||||
else:
|
else:
|
||||||
last = x
|
last = x
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
it = iter(funcit())
|
eit = funcit()
|
||||||
count: Any
|
count: Any
|
||||||
if QUICK_STATS:
|
if QUICK_STATS:
|
||||||
initial = take(100, it)
|
initial = take(100, eit)
|
||||||
count = len(initial)
|
count = len(initial)
|
||||||
if first(it, None) is not None: # todo can actually be none...
|
if first(eit, None) is not None: # todo can actually be none...
|
||||||
# haven't exhausted
|
# haven't exhausted
|
||||||
count = f'{count}+'
|
count = f'{count}+'
|
||||||
else:
|
else:
|
||||||
count = ilen(it)
|
count = ilen(eit)
|
||||||
|
|
||||||
res = {
|
res = {
|
||||||
'count': count,
|
'count': count,
|
||||||
|
@ -399,10 +416,7 @@ def stat(func: Callable[[], Iterable[C]]) -> Stats:
|
||||||
dt = guess_datetime(last)
|
dt = guess_datetime(last)
|
||||||
if dt is not None:
|
if dt is not None:
|
||||||
res['last'] = dt
|
res['last'] = dt
|
||||||
|
return res
|
||||||
return {
|
|
||||||
func.__name__: res,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# experimental, not sure about it..
|
# experimental, not sure about it..
|
||||||
|
|
Loading…
Add table
Reference in a new issue