core/stats: report datetime of first item in addition to last

Quite useful for quickly determining the time span of a data source.
This commit is contained in:
karlicoss 2023-10-21 23:57:01 +01:00
parent a60d69fb30
commit c335c0c9d8
2 changed files with 15 additions and 6 deletions

View file

@ -431,22 +431,25 @@ def stat(
} }
def _stat_iterable(it: Iterable[C], quick: bool=False) -> Any: def _stat_iterable(it: Iterable[C], quick: bool = False) -> Any:
from more_itertools import ilen, take, first from more_itertools import ilen, take, first
# todo not sure if there is something in more_itertools to compute this? # todo not sure if there is something in more_itertools to compute this?
total = 0 total = 0
errors = 0 errors = 0
last = None first_item = None
last_item = None
def funcit(): def funcit():
nonlocal errors, last, total nonlocal errors, first_item, last_item, total
for x in it: for x in it:
total += 1 total += 1
if isinstance(x, Exception): if isinstance(x, Exception):
errors += 1 errors += 1
else: else:
last = x last_item = x
if first_item is None:
first_item = x
yield x yield x
eit = funcit() eit = funcit()
@ -471,8 +474,13 @@ def _stat_iterable(it: Iterable[C], quick: bool=False) -> Any:
if errors > 0: if errors > 0:
res['errors'] = errors res['errors'] = errors
if last is not None: if first_item is not None:
dt = guess_datetime(last) dt = guess_datetime(first_item)
if dt is not None:
res['first'] = dt
if last_item is not None:
dt = guess_datetime(last_item)
if dt is not None: if dt is not None:
res['last'] = dt res['last'] = dt
return res return res

View file

@ -37,6 +37,7 @@ def test_guess_stats() -> None:
r = res['iter_data'] r = res['iter_data']
assert r == { assert r == {
'count': 9, 'count': 9,
'first': datetime(2020, 1, 1, 1, 1, 1),
'last': datetime(2020, 1, 3, 1, 1, 1), 'last': datetime(2020, 1, 3, 1, 1, 1),
} }