core/stats: enable processing input files, report first and last filename

This can be useful for quick investigation and for testing the setup.
This commit is contained in:
karlicoss 2023-10-22 00:07:48 +01:00
parent c335c0c9d8
commit 86ea605aec
2 changed files with 26 additions and 20 deletions

View file

@ -474,15 +474,19 @@ def _stat_iterable(it: Iterable[C], quick: bool = False) -> Any:
if errors > 0: if errors > 0:
res['errors'] = errors res['errors'] = errors
if first_item is not None: def stat_item(item):
dt = guess_datetime(first_item) if item is None:
if dt is not None: return None
res['first'] = dt if isinstance(item, Path):
return str(item)
return guess_datetime(item)
if (stat_first := stat_item(first_item)) is not None:
res['first'] = stat_first
if (stat_last := stat_item(last_item)) is not None:
res['last'] = stat_last
if last_item is not None:
dt = guess_datetime(last_item)
if dt is not None:
res['last'] = dt
return res return res

View file

@ -31,14 +31,20 @@ def test_guess_stats() -> None:
import my.core.tests.auto_stats as M import my.core.tests.auto_stats as M
auto_stats = guess_stats(M.__name__) auto_stats = guess_stats(M.__name__)
assert auto_stats is not None
res = auto_stats() res = auto_stats()
assert res.keys() == {'iter_data'}
r = res['iter_data'] assert res == {
assert r == { 'inputs': {
'count': 9, 'count': 3,
'first': datetime(2020, 1, 1, 1, 1, 1), 'first': 'file1.json',
'last': datetime(2020, 1, 3, 1, 1, 1), 'last': 'file3.json',
},
'iter_data': {
'count': 9,
'first': datetime(2020, 1, 1, 1, 1, 1),
'last': datetime(2020, 1, 3, 1, 1, 1),
},
} }
@ -54,7 +60,6 @@ def is_data_provider(fun: Any) -> bool:
1. returns iterable or something like that 1. returns iterable or something like that
2. takes no arguments? (otherwise not callable by stats anyway?) 2. takes no arguments? (otherwise not callable by stats anyway?)
3. doesn't start with an underscore (those are probably helper functions?) 3. doesn't start with an underscore (those are probably helper functions?)
4. functions isn't the 'inputs' function (or ends with '_inputs')
""" """
# todo maybe for 2 allow default arguments? not sure # todo maybe for 2 allow default arguments? not sure
# one example which could benefit is my.pdfs # one example which could benefit is my.pdfs
@ -74,9 +79,6 @@ def is_data_provider(fun: Any) -> bool:
# probably a helper function? # probably a helper function?
if fun.__name__.startswith('_'): if fun.__name__.startswith('_'):
return False return False
# ignore def inputs; something like comment_inputs or backup_inputs should also be ignored
if fun.__name__ == 'inputs' or fun.__name__.endswith('_inputs'):
return False
# inspect.signature might return str instead of a proper type object # inspect.signature might return str instead of a proper type object
# if from __future__ import annotations is used # if from __future__ import annotations is used
@ -116,11 +118,11 @@ def test_is_data_provider() -> None:
def inputs() -> Iterator[Any]: def inputs() -> Iterator[Any]:
yield 1 yield 1
assert not idp(inputs) assert idp(inputs)
def producer_inputs() -> Iterator[Any]: def producer_inputs() -> Iterator[Any]:
yield 1 yield 1
assert not idp(producer_inputs) assert idp(producer_inputs)
# return any parameters the user is required to provide - those which don't have default values # return any parameters the user is required to provide - those which don't have default values