From 86ea605aecbe330731c4bdba7f15a2524ece1808 Mon Sep 17 00:00:00 2001 From: karlicoss Date: Sun, 22 Oct 2023 00:07:48 +0100 Subject: [PATCH] core/stats: enable processing input files, report first and last filename can be useful for quick investigation/testing setup --- my/core/common.py | 20 ++++++++++++-------- my/core/stats.py | 26 ++++++++++++++------------ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/my/core/common.py b/my/core/common.py index b34d6d2..602f8af 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -474,15 +474,19 @@ def _stat_iterable(it: Iterable[C], quick: bool = False) -> Any: if errors > 0: res['errors'] = errors - if first_item is not None: - dt = guess_datetime(first_item) - if dt is not None: - res['first'] = dt + def stat_item(item): + if item is None: + return None + if isinstance(item, Path): + return str(item) + return guess_datetime(item) + + if (stat_first := stat_item(first_item)) is not None: + res['first'] = stat_first + + if (stat_last := stat_item(last_item)) is not None: + res['last'] = stat_last - if last_item is not None: - dt = guess_datetime(last_item) - if dt is not None: - res['last'] = dt return res diff --git a/my/core/stats.py b/my/core/stats.py index 1818b63..44735b8 100644 --- a/my/core/stats.py +++ b/my/core/stats.py @@ -31,14 +31,20 @@ def test_guess_stats() -> None: import my.core.tests.auto_stats as M auto_stats = guess_stats(M.__name__) + assert auto_stats is not None res = auto_stats() - assert res.keys() == {'iter_data'} - r = res['iter_data'] - assert r == { - 'count': 9, - 'first': datetime(2020, 1, 1, 1, 1, 1), - 'last': datetime(2020, 1, 3, 1, 1, 1), + assert res == { + 'inputs': { + 'count': 3, + 'first': 'file1.json', + 'last': 'file3.json', + }, + 'iter_data': { + 'count': 9, + 'first': datetime(2020, 1, 1, 1, 1, 1), + 'last': datetime(2020, 1, 3, 1, 1, 1), + }, } @@ -54,7 +60,6 @@ def is_data_provider(fun: Any) -> bool: 1. returns iterable or something like that 2. takes no arguments? (otherwise not callable by stats anyway?) 3. doesn't start with an underscore (those are probably helper functions?) - 4. functions isn't the 'inputs' function (or ends with '_inputs') """ # todo maybe for 2 allow default arguments? not sure # one example which could benefit is my.pdfs @@ -74,9 +79,6 @@ def is_data_provider(fun: Any) -> bool: # probably a helper function? if fun.__name__.startswith('_'): return False - # ignore def inputs; something like comment_inputs or backup_inputs should also be ignored - if fun.__name__ == 'inputs' or fun.__name__.endswith('_inputs'): - return False # inspect.signature might return str instead of a proper type object # if from __future__ import annotations is used @@ -116,11 +118,11 @@ def test_is_data_provider() -> None: def inputs() -> Iterator[Any]: yield 1 - assert not idp(inputs) + assert idp(inputs) def producer_inputs() -> Iterator[Any]: yield 1 - assert not idp(producer_inputs) + assert idp(producer_inputs) # return any parameters the user is required to provide - those which don't have default values