# Patch overview (free text ahead of the first "diff --git" header; git apply
# and patch(1) skip leading non-diff text).
#
# my/core/common.py
#   * stat(): signature split over several lines; `quick` becomes keyword-only
#     and a new keyword-only `name` (default None) is added. When `name` is
#     given it is used as the single key of the returned dict instead of the
#     function name / 'unnamed_<id>' fallback.
#   * local `tname` renamed to `type_name`.
# my/core/stats.py
#   * guess_stats(): auto_stats() now merges each provider's stat() result into
#     one flat dict keyed by provider name (previously each provider key mapped
#     to a nested {function_name: stats} dict).
#   * adds test_guess_stats(), which runs guess_stats on the helper module.
# my/core/tests/auto_stats.py (new)
#   * helper module: inputs() lists 3 paths, iter_data() yields 3 items per path.
#
# NOTE(review): making `quick` keyword-only breaks any caller that passes it
#   positionally, e.g. stat(f, True) -- confirm there are none.
# NOTE(review): test_guess_stats calls auto_stats() without checking that
#   guess_stats() did not return None (its return type is Optional[StatsFun]).
diff --git a/my/core/common.py b/my/core/common.py
index 1284565..b692730 100644
--- a/my/core/common.py
+++ b/my/core/common.py
@@ -401,7 +401,12 @@ C = TypeVar('C')
 Stats = Dict[str, Any]
 StatsFun = Callable[[], Stats]
 # todo not sure about return type...
-def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False) -> Stats:
+def stat(
+    func: Union[Callable[[], Iterable[C]], Iterable[C]],
+    *,
+    quick: bool = False,
+    name: Optional[str] = None,
+) -> Stats:
     if callable(func):
         fr = func()
         fname = func.__name__
@@ -409,18 +414,20 @@ def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False)
         # meh. means it's just a list.. not sure how to generate a name then
         fr = func
         fname = f'unnamed_{id(fr)}'
-    tname = type(fr).__name__
-    if tname == 'DataFrame':
+    type_name = type(fr).__name__
+    if type_name == 'DataFrame':
         # dynamic, because pandas is an optional dependency..
-        df = cast(Any, fr) # todo ugh, not sure how to annotate properly
+        df = cast(Any, fr)  # todo ugh, not sure how to annotate properly
         res = dict(
             dtypes=df.dtypes.to_dict(),
             rows=len(df),
         )
     else:
         res = _stat_iterable(fr, quick=quick)
+
+    stat_name = name if name is not None else fname
     return {
-        fname: res,
+        stat_name: res,
     }
 
 
diff --git a/my/core/stats.py b/my/core/stats.py
index 42e8cd9..9dfaa04 100644
--- a/my/core/stats.py
+++ b/my/core/stats.py
@@ -12,16 +12,35 @@ from .common import StatsFun, Stats, stat
 
 # TODO maybe could be enough to annotate OUTPUTS or something like that?
 # then stats could just use them as hints?
-def guess_stats(module_name: str, quick: bool=False) -> Optional[StatsFun]:
+def guess_stats(module_name: str, quick: bool = False) -> Optional[StatsFun]:
     providers = guess_data_providers(module_name)
     if len(providers) == 0:
         return None
 
     def auto_stats() -> Stats:
-        return {k: stat(v, quick=quick) for k, v in providers.items()}
+        res = {}
+        for k, v in providers.items():
+            res.update(stat(v, quick=quick, name=k))
+        return res
+
     return auto_stats
 
 
+def test_guess_stats() -> None:
+    from datetime import datetime
+    import my.core.tests.auto_stats as M
+
+    auto_stats = guess_stats(M.__name__)
+    res = auto_stats()
+    assert res.keys() == {'iter_data'}
+
+    r = res['iter_data']
+    assert r == {
+        'count': 9,
+        'last': datetime(2020, 1, 3, 1, 1, 1),
+    }
+
+
 def guess_data_providers(module_name: str) -> Dict[str, Callable]:
     module = importlib.import_module(module_name)
     mfunctions = inspect.getmembers(module, inspect.isfunction)
diff --git a/my/core/tests/auto_stats.py b/my/core/tests/auto_stats.py
new file mode 100644
index 0000000..2946ab2
--- /dev/null
+++ b/my/core/tests/auto_stats.py
@@ -0,0 +1,30 @@
+"""
+Helper 'module' for test_guess_stats
+"""
+
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Iterable, Sequence
+
+
+@dataclass
+class Item:
+    id: str
+    dt: datetime
+    source: Path
+
+
+def inputs() -> Sequence[Path]:
+    return [
+        Path('file1.json'),
+        Path('file2.json'),
+        Path('file3.json'),
+    ]
+
+
+def iter_data() -> Iterable[Item]:
+    dt = datetime.fromisoformat('2020-01-01 01:01:01')
+    for path in inputs():
+        for i in range(3):
+            yield Item(id=str(i), dt=dt + timedelta(days=i), source=path)