core/stats: get rid of duplicated keys for 'auto stats'

previously:
```
{'iter_data': {'iter_data': {'count': 9, 'last': datetime.datetime(2020, 1, 3, 1, 1, 1)}}}
```

after:
```
{'iter_data': {'count': 9, 'last': datetime.datetime(2020, 1, 3, 1, 1, 1)}}
```
This commit is contained in:
karlicoss 2023-10-21 23:50:35 +01:00
parent c5fe2e9412
commit a60d69fb30
3 changed files with 63 additions and 7 deletions

View file

@ -401,7 +401,12 @@ C = TypeVar('C')
Stats = Dict[str, Any] Stats = Dict[str, Any]
StatsFun = Callable[[], Stats] StatsFun = Callable[[], Stats]
# todo not sure about return type... # todo not sure about return type...
def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False) -> Stats: def stat(
func: Union[Callable[[], Iterable[C]], Iterable[C]],
*,
quick: bool = False,
name: Optional[str] = None,
) -> Stats:
if callable(func): if callable(func):
fr = func() fr = func()
fname = func.__name__ fname = func.__name__
@ -409,8 +414,8 @@ def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False)
# meh. means it's just a list.. not sure how to generate a name then # meh. means it's just a list.. not sure how to generate a name then
fr = func fr = func
fname = f'unnamed_{id(fr)}' fname = f'unnamed_{id(fr)}'
tname = type(fr).__name__ type_name = type(fr).__name__
if tname == 'DataFrame': if type_name == 'DataFrame':
# dynamic, because pandas is an optional dependency.. # dynamic, because pandas is an optional dependency..
df = cast(Any, fr) # todo ugh, not sure how to annotate properly df = cast(Any, fr) # todo ugh, not sure how to annotate properly
res = dict( res = dict(
@ -419,8 +424,10 @@ def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False)
) )
else: else:
res = _stat_iterable(fr, quick=quick) res = _stat_iterable(fr, quick=quick)
stat_name = name if name is not None else fname
return { return {
fname: res, stat_name: res,
} }

View file

@ -18,10 +18,29 @@ def guess_stats(module_name: str, quick: bool=False) -> Optional[StatsFun]:
return None return None
def auto_stats() -> Stats: def auto_stats() -> Stats:
return {k: stat(v, quick=quick) for k, v in providers.items()} res = {}
for k, v in providers.items():
res.update(stat(v, quick=quick, name=k))
return res
return auto_stats return auto_stats
def test_guess_stats() -> None:
    """
    Check that stats guessed for the helper module are keyed by the provider
    name directly, i.e. {'iter_data': {...}} with no extra level of nesting.
    """
    from datetime import datetime

    import my.core.tests.auto_stats as M

    auto_stats = guess_stats(M.__name__)
    # guess_stats is annotated as returning Optional[StatsFun], so narrow it
    # explicitly: this gives a clear assertion failure (and keeps type
    # checkers happy) instead of "'NoneType' object is not callable" below
    assert auto_stats is not None
    res = auto_stats()

    assert res.keys() == {'iter_data'}

    r = res['iter_data']
    assert r == {
        'count': 9,
        'last': datetime(2020, 1, 3, 1, 1, 1),
    }
def guess_data_providers(module_name: str) -> Dict[str, Callable]: def guess_data_providers(module_name: str) -> Dict[str, Callable]:
module = importlib.import_module(module_name) module = importlib.import_module(module_name)
mfunctions = inspect.getmembers(module, inspect.isfunction) mfunctions = inspect.getmembers(module, inspect.isfunction)

View file

@ -0,0 +1,30 @@
"""
Helper 'module' for test_guess_stats
"""
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Iterable, Sequence
@dataclass
class Item:
    """
    A single synthetic record, as produced by iter_data in this module.
    """

    # identifier of the record; iter_data fills it with a stringified index
    id: str
    # timestamp attached to the record
    dt: datetime
    # input path the record is attributed to
    source: Path
def inputs() -> Sequence[Path]:
    """
    Return the fixed list of (relative) input paths used by this helper module.
    """
    names = ('file1.json', 'file2.json', 'file3.json')
    return [Path(name) for name in names]
def iter_data() -> Iterable[Item]:
    """
    Lazily yield three items for every path returned by inputs().

    Within each path the items get ids '0', '1', '2' and timestamps starting at
    2020-01-01 01:01:01, advancing by one day per item.
    """
    start = datetime.fromisoformat('2020-01-01 01:01:01')
    for source in inputs():
        yield from (
            Item(id=str(n), dt=start + timedelta(days=n), source=source)
            for n in range(3)
        )