core/stats: get rid of duplicated keys for 'auto stats'
Previously: ``` {'iter_data': {'iter_data': {'count': 9, 'last': datetime.datetime(2020, 1, 3, 1, 1, 1)}}} ``` Now: ``` {'iter_data': {'count': 9, 'last': datetime.datetime(2020, 1, 3, 1, 1, 1)}} ```
This commit is contained in:
parent
c5fe2e9412
commit
a60d69fb30
3 changed files with 63 additions and 7 deletions
|
@ -401,7 +401,12 @@ C = TypeVar('C')
|
|||
Stats = Dict[str, Any]
|
||||
StatsFun = Callable[[], Stats]
|
||||
# todo not sure about return type...
|
||||
def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False) -> Stats:
|
||||
def stat(
|
||||
func: Union[Callable[[], Iterable[C]], Iterable[C]],
|
||||
*,
|
||||
quick: bool = False,
|
||||
name: Optional[str] = None,
|
||||
) -> Stats:
|
||||
if callable(func):
|
||||
fr = func()
|
||||
fname = func.__name__
|
||||
|
@ -409,8 +414,8 @@ def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False)
|
|||
# meh. means it's just a list.. not sure how to generate a name then
|
||||
fr = func
|
||||
fname = f'unnamed_{id(fr)}'
|
||||
tname = type(fr).__name__
|
||||
if tname == 'DataFrame':
|
||||
type_name = type(fr).__name__
|
||||
if type_name == 'DataFrame':
|
||||
# dynamic, because pandas is an optional dependency..
|
||||
df = cast(Any, fr) # todo ugh, not sure how to annotate properly
|
||||
res = dict(
|
||||
|
@ -419,8 +424,10 @@ def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False)
|
|||
)
|
||||
else:
|
||||
res = _stat_iterable(fr, quick=quick)
|
||||
|
||||
stat_name = name if name is not None else fname
|
||||
return {
|
||||
fname: res,
|
||||
stat_name: res,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -12,16 +12,35 @@ from .common import StatsFun, Stats, stat
|
|||
|
||||
# TODO maybe could be enough to annotate OUTPUTS or something like that?
|
||||
# then stats could just use them as hints?
|
||||
def guess_stats(module_name: str, quick: bool = False) -> Optional[StatsFun]:
    """
    Build a stats function for a module from its guessed data providers.

    Returns None when guess_data_providers finds nothing for module_name;
    otherwise returns a zero-argument callable that runs stat() on every
    provider and merges the per-provider results into a single dict.
    """
    providers = guess_data_providers(module_name)
    if not providers:
        return None

    def auto_stats() -> Stats:
        merged: Stats = {}
        for provider_name, provider in providers.items():
            # passing name= makes stat() key its result by the provider name,
            # so the merged dict stays flat (no duplicated nested key)
            merged.update(stat(provider, quick=quick, name=provider_name))
        return merged

    return auto_stats
|
||||
|
||||
|
||||
def test_guess_stats() -> None:
    """
    Check that auto-generated stats are keyed by the provider name exactly once.

    Runs guess_stats against the helper module my.core.tests.auto_stats and
    verifies that the result has a single 'iter_data' key mapping directly to
    the stat payload, rather than to another dict nested under the same key.
    """
    from datetime import datetime
    import my.core.tests.auto_stats as M

    auto_stats = guess_stats(M.__name__)
    # guess_stats returns Optional[StatsFun]; narrow it so that a module with no
    # detected providers fails here with a clear assertion rather than a
    # "'NoneType' object is not callable" TypeError on the next line
    assert auto_stats is not None
    res = auto_stats()

    assert res.keys() == {'iter_data'}

    r = res['iter_data']
    assert r == {
        'count': 9,
        'last': datetime(2020, 1, 3, 1, 1, 1),
    }
|
||||
|
||||
|
||||
def guess_data_providers(module_name: str) -> Dict[str, Callable]:
|
||||
module = importlib.import_module(module_name)
|
||||
mfunctions = inspect.getmembers(module, inspect.isfunction)
|
||||
|
|
30
my/core/tests/auto_stats.py
Normal file
30
my/core/tests/auto_stats.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
"""
|
||||
Helper 'module' for test_guess_stats
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Sequence
|
||||
|
||||
|
||||
@dataclass
class Item:
    """Minimal record type yielded by the fake data provider in this helper module."""

    # identifier of the item (stringified index in iter_data)
    id: str
    # timestamp attached to the item
    dt: datetime
    # input file the item is attributed to
    source: Path
|
||||
|
||||
|
||||
def inputs() -> Sequence[Path]:
    """Return the fixed list of three fake input file paths."""
    file_names = ('file1.json', 'file2.json', 'file3.json')
    return [Path(file_name) for file_name in file_names]
|
||||
|
||||
|
||||
def iter_data() -> Iterable[Item]:
    """
    Yield three items for every path returned by inputs().

    Within each path the ids are '0', '1', '2' and the timestamps are
    2020-01-01 01:01:01 plus 0, 1 and 2 days respectively.
    """
    start = datetime(2020, 1, 1, 1, 1, 1)
    yield from (
        Item(id=str(offset), dt=start + timedelta(days=offset), source=src)
        for src in inputs()
        for offset in range(3)
    )
|
Loading…
Add table
Reference in a new issue