HPI/my/core/stats.py
karlicoss c335c0c9d8 core/stats: report datetime of first item in addition to last
quite useful for quickly determining time span of a data source
2023-10-22 00:47:36 +01:00

178 lines
5.1 KiB
Python

'''
Helpers for hpi doctor/stats functionality.
'''
import collections
import importlib
import inspect
import typing
from typing import Optional, Callable, Any, Iterator, Sequence, Dict, List
from .common import StatsFun, Stats, stat
# TODO: maybe it would be enough to annotate OUTPUTS (or something like that)?
# Then stats could just use them as hints instead of guessing.
def guess_stats(module_name: str, quick: bool = False) -> Optional[StatsFun]:
    """
    Build a stats function for a module by discovering its data providers.

    Returns None when guess_data_providers finds nothing in the module.
    Otherwise returns a zero-argument callable which runs stat() over every
    discovered provider (forwarding ``quick``) and merges the results into
    a single mapping.
    """
    discovered = guess_data_providers(module_name)
    if not discovered:
        return None

    def auto_stats() -> Stats:
        merged = {}
        for name, provider in discovered.items():
            provider_stats = stat(provider, quick=quick, name=name)
            merged.update(provider_stats)
        return merged

    return auto_stats
def test_guess_stats() -> None:
    """Check the stats guessed for the my.core.tests.auto_stats module."""
    from datetime import datetime

    import my.core.tests.auto_stats as M

    stats_fun = guess_stats(M.__name__)
    stats = stats_fun()

    # exactly one provider is expected to be picked up from that module
    assert stats.keys() == {'iter_data'}

    expected = {
        'count': 9,
        'first': datetime(2020, 1, 1, 1, 1, 1),
        'last': datetime(2020, 1, 3, 1, 1, 1),
    }
    assert stats['iter_data'] == expected
def guess_data_providers(module_name: str) -> Dict[str, Callable]:
    """
    Import the module called ``module_name`` and return those of its
    functions which is_data_provider accepts, keyed by function name.
    """
    mod = importlib.import_module(module_name)
    found: Dict[str, Callable] = {}
    for name, candidate in inspect.getmembers(mod, inspect.isfunction):
        if is_data_provider(candidate):
            found[name] = candidate
    return found
# todo how to exclude deprecated stuff?
def is_data_provider(fun: Any) -> bool:
    """
    Decide whether ``fun`` looks like a data provider. To qualify, it must:

    1. return an iterable or something like that (judged by its return type hint)
    2. be callable without arguments, i.e. have no parameters lacking a default
       (otherwise it's not callable by stats anyway)
    3. not have a name starting with an underscore (probably a helper function)
    4. not be the 'inputs' function, nor have a name ending with '_inputs'
    """
    # todo maybe for 2 allow default arguments? not sure
    # one example which could benefit is my.pdfs
    if fun is None:
        return False

    # todo. uh.. very similar to what cachew is trying to do?
    try:
        signature = inspect.signature(fun)
    except (ValueError, TypeError):
        # inspect can't work out a signature, so not a function
        return False

    # at least one argument has no default value
    required = list(sig_required_params(signature))
    if required:
        return False

    if hasattr(fun, '__name__'):
        name = fun.__name__
        # leading underscore: probably a helper function
        # 'inputs' itself, comment_inputs, backup_inputs etc. are ignored as well
        if name.startswith('_') or name == 'inputs' or name.endswith('_inputs'):
            return False

    # inspect.signature might hold annotations as str rather than proper type
    # objects (when 'from __future__ import annotations' is used), so rely on
    # get_type_hints instead, which evaluates the annotations
    hints = typing.get_type_hints(fun)
    if hints.get('return') is None:
        return False
    return type_is_iterable(hints['return'])
def test_is_data_provider() -> None:
    """Run is_data_provider over non-functions and a range of function shapes."""
    # values which aren't plain functions
    for not_a_function in (None, int, "x"):
        assert not is_data_provider(not_a_function)

    # no return annotation
    def no_return_type():
        return [1, 2, 3]

    lam = lambda: [1, 2]

    assert not is_data_provider(no_return_type)
    assert not is_data_provider(lam)

    # has an argument without a default value
    def has_extra_args(count) -> List[int]:
        return list(range(count))

    assert not is_data_provider(has_extra_args)

    # no arguments and an iterable return type
    def has_return_type() -> Sequence[str]:
        return ['a', 'b', 'c']

    assert is_data_provider(has_return_type)

    # rejected because of their names
    def _helper_func() -> Iterator[Any]:
        yield 1

    def inputs() -> Iterator[Any]:
        yield 1

    def producer_inputs() -> Iterator[Any]:
        yield 1

    assert not is_data_provider(_helper_func)
    assert not is_data_provider(inputs)
    assert not is_data_provider(producer_inputs)
# return any parameters the user is required to provide - those which don't have default values
def sig_required_params(sig: inspect.Signature) -> Iterator[inspect.Parameter]:
    """
    Yield the parameters of ``sig`` which a caller is required to provide.

    A parameter counts as required when it has no default value and isn't a
    variadic ``*args``/``**kwargs`` catch-all. Variadic parameters never
    carry a default, yet the function can still be called without passing
    anything for them.
    """
    variadic = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
    for param in sig.parameters.values():
        if param.kind in variadic:
            continue
        # identity check on purpose: '==' would dispatch to the default value's
        # own __eq__, which may raise or return something that isn't a bool
        if param.default is inspect.Parameter.empty:
            yield param
def test_sig_required_params() -> None:
    """Check how many required parameters get reported for a few signatures."""
    def required_count(fn) -> int:
        return len(list(sig_required_params(inspect.signature(fn))))

    def x() -> int:
        return 5

    def y(arg: int) -> int:
        return arg

    # from stats perspective, this should be treated as a data provider as well
    # could be that the default value to the data provider is the 'default'
    # path to use for inputs/a function to provide input data
    def z(arg: int = 5) -> int:
        return arg

    assert required_count(x) == 0
    assert required_count(y) == 1
    assert required_count(z) == 0
def type_is_iterable(type_spec) -> bool:
    """
    Tell whether a type annotation describes an iterable which isn't a mapping.

    Only parameterised generics are recognised (e.g. ``List[int]`` or
    ``Iterator[str]``). Anything for which ``typing.get_origin`` returns
    None, such as a plain class, ``Any`` or ``None``, gives False.
    """
    origin = typing.get_origin(type_spec)
    # Also covers the 'no origin' case (None isn't a class).
    # Special forms such as Union/Optional or Literal can have an origin which
    # isn't a class, and issubclass would raise TypeError on those.
    if not isinstance(origin, type):
        return False
    # explicitly exclude dicts... not sure?
    if issubclass(origin, collections.abc.Mapping):
        return False
    return issubclass(origin, collections.abc.Iterable)
# todo docstring test?
def test_type_is_iterable() -> None:
    """Check type_is_iterable against annotations it should reject and accept."""
    from typing import List, Sequence, Iterable, Dict, Any

    rejected = [None, int, Any, Dict[int, int]]
    accepted = [List[int], Sequence[Dict[str, str]], Iterable[Any]]

    for spec in rejected:
        assert not type_is_iterable(spec)
    for spec in accepted:
        assert type_is_iterable(spec)