core: more type annotations

This commit is contained in:
Dima Gerasimov 2020-10-03 13:15:15 +01:00 committed by karlicoss
parent 44b756cc6b
commit 06ee72bc30
7 changed files with 43 additions and 34 deletions

View file

@ -3,7 +3,7 @@ from .common import PathIsh, Paths, Json
from .common import get_files from .common import get_files
from .common import LazyLogger from .common import LazyLogger
from .common import warn_if_empty from .common import warn_if_empty
from .common import stat from .common import stat, Stats
from .cfg import make_config from .cfg import make_config
from .util import __NOT_HPI_MODULE__ from .util import __NOT_HPI_MODULE__

View file

@ -362,8 +362,9 @@ QUICK_STATS = False
C = TypeVar('C') C = TypeVar('C')
Stats = Dict[str, Any]
# todo not sure about return type... # todo not sure about return type...
def stat(func: Callable[[], Iterable[C]]) -> Dict[str, Any]: def stat(func: Callable[[], Iterable[C]]) -> Stats:
from more_itertools import ilen, take, first from more_itertools import ilen, take, first
# todo not sure if there is something in more_itertools to compute this? # todo not sure if there is something in more_itertools to compute this?

View file

@ -2,18 +2,24 @@
Various pandas helpers and convenience functions Various pandas helpers and convenience functions
''' '''
# todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
from typing import Optional # NOTE: this file is meant to be importable without Pandas installed
import warnings from typing import Optional, TYPE_CHECKING, Any
from . import warnings
# FIXME need to make sure check_dataframe decorator can be used without actually importing pandas if TYPE_CHECKING:
# so need to move this import from top level # this is kinda pointless at the moment, but handy to annotate DF returning methods now
import pandas as pd # type: ignore # later will be unignored when they implement type annotations
import pandas as pd # type: ignore
# todo special warning type? # DataFrameT = pd.DataFrame
DataFrameT = Any
else:
# in runtime, make it defensive so it works without pandas
DataFrameT = Any
def check_dateish(s) -> Optional[str]: def check_dateish(s) -> Optional[str]:
import pandas as pd # type: ignore
ctype = s.dtype ctype = s.dtype
if str(ctype).startswith('datetime64'): if str(ctype).startswith('datetime64'):
return None return None
@ -26,18 +32,22 @@ def check_dateish(s) -> Optional[str]:
return None return None
def check_dataframe(f): from typing import Any, Callable, TypeVar
FuncT = TypeVar('FuncT', bound=Callable[..., DataFrameT])
def check_dataframe(f: FuncT) -> FuncT:
    """
    Decorator for dataframe-returning functions that sanity checks the result.

    The wrapped function is called with the original arguments; every column
    of the dataframe it returns is passed to check_dateish, and each non-None
    message is reported through warnings.low (prefixed with the function name
    and the column name). The dataframe is returned to the caller unchanged.
    """
    from functools import wraps

    @wraps(f)
    def wrapper(*args, **kwargs) -> DataFrameT:
        df = f(*args, **kwargs)
        # todo make super defensive?
        # TODO check index as well?
        # NOTE: DataFrame.iteritems() was deprecated in pandas 1.5 and removed in 2.0;
        # items() yields the same (column name, column data) pairs on old and new versions
        for col, data in df.items():
            res = check_dateish(data)
            if res is not None:
                warnings.low(f"{f.__name__}, column '{col}': {res}")
        return df
    # mypy can't express that the decorator preserves the signature:
    # https://github.com/python/mypy/issues/1927
    return wrapper  # type: ignore[return-value]
# todo doctor: could have a suggestion to wrap dataframes with it?? discover by return type? # todo doctor: could have a suggestion to wrap dataframes with it?? discover by return type?

View file

@ -1,9 +0,0 @@
import typing
if typing.TYPE_CHECKING:
from typing import Any
# todo would be nice to use some real stubs..
DataFrameT = Any
else:
import pandas # type: ignore
DataFrameT = pandas.DataFrame

View file

@ -35,6 +35,11 @@ def _warn(message: str, *args, color=None, **kwargs) -> None:
warnings.warn(_colorize(message, color=color), *args, **kwargs) warnings.warn(_colorize(message, color=color), *args, **kwargs)
def low(message: str, *args, **kwargs) -> None:
    """Forward the message to _warn with color forced to 'grey'.

    Any 'color' keyword supplied by the caller is overridden; all other
    positional and keyword arguments are passed through as is.
    """
    kwargs.update(color='grey')
    _warn(message, *args, **kwargs)
def medium(message: str, *args, **kwargs) -> None:
    """Forward the message to _warn with color forced to 'yellow'.

    Any 'color' keyword supplied by the caller is overridden; all other
    positional and keyword arguments are passed through as is.
    """
    kwargs.update(color='yellow')
    _warn(message, *args, **kwargs)

View file

@ -6,13 +6,13 @@ Consumes data exported by https://github.com/karlicoss/emfitexport
""" """
from datetime import date from datetime import date
from pathlib import Path from pathlib import Path
from typing import Dict, List, Iterable, Any from typing import Dict, List, Iterable, Any, Optional
from ..core import get_files from ..core import get_files
from ..core.common import mcachew from ..core.common import mcachew
from ..core.cachew import cache_dir from ..core.cachew import cache_dir
from ..core.error import Res, set_error_datetime, extract_error_datetime from ..core.error import Res, set_error_datetime, extract_error_datetime
from ..core.types import DataFrameT from ..core.pandas import DataFrameT
from my.config import emfit as config from my.config import emfit as config
@ -89,7 +89,7 @@ def pre_dataframe() -> Iterable[Res[Emfit]]:
def dataframe() -> DataFrameT: def dataframe() -> DataFrameT:
from datetime import timedelta from datetime import timedelta
dicts: List[Dict[str, Any]] = [] dicts: List[Dict[str, Any]] = []
last = None last: Optional[Emfit] = None
for s in pre_dataframe(): for s in pre_dataframe():
d: Dict[str, Any] d: Dict[str, Any]
if isinstance(s, Exception): if isinstance(s, Exception):
@ -134,14 +134,15 @@ def dataframe() -> DataFrameT:
return pandas.DataFrame(dicts) return pandas.DataFrame(dicts)
# TODO add dataframe support to stat() # TODO add dataframe support to stat()
from ..core import stat, Stats


def stats() -> Stats:
    """Return the result of stat() applied to pre_dataframe."""
    result: Stats = stat(pre_dataframe)
    return result
from contextlib import contextmanager from contextlib import contextmanager
from typing import Iterator
@contextmanager @contextmanager
def fake_data(nights=500): def fake_data(nights: int=500) -> Iterator[None]:
from ..core.cfg import override_config from ..core.cfg import override_config
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
with override_config(config) as cfg, TemporaryDirectory() as td: with override_config(config) as cfg, TemporaryDirectory() as td:

View file

@ -12,7 +12,7 @@ from typing import Sequence, Iterable
from .core import get_files, LazyLogger from .core import get_files, LazyLogger
from .core.common import mcachew from .core.common import mcachew
from .core.error import Res, split_errors from .core.error import Res, split_errors
from .core.pandas import check_dataframe as cdf from .core.pandas import check_dataframe as cdf, DataFrameT
from my.config import rescuetime as config from my.config import rescuetime as config
@ -36,7 +36,7 @@ def entries() -> Iterable[Res[Entry]]:
yield from dal.entries() yield from dal.entries()
def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]: def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
vit, eit = split_errors(entries(), ET=Exception) vit, eit = split_errors(entries(), ET=Exception)
yield from eit yield from eit
import more_itertools import more_itertools
@ -45,7 +45,7 @@ def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
@cdf @cdf
def dataframe(): def dataframe() -> DataFrameT:
import pandas as pd # type: ignore import pandas as pd # type: ignore
# type: ignore[call-arg, attr-defined] # type: ignore[call-arg, attr-defined]
def it(): def it():
@ -57,8 +57,8 @@ def dataframe():
return pd.DataFrame(it()) return pd.DataFrame(it())
def stats(): from .core import stat, Stats
from .core import stat def stats() -> Stats:
return { return {
**stat(groups), **stat(groups),
**stat(entries), **stat(entries),
@ -67,10 +67,11 @@ def stats():
# basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this? # basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this?
from typing import Iterator
from contextlib import contextmanager from contextlib import contextmanager
# todo take seed, or what? # todo take seed, or what?
@contextmanager @contextmanager
def fake_data(rows=1000): def fake_data(rows: int=1000) -> Iterator[None]:
# todo also disable cachew automatically for such things? # todo also disable cachew automatically for such things?
# TODO right, disabled_cachew won't work here because at that point, entries() is already wrapped? # TODO right, disabled_cachew won't work here because at that point, entries() is already wrapped?
# I guess need to fix this in cachew? # I guess need to fix this in cachew?