core: more type annotations

This commit is contained in:
Dima Gerasimov 2020-10-03 13:15:15 +01:00 committed by karlicoss
parent 44b756cc6b
commit 06ee72bc30
7 changed files with 43 additions and 34 deletions

View file

@ -3,7 +3,7 @@ from .common import PathIsh, Paths, Json
from .common import get_files
from .common import LazyLogger
from .common import warn_if_empty
from .common import stat
from .common import stat, Stats
from .cfg import make_config
from .util import __NOT_HPI_MODULE__

View file

@ -362,8 +362,9 @@ QUICK_STATS = False
C = TypeVar('C')
Stats = Dict[str, Any]
# todo not sure about return type...
def stat(func: Callable[[], Iterable[C]]) -> Dict[str, Any]:
def stat(func: Callable[[], Iterable[C]]) -> Stats:
from more_itertools import ilen, take, first
# todo not sure if there is something in more_itertools to compute this?

View file

@ -2,18 +2,24 @@
Various pandas helpers and convenience functions
'''
# todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
from typing import Optional
import warnings
# NOTE: this file is meant to be importable without Pandas installed
from typing import Optional, TYPE_CHECKING, Any
from . import warnings
# FIXME need to make sure check_dataframe decorator can be used without actually importing pandas
# so need to move this import from top level
import pandas as pd # type: ignore
# todo special warning type?
if TYPE_CHECKING:
# this is kinda pointless at the moment, but handy to annotate DF returning methods now
# later will be unignored when they implement type annotations
import pandas as pd # type: ignore
# DataFrameT = pd.DataFrame
DataFrameT = Any
else:
# at runtime, make it defensive so it works without pandas
DataFrameT = Any
def check_dateish(s) -> Optional[str]:
import pandas as pd # type: ignore
ctype = s.dtype
if str(ctype).startswith('datetime64'):
return None
@ -26,18 +32,22 @@ def check_dateish(s) -> Optional[str]:
return None
def check_dataframe(f):
from typing import Any, Callable, TypeVar
FuncT = TypeVar('FuncT', bound=Callable[..., DataFrameT])
def check_dataframe(f: FuncT) -> FuncT:
from functools import wraps
@wraps(f)
def wrapper(*args, **kwargs) -> pd.DataFrame:
def wrapper(*args, **kwargs) -> DataFrameT:
df = f(*args, **kwargs)
# todo make super defensive?
# TODO check index as well?
for col, data in df.iteritems():
res = check_dateish(data)
if res is not None:
warnings.warn(f"{f.__name__}, column '{col}': {res}")
warnings.low(f"{f.__name__}, column '{col}': {res}")
return df
return wrapper
# https://github.com/python/mypy/issues/1927
return wrapper # type: ignore[return-value]
# todo doctor: could have a suggestion to wrap dataframes with it?? discover by return type?

View file

@ -1,9 +0,0 @@
import typing
if typing.TYPE_CHECKING:
from typing import Any
# todo would be nice to use some real stubs..
DataFrameT = Any
else:
import pandas # type: ignore
DataFrameT = pandas.DataFrame

View file

@ -35,6 +35,11 @@ def _warn(message: str, *args, color=None, **kwargs) -> None:
warnings.warn(_colorize(message, color=color), *args, **kwargs)
def low(message: str, *args, **kwargs) -> None:
kwargs['color'] = 'grey'
_warn(message, *args, **kwargs)
def medium(message: str, *args, **kwargs) -> None:
kwargs['color'] = 'yellow'
_warn(message, *args, **kwargs)

View file

@ -6,13 +6,13 @@ Consumes data exported by https://github.com/karlicoss/emfitexport
"""
from datetime import date
from pathlib import Path
from typing import Dict, List, Iterable, Any
from typing import Dict, List, Iterable, Any, Optional
from ..core import get_files
from ..core.common import mcachew
from ..core.cachew import cache_dir
from ..core.error import Res, set_error_datetime, extract_error_datetime
from ..core.types import DataFrameT
from ..core.pandas import DataFrameT
from my.config import emfit as config
@ -89,7 +89,7 @@ def pre_dataframe() -> Iterable[Res[Emfit]]:
def dataframe() -> DataFrameT:
from datetime import timedelta
dicts: List[Dict[str, Any]] = []
last = None
last: Optional[Emfit] = None
for s in pre_dataframe():
d: Dict[str, Any]
if isinstance(s, Exception):
@ -134,14 +134,15 @@ def dataframe() -> DataFrameT:
return pandas.DataFrame(dicts)
# TODO add dataframe support to stat()
def stats():
from ..core import stat
from ..core import stat, Stats
def stats() -> Stats:
return stat(pre_dataframe)
from contextlib import contextmanager
from typing import Iterator
@contextmanager
def fake_data(nights=500):
def fake_data(nights: int=500) -> Iterator[None]:
from ..core.cfg import override_config
from tempfile import TemporaryDirectory
with override_config(config) as cfg, TemporaryDirectory() as td:

View file

@ -12,7 +12,7 @@ from typing import Sequence, Iterable
from .core import get_files, LazyLogger
from .core.common import mcachew
from .core.error import Res, split_errors
from .core.pandas import check_dataframe as cdf
from .core.pandas import check_dataframe as cdf, DataFrameT
from my.config import rescuetime as config
@ -36,7 +36,7 @@ def entries() -> Iterable[Res[Entry]]:
yield from dal.entries()
def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
vit, eit = split_errors(entries(), ET=Exception)
yield from eit
import more_itertools
@ -45,7 +45,7 @@ def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
@cdf
def dataframe():
def dataframe() -> DataFrameT:
import pandas as pd # type: ignore
# type: ignore[call-arg, attr-defined]
def it():
@ -57,8 +57,8 @@ def dataframe():
return pd.DataFrame(it())
def stats():
from .core import stat
from .core import stat, Stats
def stats() -> Stats:
return {
**stat(groups),
**stat(entries),
@ -67,10 +67,11 @@ def stats():
# basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this?
from typing import Iterator
from contextlib import contextmanager
# todo take seed, or what?
@contextmanager
def fake_data(rows=1000):
def fake_data(rows: int=1000) -> Iterator[None]:
# todo also disable cachew automatically for such things?
# TODO right, disabled_cachew won't work here because at that point, entries() is already wrapped?
# I guess need to fix this in cachew?