From 06ee72bc3030228f10980b71746966706281c6a9 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 3 Oct 2020 13:15:15 +0100 Subject: [PATCH] core: more type annotations --- my/core/__init__.py | 2 +- my/core/common.py | 3 ++- my/core/pandas.py | 32 +++++++++++++++++++++----------- my/core/types.py | 9 --------- my/core/warnings.py | 5 +++++ my/emfit/__init__.py | 13 +++++++------ my/rescuetime.py | 13 +++++++------ 7 files changed, 43 insertions(+), 34 deletions(-) delete mode 100644 my/core/types.py diff --git a/my/core/__init__.py b/my/core/__init__.py index ed6a3b0..e86eb81 100644 --- a/my/core/__init__.py +++ b/my/core/__init__.py @@ -3,7 +3,7 @@ from .common import PathIsh, Paths, Json from .common import get_files from .common import LazyLogger from .common import warn_if_empty -from .common import stat +from .common import stat, Stats from .cfg import make_config from .util import __NOT_HPI_MODULE__ diff --git a/my/core/common.py b/my/core/common.py index b599880..b5b8d75 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -362,8 +362,9 @@ QUICK_STATS = False C = TypeVar('C') +Stats = Dict[str, Any] # todo not sure about return type... -def stat(func: Callable[[], Iterable[C]]) -> Dict[str, Any]: +def stat(func: Callable[[], Iterable[C]]) -> Stats: from more_itertools import ilen, take, first # todo not sure if there is something in more_itertools to compute this? diff --git a/my/core/pandas.py b/my/core/pandas.py index 28b8de1..de9077a 100644 --- a/my/core/pandas.py +++ b/my/core/pandas.py @@ -2,18 +2,24 @@ Various pandas helpers and convenience functions ''' # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential -from typing import Optional -import warnings +# NOTE: this file is meant to be importable without Pandas installed +from typing import Optional, TYPE_CHECKING, Any +from . import warnings -# FIXME need to make sure check_dataframe decorator can be used without actually importing pandas -# so need to move this import drom top level -import pandas as pd # type: ignore - -# todo special warning type? +if TYPE_CHECKING: + # this is kinda pointless at the moment, but handy to annotate DF returning methods now + # later will be unignored when they implement type annotations + import pandas as pd # type: ignore + # DataFrameT = pd.DataFrame + DataFrameT = Any +else: + # in runtime, make it defensive so it works without pandas + DataFrameT = Any def check_dateish(s) -> Optional[str]: + import pandas as pd # type: ignore ctype = s.dtype if str(ctype).startswith('datetime64'): return None @@ -26,18 +32,22 @@ def check_dateish(s) -> Optional[str]: return None -def check_dataframe(f): +from typing import Any, Callable, TypeVar +FuncT = TypeVar('FuncT', bound=Callable[..., DataFrameT]) + +def check_dataframe(f: FuncT) -> FuncT: from functools import wraps @wraps(f) - def wrapper(*args, **kwargs) -> pd.DataFrame: + def wrapper(*args, **kwargs) -> DataFrameT: df = f(*args, **kwargs) # todo make super defensive? # TODO check index as well? for col, data in df.iteritems(): res = check_dateish(data) if res is not None: - warnings.warn(f"{f.__name__}, column '{col}': {res}") + warnings.low(f"{f.__name__}, column '{col}': {res}") return df - return wrapper + # https://github.com/python/mypy/issues/1927 + return wrapper # type: ignore[return-value] # todo doctor: could have a suggesion to wrap dataframes with it?? discover by return type? diff --git a/my/core/types.py b/my/core/types.py deleted file mode 100644 index 6b13e67..0000000 --- a/my/core/types.py +++ /dev/null @@ -1,9 +0,0 @@ -import typing - -if typing.TYPE_CHECKING: - from typing import Any - # todo would be nice to use some real stubs.. - DataFrameT = Any -else: - import pandas # type: ignore - DataFrameT = pandas.DataFrame diff --git a/my/core/warnings.py b/my/core/warnings.py index 2a71235..7aa1dd1 100644 --- a/my/core/warnings.py +++ b/my/core/warnings.py @@ -35,6 +35,11 @@ def _warn(message: str, *args, color=None, **kwargs) -> None: warnings.warn(_colorize(message, color=color), *args, **kwargs) +def low(message: str, *args, **kwargs) -> None: + kwargs['color'] = 'grey' + _warn(message, *args, **kwargs) + + def medium(message: str, *args, **kwargs) -> None: kwargs['color'] = 'yellow' _warn(message, *args, **kwargs) diff --git a/my/emfit/__init__.py b/my/emfit/__init__.py index 81a0dbb..53999b7 100755 --- a/my/emfit/__init__.py +++ b/my/emfit/__init__.py @@ -6,13 +6,13 @@ Consumes data exported by https://github.com/karlicoss/emfitexport """ from datetime import date from pathlib import Path -from typing import Dict, List, Iterable, Any +from typing import Dict, List, Iterable, Any, Optional from ..core import get_files from ..core.common import mcachew from ..core.cachew import cache_dir from ..core.error import Res, set_error_datetime, extract_error_datetime -from ..core.types import DataFrameT +from ..core.pandas import DataFrameT from my.config import emfit as config @@ -89,7 +89,7 @@ def pre_dataframe() -> Iterable[Res[Emfit]]: def dataframe() -> DataFrameT: from datetime import timedelta dicts: List[Dict[str, Any]] = [] - last = None + last: Optional[Emfit] = None for s in pre_dataframe(): d: Dict[str, Any] if isinstance(s, Exception): @@ -134,14 +134,15 @@ def dataframe() -> DataFrameT: return pandas.DataFrame(dicts) # TODO add dataframe support to stat() -def stats(): - from ..core import stat +from ..core import stat, Stats +def stats() -> Stats: return stat(pre_dataframe) from contextlib import contextmanager +from typing import Iterator @contextmanager -def fake_data(nights=500): +def fake_data(nights: int=500) -> Iterator[None]: from ..core.cfg import override_config from tempfile import TemporaryDirectory with override_config(config) as cfg, TemporaryDirectory() as td: diff --git a/my/rescuetime.py b/my/rescuetime.py index 06c72eb..1a0f664 100644 --- a/my/rescuetime.py +++ b/my/rescuetime.py @@ -12,7 +12,7 @@ from typing import Sequence, Iterable from .core import get_files, LazyLogger from .core.common import mcachew from .core.error import Res, split_errors -from .core.pandas import check_dataframe as cdf +from .core.pandas import check_dataframe as cdf, DataFrameT from my.config import rescuetime as config @@ -36,7 +36,7 @@ def entries() -> Iterable[Res[Entry]]: yield from dal.entries() -def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]: +def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]: vit, eit = split_errors(entries(), ET=Exception) yield from eit import more_itertools @@ -45,7 +45,7 @@ def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]: @cdf -def dataframe(): +def dataframe() -> DataFrameT: import pandas as pd # type: ignore # type: ignore[call-arg, attr-defined] def it(): @@ -57,8 +57,8 @@ def dataframe(): return pd.DataFrame(it()) -def stats(): - from .core import stat +from .core import stat, Stats +def stats() -> Stats: return { **stat(groups), **stat(entries), @@ -67,10 +67,11 @@ def stats(): # basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this? +from typing import Iterator from contextlib import contextmanager # todo take seed, or what? @contextmanager -def fake_data(rows=1000): +def fake_data(rows: int=1000) -> Iterator[None]: # todo also disable cachew automatically for such things? # TODO right, disabled_cachew won't work here because at that point, entries() is already wrapped? # I guess need to fix this in cachew?