core: more type annotations

2020-10-03 13:15:15 +01:00 · 2020-10-03 13:15:15 +01:00 · 06ee72bc30
commit 06ee72bc30
parent 44b756cc6b
7 changed files with 43 additions and 34 deletions
--- a/my/core/__init__.py
+++ b/my/core/__init__.py
@@ -3,7 +3,7 @@ from .common import PathIsh, Paths, Json
 from .common import get_files
 from .common import LazyLogger
 from .common import warn_if_empty
-from .common import stat
+from .common import stat, Stats

 from .cfg import make_config
 from .util import __NOT_HPI_MODULE__
--- a/my/core/common.py
+++ b/my/core/common.py
@@ -362,8 +362,9 @@ QUICK_STATS = False


 C = TypeVar('C')
+Stats = Dict[str, Any]
 # todo not sure about return type...
-def stat(func: Callable[[], Iterable[C]]) -> Dict[str, Any]:
+def stat(func: Callable[[], Iterable[C]]) -> Stats:
    from more_itertools import ilen, take, first

    # todo not sure if there is something in more_itertools to compute this?
--- a/my/core/pandas.py
+++ b/my/core/pandas.py
@@ -2,18 +2,24 @@
 Various pandas helpers and convenience functions
 '''
 # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
-from typing import Optional
-import warnings
+# NOTE: this file is meant to be importable without Pandas installed
+from typing import Optional, TYPE_CHECKING, Any
+from . import warnings


-# FIXME need to make sure check_dataframe decorator can be used without actually importing pandas
-# so need to move this import drom top level
+if TYPE_CHECKING:
+    # this is kinda pointless at the moment, but handy to annotate DF returning methods now
+    # later will be unignored when they implement type annotations
    import pandas as pd # type: ignore
-
-# todo special warning type?
+    # DataFrameT = pd.DataFrame
+    DataFrameT = Any
+else:
+    # in runtime, make it defensive so it works without pandas
+    DataFrameT = Any


 def check_dateish(s) -> Optional[str]:
+    import pandas as pd # type: ignore
    ctype = s.dtype
    if str(ctype).startswith('datetime64'):
        return None
@@ -26,18 +32,22 @@ def check_dateish(s) -> Optional[str]:
    return None


-def check_dataframe(f):
+from typing import Any, Callable, TypeVar
+FuncT = TypeVar('FuncT', bound=Callable[..., DataFrameT])
+
+def check_dataframe(f: FuncT) -> FuncT:
    from functools import wraps
    @wraps(f)
-    def wrapper(*args, **kwargs) -> pd.DataFrame:
+    def wrapper(*args, **kwargs) -> DataFrameT:
        df = f(*args, **kwargs)
        # todo make super defensive?
        # TODO check index as well?
        for col, data in df.iteritems():
            res = check_dateish(data)
            if res is not None:
-                warnings.warn(f"{f.__name__}, column '{col}': {res}")
+                warnings.low(f"{f.__name__}, column '{col}': {res}")
        return df
-    return wrapper
+    # https://github.com/python/mypy/issues/1927
+    return wrapper # type: ignore[return-value]

 # todo doctor: could have a suggesion to wrap dataframes with it?? discover by return type?
--- a/my/core/types.py
+++ b/my/core/types.py
@@ -1,9 +0,0 @@
-import typing
-
-if typing.TYPE_CHECKING:
-    from typing import Any
-    # todo would be nice to use some real stubs..
-    DataFrameT = Any
-else:
-    import pandas # type: ignore
-    DataFrameT = pandas.DataFrame
--- a/my/core/warnings.py
+++ b/my/core/warnings.py
@@ -35,6 +35,11 @@ def _warn(message: str, *args, color=None, **kwargs) -> None:
    warnings.warn(_colorize(message, color=color), *args, **kwargs)


+def low(message: str, *args, **kwargs) -> None:
+    kwargs['color'] = 'grey'
+    _warn(message, *args, **kwargs)
+
+
 def medium(message: str, *args, **kwargs) -> None:
    kwargs['color'] = 'yellow'
    _warn(message, *args, **kwargs)
--- a/my/emfit/__init__.py
+++ b/my/emfit/__init__.py
@@ -6,13 +6,13 @@ Consumes data exported by https://github.com/karlicoss/emfitexport
 """
 from datetime import date
 from pathlib import Path
-from typing import Dict, List, Iterable, Any
+from typing import Dict, List, Iterable, Any, Optional

 from ..core import get_files
 from ..core.common import mcachew
 from ..core.cachew import cache_dir
 from ..core.error import Res, set_error_datetime, extract_error_datetime
-from ..core.types import DataFrameT
+from ..core.pandas import DataFrameT

 from my.config import emfit as config

@@ -89,7 +89,7 @@ def pre_dataframe() -> Iterable[Res[Emfit]]:
 def dataframe() -> DataFrameT:
    from datetime import timedelta
    dicts: List[Dict[str, Any]] = []
-    last = None
+    last: Optional[Emfit] = None
    for s in pre_dataframe():
        d: Dict[str, Any]
        if isinstance(s, Exception):
@@ -134,14 +134,15 @@ def dataframe() -> DataFrameT:
    return pandas.DataFrame(dicts)

 # TODO add dataframe support to stat()
-def stats():
-    from ..core import stat
+from ..core import stat, Stats
+def stats() -> Stats:
    return stat(pre_dataframe)


 from contextlib import contextmanager
+from typing import Iterator
 @contextmanager
-def fake_data(nights=500):
+def fake_data(nights: int=500) -> Iterator[None]:
    from ..core.cfg import override_config
    from tempfile import TemporaryDirectory
    with override_config(config) as cfg, TemporaryDirectory() as td:
--- a/my/rescuetime.py
+++ b/my/rescuetime.py
@@ -12,7 +12,7 @@ from typing import Sequence, Iterable
 from .core import get_files, LazyLogger
 from .core.common import mcachew
 from .core.error import Res, split_errors
-from .core.pandas import check_dataframe as cdf
+from .core.pandas import check_dataframe as cdf, DataFrameT

 from my.config import rescuetime as config

@@ -36,7 +36,7 @@ def entries() -> Iterable[Res[Entry]]:
    yield from dal.entries()


-def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
+def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
    vit, eit = split_errors(entries(), ET=Exception)
    yield from eit
    import more_itertools
@@ -45,7 +45,7 @@ def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:


 @cdf
-def dataframe():
+def dataframe() -> DataFrameT:
    import pandas as pd # type: ignore
    # type: ignore[call-arg, attr-defined]
    def it():
@@ -57,8 +57,8 @@ def dataframe():
    return pd.DataFrame(it())


-def stats():
-    from .core import stat
+from .core import stat, Stats
+def stats() -> Stats:
    return {
        **stat(groups),
        **stat(entries),
@@ -67,10 +67,11 @@ def stats():

 # basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this?

+from typing import Iterator
 from contextlib import contextmanager
 # todo take seed, or what?
 @contextmanager
-def fake_data(rows=1000):
+def fake_data(rows: int=1000) -> Iterator[None]:
    # todo also disable cachew automatically for such things?
    # TODO right, disabled_cachew won't work here because at that point, entries() is already wrapped?
    # I guess need to fix this in cachew?