diff --git a/my/core/__main__.py b/my/core/__main__.py index c2092ed..4fb1518 100644 --- a/my/core/__main__.py +++ b/my/core/__main__.py @@ -168,7 +168,7 @@ def config_ok(args) -> bool: sys.exit(1) cfg_path = cfg.__file__# todo might be better to use __path__? - info(f"config file: {cfg_path}") + info(f"config file : {cfg_path}") import my.core as core try: @@ -195,7 +195,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module if mres is not None: # has mypy rc = mres.returncode if rc == 0: - info('mypy check: success') + info('mypy check : success') else: error('mypy check: failed') errors.append(RuntimeError('mypy failed')) diff --git a/my/core/common.py b/my/core/common.py index 80d76da..17d0db9 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -292,16 +292,6 @@ else: from .py37 import fromisoformat -if sys.version_info[:2] >= (3, 8): - from typing import Literal -else: - if TYPE_CHECKING: - from typing_extensions import Literal - else: - # erm.. I guess as long as it's not crashing, whatever... - Literal = Union - - # TODO doctests? def isoparse(s: str) -> tzdatetime: """ @@ -313,6 +303,8 @@ def isoparse(s: str) -> tzdatetime: s = s[:-1] + '+00:00' return fromisoformat(s) +from .compat import Literal + import re # https://stackoverflow.com/a/295466/706389 diff --git a/my/core/compat.py b/my/core/compat.py index bf63b0c..3a97242 100644 --- a/my/core/compat.py +++ b/my/core/compat.py @@ -47,3 +47,16 @@ def _get_dal(cfg, module_name: str): from importlib import import_module return import_module(f'my.config.repos.{module_name}.dal') + +import sys +from typing import TYPE_CHECKING + +if sys.version_info[:2] >= (3, 8): + from typing import Literal +else: + if TYPE_CHECKING: + from typing_extensions import Literal + else: + from typing import Union + # erm.. I guess as long as it's not crashing, whatever... 
+ Literal = Union diff --git a/my/core/pandas.py b/my/core/pandas.py index aa04d3f..f58a894 100644 --- a/my/core/pandas.py +++ b/my/core/pandas.py @@ -7,6 +7,9 @@ from datetime import datetime from pprint import pformat from typing import Optional, TYPE_CHECKING, Any, Iterable from . import warnings +from .common import LazyLogger + +logger = LazyLogger(__name__) if TYPE_CHECKING: @@ -14,6 +17,8 @@ if TYPE_CHECKING: # later will be unignored when they implement type annotations import pandas as pd # type: ignore # DataFrameT = pd.DataFrame + # TODO ugh. pretty annoying, having any is not very useful since it would allow arbitrary coercions.. + # ideally want to use a type that's like Any but doesn't allow arbitrary coercions?? DataFrameT = Any else: # in runtime, make it defensive so it works without pandas @@ -40,21 +45,54 @@ def check_dateish(s) -> Iterable[str]: '''.strip() +from .compat import Literal + +ErrorColPolicy = Literal[ + 'add_if_missing', # add error column if it's missing + 'warn' , # warn, but do not modify + 'ignore' , # no warnings +] + +def check_error_column(df: DataFrameT, *, policy: ErrorColPolicy) -> Iterable[str]: + if 'error' in df: + return + if policy == 'ignore': + return + + wmsg = ''' +No 'error' column detected. You probably forgot to handle errors defensively, which means a single bad entry might bring the whole dataframe down. +'''.strip() + if policy == 'add_if_missing': + # todo maybe just add the warnings text as well? + df['error'] = None + wmsg += "\nAdding empty 'error' column (see 'error_col_policy' if you want to change this behaviour)" + pass + + yield wmsg + + from typing import Any, Callable, TypeVar FuncT = TypeVar('FuncT', bound=Callable[..., DataFrameT]) -def check_dataframe(f: FuncT) -> FuncT: - from functools import wraps - @wraps(f) - def wrapper(*args, **kwargs) -> DataFrameT: - df = f(*args, **kwargs) - # todo make super defensive? +# TODO ugh. typing this is a mess... should I use mypy_extensions.VarArg/KwArgs?? 
or what?? +from decorator import decorator +@decorator +def check_dataframe(f: FuncT, error_col_policy: ErrorColPolicy='add_if_missing', *args, **kwargs) -> DataFrameT: + df = f(*args, **kwargs) + tag = f'{f.__module__}:{f.__name__}' + # makes sense to keep super defensive + try: for col, data in df.reset_index().iteritems(): for w in check_dateish(data): - warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {w}") - return df - # https://github.com/python/mypy/issues/1927 - return wrapper # type: ignore[return-value] + warnings.low(f"{tag}, column '{col}': {w}") + except Exception as e: + logger.exception(e) + try: + for w in check_error_column(df, policy=error_col_policy): + warnings.low(f"{tag}, {w}") + except Exception as e: + logger.exception(e) + return df # todo doctor: could have a suggesion to wrap dataframes with it?? discover by return type? diff --git a/my/core/warnings.py b/my/core/warnings.py index 7aa1dd1..695de57 100644 --- a/my/core/warnings.py +++ b/my/core/warnings.py @@ -36,7 +36,7 @@ def _warn(message: str, *args, color=None, **kwargs) -> None: def low(message: str, *args, **kwargs) -> None: - kwargs['color'] = 'grey' + # kwargs['color'] = 'grey' # eh, grey is way too pale _warn(message, *args, **kwargs) diff --git a/my/endomondo.py b/my/endomondo.py index 6c56bfa..0df7aa9 100644 --- a/my/endomondo.py +++ b/my/endomondo.py @@ -11,8 +11,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Sequence, Iterable -from .core.common import Paths, get_files -from .core.error import Res +from .core import Paths, get_files from my.config import endomondo as user_config @@ -35,13 +34,17 @@ import endoexport.dal as dal from endoexport.dal import Point, Workout +from .core import Res # todo cachew? 
def workouts() -> Iterable[Res[Workout]]: _dal = dal.DAL(inputs()) yield from _dal.workouts() -def dataframe(defensive=True): +from .core.pandas import check_dataframe, DataFrameT + +@check_dataframe +def dataframe(defensive: bool=True) -> DataFrameT: def it(): for w in workouts(): if isinstance(w, Exception): @@ -67,13 +70,18 @@ def dataframe(defensive=True): df = pd.DataFrame(it()) # pandas guesses integer, which is pointless for this field (might get coerced to float too) df['id'] = df['id'].astype(str) + if 'error' not in df: + df['error'] = None return df - -def stats(): - from .core import stat - return stat(workouts) +from .core import stat, Stats +def stats() -> Stats: + return { + # todo pretty print stats? + **stat(workouts), + **stat(dataframe), + } # TODO make sure it's possible to 'advise' functions and override stuff diff --git a/setup.py b/setup.py index fbc152d..0c913a5 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ INSTALL_REQUIRES = [ 'pytz', # even though it's not needed by the core, it's so common anyway... 'appdirs', # very common, and makes it portable 'more-itertools', # it's just too useful and very common anyway + 'decorator' , # less pain in writing correct decorators. very mature and stable, so worth keeping in core ]