core.pandas: add check for 'error' column + add empty one by default
This commit is contained in:
parent
cb37a0d080
commit
47e77403e7
7 changed files with 82 additions and 30 deletions
|
@ -7,6 +7,9 @@ from datetime import datetime
|
|||
from pprint import pformat
|
||||
from typing import Optional, TYPE_CHECKING, Any, Iterable
|
||||
from . import warnings
|
||||
from .common import LazyLogger
|
||||
|
||||
logger = LazyLogger(__name__)
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -14,6 +17,8 @@ if TYPE_CHECKING:
|
|||
# later will be unignored when they implement type annotations
|
||||
import pandas as pd # type: ignore
|
||||
# DataFrameT = pd.DataFrame
|
||||
# TODO ugh. pretty annoying, having any is not very useful since it would allow arbitrary coercions..
|
||||
# ideally want to use a type that's like Any but doesn't allow arbitrary coercions??
|
||||
DataFrameT = Any
|
||||
else:
|
||||
# in runtime, make it defensive so it works without pandas
|
||||
|
@ -40,21 +45,54 @@ def check_dateish(s) -> Iterable[str]:
|
|||
'''.strip()
|
||||
|
||||
|
||||
from .compat import Literal
|
||||
|
||||
# What to do when a dataframe has no 'error' column:
#   'add_if_missing' -- add error column if it's missing
#   'warn'           -- warn, but do not modify
#   'ignore'         -- no warnings
ErrorColPolicy = Literal['add_if_missing', 'warn', 'ignore']
|
||||
|
||||
def check_error_column(df: DataFrameT, *, policy: ErrorColPolicy) -> Iterable[str]:
    """
    Yield a warning message if ``df`` has no 'error' column.

    Nothing is yielded when the column is already present or when
    ``policy`` is 'ignore'. With 'add_if_missing' an 'error' column filled
    with ``None`` is added to ``df`` in place and the message says so; any
    other policy value only produces the warning and leaves ``df`` alone.

    This is a generator: neither the check nor the in-place modification
    happens until the result is iterated.
    """
    has_error_column = 'error' in df
    if has_error_column or policy == 'ignore':
        return

    message = '''
No 'error' column detected. You probably forgot to handle errors defensively, which means a single bad entry might bring the whole dataframe down.
'''.strip()

    if policy == 'add_if_missing':
        # todo maybe just add the warnings text as well?
        df['error'] = None
        message = message + "\nAdding empty 'error' column (see 'error_col_policy' if you want to change this behaviour)"

    yield message
|
||||
|
||||
|
||||
from typing import Any, Callable, TypeVar
|
||||
# Type variable bound to callables that return a dataframe (DataFrameT);
# used to annotate the function handed to check_dataframe.
FuncT = TypeVar('FuncT', bound=Callable[..., DataFrameT])
|
||||
|
||||
# TODO ugh. typing this is a mess... should I use mypy_extensions.VarArg/KwArgs?? or what??
from decorator import decorator

@decorator
def check_dataframe(f: FuncT, error_col_policy: ErrorColPolicy='add_if_missing', *args, **kwargs) -> DataFrameT:
    """
    Call ``f`` and run sanity checks on the dataframe it returns.

    Two checks are performed, each inside its own ``try`` so that a failing
    check is logged via ``logger.exception`` and never propagates to the caller:

    - every column (index included, via ``reset_index``) is passed through
      ``check_dateish``;
    - ``check_error_column`` is run with ``error_col_policy``; with the
      default 'add_if_missing' it adds an empty 'error' column to the
      returned dataframe when one is absent.

    Every warning produced is reported through ``warnings.low``, prefixed
    with ``<module>:<function name>`` of ``f``.

    Returns whatever ``f`` returned (possibly modified in place as above).
    Exceptions raised by ``f`` itself are not caught.
    """
    df = f(*args, **kwargs)
    # must be an f-string, otherwise the placeholders end up verbatim in every warning
    tag = f'{f.__module__}:{f.__name__}'
    # makes sense to keep super defensive
    try:
        # DataFrame.iteritems() was removed in pandas 2.0; items() is the equivalent
        for col, data in df.reset_index().items():
            for w in check_dateish(data):
                warnings.low(f"{tag}, column '{col}': {w}")
    except Exception as e:
        logger.exception(e)

    try:
        for w in check_error_column(df, policy=error_col_policy):
            warnings.low(f"{tag}, {w}")
    except Exception as e:
        logger.exception(e)
    return df
|
||||
|
||||
# todo doctor: could have a suggestion to wrap dataframes with it?? discover by return type?
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue