From a946e23dd3351bebd57f7f06ef0fad57886b523d Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Mon, 19 Oct 2020 16:28:35 +0100
Subject: [PATCH] core.pandas: dump the timezones in check_dateish

---
Notes (this section, between the '---' line and the diff, is not part of the
commit message):
- check_dateish changes from returning Optional[str] to a generator
  (Iterable[str]); it yields nothing where it previously returned None.
- The yielded warning now appends a pformat() dump of (example value, tzinfo)
  pairs, one pair per distinct tzinfo found in the column.
- check_dataframe iterates over check_dateish(data) and emits one
  warnings.low(...) call per yielded message.
- NOTE(review): this patch was restored from a whitespace-collapsed copy. Line
  breaks were validated against the hunk line counts; indentation was
  re-derived from Python syntax. The leading whitespace inside the added
  f-string literal is assumed to be 4 spaces -- confirm with
  `git apply --check` against blob 9a87c89.

 my/core/pandas.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/my/core/pandas.py b/my/core/pandas.py
index 9a87c89..aa04d3f 100644
--- a/my/core/pandas.py
+++ b/my/core/pandas.py
@@ -4,7 +4,8 @@ Various pandas helpers and convenience functions
 # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
 # NOTE: this file is meant to be importable without Pandas installed
 from datetime import datetime
-from typing import Optional, TYPE_CHECKING, Any
+from pprint import pformat
+from typing import Optional, TYPE_CHECKING, Any, Iterable
 
 from . import warnings
 
@@ -19,18 +20,24 @@ else:
     DataFrameT = Any
 
 
-def check_dateish(s) -> Optional[str]:
+def check_dateish(s) -> Iterable[str]:
     import pandas as pd # type: ignore
     ctype = s.dtype
     if str(ctype).startswith('datetime64'):
-        return None
+        return
     s = s.dropna()
     if len(s) == 0:
-        return None
+        return
     all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all()
-    if all_timestamps:
-        return 'All values are timestamp-like, but dtype is not datetime. Most likely, you have mixed timezones'
-    return None
+    if not all_timestamps:
+        return # not sure why it would happen, but ok
+    tzs = s.map(lambda x: x.tzinfo).drop_duplicates()
+    examples = s[tzs.index]
+    # todo not so sure this warning is that useful... except for stuff without tz
+    yield f'''
+    All values are timestamp-like, but dtype is not datetime. Most likely, you have mixed timezones:
+    {pformat(list(zip(examples, tzs)))}
+    '''.strip()
 
 
 from typing import Any, Callable, TypeVar
@@ -43,9 +50,8 @@ def check_dataframe(f: FuncT) -> FuncT:
         df = f(*args, **kwargs)
         # todo make super defensive?
         for col, data in df.reset_index().iteritems():
-            res = check_dateish(data)
-            if res is not None:
-                warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {res}")
+            for w in check_dateish(data):
+                warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {w}")
         return df
     # https://github.com/python/mypy/issues/1927
     return wrapper # type: ignore[return-value]