core.pandas: dump the timezones in check_dateish

This commit is contained in:
Dima Gerasimov 2020-10-19 16:28:35 +01:00 committed by karlicoss
parent 831fee42a1
commit a946e23dd3

View file

@ -4,7 +4,8 @@ Various pandas helpers and convenience functions
# todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
# NOTE: this file is meant to be importable without Pandas installed # NOTE: this file is meant to be importable without Pandas installed
from datetime import datetime from datetime import datetime
from typing import Optional, TYPE_CHECKING, Any from pprint import pformat
from typing import Optional, TYPE_CHECKING, Any, Iterable
from . import warnings from . import warnings
@ -19,18 +20,24 @@ else:
DataFrameT = Any DataFrameT = Any
def check_dateish(s) -> Optional[str]: def check_dateish(s) -> Iterable[str]:
import pandas as pd # type: ignore import pandas as pd # type: ignore
ctype = s.dtype ctype = s.dtype
if str(ctype).startswith('datetime64'): if str(ctype).startswith('datetime64'):
return None return
s = s.dropna() s = s.dropna()
if len(s) == 0: if len(s) == 0:
return None return
all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all() all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all()
if all_timestamps: if not all_timestamps:
return 'All values are timestamp-like, but dtype is not datetime. Most likely, you have mixed timezones' return # not sure why it would happen, but ok
return None tzs = s.map(lambda x: x.tzinfo).drop_duplicates()
examples = s[tzs.index]
# todo not so sure this warning is that useful... except for stuff without tz
yield f'''
All values are timestamp-like, but dtype is not datetime. Most likely, you have mixed timezones:
{pformat(list(zip(examples, tzs)))}
'''.strip()
from typing import Any, Callable, TypeVar from typing import Any, Callable, TypeVar
@ -43,9 +50,8 @@ def check_dataframe(f: FuncT) -> FuncT:
df = f(*args, **kwargs) df = f(*args, **kwargs)
# todo make super defensive? # todo make super defensive?
for col, data in df.reset_index().iteritems(): for col, data in df.reset_index().iteritems():
res = check_dateish(data) for w in check_dateish(data):
if res is not None: warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {w}")
warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {res}")
return df return df
# https://github.com/python/mypy/issues/1927 # https://github.com/python/mypy/issues/1927
return wrapper # type: ignore[return-value] return wrapper # type: ignore[return-value]