core.pandas: dump the timezones in check_dateish
This commit is contained in:
parent
831fee42a1
commit
a946e23dd3
1 changed file with 16 additions and 10 deletions
|
@@ -4,7 +4,8 @@ Various pandas helpers and convenience functions
|
||||||
# todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
|
# todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
|
||||||
# NOTE: this file is meant to be importable without Pandas installed
|
# NOTE: this file is meant to be importable without Pandas installed
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional, TYPE_CHECKING, Any
|
from pprint import pformat
|
||||||
|
from typing import Optional, TYPE_CHECKING, Any, Iterable
|
||||||
from . import warnings
|
from . import warnings
|
||||||
|
|
||||||
|
|
||||||
|
@@ -19,18 +20,24 @@ else:
|
||||||
DataFrameT = Any
|
DataFrameT = Any
|
||||||
|
|
||||||
|
|
||||||
def check_dateish(s) -> Optional[str]:
|
def check_dateish(s) -> Iterable[str]:
|
||||||
import pandas as pd # type: ignore
|
import pandas as pd # type: ignore
|
||||||
ctype = s.dtype
|
ctype = s.dtype
|
||||||
if str(ctype).startswith('datetime64'):
|
if str(ctype).startswith('datetime64'):
|
||||||
return None
|
return
|
||||||
s = s.dropna()
|
s = s.dropna()
|
||||||
if len(s) == 0:
|
if len(s) == 0:
|
||||||
return None
|
return
|
||||||
all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all()
|
all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all()
|
||||||
if all_timestamps:
|
if not all_timestamps:
|
||||||
return 'All values are timestamp-like, but dtype is not datetime. Most likely, you have mixed timezones'
|
return # not sure why it would happen, but ok
|
||||||
return None
|
tzs = s.map(lambda x: x.tzinfo).drop_duplicates()
|
||||||
|
examples = s[tzs.index]
|
||||||
|
# todo not so sure this warning is that useful... except for stuff without tz
|
||||||
|
yield f'''
|
||||||
|
All values are timestamp-like, but dtype is not datetime. Most likely, you have mixed timezones:
|
||||||
|
{pformat(list(zip(examples, tzs)))}
|
||||||
|
'''.strip()
|
||||||
|
|
||||||
|
|
||||||
from typing import Any, Callable, TypeVar
|
from typing import Any, Callable, TypeVar
|
||||||
|
@@ -43,9 +50,8 @@ def check_dataframe(f: FuncT) -> FuncT:
|
||||||
df = f(*args, **kwargs)
|
df = f(*args, **kwargs)
|
||||||
# todo make super defensive?
|
# todo make super defensive?
|
||||||
for col, data in df.reset_index().iteritems():
|
for col, data in df.reset_index().iteritems():
|
||||||
res = check_dateish(data)
|
for w in check_dateish(data):
|
||||||
if res is not None:
|
warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {w}")
|
||||||
warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {res}")
|
|
||||||
return df
|
return df
|
||||||
# https://github.com/python/mypy/issues/1927
|
# https://github.com/python/mypy/issues/1927
|
||||||
return wrapper # type: ignore[return-value]
|
return wrapper # type: ignore[return-value]
|
||||||
|
|
Loading…
Add table
Reference in a new issue