core.pandas: check index in check_dataframe

This commit is contained in:
Dima Gerasimov 2020-10-04 00:00:25 +01:00 committed by karlicoss
parent 5babbb44d0
commit d3f2551560
2 changed files with 10 additions and 7 deletions

View file

@ -142,7 +142,8 @@ def stats() -> Stats:
return stat(measurements) return stat(measurements)
from ..core.pandas import DataFrameT from ..core.pandas import DataFrameT, check_dataframe as cdf
@cdf
def dataframe() -> DataFrameT: def dataframe() -> DataFrameT:
""" """
%matplotlib gtk %matplotlib gtk
@ -152,6 +153,8 @@ def dataframe() -> DataFrameT:
# todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks... # todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks...
# either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh? # either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh?
import pandas as pd # type: ignore import pandas as pd # type: ignore
return pd.DataFrame(p._asdict() for p in measurements()).set_index('dt') df = pd.DataFrame(p._asdict() for p in measurements())
# todo not sure how it would handle mixed timezones??
return df.set_index('dt')
# todo test against an older db? # todo test against an older db?

View file

@ -3,6 +3,7 @@ Various pandas helpers and convenience functions
''' '''
# todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
# NOTE: this file is meant to be importable without Pandas installed # NOTE: this file is meant to be importable without Pandas installed
from datetime import datetime
from typing import Optional, TYPE_CHECKING, Any from typing import Optional, TYPE_CHECKING, Any
from . import warnings from . import warnings
@ -26,9 +27,9 @@ def check_dateish(s) -> Optional[str]:
s = s.dropna() s = s.dropna()
if len(s) == 0: if len(s) == 0:
return None return None
all_timestamps = s.apply(lambda x: isinstance(x, pd.Timestamp)).all() all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all()
if all_timestamps: if all_timestamps:
return 'All values are pd.Timestamp, but dtype is not datetime. Most likely, you have mixed timezones' return 'All values are timestamp-like, but dtype is not datetime. Most likely, you have mixed timezones'
return None return None
@ -41,11 +42,10 @@ def check_dataframe(f: FuncT) -> FuncT:
def wrapper(*args, **kwargs) -> DataFrameT: def wrapper(*args, **kwargs) -> DataFrameT:
df = f(*args, **kwargs) df = f(*args, **kwargs)
# todo make super defensive? # todo make super defensive?
# TODO check index as well? for col, data in df.reset_index().iteritems():
for col, data in df.iteritems():
res = check_dateish(data) res = check_dateish(data)
if res is not None: if res is not None:
warnings.low(f"{f.__name__}, column '{col}': {res}") warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {res}")
return df return df
# https://github.com/python/mypy/issues/1927 # https://github.com/python/mypy/issues/1927
return wrapper # type: ignore[return-value] return wrapper # type: ignore[return-value]