core.pandas: check index in check_dataframe
This commit is contained in:
parent
5babbb44d0
commit
d3f2551560
2 changed files with 10 additions and 7 deletions
|
@ -142,7 +142,8 @@ def stats() -> Stats:
|
||||||
return stat(measurements)
|
return stat(measurements)
|
||||||
|
|
||||||
|
|
||||||
from ..core.pandas import DataFrameT
|
from ..core.pandas import DataFrameT, check_dataframe as cdf
|
||||||
|
@cdf
|
||||||
def dataframe() -> DataFrameT:
|
def dataframe() -> DataFrameT:
|
||||||
"""
|
"""
|
||||||
%matplotlib gtk
|
%matplotlib gtk
|
||||||
|
@ -152,6 +153,8 @@ def dataframe() -> DataFrameT:
|
||||||
# todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks...
|
# todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks...
|
||||||
# either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh?
|
# either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh?
|
||||||
import pandas as pd # type: ignore
|
import pandas as pd # type: ignore
|
||||||
return pd.DataFrame(p._asdict() for p in measurements()).set_index('dt')
|
df = pd.DataFrame(p._asdict() for p in measurements())
|
||||||
|
# todo not sure how it would handle mixed timezones??
|
||||||
|
return df.set_index('dt')
|
||||||
|
|
||||||
# todo test against an older db?
|
# todo test against an older db?
|
||||||
|
|
|
@ -3,6 +3,7 @@ Various pandas helpers and convenience functions
|
||||||
'''
|
'''
|
||||||
# todo not sure if this belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
|
# todo not sure if this belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
|
||||||
# NOTE: this file is meant to be importable without Pandas installed
|
# NOTE: this file is meant to be importable without Pandas installed
|
||||||
|
from datetime import datetime
|
||||||
from typing import Optional, TYPE_CHECKING, Any
|
from typing import Optional, TYPE_CHECKING, Any
|
||||||
from . import warnings
|
from . import warnings
|
||||||
|
|
||||||
|
@ -26,9 +27,9 @@ def check_dateish(s) -> Optional[str]:
|
||||||
s = s.dropna()
|
s = s.dropna()
|
||||||
if len(s) == 0:
|
if len(s) == 0:
|
||||||
return None
|
return None
|
||||||
all_timestamps = s.apply(lambda x: isinstance(x, pd.Timestamp)).all()
|
all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all()
|
||||||
if all_timestamps:
|
if all_timestamps:
|
||||||
return 'All values are pd.Timestamp, but dtype is not datetime. Most likely, you have mixed timezones'
|
return 'All values are timestamp-like, but dtype is not datetime. Most likely, you have mixed timezones'
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -41,11 +42,10 @@ def check_dataframe(f: FuncT) -> FuncT:
|
||||||
def wrapper(*args, **kwargs) -> DataFrameT:
|
def wrapper(*args, **kwargs) -> DataFrameT:
|
||||||
df = f(*args, **kwargs)
|
df = f(*args, **kwargs)
|
||||||
# todo make super defensive?
|
# todo make super defensive?
|
||||||
# TODO check index as well?
|
for col, data in df.reset_index().iteritems():
|
||||||
for col, data in df.iteritems():
|
|
||||||
res = check_dateish(data)
|
res = check_dateish(data)
|
||||||
if res is not None:
|
if res is not None:
|
||||||
warnings.low(f"{f.__name__}, column '{col}': {res}")
|
warnings.low(f"{f.__module__}:{f.__name__}, column '{col}': {res}")
|
||||||
return df
|
return df
|
||||||
# https://github.com/python/mypy/issues/1927
|
# https://github.com/python/mypy/issues/1927
|
||||||
return wrapper # type: ignore[return-value]
|
return wrapper # type: ignore[return-value]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue