core: helpers for automatic dataframes from sequences of NamedTuple/dataclass
also use in my.rescuetime
parent df9a7f7390
commit 07f901e1e5
5 changed files with 77 additions and 14 deletions
misc/rescuetime_cleanup.py (new file, 36 additions)
@@ -0,0 +1,36 @@
+# M-x run-python (raise window so it doesn't hide)
+# ?? python-shell-send-defun
+# C-c C-r python-shell-send-region
+# shit, it isn't autoscrolling??
+# maybe add hook
+# (setq comint-move-point-for-output t) ;; https://github.com/jorgenschaefer/elpy/issues/1641#issuecomment-528355368
+#
+from importlib import reload
+import sys
+
+# todo function to reload hpi?
+todel = [m for m in sys.modules if m.startswith('my.')]
+# for m in todel: del sys.modules[m]
+
+import my
+import my.rescuetime as M
+
+from itertools import islice, groupby
+from more_itertools import ilen, bucket
+
+print(M.dataframe())
+
+e = M.entries()
+e = list(islice(e, 0, 10))
+
+
+key = lambda x: 'ERROR' if isinstance(x, Exception) else x.activity
+
+# TODO move to errors module? how to preserve type signature?
+# b = bucket(e, key=key)
+# for k in b:
+#     g = b[k] # meh? should maybe sort
+#     print(k, ilen(g))
+
+from collections import Counter
+print(Counter(key(x) for x in e))
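
The scratch file above reloads HPI modules by deleting them from sys.modules before re-importing (see its "# todo function to reload hpi?"). A minimal standalone sketch of that idea, not part of the commit; the reload_hpi name is made up and it assumes HPI is importable as 'my':

import sys

def reload_hpi() -> None:
    # drop every loaded 'my.*' module so the next import picks up fresh code
    todel = [m for m in sys.modules if m == 'my' or m.startswith('my.')]
    for m in todel:
        del sys.modules[m]

reload_hpi()
# import my.rescuetime as M  # re-import afterwards to get the reloaded code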
my/core/common.py
@@ -458,3 +458,16 @@ def guess_datetime(x: Any) -> Optional[datetime]:
         if isinstance(v, datetime):
             return v
     return None
+
+
+def asdict(thing) -> Json:
+    # todo primitive?
+    # todo exception?
+    if isinstance(thing, dict):
+        return thing
+    import dataclasses as D
+    if D.is_dataclass(thing):
+        return D.asdict(thing)
+    # must be a NT otherwise?
+    # todo add a proper check.. ()
+    return thing._asdict()
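
A quick illustration (not part of the commit) of the three shapes the new asdict helper is meant to normalize; Activity and Visit are made-up types standing in for any dataclass or NamedTuple:

from dataclasses import dataclass
from typing import NamedTuple


@dataclass
class Activity:
    name: str
    seconds: int


class Visit(NamedTuple):
    url: str
    count: int


def asdict(thing) -> dict:
    # same branching as the helper added above: dicts pass through, dataclasses
    # go via dataclasses.asdict, anything else is assumed to be a NamedTuple
    if isinstance(thing, dict):
        return thing
    import dataclasses as D
    if D.is_dataclass(thing):
        return D.asdict(thing)
    return thing._asdict()


assert asdict({'a': 1}) == {'a': 1}
assert asdict(Activity(name='email', seconds=120)) == {'name': 'email', 'seconds': 120}
assert asdict(Visit(url='https://example.org', count=3)) == {'url': 'https://example.org', 'count': 3}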
my/core/logging.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 '''
-Default logger is a bit, see 'test'/run this file for a demo
+Default logger is a bit meh, see 'test'/run this file for a demo
+TODO name 'klogging' to avoid possible conflict with default 'logging' module
+TODO shit. too late already? maybe use fallback & deprecate
 '''
 
 def test() -> None:
my/core/pandas.py
@@ -6,7 +6,7 @@ Various pandas helpers and convenience functions
 from datetime import datetime
 from pprint import pformat
 from typing import Optional, TYPE_CHECKING, Any, Iterable
-from . import warnings
+from . import warnings, Res
 from .common import LazyLogger
 
 logger = LazyLogger(__name__)
@@ -109,3 +109,19 @@ def error_to_row(e: Exception, *, dt_col: str='dt', tz=None) -> Dict[str, Any]:
         'error': estr,
         dt_col : edt,
     }
+
+
+# todo add proper types
+@check_dataframe
+def as_dataframe(it: Iterable[Res[Any]]) -> DataFrameT:
+    # ok nice supports dataframe/NT natively
+    # https://github.com/pandas-dev/pandas/pull/27999
+    # but it dispatches dataclass based on the first entry...
+    # https://github.com/pandas-dev/pandas/blob/fc9fdba6592bdb5d0d1147ce4d65639acd897565/pandas/core/frame.py#L562
+    # same for NamedTuple -- seems that it takes whatever schema the first NT has
+    # so we need to convert each individually... sigh
+    from .common import asdict
+    ie = (error_to_row(r) if isinstance(r, Exception) else asdict(r) for r in it)
+    # TODO just add tests for it?
+    import pandas as pd
+    return pd.DataFrame(ie)
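
A sketch (not part of the commit) of the per-row conversion that as_dataframe performs: as the comments above note, pandas decides how to interpret a sequence from its first element, so a mixed stream of NamedTuples and Exceptions is converted row by row into plain dicts first. Entry and to_row here are hypothetical stand-ins, not the real rescuetime types:

from typing import NamedTuple, Union
import pandas as pd


class Entry(NamedTuple):
    dt: str
    activity: str


def to_row(r: Union[Entry, Exception]) -> dict:
    # mirrors the error_to_row / asdict split inside as_dataframe above
    if isinstance(r, Exception):
        return {'error': str(r)}
    return r._asdict()


items = [Entry('2020-01-01', 'email'), RuntimeError('bad line'), Entry('2020-01-02', 'coding')]
df = pd.DataFrame(to_row(r) for r in items)
print(df)  # columns: dt, activity, error; missing values show up as NaN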
my/rescuetime.py
@@ -12,7 +12,6 @@ from typing import Sequence, Iterable
 from .core import get_files, LazyLogger
 from .core.common import mcachew
 from .core.error import Res, split_errors
-from .core.pandas import check_dataframe as cdf, DataFrameT
 
 from my.config import rescuetime as config
 
@@ -29,7 +28,7 @@ DAL = dal.DAL
 Entry = dal.Entry
 
 
-@mcachew(hashf=lambda: inputs())
+@mcachew(depends_on=lambda: inputs())
 def entries() -> Iterable[Res[Entry]]:
     dal = DAL(inputs())
     it = dal.entries()
@@ -44,17 +43,10 @@ def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
     yield from split_when(vit, lambda a, b: (b.dt - a.dt) > gap)
 
 
-@cdf
+# todo automatic dataframe interface?
+from .core.pandas import DataFrameT, as_dataframe
 def dataframe() -> DataFrameT:
-    import pandas as pd # type: ignore
-    # type: ignore[call-arg, attr-defined]
-    def it():
-        for e in entries():
-            if isinstance(e, Exception):
-                yield dict(error=str(e))
-            else:
-                yield e._asdict()
-    return pd.DataFrame(it())
+    return as_dataframe(entries())
 
 
 from .core import stat, Stats
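
With the change above, my.rescuetime.dataframe() simply delegates to the new core as_dataframe helper, so parse failures surface as rows with an 'error' column instead of raising. A hypothetical usage sketch, assuming rescuetime is configured in my.config:

import my.rescuetime as M

df = M.dataframe()
print(df.head())
if 'error' in df.columns:
    # rows produced by error_to_row have 'error' set; real entries have NaN there
    print(df['error'].notna().sum(), 'rows could not be parsed')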
@@ -89,6 +81,8 @@ def fake_data(rows: int=1000) -> Iterator[None]:
 
 # todo not sure if I want to keep these here? vvv
 
+# guess should move to core? or to 'ext' module, i.e. interfaces?
+# make automatic
 def fill_influxdb():
     from influxdb import InfluxDBClient # type: ignore
     client = InfluxDBClient()
@@ -106,3 +100,5 @@ def fill_influxdb():
     } for e in vit]
     client.write_points(jsons, database=db) # TODO??
 
+
+# TODO lots of garbage in dir()? maybe need to del the imports...