From 28fcc1d9b6f64f57c7a05ba3aaffef2fade04f9a Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 18 Sep 2020 23:42:05 +0100 Subject: [PATCH] my.rescuetime: use rescuexport directly, add error handling & dataframe --- my/core/error.py | 2 +- my/kython/klogging.py | 2 -- my/rescuetime.py | 39 ++++++++++++++++++++++++++------------- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/my/core/error.py b/my/core/error.py index b60ab5a..c0f3f0c 100644 --- a/my/core/error.py +++ b/my/core/error.py @@ -28,7 +28,7 @@ def echain(ex: E, cause: Exception) -> E: def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]: - # TODO would be nice to have ET=Exception default? + # TODO would be nice to have ET=Exception default? but it causes some mypy complaints? vit, eit = tee(l) # TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type? values: Iterable[T] = ( diff --git a/my/kython/klogging.py b/my/kython/klogging.py index dccf192..2c7601a 100644 --- a/my/kython/klogging.py +++ b/my/kython/klogging.py @@ -40,8 +40,6 @@ def setup_logger(logger: logging.Logger, level: LevelIsh) -> None: class LazyLogger(logging.Logger): - # TODO perhaps should use __new__? - def __new__(cls, name, level: LevelIsh = 'DEBUG'): logger = logging.getLogger(name) # this is called prior to all _log calls so makes sense to do it here? diff --git a/my/rescuetime.py b/my/rescuetime.py index 07abc8e..b2acad9 100644 --- a/my/rescuetime.py +++ b/my/rescuetime.py @@ -1,5 +1,5 @@ ''' -Rescuetime (activity tracking) data +Rescuetime (phone activity tracking) data. ''' from pathlib import Path @@ -9,8 +9,7 @@ from typing import Sequence, Iterable from .core import get_files, LazyLogger from .core.common import mcachew from .core.error import Res, split_errors - -import more_itertools +from .core.pandas import check_dataframe as cdf from my.config import rescuetime as config @@ -22,27 +21,40 @@ def inputs() -> Sequence[Path]: return get_files(config.export_path) -import my.config.repos.rescuexport.dal as dal +# pip git+https://github.com/karlicoss/rescuexport +import rescuexport.dal as dal DAL = dal.DAL Entry = dal.Entry @mcachew(hashf=lambda: inputs()) -def entries() -> Iterable[Entry]: +def entries() -> Iterable[Res[Entry]]: dal = DAL(inputs()) it = dal.entries() - vit, eit = split_errors(it, ET=Exception) - # todo handle errors, I guess initially I didn't because it's unclear how to easily group? - # todo would be nice if logger unwrapped causes by default?? - yield from vit + yield from dal.entries() -def groups(gap=timedelta(hours=3)): - vit = entries() +def groups(gap=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]: + vit, eit = split_errors(entries(), ET=Exception) + yield from eit + import more_itertools from more_itertools import split_when yield from split_when(vit, lambda a, b: (b.dt - a.dt) > gap) +@cdf +def dataframe(): + import pandas as pd # type: ignore + # type: ignore[call-arg, attr-defined] + def it(): + for e in entries(): + if isinstance(e, Exception): + yield dict(error=str(e)) + else: + yield e._asdict() + return pd.DataFrame(it()) + + def stats(): from .core import stat return { @@ -71,7 +83,7 @@ def fake_data(rows=1000): f.write_text(json.dumps(dal.fake_data_generator(rows=rows))) yield # TODO ok, now it's something that actually could run on CI! - +# todo would be kinda nice if doctor could run against the fake data, to have a basic health check of the module? # todo not sure if I want to keep these here? vvv @@ -83,7 +95,8 @@ def fill_influxdb(): db = 'test' client.drop_database(db) client.create_database(db) - vit = entries() + # todo handle errors + vit = (e for e in entries() if isinstance(e, dal.Entry)) jsons = [{ "measurement": 'phone', "tags": {},