# HPI/my/rescuetime.py

'''
Rescuetime (phone activity tracking) data.
'''
# Extra dependencies of this module: rescuexport supplies the rescuexport.dal imported further down.
# NOTE(review): presumably consumed by HPI tooling to install module requirements — confirm.
REQUIRES = [
    'git+https://github.com/karlicoss/rescuexport',
]

from pathlib import Path
from datetime import datetime, timedelta
from typing import Sequence, Iterable

from .core import get_files, LazyLogger
from .core.common import mcachew
from .core.error import Res, split_errors

from my.config import rescuetime as config


# module-level logger named after this module, created with level 'info'
log = LazyLogger(__name__, level='info')


def inputs() -> Sequence[Path]:
    '''
    Export files to read, as resolved by get_files from config.export_path.
    '''
    export_path = config.export_path
    return get_files(export_path)


# rescuexport's dal module: reading the exports is delegated to it (see entries()),
# and fake_data() uses its fake_data_generator
import rescuexport.dal as dal
# module-level aliases, used in the annotations and functions below
DAL = dal.DAL
Entry = dal.Entry


@mcachew(depends_on=lambda: inputs())
def entries() -> Iterable[Res[Entry]]:
    '''
    Yield everything rescuexport's DAL produces for the current input files.

    Items are passed through untouched; per the Res annotation an item may be
    an error instead of an Entry, so callers have to handle both.
    The result is cached through mcachew, with the cache depending on inputs().
    '''
    # named 'source' rather than 'dal' so the module-level rescuexport.dal alias isn't shadowed
    source = DAL(inputs())
    # DAL.entries() is called exactly once and streamed straight through
    yield from source.entries()


def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
    '''
    Split entries() into groups separated by pauses longer than ``gap``.

    The error stream obtained from split_errors is yielded first, followed by the groups.
    A new group starts whenever the ``dt`` of two consecutive entries differs by more than ``gap``.

    NOTE(review): the splitting only compares neighbouring items, so it assumes
    entries() come in chronological order — confirm against rescuexport.
    '''
    vit, eit = split_errors(entries(), ET=Exception)
    yield from eit
    # imported lazily: more_itertools is only needed once the groups are actually computed
    from more_itertools import split_when
    yield from split_when(vit, lambda a, b: (b.dt - a.dt) > gap)


# todo automatic dataframe interface?
from .core.pandas import DataFrameT, as_dataframe
def dataframe() -> DataFrameT:
    '''
    The output of entries() converted with as_dataframe.
    '''
    items = entries()
    return as_dataframe(items)


from .core import stat, Stats
def stats() -> Stats:
    '''
    Merged stat() summaries of groups and entries, in that order.
    '''
    merged: Stats = {}
    # same order as before: groups first, then entries (later keys win on clashes)
    for provider in (groups, entries):
        merged.update(stat(provider))
    return merged


# basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this?

from typing import Iterator
from contextlib import contextmanager
# todo take seed, or what?
@contextmanager
def fake_data(rows: int=1000) -> Iterator[None]:
    '''
    Context manager that temporarily points the module at generated fake data.

    While the context is active:
    - cachew is disabled (disabled_cachew)
    - the config is overridden so that export_path is a fresh temporary directory
    - that directory holds a single 'rescuetime.json' with the JSON dump of
      dal.fake_data_generator(rows=rows)

    All three context managers are exited when the caller's with-block ends.
    '''
    import json
    from tempfile import TemporaryDirectory
    # todo also disable cachew automatically for such things?
    from .core.cachew import disabled_cachew
    from .core.cfg import override_config

    with disabled_cachew():
        with override_config(config) as patched:
            with TemporaryDirectory() as tmp:
                root = Path(tmp)
                patched.export_path = root
                payload = json.dumps(dal.fake_data_generator(rows=rows))
                (root / 'rescuetime.json').write_text(payload)
                yield
# TODO ok, now it's something that actually could run on CI!
# todo would be kinda nice if doctor could run against the fake data, to have a basic health check of the module?


# todo not sure if I want to keep these here? vvv
# guess should move to core? or to 'ext' module, i.e. interfaces?
# make automatic
def fill_influxdb() -> None:
    '''
    Rewrite this module's measurement in InfluxDB, one point per entry.

    Uses a default-constructed InfluxDBClient and the database 'db'. The existing
    series of the measurement is deleted first, then the points are written.
    Items from entries() that are not Entry instances are skipped.
    '''
    from .core.common import asdict  # not used yet, see the 'asdict(e)' todo below

    from influxdb import InfluxDBClient # type: ignore

    db = 'db'
    # hmm, influx doesn't like dots?
    measurement = __name__.replace('.', '_')

    def as_point(e: Entry) -> dict:
        # hmm, so tags are autoindexed and might be faster?
        # not sure what's the big difference though
        # "fields are data and tags are metadata"
        return {
            'measurement': measurement,
            'tags': {'activity': e.activity},
            'time': e.dt.isoformat(),
            'fields': {'duration_s': e.duration_s},
            # todo asdict(e),
        }

    client = InfluxDBClient()
    client.delete_series(database=db, measurement=measurement)
    # client.drop_database(db)
    # todo create if not exists?
    # client.create_database(db)
    # todo handle errors.. not sure how? maybe add tag for 'error' and fill with empty data?
    valid = (item for item in entries() if isinstance(item, Entry))
    # kept lazy: points are built only as write_points consumes them
    points = (as_point(item) for item in valid)
    # todo do we need to batch?
    client.write_points(points, database=db)


# TODO lots of garbage in dir()? maybe need to del the imports...