HPI/my/core/influxdb.py

'''
TODO doesn't really belong to 'core' morally, but can think of moving out later
'''
from typing import Iterable, Any, Optional

from .common import LazyLogger, asdict, Json

logger = LazyLogger(__name__)


class config:
    db = 'db'  # name of the InfluxDB database to write to
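
# Hypothetical override sketch (config is a plain class with a class attribute,
# so the database name can simply be reassigned before calling fill):
#
#   from my.core import influxdb
#   influxdb.config.db = 'hpi'
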

def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
    # todo infer dt column automatically, reuse in stat?
    # it doesn't like dots, ends up as some syntax error?
    measurement = measurement.replace('.', '_')
    # todo autoinfer measurement?

    db = config.db

    from influxdb import InfluxDBClient  # type: ignore
    client = InfluxDBClient()
    # todo maybe create if not exists?
    # client.create_database(db)

    # todo should it be an env variable?
    if reset:
        client.delete_series(database=db, measurement=measurement)

    def dit() -> Iterable[Json]:
        for i in it:
            d = asdict(i)
            # meh... handle tags in a more robust manner
            tags: Optional[Json] = d.get('tags')
            if tags is not None:
                del d['tags']

            # TODO what to do with exceptions??
            # todo handle errors.. not sure how? maybe add tag for 'error' and fill with empty data?
            dt = d['dt'].isoformat()
            del d['dt']

            # everything that's left becomes fields
            fields = d
            yield dict(
                measurement=measurement,
                # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
                # hmm, so tags are autoindexed and might be faster?
                # not sure what's the big difference though
                # "fields are data and tags are metadata"
                tags=tags,
                time=dt,
                fields=fields,
            )

    from more_itertools import chunked
    # "The optimal batch size is 5000 lines of line protocol."
    # some chunking is definitely necessary, otherwise it fails
    for chi in chunked(dit(), n=5000):
        chl = list(chi)
        logger.debug('writing next chunk %s', chl[-1])
        client.write_points(chl, database=db)
    # todo "Specify timestamp precision when writing to InfluxDB."?