HPI/my/core/influxdb.py

'''
TODO doesn't really belong to 'core' morally, but can think of moving out later
'''
from typing import Iterable, Any, Optional

from .common import LazyLogger, asdict, Json

logger = LazyLogger(__name__)


class config:
    db = 'db'  # name of the InfluxDB database to write to
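
# Hypothetical override sketch (config is a plain class with a class attribute,
# so the database name can simply be reassigned before calling fill):
#
#   from my.core import influxdb
#   influxdb.config.db = 'hpi'
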

def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
    # todo infer dt column automatically, reuse in stat?
    # it doesn't like dots, ends up as some syntax error?
    measurement = measurement.replace('.', '_')
    # todo autoinfer measurement?

    db = config.db

    from influxdb import InfluxDBClient  # type: ignore
    client = InfluxDBClient()
    # todo maybe create if not exists?
    # client.create_database(db)

    # todo should it be an env variable?
    if reset:
        client.delete_series(database=db, measurement=measurement)

    def dit() -> Iterable[Json]:
        for i in it:
            d = asdict(i)
            # meh... handle tags in a more robust manner
            tags: Optional[Json] = d.get('tags')
            if tags is not None:
                del d['tags']

            # TODO what to do with exceptions??
            # todo handle errors.. not sure how? maybe add tag for 'error' and fill with empty data?
            dt = d['dt'].isoformat()
            del d['dt']

            # everything that's left becomes fields
            fields = d
            yield dict(
                measurement=measurement,
                # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
                # hmm, so tags are autoindexed and might be faster?
                # not sure what's the big difference though
                # "fields are data and tags are metadata"
                tags=tags,
                time=dt,
                fields=fields,
            )

    from more_itertools import chunked
    # "The optimal batch size is 5000 lines of line protocol."
    # some chunking is definitely necessary, otherwise it fails
    for chi in chunked(dit(), n=5000):
        chl = list(chi)
        logger.debug('writing next chunk %s', chl[-1])
        client.write_points(chl, database=db)
    # todo "Specify timestamp precision when writing to InfluxDB."?