influxdb: add helper to core + use it in bluemaestro/lastfm/rescuetime

This commit is contained in:
Dima Gerasimov 2021-02-21 20:59:03 +00:00 committed by karlicoss
parent 271cd7feef
commit bfec6b975f
5 changed files with 95 additions and 75 deletions

66
my/core/influxdb.py Normal file
View file

@ -0,0 +1,66 @@
'''
TODO doesn't really belong to 'core' morally, but can think of moving out later
'''
from typing import Iterable, Any, Optional
from .common import LazyLogger, asdict, Json
logger = LazyLogger(__name__)
class config:
db = 'db'
def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
# todo infer dt column automatically, reuse in stat?
# it doesn't like dots, ends up some syntax error?
measurement = measurement.replace('.', '_')
# todo autoinfer measurement?
db = config.db
from influxdb import InfluxDBClient # type: ignore
client = InfluxDBClient()
# todo maybe create if not exists?
# client.create_database(db)
# todo should be it be env variable?
if reset:
client.delete_series(database=db, measurement=measurement)
def dit() -> Iterable[Json]:
for i in it:
d = asdict(i)
tags: Optional[Json] = None
tags = d.get('tags') # meh... handle in a more robust manner
if tags is not None:
del d['tags']
# TODO what to do with exceptions??
# todo handle errors.. not sure how? maybe add tag for 'error' and fill with emtpy data?
dt = d['dt'].isoformat()
del d['dt']
fields = d
yield dict(
measurement=measurement,
# TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
# hmm, so tags are autoindexed and might be faster?
# not sure what's the big difference though
# "fields are data and tags are metadata"
tags=tags,
time=dt,
fields=d,
)
from more_itertools import chunked
# "The optimal batch size is 5000 lines of line protocol."
# some chunking is def necessary, otherwise it fails
for chi in chunked(dit(), n=5000):
chl = list(chi)
logger.debug('writing next chunk %s', chl[-1])
client.write_points(chl, database=db)
# todo "Specify timestamp precision when writing to InfluxDB."?