influxdb: add helper to core + use it in bluemaestro/lastfm/rescuetime
This commit is contained in:
parent
271cd7feef
commit
bfec6b975f
5 changed files with 95 additions and 75 deletions
66
my/core/influxdb.py
Normal file
66
my/core/influxdb.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
'''
|
||||
TODO doesn't really belong to 'core' morally, but can think of moving out later
|
||||
'''
|
||||
from typing import Iterable, Any, Optional
|
||||
|
||||
|
||||
from .common import LazyLogger, asdict, Json
|
||||
|
||||
|
||||
logger = LazyLogger(__name__)
|
||||
|
||||
|
||||
class config:
    # Name of the InfluxDB database that fill() writes points into.
    # NOTE(review): presumably intended to be overridden by user config —
    # confirm against how other modules configure this helper.
    db = 'db'
|
||||
|
||||
|
||||
def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
    '''
    Write every item of *it* as a point into the InfluxDB database named by config.db.

    Each item is converted with asdict() and must contain a 'dt' entry with an
    .isoformat() method (used as the point timestamp). An optional 'tags' entry,
    when not None, is sent as InfluxDB tags; all remaining entries become fields.

    :param it: iterable of objects convertible via asdict()
    :param measurement: InfluxDB measurement name; dots are replaced with
                        underscores since they trigger syntax errors server-side
    :param reset: when True, drop the existing series for this measurement first
    '''
    # todo infer dt column automatically, reuse in stat?
    # it doesn't like dots, ends up some syntax error?
    measurement = measurement.replace('.', '_')
    # todo autoinfer measurement?

    db = config.db

    from influxdb import InfluxDBClient  # type: ignore
    client = InfluxDBClient()
    # todo maybe create if not exists?
    # client.create_database(db)

    # todo should it be an env variable?
    if reset:
        client.delete_series(database=db, measurement=measurement)

    def dit() -> Iterable[Json]:
        for i in it:
            d = asdict(i)
            # pull 'tags' out of the fields when present and non-None
            tags: Optional[Json] = d.get('tags')  # meh... handle in a more robust manner
            if tags is not None:
                del d['tags']

            # TODO what to do with exceptions??
            # todo handle errors.. not sure how? maybe add tag for 'error' and fill with empty data?
            dt = d.pop('dt').isoformat()
            yield dict(
                measurement=measurement,
                # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
                # hmm, so tags are autoindexed and might be faster?
                # not sure what's the big difference though
                # "fields are data and tags are metadata"
                tags=tags,
                time=dt,
                fields=d,
            )

    from more_itertools import chunked
    # "The optimal batch size is 5000 lines of line protocol."
    # some chunking is def necessary, otherwise it fails
    for chi in chunked(dit(), n=5000):
        chl = list(chi)
        # chunked never yields an empty chunk, so chl[-1] is safe here
        logger.debug('writing next chunk %s', chl[-1])
        client.write_points(chl, database=db)
    # todo "Specify timestamp precision when writing to InfluxDB."?
|
Loading…
Add table
Add a link
Reference in a new issue