influxdb: add helper to core + use it in bluemaestro/lastfm/rescuetime

This commit is contained in:
Dima Gerasimov 2021-02-21 20:59:03 +00:00 committed by karlicoss
parent 271cd7feef
commit bfec6b975f
5 changed files with 95 additions and 75 deletions

View file

@ -192,32 +192,8 @@ def dataframe() -> DataFrameT:
def fill_influxdb() -> None: def fill_influxdb() -> None:
from itertools import islice from .core import influxdb
from .core.common import asdict influxdb.fill(measurements(), measurement=__name__)
from influxdb import InfluxDBClient # type: ignore
client = InfluxDBClient()
db = 'db'
mname = __name__.replace('.', '_')
client.delete_series(database=db, measurement=mname)
def dissoc(d, k):
del d[k]
return d # meh
jsons = ({
'measurement': mname,
# todo maybe good idea to tags with database file/name? to inspect inconsistencies etc..
# 'tags': {'activity': e.activity},
'time': e.dt.isoformat(),
'fields': dissoc(asdict(e), 'dt'),
} for e in measurements())
from more_itertools import chunked
# "The optimal batch size is 5000 lines of line protocol."
# some chunking is def necessary, otherwise it fails
for chunk in chunked(jsons, n=5000):
cl = list(chunk)
logger.debug('writing next chunk %s', cl[-1])
client.write_points(cl, database=db)
# todo "Specify timestamp precision when writing to InfluxDB."?
def check() -> None: def check() -> None:

66
my/core/influxdb.py Normal file
View file

@ -0,0 +1,66 @@
'''
TODO doesn't really belong to 'core' morally, but can think of moving out later
'''
from typing import Iterable, Any, Optional
from .common import LazyLogger, asdict, Json
logger = LazyLogger(__name__)
class config:
    # name of the InfluxDB database that fill() writes points into
    db = 'db'
def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
    '''
    Write items from *it* into InfluxDB as points under *measurement*.

    Each item is converted with asdict(); its 'dt' attribute supplies the
    point timestamp (isoformat), an optional 'tags' attribute supplies the
    InfluxDB tags, and every remaining key becomes a field.
    If *reset* is True, the existing series for the measurement is dropped
    before writing.
    '''
    # todo infer dt column automatically, reuse in stat?
    # influx doesn't like dots in measurement names, ends up a syntax error
    measurement = measurement.replace('.', '_')
    # todo autoinfer measurement?

    db = config.db

    from influxdb import InfluxDBClient  # type: ignore
    client = InfluxDBClient()
    # todo maybe create if not exists?
    # client.create_database(db)

    # todo should it be env variable?
    if reset:
        client.delete_series(database=db, measurement=measurement)

    def dit() -> Iterable[Json]:
        for i in it:
            d = asdict(i)
            # pop so a 'tags' key never leaks into the fields dict,
            # even when its value is None (the old get+del kept it then)
            # meh... handle in a more robust manner
            tags: Optional[Json] = d.pop('tags', None)
            # TODO what to do with exceptions??
            # todo handle errors.. not sure how? maybe add tag for 'error' and fill with empty data?
            dt = d.pop('dt').isoformat()
            yield dict(
                measurement=measurement,
                # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
                # hmm, so tags are autoindexed and might be faster?
                # not sure what's the big difference though
                # "fields are data and tags are metadata"
                tags=tags,
                time=dt,
                fields=d,  # everything left over after dt/tags were popped
            )

    from more_itertools import chunked
    # "The optimal batch size is 5000 lines of line protocol."
    # some chunking is def necessary, otherwise it fails
    for chi in chunked(dit(), n=5000):
        chl = list(chi)
        logger.debug('writing next chunk %s', chl[-1])
        client.write_points(chl, database=db)
    # todo "Specify timestamp precision when writing to InfluxDB."?

View file

@ -2,8 +2,7 @@
Last.fm scrobbles Last.fm scrobbles
''' '''
from ..core.common import Paths from .core import Paths, dataclass
from dataclasses import dataclass
from my.config import lastfm as user_config from my.config import lastfm as user_config
@dataclass @dataclass
@ -14,7 +13,7 @@ class lastfm(user_config):
export_path: Paths export_path: Paths
from ..core.cfg import make_config from .core.cfg import make_config
config = make_config(lastfm) config = make_config(lastfm)
@ -25,7 +24,8 @@ from typing import NamedTuple, Any, Sequence, Iterable
import pytz import pytz
from ..core.common import mcachew, Json, get_files from .core.common import mcachew, Json, get_files
def inputs() -> Sequence[Path]: def inputs() -> Sequence[Path]:
return get_files(config.export_path) return get_files(config.export_path)
@ -63,10 +63,25 @@ class Scrobble(NamedTuple):
# TODO could also be nice to make generic? maybe even depending on eagerness # TODO could also be nice to make generic? maybe even depending on eagerness
@mcachew(hashf=lambda: inputs()) @mcachew(depends_on=inputs)
def scrobbles() -> Iterable[Scrobble]: def scrobbles() -> Iterable[Scrobble]:
last = max(inputs()) last = max(inputs())
j = json.loads(last.read_text()) j = json.loads(last.read_text())
for raw in reversed(j): for raw in reversed(j):
yield Scrobble(raw=raw) yield Scrobble(raw=raw)
from .core import stat, Stats
def stats() -> Stats:
return stat(scrobbles)
def fill_influxdb() -> None:
from .core import influxdb
# todo needs to be more automatic
sd = (dict(
dt=x.dt,
track=x.track,
) for x in scrobbles())
influxdb.fill(sd, measurement=__name__)

View file

@ -1,17 +0,0 @@
#!/usr/bin/env python3
# pip install influxdb
from influxdb import InfluxDBClient # type: ignore
from my.lastfm import scrobbles
def main() -> None:
scrobbles = scrobbles()
client = InfluxDBClient()
# TODO client.create_database('lastfm')
jsons = [{"measurement": 'scrobble', "tags": {}, "time": str(sc.dt), "fields": {"name": sc.track}} for sc in scrobbles]
client.write_points(jsons, database='lastfm')
if __name__ == '__main__':
main()

View file

@ -79,34 +79,14 @@ def fake_data(rows: int=1000) -> Iterator[None]:
# todo would be kinda nice if doctor could run against the fake data, to have a basic health check of the module? # todo would be kinda nice if doctor could run against the fake data, to have a basic health check of the module?
# todo not sure if I want to keep these here? vvv
# guess should move to core? or to 'ext' module, i.e. interfaces?
# make automatic
def fill_influxdb() -> None: def fill_influxdb() -> None:
from .core.common import asdict from .core import influxdb
it = (dict(
from influxdb import InfluxDBClient # type: ignore dt=e.dt,
client = InfluxDBClient() duration_d=e.duration_s,
db = 'db' tags=dict(activity=e.activity),
measurement = __name__.replace('.', '_') ) for e in entries() if isinstance(e, Entry)) # TODO handle errors in core.influxdb
client.delete_series(database=db, measurement=measurement) influxdb.fill(it, measurement=__name__)
# client.drop_database(db)
# todo create if not exists?
# client.create_database(db)
# todo handle errors.. not sure how? maybe add tag for 'error' and fill with empty data?
vit = (e for e in entries() if isinstance(e, Entry))
jsons = ({
'measurement': measurement, # hmm, influx doesn't like dots?
# hmm, so tags are autoindexed and might be faster?
# not sure what's the big difference though
# "fields are data and tags are metadata"
'tags': {'activity': e.activity},
'time': e.dt.isoformat(),
'fields': {'duration_s': e.duration_s},
# todo asdict(e),
} for e in vit)
# todo do we need to batch?
client.write_points(jsons, database=db)
# TODO lots of garbage in dir()? maybe need to del the imports... # TODO lots of garbage in dir()? maybe need to del the imports...