influxdb: add helper to core + use it in bluemaestro/lastfm/rescuetime
This commit is contained in:
parent
271cd7feef
commit
bfec6b975f
5 changed files with 95 additions and 75 deletions
|
@ -192,32 +192,8 @@ def dataframe() -> DataFrameT:
|
|||
|
||||
|
||||
def fill_influxdb() -> None:
    """Export all bluemaestro measurements into influxdb via the shared core helper."""
    from .core import influxdb
    influxdb.fill(measurements(), measurement=__name__)
|
||||
def check() -> None:
|
||||
|
|
66
my/core/influxdb.py
Normal file
66
my/core/influxdb.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
'''
|
||||
TODO doesn't really belong to 'core' morally, but can think of moving out later
|
||||
'''
|
||||
from typing import Iterable, Any, Optional
|
||||
|
||||
|
||||
from .common import LazyLogger, asdict, Json
|
||||
|
||||
|
||||
logger = LazyLogger(__name__)
|
||||
|
||||
|
||||
class config:
    # Name of the influxdb database all modules write their measurements into.
    db = 'db'
|
||||
|
||||
|
||||
def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
    '''
    Write an iterable of objects into influxdb under the given measurement name.

    Each item is converted with asdict(); it must expose a 'dt' (datetime) entry,
    which becomes the point timestamp (KeyError otherwise). An optional 'tags'
    dict entry is used as influx tags; all remaining entries become fields.

    :param it: iterable of dataclass/NamedTuple/dict-like items to write
    :param measurement: influx measurement name (dots are normalized to underscores)
    :param reset: when True, drop the existing series for this measurement first
    '''
    # todo infer dt column automatically, reuse in stat?
    # influx doesn't like dots in measurement names (ends up some syntax error), so normalize
    measurement = measurement.replace('.', '_')
    # todo autoinfer measurement?

    db = config.db

    from influxdb import InfluxDBClient # type: ignore
    client = InfluxDBClient()
    # todo maybe create if not exists?
    # client.create_database(db)

    # todo should it be env variable?
    if reset:
        client.delete_series(database=db, measurement=measurement)

    def dit() -> Iterable[Json]:
        for i in it:
            d = asdict(i)
            # 'tags' is special-cased as influx metadata; pop it so it doesn't
            # end up in fields. meh... handle in a more robust manner
            tags: Optional[Json] = d.pop('tags', None)

            # TODO what to do with exceptions??
            # todo handle errors.. not sure how? maybe add tag for 'error' and fill with empty data?
            dt = d.pop('dt').isoformat()
            yield dict(
                measurement=measurement,
                # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
                # hmm, so tags are autoindexed and might be faster?
                # not sure what's the big difference though
                # "fields are data and tags are metadata"
                tags=tags,
                time=dt,
                fields=d,
            )

    from more_itertools import chunked
    # "The optimal batch size is 5000 lines of line protocol."
    # some chunking is def necessary, otherwise it fails
    for chi in chunked(dit(), n=5000):
        chl = list(chi)
        logger.debug('writing next chunk %s', chl[-1])
        client.write_points(chl, database=db)
    # todo "Specify timestamp precision when writing to InfluxDB."?
|
|
@ -2,8 +2,7 @@
|
|||
Last.fm scrobbles
|
||||
'''
|
||||
|
||||
from ..core.common import Paths
|
||||
from dataclasses import dataclass
|
||||
from .core import Paths, dataclass
|
||||
from my.config import lastfm as user_config
|
||||
|
||||
@dataclass
|
||||
|
@ -14,7 +13,7 @@ class lastfm(user_config):
|
|||
export_path: Paths
|
||||
|
||||
|
||||
from ..core.cfg import make_config
|
||||
from .core.cfg import make_config
|
||||
config = make_config(lastfm)
|
||||
|
||||
|
||||
|
@ -25,7 +24,8 @@ from typing import NamedTuple, Any, Sequence, Iterable
|
|||
|
||||
import pytz
|
||||
|
||||
from ..core.common import mcachew, Json, get_files
|
||||
from .core.common import mcachew, Json, get_files
|
||||
|
||||
|
||||
def inputs() -> Sequence[Path]:
    """All available lastfm export files."""
    paths = get_files(config.export_path)
    return paths
||||
|
@ -63,10 +63,25 @@ class Scrobble(NamedTuple):
|
|||
# TODO could also be nice to make generic? maybe even depending on eagerness
|
||||
|
||||
|
||||
@mcachew(depends_on=inputs)
def scrobbles() -> Iterable[Scrobble]:
    """Yield scrobbles (oldest first) from the most recent export file."""
    latest = max(inputs())
    data = json.loads(latest.read_text())

    # exports are newest-first; reverse so iteration is chronological
    for raw in reversed(data):
        yield Scrobble(raw=raw)
|
||||
|
||||
|
||||
from .core import stat, Stats


def stats() -> Stats:
    """Basic health stats over the scrobbles data source."""
    return stat(scrobbles)
|
||||
|
||||
|
||||
def fill_influxdb() -> None:
    """Export scrobbles into influxdb via the shared core helper."""
    from .core import influxdb
    # todo needs to be more automatic
    points = (
        dict(dt=s.dt, track=s.track)
        for s in scrobbles()
    )
    influxdb.fill(points, measurement=__name__)
|
|
@ -1,17 +0,0 @@
|
|||
#!/usr/bin/env python3
# pip install influxdb
from influxdb import InfluxDBClient # type: ignore
from my.lastfm import scrobbles


def main() -> None:
    """One-off import of lastfm scrobbles into the 'lastfm' influxdb database."""
    # NOTE: the original `scrobbles = scrobbles()` raised UnboundLocalError:
    # assigning to the name makes it local to main(), shadowing the import
    # before the call. Use a distinct local name instead.
    scrobs = scrobbles()
    client = InfluxDBClient()
    # TODO client.create_database('lastfm')

    jsons = [{"measurement": 'scrobble', "tags": {}, "time": str(sc.dt), "fields": {"name": sc.track}} for sc in scrobs]
    client.write_points(jsons, database='lastfm')


if __name__ == '__main__':
    main()
|
|
@ -79,34 +79,14 @@ def fake_data(rows: int=1000) -> Iterator[None]:
|
|||
# todo would be kinda nice if doctor could run against the fake data, to have a basic health check of the module?
|
||||
|
||||
|
||||
# todo not sure if I want to keep these here? vvv
|
||||
# guess should move to core? or to 'ext' module, i.e. interfaces?
|
||||
# make automatic
|
||||
def fill_influxdb() -> None:
    """Export rescuetime entries into influxdb via the shared core helper."""
    from .core import influxdb
    it = (dict(
        dt=e.dt,
        # NOTE: was `duration_d` -- a typo; the value is seconds, and the inline
        # implementation this replaced emitted the field as 'duration_s'
        duration_s=e.duration_s,
        tags=dict(activity=e.activity),
    ) for e in entries() if isinstance(e, Entry)) # TODO handle errors in core.influxdb
    influxdb.fill(it, measurement=__name__)
||||
|
||||
# TODO lots of garbage in dir()? maybe need to del the imports...
|
||||
|
|
Loading…
Add table
Reference in a new issue