influxdb: WIP on magic automatic interface
to run: python3 -c 'import my.core.influxdb as I; import my.hypothesis as H; I.magic_fill(H.highlights)'
parent bfec6b975f
commit 20585a3130
2 changed files with 58 additions and 14 deletions
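For reference, the one-liner from the commit message expanded into a small script. It assumes a local InfluxDB reachable with the defaults from my.core.influxdb.config; my.hypothesis / highlights is just the example provider named above, and any callable yielding objects with a single datetime field should work the same way:

    # same as the python3 -c one-liner in the commit message
    import my.core.influxdb as I
    import my.hypothesis as H

    # infers the measurement name and the datetime column from H.highlights itself
    I.magic_fill(H.highlights)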
my/core/influxdb.py
@@ -1,8 +1,7 @@
 '''
 TODO doesn't really belong to 'core' morally, but can think of moving out later
 '''
-from typing import Iterable, Any, Optional
+from typing import Iterable, Any, Optional, Dict

-
 from .common import LazyLogger, asdict, Json

@@ -14,7 +13,7 @@ class config:
     db = 'db'


-def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
+def fill(it: Iterable[Any], *, measurement: str, reset: bool=False, dt_col: str='dt') -> None:
     # todo infer dt column automatically, reuse in stat?
     # it doesn't like dots, ends up some syntax error?
     measurement = measurement.replace('.', '_')
@@ -31,19 +30,38 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
     if reset:
         client.delete_series(database=db, measurement=measurement)

+    # TODO need to take schema here...
+    cache: Dict[str, bool] = {}
+    def good(f, v) -> bool:
+        c = cache.get(f)
+        if c is not None:
+            return c
+        t = type(v)
+        r = t in {str, int}
+        cache[f] = r
+        if not r:
+            logger.warning('%s: filtering out %s=%s because of type %s', measurement, f, v, t)
+        return r
+
+    def filter_dict(d: Json) -> Json:
+        return {f: v for f, v in d.items() if good(f, v)}
+
     def dit() -> Iterable[Json]:
         for i in it:
             d = asdict(i)
             tags: Optional[Json] = None
-            tags = d.get('tags') # meh... handle in a more robust manner
-            if tags is not None:
+            tags_ = d.get('tags') # meh... handle in a more robust manner
+            if tags_ is not None and isinstance(tags_, dict): # FIXME meh.
                 del d['tags']
+                tags = tags_

             # TODO what to do with exceptions??
             # todo handle errors.. not sure how? maybe add tag for 'error' and fill with emtpy data?
-            dt = d['dt'].isoformat()
-            del d['dt']
-            fields = d
+            dt = d[dt_col].isoformat()
+            del d[dt_col]
+
+            fields = filter_dict(d)
+
             yield dict(
                 measurement=measurement,
                 # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
@@ -52,7 +70,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
                 # "fields are data and tags are metadata"
                 tags=tags,
                 time=dt,
-                fields=d,
+                fields=fields,
             )


@@ -64,3 +82,28 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=False) -> None:
         logger.debug('writing next chunk %s', chl[-1])
         client.write_points(chl, database=db)
     # todo "Specify timestamp precision when writing to InfluxDB."?
+
+
+def magic_fill(it) -> None:
+    assert callable(it)
+    name = f'{it.__module__}:{it.__name__}'
+
+    from itertools import tee
+    from more_itertools import first, one
+    it = it()
+    it, x = tee(it)
+    f = first(x, default=None)
+    if f is None:
+        logger.warning('%s has no data', name)
+        return
+
+    # TODO can we reuse pandas code or something?
+    #
+    from .pandas import _as_columns
+    schema = _as_columns(type(f))
+
+    from datetime import datetime
+    dtex = RuntimeError(f'expected single datetime field. schema: {schema}')
+    dtf = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex)
+
+    fill(it, measurement=name, reset=True, dt_col=dtf)
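The crux of magic_fill above is locating the single datetime column in the inferred schema. A condensed, self-contained sketch of that step follows; the Item dataclass and its fields are invented for illustration, while in the real code the schema comes from _as_columns over the provider's item type:

    from dataclasses import dataclass, fields
    from datetime import datetime
    from typing import Dict, Type

    from more_itertools import one

    @dataclass
    class Item:             # stand-in for whatever the provider yields
        dt: datetime
        url: str
        count: int

    # field name -> declared type, same shape as _as_columns returns
    schema: Dict[str, Type] = {f.name: f.type for f in fields(Item)}

    # demand exactly one datetime field; too few or too many raises
    dtex = RuntimeError(f'expected single datetime field. schema: {schema}')
    dt_col = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex)
    assert dt_col == 'dt'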
|
|
|
my/core/pandas.py
@@ -5,7 +5,7 @@ Various pandas helpers and convenience functions
 # NOTE: this file is meant to be importable without Pandas installed
 from datetime import datetime
 from pprint import pformat
-from typing import Optional, TYPE_CHECKING, Any, Iterable, Type, List
+from typing import Optional, TYPE_CHECKING, Any, Iterable, Type, List, Dict
 from . import warnings, Res
 from .common import LazyLogger

@@ -105,12 +105,13 @@ error_to_row = error_to_json # todo deprecate?
 # no type for dataclass?
 Schema = Any

-def _as_columns(s: Schema) -> List[str]:
+def _as_columns(s: Schema) -> Dict[str, Type]:
+    # todo would be nice to extract properties; add tests for this as well
     import dataclasses as D
     if D.is_dataclass(s):
-        return [f.name for f in D.fields(s)]
+        return {f.name: f.type for f in D.fields(s)}
     # else must be NamedTuple??
-    return list(getattr(s, '_fields'))
+    return getattr(s, '_field_types')


 # todo add proper types
@@ -125,7 +126,7 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Optional[Schema]=None) -> DataF
     # so we need to convert each individually... sigh
     from .common import to_jsons
     import pandas as pd
-    columns = None if schema is None else _as_columns(schema)
+    columns = None if schema is None else list(_as_columns(schema).keys())
     return pd.DataFrame(to_jsons(it), columns=columns)

||||||
|
|
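For illustration, roughly what the reworked _as_columns returns now versus before, on a made-up dataclass (Highlight is not a type from the repo):

    import dataclasses as D
    from datetime import datetime

    @D.dataclass
    class Highlight:        # invented example type
        dt: datetime
        text: str

    # old behaviour: only the column names
    [f.name for f in D.fields(Highlight)]          # ['dt', 'text']

    # new behaviour: names mapped to declared types, which is what
    # magic_fill needs to locate the datetime column
    {f.name: f.type for f in D.fields(Highlight)}  # {'dt': datetime, 'text': str}

Note that the NamedTuple fallback relies on _field_types, which was deprecated in Python 3.8 and removed in 3.9; on newer interpreters __annotations__ carries the same name-to-type mapping.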