core/influxdb: add main method to create influx measurement and fill with values
allows running something like python3 -m my.core.influxdb populate my.zotero
This commit is contained in:
parent
0278f2b68d
commit
0517f7ffb8
4 changed files with 52 additions and 10 deletions
|
@ -596,7 +596,7 @@ datetime_aware = datetime
|
||||||
def assert_subpackage(name: str) -> None:
|
def assert_subpackage(name: str) -> None:
|
||||||
# can lead to some unexpected issues if you 'import cachew' which being in my/core directory.. so let's protect against it
|
# can lead to some unexpected issues if you 'import cachew' which being in my/core directory.. so let's protect against it
|
||||||
# NOTE: if we use overlay, name can be smth like my.origg.my.core.cachew ...
|
# NOTE: if we use overlay, name can be smth like my.origg.my.core.cachew ...
|
||||||
assert 'my.core' in name, f'Expected module __name__ ({name}) to start with my.core'
|
assert name == '__main__' or 'my.core' in name, f'Expected module __name__ ({name}) to be __main__ or start with my.core'
|
||||||
|
|
||||||
|
|
||||||
# https://stackoverflow.com/a/10436851/706389
|
# https://stackoverflow.com/a/10436851/706389
|
||||||
|
|
|
@ -15,7 +15,10 @@ class config:
|
||||||
db = 'db'
|
db = 'db'
|
||||||
|
|
||||||
|
|
||||||
def fill(it: Iterable[Any], *, measurement: str, reset: bool=False, dt_col: str='dt') -> None:
|
RESET_DEFAULT = False
|
||||||
|
|
||||||
|
|
||||||
|
def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_col: str='dt') -> None:
|
||||||
# todo infer dt column automatically, reuse in stat?
|
# todo infer dt column automatically, reuse in stat?
|
||||||
# it doesn't like dots, ends up some syntax error?
|
# it doesn't like dots, ends up some syntax error?
|
||||||
measurement = measurement.replace('.', '_')
|
measurement = measurement.replace('.', '_')
|
||||||
|
@ -30,6 +33,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=False, dt_col: str=
|
||||||
|
|
||||||
# todo should be it be env variable?
|
# todo should be it be env variable?
|
||||||
if reset:
|
if reset:
|
||||||
|
logger.warning('deleting measurements: %s:%s', db, measurement)
|
||||||
client.delete_series(database=db, measurement=measurement)
|
client.delete_series(database=db, measurement=measurement)
|
||||||
|
|
||||||
# TODO need to take schema here...
|
# TODO need to take schema here...
|
||||||
|
@ -79,14 +83,18 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=False, dt_col: str=
|
||||||
from more_itertools import chunked
|
from more_itertools import chunked
|
||||||
# "The optimal batch size is 5000 lines of line protocol."
|
# "The optimal batch size is 5000 lines of line protocol."
|
||||||
# some chunking is def necessary, otherwise it fails
|
# some chunking is def necessary, otherwise it fails
|
||||||
|
inserted = 0
|
||||||
for chi in chunked(dit(), n=5000):
|
for chi in chunked(dit(), n=5000):
|
||||||
chl = list(chi)
|
chl = list(chi)
|
||||||
|
inserted += len(chl)
|
||||||
logger.debug('writing next chunk %s', chl[-1])
|
logger.debug('writing next chunk %s', chl[-1])
|
||||||
client.write_points(chl, database=db)
|
client.write_points(chl, database=db)
|
||||||
|
|
||||||
|
logger.info('inserted %d points', inserted)
|
||||||
# todo "Specify timestamp precision when writing to InfluxDB."?
|
# todo "Specify timestamp precision when writing to InfluxDB."?
|
||||||
|
|
||||||
|
|
||||||
def magic_fill(it, *, name: Optional[str]=None) -> None:
|
def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> None:
|
||||||
if name is None:
|
if name is None:
|
||||||
assert callable(it) # generators have no name/module
|
assert callable(it) # generators have no name/module
|
||||||
name = f'{it.__module__}:{it.__name__}'
|
name = f'{it.__module__}:{it.__name__}'
|
||||||
|
@ -112,4 +120,31 @@ def magic_fill(it, *, name: Optional[str]=None) -> None:
|
||||||
dtex = RuntimeError(f'expected single datetime field. schema: {schema}')
|
dtex = RuntimeError(f'expected single datetime field. schema: {schema}')
|
||||||
dtf = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex)
|
dtf = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex)
|
||||||
|
|
||||||
fill(it, measurement=name, reset=True, dt_col=dtf)
|
fill(it, measurement=name, reset=reset, dt_col=dtf)
|
||||||
|
|
||||||
|
|
||||||
|
import click
|
||||||
|
|
||||||
|
@click.group()
|
||||||
|
def main() -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@main.command(name='populate', short_help='populate influxdb')
|
||||||
|
@click.option('--reset', is_flag=True, help='Reset Influx measurements before inserting', show_default=True)
|
||||||
|
@click.argument('MODULE', type=str, required=True)
|
||||||
|
def populate(module: str, reset: bool) -> None:
|
||||||
|
from .stats import guess_data_providers
|
||||||
|
providers = guess_data_providers(module)
|
||||||
|
# meh.. encapsulate in guess_data_providers?
|
||||||
|
if 'inputs' in providers:
|
||||||
|
del providers['inputs']
|
||||||
|
# todo could do interactive thing? same way as in hpi query
|
||||||
|
[(k, f)] = providers.items()
|
||||||
|
magic_fill(f, reset=reset)
|
||||||
|
|
||||||
|
|
||||||
|
# todo later just add to hpi main?
|
||||||
|
# not sure if want to couple
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
|
|
@ -6,7 +6,7 @@ import importlib
|
||||||
import inspect
|
import inspect
|
||||||
import sys
|
import sys
|
||||||
import typing
|
import typing
|
||||||
from typing import Optional, Callable, Any, Iterator
|
from typing import Optional, Callable, Any, Iterator, Sequence, Dict
|
||||||
|
|
||||||
from .common import StatsFun, Stats, stat
|
from .common import StatsFun, Stats, stat
|
||||||
|
|
||||||
|
@ -14,16 +14,22 @@ from .common import StatsFun, Stats, stat
|
||||||
# TODO maybe could be enough to annotate OUTPUTS or something like that?
|
# TODO maybe could be enough to annotate OUTPUTS or something like that?
|
||||||
# then stats could just use them as hints?
|
# then stats could just use them as hints?
|
||||||
def guess_stats(module_name: str) -> Optional[StatsFun]:
|
def guess_stats(module_name: str) -> Optional[StatsFun]:
|
||||||
module = importlib.import_module(module_name)
|
providers = guess_data_providers(module_name)
|
||||||
mfunctions = inspect.getmembers(module, inspect.isfunction)
|
if len(providers) == 0:
|
||||||
functions = {k: v for k, v in mfunctions if is_data_provider(v)}
|
|
||||||
if len(functions) == 0:
|
|
||||||
return None
|
return None
|
||||||
def auto_stats() -> Stats:
|
def auto_stats() -> Stats:
|
||||||
return {k: stat(v) for k, v in functions.items()}
|
return {k: stat(v) for k, v in providers.items()}
|
||||||
return auto_stats
|
return auto_stats
|
||||||
|
|
||||||
|
|
||||||
|
def guess_data_providers(module_name: str) -> Dict[str, Callable]:
|
||||||
|
module = importlib.import_module(module_name)
|
||||||
|
mfunctions = inspect.getmembers(module, inspect.isfunction)
|
||||||
|
return {k: v for k, v in mfunctions if is_data_provider(v)}
|
||||||
|
|
||||||
|
|
||||||
|
# todo how to exclude deprecated stuff?
|
||||||
|
# todo also exclude def inputs()?
|
||||||
def is_data_provider(fun: Any) -> bool:
|
def is_data_provider(fun: Any) -> bool:
|
||||||
"""
|
"""
|
||||||
1. returns iterable or something like that
|
1. returns iterable or something like that
|
||||||
|
|
|
@ -53,6 +53,7 @@ def _dal() -> dal.DAL:
|
||||||
return dal.DAL(sources)
|
return dal.DAL(sources)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO they are in reverse chronological order...
|
||||||
def highlights() -> List[Res[Highlight]]:
|
def highlights() -> List[Res[Highlight]]:
|
||||||
# todo hmm. otherwise mypy complans
|
# todo hmm. otherwise mypy complans
|
||||||
key: Callable[[Highlight], datetime] = lambda h: h.created
|
key: Callable[[Highlight], datetime] = lambda h: h.created
|
||||||
|
|
Loading…
Add table
Reference in a new issue