general: enhance logging for various modules

karlicoss 2023-10-29 21:58:13 +00:00
parent ea195e3d17
commit f28f68b14b
5 changed files with 36 additions and 23 deletions


@@ -65,22 +65,24 @@ def is_bad_table(name: str) -> bool:
 @mcachew(depends_on=inputs)
 def measurements() -> Iterable[Res[Measurement]]:
     # todo ideally this would be via arguments... but needs to be lazy
-    dbs = inputs()
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))

     last: Optional[datetime] = None

     # tables are immutable, so can save on processing..
     processed_tables: Set[str] = set()
-    for f in dbs:
-        logger.info('processing %s', f)
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
         tot = 0
         new = 0
         # todo assert increasing timestamp?
-        with sqlite_connect_immutable(f) as db:
+        with sqlite_connect_immutable(path) as db:
             db_dt: Optional[datetime] = None
             try:
                 datas = db.execute(
-                    f'SELECT "{f.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
+                    f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
                 )
                 oldfmt = True
                 db_dts = list(db.execute('SELECT last_download FROM info'))[0][0]
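
For reference, the :>{width} format spec in the new logging line right-aligns the counter in a field as wide as the total count, so the progress prefix stays visually aligned over a whole run. A minimal standalone sketch (hypothetical filenames, not part of the commit):

    paths = [f'sensor_{i}.db' for i in range(12)]  # hypothetical inputs
    total = len(paths)
    width = len(str(total))  # 2 digits for a total of 12
    for idx, path in enumerate(paths):
        # idx and total are right-aligned within a width-character field
        print(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
    # processing [ 0/12] sensor_0.db
    # ...
    # processing [11/12] sensor_11.db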
@@ -156,7 +158,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                 upper = timedelta(days=10)  # kinda arbitrary
                 if not (db_dt - lower < dt < db_dt + upper):
                     # todo could be more defensive??
-                    yield RuntimeError('timestamp too far out', f, name, db_dt, dt)
+                    yield RuntimeError('timestamp too far out', path, name, db_dt, dt)
                     continue

                 # err.. sometimes my values are just interleaved with these for no apparent reason???
@@ -164,7 +166,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                     yield RuntimeError('the weird sensor bug')
                     continue

-                assert -60 <= temp <= 60, (f, dt, temp)
+                assert -60 <= temp <= 60, (path, dt, temp)
                 ##

                 tot += 1
@@ -181,7 +183,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                     dewpoint=dewp,
                 )
                 yield p
-        logger.debug('%s: new %d/%d', f, new, tot)
+        logger.debug(f'{path}: new {new}/{tot}')

     # logger.info('total items: %d', len(merged))
     # for k, v in merged.items():
     #     # TODO shit. quite a few of them have varying values... how is that freaking possible????
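
Note that this hunk also switches logger.debug from lazy %-style arguments to an f-string: the f-string is formatted eagerly even when DEBUG logging is disabled, which is normally negligible but worth knowing. Both spellings side by side (illustrative only):

    import logging
    logger = logging.getLogger('demo')

    path, new, tot = 'a.db', 3, 10
    logger.debug('%s: new %d/%d', path, new, tot)  # lazy: only formatted if DEBUG is enabled
    logger.debug(f'{path}: new {new}/{tot}')       # eager: formatted before the call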


@@ -75,14 +75,16 @@ class Message(_BaseMessage):
 Entity = Union[Sender, Thread, _Message]


 def _entities() -> Iterator[Res[Entity]]:
-    dbs = inputs()
-    for i, f in enumerate(dbs):
-        logger.debug(f'processing {f} {i}/{len(dbs)}')
-        with sqlite_connection(f, immutable=True, row_factory='row') as db:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connection(path, immutable=True, row_factory='row') as db:
             try:
                 yield from _process_db(db)
             except Exception as e:
-                yield echain(RuntimeError(f'While processing {f}'), cause=e)
+                yield echain(RuntimeError(f'While processing {path}'), cause=e)


 def _normalise_user_id(ukey: str) -> str:
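
The yield echain(...) line is the codebase's errors-as-values convention: rather than letting one corrupt database abort the whole iteration, the exception is wrapped (echain presumably attaches the original exception as the cause, akin to raise ... from ...) and yielded as an ordinary item for the caller to inspect. A simplified, self-contained sketch of the pattern, with stand-in names rather than the real my.core API:

    from typing import Iterator, List, Union

    Res = Union[int, Exception]  # stand-in for my.core's Res[T]

    def parse_all(raw: List[str]) -> Iterator[Res]:
        for item in raw:
            try:
                yield int(item)
            except Exception as e:
                # yield the error as a value; iteration continues with the next item
                yield e

    for r in parse_all(['1', 'oops', '3']):
        if isinstance(r, Exception):
            print('skipping bad item:', r)
        else:
            print('parsed:', r)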


@@ -46,6 +46,8 @@ def inputs() -> Sequence[Path]:
 def events() -> Iterable[Res[Event]]:
     last = max(inputs())

     logger.info(f'extracting data from {last}')

+    # a bit naughty and ad-hoc, but we will generify reading from tar.gz once we have more examples
+    # another one is zulip archive
     if last.is_dir():
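
The added comment gestures at generic tar.gz handling; with the stdlib that would look roughly like the following (the archive and member names are hypothetical):

    import tarfile

    # hedged sketch: open a gzipped export and read one JSON member from it
    with tarfile.open('export.tar.gz', 'r:gz') as tf:
        member = tf.extractfile('events.json')  # hypothetical member name
        payload = member.read() if member is not None else b''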


@@ -180,15 +180,17 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
     # NOTE: definitely need to merge multiple, app seems to recycle old messages
     # TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
     # todo use TypedDict?
-    dbs = inputs()
-    for f in dbs:
-        logger.info(f'{f} : processing...')
-        with sqlite_connect_immutable(f) as db:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connect_immutable(path) as db:
             try:
                 yield from _process_db(db=db)
             except Exception as e:
                 # todo use error policy here
-                yield echain(RuntimeError(f'While processing {f}'), cause=e)
+                yield echain(RuntimeError(f'While processing {path}'), cause=e)


 @mcachew(depends_on=inputs)
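
sqlite_connect_immutable is this codebase's helper; the likely mechanism underneath (an assumption, not something this diff confirms) is SQLite's immutable=1 URI parameter, which tells SQLite the file cannot change and skips locking, appropriate for read-only snapshots:

    import sqlite3

    # assumption: roughly what an "immutable" connection amounts to in the stdlib
    conn = sqlite3.connect('file:messages.db?immutable=1', uri=True)  # hypothetical path
    try:
        rows = conn.execute('SELECT 1').fetchall()
    finally:
        conn.close()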


@@ -2,9 +2,13 @@
 Last.fm scrobbles
 '''

-from .core import Paths, dataclass
+from my.core import Paths, dataclass, make_logger
 from my.config import lastfm as user_config

+logger = make_logger(__name__)
+

 @dataclass
 class lastfm(user_config):
     """
@@ -13,7 +17,7 @@ class lastfm(user_config):
     export_path: Paths


-from .core.cfg import make_config
+from my.core.cfg import make_config
 config = make_config(lastfm)
@@ -22,7 +26,7 @@ import json
 from pathlib import Path
 from typing import NamedTuple, Sequence, Iterable

-from .core.common import mcachew, Json, get_files
+from my.core.common import mcachew, Json, get_files


 def inputs() -> Sequence[Path]:
@@ -64,19 +68,20 @@ class Scrobble(NamedTuple):
 @mcachew(depends_on=inputs)
 def scrobbles() -> Iterable[Scrobble]:
     last = max(inputs())
+    logger.info(f'loading data from {last}')
     j = json.loads(last.read_text())

     for raw in reversed(j):
         yield Scrobble(raw=raw)


-from .core import stat, Stats
+from my.core import stat, Stats

 def stats() -> Stats:
     return stat(scrobbles)


 def fill_influxdb() -> None:
-    from .core import influxdb
+    from my.core import influxdb
     # todo needs to be more automatic
     sd = (dict(
         dt=x.dt,