diff --git a/my/bluemaestro.py b/my/bluemaestro.py
index 1586426..8f05aac 100644
--- a/my/bluemaestro.py
+++ b/my/bluemaestro.py
@@ -65,22 +65,24 @@ def is_bad_table(name: str) -> bool:
 @mcachew(depends_on=inputs)
 def measurements() -> Iterable[Res[Measurement]]:
     # todo ideally this would be via arguments... but needs to be lazy
-    dbs = inputs()
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
 
     last: Optional[datetime] = None
 
     # tables are immutable, so can save on processing..
     processed_tables: Set[str] = set()
-    for f in dbs:
-        logger.info('processing %s', f)
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
         tot = 0
         new = 0
         # todo assert increasing timestamp?
-        with sqlite_connect_immutable(f) as db:
+        with sqlite_connect_immutable(path) as db:
             db_dt: Optional[datetime] = None
             try:
                 datas = db.execute(
-                    f'SELECT "{f.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
+                    f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
                 )
                 oldfmt = True
                 db_dts = list(db.execute('SELECT last_download FROM info'))[0][0]
@@ -156,7 +158,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                 upper = timedelta(days=10)  # kinda arbitrary
                 if not (db_dt - lower < dt < db_dt + timedelta(days=10)):
                     # todo could be more defenive??
-                    yield RuntimeError('timestamp too far out', f, name, db_dt, dt)
+                    yield RuntimeError('timestamp too far out', path, name, db_dt, dt)
                     continue
 
                 # err.. sometimes my values are just interleaved with these for no apparent reason???
@@ -164,7 +166,7 @@
                     yield RuntimeError('the weird sensor bug')
                     continue
 
-                assert -60 <= temp <= 60, (f, dt, temp)
+                assert -60 <= temp <= 60, (path, dt, temp)
                 ##
 
                 tot += 1
@@ -181,7 +183,7 @@
                     dewpoint=dewp,
                 )
                 yield p
-        logger.debug('%s: new %d/%d', f, new, tot)
+        logger.debug(f'{path}: new {new}/{tot}')
     # logger.info('total items: %d', len(merged))
     # for k, v in merged.items():
     #     # TODO shit. quite a few of them have varying values... how is that freaking possible????
diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py
index 38551b4..a5e6749 100644
--- a/my/fbmessenger/android.py
+++ b/my/fbmessenger/android.py
@@ -75,14 +75,16 @@ class Message(_BaseMessage):
 Entity = Union[Sender, Thread, _Message]
 
 def _entities() -> Iterator[Res[Entity]]:
-    dbs = inputs()
-    for i, f in enumerate(dbs):
-        logger.debug(f'processing {f} {i}/{len(dbs)}')
-        with sqlite_connection(f, immutable=True, row_factory='row') as db:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connection(path, immutable=True, row_factory='row') as db:
             try:
                 yield from _process_db(db)
             except Exception as e:
-                yield echain(RuntimeError(f'While processing {f}'), cause=e)
+                yield echain(RuntimeError(f'While processing {path}'), cause=e)
 
 
 def _normalise_user_id(ukey: str) -> str:
diff --git a/my/github/gdpr.py b/my/github/gdpr.py
index 1ff0f93..1fde7c9 100644
--- a/my/github/gdpr.py
+++ b/my/github/gdpr.py
@@ -46,6 +46,8 @@ def inputs() -> Sequence[Path]:
 
 def events() -> Iterable[Res[Event]]:
     last = max(inputs())
+    logger.info(f'extracting data from {last}')
+
     # a bit naughty and ad-hoc, but we will generify reading from tar.gz. once we have more examples
     # another one is zulip archive
     if last.is_dir():
diff --git a/my/instagram/android.py b/my/instagram/android.py
index 97733b8..eace1c0 100644
--- a/my/instagram/android.py
+++ b/my/instagram/android.py
@@ -180,15 +180,17 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
     # NOTE: definitely need to merge multiple, app seems to recycle old messages
     # TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
     # todo use TypedDict?
-    dbs = inputs()
-    for f in dbs:
-        logger.info(f'{f} : processing...')
-        with sqlite_connect_immutable(f) as db:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connect_immutable(path) as db:
             try:
                 yield from _process_db(db=db)
             except Exception as e:
                 # todo use error policy here
-                yield echain(RuntimeError(f'While processing {f}'), cause=e)
+                yield echain(RuntimeError(f'While processing {path}'), cause=e)
 
 
 @mcachew(depends_on=inputs)
diff --git a/my/lastfm.py b/my/lastfm.py
index 97c112c..90484b4 100644
--- a/my/lastfm.py
+++ b/my/lastfm.py
@@ -2,9 +2,13 @@
 Last.fm scrobbles
 '''
 
-from .core import Paths, dataclass
+from my.core import Paths, dataclass, make_logger
 from my.config import lastfm as user_config
 
+
+logger = make_logger(__name__)
+
+
 @dataclass
 class lastfm(user_config):
     """
@@ -13,7 +17,7 @@ class lastfm(user_config):
     export_path: Paths
 
 
-from .core.cfg import make_config
+from my.core.cfg import make_config
 config = make_config(lastfm)
 
 
@@ -22,7 +26,7 @@ import json
 from pathlib import Path
 from typing import NamedTuple, Sequence, Iterable
 
-from .core.common import mcachew, Json, get_files
+from my.core.common import mcachew, Json, get_files
 
 
 def inputs() -> Sequence[Path]:
@@ -64,19 +68,20 @@ class Scrobble(NamedTuple):
 
 @mcachew(depends_on=inputs)
 def scrobbles() -> Iterable[Scrobble]:
     last = max(inputs())
+    logger.info(f'loading data from {last}')
     j = json.loads(last.read_text())
     for raw in reversed(j):
         yield Scrobble(raw=raw)
 
 
-from .core import stat, Stats
+from my.core import stat, Stats
 def stats() -> Stats:
     return stat(scrobbles)
 
 
 def fill_influxdb() -> None:
-    from .core import influxdb
+    from my.core import influxdb
     # todo needs to be more automatic
     sd = (dict(
         dt=x.dt,
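
The recurring change across these modules is the same aligned-progress-counter pattern: derive the counter width from the total so the bracketed prefix lines up across log lines. A minimal, self-contained sketch of just that pattern, outside of HPI (process_all and the fake paths are illustrative, not part of the repo):

import logging
from pathlib import Path
from typing import Sequence

logger = logging.getLogger(__name__)


def process_all(paths: Sequence[Path]) -> None:
    total = len(paths)
    # width of the widest counter, e.g. 2 for total=12, so columns stay aligned
    width = len(str(total))
    for idx, path in enumerate(paths):
        # nested f-string format spec: {idx:>{width}} right-aligns idx
        # in a field `width` characters wide
        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    process_all([Path(f'db_{i}.sqlite') for i in range(12)])
    # emits e.g. "processing [ 0/12] db_0.sqlite" ... "processing [11/12] db_11.sqlite"

Note that, as in the hunks above, enumerate starts at 0, so the counter runs 0..total-1 rather than 1..total.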
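The yield echain(RuntimeError(...), cause=e) lines rely on HPI's errors-as-values convention: a failure is wrapped and yielded instead of raised, so one corrupt database doesn't abort the whole generator. A hedged sketch of the idea in plain Python; chain_exc is a hypothetical stand-in for HPI's echain helper, and Res is simplified to a plain Union:

from typing import Iterator, List, Union

Res = Union[int, Exception]  # simplified stand-in for HPI's Res[T]


def chain_exc(exc: Exception, cause: Exception) -> Exception:
    # same linkage that `raise exc from cause` would establish
    exc.__cause__ = cause
    return exc


def parse_all(raw: List[str]) -> Iterator[Res]:
    for item in raw:
        try:
            yield int(item)
        except Exception as e:
            # wrap and yield instead of raising, so iteration continues
            yield chain_exc(RuntimeError(f'While processing {item!r}'), cause=e)


for r in parse_all(['1', 'oops', '3']):
    if isinstance(r, Exception):
        print('error:', r, '| caused by:', repr(r.__cause__))
    else:
        print('ok:', r)

The caller decides per item whether to log, skip, or re-raise, which is what lets the loops above keep going past a bad input file.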