general: enhance logging for various modules

karlicoss 2023-10-29 21:58:13 +00:00
parent ea195e3d17
commit f28f68b14b
5 changed files with 36 additions and 23 deletions


@@ -65,22 +65,24 @@ def is_bad_table(name: str) -> bool:
 @mcachew(depends_on=inputs)
 def measurements() -> Iterable[Res[Measurement]]:
     # todo ideally this would be via arguments... but needs to be lazy
-    dbs = inputs()
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))

     last: Optional[datetime] = None

     # tables are immutable, so can save on processing..
     processed_tables: Set[str] = set()
-    for f in dbs:
-        logger.info('processing %s', f)
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
         tot = 0
         new = 0
         # todo assert increasing timestamp?
-        with sqlite_connect_immutable(f) as db:
+        with sqlite_connect_immutable(path) as db:
             db_dt: Optional[datetime] = None
             try:
                 datas = db.execute(
-                    f'SELECT "{f.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
+                    f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
                 )
                 oldfmt = True
                 db_dts = list(db.execute('SELECT last_download FROM info'))[0][0]
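
For reference, the :>{width} format spec in the new logging line right-aligns the counter in a field as wide as the total count, so the progress prefix stays visually aligned over a whole run. A minimal standalone sketch (hypothetical filenames, not part of the commit):

    paths = [f'sensor_{i}.db' for i in range(12)]  # hypothetical inputs
    total = len(paths)
    width = len(str(total))  # 2 digits for a total of 12
    for idx, path in enumerate(paths):
        # idx and total are right-aligned within a width-character field
        print(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
    # processing [ 0/12] sensor_0.db
    # ...
    # processing [11/12] sensor_11.db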
@@ -156,7 +158,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                 upper = timedelta(days=10)  # kinda arbitrary
                 if not (db_dt - lower < dt < db_dt + upper):
                     # todo could be more defensive??
-                    yield RuntimeError('timestamp too far out', f, name, db_dt, dt)
+                    yield RuntimeError('timestamp too far out', path, name, db_dt, dt)
                     continue

                 # err.. sometimes my values are just interleaved with these for no apparent reason???
@@ -164,7 +166,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                     yield RuntimeError('the weird sensor bug')
                     continue

-                assert -60 <= temp <= 60, (f, dt, temp)
+                assert -60 <= temp <= 60, (path, dt, temp)
                 ##

                 tot += 1
@@ -181,7 +183,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                     dewpoint=dewp,
                 )
                 yield p
-        logger.debug('%s: new %d/%d', f, new, tot)
+        logger.debug(f'{path}: new {new}/{tot}')

     # logger.info('total items: %d', len(merged))
     # for k, v in merged.items():
     #     # TODO shit. quite a few of them have varying values... how is that freaking possible????
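
Note that this hunk also switches logger.debug from lazy %-style arguments to an f-string: the f-string is formatted eagerly even when DEBUG logging is disabled, which is normally negligible but worth knowing. Both spellings side by side (illustrative only):

    import logging
    logger = logging.getLogger('demo')

    path, new, tot = 'a.db', 3, 10
    logger.debug('%s: new %d/%d', path, new, tot)  # lazy: only formatted if DEBUG is enabled
    logger.debug(f'{path}: new {new}/{tot}')       # eager: formatted before the call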


@@ -75,14 +75,16 @@ class Message(_BaseMessage):
 Entity = Union[Sender, Thread, _Message]


 def _entities() -> Iterator[Res[Entity]]:
-    dbs = inputs()
-    for i, f in enumerate(dbs):
-        logger.debug(f'processing {f} {i}/{len(dbs)}')
-        with sqlite_connection(f, immutable=True, row_factory='row') as db:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connection(path, immutable=True, row_factory='row') as db:
             try:
                 yield from _process_db(db)
             except Exception as e:
-                yield echain(RuntimeError(f'While processing {f}'), cause=e)
+                yield echain(RuntimeError(f'While processing {path}'), cause=e)


 def _normalise_user_id(ukey: str) -> str:
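
The yield echain(...) line is the codebase's errors-as-values convention: rather than letting one corrupt database abort the whole iteration, the exception is wrapped (echain presumably attaches the original exception as the cause, akin to raise ... from ...) and yielded as an ordinary item for the caller to inspect. A simplified, self-contained sketch of the pattern, with stand-in names rather than the real my.core API:

    from typing import Iterator, List, Union

    Res = Union[int, Exception]  # stand-in for my.core's Res[T]

    def parse_all(raw: List[str]) -> Iterator[Res]:
        for item in raw:
            try:
                yield int(item)
            except Exception as e:
                # yield the error as a value; iteration continues with the next item
                yield e

    for r in parse_all(['1', 'oops', '3']):
        if isinstance(r, Exception):
            print('skipping bad item:', r)
        else:
            print('parsed:', r)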


@@ -46,6 +46,8 @@ def inputs() -> Sequence[Path]:
 def events() -> Iterable[Res[Event]]:
     last = max(inputs())

     logger.info(f'extracting data from {last}')

+    # a bit naughty and ad-hoc, but we will generify reading from tar.gz once we have more examples
+    # another one is zulip archive
     if last.is_dir():
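
The added comment gestures at generic tar.gz handling; with the stdlib that would look roughly like the following (the archive and member names are hypothetical):

    import tarfile

    # hedged sketch: open a gzipped export and read one JSON member from it
    with tarfile.open('export.tar.gz', 'r:gz') as tf:
        member = tf.extractfile('events.json')  # hypothetical member name
        payload = member.read() if member is not None else b''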


@@ -180,15 +180,17 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
     # NOTE: definitely need to merge multiple, app seems to recycle old messages
     # TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
     # todo use TypedDict?
-    dbs = inputs()
-    for f in dbs:
-        logger.info(f'{f} : processing...')
-        with sqlite_connect_immutable(f) as db:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connect_immutable(path) as db:
             try:
                 yield from _process_db(db=db)
             except Exception as e:
                 # todo use error policy here
-                yield echain(RuntimeError(f'While processing {f}'), cause=e)
+                yield echain(RuntimeError(f'While processing {path}'), cause=e)


 @mcachew(depends_on=inputs)
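
sqlite_connect_immutable is this codebase's helper; the likely mechanism underneath (an assumption, not something this diff confirms) is SQLite's immutable=1 URI parameter, which tells SQLite the file cannot change and skips locking, appropriate for read-only snapshots:

    import sqlite3

    # assumption: roughly what an "immutable" connection amounts to in the stdlib
    conn = sqlite3.connect('file:messages.db?immutable=1', uri=True)  # hypothetical path
    try:
        rows = conn.execute('SELECT 1').fetchall()
    finally:
        conn.close()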


@@ -2,9 +2,13 @@
 Last.fm scrobbles
 '''

-from .core import Paths, dataclass
+from my.core import Paths, dataclass, make_logger
 from my.config import lastfm as user_config

+logger = make_logger(__name__)
+

 @dataclass
 class lastfm(user_config):
     """
@@ -13,7 +17,7 @@ class lastfm(user_config):
     export_path: Paths


-from .core.cfg import make_config
+from my.core.cfg import make_config
 config = make_config(lastfm)
@@ -22,7 +26,7 @@ import json
 from pathlib import Path
 from typing import NamedTuple, Sequence, Iterable

-from .core.common import mcachew, Json, get_files
+from my.core.common import mcachew, Json, get_files


 def inputs() -> Sequence[Path]:
@@ -64,19 +68,20 @@ class Scrobble(NamedTuple):
 @mcachew(depends_on=inputs)
 def scrobbles() -> Iterable[Scrobble]:
     last = max(inputs())
+    logger.info(f'loading data from {last}')
     j = json.loads(last.read_text())

     for raw in reversed(j):
         yield Scrobble(raw=raw)


-from .core import stat, Stats
+from my.core import stat, Stats

 def stats() -> Stats:
     return stat(scrobbles)


 def fill_influxdb() -> None:
-    from .core import influxdb
+    from my.core import influxdb
     # todo needs to be more automatic
     sd = (dict(
         dt=x.dt,