general: improve logging during file processing in various modules

This commit is contained in:
karlicoss 2023-10-29 00:11:58 +01:00
parent bd27bd4c24
commit ea195e3d17
4 changed files with 29 additions and 15 deletions

View file

@@ -81,10 +81,13 @@ _PREFIX = 'com.simon.harmonichackernews.KEY_SHARED_PREFERENCES'
 def _saved() -> Iterator[Res[Saved]]:
-    for p in inputs():
-        logger.info(f'processing: {p}')
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
         # TODO defensive for each item!
-        tr = etree.parse(p)
+        tr = etree.parse(path)
         res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
         cached_ids = [x.text.split('-')[0] for x in res]

View file

@@ -7,7 +7,7 @@ from typing import Any, Dict, Iterator, NamedTuple, Sequence
 from more_itertools import unique_everseen
-from my.core import get_files, datetime_aware
+from my.core import get_files, datetime_aware, make_logger
 from my.core.sqlite import sqlite_connection
 from my.config import materialistic as config  # todo migrate config to my.hackernews.materialistic
@@ -15,6 +15,9 @@ from my.config import materialistic as config  # todo migrate config to my.hacke
 from .common import hackernews_link
+logger = make_logger(__name__)
 def inputs() -> Sequence[Path]:
     return get_files(config.export_path)
@@ -51,8 +54,12 @@ class Saved(NamedTuple):
 def _all_raw() -> Iterator[Row]:
-    for db in inputs():
-        with sqlite_connection(db, immutable=True, row_factory='dict') as conn:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connection(path, immutable=True, row_factory='dict') as conn:
             yield from conn.execute('SELECT * FROM saved ORDER BY time')

View file

@@ -87,10 +87,12 @@ Entity = Union[Person, Match, Message]
 def _entities() -> Iterator[Res[_Entity]]:
-    dbs = inputs()
-    for i, db_file in enumerate(dbs):
-        logger.info(f'processing {db_file} {i}/{len(dbs)}')
-        with sqlite_connection(db_file, immutable=True, row_factory='row') as db:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connection(path, immutable=True, row_factory='row') as db:
             yield from _handle_db(db)

View file

@@ -189,14 +189,16 @@ def _process_db(db: sqlite3.Connection):
 def _messages() -> Iterator[Res[Message]]:
-    dbs = inputs()
-    for i, f in enumerate(dbs):
-        logger.info(f'processing {f} {i}/{len(dbs)}')
-        with sqlite_connection(f, immutable=True, row_factory='row') as db:
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+        with sqlite_connection(path, immutable=True, row_factory='row') as db:
             try:
                 yield from _process_db(db)
             except Exception as e:
-                yield echain(RuntimeError(f'While processing {f}'), cause=e)
+                yield echain(RuntimeError(f'While processing {path}'), cause=e)
 def messages() -> Iterator[Res[Message]]: