general: improve logging during file processing in various modules

This commit is contained in:
karlicoss 2023-10-29 00:11:58 +01:00
parent bd27bd4c24
commit ea195e3d17
4 changed files with 29 additions and 15 deletions

View file

@ -81,10 +81,13 @@ _PREFIX = 'com.simon.harmonichackernews.KEY_SHARED_PREFERENCES'
def _saved() -> Iterator[Res[Saved]]:
for p in inputs():
logger.info(f'processing: {p}')
paths = inputs()
total = len(paths)
width = len(str(total))
for idx, path in enumerate(paths):
logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
# TODO defensive for each item!
tr = etree.parse(p)
tr = etree.parse(path)
res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
cached_ids = [x.text.split('-')[0] for x in res]

View file

@ -7,7 +7,7 @@ from typing import Any, Dict, Iterator, NamedTuple, Sequence
from more_itertools import unique_everseen
from my.core import get_files, datetime_aware
from my.core import get_files, datetime_aware, make_logger
from my.core.sqlite import sqlite_connection
from my.config import materialistic as config # todo migrate config to my.hackernews.materialistic
@ -15,6 +15,9 @@ from my.config import materialistic as config # todo migrate config to my.hacke
from .common import hackernews_link
logger = make_logger(__name__)
def inputs() -> Sequence[Path]:
    """Return the files that ``get_files`` resolves from ``config.export_path``."""
    export_path = config.export_path
    return get_files(export_path)
@ -51,8 +54,12 @@ class Saved(NamedTuple):
def _all_raw() -> Iterator[Row]:
    """Yield every row of the ``saved`` table from each input database.

    Databases are visited in the order returned by ``inputs()``; within one
    database the rows are yielded ordered by their ``time`` column. One
    progress line is logged per database before it is opened.
    """
    paths = inputs()
    total = len(paths)
    # Pad both counters to the width of `total` so successive log lines align.
    width = len(str(total))
    for idx, path in enumerate(paths):
        # idx is zero-based here (first file is logged as 0, last as total - 1).
        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
        # NOTE(review): immutable=True is forwarded to sqlite_connection;
        # presumably it opens the database read-only — confirm in my.core.sqlite.
        with sqlite_connection(path, immutable=True, row_factory='dict') as conn:
            yield from conn.execute('SELECT * FROM saved ORDER BY time')