general: improve logging during file processing in various modules
This commit is contained in:
parent
bd27bd4c24
commit
ea195e3d17
4 changed files with 29 additions and 15 deletions
|
@@ -81,10 +81,13 @@ _PREFIX = 'com.simon.harmonichackernews.KEY_SHARED_PREFERENCES'
|
||||||
|
|
||||||
|
|
||||||
def _saved() -> Iterator[Res[Saved]]:
|
def _saved() -> Iterator[Res[Saved]]:
|
||||||
for p in inputs():
|
paths = inputs()
|
||||||
logger.info(f'processing: {p}')
|
total = len(paths)
|
||||||
|
width = len(str(total))
|
||||||
|
for idx, path in enumerate(paths):
|
||||||
|
logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
|
||||||
# TODO defensive for each item!
|
# TODO defensive for each item!
|
||||||
tr = etree.parse(p)
|
tr = etree.parse(path)
|
||||||
|
|
||||||
res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
|
res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
|
||||||
cached_ids = [x.text.split('-')[0] for x in res]
|
cached_ids = [x.text.split('-')[0] for x in res]
|
||||||
|
|
|
@@ -7,7 +7,7 @@ from typing import Any, Dict, Iterator, NamedTuple, Sequence
|
||||||
|
|
||||||
from more_itertools import unique_everseen
|
from more_itertools import unique_everseen
|
||||||
|
|
||||||
from my.core import get_files, datetime_aware
|
from my.core import get_files, datetime_aware, make_logger
|
||||||
from my.core.sqlite import sqlite_connection
|
from my.core.sqlite import sqlite_connection
|
||||||
|
|
||||||
from my.config import materialistic as config # todo migrate config to my.hackernews.materialistic
|
from my.config import materialistic as config # todo migrate config to my.hackernews.materialistic
|
||||||
|
@@ -15,6 +15,9 @@ from my.config import materialistic as config # todo migrate config to my.hacke
|
||||||
from .common import hackernews_link
|
from .common import hackernews_link
|
||||||
|
|
||||||
|
|
||||||
|
logger = make_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def inputs() -> Sequence[Path]:
|
def inputs() -> Sequence[Path]:
|
||||||
return get_files(config.export_path)
|
return get_files(config.export_path)
|
||||||
|
|
||||||
|
@@ -51,8 +54,12 @@ class Saved(NamedTuple):
|
||||||
|
|
||||||
|
|
||||||
def _all_raw() -> Iterator[Row]:
|
def _all_raw() -> Iterator[Row]:
|
||||||
for db in inputs():
|
paths = inputs()
|
||||||
with sqlite_connection(db, immutable=True, row_factory='dict') as conn:
|
total = len(paths)
|
||||||
|
width = len(str(total))
|
||||||
|
for idx, path in enumerate(paths):
|
||||||
|
logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
|
||||||
|
with sqlite_connection(path, immutable=True, row_factory='dict') as conn:
|
||||||
yield from conn.execute('SELECT * FROM saved ORDER BY time')
|
yield from conn.execute('SELECT * FROM saved ORDER BY time')
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -87,10 +87,12 @@ Entity = Union[Person, Match, Message]
|
||||||
|
|
||||||
|
|
||||||
def _entities() -> Iterator[Res[_Entity]]:
|
def _entities() -> Iterator[Res[_Entity]]:
|
||||||
dbs = inputs()
|
paths = inputs()
|
||||||
for i, db_file in enumerate(dbs):
|
total = len(paths)
|
||||||
logger.info(f'processing {db_file} {i}/{len(dbs)}')
|
width = len(str(total))
|
||||||
with sqlite_connection(db_file, immutable=True, row_factory='row') as db:
|
for idx, path in enumerate(paths):
|
||||||
|
logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
|
||||||
|
with sqlite_connection(path, immutable=True, row_factory='row') as db:
|
||||||
yield from _handle_db(db)
|
yield from _handle_db(db)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -189,14 +189,16 @@ def _process_db(db: sqlite3.Connection):
|
||||||
|
|
||||||
|
|
||||||
def _messages() -> Iterator[Res[Message]]:
|
def _messages() -> Iterator[Res[Message]]:
|
||||||
dbs = inputs()
|
paths = inputs()
|
||||||
for i, f in enumerate(dbs):
|
total = len(paths)
|
||||||
logger.info(f'processing {f} {i}/{len(dbs)}')
|
width = len(str(total))
|
||||||
with sqlite_connection(f, immutable=True, row_factory='row') as db:
|
for idx, path in enumerate(paths):
|
||||||
|
logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
|
||||||
|
with sqlite_connection(path, immutable=True, row_factory='row') as db:
|
||||||
try:
|
try:
|
||||||
yield from _process_db(db)
|
yield from _process_db(db)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield echain(RuntimeError(f'While processing {f}'), cause=e)
|
yield echain(RuntimeError(f'While processing {path}'), cause=e)
|
||||||
|
|
||||||
|
|
||||||
def messages() -> Iterator[Res[Message]]:
|
def messages() -> Iterator[Res[Message]]:
|
||||||
|
|
Loading…
Add table
Reference in a new issue