From 012249ceca8a8039dc5d093b3a5679b48039489f Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 8 Jan 2020 22:07:34 +0000 Subject: [PATCH] Use experimental cachew Exceptions --- my/common.py | 2 ++ my/pdfs.py | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/my/common.py b/my/common.py index e1389e3..861e48b 100644 --- a/my/common.py +++ b/my/common.py @@ -125,4 +125,6 @@ def mcachew(*args, **kwargs): warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew') return lambda orig_func: orig_func else: + import cachew.experimental + cachew.experimental.enable_exceptions() # TODO do it only once? return cachew.cachew(*args, **kwargs) diff --git a/my/pdfs.py b/my/pdfs.py index 2191f7b..67d7aa0 100755 --- a/my/pdfs.py +++ b/my/pdfs.py @@ -23,6 +23,7 @@ def get_logger(): def is_ignored(p: Path) -> bool: + # ignore some extremely heavy files return paths.pdfs.is_ignored(p) @@ -85,19 +86,22 @@ def get_annots(p: Path) -> List[Annotation]: with p.open('rb') as fo: f = io.StringIO() with redirect_stderr(f): - # TODO FIXME defensive, try on garbage file (s) (annots, outlines) = pdfannots.process_file(fo, emit_progress=False) # outlines are kinda like TOC, I don't really need them return [as_annotation(raw_ann=a, path=str(p)) for a in annots] # TODO stderr? -# TODO cachew needs to be based on mtime, hence take candidates, not roots -# @mcachew -def iter_annotations(roots=None) -> Iterator[Res[Annotation]]: +def hash_files(pdfs: List[Path]): + # if mtime hasn't changed then the file hasn't changed either + return [(pdf, pdf.stat().st_mtime) for pdf in pdfs] + +# TODO might make more sense to be more fine grained here, e.g. 
cache annotations for individual files + +@mcachew(hashf=hash_files) +def _iter_annotations(pdfs: List[Path]) -> Iterator[Res[Annotation]]: logger = get_logger() - pdfs = list(sorted(candidates(roots=roots))) logger.info('processing %d pdfs', len(pdfs)) # TODO how to print to stdout synchronously? @@ -117,6 +121,11 @@ def iter_annotations(roots=None) -> Iterator[Res[Annotation]]: yield e +def iter_annotations(roots=None) -> Iterator[Res[Annotation]]: + pdfs = list(sorted(candidates(roots=roots))) + yield from _iter_annotations(pdfs=pdfs) + + class Pdf(NamedTuple): path: Path annotations: List[Annotation]