Use experimental cachew Exceptions
This commit is contained in:
parent
a57be019d0
commit
012249ceca
2 changed files with 16 additions and 5 deletions
|
@ -125,4 +125,6 @@ def mcachew(*args, **kwargs):
|
||||||
warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
|
warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
|
||||||
return lambda orig_func: orig_func
|
return lambda orig_func: orig_func
|
||||||
else:
|
else:
|
||||||
|
import cachew.experimental
|
||||||
|
cachew.experimental.enable_exceptions() # TODO do it only once?
|
||||||
return cachew.cachew(*args, **kwargs)
|
return cachew.cachew(*args, **kwargs)
|
||||||
|
|
19
my/pdfs.py
19
my/pdfs.py
|
@ -23,6 +23,7 @@ def get_logger():
|
||||||
|
|
||||||
|
|
||||||
def is_ignored(p: Path) -> bool:
|
def is_ignored(p: Path) -> bool:
|
||||||
|
# ignore some extremely heavy files
|
||||||
return paths.pdfs.is_ignored(p)
|
return paths.pdfs.is_ignored(p)
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,19 +86,22 @@ def get_annots(p: Path) -> List[Annotation]:
|
||||||
with p.open('rb') as fo:
|
with p.open('rb') as fo:
|
||||||
f = io.StringIO()
|
f = io.StringIO()
|
||||||
with redirect_stderr(f):
|
with redirect_stderr(f):
|
||||||
# TODO FIXME defensive, try on garbage file(s)
|
|
||||||
(annots, outlines) = pdfannots.process_file(fo, emit_progress=False)
|
(annots, outlines) = pdfannots.process_file(fo, emit_progress=False)
|
||||||
# outlines are kinda like TOC, I don't really need them
|
# outlines are kinda like TOC, I don't really need them
|
||||||
return [as_annotation(raw_ann=a, path=str(p)) for a in annots]
|
return [as_annotation(raw_ann=a, path=str(p)) for a in annots]
|
||||||
# TODO stderr?
|
# TODO stderr?
|
||||||
|
|
||||||
|
|
||||||
# TODO cachew needs to be based on mtime, hence take candidates, not roots
|
def hash_files(pdfs: List[Path]):
|
||||||
# @mcachew
|
# if mtime hasn't changed then the file hasn't changed either
|
||||||
def iter_annotations(roots=None) -> Iterator[Res[Annotation]]:
|
return [(pdf, pdf.stat().st_mtime) for pdf in pdfs]
|
||||||
|
|
||||||
|
# TODO might make more sense to be more fine grained here, e.g. cache annotations for individual files
|
||||||
|
|
||||||
|
@mcachew(hashf=hash_files)
|
||||||
|
def _iter_annotations(pdfs: List[Path]) -> Iterator[Res[Annotation]]:
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
||||||
pdfs = list(sorted(candidates(roots=roots)))
|
|
||||||
logger.info('processing %d pdfs', len(pdfs))
|
logger.info('processing %d pdfs', len(pdfs))
|
||||||
|
|
||||||
# TODO how to print to stdout synchronously?
|
# TODO how to print to stdout synchronously?
|
||||||
|
@ -117,6 +121,11 @@ def iter_annotations(roots=None) -> Iterator[Res[Annotation]]:
|
||||||
yield e
|
yield e
|
||||||
|
|
||||||
|
|
||||||
|
def iter_annotations(roots=None) -> Iterator[Res[Annotation]]:
|
||||||
|
pdfs = list(sorted(candidates(roots=roots)))
|
||||||
|
yield from _iter_annotations(pdfs=pdfs)
|
||||||
|
|
||||||
|
|
||||||
class Pdf(NamedTuple):
|
class Pdf(NamedTuple):
|
||||||
path: Path
|
path: Path
|
||||||
annotations: List[Annotation]
|
annotations: List[Annotation]
|
||||||
|
|
Loading…
Add table
Reference in a new issue