From 882ceb62fc770971eb5507cb563899dff4d8bb87 Mon Sep 17 00:00:00 2001
From: Tom Craig
Date: Sun, 16 Aug 2020 12:57:20 -0700
Subject: [PATCH] Add a "filelist" parameter to annotated_pdfs

candidates(), iter_annotations() and annotated_pdfs() accept an optional
"filelist" of PDF paths. When it is given, those paths are used instead
of scanning the configured roots.

is_ignored() ignores nothing when the config does not define
is_ignored, and as_annotation() accepts annotations without a 'date'
key.
---
 my/pdfs.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/my/pdfs.py b/my/pdfs.py
index 3c04196..3a4905d 100755
--- a/my/pdfs.py
+++ b/my/pdfs.py
@@ -26,11 +26,31 @@ def get_logger():
 
 
 def is_ignored(p: Path) -> bool:
-    # ignore some extremely heavy files
-    return config.is_ignored(p)
+    """
+    Used to ignore some extremely heavy files
+    Delegates to config.is_ignored when the config defines it;
+    otherwise nothing is ignored (the default is False)
+    """
+    if hasattr(config, 'is_ignored'):
+        return config.is_ignored(p)
+
+    # Default: nothing is ignored
+    return False
 
 
-def candidates(roots=None) -> Iterator[Path]:
+def candidates(filelist=None, roots=None) -> Iterator[Path]:
+    if filelist is not None:
+        return candidates_from_filelist(filelist)
+    else:
+        return candidates_from_roots(roots)
+
+def candidates_from_filelist(filelist) -> Iterator[Path]:
+    for f in filelist:
+        p = Path(f)
+        if not is_ignored(p):
+            yield p
+
+def candidates_from_roots(roots=None) -> Iterator[Path]:
     if roots is None:
         roots = config.roots
 
@@ -58,7 +78,7 @@ def as_annotation(*, raw_ann, path: str) -> Annotation:
     for a in ('boxes', 'rect'):
         if a in d:
             del d[a]
-    dates = d['date']
+    dates = d.get('date')
     date: Optional[datetime] = None
     if dates is not None:
         dates = dates.replace("'", "")
@@ -124,8 +144,8 @@ def _iter_annotations(pdfs: List[Path]) -> Iterator[Res[Annotation]]:
                 yield e
 
 
-def iter_annotations(roots=None) -> Iterator[Res[Annotation]]:
-    pdfs = list(sorted(candidates(roots=roots)))
+def iter_annotations(filelist=None, roots=None) -> Iterator[Res[Annotation]]:
+    pdfs = list(sorted(candidates(filelist=filelist, roots=roots)))
     yield from _iter_annotations(pdfs=pdfs)
 
 
@@ -138,8 +158,8 @@ class Pdf(NamedTuple):
         return self.annotations[-1].date
 
 
-def annotated_pdfs(roots=None) -> Iterator[Res[Pdf]]:
-    it = iter_annotations(roots=roots)
+def annotated_pdfs(filelist=None, roots=None) -> Iterator[Res[Pdf]]:
+    it = iter_annotations(filelist=filelist, roots=roots)
     vit, eit = split_errors(it, ET=Exception)
 
     for k, g in group_by_key(vit, key=lambda a: a.path).items():
@@ -190,3 +210,4 @@ def main():
                 logger.info('collected annotations in: %s', r.path)
                 for a in r.annotations:
                     pprint(a)
+