diff --git a/my/location/takeout.py b/my/location/takeout.py
index 6c81992..482b51d 100644
--- a/my/location/takeout.py
+++ b/my/location/takeout.py
@@ -19,7 +19,7 @@ try:
     import ijson.backends.yajl2_cffi as ijson # type: ignore
 except:
     # fallback to default backend. warning?
-    import ijson
+    import ijson # type: ignore
 
 from kython import kompress # TODO
 
diff --git a/my/pdfs.py b/my/pdfs.py
index 3cbb1e2..2191f7b 100755
--- a/my/pdfs.py
+++ b/my/pdfs.py
@@ -1,43 +1,46 @@
 #!/usr/bin/env python3
-from .common import import_file
-
+from concurrent.futures import ProcessPoolExecutor
+from datetime import datetime
+import re
+import sys
+import io
+import logging
 from pathlib import Path
+from typing import NamedTuple, List, Optional, Iterator
+from contextlib import redirect_stderr
 
 
+from .common import import_file, mcachew, group_by_key
+from .error import Res, split_errors
+
 # path to pdfannots (https://github.com/0xabu/pdfannots)
 import mycfg.repos.pdfannots.pdfannots as pdfannots
 from mycfg import paths
 
 
-from datetime import datetime
-import re
-from subprocess import CompletedProcess
-import sys
-import io
-from typing import NamedTuple, List, Optional
-from contextlib import redirect_stderr
-import logging
-
-
 def get_logger():
     return logging.getLogger('my.pdfs')
 
 
-def get_candidates(roots=None) -> List[Path]:
-    if roots is None:
-        roots = paths.pdfs.roots
-
-    import itertools
-    pdfs = itertools.chain.from_iterable(Path(p).glob('**/*.pdf') for p in roots)
-    return list(sorted(pdfs))
-
-
-def is_ignored(p):
+def is_ignored(p: Path) -> bool:
     return paths.pdfs.is_ignored(p)
 
 
-# TODO cachew?
+def candidates(roots=None) -> Iterator[Path]:
+    if roots is None:
+        roots = paths.pdfs.roots
+
+    for r in roots:
+        for p in Path(r).rglob('*.pdf'):
+            if not is_ignored(p):
+                yield p
+
+# TODO canonical names
+# TODO be defensive if the pdf was removed; the cachew key also needs to be defensive
+
+
 class Annotation(NamedTuple):
+    path: str
     author: Optional[str]
     page: int
     highlight: Optional[str]
@@ -45,19 +48,9 @@ class Annotation(NamedTuple):
     date: Optional[datetime]
 
 
-class Pdf(NamedTuple):
-    path: Path
-    annotations: List[Annotation]
-    stderr: str
-
-    @property
-    def date(self):
-        return self.annotations[-1].date
-
-
-def as_annotation(ann) -> Annotation:
-    d = vars(ann)
-    d['page'] = ann.page.pageno
+def as_annotation(*, raw_ann, path: str) -> Annotation:
+    d = vars(raw_ann)
+    d['page'] = raw_ann.page.pageno
     for a in ('boxes', 'rect'):
         if a in d:
             del d[a]
@@ -76,95 +69,75 @@ def as_annotation(ann) -> Annotation:
             except ValueError:
                 pass
         else:
+            # TODO defensive?
             raise RuntimeError(dates)
     return Annotation(
-        author   =d['author'],
-        page     =d['page'],
-        highlight=d['text'],
-        comment  =d['contents'],
-        date     =date,
+        path      = path,
+        author    = d['author'],
+        page      = d['page'],
+        highlight = d['text'],
+        comment   = d['contents'],
+        date      = date,
     )
 
 
-class PdfAnnotsException(Exception):
-    def __init__(self, path: Path) -> None:
-        self.path = path
-
-
-def _get_annots(p: Path) -> Pdf:
-    progress = False
+def get_annots(p: Path) -> List[Annotation]:
     with p.open('rb') as fo:
         f = io.StringIO()
         with redirect_stderr(f):
-            (annots, outlines) = pdfannots.process_file(fo, emit_progress=progress)
+            # TODO FIXME be defensive here; try on garbage file(s)
+            (annots, outlines) = pdfannots.process_file(fo, emit_progress=False)
             # outlines are kinda like TOC, I don't really need them
-    return Pdf(
-        path=p,
-        annotations=list(map(as_annotation, annots)),
-        stderr=f.getvalue(),
-    )
+    return [as_annotation(raw_ann=a, path=str(p)) for a in annots]
+    # TODO stderr?
 
 
-def get_annots(p: Path) -> Pdf:
-    try:
-        return _get_annots(p)
-    except Exception as e:
-        raise PdfAnnotsException(p) from e
-
-
-def get_annotated_pdfs(roots=None) -> List[Pdf]:
+# TODO cachew needs to be based on mtime, hence take candidates, not roots
+# @mcachew
+def iter_annotations(roots=None) -> Iterator[Res[Annotation]]:
     logger = get_logger()
 
-    pdfs = get_candidates(roots=roots)
+    pdfs = list(sorted(candidates(roots=roots)))
     logger.info('processing %d pdfs', len(pdfs))
 
-    collected = []
-    errors = []
-    def callback(res: Pdf):
-        if is_ignored(res.path):
-            return
-        logger.info('processed %s', res.path)
+    # TODO how to print to stdout synchronously?
+    with ProcessPoolExecutor() as pool:
+        futures = [
+            pool.submit(get_annots, pdf)
+            for pdf in pdfs
+        ]
+        for f, pdf in zip(futures, pdfs):
+            try:
+                yield from f.result()
+            except Exception as e:
+                logger.error('While processing %s:', pdf)
+                logger.exception(e)
+                # TODO not sure if we should attach the pdf as well; it's a bit annoying to pass around?
+                # also really have to think about interaction with cachew...
+                yield e
 
-        if len(res.stderr) > 0:
-            err = 'while processing %s: %s' % (res.path, res.stderr)
-            logger.error(err)
-            errors.append(err)
-        elif len(res.annotations) > 0:
-            logger.info('collected %s annotations', len(res.annotations))
-            collected.append(res)
 
-    def error_cb(err):
-        if isinstance(err, PdfAnnotsException):
-            if is_ignored(err.path):
-                # TODO log?
-                return
-            logger.error('while processing %s', err.path)
-            err = err.__cause__
-        logger.exception(err)
-        errors.append(str(err))
+class Pdf(NamedTuple):
+    path: Path
+    annotations: List[Annotation]
 
-    from multiprocessing.pool import Pool
-    with Pool() as p:
-        handles = [p.apply_async(
-            get_annots,
-            (pdf, ),
-            callback=callback,
-            error_callback=error_cb,
-        ) for pdf in pdfs if not is_ignored(pdf)] # TODO log if we skip?
-        for h in handles:
-            h.wait()
+    @property
+    def date(self):
+        return self.annotations[-1].date
 
-    # TODO more defensive error processing?
-    if len(errors) > 0:
-        logger.error('had %d errors while processing', len(errors))
-        sys.exit(2)
 
-    return collected
+def annotated_pdfs(roots=None) -> Iterator[Res[Pdf]]:
+    it = iter_annotations(roots=roots)
+    vit, eit = split_errors(it, ET=Exception)
+
+    for k, g in group_by_key(vit, key=lambda a: a.path).items():
+        yield Pdf(path=Path(k), annotations=g)
+    yield from eit
 
 
 def test():
     res = get_annots(Path('/L/zzz_syncthing/TODO/TOREAD/done/mature-optimization_wtf.pdf'))
-    assert len(res.annotations) > 0
+    assert len(res) > 3
 
 
 def test2():
@@ -172,6 +145,23 @@ def test2():
     print(res)
 
 
+def test_with_error():
+    # TODO need example of pdf file...
+    import tempfile
+    with tempfile.TemporaryDirectory() as td:
+        root = Path(td)
+        g = root / 'garbage.pdf'
+        g.write_text('garbage')
+        roots = [
+            root,
+            # '/usr/share/doc/texlive-doc/latex/amsrefs/',
+        ]
+        # TODO find some pdfs that actually have annotations...
+        annots = list(iter_annotations(roots=roots))
+    assert len(annots) == 1
+    assert isinstance(annots[0], Exception)
+
+
 def main():
     from pprint import pprint
 
@@ -179,9 +169,12 @@ def main():
     from .common import setup_logger
     setup_logger(logger, level=logging.DEBUG)
 
-    collected = get_annotated_pdfs()
+    collected = list(annotated_pdfs())
     if len(collected) > 0:
         for r in collected:
-            logger.warning('collected annotations in: %s', r.path)
-            for a in r.annotations:
-                pprint(a)
+            if isinstance(r, Exception):
+                logger.exception(r)
+            else:
+                logger.info('collected annotations in: %s', r.path)
+                for a in r.annotations:
+                    pprint(a)