diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/pdfs/Information Architecture for the World Wide Web.pdf b/tests/pdfs/Information Architecture for the World Wide Web.pdf
new file mode 100644
index 0000000..4c69942
Binary files /dev/null and b/tests/pdfs/Information Architecture for the World Wide Web.pdf differ
diff --git a/tests/pdfs/__init__.py b/tests/pdfs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/pdfs/test_pdfs.py b/tests/pdfs/test_pdfs.py
new file mode 100644
index 0000000..afc35d9
--- /dev/null
+++ b/tests/pdfs/test_pdfs.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""Tests for my.pdfs, run against a real annotated PDF stored next to this file."""
+
+import inspect
+from pathlib import Path
+import tempfile
+
+from my.pdfs import get_annots, annotated_pdfs
+
+
+ROOT = Path(__file__).parent.absolute()
+# Fixture PDF shared by both tests; it lives in the same directory as this module.
+SAMPLE_PDF = ROOT / 'Information Architecture for the World Wide Web.pdf'
+# Highlight texts expected from the fixture's three annotations. One of them is
+# the empty string (presumably an annotation without highlighted text -- confirm
+# against the PDF).
+EXPECTED_HIGHLIGHTS = {
+    'Since 1994, when we first began organizing web sites, we have enjoyed a rare opportunity to participate in the birth of a new discipline. ',
+    'And yet, unlearn we must, ',
+    '',
+}
+
+
+def test_get_annots():
+    """
+    Test get_annots, with a real PDF file
+
+    get_annots should return three Annotation objects whose highlights
+    match EXPECTED_HIGHLIGHTS.
+    """
+    annotations = get_annots(SAMPLE_PDF)
+    assert len(annotations) == 3
+    assert {a.highlight for a in annotations} == EXPECTED_HIGHLIGHTS
+
+
+def test_annotated_pdfs_with_filelist():
+    """
+    Test annotated_pdfs, with a real PDF file
+
+    annotated_pdfs must be a generator function; for a one-file filelist the
+    Pdf objects it yields should carry three annotations in total, with
+    highlights matching EXPECTED_HIGHLIGHTS.
+    """
+    annotations_generator = annotated_pdfs(filelist=[SAMPLE_PDF], roots=None)
+
+    assert inspect.isgeneratorfunction(annotated_pdfs)
+
+    highlights_from_pdfs = []
+    # Consume the generator directly; no intermediate list is needed.
+    for pdf_object in annotations_generator:
+        highlights_from_pdfs.extend(a.highlight for a in pdf_object.annotations)
+
+    assert len(highlights_from_pdfs) == 3
+    assert set(highlights_from_pdfs) == EXPECTED_HIGHLIGHTS