# HPI/tests/pdfs.py
# 2021-04-01 17:27:06 +01:00
#
# 43 lines
# 1.4 KiB
# Python

import inspect
from pathlib import Path
import tempfile
from my.pdfs import get_annots, annotated_pdfs
from .common import testdata
# Highlight texts that the tests in this module expect to extract from the
# sample PDF. The tests compare against this as a set, so order is irrelevant;
# note that one of the three expected highlights is the empty string.
EXPECTED_HIGHLIGHTS = {
    "",
    "And yet, unlearn we must, ",
    "Since 1994, when we first began organizing web sites, we have enjoyed a rare opportunity to participate in the birth of a new discipline. ",
}
def test_get_annots() -> None:
    """
    Run get_annots against a real PDF file located via testdata().

    The call must produce exactly three annotations, and the set of their
    ``highlight`` values must equal EXPECTED_HIGHLIGHTS.
    """
    pdf_path = testdata() / 'pdfs' / 'Information Architecture for the World Wide Web.pdf'
    found = get_annots(pdf_path)

    assert len(found) == 3

    # Compare as a set: the order in which annotations come back is not checked.
    highlights = {annot.highlight for annot in found}
    assert highlights == EXPECTED_HIGHLIGHTS
def test_annotated_pdfs_with_filelist() -> None:
    """
    Run annotated_pdfs on an explicit file list holding one real PDF file.

    Checks that:
    - annotated_pdfs is a generator function;
    - none of the items it yields is an Exception instance;
    - the ``highlight`` values collected from every yielded item's
      ``annotations`` number exactly three and, as a set, equal
      EXPECTED_HIGHLIGHTS.
    """
    # Verify the API shape before calling it, so that a change in the kind of
    # callable is reported as such rather than as a downstream failure.
    assert inspect.isgeneratorfunction(annotated_pdfs)

    filelist = [testdata() / 'pdfs' / 'Information Architecture for the World Wide Web.pdf']
    annotations_generator = annotated_pdfs(filelist=filelist, roots=None)

    highlights_from_pdfs = []
    # Iterate the generator directly; materialising it into a list first only
    # builds a throwaway copy.
    for pdf_object in annotations_generator:
        # Include the yielded error in the failure message so the underlying
        # cause is visible in the test report.
        assert not isinstance(pdf_object, Exception), repr(pdf_object)
        highlights_from_pdfs.extend(a.highlight for a in pdf_object.annotations)

    # The length is checked on the list (not the set) so duplicates would be caught.
    assert len(highlights_from_pdfs) == 3
    assert set(highlights_from_pdfs) == EXPECTED_HIGHLIGHTS