my.pdfs: cleanup/refactor
- modernize:
  - add REQUIRES spec for pdfannots library
  - config dataclass/config stub
  - stats function
  - absolute my.core imports in anticipation of splitting core
- use 'paths' instead of 'roots' (better reflects the semantics), use get_files; backward compatible via config migration
- properly run tests/mypy
This commit is contained in:
parent
e7604c188e
commit
ad177a1ccd
6 changed files with 177 additions and 108 deletions
|
@ -1,11 +1,60 @@
|
|||
import inspect
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
|
||||
from my.pdfs import get_annots, annotated_pdfs
|
||||
from more_itertools import ilen
|
||||
|
||||
import pytest
|
||||
|
||||
from .common import testdata
|
||||
|
||||
|
||||
def test_module(with_config) -> None:
|
||||
# TODO crap. if module is imported too early (on the top level), it makes it super hard to override config
|
||||
# need to at least detect it...
|
||||
from my.pdfs import annotations, annotated_pdfs
|
||||
|
||||
# todo check types etc as well
|
||||
assert ilen(annotations()) >= 3
|
||||
assert ilen(annotated_pdfs()) >= 1
|
||||
|
||||
|
||||
def test_with_error(with_config, tmp_path: Path) -> None:
|
||||
"""should handle crappy files gracefully"""
|
||||
root = tmp_path
|
||||
g = root / 'garbage.pdf'
|
||||
g.write_text('garbage')
|
||||
from my.config import pdfs
|
||||
del pdfs.roots # meh. otherwise legacy config value 'wins'
|
||||
pdfs.paths = (root,)
|
||||
|
||||
from my.pdfs import annotations
|
||||
annots = list(annotations())
|
||||
[annot] = annots
|
||||
assert isinstance(annot, Exception)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def with_config():
|
||||
from .common import reset_modules
|
||||
reset_modules() # todo ugh.. getting boilerplaty.. need to make it a bit more automatic..
|
||||
|
||||
# extra_data = Path(__file__).absolute().parent / 'extra/data/polar'
|
||||
# assert extra_data.exists(), extra_data
|
||||
# todo hmm, turned out no annotations in these ones.. whatever
|
||||
|
||||
class user_config:
|
||||
roots = [
|
||||
testdata(),
|
||||
]
|
||||
|
||||
import my.core.cfg as C
|
||||
with C.tmp_config() as config:
|
||||
config.pdfs = user_config # type: ignore
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
reset_modules()
|
||||
|
||||
|
||||
EXPECTED_HIGHLIGHTS = {
|
||||
'Since 1994, when we first began organizing web sites, we have enjoyed a rare opportunity to participate in the birth of a new discipline. ',
|
||||
'And yet, unlearn we must, ',
|
||||
|
@ -18,6 +67,8 @@ def test_get_annots() -> None:
|
|||
Test get_annots, with a real PDF file
|
||||
get_annots should return a list of three Annotation objects
|
||||
"""
|
||||
from my.pdfs import get_annots
|
||||
|
||||
annotations = get_annots(testdata() / 'pdfs' / 'Information Architecture for the World Wide Web.pdf')
|
||||
assert len(annotations) == 3
|
||||
assert set([a.highlight for a in annotations]) == EXPECTED_HIGHLIGHTS
|
||||
|
@ -28,9 +79,12 @@ def test_annotated_pdfs_with_filelist() -> None:
|
|||
Test annotated_pdfs, with a real PDF file
|
||||
annotated_pdfs should return a list of one Pdf object, with three Annotations
|
||||
"""
|
||||
filelist = [testdata() / 'pdfs' / 'Information Architecture for the World Wide Web.pdf']
|
||||
annotations_generator = annotated_pdfs(filelist=filelist, roots=None)
|
||||
from my.pdfs import annotated_pdfs
|
||||
|
||||
filelist = [testdata() / 'pdfs' / 'Information Architecture for the World Wide Web.pdf']
|
||||
annotations_generator = annotated_pdfs(filelist=filelist)
|
||||
|
||||
import inspect
|
||||
assert inspect.isgeneratorfunction(annotated_pdfs)
|
||||
|
||||
highlights_from_pdfs = []
|
||||
|
@ -41,3 +95,8 @@ def test_annotated_pdfs_with_filelist() -> None:
|
|||
|
||||
assert len(highlights_from_pdfs) == 3
|
||||
assert set(highlights_from_pdfs) == EXPECTED_HIGHLIGHTS
|
||||
|
||||
|
||||
# todo old test on my(karlicoss) computer:
|
||||
# - mature-optimization_wtf.pdf: >3 annotations?
|
||||
# - nonlinear2.pdf
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue