core/structure: add partial matching (#212)

* core/structure: add partial matching
This commit is contained in:
seanbreckenridge 2022-02-10 00:49:13 -08:00 committed by GitHub
parent 62832a6756
commit bea2c6a201
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 8 deletions

View file

@ -10,7 +10,7 @@ from pathlib import Path
from . import warnings as core_warnings from . import warnings as core_warnings
def _structure_exists(base_dir: Path, paths: Sequence[str]) -> bool: def _structure_exists(base_dir: Path, paths: Sequence[str], partial: bool = False) -> bool:
""" """
Helper function for match_structure to check if Helper function for match_structure to check if
all subpaths exist at some base directory all subpaths exist at some base directory
@ -24,11 +24,11 @@ def _structure_exists(base_dir: Path, paths: Sequence[str]) -> bool:
_structure_exists(Path("dir1"), ["index.json", "messages/messages.csv"]) _structure_exists(Path("dir1"), ["index.json", "messages/messages.csv"])
""" """
for p in paths: targets_exist = ((base_dir / f).exists() for f in paths)
target: Path = base_dir / p if partial:
if not target.exists(): return any(targets_exist)
return False else:
return True return all(targets_exist)
ZIP_EXT = {".zip"} ZIP_EXT = {".zip"}
@ -38,12 +38,17 @@ ZIP_EXT = {".zip"}
def match_structure( def match_structure(
base: Path, base: Path,
expected: Union[str, Sequence[str]], expected: Union[str, Sequence[str]],
*,
partial: bool = False,
) -> Generator[Tuple[Path, ...], None, None]: ) -> Generator[Tuple[Path, ...], None, None]:
""" """
Given a 'base' directory or zipfile, recursively search for one or more paths that match the Given a 'base' directory or zipfile, recursively search for one or more paths that match the
pattern described in 'expected'. That can be a single string, or a list pattern described in 'expected'. That can be a single string, or a list
of relative paths (as strings) you expect at the same directory. of relative paths (as strings) you expect at the same directory.
If 'partial' is True, a location matches when at least one of the items in
'expected' is present, instead of requiring all of them.
This reduces the chances of the user misconfiguring gdpr exports, e.g. This reduces the chances of the user misconfiguring gdpr exports, e.g.
if they zipped the folders instead of the parent directory or vice-versa if they zipped the folders instead of the parent directory or vice-versa
@ -127,8 +132,7 @@ def match_structure(
while len(possible_targets) > 0: while len(possible_targets) > 0:
p = possible_targets.pop(0) p = possible_targets.pop(0)
# factored out into a function to avoid weird stuff with continues/loop state if _structure_exists(p, expected, partial=partial):
if _structure_exists(p, expected):
matches.append(p) matches.append(p)
else: else:
# extend the list of possible targets with any subdirectories # extend the list of possible targets with any subdirectories

View file

@ -29,6 +29,14 @@ def test_gdpr_unzip() -> None:
assert not extracted.exists() assert not extracted.exists()
def test_match_partial() -> None:
    """Check that partial=True accepts directories holding only some expected paths."""
    # under partial matching, both the 'broken' and 'gdpr_export' directories qualify
    subdirs_root = structure_data / "gdpr_subdirs"
    with match_structure(subdirs_root, expected=gdpr_expected, partial=True) as matched:
        assert len(matched) == 2
def test_not_directory() -> None: def test_not_directory() -> None:
with pytest.raises(NotADirectoryError, match=r"Expected either a zipfile or a directory"): with pytest.raises(NotADirectoryError, match=r"Expected either a zipfile or a directory"):
with match_structure( with match_structure(