core: make discovery rely on ast module more, add test

This commit is contained in:
Dima Gerasimov 2021-02-19 01:39:26 +00:00 committed by karlicoss
parent a3305677b2
commit f90599d7e4
2 changed files with 63 additions and 22 deletions

View file

@ -6,6 +6,8 @@ This potentially allows it to be:
- robust: can discover modules that can't be imported, generally makes it foolproof - robust: can discover modules that can't be imported, generally makes it foolproof
- faster: importing is slow and with tens of modules can be noteiceable - faster: importing is slow and with tens of modules can be noteiceable
- secure: can be executed in a sandbox & used during setup - secure: can be executed in a sandbox & used during setup
It should be free of external modules, importlib, exec, etc. etc.
''' '''
REQUIRES = 'REQUIRES' REQUIRES = 'REQUIRES'
@ -13,6 +15,7 @@ NOT_HPI_MODULE_VAR = '__NOT_HPI_MODULE__'
### ###
import ast
from typing import Optional, Sequence, NamedTuple, Iterable from typing import Optional, Sequence, NamedTuple, Iterable
from pathlib import Path from pathlib import Path
import re import re
@ -42,7 +45,18 @@ def ignored(m: str) -> bool:
return re.match(f'^my.({exs})$', m) is not None return re.match(f'^my.({exs})$', m) is not None
import ast def _is_not_module_src(src: Path) -> bool:
a: ast.Module = ast.parse(src.read_text())
return _is_not_module_ast(a)
def _is_not_module_ast(a: ast.Module) -> bool:
return any(
getattr(node, 'name', None) == NOT_HPI_MODULE_VAR # direct definition
or any(getattr(n, 'name', None) == NOT_HPI_MODULE_VAR for n in getattr(node, 'names', [])) # import from
for node in a.body
)
# todo should be defensive? not sure # todo should be defensive? not sure
def _extract_requirements(a: ast.Module) -> Requires: def _extract_requirements(a: ast.Module) -> Requires:
@ -91,12 +105,7 @@ def all_modules() -> Iterable[HPIModule]:
if ignored(m): if ignored(m):
continue continue
a: ast.Module = ast.parse(f.read_text()) a: ast.Module = ast.parse(f.read_text())
is_not_module = any( if _is_not_module_ast(a):
getattr(node, 'name', None) == NOT_HPI_MODULE_VAR # direct definition
or any(getattr(n, 'name', None) == NOT_HPI_MODULE_VAR for n in getattr(node, 'names', [])) # import from
for node in a.body
)
if is_not_module:
continue continue
doc = ast.get_docstring(a, clean=False) doc = ast.get_docstring(a, clean=False)

View file

@ -7,7 +7,7 @@ import re
import sys import sys
from typing import List, Iterable, Optional from typing import List, Iterable, Optional
from .discovery_pure import HPIModule, ignored # legacy from .discovery_pure import HPIModule, ignored, _is_not_module_src
def modules() -> Iterable[HPIModule]: def modules() -> Iterable[HPIModule]:
@ -31,21 +31,23 @@ __NOT_HPI_MODULE__ = 'Import this to mark a python file as a helper, not an actu
from .discovery_pure import NOT_HPI_MODULE_VAR from .discovery_pure import NOT_HPI_MODULE_VAR
assert NOT_HPI_MODULE_VAR in globals() # check name consistency assert NOT_HPI_MODULE_VAR in globals() # check name consistency
def has_not_module_flag(module: str) -> bool:
# if module == 'my.books.kobo':
# breakpoint()
# pass
try:
mod = import_module(module)
except Exception as e:
return False
return any(x is __NOT_HPI_MODULE__ for x in vars(mod).values())
def is_not_hpi_module(module: str) -> Optional[str]: def is_not_hpi_module(module: str) -> Optional[str]:
# None if a module, otherwise returns reason '''
if has_not_module_flag(module): None if a module, otherwise returns reason
return "marked explicitly (via __NOT_HPI_MODULE__)" '''
import importlib
path: Optional[str] = None
try:
# # TODO annoying, this can cause import of the parent module?
spec = importlib.util.find_spec(module)
assert spec is not None
path = spec.origin
except Exception as e:
return "import error (possibly missing config entry)" # todo add exc message?
assert path is not None # not sure if can happen?
if _is_not_module_src(Path(path)):
return f"marked explicitly (via {NOT_HPI_MODULE_VAR})"
stats = get_stats(module) stats = get_stats(module)
if stats is None: if stats is None:
return "has no 'stats()' function" return "has no 'stats()' function"
@ -189,5 +191,35 @@ def test_module_detection() -> None:
assert mods['my.lastfm'].skip_reason == "suppressed in the user config" assert mods['my.lastfm'].skip_reason == "suppressed in the user config"
def test_bad_module(tmp_path: Path) -> None:
xx = tmp_path / 'precious_data'
xx.write_text('some precious data')
badp = tmp_path / 'bad'
par = badp / 'my'
par.mkdir(parents=True)
(par / 'badmodule.py').write_text(f'''
from pathlib import Path
Path('{xx}').write_text('') # overwrite file
raise RuntimeError("FAIL ON IMPORT! naughy.")
def stats():
return [1, 2, 3]
''')
import sys
orig_path = list(sys.path)
try:
sys.path.insert(0, str(badp))
res = is_not_hpi_module('my.badmodule')
finally:
sys.path = orig_path
# shouldn't crash at least
assert res is not None # bad indeed
# TODO atm it says 'no stats()' function...
# assert 'import error' in res
# assert xx.read_text() == 'some precious data' # make sure module wasn't evauluated
### tests end ### tests end