HPI/my/core/util.py

245 lines
7.4 KiB
Python

import os
import pkgutil
import sys
from itertools import chain
from pathlib import Path
from types import ModuleType
from typing import Iterable, List, Optional
from .discovery_pure import HPIModule, _is_not_module_src, has_stats, ignored
def modules() -> Iterable[HPIModule]:
import my
for m in _iter_all_importables(my):
yield m
__NOT_HPI_MODULE__ = 'Import this to mark a python file as a helper, not an actual HPI module'
from .discovery_pure import NOT_HPI_MODULE_VAR
assert NOT_HPI_MODULE_VAR in globals() # check name consistency
def is_not_hpi_module(module: str) -> Optional[str]:
'''
None if a module, otherwise returns reason
'''
import importlib
path: Optional[str] = None
try:
# TODO annoying, this can cause import of the parent module?
spec = importlib.util.find_spec(module)
assert spec is not None
path = spec.origin
except Exception as e:
# todo a bit misleading.. it actually shouldn't import in most cases, it's just the weird parent module import thing
return "import error (possibly missing config entry)" # todo add exc message?
assert path is not None # not sure if can happen?
if _is_not_module_src(Path(path)):
return f"marked explicitly (via {NOT_HPI_MODULE_VAR})"
if not has_stats(Path(path)):
return "has no 'stats()' function"
return None
# todo reuse in readme/blog post
# borrowed from https://github.com/sanitizers/octomachinery/blob/24288774d6dcf977c5033ae11311dbff89394c89/tests/circular_imports_test.py#L22-L55
def _iter_all_importables(pkg: ModuleType) -> Iterable[HPIModule]:
# todo crap. why does it include some stuff three times??
yield from chain.from_iterable(
_discover_path_importables(Path(p), pkg.__name__)
# todo might need to handle __path__ for individual modules too?
# not sure why __path__ was duplicated, but it did happen..
for p in set(pkg.__path__)
)
def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModule]:
from .core_config import config
"""Yield all importables under a given path and package."""
for dir_path, dirs, file_names in os.walk(pkg_pth):
file_names.sort()
# NOTE: sorting dirs in place is intended, it's the way you're supposed to do it with os.walk
dirs.sort()
pkg_dir_path = Path(dir_path)
if pkg_dir_path.parts[-1] == '__pycache__':
continue
if all(Path(_).suffix != '.py' for _ in file_names):
continue
rel_pt = pkg_dir_path.relative_to(pkg_pth)
pkg_pref = '.'.join((pkg_name, ) + rel_pt.parts)
yield from _walk_packages(
(str(pkg_dir_path), ), prefix=f'{pkg_pref}.',
)
# TODO might need to make it defensive and yield Exception (otherwise hpi doctor might fail for no good reason)
# use onerror=?
# ignored explicitly -> not HPI
# if enabled in config -> HPI
# if disabled in config -> HPI
# otherwise, check for stats
# recursion is relied upon using .*
# TODO when do we need to recurse?
def _walk_packages(path: Iterable[str], prefix: str='', onerror=None) -> Iterable[HPIModule]:
"""
Modified version of https://github.com/python/cpython/blob/d50a0700265536a20bcce3fb108c954746d97625/Lib/pkgutil.py#L53,
to alvoid importing modules that are skipped
"""
from .core_config import config
def seen(p, m={}):
if p in m:
return True
m[p] = True
for info in pkgutil.iter_modules(path, prefix):
mname = info.name
if mname is None:
# why would it be? anyway makes mypy happier
continue
if ignored(mname):
# not sure if need to yield?
continue
active = config._is_module_active(mname)
skip_reason = None
if active is False:
skip_reason = 'suppressed in the user config'
elif active is None:
# unspecified by the user, rely on other means
is_not_module = is_not_hpi_module(mname)
if is_not_module is not None:
skip_reason = is_not_module
else: # active is True
# nothing to do, enabled explicitly
pass
yield HPIModule(
name=mname,
skip_reason=skip_reason,
)
if not info.ispkg:
continue
recurse = config._is_module_active(mname + '.')
if not recurse:
continue
try:
__import__(mname)
except ImportError:
if onerror is not None:
onerror(mname)
except Exception:
if onerror is not None:
onerror(mname)
else:
raise
else:
path = getattr(sys.modules[mname], '__path__', None) or []
# don't traverse path items we've seen before
path = [p for p in path if not seen(p)]
yield from _walk_packages(path, mname + '.', onerror)
# deprecate?
def get_modules() -> List[HPIModule]:
return list(modules())
### tests start
## FIXME: add test when there is an import error -- should be defensive and yield exception
def test_module_detection() -> None:
from .core_config import _reset_config as reset
with reset() as cc:
cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*']
mods = {m.name: m for m in modules()}
assert mods['my.demo'] .skip_reason == "has no 'stats()' function"
with reset() as cc:
cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*', 'my.lastfm']
cc.enabled_modules = ['my.demo']
mods = {m.name: m for m in modules()}
assert mods['my.demo'] .skip_reason is None # not skipped
assert mods['my.lastfm'].skip_reason == "suppressed in the user config"
def test_good_modules(tmp_path: Path) -> None:
badp = tmp_path / 'good'
par = badp / 'my'
par.mkdir(parents=True)
(par / 'good.py').write_text('def stats(): pass')
(par / 'disabled.py').write_text('''
from my.core import __NOT_HPI_MODULE__
''')
(par / 'nostats.py').write_text('''
# no stats!
''')
import sys
orig_path = list(sys.path)
try:
sys.path.insert(0, str(badp))
good = is_not_hpi_module('my.good')
disabled = is_not_hpi_module('my.disabled')
nostats = is_not_hpi_module('my.nostats')
finally:
sys.path = orig_path
assert good is None # good module!
assert disabled is not None
assert 'marked explicitly' in disabled
assert nostats is not None
assert 'stats' in nostats
def test_bad_modules(tmp_path: Path) -> None:
xx = tmp_path / 'precious_data'
xx.write_text('some precious data')
badp = tmp_path / 'bad'
par = badp / 'my'
par.mkdir(parents=True)
(par / 'malicious.py').write_text(f'''
from pathlib import Path
Path(r'{xx}').write_text('aaand your data is gone!')
raise RuntimeError("FAIL ON IMPORT! naughty.")
def stats():
return [1, 2, 3]
''')
import sys
orig_path = list(sys.path)
try:
sys.path.insert(0, str(badp))
res = is_not_hpi_module('my.malicious')
finally:
sys.path = orig_path
# shouldn't crash at least
assert res is None # good as far as discovery is concerned
assert xx.read_text() == 'some precious data' # make sure module wasn't evaluated
### tests end