core: more consistent module detection logic

2020-09-29 14:31:19 +01:00 · 2020-09-29 14:31:19 +01:00 · 4b49add746
commit 4b49add746
parent c79ffb50f6
4 changed files with 225 additions and 103 deletions
--- a/my/core/main.py
+++ b/my/core/main.py
@ -140,19 +140,21 @@ def modules_check(args):
    module: Optional[str] = args.module
    vw = '' if verbose else '; pass --verbose to print more information'
-    mods: Iterable[str]
+    from .util import get_stats, HPIModule, modules
    from .core_config import config
    mods: Iterable[HPIModule]
    if module is None:
        from .util import modules
        mods = modules()
    else:
-        mods = [module]
+        mods = [HPIModule(name=module, skip_reason=None)]
    from .core_config import config
    # todo add a --all argument to disregard is_active check?
-    for m in mods:
+    for mr in mods:
-        active = config.is_module_active(m)
+        skip = mr.skip_reason
-        if not active:
+        m    = mr.name
-            eprint(f'🔲 {color.YELLOW}SKIP{color.RESET}: {m:<30} module disabled in config')
+        if skip is not None:
            eprint(f'🔲 {color.YELLOW}SKIP{color.RESET}: {m:<30} {skip}')
            continue
        try:
@ -165,7 +167,7 @@ def modules_check(args):
            continue
        info(f'{color.GREEN}OK{color.RESET}  : {m:<30}')
-        stats = getattr(mod, 'stats', None)
+        stats = get_stats(m)
        if stats is None:
            continue
        from . import common
@ -188,8 +190,11 @@ def list_modules(args) -> None:
    # todo add an active_modules() method? would be useful for doctor?
    from .util import modules
-    for m in modules():
+    for mr in modules():
-        active = config.is_module_active(m)
+        m    = mr.name
        skip = mr.skip_reason
        active = skip is None
        # todo maybe reorder? (e.g. enabled first/last)? or/and color?
        # todo maybe use [off] / [ON] so it's easier to distinguish visually?
        print(f'- {m:50}' + ('' if active else f' {color.YELLOW}[disabled]{color.RESET}'))
--- a/my/core/core_config.py
+++ b/my/core/core_config.py
@ -37,33 +37,33 @@ class Config(user_config):
    disabled_modules: Optional[Sequence[str]] = None
-    def is_module_active(self, module: str) -> bool:
+    def _is_module_active(self, module: str) -> Optional[bool]:
        # None means the config doesn't specify anything
        # todo might be nice to return the 'reason' too? e.g. which option has matched
-        should_enable  = None
+        def matches(specs: Sequence[str]) -> Optional[str]:
        should_disable = None
        def matches(specs: Sequence[str]) -> bool:
            for spec in specs:
                # not sure because . (packages separate) matches anything, but I guess unlikely to clash
                if re.match(spec, module):
-                    return True
+                    return spec
-            return False
+            return None
        enabled  = self.enabled_modules
        disabled = self.disabled_modules
-        if enabled is None:
+        on  = matches(self.enabled_modules  or [])
-            if disabled is None:
+        off = matches(self.disabled_modules or [])
-                # by default, enable everything? not sure
+
        if on is None:
            if off is None:
                # user is indifferent
                return None
            else:
                return False
        else: # not None
            if off is None:
                return True
-            else:
+            else: # not None
-                # only disable the specified modules
+                # fallback onto the 'enable everything', then the user will notice
-                return not matches(disabled)
+                warnings.medium(f"[module]: conflicting regexes '{on}' and '{off}' are set in the config. Please only use one of them.")
        else:
            if disabled is None:
                # only enable the specified modules
                return matches(enabled)
            else:
                # ok, this means the config is inconsistent. better fallback onto the 'enable everything', then the user will notice?
                warnings.medium("Both 'enabled_modules' and 'disabled_modules' are set in the config. Please only use one of them.")
                return True
@ -72,42 +72,39 @@ config = make_config(Config)
 ### tests start
 from contextlib import contextmanager as ctx
@ctx
 def _reset_config():
    """
    Context manager that yields an overridden config object with both
    ``enabled_modules`` and ``disabled_modules`` reset to None, so each test
    starts without any module filters.
    """
    # todo maybe have this decorator for the whole of my.config?
    from .cfg import override_config
    # NOTE(review): presumably override_config restores the original config on exit -- confirm in .cfg
    with override_config(config) as cc:
        cc.enabled_modules  = None
        cc.disabled_modules = None
        yield cc
 def test_active_modules() -> None:
-    # todo maybe have this decorator for the whole of my.config?
+    reset = _reset_config
    from contextlib import contextmanager as ctx
    @ctx
    def reset():
        from .cfg import override_config
        with override_config(config) as cc:
            cc.enabled_modules  = None
            cc.disabled_modules = None
            yield cc
    with reset() as cc:
-        assert cc.is_module_active('my.whatever')
+        assert cc._is_module_active('my.whatever'     ) is None
-        assert cc.is_module_active('my.core'    )
+        assert cc._is_module_active('my.core'         ) is None
-        assert cc.is_module_active('my.body.exercise')
+        assert cc._is_module_active('my.body.exercise') is None
    with reset() as cc:
        cc.enabled_modules  = ['my.whatever']
        cc.disabled_modules = ['my.body.*']
-        assert cc.is_module_active('my.whatever')
+        assert     cc._is_module_active('my.whatever'     ) is True
-        assert cc.is_module_active('my.core'    )
+        assert     cc._is_module_active('my.core'         ) is None
-        assert not cc.is_module_active('my.body.exercise')
+        assert not cc._is_module_active('my.body.exercise') is True
    with reset() as cc:
        cc.enabled_modules = ['my.whatever']
        assert cc.is_module_active('my.whatever')
        assert not cc.is_module_active('my.core'    )
        assert not cc.is_module_active('my.body.exercise')
    with reset() as cc:
        # if both are set, enable all
        cc.disabled_modules = ['my.body.*']
-        cc.enabled_modules = ['my.whatever']
+        cc.enabled_modules =  ['my.body.exercise']
-        assert cc.is_module_active('my.whatever')
+        assert cc._is_module_active('my.whatever'     ) is None
-        assert cc.is_module_active('my.core'    )
+        assert cc._is_module_active('my.core'         ) is None
-        assert cc.is_module_active('my.body.exercise')
+        assert cc._is_module_active('my.body.exercise') is True
        # todo suppress warnings during the tests?
 ### tests end
--- a/my/core/util.py
+++ b/my/core/util.py
@ -1,20 +1,87 @@
 from pathlib import Path
 from itertools import chain
 from importlib import import_module
 import os
 import re
 import pkgutil
-from typing import List, Iterable
+import re
 import sys
 from typing import List, Iterable, NamedTuple, Optional
-# TODO reuse in readme/blog post
+
class HPIModule(NamedTuple):
    """A discovered module name paired with the reason it is skipped, if any."""
    # dotted module name, e.g. 'my.demo'
    name: str
    # None means the module is not skipped; otherwise a human-readable reason
    skip_reason: Optional[str]
 def modules() -> Iterable[HPIModule]:
    import my
    for m in _iter_all_importables(my):
        yield m
 def ignored(m: str) -> bool:
    """
    Return True if the fully qualified module name ``m`` is on the exclusion list.

    Each entry below is a regex fragment matched against the whole part of the
    name that follows the literal ``my.`` prefix (so ``core.*`` covers ``my.core``
    and everything whose name starts with ``my.core``).
    """
    excluded = [
        'core.*',
        'config.*',
        ## todo move these to core
        'kython.*',
        'mycfg_stub',
        ##
        ## these are just deprecated
        'common',
        'error',
        'cfg',
        ##
        ## TODO vvv these should be moved away from here
        'jawbone.plots',
        'emfit.plot',
        # 'google.takeout.paths',
        'bluemaestro.check',
        'location.__main__',
        'photos.utils',
        'books',
        'coding',
        'media',
        'reading',
        '_rss',
        'twitter.common',
        'rss.common',
        'lastfm.fill_influxdb',
    ]
    exs = '|'.join(excluded)
    # the package separator after 'my' is a literal dot, so it has to be escaped;
    # unescaped it would accept any character (e.g. 'myXcommon')
    return re.match(rf'^my\.({exs})$', m) is not None
 def get_stats(module: str):
    """
    Import ``module`` by name and return its ``stats`` attribute.

    Returns None when the import raises any Exception or when the module
    has no ``stats`` attribute.
    """
    # todo detect via ast?
    try:
        imported = import_module(module)
    except Exception:
        # best effort: a module that cannot be imported is treated as having no stats
        return None
    else:
        return getattr(imported, 'stats', None)
 __NOT_A_MODULE__ = 'Import this to mark a python file as a helper, not an actual module'
 # todo reuse in readme/blog post
 # borrowed from https://github.com/sanitizers/octomachinery/blob/24288774d6dcf977c5033ae11311dbff89394c89/tests/circular_imports_test.py#L22-L55
-def _iter_all_importables(pkg):
+def _iter_all_importables(pkg) -> Iterable[HPIModule]:
    # todo crap. why does it include some stuff three times??
    yield from chain.from_iterable(
        _discover_path_importables(Path(p), pkg.__name__)
-        for p in pkg.__path__
+        # todo might need to handle __path__ for individual modules too?
        # not sure why __path__ was duplicated, but it did happen..
        for p in set(pkg.__path__)
    )
-def _discover_path_importables(pkg_pth, pkg_name):
+def _discover_path_importables(pkg_pth, pkg_name) -> Iterable[HPIModule]:
    from .core_config import config
    """Yield all importables under a given path and package."""
    for dir_path, dirs, file_names in os.walk(pkg_pth):
        file_names.sort()
@ -32,53 +99,105 @@ def _discover_path_importables(pkg_pth, pkg_name):
        rel_pt = pkg_dir_path.relative_to(pkg_pth)
        pkg_pref = '.'.join((pkg_name, ) + rel_pt.parts)
-
+        yield from _walk_packages(
-        # TODO might need to make it defensive and yield Exception (otherwise hpi doctor might fail for no good reason)
+            (str(pkg_dir_path), ), prefix=f'{pkg_pref}.',
        yield from (
            pkg_path
            for _, pkg_path, _ in pkgutil.walk_packages(
                (str(pkg_dir_path), ), prefix=f'{pkg_pref}.',
            )
        )
        # TODO might need to make it defensive and yield Exception (otherwise hpi doctor might fail for no good reason)
        # use onerror=?
 # ignored explicitly     -> not HPI
 # if enabled  in config  -> HPI
 # if disabled in config  -> HPI
 # otherwise, check for stats
 # recursion is relied upon using .*
 # TODO when do we need to recurse?
-# TODO marking hpi modules or unmarking non-modules? not sure what's worse
+def _walk_packages(path=None, prefix='', onerror=None) -> Iterable[HPIModule]:
-def ignored(m: str):
+    '''
-    excluded = [
+    Modified version of https://github.com/python/cpython/blob/d50a0700265536a20bcce3fb108c954746d97625/Lib/pkgutil.py#L53,
-        'kython.*',
+    to avoid importing modules that are skipped
-        'mycfg_stub',
+    '''
-        'common',
+    from .core_config import config
        'error',
        'cfg',
        'core.*',
        'config.*',
        'jawbone.plots',
        'emfit.plot',
-        # todo think about these...
+    def seen(p, m={}):
-        # 'google.takeout.paths',
+        if p in m:
-        'bluemaestro.check',
+            return True
-        'location.__main__',
+        m[p] = True
        'photos.utils',
        'books',
        'coding',
        'media',
        'reading',
        '_rss',
        'twitter.common',
        'rss.common',
        'lastfm.fill_influxdb',
    ]
    exs = '|'.join(excluded)
    return re.match(f'^my.({exs})$', m)
    for info in pkgutil.iter_modules(path, prefix):
        mname = info.name
-def modules() -> Iterable[str]:
+        if ignored(mname):
-    import my as pkg # todo not sure?
+            # not sure if need to yield?
-    for x in _iter_all_importables(pkg):
+            continue
        if not ignored(x):
            yield x
        active = config._is_module_active(mname)
        skip_reason = None
        if active is False:
            skip_reason = 'suppressed in the user config'
        elif active is None:
            # unspecified by the user, rely on other means
            # stats detection is the last resort (because it actually tries to import)
            stats = get_stats(mname)
            if stats is None:
                skip_reason = "has no 'stats()' function"
        else: # active is True
            # nothing to do, enabled explicitly
            pass
-def get_modules() -> List[str]:
+        yield HPIModule(
            name=mname,
            skip_reason=skip_reason,
        )
        if not info.ispkg:
            continue
        recurse = config._is_module_active(mname + '.')
        if not recurse:
            continue
        try:
            __import__(mname)
        except ImportError:
            if onerror is not None:
                onerror(mname)
        except Exception:
            if onerror is not None:
                onerror(mname)
            else:
                raise
        else:
            path = getattr(sys.modules[mname], '__path__', None) or []
            # don't traverse path items we've seen before
            path = [p for p in path if not seen(p)]
            yield from _walk_packages(path, mname+'.', onerror)
 # deprecate?
 def get_modules() -> List[HPIModule]:
    """Eagerly collect everything yielded by modules() into a list."""
    return [*modules()]
 ### tests start
 ## FIXME: add test when there is an import error -- should be defensive and yield exception
 def test_module_detection() -> None:
    """Check the skip_reason that modules() reports under different enabled/disabled configs."""
    from .core_config import _reset_config as reset
    with reset() as cc:
        # my.demo matches none of the disabled patterns and nothing is enabled explicitly;
        # the expectation is that it gets skipped for lacking a stats() function
        cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*']
        mods = {m.name: m for m in modules()}
        assert mods['my.demo']  .skip_reason == "has no 'stats()' function"

    with reset() as cc:
        # my.demo is enabled explicitly and my.lastfm is disabled explicitly
        cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*', 'my.lastfm']
        cc.enabled_modules  = ['my.demo']
        mods = {m.name: m for m in modules()}

        assert mods['my.demo']  .skip_reason is None # not skipped
        assert mods['my.lastfm'].skip_reason == "suppressed in the user config"
 ### tests end
--- a/tests/core.py
+++ b/tests/core.py
@ -11,4 +11,5 @@ we can run against the tests in my.core directly.
 '''
 from my.core.core_config import *
-from my.core.error import *
+from my.core.error       import *
 from my.core.util        import *