Merge pull request #42 from karlicoss/updates

cleanup, move stuff to my.core, update docs
karlicoss 2020-05-06 23:23:41 +01:00 committed by GitHub
commit 40b6a82b7c
26 changed files with 471 additions and 429 deletions


@@ -5,6 +5,11 @@
#+macro: map @@html:<span style='color:darkgreen; font-weight: bolder'>@@$1@@html:</span>@@
If you're in a hurry, feel free to jump straight to the [[#usecases][demos]].
- see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP]] for the *installation/configuration guide*
- see [[https://github.com/karlicoss/HPI/tree/master/doc/DEVELOPMENT.org][DEVELOPMENT]] for the *development guide*
*TLDR*: I'm using the [[https://github.com/karlicoss/HPI][HPI]] (Human Programming Interface) package as a means of unifying, accessing and interacting with all of my personal data.
It's a Python library (named ~my~), a collection of modules for:
@@ -48,11 +53,6 @@ and that's why I'm sharing this.
Imagine if all your life was reflected digitally and available at your fingertips.
This library is my attempt to achieve this vision.
If you're in a hurry, feel free to jump straight to the [[#usecases][demos]].
For *installation/configuration/development guide*, see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP.org]].
#+toc: headlines 2
@@ -593,4 +593,4 @@ In some near future I will write more about:
- challenges I had to solve
- more use-cases and demos -- it's impossible to fit everything in one post!
, but happy to answer any questions on these topics now!
, but happy to answer any questions on these topics now!


@@ -1,13 +1,45 @@
* Running tests
I'm using =tox= to run tests/linting. You can check out the [[file:../.github/workflows/main.yml][GitHub Actions]] config
and [[file:../scripts/ci/run]] for up-to-date info on the specifics.
* IDE setup: make sure my.config is in your package search path
At runtime, ~my.config~ is imported dynamically from the user config directory.
However, Pycharm/Emacs/whatever you use won't be able to figure that out, so you'd need to adjust your IDE configuration.
- Pycharm: basically, follow the instruction [[https://stackoverflow.com/a/55278260/706389][here]]
- Pycharm: basically, follow the instructions [[https://stackoverflow.com/a/55278260/706389][here]]
i.e. create a new interpreter configuration (e.g. name it "Python 3.7 (for HPI)"), and add =~/.config/my=.
* Linting
You should be able to use ~./lint~ script to run mypy checks.
You should be able to use [[file:../lint]] script to run mypy checks.
~mypy.ini~ file points at =~/.config/my= by default.
[[file:../mypy.ini]] points at =~/.config/my= by default.
* Modifying/adding modules
The easiest way is to run HPI via the [[file:SETUP.org::#use-without-installing][with_my]] wrapper or with an editable pip install.
That way your changes will be reflected immediately, and you will be able to quickly iterate/fix bugs/add new methods.
The "proper way" (unless you want to contribute to the upstream) is to create a separate hierarchy and add your module to =PYTHONPATH=.
For example, if you want to add an =awesomedatasource=, it could be:
: custom_module
: └── my
:    └── awesomedatasource.py
In =awesomedatasource.py= you can use all existing HPI modules, for example =my.config= or everything from =my.core=.
The same mechanism also lets you *shadow/override* an existing HPI module:
: custom_reddit_overlay
: └── my
:    └── reddit.py
Now if you add =custom_reddit_overlay= *at the front* of ~PYTHONPATH~, all downstream scripts using =my.reddit= will load it from =custom_reddit_overlay= instead.
This can be useful to monkey patch some behaviours or dynamically add extra data sources -- anything that comes to mind.
I'll put up a better guide on this; in the meantime, see [[https://packaging.python.org/guides/packaging-namespace-packages]["namespace packages"]] for more info.
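For concreteness, here is a minimal sketch of what =custom_reddit_overlay/my/reddit.py= could look like -- hypothetical code that simply replaces the data source (names and fields are illustrative):

#+begin_src python
# custom_reddit_overlay/my/reddit.py -- hypothetical overlay, not part of HPI itself
from typing import Iterator, NamedTuple

class Save(NamedTuple):
    # minimal stand-in for the real Save type; keep whatever fields your downstream scripts use
    sid: str
    title: str

def saved() -> Iterator[Save]:
    # entirely replace the data source, e.g. yield from a local fixture instead of real exports
    yield Save(sid='dummy', title='hello from the overlay')
#+end_src

Downstream scripts run with =PYTHONPATH=/path/to/custom_reddit_overlay:$PYTHONPATH= would then pick up this module when they import ~my.reddit~.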

lint (24 changed lines)

@@ -31,25 +31,29 @@ def package_name(p: Path) -> str:
else:
return mname(p)
def subpackages(package: str) -> Iterable[str]:
ppath = package.replace('.', '/')
yield from sorted({
package_name(p.relative_to(DIR)) for p in (DIR / ppath).rglob('*.py')
})
# TODO meh.. think how to check _everything_ on CI
def core_modules() -> Iterable[str]:
return [
'my.common',
*subpackages('my.core'),
*subpackages('my.kython'),
'my.config',
'my.core',
'my.cfg',
'my.error',
'my.init',
'tests/misc.py',
'tests/get_files.py',
# 'tests/config.py', TODO hmm. unclear how to type check this module
]
def all_modules() -> Iterable[str]:
yield from sorted(set(
package_name(p.relative_to(DIR)) for p in (DIR / 'my').rglob('*.py')
))
yield from subpackages('my')
yield from sorted(
str(f.relative_to(DIR)) for f in (DIR / 'tests').rglob('*.py')
)
@@ -63,11 +67,13 @@ def pylint():
def mypy(thing: str):
is_package = Path(thing).suffix != '.py'
return run([
cmd = [
'mypy',
'--color-output', # TODO eh? doesn't work..
*(['-p'] if is_package else []), thing,
], stdout=PIPE, stderr=PIPE)
]
print(' '.join(cmd), file=sys.stderr)
return run(cmd, stdout=PIPE, stderr=PIPE)
def mypy_all() -> Iterable[Exception]:


@@ -1,8 +1,6 @@
"""
[[https://uk.kobobooks.com/products/kobo-aura-one][Kobo]] e-ink reader: annotations and reading stats
"""
from .. import init
from typing import Callable, Union, List
from my.config import kobo as config


@@ -13,7 +13,7 @@ from my.config.holidays_data import HOLIDAYS_DATA
# pip3 install workalendar
from workalendar.europe import UnitedKingdom # type: ignore
cal = UnitedKingdom() # TODO FIXME specify in config
cal = UnitedKingdom() # TODO
# TODO that should depend on country/'location' of residence I suppose?


@@ -12,15 +12,12 @@ After that, you can set config attributes:
export_path='/path/to/twitter/exports',
)
"""
# TODO later, If I have config stubs that might be unnecessary too..
from . import init
# todo why do we bring this into scope? don't remember..
import my.config as config
def set_repo(name: str, repo):
from .init import assign_module
from .core.init import assign_module
from . common import import_from
module = import_from(repo, name)


@@ -1,6 +1,4 @@
#!/usr/bin/env python3
from .. import init
from my.config import codeforces as config
from datetime import datetime


@@ -1,9 +1,6 @@
"""
Github events and their metadata: comments/issues/pull requests
"""
from .. import init
from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar, Set
from datetime import datetime
import json


@@ -1,6 +1,4 @@
#!/usr/bin/env python3
from .. import init
from my.config import topcoder as config
from datetime import datetime


@@ -1,197 +1,2 @@
from glob import glob as do_glob
from pathlib import Path
import functools
import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
import warnings
# some helper functions
PathIsh = Union[Path, str]
# TODO port annotations to kython?..
def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
p = Path(p)
if name is None:
name = p.stem
import importlib.util
spec = importlib.util.spec_from_file_location(name, p)
foo = importlib.util.module_from_spec(spec)
loader = spec.loader; assert loader is not None
loader.exec_module(foo) # type: ignore[attr-defined]
return foo
def import_from(path: PathIsh, name: str) -> types.ModuleType:
path = str(path)
import sys
try:
sys.path.append(path)
import importlib
return importlib.import_module(name)
finally:
sys.path.remove(path)
T = TypeVar('T')
K = TypeVar('K')
V = TypeVar('V')
def the(l: Iterable[T]) -> T:
it = iter(l)
try:
first = next(it)
except StopIteration as ee:
raise RuntimeError('Empty iterator?')
assert all(e == first for e in it)
return first
# TODO more_itertools.bucket?
def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
res: Dict[K, List[T]] = {}
for i in l:
kk = key(i)
lst = res.get(kk, [])
lst.append(i)
res[kk] = lst
return res
def _identity(v: T) -> V:
return cast(V, v)
def make_dict(l: Iterable[T], key: Callable[[T], K], value: Callable[[T], V]=_identity) -> Dict[K, V]:
res: Dict[K, V] = {}
for i in l:
k = key(i)
v = value(i)
pv = res.get(k, None) # type: ignore
if pv is not None:
raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}")
res[k] = v
return res
Cl = TypeVar('Cl')
R = TypeVar('R')
def cproperty(f: Callable[[Cl], R]) -> R:
return property(functools.lru_cache(maxsize=1)(f)) # type: ignore
# https://stackoverflow.com/a/12377059/706389
def listify(fn=None, wrapper=list):
"""
Wraps a function's return value in wrapper (e.g. list)
Useful when an algorithm can be expressed more cleanly as a generator
"""
def listify_return(fn):
@functools.wraps(fn)
def listify_helper(*args, **kw):
return wrapper(fn(*args, **kw))
return listify_helper
if fn is None:
return listify_return
return listify_return(fn)
# TODO FIXME use in bluemaestro
# def dictify(fn=None, key=None, value=None):
# def md(it):
# return make_dict(it, key=key, value=value)
# return listify(fn=fn, wrapper=md)
from .kython.klogging import setup_logger, LazyLogger
Paths = Union[Sequence[PathIsh], PathIsh]
DEFAULT_GLOB = '*'
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
"""
Helper function to avoid boilerplate.
Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
"""
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
sources: List[Path] = []
if isinstance(pp, (str, Path)):
sources.append(Path(pp))
else:
sources.extend(map(Path, pp))
paths: List[Path] = []
for src in sources:
if src.is_dir():
gp: Iterable[Path] = src.glob(glob)
paths.extend(gp)
else:
ss = str(src)
if '*' in ss:
if glob != DEFAULT_GLOB:
warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
paths.extend(map(Path, do_glob(ss)))
else:
assert src.is_file(), src
# todo assert matches glob??
paths.append(src)
if sort:
paths = list(sorted(paths))
return tuple(paths)
# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Callable, TypeVar
from typing_extensions import Protocol
# TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
# I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
# ok, that's actually a super nice 'pattern'
F = TypeVar('F')
class McachewType(Protocol):
def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
...
mcachew: McachewType
def mcachew(*args, **kwargs): # type: ignore[no-redef]
"""
Stands for 'Maybe cachew'.
Defensive wrapper around @cachew to make it an optional dependency.
"""
try:
import cachew
except ModuleNotFoundError:
warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
return lambda orig_func: orig_func
else:
import cachew.experimental
cachew.experimental.enable_exceptions() # TODO do it only once?
return cachew.cachew(*args, **kwargs)
@functools.lru_cache(1)
def _magic():
import magic # type: ignore
return magic.Magic(mime=True)
# TODO could reuse in pdf module?
import mimetypes # todo do I need init()?
# todo wtf? fastermime thinks its mime type is application/json even if the extension is xz??
# whereas magic detects correctly: application/x-zstd and application/x-xz
def fastermime(path: PathIsh) -> str:
paths = str(path)
# mimetypes is faster
(mime, _) = mimetypes.guess_type(paths)
if mime is not None:
return mime
# magic is slower but returns more stuff
# TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
return _magic().from_file(paths)
Json = Dict[str, Any]
# will be deprecated. please add stuff to my.core
from .core.common import *


@@ -1,5 +1,5 @@
# TODO ok, this thing should trigger .cfg import presumably??
from .. import init
from ..core import init
# TODO maybe, reuse mycfg_template here?

my/core/common.py (new file, 197 lines)

@@ -0,0 +1,197 @@
from glob import glob as do_glob
from pathlib import Path
import functools
import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
import warnings
# some helper functions
PathIsh = Union[Path, str]
# TODO port annotations to kython?..
def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
p = Path(p)
if name is None:
name = p.stem
import importlib.util
spec = importlib.util.spec_from_file_location(name, p)
foo = importlib.util.module_from_spec(spec)
loader = spec.loader; assert loader is not None
loader.exec_module(foo) # type: ignore[attr-defined]
return foo
def import_from(path: PathIsh, name: str) -> types.ModuleType:
path = str(path)
import sys
try:
sys.path.append(path)
import importlib
return importlib.import_module(name)
finally:
sys.path.remove(path)
T = TypeVar('T')
K = TypeVar('K')
V = TypeVar('V')
def the(l: Iterable[T]) -> T:
it = iter(l)
try:
first = next(it)
except StopIteration as ee:
raise RuntimeError('Empty iterator?')
assert all(e == first for e in it)
return first
# TODO more_itertools.bucket?
def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
res: Dict[K, List[T]] = {}
for i in l:
kk = key(i)
lst = res.get(kk, [])
lst.append(i)
res[kk] = lst
return res
def _identity(v: T) -> V:
return cast(V, v)
def make_dict(l: Iterable[T], key: Callable[[T], K], value: Callable[[T], V]=_identity) -> Dict[K, V]:
res: Dict[K, V] = {}
for i in l:
k = key(i)
v = value(i)
pv = res.get(k, None) # type: ignore
if pv is not None:
raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}")
res[k] = v
return res
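A hypothetical usage sketch (not part of this commit) for the two helpers above:
# group records by a field, and build a lookup that raises on duplicate keys
records = [('alice', 1), ('bob', 2), ('alice', 3)]
by_name = group_by_key(records, key=lambda r: r[0])  # {'alice': [('alice', 1), ('alice', 3)], 'bob': [('bob', 2)]}
lookup = make_dict(records[:2], key=lambda r: r[0], value=lambda r: r[1])  # {'alice': 1, 'bob': 2}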
Cl = TypeVar('Cl')
R = TypeVar('R')
def cproperty(f: Callable[[Cl], R]) -> R:
return property(functools.lru_cache(maxsize=1)(f)) # type: ignore
# https://stackoverflow.com/a/12377059/706389
def listify(fn=None, wrapper=list):
"""
Wraps a function's return value in wrapper (e.g. list)
Useful when an algorithm can be expressed more cleanly as a generator
"""
def listify_return(fn):
@functools.wraps(fn)
def listify_helper(*args, **kw):
return wrapper(fn(*args, **kw))
return listify_helper
if fn is None:
return listify_return
return listify_return(fn)
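A hypothetical usage sketch (not part of this commit): write a generator, hand callers a list:
@listify
def squares(n):
    # expressed as a generator, but callers get a plain list thanks to @listify
    for i in range(n):
        yield i * i
assert squares(4) == [0, 1, 4, 9]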
# todo use in bluemaestro
# def dictify(fn=None, key=None, value=None):
# def md(it):
# return make_dict(it, key=key, value=value)
# return listify(fn=fn, wrapper=md)
from ..kython.klogging import setup_logger, LazyLogger
Paths = Union[Sequence[PathIsh], PathIsh]
DEFAULT_GLOB = '*'
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
"""
Helper function to avoid boilerplate.
Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
"""
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
sources: List[Path] = []
if isinstance(pp, (str, Path)):
sources.append(Path(pp))
else:
sources.extend(map(Path, pp))
paths: List[Path] = []
for src in sources:
if src.is_dir():
gp: Iterable[Path] = src.glob(glob)
paths.extend(gp)
else:
ss = str(src)
if '*' in ss:
if glob != DEFAULT_GLOB:
warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
paths.extend(map(Path, do_glob(ss)))
else:
assert src.is_file(), src
# todo assert matches glob??
paths.append(src)
if sort:
paths = list(sorted(paths))
return tuple(paths)
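A hypothetical usage sketch (not part of this commit), e.g. for a config value that may be a directory, a glob, or an explicit list of paths (every call returns a sorted Tuple[Path, ...]):
# get_files('/data/reddit')                    # directory: expands to its contents matching the default '*' glob
# get_files('/data/reddit', glob='*.json.xz')  # directory with an explicit glob
# get_files('/data/reddit/*.json.xz')          # a string containing '*' is treated as a glob itself
# get_files(['/data/a.json', '/data/b.json'])  # an explicit sequence of paths is passed through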
# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Callable, TypeVar
from typing_extensions import Protocol
# TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
# I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
# ok, that's actually a super nice 'pattern'
F = TypeVar('F')
class McachewType(Protocol):
def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
...
mcachew: McachewType
def mcachew(*args, **kwargs): # type: ignore[no-redef]
"""
Stands for 'Maybe cachew'.
Defensive wrapper around @cachew to make it an optional dependency.
"""
try:
import cachew
except ModuleNotFoundError:
warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
return lambda orig_func: orig_func
else:
import cachew.experimental
cachew.experimental.enable_exceptions() # TODO do it only once?
return cachew.cachew(*args, **kwargs)
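A hypothetical usage sketch (not part of this commit): used like cachew's decorator, but silently degrading to the undecorated function when cachew isn't installed:
from typing import Iterator, NamedTuple

class _Record(NamedTuple):  # hypothetical record type; cachew can serialize NamedTuples
    value: int

@mcachew('/tmp/hpi-example.cache')  # hypothetical cache path; hashf=... can control invalidation
def _records() -> Iterator[_Record]:
    yield from (_Record(i) for i in range(3))  # imagine parsing a large export here instead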
@functools.lru_cache(1)
def _magic():
import magic # type: ignore
return magic.Magic(mime=True)
# TODO could reuse in pdf module?
import mimetypes # todo do I need init()?
# todo wtf? fastermime thinks its mime type is application/json even if the extension is xz??
# whereas magic detects correctly: application/x-zstd and application/x-xz
def fastermime(path: PathIsh) -> str:
paths = str(path)
# mimetypes is faster
(mime, _) = mimetypes.guess_type(paths)
if mime is not None:
return mime
# magic is slower but returns more stuff
# TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
return _magic().from_file(paths)
Json = Dict[str, Any]

my/core/error.py (new file, 99 lines)

@@ -0,0 +1,99 @@
"""
Various error handling helpers
See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
"""
from itertools import tee
from typing import Union, TypeVar, Iterable, List, Tuple, Type
T = TypeVar('T')
E = TypeVar('E', bound=Exception) # TODO make covariant?
ResT = Union[T, E]
Res = ResT[T, Exception]
def unwrap(res: Res[T]) -> T:
if isinstance(res, Exception):
raise res
else:
return res
def echain(ex: E, cause: Exception) -> E:
ex.__cause__ = cause
return ex
def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
# TODO would be nice to have ET=Exception default?
vit, eit = tee(l)
# TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
values: Iterable[T] = (
r # type: ignore[misc]
for r in vit
if not isinstance(r, ET))
errors: Iterable[E] = (
r
for r in eit
if isinstance(r, ET))
# TODO would be interesting to be able to have yield statement anywhere in code
# so there are multiple 'entry points' to the return value
return (values, errors)
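A hypothetical usage sketch (not part of this commit), following the Res pattern from the docstring link:
def _parse_all(raw: Iterable[str]) -> Iterable[Res[int]]:
    # yield either a parsed value or the exception, instead of failing the whole stream
    for s in raw:
        try:
            yield int(s)
        except ValueError as e:
            yield e

_values, _errors = split_errors(_parse_all(['1', 'oops', '2']), ET=Exception)
assert list(_values) == [1, 2] and len(list(_errors)) == 1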
def sort_res_by(items: Iterable[ResT], key) -> List[ResT]:
"""
The general idea is: just always carry errors with the entry that precedes them
"""
# TODO ResT object should hold exception class?...
group = []
groups = []
for i in items:
if isinstance(i, Exception):
group.append(i)
else:
groups.append((i, group))
group = []
results = []
for v, errs in sorted(groups, key=lambda p: key(p[0])):
results.extend(errs)
results.append(v)
results.extend(group)
return results
def test_sort_res_by() -> None:
class Exc(Exception):
def __eq__(self, other):
return self.args == other.args
ress = [
Exc('first'),
Exc('second'),
5,
3,
Exc('xxx'),
2,
1,
Exc('last'),
]
results = sort_res_by(ress, lambda x: x) # type: ignore
assert results == [
1,
Exc('xxx'),
2,
3,
Exc('first'),
Exc('second'),
5,
Exc('last'),
]
results2 = sort_res_by(ress + [0], lambda x: x) # type: ignore
assert results2 == [Exc('last'), 0] + results[:-1]


@@ -8,9 +8,10 @@ A hook to insert user's config directory into Python's search path.
Please let me know if you are aware of a better way of dealing with this!
'''
from types import ModuleType
# TODO not ideal to keep it here, but this should really be a leaf in the import tree
def assign_module(parent: str, name: str, module):
def assign_module(parent: str, name: str, module: ModuleType) -> None:
import sys
import importlib
parent_module = importlib.import_module(parent)
@@ -20,13 +21,15 @@ def assign_module(parent: str, name: str, module):
# TODO that crap should be tested... I guess will get it for free when I run rest of tests in the matrix
setattr(parent_module, name, module)
del ModuleType
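A hypothetical sketch (not part of this commit) of what ~assign_module~ enables -- attaching a dynamically loaded module so that regular imports can see it (path and names are illustrative):
def _example() -> None:
    from my.core.common import import_file
    extra = import_file('/path/to/extra_source.py', name='extra_source')  # hypothetical path
    assign_module('my', 'extra_source', extra)
    # after this, `from my import extra_source` resolves to the dynamically loaded module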
# separate function to prevent namespace pollution
def setup_config():
def setup_config() -> None:
from pathlib import Path
import sys
import os
import warnings
from typing import Optional
# not sure if that's necessary, i.e. could rely on PYTHONPATH instead
# on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path?


@@ -1,99 +1,2 @@
"""
Various error handling helpers
See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
"""
from itertools import tee
from typing import Union, TypeVar, Iterable, List, Tuple, Type
T = TypeVar('T')
E = TypeVar('E', bound=Exception) # TODO make covariant?
ResT = Union[T, E]
Res = ResT[T, Exception]
def unwrap(res: Res[T]) -> T:
if isinstance(res, Exception):
raise res
else:
return res
def echain(ex: E, cause: Exception) -> E:
ex.__cause__ = cause
return ex
def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
# TODO would be nice to have ET=Exception default?
vit, eit = tee(l)
# TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
values: Iterable[T] = (
r # type: ignore[misc]
for r in vit
if not isinstance(r, ET))
errors: Iterable[E] = (
r
for r in eit
if isinstance(r, ET))
# TODO would be interesting to be able to have yield statement anywhere in code
# so there are multiple 'entry points' to the return value
return (values, errors)
def sort_res_by(items: Iterable[ResT], key) -> List[ResT]:
"""
The general idea is: just always carry errors with the entry that precedes them
"""
# TODO ResT object should hold exception class?...
group = []
groups = []
for i in items:
if isinstance(i, Exception):
group.append(i)
else:
groups.append((i, group))
group = []
results = []
for v, errs in sorted(groups, key=lambda p: key(p[0])):
results.extend(errs)
results.append(v)
results.extend(group)
return results
def test_sort_res_by():
class Exc(Exception):
def __eq__(self, other):
return self.args == other.args
ress = [
Exc('first'),
Exc('second'),
5,
3,
Exc('xxx'),
2,
1,
Exc('last'),
]
results = sort_res_by(ress, lambda x: x) # type: ignore
assert results == [
1,
Exc('xxx'),
2,
3,
Exc('first'),
Exc('second'),
5,
Exc('last'),
]
results2 = sort_res_by(ress + [0], lambda x: x) # type: ignore
assert results2 == [Exc('last'), 0] + results[:-1]
# will be deprecated. please add stuff to my.core
from .core.error import *


@@ -1,8 +1,6 @@
"""
[[https://hypothes.is][Hypothes.is]] highlights and annotations
"""
from . import init
from .common import get_files
from .error import Res, sort_res_by



@@ -1,8 +1,6 @@
"""
[[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews
"""
from . import init
from datetime import datetime
from typing import Any, Dict, Iterator, NamedTuple

View file

@@ -1,7 +1,4 @@
#!/usr/bin/env python3
from .. import init
import csv
import json
from datetime import datetime

View file

@@ -2,9 +2,6 @@
'''
PDF documents and annotations on your filesystem
'''
from . import init
from concurrent.futures import ProcessPoolExecutor
from datetime import datetime
import re


@@ -1,8 +1,6 @@
"""
[[https://pinboard.in][Pinboard]] bookmarks
"""
from . import init
from .common import get_files
from my.config.repos.pinbexport import dal as pinbexport


@@ -1,8 +1,6 @@
"""
Reddit data: saved items/comments/upvotes/etc.
"""
from . import init
from pathlib import Path
from typing import List, Sequence, Mapping, Iterator
@@ -13,14 +11,14 @@ from my.config import reddit as config
import my.config.repos.rexport.dal as rexport
def get_sources() -> Sequence[Path]:
def inputs() -> Sequence[Path]:
# TODO rename to export_path?
files = get_files(config.export_dir)
# TODO Cpath better be automatic by get_files...
res = list(map(CPath, files)); assert len(res) > 0
# todo move the assert to get_files?
return tuple(res)
logger = LazyLogger(__name__, level='debug')
@@ -32,30 +30,30 @@ Upvote = rexport.Upvote
def dal() -> rexport.DAL:
# TODO lru cache? but be careful when it runs continuously
return rexport.DAL(get_sources())
return rexport.DAL(inputs())
@mcachew(hashf=lambda: get_sources())
@mcachew(hashf=lambda: inputs())
def saved() -> Iterator[Save]:
return dal().saved()
@mcachew(hashf=lambda: get_sources())
@mcachew(hashf=lambda: inputs())
def comments() -> Iterator[Comment]:
return dal().comments()
@mcachew(hashf=lambda: get_sources())
@mcachew(hashf=lambda: inputs())
def submissions() -> Iterator[Submission]:
return dal().submissions()
@mcachew(hashf=lambda: get_sources())
@mcachew(hashf=lambda: inputs())
def upvoted() -> Iterator[Upvote]:
return dal().upvoted()
### the rest of the file is some elaborate attempt of restoring favorite/unfavorite times
from typing import Dict, Union, Iterable, Iterator, NamedTuple, Any
from functools import lru_cache
@@ -115,10 +113,11 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
key=lambda s: s.save.sid,
)
# TODO hmm. think about it.. if we set default backups=inputs()
# it's called early so it ends up as a global variable that we can't monkey patch easily
@mcachew('/L/data/.cache/reddit-events.cache')
def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> Iterator[Event]:
def _get_events(backups: Sequence[Path], parallel: bool=True) -> Iterator[Event]:
# TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
# parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?
prev_saves: Mapping[Sid, SaveWithDt] = {}
# TODO suppress first batch??
@@ -168,55 +167,18 @@ def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> I
# TODO a bit awkward, favorited should compare lower than unfavorited?
@lru_cache(1)
def get_events(*args, **kwargs) -> List[Event]:
evit = _get_events(*args, **kwargs)
def events(*args, **kwargs) -> List[Event]:
evit = _get_events(inputs(), *args, **kwargs)
return list(sorted(evit, key=lambda e: e.cmp_key))
def test() -> None:
get_events(backups=get_sources()[-1:])
list(saved())
def test_unfav() -> None:
events = get_events()
url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
uevents = [e for e in events if e.url == url]
assert len(uevents) == 2
ff = uevents[0]
assert ff.text == 'favorited'
uf = uevents[1]
assert uf.text == 'unfavorited'
# TODO move out..
def test_get_all_saves() -> None:
# TODO not sure if this is necessary anymore?
saves = list(saved())
# just check that they are unique..
make_dict(saves, key=lambda s: s.sid)
def test_disappearing() -> None:
# eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
# but I guess it was just a short glitch... so whatever
saves = get_events()
favs = [s.kind for s in saves if s.text == 'favorited']
[deal_with_it] = [f for f in favs if f.title == '"Deal with it!"']
assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
def test_unfavorite() -> None:
events = get_events()
unfavs = [s for s in events if s.text == 'unfavorited']
[xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)
##
def main() -> None:
# TODO eh. not sure why but parallel on seems to mess glumov up and cause OOM...
events = get_events(parallel=False)
print(len(events))
for e in events:
el = events(parallel=False)
print(len(el))
for e in el:
print(e.text, e.url)
# for e in get_
# 509 with urls..
@@ -226,3 +188,8 @@ def main() -> None:
if __name__ == '__main__':
main()
# TODO deprecate...
get_sources = inputs
get_events = events
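A hypothetical downstream usage sketch (not part of this commit) for the renamed accessors:
def _example_usage() -> None:  # hypothetical; requires my.config.reddit to be set up
    import my.reddit
    print(len(list(my.reddit.saved())))         # saved posts/comments across all exports
    for e in my.reddit.events(parallel=False):  # favorite/unfavorite events reconstructed above
        print(e.text, e.url)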


@@ -2,8 +2,6 @@
Phone calls and SMS messages
"""
# TODO extract SMS as well? I barely use them though..
from . import init
from datetime import datetime
from pathlib import Path
from typing import NamedTuple, Iterator, Set


@@ -7,10 +7,32 @@ import zipfile
from my.kython.kompress import kopen, kexists, CPath
def test_kopen(tmp_path: Path) -> None:
"Plaintext handled transparently"
assert kopen(tmp_path / 'file' ).read() == 'just plaintext'
assert kopen(tmp_path / 'file.xz').read() == 'compressed text'
"For zips behaviour is a bit different (not sure about all this, tbh...)"
assert kopen(tmp_path / 'file.zip', 'path/in/archive').read() == 'data in zip'
def test_kexists(tmp_path: Path) -> None:
assert kexists(str(tmp_path / 'file.zip'), 'path/in/archive')
assert not kexists(str(tmp_path / 'file.zip'), 'path/notin/archive')
# TODO not sure about this?
assert not kexists(tmp_path / 'nosuchzip.zip', 'path/in/archive')
def test_cpath(tmp_path: Path) -> None:
CPath(str(tmp_path / 'file' )).read_text() == 'just plaintext'
CPath( tmp_path / 'file.xz').read_text() == 'compressed text'
# TODO not sure about zip files??
import pytest # type: ignore
@pytest.fixture
@pytest.fixture(autouse=True)
def prepare(tmp_path: Path):
(tmp_path / 'file').write_text('just plaintext')
with (tmp_path / 'file.xz').open('wb') as f:
@@ -24,24 +46,5 @@ def prepare(tmp_path: Path):
pass
def test_kopen(prepare, tmp_path: Path) -> None:
"Plaintext handled transparently"
assert kopen(tmp_path / 'file' ).read() == 'just plaintext'
assert kopen(tmp_path / 'file.xz').read() == 'compressed text'
"For zips behaviour is a bit different (not sure about all this, tbh...)"
assert kopen(tmp_path / 'file.zip', 'path/in/archive').read() == 'data in zip'
def test_kexists(prepare, tmp_path: Path) -> None:
assert kexists(str(tmp_path / 'file.zip'), 'path/in/archive')
assert not kexists(str(tmp_path / 'file.zip'), 'path/notin/archive')
# TODO not sure about this?
assert not kexists(tmp_path / 'nosuchzip.zip', 'path/in/archive')
def test_cpath(prepare, tmp_path: Path) -> None:
CPath(str(tmp_path / 'file' )).read_text() == 'just plaintext'
CPath( tmp_path / 'file.xz').read_text() == 'compressed text'
# TODO not sure about zip files??
# meh
from my.core.error import test_sort_res_by


@@ -1,4 +1,57 @@
# ugh. workaround for https://github.com/pytest-dev/pytest/issues/1927
from my.reddit import *
from datetime import datetime
import pytz
# TODO for reddit test, patch up to take every 10th archive or something; but make sure it's deterministic
from my.reddit import events, inputs, saved
from my.common import make_dict
def test() -> None:
list(events())
list(saved())
def test_unfav() -> None:
ev = events()
url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
uev = [e for e in ev if e.url == url]
assert len(uev) == 2
ff = uev[0]
# TODO could recover these from takeout perhaps?
assert ff.text == 'favorited [initial]'
uf = uev[1]
assert uf.text == 'unfavorited'
def test_saves() -> None:
# TODO not sure if this is necessary anymore?
saves = list(saved())
# just check that they are unique..
make_dict(saves, key=lambda s: s.sid)
def test_disappearing() -> None:
# eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
# but I guess it was just a short glitch... so whatever
saves = events()
favs = [s.kind for s in saves if s.text == 'favorited']
[deal_with_it] = [f for f in favs if f.title == '"Deal with it!"']
assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
def test_unfavorite() -> None:
evs = events()
unfavs = [s for s in evs if s.text == 'unfavorited']
[xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)
import pytest # type: ignore
@pytest.fixture(autouse=True, scope='module')
def prepare():
from my.common import get_files
from my.config import reddit as config
files = get_files(config.export_dir)
# use less files for the test to make it faster
# the first bit is for test_unfavorite, the second is for test_disappearing
files = files[300:330] + files[500:520]
config.export_dir = files # type: ignore


@@ -9,10 +9,10 @@ passenv = CI CI_*
setenv = MY_CONFIG = nonexistent
commands =
pip install -e .[testing]
# TODO ??
# python -m pytest {posargs}
python3 -c 'import my.init; from my.config import stub as config; print(config.key)'
python3 -c 'import my.init; import my.config; import my.config.repos' # shouldn't fail at least
# todo these are probably not necessary anymore?
python3 -c 'from my.config import stub as config; print(config.key)'
python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least
python3 -m pytest tests/misc.py tests/get_files.py tests/config.py::test_set_repo tests/config.py::test_environment_variable
# TODO add; once I figure out porg dependency?? tests/config.py
# TODO run demo.py? just make sure with_my is a bit cleverer?