Merge pull request #42 from karlicoss/updates
cleanup, move stuff to my.core, update docs
Commit: 40b6a82b7c

26 changed files with 471 additions and 429 deletions

README.org (10 changed lines)

@@ -5,6 +5,11 @@

 #+macro: map @@html:<span style='color:darkgreen; font-weight: bolder'>@@$1@@html:</span>@@

+If you're in a hurry, feel free to jump straight to the [[#usecases][demos]].
+
+- see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP]] for the *installation/configuration guide*
+- see [[https://github.com/karlicoss/HPI/tree/master/doc/DEVELOPMENT.org][DEVELOPMENT]] for the *development guide*
+
 *TLDR*: I'm using [[https://github.com/karlicoss/HPI][HPI]] (Human Programming Interface) package as a means of unifying, accessing and interacting with all of my personal data.

 It's a Python library (named ~my~), a collection of modules for:

@@ -48,11 +53,6 @@ and that's why I'm sharing this.
 Imagine if all your life was reflected digitally and available at your fingertips.
 This library is my attempt to achieve this vision.

-If you're in a hurry, feel free to jump straight to the [[#usecases][demos]].
-
-For *installation/configuration/development guide*, see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP.org]].
-
 #+toc: headlines 2

@@ -1,13 +1,45 @@
+* Running tests
+I'm using =tox= to run test/lint. You can check out the [[file:../.github/workflows/main.yml][Github Actions]] config
+and [[file:../scripts/ci/run]] for up-to-date info on the specifics.
+
 * IDE setup: make sure my.config is in your package search path
 At runtime, ~my.config~ is imported from the user config directory dynamically.

 However, Pycharm/Emacs/whatever you use won't be able to figure that out, so you'd need to adjust your IDE configuration.

-- Pycharm: basically, follow the instruction [[https://stackoverflow.com/a/55278260/706389][here]]
+- Pycharm: basically, follow the instructions [[https://stackoverflow.com/a/55278260/706389][here]]

   i.e. create a new interpreter configuration (e.g. name it "Python 3.7 (for HPI)"), and add =~/.config/my=.

 * Linting
-You should be able to use ~./lint~ script to run mypy checks.
+You should be able to use the [[file:../lint]] script to run mypy checks.

-~mypy.ini~ file points at =~/.config/my= by default.
+[[file:../mypy.ini]] points at =~/.config/my= by default.

+* Modifying/adding modules
+
+The easiest way is just to run HPI via the [[file:SETUP.org::#use-without-installing][with_my]] wrapper or with an editable PIP install.
+That way your changes will be reflected immediately, and you will be able to quickly iterate/fix bugs/add new methods.
+
+The "proper" way (unless you want to contribute to the upstream) is to create a separate hierarchy and add your module to =PYTHONPATH=.
+
+For example, if you want to add an =awesomedatasource=, it could be:
+
+: custom_module
+: └── my
+:     └── awesomedatasource.py
+
+You can use all existing HPI modules in =awesomedatasource.py=, for example =my.config= or everything from =my.core=.
+
+You can also use all the previously defined HPI modules. This could be useful to *shadow/override* an existing HPI module:
+
+: custom_reddit_overlay
+: └── my
+:     └── reddit.py
+
+Now if you add =custom_reddit_overlay= *at the front* of ~PYTHONPATH~, all the downstream scripts using =my.reddit= will load it from =custom_reddit_overlay= instead.
+
+This could be useful to monkey patch some behaviours, or dynamically add some extra data sources -- anything that comes to your mind.
+
+I'll put up a better guide on this; in the meantime see [[https://packaging.python.org/guides/packaging-namespace-packages]["namespace packages"]] for more info.

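To make the =awesomedatasource= layout above concrete, here is a minimal sketch of what such a user module could look like. It is not part of this commit; the export path and the Item shape are made up, while get_files and LazyLogger are helpers this PR consolidates under my.core:

# custom_module/my/awesomedatasource.py -- hypothetical sketch, not part of this commit
from pathlib import Path
from typing import Iterator, NamedTuple

from my.core.common import get_files, LazyLogger  # helpers moved into my.core by this PR

logger = LazyLogger(__name__, level='debug')


class Item(NamedTuple):
    path: Path


def inputs() -> Iterator[Path]:
    # hypothetical export location; a real module would read this from my.config instead
    yield from get_files('/path/to/awesomedatasource/exports')


def items() -> Iterator[Item]:
    for p in inputs():
        logger.info('processing %s', p)
        yield Item(path=p)

With =custom_module= on =PYTHONPATH= this becomes importable as ~my.awesomedatasource~, just like the built-in modules.
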
lint (24 changed lines)

@@ -31,25 +31,29 @@ def package_name(p: Path) -> str:
     else:
         return mname(p)


+def subpackages(package: str) -> Iterable[str]:
+    ppath = package.replace('.', '/')
+    yield from sorted({
+        package_name(p.relative_to(DIR)) for p in (DIR / ppath).rglob('*.py')
+    })
+
+
 # TODO meh.. think how to check _everything_ on CI
 def core_modules() -> Iterable[str]:
     return [
-        'my.common',
+        *subpackages('my.core'),
+        *subpackages('my.kython'),
         'my.config',
-        'my.core',
         'my.cfg',
-        'my.error',
-        'my.init',
         'tests/misc.py',
         'tests/get_files.py',
         # 'tests/config.py', TODO hmm. unclear how to type check this module
     ]


 def all_modules() -> Iterable[str]:
-    yield from sorted(set(
-        package_name(p.relative_to(DIR)) for p in (DIR / 'my').rglob('*.py')
-    ))
+    yield from subpackages('my')
     yield from sorted(
         str(f.relative_to(DIR)) for f in (DIR / 'tests').rglob('*.py')
     )

@@ -63,11 +67,13 @@ def pylint():

 def mypy(thing: str):
     is_package = Path(thing).suffix != '.py'
-    return run([
+    cmd = [
         'mypy',
         '--color-output', # TODO eh? doesn't work..
         *(['-p'] if is_package else []), thing,
-    ], stdout=PIPE, stderr=PIPE)
+    ]
+    print(' '.join(cmd), file=sys.stderr)
+    return run(cmd, stdout=PIPE, stderr=PIPE)


 def mypy_all() -> Iterable[Exception]:

@@ -1,8 +1,6 @@
 """
 [[https://uk.kobobooks.com/products/kobo-aura-one][Kobo]] e-ink reader: annotations and reading stats
 """
-from .. import init
-
 from typing import Callable, Union, List

 from my.config import kobo as config

@@ -13,7 +13,7 @@ from my.config.holidays_data import HOLIDAYS_DATA

 # pip3 install workalendar
 from workalendar.europe import UnitedKingdom # type: ignore
-cal = UnitedKingdom() # TODO FIXME specify in config
+cal = UnitedKingdom() # TODO
 # TODO that should depend on country/'location' of residence I suppose?

@@ -12,15 +12,12 @@ After that, you can set config attributes:
         export_path='/path/to/twitter/exports',
     )
 """
-# TODO later, If I have config stubs that might be unnecessary too..
+# todo why do we bring this into scope? don't remember..

-from . import init
-
 import my.config as config


 def set_repo(name: str, repo):
-    from .init import assign_module
+    from .core.init import assign_module
     from . common import import_from

     module = import_from(repo, name)

@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from .. import init
-
 from my.config import codeforces as config

 from datetime import datetime

@@ -1,9 +1,6 @@
 """
 Github events and their metadata: comments/issues/pull requests
 """
-
-from .. import init
-
 from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar, Set
 from datetime import datetime
 import json

@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from .. import init
-
 from my.config import topcoder as config

 from datetime import datetime

my/common.py (199 changed lines)

@@ -1,197 +1,2 @@
+# will be deprecated. please add stuff to my.core
+from .core.common import *

(The 197 removed lines are the helpers that used to live here -- PathIsh, import_file, import_from, the, group_by_key, make_dict, cproperty, listify, get_files, mcachew, fastermime, Json and the LazyLogger re-import -- which move, with only minor import/comment tweaks, into the new my/core/common.py below.)

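The two remaining lines above turn my/common.py into a compatibility shim: existing imports keep resolving while new code can target my.core directly. A tiny illustration (assuming an installed, configured HPI checkout):

from my.common import get_files                         # old location, now a star re-export
from my.core.common import get_files as get_files_new   # new canonical location

assert get_files is get_files_new   # same function object, so old callers are unaffected
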
@@ -1,5 +1,5 @@
 # TODO ok, this thing should trigger .cfg import presumably??
-from .. import init
+from ..core import init

 # TODO maybe, reuse mycfg_template here?

my/core/common.py (new file, 197 lines)

from glob import glob as do_glob
from pathlib import Path
import functools
import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
import warnings

# some helper functions
PathIsh = Union[Path, str]

# TODO port annotations to kython?..
def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
    p = Path(p)
    if name is None:
        name = p.stem
    import importlib.util
    spec = importlib.util.spec_from_file_location(name, p)
    foo = importlib.util.module_from_spec(spec)
    loader = spec.loader; assert loader is not None
    loader.exec_module(foo) # type: ignore[attr-defined]
    return foo


def import_from(path: PathIsh, name: str) -> types.ModuleType:
    path = str(path)
    import sys
    try:
        sys.path.append(path)
        import importlib
        return importlib.import_module(name)
    finally:
        sys.path.remove(path)


T = TypeVar('T')
K = TypeVar('K')
V = TypeVar('V')

def the(l: Iterable[T]) -> T:
    it = iter(l)
    try:
        first = next(it)
    except StopIteration as ee:
        raise RuntimeError('Empty iterator?')
    assert all(e == first for e in it)
    return first


# TODO more_itertools.bucket?
def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
    res: Dict[K, List[T]] = {}
    for i in l:
        kk = key(i)
        lst = res.get(kk, [])
        lst.append(i)
        res[kk] = lst
    return res


def _identity(v: T) -> V:
    return cast(V, v)

def make_dict(l: Iterable[T], key: Callable[[T], K], value: Callable[[T], V]=_identity) -> Dict[K, V]:
    res: Dict[K, V] = {}
    for i in l:
        k = key(i)
        v = value(i)
        pv = res.get(k, None) # type: ignore
        if pv is not None:
            raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}")
        res[k] = v
    return res


Cl = TypeVar('Cl')
R = TypeVar('R')

def cproperty(f: Callable[[Cl], R]) -> R:
    return property(functools.lru_cache(maxsize=1)(f)) # type: ignore


# https://stackoverflow.com/a/12377059/706389
def listify(fn=None, wrapper=list):
    """
    Wraps a function's return value in wrapper (e.g. list)
    Useful when an algorithm can be expressed more cleanly as a generator
    """
    def listify_return(fn):
        @functools.wraps(fn)
        def listify_helper(*args, **kw):
            return wrapper(fn(*args, **kw))
        return listify_helper
    if fn is None:
        return listify_return
    return listify_return(fn)


# todo use in bluemaestro
# def dictify(fn=None, key=None, value=None):
#     def md(it):
#         return make_dict(it, key=key, value=value)
#     return listify(fn=fn, wrapper=md)


from ..kython.klogging import setup_logger, LazyLogger


Paths = Union[Sequence[PathIsh], PathIsh]

DEFAULT_GLOB = '*'
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
    """
    Helper function to avoid boilerplate.

    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
    """
    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
    sources: List[Path] = []
    if isinstance(pp, (str, Path)):
        sources.append(Path(pp))
    else:
        sources.extend(map(Path, pp))

    paths: List[Path] = []
    for src in sources:
        if src.is_dir():
            gp: Iterable[Path] = src.glob(glob)
            paths.extend(gp)
        else:
            ss = str(src)
            if '*' in ss:
                if glob != DEFAULT_GLOB:
                    warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
                paths.extend(map(Path, do_glob(ss)))
            else:
                assert src.is_file(), src
                # todo assert matches glob??
                paths.append(src)

    if sort:
        paths = list(sorted(paths))
    return tuple(paths)


# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from typing import Callable, TypeVar
    from typing_extensions import Protocol
    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
    # ok, that's actually a super nice 'pattern'
    F = TypeVar('F')
    class McachewType(Protocol):
        def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
            ...

    mcachew: McachewType

def mcachew(*args, **kwargs): # type: ignore[no-redef]
    """
    Stands for 'Maybe cachew'.
    Defensive wrapper around @cachew to make it an optional dependency.
    """
    try:
        import cachew
    except ModuleNotFoundError:
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
        return lambda orig_func: orig_func
    else:
        import cachew.experimental
        cachew.experimental.enable_exceptions() # TODO do it only once?
        return cachew.cachew(*args, **kwargs)


@functools.lru_cache(1)
def _magic():
    import magic # type: ignore
    return magic.Magic(mime=True)


# TODO could reuse in pdf module?
import mimetypes # todo do I need init()?
# todo wtf? fastermime thinks it's mime is application/json even if the extension is xz??
# whereas magic detects correctly: application/x-zstd and application/x-xz
def fastermime(path: PathIsh) -> str:
    paths = str(path)
    # mimetypes is faster
    (mime, _) = mimetypes.guess_type(paths)
    if mime is not None:
        return mime
    # magic is slower but returns more stuff
    # TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
    return _magic().from_file(paths)


Json = Dict[str, Any]

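A short sketch of how the two central helpers above are meant to be used together; the paths are hypothetical, and cachew staying optional is exactly what mcachew is for:

from typing import Iterator

from my.core.common import get_files, mcachew


@mcachew('/tmp/example.cache')  # hypothetical cache path; silently degrades to a no-op without cachew
def lines() -> Iterator[str]:
    # get_files takes a dir, a file, a glob or a sequence of them, and returns a sorted tuple of Paths
    for p in get_files('/path/to/exports/*.json'):  # hypothetical export glob
        yield p.read_text()
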
my/core/error.py (new file, 99 lines)

"""
Various error handling helpers
See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
"""

from itertools import tee
from typing import Union, TypeVar, Iterable, List, Tuple, Type


T = TypeVar('T')
E = TypeVar('E', bound=Exception) # TODO make covariant?

ResT = Union[T, E]

Res = ResT[T, Exception]


def unwrap(res: Res[T]) -> T:
    if isinstance(res, Exception):
        raise res
    else:
        return res


def echain(ex: E, cause: Exception) -> E:
    ex.__cause__ = cause
    return ex


def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
    # TODO would be nice to have ET=Exception default?
    vit, eit = tee(l)
    # TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
    values: Iterable[T] = (
        r # type: ignore[misc]
        for r in vit
        if not isinstance(r, ET))
    errors: Iterable[E] = (
        r
        for r in eit
        if isinstance(r, ET))
    # TODO would be interesting to be able to have yield statement anywhere in code
    # so there are multiple 'entry points' to the return value
    return (values, errors)


def sort_res_by(items: Iterable[ResT], key) -> List[ResT]:
    """
    The general idea is: just always carry errors with the entry that precedes them
    """
    # TODO ResT object should hold exception class?...
    group = []
    groups = []
    for i in items:
        if isinstance(i, Exception):
            group.append(i)
        else:
            groups.append((i, group))
            group = []

    results = []
    for v, errs in sorted(groups, key=lambda p: key(p[0])):
        results.extend(errs)
        results.append(v)
    results.extend(group)

    return results


def test_sort_res_by() -> None:
    class Exc(Exception):
        def __eq__(self, other):
            return self.args == other.args

    ress = [
        Exc('first'),
        Exc('second'),
        5,
        3,
        Exc('xxx'),
        2,
        1,
        Exc('last'),
    ]
    results = sort_res_by(ress, lambda x: x) # type: ignore
    assert results == [
        1,
        Exc('xxx'),
        2,
        3,
        Exc('first'),
        Exc('second'),
        5,
        Exc('last'),
    ]

    results2 = sort_res_by(ress + [0], lambda x: x) # type: ignore
    assert results2 == [Exc('last'), 0] + results[:-1]

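A short usage sketch for the error helpers above, in the "carry errors alongside values" style from the linked post; the data source here is made up:

from typing import Iterator

from my.core.error import Res, sort_res_by, unwrap


def entries() -> Iterator[Res[int]]:
    # a provider yields the exceptions it hits instead of raising, so one bad item doesn't kill the stream
    yield 1
    yield RuntimeError('corrupt input')  # hypothetical failure
    yield 2


values = [e for e in entries() if not isinstance(e, Exception)]  # keep only the good items
ordered = sort_res_by(entries(), key=lambda x: x)                # or keep errors next to their neighbours
first = unwrap(next(iter(entries())))                            # raises if the first item is an error
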
@@ -8,9 +8,10 @@ A hook to insert user's config directory into Python's search path.
 Please let me know if you are aware of a better way of dealing with this!
 '''

+from types import ModuleType

 # TODO not ideal to keep it here, but this should really be a leaf in the import tree
-def assign_module(parent: str, name: str, module):
+def assign_module(parent: str, name: str, module: ModuleType) -> None:
     import sys
     import importlib
     parent_module = importlib.import_module(parent)

@@ -20,13 +21,15 @@ def assign_module(parent: str, name: str, module):
     # TODO that crap should be tested... I guess will get it for free when I run rest of tests in the matrix
     setattr(parent_module, name, module)

+del ModuleType

 # separate function to prevent namespace pollution
-def setup_config():
+def setup_config() -> None:
     from pathlib import Path
     import sys
     import os
     import warnings
+    from typing import Optional

     # not sure if that's necessary, i.e. could rely on PYTHONPATH instead
     # on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path?

my/error.py (101 changed lines)

@@ -1,99 +1,2 @@
+# will be deprecated. please add stuff to my.core
+from .core.error import *

(The 99 removed lines are the error helpers that used to live here -- unwrap, echain, split_errors, sort_res_by and its test -- which move, essentially unchanged apart from an added return annotation on the test, into the new my/core/error.py above.)

@@ -1,8 +1,6 @@
 """
 [[https://hypothes.is][Hypothes.is]] highlights and annotations
 """
-from . import init
-
 from .common import get_files
 from .error import Res, sort_res_by

@@ -1,8 +1,6 @@
 """
 [[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews
 """
-from . import init
-
 from datetime import datetime
 from typing import Any, Dict, Iterator, NamedTuple

@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-
-from .. import init
-
 import csv
 import json
 from datetime import datetime

@@ -2,9 +2,6 @@
 '''
 PDF documents and annotations on your filesystem
 '''
-
-from . import init
-
 from concurrent.futures import ProcessPoolExecutor
 from datetime import datetime
 import re

@@ -1,8 +1,6 @@
 """
 [[https://pinboard.in][Pinboard]] bookmarks
 """
-from . import init
-
 from .common import get_files

 from my.config.repos.pinbexport import dal as pinbexport

my/reddit.py (77 changed lines)

@@ -1,8 +1,6 @@
 """
 Reddit data: saved items/comments/upvotes/etc.
 """
-from . import init
-
 from pathlib import Path
 from typing import List, Sequence, Mapping, Iterator

@@ -13,14 +11,14 @@ from my.config import reddit as config
 import my.config.repos.rexport.dal as rexport


-def get_sources() -> Sequence[Path]:
+def inputs() -> Sequence[Path]:
     # TODO rename to export_path?
     files = get_files(config.export_dir)
+    # TODO Cpath better be automatic by get_files...
     res = list(map(CPath, files)); assert len(res) > 0
     # todo move the assert to get_files?
     return tuple(res)


 logger = LazyLogger(__name__, level='debug')

@@ -32,30 +30,30 @@ Upvote = rexport.Upvote


 def dal() -> rexport.DAL:
-    # TODO lru cache? but be careful when it runs continuously
-    return rexport.DAL(get_sources())
+    return rexport.DAL(inputs())


-@mcachew(hashf=lambda: get_sources())
+@mcachew(hashf=lambda: inputs())
 def saved() -> Iterator[Save]:
     return dal().saved()


-@mcachew(hashf=lambda: get_sources())
+@mcachew(hashf=lambda: inputs())
 def comments() -> Iterator[Comment]:
     return dal().comments()


-@mcachew(hashf=lambda: get_sources())
+@mcachew(hashf=lambda: inputs())
 def submissions() -> Iterator[Submission]:
     return dal().submissions()


-@mcachew(hashf=lambda: get_sources())
+@mcachew(hashf=lambda: inputs())
 def upvoted() -> Iterator[Upvote]:
     return dal().upvoted()

+### the rest of the file is some elaborate attempt of restoring favorite/unfavorite times

 from typing import Dict, Union, Iterable, Iterator, NamedTuple, Any
 from functools import lru_cache

@@ -115,10 +113,11 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
         key=lambda s: s.save.sid,
     )

+# TODO hmm. think about it.. if we set default backups=inputs()
+# it's called early so it ends up as a global variable that we can't monkey patch easily
 @mcachew('/L/data/.cache/reddit-events.cache')
-def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> Iterator[Event]:
+def _get_events(backups: Sequence[Path], parallel: bool=True) -> Iterator[Event]:
     # TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
-    # parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?

     prev_saves: Mapping[Sid, SaveWithDt] = {}
     # TODO suppress first batch??

@@ -168,55 +167,18 @@ def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> Iterator[Event]:
     # TODO a bit awkward, favorited should compare lower than unfavorited?


 @lru_cache(1)
-def get_events(*args, **kwargs) -> List[Event]:
-    evit = _get_events(*args, **kwargs)
+def events(*args, **kwargs) -> List[Event]:
+    evit = _get_events(inputs(), *args, **kwargs)
     return list(sorted(evit, key=lambda e: e.cmp_key))

+##
-
-def test() -> None:
-    get_events(backups=get_sources()[-1:])
-    list(saved())
-
-
-def test_unfav() -> None:
-    events = get_events()
-    url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
-    uevents = [e for e in events if e.url == url]
-    assert len(uevents) == 2
-    ff = uevents[0]
-    assert ff.text == 'favorited'
-    uf = uevents[1]
-    assert uf.text == 'unfavorited'
-
-# TODO move out..
-def test_get_all_saves() -> None:
-    # TODO not sure if this is necesasry anymore?
-    saves = list(saved())
-    # just check that they are unique..
-    make_dict(saves, key=lambda s: s.sid)
-
-
-def test_disappearing() -> None:
-    # eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
-    # but I guess it was just a short glitch... so whatever
-    saves = get_events()
-    favs = [s.kind for s in saves if s.text == 'favorited']
-    [deal_with_it] = [f for f in favs if f.title == '"Deal with it!"']
-    assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
-
-
-def test_unfavorite() -> None:
-    events = get_events()
-    unfavs = [s for s in events if s.text == 'unfavorited']
-    [xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
-    assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)

 def main() -> None:
     # TODO eh. not sure why but parallel on seems to mess glumov up and cause OOM...
-    events = get_events(parallel=False)
-    print(len(events))
-    for e in events:
+    el = events(parallel=False)
+    print(len(el))
+    for e in el:
         print(e.text, e.url)
     # for e in get_
     # 509 with urls..

@@ -226,3 +188,8 @@ def main() -> None:

 if __name__ == '__main__':
     main()
+
+
+# TODO deprecate...
+get_sources = inputs
+get_events = events

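For downstream code the visible part of the my/reddit.py change is the rename; a small sketch of the new entry points (this needs a configured my.config.reddit to actually run):

import my.reddit

paths = my.reddit.inputs()       # previously get_sources()
for e in my.reddit.events():     # previously get_events()
    print(e.text, e.url)

# the old names are kept as deprecation aliases at the bottom of the module
assert my.reddit.get_sources is my.reddit.inputs
assert my.reddit.get_events is my.reddit.events
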
@@ -2,8 +2,6 @@
 Phone calls and SMS messages
 """
 # TODO extract SMS as well? I barely use them though..
-from . import init
-
 from datetime import datetime
 from pathlib import Path
 from typing import NamedTuple, Iterator, Set

@@ -7,10 +7,32 @@ import zipfile

 from my.kython.kompress import kopen, kexists, CPath

+def test_kopen(tmp_path: Path) -> None:
+    "Plaintext handled transparently"
+    assert kopen(tmp_path / 'file'   ).read() == 'just plaintext'
+    assert kopen(tmp_path / 'file.xz').read() == 'compressed text'
+
+    "For zips behaviour is a bit different (not sure about all this, tbh...)"
+    assert kopen(tmp_path / 'file.zip', 'path/in/archive').read() == 'data in zip'
+
+
+def test_kexists(tmp_path: Path) -> None:
+    assert     kexists(str(tmp_path / 'file.zip'), 'path/in/archive')
+    assert not kexists(str(tmp_path / 'file.zip'), 'path/notin/archive')
+
+    # TODO not sure about this?
+    assert not kexists(tmp_path / 'nosuchzip.zip', 'path/in/archive')
+
+
+def test_cpath(tmp_path: Path) -> None:
+    CPath(str(tmp_path / 'file'  )).read_text() == 'just plaintext'
+    CPath(    tmp_path / 'file.xz').read_text() == 'compressed text'
+    # TODO not sure about zip files??
+
+
 import pytest # type: ignore

-@pytest.fixture
+@pytest.fixture(autouse=True)
 def prepare(tmp_path: Path):
     (tmp_path / 'file').write_text('just plaintext')
     with (tmp_path / 'file.xz').open('wb') as f:

@@ -24,24 +46,5 @@ def prepare(tmp_path: Path):
         pass


-def test_kopen(prepare, tmp_path: Path) -> None:
-    "Plaintext handled transparently"
-    assert kopen(tmp_path / 'file'   ).read() == 'just plaintext'
-    assert kopen(tmp_path / 'file.xz').read() == 'compressed text'
-
-    "For zips behaviour is a bit different (not sure about all this, tbh...)"
-    assert kopen(tmp_path / 'file.zip', 'path/in/archive').read() == 'data in zip'
-
-
-def test_kexists(prepare, tmp_path: Path) -> None:
-    assert     kexists(str(tmp_path / 'file.zip'), 'path/in/archive')
-    assert not kexists(str(tmp_path / 'file.zip'), 'path/notin/archive')
-
-    # TODO not sure about this?
-    assert not kexists(tmp_path / 'nosuchzip.zip', 'path/in/archive')
-
-
-def test_cpath(prepare, tmp_path: Path) -> None:
-    CPath(str(tmp_path / 'file'  )).read_text() == 'just plaintext'
-    CPath(    tmp_path / 'file.xz').read_text() == 'compressed text'
-    # TODO not sure about zip files??
+# meh
+from my.core.error import test_sort_res_by

@@ -1,4 +1,57 @@
-# ugh. workaround for https://github.com/pytest-dev/pytest/issues/1927
-from my.reddit import *
+from datetime import datetime
+import pytz

-# TODO for reddit test, patch up to take every 10th archive or something; but make sure it's deterministic
+from my.reddit import events, inputs, saved
+from my.common import make_dict
+
+
+def test() -> None:
+    list(events())
+    list(saved())
+
+
+def test_unfav() -> None:
+    ev = events()
+    url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
+    uev = [e for e in ev if e.url == url]
+    assert len(uev) == 2
+    ff = uev[0]
+    # TODO could recover these from takeout perhaps?
+    assert ff.text == 'favorited [initial]'
+    uf = uev[1]
+    assert uf.text == 'unfavorited'
+
+
+def test_saves() -> None:
+    # TODO not sure if this is necessary anymore?
+    saves = list(saved())
+    # just check that they are unique..
+    make_dict(saves, key=lambda s: s.sid)
+
+
+def test_disappearing() -> None:
+    # eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
+    # but I guess it was just a short glitch... so whatever
+    saves = events()
+    favs = [s.kind for s in saves if s.text == 'favorited']
+    [deal_with_it] = [f for f in favs if f.title == '"Deal with it!"']
+    assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
+
+
+def test_unfavorite() -> None:
+    evs = events()
+    unfavs = [s for s in evs if s.text == 'unfavorited']
+    [xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
+    assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)
+
+
+import pytest # type: ignore
+@pytest.fixture(autouse=True, scope='module')
+def prepare():
+    from my.common import get_files
+    from my.config import reddit as config
+    files = get_files(config.export_dir)
+    # use fewer files for the test to make it faster
+    # the first slice is for test_unfavorite, the second is for test_disappearing
+    files = files[300:330] + files[500:520]
+    config.export_dir = files # type: ignore

tox.ini (6 changed lines)

@@ -9,10 +9,10 @@ passenv = CI CI_*
 setenv = MY_CONFIG = nonexistent
 commands =
     pip install -e .[testing]
-    # TODO ??
     # python -m pytest {posargs}
-    python3 -c 'import my.init; from my.config import stub as config; print(config.key)'
-    python3 -c 'import my.init; import my.config; import my.config.repos' # shouldn't fail at least
+    # todo these are probably not necessary anymore?
+    python3 -c 'from my.config import stub as config; print(config.key)'
+    python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least
     python3 -m pytest tests/misc.py tests/get_files.py tests/config.py::test_set_repo tests/config.py::test_environment_variable
     # TODO add; once I figure out porg dependency?? tests/config.py
     # TODO run demo.py? just make sure with_my is a bit cleverer?