233 changed files with 3623 additions and 6758 deletions
--- a/.ci/run
+++ b/.ci/run
@ -11,8 +11,6 @@ if ! command -v sudo; then
    }
 fi
 # --parallel-live to show outputs while it's running
 tox_cmd='run-parallel --parallel-live'
 if [ -n "${CI-}" ]; then
    # install OS specific stuff here
    case "$OSTYPE" in
@ -22,8 +20,7 @@ if [ -n "${CI-}" ]; then
        ;;
    cygwin* | msys* | win*)
        # windows
-        # ugh. parallel stuff seems super flaky under windows, some random failures, "file used by other process" and crap like that
+        :
        tox_cmd='run'
        ;;
    *)
        # must be linux?
@ -40,9 +37,5 @@ if ! command -v python3 &> /dev/null; then
    PY_BIN="python"
 fi
-
+"$PY_BIN" -m pip install --user tox
-# TODO hmm for some reason installing uv with pip and then running
+"$PY_BIN" -m tox --parallel --parallel-live "$@"
 # "$PY_BIN" -m uv tool fails with missing setuptools error??
 # just uvx directly works, but it's not present in PATH...
 "$PY_BIN" -m pip install --user pipx
 "$PY_BIN" -m pipx run uv tool run --with=tox-uv tox $tox_cmd "$@"
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -21,20 +21,19 @@ on:
 jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        platform: [ubuntu-latest, macos-latest,  windows-latest]
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
        exclude: [
            # windows runners are pretty scarce, so let's only run lowest and highest python version
            {platform: windows-latest, python-version: '3.9' },
            {platform: windows-latest, python-version: '3.10'},
            {platform: windows-latest, python-version: '3.11'},
            {platform: windows-latest, python-version: '3.12'},
            # same, macos is a bit too slow and ubuntu covers python quirks well
            {platform: macos-latest  , python-version: '3.9'  },
            {platform: macos-latest  , python-version: '3.10' },
            {platform: macos-latest  , python-version: '3.11' },
            {platform: macos-latest  , python-version: '3.12' },
        ]
    runs-on: ${{ matrix.platform }}
@ -46,11 +45,11 @@ jobs:
    # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation
    - run: echo "$HOME/.local/bin" >> $GITHUB_PATH
-    - uses: actions/setup-python@v5
+    - uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v3
      with:
        submodules: recursive
        fetch-depth: 0  # nicer to have all git history when debugging/for tests
@ -62,15 +61,13 @@ jobs:
    - run: bash .ci/run
    - if: matrix.platform == 'ubuntu-latest'  # no need to compute coverage for other platforms
-      uses: actions/upload-artifact@v4
+      uses: actions/upload-artifact@v3
      with:
        include-hidden-files: true
        name: .coverage.mypy-misc_${{ matrix.platform }}_${{ matrix.python-version }}
        path: .coverage.mypy-misc/
    - if: matrix.platform == 'ubuntu-latest'  # no need to compute coverage for other platforms
-      uses: actions/upload-artifact@v4
+      uses: actions/upload-artifact@v3
      with:
        include-hidden-files: true
        name: .coverage.mypy-core_${{ matrix.platform }}_${{ matrix.python-version }}
        path: .coverage.mypy-core/
@ -82,11 +79,11 @@ jobs:
    # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation
    - run: echo "$HOME/.local/bin" >> $GITHUB_PATH
-    - uses: actions/setup-python@v5
+    - uses: actions/setup-python@v4
      with:
-        python-version: '3.10'
+        python-version: '3.8'
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v3
      with:
        submodules: recursive
--- a/.gitignore
+++ b/.gitignore
@ -155,9 +155,6 @@ celerybeat-schedule
 .dmypy.json
 dmypy.json
 # linters
 .ruff_cache/
 # Pyre type checker
 .pyre/
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -20,7 +20,7 @@ General/my.core changes:
 - e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test
 Modules:
- some initial work on filling **InfluxDB** with HPI data
+- some innitial work on filling **InfluxDB** with HPI data
 - pinboard
  - 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly
--- a/README.org
+++ b/README.org
@ -723,10 +723,10 @@ If you want to write modules for personal use but don't want to merge them into
 Other HPI Repositories:
- [[https://github.com/purarue/HPI][purarue/HPI]]
+- [[https://github.com/seanbreckenridge/HPI][seanbreckenridge/HPI]]
 - [[https://github.com/madelinecameron/hpi][madelinecameron/HPI]]
-If you want to create your own modules/override something here, you can use the [[https://github.com/purarue/HPI-template][template]].
+If you want to create your own modules/override something here, you can use the [[https://github.com/seanbreckenridge/HPI-template][template]].
 * Related links
 :PROPERTIES:
--- a/conftest.py
+++ b/conftest.py
@ -1,47 +0,0 @@
 # this is a hack to monkey patch pytest so it handles tests inside namespace packages without __init__.py properly
 # without it, pytest can't discover the package root for some reason
 # also see https://github.com/karlicoss/pytest_namespace_pkgs for more
 import os
 import pathlib
 from typing import Optional
 import _pytest.main
 import _pytest.pathlib
 # we consider all dirs in repo/ to be namespace packages
 # root_dir is the directory containing this conftest.py (made absolute, then resolved)
 root_dir = pathlib.Path(__file__).absolute().parent.resolve()  # / 'src'
 assert root_dir.exists(), root_dir
 # TODO assert it contains package name?? maybe get it via setuptools..
 # stored as strings, since resolve_package_path below compares them against str(parent)
 namespace_pkg_dirs = [str(d) for d in root_dir.iterdir() if d.is_dir()]
 # resolve_package_path is called from _pytest.pathlib.import_path
 # takes a full abs path to the test file and needs to return the path to the 'root' package on the filesystem
 # keep a handle on the original implementation so the replacement can fall back to it
 resolve_pkg_path_orig = _pytest.pathlib.resolve_package_path
 def resolve_package_path(path: pathlib.Path) -> Optional[pathlib.Path]:
    """
    Replacement for ``_pytest.pathlib.resolve_package_path``.

    Walks the ancestors of ``path`` (nearest first) and returns the first one
    whose string form is listed in ``namespace_pkg_dirs``.

    On Windows, a ``conftest.py`` path that matches no namespace dir is
    delegated to the original pytest implementation.

    Raises ``RuntimeError`` if no package root could be determined.
    """
    # search from the test file upwards
    for parent in path.parents:
        if str(parent) in namespace_pkg_dirs:
            return parent
    # ??? for some reason on windows it is trying to call this against conftest? but not on linux/osx
    if os.name == 'nt' and path.name == 'conftest.py':
        return resolve_pkg_path_orig(path)
    # single formatted message: passing the path as a second positional arg
    # made str(exception) render as a tuple repr
    raise RuntimeError(f"Couldn't determine path for {path}")
 _pytest.pathlib.resolve_package_path = resolve_package_path
 # without patching, the orig function returns just a package name for some reason
 # (I think it's used as a sort of fallback)
 # so we need to point it at the absolute path properly
 # not sure what are the consequences.. maybe it wouldn't be able to run against installed packages? not sure..
 search_pypath_orig = _pytest.main.search_pypath
 def search_pypath(module_name: str) -> str:
    """
    Replacement for ``_pytest.main.search_pypath``.

    Maps a dotted module name onto a path under ``root_dir``: the directory
    itself if one exists, otherwise the same path with a ``.py`` suffix.
    Always returns the path as a string.
    """
    candidate = root_dir.joinpath(module_name.replace('.', os.sep))
    if candidate.is_dir():
        return str(candidate)
    # not a directory, so it has to be a plain module file
    candidate = candidate.with_suffix('.py')
    assert candidate.exists(), candidate  # just in case
    return str(candidate)
 _pytest.main.search_pypath = search_pypath
--- a/doc/DENYLIST.md
+++ b/doc/DENYLIST.md
@ -76,7 +76,7 @@ This would typically be used in an overridden `all.py` file, or in a one-off scr
 which you may want to filter out some items from a source, progressively adding more
 items to the denylist as you go.
-A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/purarue/HPI)):
+A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/seanbreckenridge/HPI)):
 ```python
 from typing import Iterator
@ -119,9 +119,9 @@ python3 -c 'from my.ip import all; all.deny.deny_cli(all.ips())'
 To edit the `all.py`, you could either:
 - install it as editable (`python3 -m pip install --user -e ./HPI`), and then edit the file directly
- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/purarue/reorder_editable), and possibly the [`HPI-template`](https://github.com/purarue/HPI-template) to create your own HPI namespace package to create your own `all.py` file.
+- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/seanbreckenridge/reorder_editable), and possibly the [`HPI-template`](https://github.com/seanbreckenridge/HPI-template) to create your own HPI namespace package to create your own `all.py` file.
-For a real example of this, see [purarue/HPI-personal](https://github.com/purarue/HPI-personal/blob/master/my/ip/all.py)
+For a real example of this, see [seanbreckenridge/HPI-personal](https://github.com/seanbreckenridge/HPI-personal/blob/master/my/ip/all.py)
 Sidenote: the reason why we want to specifically override
 the all.py and not just create a script that filters out the items you're
--- a/doc/MODULES.org
+++ b/doc/MODULES.org
@ -76,7 +76,7 @@ The config snippets below are meant to be modified accordingly and *pasted into
 You don't have to set up all modules at once, it's recommended to do it gradually, to get the feel of how HPI works.
-For an extensive/complex example, you can check out ~@purarue~'s [[https://github.com/purarue/dotfiles/blob/master/.config/my/my/config/__init__.py][config]]
+For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py][config]]
 # Nested Configurations before the doc generation using the block below
 ** [[file:../my/reddit][my.reddit]]
@ -96,7 +96,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
        class pushshift:
            '''
-            Uses [[https://github.com/purarue/pushshift_comment_export][pushshift]] to get access to old comments
+            Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments
            '''
            # path[s]/glob to the exported JSON data
@ -106,7 +106,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
 ** [[file:../my/browser/][my.browser]]
-    Parses browser history using [[http://github.com/purarue/browserexport][browserexport]]
+    Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
    #+begin_src python
    class browser:
@ -132,7 +132,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
   You might also be able to use [[file:../my/location/via_ip.py][my.location.via_ip]] which uses =my.ip.all= to
   provide geolocation data for IPs (though no IPs are provided from any
- of the sources here). For an example of usage, see [[https://github.com/purarue/HPI/tree/master/my/ip][here]]
+ of the sources here). For an example of usage, see [[https://github.com/seanbreckenridge/HPI/tree/master/my/ip][here]]
    #+begin_src python
    class location:
@ -256,9 +256,9 @@ for cls, p in modules:
 ** [[file:../my/google/takeout/parser.py][my.google.takeout.parser]]
-      Parses Google Takeout using [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]]
+      Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]
-      See [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts
+      See [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts
      If the =DISABLE_TAKEOUT_CACHE= environment variable is set, this won't
      cache individual exports in =~/.cache/google_takeout_parser=
--- a/doc/MODULE_DESIGN.org
+++ b/doc/MODULE_DESIGN.org
@ -67,7 +67,7 @@ If you want to disable a source, you have a few options.
 ... that suppresses the warning message and lets you use ~my.location.all~ without having to change any lines of code
-Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/purarue/HPI#partially-in-usewith-overrides][purarue]]s location and ip modules.
+Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/seanbreckenridge/HPI#partially-in-usewith-overrides][seanbreckenridge]]s location and ip modules.
 This is of course not required for personal or single file modules, it's just the pattern that seems to have the least amount of friction for the user, while being extendable, and without using a bulky plugin system to let users add additional sources.
@ -208,13 +208,13 @@ Where ~lastfm.py~ is your version of ~my.lastfm~, which you've copied from this
 Then, running ~python3 -m pip install -e .~ in that directory would install that as part of the namespace package, and assuming (see below for possible issues) this appears on ~sys.path~ before the upstream repository, your ~lastfm.py~ file overrides the upstream. Adding more files, like ~my.some_new_module~ into that directory immediately updates the global ~my~ package -- allowing you to quickly add new modules without having to re-install.
-If you install both directories as editable packages (which has the benefit of any changes you make in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/purarue/reorder_editable][reorder_editable]] repository.
+If you install both directories as editable packages (which has the benefit of any changes you make in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/seanbreckenridge/reorder_editable][reorder_editable]] repository.
 There is no limit to how many directories you could install into a single namespace package, which could be a possible way for people to install additional HPI modules, without worrying about the module count here becoming too large to manage.
-There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/purarue/HPI-template][template]] to get started.
+There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/seanbreckenridge/HPI-template][template]] to get started.
-Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/purarue/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example.
+Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/seanbreckenridge/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example.
 You may use the other modules or [[https://github.com/karlicoss/hpi-personal-overlay][my overlay]] as reference, but python packaging is already a complicated issue, before adding complexities like namespace packages and editable installs on top of it... If you're having trouble extending HPI in this fashion, you can open an issue here, preferably with a link to your code/repository and/or ~setup.py~ you're trying to use.
--- a/doc/OVERLAYS.org
+++ b/doc/OVERLAYS.org
@ -1,322 +0,0 @@
 NOTE this kinda overlaps with [[file:MODULE_DESIGN.org][the module design doc]], should be unified in the future.
 Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102
 # This is describing TODO
 # TODO goals
 # - overrides
 # - proper mypy support
 # - TODO reusing parent modules?
 # You can see them TODO in overlays dir
 Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.
 - =main= package structure
  # TODO do links
  - =my/twitter/gdpr.py=
    Extracts Twitter data from GDPR archive.
  - =my/twitter/all.py=
    Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used.
    This will be overridden by =overlay=.
  - =my/twitter/common.py=
    Contains helper function to merge data, so they can be reused by overlay's =all.py=.
  - =my/reddit.py=
    Extracts Reddit data -- this won't be overridden by the overlay, we just keep it for demonstration purposes.
 - =overlay= package structure
  - =my/twitter/talon.py=
    Extracts Twitter data from Talon android app.
  - =my/twitter/all.py=
    Override for =all.py= from =main= package -- it merges together data from =gdpr= and =talon= modules.
 # TODO mention resolution? reorder_editable
 * Installing (editable install)
 NOTE: this was tested with =python 3.10= and =pip 23.3.2=.
 To install, we run:
 : pip3 install --user -e overlay/
 : pip3 install --user -e main/
 # TODO mention non-editable installs (this bit will still work with non-editable install)
 As a result, we get:
 : pip3 list | grep hpi
 : hpi-main           0.0.0       /project/main/src
 : hpi-overlay        0.0.0       /project/overlay/src
 : cat ~/.local/lib/python3.10/site-packages/easy-install.pth
 : /project/overlay/src
 : /project/main/src
 (the order above is important, so =overlay= takes precedence over =main= TODO link)
 Verify the setup:
 : $ python3 -c 'import my; print(my.__path__)'
 : _NamespacePath(['/project/overlay/src/my', '/project/main/src/my'])
 This basically means that modules will be searched in both paths, with overlay taking precedence.
 ** Installing with =--use-pep517=
 See here for discussion https://github.com/purarue/reorder_editable/issues/2, but TLDR it should work similarly.
 * Testing runtime behaviour (editable install)
 : $ python3 -c 'import my.reddit as R; print(R.upvotes())'
 : [main] my.reddit hello
 : ['reddit upvote1', 'reddit upvote2']
 Just as expected here, =my.reddit= is imported from the =main= package, since it doesn't exist in =overlay=.
 Let's check twitter now:
 : $ python3 -c 'import my.twitter.all as T; print(T.tweets())'
 : [overlay] my.twitter.all hello
 : [main] my.twitter.common hello
 : [main] my.twitter.gdpr hello
 : [overlay] my.twitter.talon hello
 : ['gdpr tweet 1', 'gdpr tweet 2', 'talon tweet 1', 'talon tweet 2']
 As expected, =my.twitter.all= was imported from the =overlay=.
 As you can see it's merged data from =gdpr= (from =main= package) and =talon= (from =overlay= package).
 So far so good, let's see how it works with mypy.
 * Mypy support (editable install)
 To check that mypy works as expected I injected some statements in modules that have no impact on runtime,
 but should trigger mypy, like this =trigger_mypy_error: str = 123=:
 Let's run it:
 : $ mypy --namespace-packages --strict -p my
 : overlay/src/my/twitter/talon.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")
 : [assignment]
 :     trigger_mypy_error: str = 123
 :                               ^
 : Found 1 error in 1 file (checked 4 source files)
 Hmm, this did find the statement in the =overlay=, but missed everything from =main= (e.g. =reddit.py= and =gdpr.py= should have also triggered the check).
 First, let's check which sources mypy is processing:
 : $ mypy --namespace-packages --strict -p my -v 2>&1 | grep BuildSource
 : LOG:  Found source:           BuildSource(path='/project/overlay/src/my', module='my', has_text=False, base_dir=None)
 : LOG:  Found source:           BuildSource(path='/project/overlay/src/my/twitter', module='my.twitter', has_text=False, base_dir=None)
 : LOG:  Found source:           BuildSource(path='/project/overlay/src/my/twitter/all.py', module='my.twitter.all', has_text=False, base_dir=None)
 : LOG:  Found source:           BuildSource(path='/project/overlay/src/my/twitter/talon.py', module='my.twitter.talon', has_text=False, base_dir=None)
 So seems like mypy is not processing anything from =main= package at all?
 At this point I cloned mypy, put a breakpoint, and found out this is the culprit: https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/mypy/modulefinder.py#L288
 This basically returns the first path where it finds =my= package, which happens to be the overlay in this case.
 So everything else is ignored?
 It even seems to have a test for a similar usecase, which is quite sad.
 https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/mypy/test/testmodulefinder.py#L64-L71
 For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683
 But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
 Let's see what's going on with imports:
 : $ mypy --namespace-packages --strict -p my --follow-imports=error
 : overlay/src/my/twitter/talon.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")
 : [assignment]
 :     trigger_mypy_error: str = 123
 :                               ^
 : overlay/src/my/twitter/all.py:3: error: Import of "my.twitter.common" ignored  [misc]
 :     from .common import merge
 :     ^
 : overlay/src/my/twitter/all.py:6: error: Import of "my.twitter.gdpr" ignored  [misc]
 :         from . import gdpr
 :     ^
 : overlay/src/my/twitter/all.py:6: note: (Using --follow-imports=error, module not passed on command line)
 : overlay/src/my/twitter/all.py: note: In function "tweets":
 : overlay/src/my/twitter/all.py:8: error: Returning Any from function declared to return "List[str]"  [no-any-return]
 :         return merge(gdpr, talon)
 :         ^
 : Found 4 errors in 2 files (checked 4 source files)
 Nope -- looks like it's completely unaware of =main=, and what's worse, by default (without tweaking =--follow-imports=), these errors would be suppressed.
 What if we check =my.twitter= directly?
 : $ mypy --namespace-packages --strict -p my.twitter  --follow-imports=error
 : overlay/src/my/twitter/talon.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")
 : [assignment]
 :     trigger_mypy_error: str = 123
 :                               ^~~
 : overlay/src/my/twitter: error: Ancestor package "my" ignored  [misc]
 : overlay/src/my/twitter: note: (Using --follow-imports=error, submodule passed on command line)
 : overlay/src/my/twitter/all.py:3: error: Import of "my.twitter.common" ignored  [misc]
 :     from .common import merge
 :     ^
 : overlay/src/my/twitter/all.py:3: note: (Using --follow-imports=error, module not passed on command line)
 : overlay/src/my/twitter/all.py:6: error: Import of "my.twitter.gdpr" ignored  [misc]
 :         from . import gdpr
 :     ^
 : overlay/src/my/twitter/all.py: note: In function "tweets":
 : overlay/src/my/twitter/all.py:8: error: Returning Any from function declared to return "list[str]"  [no-any-return]
 :         return merge(gdpr, talon)
 :         ^~~~~~~~~~~~~~~~~~~~~~~~~
 : Found 5 errors in 3 files (checked 3 source files)
 Now we're also getting =error: Ancestor package "my" ignored  [misc]= .. not ideal.
 * What if we don't install at all?
 Instead of editable install let's try running mypy directly over source files
 First let's only check =main= package:
 : $ MYPYPATH=main/src mypy --namespace-packages --strict -p my
 : main/src/my/twitter/gdpr.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")  [assignment]
 :     trigger_mypy_error: str = 123
 :                               ^~~
 : main/src/my/reddit.py:11: error: Incompatible types in assignment (expression has type "int", variable has type "str")  [assignment]
 :     trigger_mypy_error: str = 123
 :                               ^~~
 : Found 2 errors in 2 files (checked 6 source files)
 As expected, it found both errors.
 Now with overlay as well:
 : $ MYPYPATH=overlay/src:main/src mypy --namespace-packages --strict -p my
 : overlay/src/my/twitter/all.py:6: note: In module imported here:
 : main/src/my/twitter/gdpr.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")  [assignment]
 :     trigger_mypy_error: str = 123
 :                               ^~~
 : overlay/src/my/twitter/talon.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")
 : [assignment]
 :     trigger_mypy_error: str = 123
 :                               ^~~
 : Found 2 errors in 2 files (checked 4 source files)
 Interestingly enough, this is slightly better than the editable install (it detected error in =gdpr.py= as well).
 But still no =reddit.py= error.
 TODO possibly worth submitting to mypy issue tracker as well...
 Overall it seems that properly type checking HPI setup as a whole is kinda problematic, especially if the modules actually override/extend base modules.
 * Modifying (monkey patching) original module in the overlay
 Let's say we want to modify/monkey patch =my.twitter.gdpr= module from =main=, for example, convert "gdpr" to uppercase, i.e. =tweet.replace('gdpr', 'GDPR')=.
 # TODO see overlay2/
 I think our options are:
 - symlink to the 'parent' packages, e.g. =main= in the case
  Alternatively, somehow install =main= under a different name/alias (managed by pip).
  This is discussed here: https://github.com/karlicoss/HPI/issues/102
  The main upside is that it's relatively simple and (sort of) works with mypy.
  There are a few big downsides:
  - creates a parallel package hierarchy (to the one maintained by pip), symlinks will need to be carefully managed manually
    This may not be such a huge deal if you don't have too many overlays.
    However this results in problems if you're trying to switch between two different HPI checkouts (e.g. stable and development). If you have symlinks into "stable" from the overlay then stable modules will sometimes be picked up when you're expecting "development" package.
  - symlinks pointing outside of the source tree might cause pip install to go into infinite loop
  - it modifies the package name
    This may potentially result in some confusing behaviours.
    One thing I noticed for example is that cachew caches might get duplicated.
  - it might not work in all cases or might result in recursive imports
 - do not shadow the original module
  Basically instead of shadowing via namespace package mechanism and creating identically named module,
  create some sort of hook that would patch the original =my.twitter.gdpr= module from =main=.
  The downside is that it's a bit unclear where to do that, we need some sort of entry point?
  - it could be some global dynamic hook defined in the overlay, and then executed from =my.core=
    However, it's a bit intrusive, and unclear how to handle errors. E.g. what if we're monkey patching a module that we weren't intending to use, don't have dependencies installed and it's crashing?
    Perhaps core could support something like =_hook= in each of HPI's modules?
    Note that it can't be =my.twitter.all=, since we might want to override =.all= itself.
    The downside is that this is probably not going to work well with =tmp_config= and such -- we'll need to somehow execute the hook again on reloading the module?
  - ideally we'd have something that integrates with =importlib= and executed automatically when module is imported?
    TODO explore these:
    - https://stackoverflow.com/questions/43571737/how-to-implement-an-import-hook-that-can-modify-the-source-code-on-the-fly-using
    - https://github.com/brettlangdon/importhook
      This one is pretty intrusive, and has some issues, e.g. https://github.com/brettlangdon/importhook/issues/4
      Let's try it:
      : $ PYTHONPATH=overlay3/src:main/src python3 -c 'import my.twitter._hook; import my.twitter.all as M; print(M.tweets())'
      : [main] my.twitter.all hello
      : [main] my.twitter.common hello
      : [main] my.twitter.gdpr hello
      : EXECUTING IMPORT HOOK!
      : ['GDPR tweet 1', 'GDPR tweet 2']
      Ok it worked, and seems pretty neat.
      However sadly it doesn't work with =tmp_config= (TODO add a proper demo?)
      Not sure if it's more of an issue with =tmp_config= implementation (which is very hacky), or =importhook= itself?
    In addition, still the question is where to put the hook itself, but in that case even a global one could be fine.
  - define hook in =my/twitter/__init__.py=
    Basically, use =extend_path= to make it behave like a namespace package, but in addition, patch original =my.twitter.gdpr=?
    : $ cat overlay2/src/my/twitter/__init__.py
    : print(f'[overlay2] {__name__} hello')
    :
    : from pkgutil import extend_path
    : __path__ = extend_path(__path__, __name__)
    :
    : def hack_gdpr_module() -> None:
    :     from . import gdpr
    :     tweets_orig = gdpr.tweets
    :     def tweets_patched():
    :         return [t.replace('gdpr', 'GDPR') for t in tweets_orig()]
    :     gdpr.tweets = tweets_patched
    :
    : hack_gdpr_module()
    This actually seems to work??
    : PYTHONPATH=overlay2/src:main/src python3 -c 'import my.twitter.all as M; print(M.tweets())'
    : [overlay2] my.twitter hello
    : [main] my.twitter.gdpr hello
    : [main] my.twitter.all hello
    : [main] my.twitter.common hello
    : ['GDPR tweet 1', 'GDPR tweet 2']
    However, this doesn't stack, i.e. if the 'parent' overlay had its own =__init__.py=, it won't get called.
 - shadow the original module and temporarily modify =__path__= before importing the same module from the parent overlay
  This approach is implemented in =my.core.experimental.import_original_module=
  TODO demonstrate it properly, but I think that also works in a 'chain' of overlays
  Seems like that option is the most promising so far, albeit very hacky.
 Note that none of these options work well with mypy (since it's all dynamic hackery), even if you disregard the issues described in the previous sections.
 # TODO .pkg files? somewhat interesting... https://github.com/python/cpython/blob/3.12/Lib/pkgutil.py#L395-L410
--- a/doc/QUERY.md
+++ b/doc/QUERY.md
@ -97,9 +97,9 @@ By default, this just returns the items in the order they were returned by the f
 hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream
 Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
       authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
-       message='sources.smscalls: propagate errors if there are breaking '
+       message='sources.smscalls: propogate errors if there are breaking '
               'schema changes',
-       repo='/home/username/Repos/promnesia-fork',
+       repo='/home/sean/Repos/promnesia-fork',
       sha='22a434fca9a28df9b0915ccf16368df129d2c9ce',
       ref='refs/heads/smscalls-handle-result')
 ```
@ -195,7 +195,7 @@ To preview, you can use something like [`qgis`](https://qgis.org/en/site/) or fo
 <img src="https://user-images.githubusercontent.com/7804791/232249184-7e203ee6-a3ec-4053-800c-751d2c28e690.png" width=500 alt="chicago trip" />
-(Sidenote: this is [`@purarue`](https://github.com/purarue/)s locations, on a trip to Chicago)
+(Sidenote: these are [`@seanbreckenridge`](https://github.com/seanbreckenridge/)'s locations, on a trip to Chicago)
 ## Python reference
@ -301,4 +301,4 @@ The `hpi query` command is a CLI wrapper around the code in [`query.py`](../my/c
    If you specify a range, drop_unsorted is forced to be True
 ```
-Those can be imported and accept any sort of iterator, `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/purarue/HPI-personal/blob/master/scripts/listens) which just passes an generator (iterator) as the first argument to `query_range`
+Those can be imported and accept any sort of iterator, `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/seanbreckenridge/HPI-personal/blob/master/scripts/listens) which just passes a generator (iterator) as the first argument to `query_range`
--- a/doc/SETUP.org
+++ b/doc/SETUP.org
@ -387,7 +387,7 @@ But there is an extra caveat: rexport is already coming with nice [[https://gith
 Several other HPI modules are following a similar pattern: hypothesis, instapaper, pinboard, kobo, etc.
-Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/purarue/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments=
+Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/seanbreckenridge/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments=
 ** Twitter
--- a/doc/overlays/install_packages.sh
+++ b/doc/overlays/install_packages.sh
@ -1,4 +0,0 @@
 #!/bin/bash
 set -eux
 pip3 install --user "$@" -e main/
 pip3 install --user "$@" -e overlay/
--- a/doc/overlays/main/setup.py
+++ b/doc/overlays/main/setup.py
@ -1,17 +0,0 @@
 from setuptools import setup, find_namespace_packages # type: ignore
 def main() -> None:
    """Hand the package metadata for ``hpi-main`` over to setuptools.

    All namespace packages found under ``src`` are shipped; ``py.typed`` is
    registered as package data for the smallest discovered package name.
    """
    namespace_packages = find_namespace_packages('src')
    # min() over the discovered names decides which package carries py.typed
    marker_owner = min(namespace_packages)
    setup_kwargs = {
        'name': 'hpi-main',
        'zip_safe': False,
        'packages': namespace_packages,
        'package_dir': {'': 'src'},
        'package_data': {marker_owner: ['py.typed']},
    }
    setup(**setup_kwargs)
 if __name__ == '__main__':
    main()
--- a/doc/overlays/main/src/my/py.typed
+++ b/doc/overlays/main/src/my/py.typed
--- a/doc/overlays/main/src/my/reddit.py
+++ b/doc/overlays/main/src/my/reddit.py
@ -1,11 +0,0 @@
 print(f'[main] {__name__} hello')
 def upvotes() -> list[str]:
    """Return the fixed pair of demo upvotes served by this module."""
    first = 'reddit upvote1'
    second = 'reddit upvote2'
    return [first, second]
 trigger_mypy_error: str = 123
--- a/doc/overlays/main/src/my/twitter/all.py
+++ b/doc/overlays/main/src/my/twitter/all.py
@ -1,7 +0,0 @@
 print(f'[main] {__name__} hello')
 from .common import merge
 def tweets() -> list[str]:
    """Return the merged tweets of the only source in this package, ``gdpr``."""
    # the source is imported at call time rather than at module import
    from . import gdpr as gdpr_source
    return merge(gdpr_source)
--- a/doc/overlays/main/src/my/twitter/common.py
+++ b/doc/overlays/main/src/my/twitter/common.py
@ -1,11 +0,0 @@
 print(f'[main] {__name__} hello')
 from typing import Protocol
 class Source(Protocol):
    """Structural type for a tweet source: anything exposing ``tweets()``."""

    def tweets(self) -> list[str]:
        """Return the tweets provided by this source."""
        ...
 def merge(*sources: Source) -> list[str]:
    """Concatenate the tweets of all ``sources``, in the order they are passed.

    Each source's ``tweets()`` is called exactly once; no sources gives ``[]``.
    """
    merged: list[str] = []
    for source in sources:
        merged.extend(source.tweets())
    return merged
--- a/doc/overlays/main/src/my/twitter/gdpr.py
+++ b/doc/overlays/main/src/my/twitter/gdpr.py
@ -1,9 +0,0 @@
 print(f'[main] {__name__} hello')
 def tweets() -> list[str]:
    """Return the fixed pair of demo tweets served by the gdpr source."""
    first = 'gdpr tweet 1'
    second = 'gdpr tweet 2'
    return [first, second]
 trigger_mypy_error: str = 123
--- a/doc/overlays/overlay/setup.py
+++ b/doc/overlays/overlay/setup.py
@ -1,17 +0,0 @@
 from setuptools import setup, find_namespace_packages # type: ignore
 def main() -> None:
    """Hand the package metadata for ``hpi-overlay`` over to setuptools.

    All namespace packages found under ``src`` are shipped; ``py.typed`` is
    registered as package data for the smallest discovered package name.
    """
    namespace_packages = find_namespace_packages('src')
    # min() over the discovered names decides which package carries py.typed
    marker_owner = min(namespace_packages)
    setup_kwargs = {
        'name': 'hpi-overlay',
        'zip_safe': False,
        'packages': namespace_packages,
        'package_dir': {'': 'src'},
        'package_data': {marker_owner: ['py.typed']},
    }
    setup(**setup_kwargs)
 if __name__ == '__main__':
    main()
--- a/doc/overlays/overlay/src/my/py.typed
+++ b/doc/overlays/overlay/src/my/py.typed
--- a/doc/overlays/overlay/src/my/twitter/all.py
+++ b/doc/overlays/overlay/src/my/twitter/all.py
@ -1,8 +0,0 @@
 print(f'[overlay] {__name__} hello')
 from .common import merge
 def tweets() -> list[str]:
    """Return tweets merged from the ``gdpr`` and ``talon`` sources, in that order."""
    # both sources are imported at call time; gdpr is still imported before talon
    from . import gdpr, talon
    return merge(gdpr, talon)
--- a/doc/overlays/overlay/src/my/twitter/talon.py
+++ b/doc/overlays/overlay/src/my/twitter/talon.py
@ -1,9 +0,0 @@
 print(f'[overlay] {__name__} hello')
 def tweets() -> list[str]:
    """Return the fixed pair of demo tweets served by the talon source."""
    first = 'talon tweet 1'
    second = 'talon tweet 2'
    return [first, second]
 trigger_mypy_error: str = 123
--- a/doc/overlays/overlay2/setup.py
+++ b/doc/overlays/overlay2/setup.py
@ -1,17 +0,0 @@
 from setuptools import setup, find_namespace_packages # type: ignore
 def main() -> None:
    """Hand the package metadata for ``hpi-overlay2`` over to setuptools.

    All namespace packages found under ``src`` are shipped; ``py.typed`` is
    registered as package data for the smallest discovered package name.
    """
    namespace_packages = find_namespace_packages('src')
    # min() over the discovered names decides which package carries py.typed
    marker_owner = min(namespace_packages)
    setup_kwargs = {
        'name': 'hpi-overlay2',
        'zip_safe': False,
        'packages': namespace_packages,
        'package_dir': {'': 'src'},
        'package_data': {marker_owner: ['py.typed']},
    }
    setup(**setup_kwargs)
 if __name__ == '__main__':
    main()
--- a/doc/overlays/overlay2/src/my/py.typed
+++ b/doc/overlays/overlay2/src/my/py.typed
--- a/doc/overlays/overlay2/src/my/twitter/init.py
+++ b/doc/overlays/overlay2/src/my/twitter/init.py
@ -1,13 +0,0 @@
 print(f'[overlay2] {__name__} hello')
 from pkgutil import extend_path
 __path__ = extend_path(__path__, __name__)
 def hack_gdpr_module() -> None:
    """Replace ``gdpr.tweets`` with a wrapper that upper-cases 'gdpr' in each tweet."""
    from . import gdpr

    # keep a handle on the unpatched function; the wrapper delegates to it
    original_tweets = gdpr.tweets

    def tweets_patched():
        patched = []
        for tweet in original_tweets():
            patched.append(tweet.replace('gdpr', 'GDPR'))
        return patched

    gdpr.tweets = tweets_patched
 hack_gdpr_module()
--- a/doc/overlays/overlay3/setup.py
+++ b/doc/overlays/overlay3/setup.py
@ -1,17 +0,0 @@
 from setuptools import setup, find_namespace_packages # type: ignore
 def main() -> None:
    """Hand the package metadata for ``hpi-overlay3`` over to setuptools.

    All namespace packages found under ``src`` are shipped; ``py.typed`` is
    registered as package data for the smallest discovered package name.
    """
    namespace_packages = find_namespace_packages('src')
    # min() over the discovered names decides which package carries py.typed
    marker_owner = min(namespace_packages)
    setup_kwargs = {
        'name': 'hpi-overlay3',
        'zip_safe': False,
        'packages': namespace_packages,
        'package_dir': {'': 'src'},
        'package_data': {marker_owner: ['py.typed']},
    }
    setup(**setup_kwargs)
 if __name__ == '__main__':
    main()
--- a/doc/overlays/overlay3/src/my/py.typed
+++ b/doc/overlays/overlay3/src/my/py.typed
--- a/doc/overlays/overlay3/src/my/twitter/_hook.py
+++ b/doc/overlays/overlay3/src/my/twitter/_hook.py
@ -1,9 +0,0 @@
 import importhook
@importhook.on_import('my.twitter.gdpr')
 def on_import(gdpr):
    """Wrap ``gdpr.tweets`` so that 'gdpr' is upper-cased in every returned tweet.

    :param gdpr: object whose ``tweets`` attribute is replaced in place.
    """
    print("EXECUTING IMPORT HOOK!")
    # keep a handle on the unpatched function; the wrapper delegates to it
    original_tweets = gdpr.tweets

    def tweets_patched():
        patched = []
        for tweet in original_tweets():
            patched.append(tweet.replace('gdpr', 'GDPR'))
        return patched

    gdpr.tweets = tweets_patched
--- a/misc/.flake8-karlicoss
+++ b/misc/.flake8-karlicoss
@ -32,6 +32,6 @@ ignore =
 # 
 # as a reference:
-# https://github.com/purarue/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
+# https://github.com/seanbreckenridge/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
 # and this https://github.com/karlicoss/HPI/pull/151
 # find ./my | entr flake8 --ignore=E402,E501,E741,W503,E266,E302,E305,E203,E261,E252,E251,E221,W291,E225,E303,E702,E202,F841,E731,E306,E127 E722,E231 my | grep -v __NOT_HPI_MODULE__
--- a/my/arbtt.py
+++ b/my/arbtt.py
@ -2,22 +2,19 @@
 [[https://github.com/nomeata/arbtt#arbtt-the-automatic-rule-based-time-tracker][Arbtt]] time tracking
 '''
 from __future__ import annotations
 REQUIRES = ['ijson', 'cffi']
 # NOTE likely also needs libyajl2 from apt or elsewhere?
 from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Sequence, Iterable, List, Optional
 def inputs() -> Sequence[Path]:
    try:
        from my.config import arbtt as user_config
    except ImportError:
-        from my.core.warnings import low
+        from .core.warnings import low
        low("Couldn't find 'arbtt' config section, falling back to the default capture.log (usually in HOME dir). Add 'arbtt' section with logfiles = '' to suppress this warning.")
        return []
    else:
@ -25,9 +22,8 @@ def inputs() -> Sequence[Path]:
        return get_files(user_config.logfiles)
-
+from .core import dataclass, Json, PathIsh, datetime_aware
-from my.core import Json, PathIsh, datetime_aware
+from .core.common import isoparse
 from my.core.compat import fromisoformat
@dataclass
@ -43,7 +39,6 @@ class Entry:
    @property
    def dt(self) -> datetime_aware:
        # contains utc already
        # TODO after python>=3.11, could just use fromisoformat
        ds = self.json['date']
        elen = 27
        lds = len(ds)
@ -51,13 +46,13 @@ class Entry:
            # ugh. sometimes contains fewer than 6 decimal places
            ds = ds[:-1] + '0' * (elen - lds) + 'Z'
        elif lds > elen:
-            # and sometimes more...
+            # and sometimes more...
            ds = ds[:elen - 1] + 'Z'
-        return fromisoformat(ds)
+        return isoparse(ds)
    @property
-    def active(self) -> str | None:
+    def active(self) -> Optional[str]:
        # NOTE: WIP, might change this in the future...
        ait = (w for w in self.json['windows'] if w['active'])
        a = next(ait, None)
@ -76,18 +71,17 @@ class Entry:
 def entries() -> Iterable[Entry]:
    inps = list(inputs())
-    base: list[PathIsh] = ['arbtt-dump', '--format=json']
+    base: List[PathIsh] = ['arbtt-dump', '--format=json']
-    cmds: list[list[PathIsh]]
+    cmds: List[List[PathIsh]]
    if len(inps) == 0:
        cmds = [base] # rely on default
    else:
        # otherwise, 'merge' them
-        cmds = [[*base, '--logfile', f] for f in inps]
+        cmds = [base + ['--logfile', f] for f in inps]
-    from subprocess import PIPE, Popen
+    import ijson.backends.yajl2_cffi as ijson # type: ignore
-
+    from subprocess import Popen, PIPE
    import ijson.backends.yajl2_cffi as ijson  # type: ignore
    for cmd in cmds:
        with Popen(cmd, stdout=PIPE) as p:
            out = p.stdout; assert out is not None
@ -96,8 +90,8 @@ def entries() -> Iterable[Entry]:
 def fill_influxdb() -> None:
    from .core.freezer import Freezer
    from .core.influxdb import magic_fill
    from .core.freezer import Freezer
    freezer = Freezer(Entry)
    fit = (freezer.freeze(e) for e in entries())
    # TODO crap, influxdb doesn't like None https://github.com/influxdata/influxdb/issues/7722
@ -109,8 +103,6 @@ def fill_influxdb() -> None:
    magic_fill(fit, name=f'{entries.__module__}:{entries.__name__}')
-from .core import Stats, stat
+from .core import stat, Stats
 def stats() -> Stats:
    """Return the result of running the core ``stat`` helper over ``entries``."""
    summary = stat(entries)
    return summary
--- a/my/bluemaestro.py
+++ b/my/bluemaestro.py
@ -1,63 +1,39 @@
 #!/usr/bin/python3
 """
 [[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor
 """
 from __future__ import annotations
 # todo most of it belongs to DAL... but considering so few people use it I didn't bother for now
 import re
 import sqlite3
 from abc import abstractmethod
 from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Protocol
+import re
 import sqlite3
 from typing import Iterable, Sequence, Set, Optional
 import pytz
 from my.core import (
    Paths,
    Res,
    Stats,
    get_files,
    make_logger,
    Res,
    stat,
-    unwrap,
+    Stats,
    influxdb,
 )
-from my.core.cachew import mcachew
+from my.core.common import mcachew
 from my.core.error import unwrap
 from my.core.pandas import DataFrameT, as_dataframe
 from my.core.sqlite import sqlite_connect_immutable
-
+from my.config import bluemaestro as config
 class config(Protocol):
    """Configuration interface for this module.

    ``export_path`` has no default and raises NotImplementedError here;
    ``tz`` falls back to Europe/London.
    """

    @property
    @abstractmethod
    def export_path(self) -> Paths:
        """Location(s) of the exported data; no default is provided."""
        raise NotImplementedError
    @property
    def tz(self) -> pytz.BaseTzInfo:
        """Timezone for this export; defaults to Europe/London."""
        # fixme: later, rely on the timezone provider
        # NOTE: the timezone should be set with respect to the export date!!!
        return pytz.timezone('Europe/London')
        # TODO when I change tz, check the diff
 def make_config() -> config:
    """Build a config instance combining the user's settings with ``config``.

    The user's ``my.config.bluemaestro`` class is listed first among the bases,
    so its attributes take precedence over the defaults declared on ``config``.
    """
    from my.config import bluemaestro as user_config

    class combined_config(user_config, config):
        pass

    return combined_config()
 logger = make_logger(__name__)
 def inputs() -> Sequence[Path]:
-    cfg = make_config()
+    return get_files(config.export_path)
    return get_files(cfg.export_path)
 Celsius = float
@ -74,6 +50,12 @@ class Measurement:
    dewpoint: Celsius
 # fixme: later, rely on the timezone provider
 # NOTE: the timezone should be set with respect to the export date!!!
 tz = pytz.timezone('Europe/London')
 # TODO when I change tz, check the diff
 def is_bad_table(name: str) -> bool:
    # todo hmm would be nice to have a hook that can patch any module up to
    delegate = getattr(config, 'is_bad_table', None)
@ -82,31 +64,28 @@ def is_bad_table(name: str) -> bool:
@mcachew(depends_on=inputs)
 def measurements() -> Iterable[Res[Measurement]]:
    cfg = make_config()
    tz = cfg.tz
    # todo ideally this would be via arguments... but needs to be lazy
    paths = inputs()
    total = len(paths)
    width = len(str(total))
-    last: datetime | None = None
+    last: Optional[datetime] = None
    # tables are immutable, so can save on processing..
-    processed_tables: set[str] = set()
+    processed_tables: Set[str] = set()
    for idx, path in enumerate(paths):
        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
        tot = 0
        new = 0
        # todo assert increasing timestamp?
        with sqlite_connect_immutable(path) as db:
-            db_dt: datetime | None = None
+            db_dt: Optional[datetime] = None
            try:
                datas = db.execute(
                    f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
                )
                oldfmt = True
-                [(db_dts,)] = db.execute('SELECT last_download FROM info')
+                db_dts = list(db.execute('SELECT last_download FROM info'))[0][0]
                if db_dts == 'N/A':
                    # ??? happens for 20180923-20180928
                    continue
@ -139,7 +118,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                processed_tables |= set(log_tables)
                # todo use later?
-                frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]  # noqa: RUF015
+                frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]
                # todo could just filter out the older datapoints?? dunno.
@ -155,7 +134,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                oldfmt = False
                db_dt = None
-            for (name, tsc, temp, hum, pres, dewp) in datas:
+            for i, (name, tsc, temp, hum, pres, dewp) in enumerate(datas):
                if is_bad_table(name):
                    continue
@ -232,8 +211,6 @@ def dataframe() -> DataFrameT:
 def fill_influxdb() -> None:
    """Pass ``measurements()`` to ``influxdb.fill``, using this module's name as the measurement."""
    from my.core import influxdb

    records = measurements()
    influxdb.fill(records, measurement=__name__)
--- a/my/body/blood.py
+++ b/my/body/blood.py
@ -2,42 +2,41 @@
 Blood tracking (manual org-mode entries)
 """
 from __future__ import annotations
 from collections.abc import Iterable
 from datetime import datetime
-from typing import NamedTuple
+from typing import Iterable, NamedTuple, Optional
 import orgparse
 import pandas as pd
 from my.config import blood as config  # type: ignore[attr-defined]
 from ..core.error import Res
-from ..core.orgmode import one_table, parse_org_datetime
+from ..core.orgmode import parse_org_datetime, one_table
 import pandas as pd
 import orgparse
 from my.config import blood as config  # type: ignore[attr-defined]
 class Entry(NamedTuple):
    dt: datetime
-    ketones      : float | None=None
+    ketones      : Optional[float]=None
-    glucose      : float | None=None
+    glucose      : Optional[float]=None
-    vitamin_d    : float | None=None
+    vitamin_d    : Optional[float]=None
-    vitamin_b12  : float | None=None
+    vitamin_b12  : Optional[float]=None
-    hdl          : float | None=None
+    hdl          : Optional[float]=None
-    ldl          : float | None=None
+    ldl          : Optional[float]=None
-    triglycerides: float | None=None
+    triglycerides: Optional[float]=None
-    source       : str | None=None
+    source       : Optional[str]=None
-    extra        : str | None=None
+    extra        : Optional[str]=None
 Result = Res[Entry]
-def try_float(s: str) -> float | None:
+def try_float(s: str) -> Optional[float]:
    l = s.split()
    if len(l) == 0:
        return None
@ -106,7 +105,6 @@ def blood_tests_data() -> Iterable[Result]:
 def data() -> Iterable[Result]:
    """Chain glucose/ketone data with blood test data and pass it through ``sort_res_by``.

    The sort key is each entry's ``dt``.
    """
    from itertools import chain

    from ..core.error import sort_res_by

    def entry_dt(entry):
        return entry.dt

    combined = chain(glucose_ketones_data(), blood_tests_data())
    return sort_res_by(combined, key=entry_dt)
--- a/my/body/exercise/all.py
+++ b/my/body/exercise/all.py
@ -7,10 +7,10 @@ from ...core.pandas import DataFrameT, check_dataframe
@check_dataframe
 def dataframe() -> DataFrameT:
    # this should be somehow more flexible...
    import pandas as pd
    from ...endomondo import dataframe as EDF
-    from ...runnerup import dataframe as RDF
+    from ...runnerup  import dataframe as RDF
    import pandas as pd
    return pd.concat([
        EDF(),
        RDF(),
--- a/my/body/exercise/cardio.py
+++ b/my/body/exercise/cardio.py
@ -3,6 +3,7 @@ Cardio data, filtered from various data sources
 '''
 from ...core.pandas import DataFrameT, check_dataframe
 CARDIO     = {
    'Running',
    'Running, treadmill',
--- a/my/body/exercise/cross_trainer.py
+++ b/my/body/exercise/cross_trainer.py
@ -5,18 +5,16 @@ This is probably too specific to my needs, so later I will move it away to a per
 For now it's worth keeping it here as an example and perhaps utility functions might be useful for other HPI modules.
 '''
 from __future__ import annotations
 from datetime import datetime, timedelta
 from typing import Optional
-import pytz
+from ...core.pandas import DataFrameT, check_dataframe as cdf
 from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable
 from my.config import exercise as config
 from ...core.orgmode import Table, TypedTable, collect, parse_org_datetime
 from ...core.pandas import DataFrameT
 from ...core.pandas import check_dataframe as cdf
 import pytz
 # FIXME how to attach it properly?
 tz = pytz.timezone('Europe/London')
@ -107,7 +105,7 @@ def dataframe() -> DataFrameT:
    rows = []
    idxs = [] # type: ignore[var-annotated]
    NO_ENDOMONDO = 'no endomondo matches'
-    for _i, row in mdf.iterrows():
+    for i, row in mdf.iterrows():
        rd = row.to_dict()
        mdate = row['date']
        if pd.isna(mdate):
@ -116,7 +114,7 @@ def dataframe() -> DataFrameT:
            rows.append(rd) # presumably has an error set
            continue
-        idx: int | None
+        idx: Optional[int]
        close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < _DELTA]
        if len(close) == 0:
            idx = None
@ -165,9 +163,7 @@ def dataframe() -> DataFrameT:
 # TODO wtf?? where is speed coming from??
-from ...core import Stats, stat
+from ...core import stat, Stats
 def stats() -> Stats:
    """Return the result of running the core ``stat`` helper over ``cross_trainer_data``."""
    summary = stat(cross_trainer_data)
    return summary
--- a/my/body/sleep/common.py
+++ b/my/body/sleep/common.py
@ -1,6 +1,5 @@
-from ...core import Stats, stat
+from ...core import stat, Stats
-from ...core.pandas import DataFrameT
+from ...core.pandas import DataFrameT, check_dataframe as cdf
 from ...core.pandas import check_dataframe as cdf
 class Combine:
@ -8,7 +7,7 @@ class Combine:
        self.modules = modules
    @cdf
-    def dataframe(self, *, with_temperature: bool=True) -> DataFrameT:
+    def dataframe(self, with_temperature: bool=True) -> DataFrameT:
        import pandas as pd
        # todo include 'source'?
        df = pd.concat([m.dataframe() for m in self.modules])
@ -18,21 +17,15 @@ class Combine:
            bdf = BM.dataframe()
            temp = bdf['temp']
            # sort index and drop nans, otherwise indexing with [start: end] gonna complain
            temp = pd.Series(
                temp.values,
                index=pd.to_datetime(temp.index, utc=True)
            ).sort_index()
            temp = temp.loc[temp.index.dropna()]
            def calc_avg_temperature(row):
                start = row['sleep_start']
                end   = row['sleep_end']
                if pd.isna(start) or pd.isna(end):
                    return None
                between = (start <= temp.index) & (temp.index <= end)
                # on no temp data, returns nan, ok
-                return temp[start: end].mean()
+                return temp[between].mean()
            df['avg_temp'] = df.apply(calc_avg_temperature, axis=1)
        return df
--- a/my/body/sleep/main.py
+++ b/my/body/sleep/main.py
@ -1,6 +1,7 @@
-from ... import emfit, jawbone
+from ... import jawbone
-from .common import Combine
+from ... import emfit
 from .common import Combine
 _combined = Combine([
    jawbone,
    emfit,
--- a/my/body/weight.py
+++ b/my/body/weight.py
@ -2,29 +2,21 @@
 Weight data (manually logged)
 '''
 from collections.abc import Iterator
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Any
+from typing import NamedTuple, Iterator
-from my import orgmode
+from ..core import LazyLogger
-from my.core import make_logger
+from ..core.error import Res, set_error_datetime, extract_error_datetime
 from my.core.error import Res, extract_error_datetime, set_error_datetime
-config = Any
+from .. import orgmode
 from my.config import weight as config  # type: ignore[attr-defined]
-def make_config() -> config:
+log = LazyLogger('my.body.weight')
    from my.config import weight as user_config  # type: ignore[attr-defined]
    return user_config()
-log = make_logger(__name__)
+class Entry(NamedTuple):
@dataclass
 class Entry:
    dt: datetime
    value: float
    # TODO comment??
@ -34,8 +26,6 @@ Result = Res[Entry]
 def from_orgmode() -> Iterator[Result]:
    cfg = make_config()
    orgs = orgmode.query()
    for o in orgmode.query().all():
        if 'weight' not in o.tags:
@ -56,8 +46,8 @@ def from_orgmode() -> Iterator[Result]:
            yield e
            continue
        # FIXME use timezone provider
-        created = cfg.default_timezone.localize(created)
+        created = config.default_timezone.localize(created)
-        assert created is not None  # ??? somehow mypy wasn't happy?
+        assert created is not None #??? somehow mypy wasn't happy?
        yield Entry(
            dt=created,
            value=w,
@ -67,23 +57,21 @@ def from_orgmode() -> Iterator[Result]:
 def make_dataframe(data: Iterator[Result]):
    import pandas as pd
    def it():
        for e in data:
            if isinstance(e, Exception):
                dt = extract_error_datetime(e)
                yield {
-                    'dt': dt,
+                    'dt'    : dt,
                    'error': str(e),
                }
            else:
                yield {
-                    'dt': e.dt,
+                    'dt'    : e.dt,
                    'weight': e.value,
                }
    df = pd.DataFrame(it())
-    df = df.set_index('dt')
+    df.set_index('dt', inplace=True)
    # TODO not sure about UTC??
    df.index = pd.to_datetime(df.index, utc=True)
    return df
@ -93,7 +81,6 @@ def dataframe():
    entries = from_orgmode()
    return make_dataframe(entries)
 # TODO move to a submodule? e.g. my.body.weight.orgmode?
 # so there could be more sources
 # not sure about my.body thing though
--- a/my/books/kobo.py
+++ b/my/books/kobo.py
@ -1,6 +1,7 @@
-from my.core import warnings
+from ..core import warnings
 warnings.high('my.books.kobo is deprecated! Please use my.kobo instead!')
-from my.core.util import __NOT_HPI_MODULE__
+from ..core.util import __NOT_HPI_MODULE__
-from my.kobo import *
+
 from ..kobo import *  # type: ignore[no-redef]
--- a/my/browser/active_browser.py
+++ b/my/browser/active_browser.py
@ -1,13 +1,12 @@
 """
-Parses active browser history by backing it up with [[http://github.com/purarue/sqlite_backup][sqlite_backup]]
+Parses active browser history by backing it up with [[http://github.com/seanbreckenridge/sqlite_backup][sqlite_backup]]
 """
 REQUIRES = ["browserexport", "sqlite_backup"]
 from dataclasses import dataclass
 from my.config import browser as user_config
-from my.core import Paths
+from my.core import Paths, dataclass
@dataclass
@ -19,18 +18,16 @@ class config(user_config.active_browser):
    export_path: Paths
 from collections.abc import Iterator, Sequence
 from pathlib import Path
 from typing import Sequence, Iterator
-from browserexport.merge import Visit, read_visits
+from my.core import get_files, Stats, make_logger
 from browserexport.merge import read_visits, Visit
 from sqlite_backup import sqlite_backup
 from my.core import Stats, get_files, make_logger
 logger = make_logger(__name__)
 from .common import _patch_browserexport_logs
 _patch_browserexport_logs(logger.level)
--- a/my/browser/all.py
+++ b/my/browser/all.py
@ -1,9 +1,9 @@
-from collections.abc import Iterator
+from typing import Iterator
 from browserexport.merge import Visit, merge_visits
 from my.core import Stats
 from my.core.source import import_source
 from browserexport.merge import merge_visits, Visit
 src_export = import_source(module_name="my.browser.export")
 src_active = import_source(module_name="my.browser.active_browser")
--- a/my/browser/export.py
+++ b/my/browser/export.py
@ -1,36 +1,31 @@
 """
-Parses browser history using [[http://github.com/purarue/browserexport][browserexport]]
+Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
 """
 REQUIRES = ["browserexport"]
-from collections.abc import Iterator, Sequence
+from my.config import browser as user_config
-from dataclasses import dataclass
+from my.core import Paths, dataclass
 from pathlib import Path
 from browserexport.merge import Visit, read_and_merge
 from my.core import (
    Paths,
    Stats,
    get_files,
    make_logger,
    stat,
 )
 from my.core.cachew import mcachew
 from .common import _patch_browserexport_logs
 import my.config  # isort: skip
@dataclass
-class config(my.config.browser.export):
+class config(user_config.export):
    # path[s]/glob to your backed up browser history sqlite files
    export_path: Paths
-logger = make_logger(__name__)
+from pathlib import Path
 from typing import Iterator, Sequence, List
 from my.core import Stats, get_files, LazyLogger
 from my.core.common import mcachew
 from browserexport.merge import read_and_merge, Visit
 from .common import _patch_browserexport_logs
 logger = LazyLogger(__name__, level="warning")
 _patch_browserexport_logs(logger.level)
@ -39,10 +34,16 @@ def inputs() -> Sequence[Path]:
    return get_files(config.export_path)
-@mcachew(depends_on=inputs, logger=logger)
+def _cachew_depends_on() -> List[str]:
    return [str(f) for f in inputs()]
@mcachew(depends_on=_cachew_depends_on, logger=logger)
 def history() -> Iterator[Visit]:
    """Yield every item that ``read_and_merge`` produces for the files returned by ``inputs()``."""
    yield from read_and_merge(inputs())
 def stats() -> Stats:
    """Return a fresh dict holding the ``stat`` output for ``history``."""
    from my.core import stat

    history_stats = stat(history)
    # unpack into a new dict rather than handing back the helper's own mapping
    return {**history_stats}
--- a/my/bumble/android.py
+++ b/my/bumble/android.py
@ -3,24 +3,24 @@ Bumble data from Android app database (in =/data/data/com.bumble.app/databases/C
 """
 from __future__ import annotations
 from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime
-from pathlib import Path
+from typing import Iterator, Sequence, Optional, Dict
 from more_itertools import unique_everseen
-from my.core import Paths, get_files
+from my.config import bumble as user_config
 from my.config import bumble as user_config  # isort: skip
 from ..core import Paths
@dataclass
 class config(user_config.android):
    # paths[s]/glob to the exported sqlite databases
    export_path: Paths
 from ..core import get_files
 from pathlib import Path
 def inputs() -> Sequence[Path]:
    """Return the files that ``get_files`` finds for ``config.export_path``."""
    export_path = config.export_path
    return get_files(export_path)
@ -43,23 +43,20 @@ class _BaseMessage:
@dataclass(unsafe_hash=True)
 class _Message(_BaseMessage):
    conversation_id: str
-    reply_to_id: str | None
+    reply_to_id: Optional[str]
@dataclass(unsafe_hash=True)
 class Message(_BaseMessage):
    person: Person
-    reply_to: Message | None
+    reply_to: Optional[Message]
 import json
 import sqlite3
 from typing import Union
-
+from ..core import Res, assert_never
-from my.core.compat import assert_never
+import sqlite3
-
+from ..core.sqlite import sqlite_connect_immutable, select
 from ..core import Res
 from ..core.sqlite import select, sqlite_connect_immutable
 EntitiesRes = Res[Union[Person, _Message]]
@ -109,11 +106,10 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[EntitiesRes]:
 def _key(r: EntitiesRes):
    if isinstance(r, _Message):
-        if '/hidden?' in r.text:
+        if '&srv_width=' in r.text:
            # ugh. seems that image URLs change all the time in the db?
            # can't access them without login anyway
            # so use a different key for such messages
            # todo maybe normalize text instead? since it's gonna always trigger diffs down the line
            return (r.id, r.created)
    return r
@ -122,8 +118,8 @@ _UNKNOWN_PERSON = "UNKNOWN_PERSON"
 def messages() -> Iterator[Res[Message]]:
-    id2person: dict[str, Person] = {}
+    id2person: Dict[str, Person] = {}
-    id2msg: dict[str, Message] = {}
+    id2msg: Dict[str, Message] = {}
    for x in unique_everseen(_entities(), key=_key):
        if isinstance(x, Exception):
            yield x
--- a/my/calendar/holidays.py
+++ b/my/calendar/holidays.py
@ -9,18 +9,16 @@ from datetime import date, datetime, timedelta
 from functools import lru_cache
 from typing import Union
-from my.core import Stats
+from ..core.time import zone_to_countrycode
 from my.core.time import zone_to_countrycode
@lru_cache(1)
 def _calendar():
-    from workalendar.registry import registry  # type: ignore
+    from workalendar.registry import registry # type: ignore
    # todo switch to using time.tz.main once _get_tz stabilizes?
    from ..time.tz import via_location as LTZ
    # TODO would be nice to do it dynamically depending on the past timezones...
-    tz = LTZ.get_tz(datetime.now())
+    tz = LTZ._get_tz(datetime.now())
    assert tz is not None
    zone = tz.zone; assert zone is not None
    code = zone_to_countrycode(zone)
@ -48,6 +46,7 @@ def is_workday(d: DateIsh) -> bool:
    return not is_holiday(d)
 from ..core.common import Stats
 def stats() -> Stats:
    # meh, but not sure what would be a better test?
    res = {}
--- a/my/cfg.py
+++ b/my/cfg.py
@ -1,6 +1,7 @@
 import my.config as config
 from .core import __NOT_HPI_MODULE__
 from .core import warnings as W
 # still used in Promnesia, maybe in dashboard?
--- a/my/codeforces.py
+++ b/my/codeforces.py
@ -1,78 +0,0 @@
 import json
 from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from functools import cached_property
 from pathlib import Path
 from my.config import codeforces as config  # type: ignore[attr-defined]
 from my.core import Res, datetime_aware, get_files
 def inputs() -> Sequence[Path]:
    """Return the files that get_files finds for the configured codeforces export_path."""
    export_path = config.export_path
    return get_files(export_path)
 ContestId = int
@dataclass
 class Contest:
    """A single contest entry, as built by Parser._parse_allcontests."""
    # taken from the entry's 'id' key
    contest_id: ContestId
    # start of the contest: 'startTimeSeconds' converted to a UTC-aware datetime
    when: datetime_aware
    # taken from the entry's 'name' key
    name: str
@dataclass
 class Competition:
    """A contest the user took part in, as built by Parser._parse_competitions."""
    # the Contest looked up by the entry's 'contestId'
    contest: Contest
    # taken from the entry's 'oldRating' key
    old_rating: int
    # taken from the entry's 'newRating' key
    new_rating: int
    @cached_property
    def when(self) -> datetime_aware:
        """Start time of the underlying contest (delegates to self.contest.when)."""
        return self.contest.when
 # todo not sure if parser is the best name? hmm
 class Parser:
    """
    Turns export files into Competition entries.

    Files whose name contains 'allcontests' fill the contest id -> Contest lookup,
    files whose name contains 'codeforces' are parsed into Competition objects.
    Any other file name is treated as an error.
    """
    def __init__(self, *, inputs: Sequence[Path]) -> None:
        # the files parse() iterates over, in the given order
        self.inputs = inputs
        # contest id -> Contest, populated by parse() while it processes 'allcontests' files
        self.contests: dict[ContestId, Contest] = {}
    def _parse_allcontests(self, p: Path) -> Iterator[Contest]:
        """Yield a Contest for every entry under the 'result' key of the JSON file p."""
        j = json.loads(p.read_text())
        for c in j['result']:
            yield Contest(
                contest_id=c['id'],
                when=datetime.fromtimestamp(c['startTimeSeconds'], tz=timezone.utc),
                name=c['name'],
            )
    def _parse_competitions(self, p: Path) -> Iterator[Competition]:
        """
        Yield a Competition for every entry under the 'result' key of the JSON file p.

        Raises KeyError if an entry refers to a contest id that is not in self.contests yet.
        """
        j = json.loads(p.read_text())
        for c in j['result']:
            contest_id = c['contestId']
            contest = self.contests[contest_id]
            yield Competition(
                contest=contest,
                old_rating=c['oldRating'],
                new_rating=c['newRating'],
            )
    def parse(self) -> Iterator[Res[Competition]]:
        """
        Process self.inputs in order and yield the parsed competitions.

        Raises RuntimeError for a file whose name matches neither known kind.
        """
        # use the files this parser was constructed with;
        # calling the module-level inputs() here would silently ignore the constructor argument
        for path in self.inputs:
            if 'allcontests' in path.name:
                # these contain information about all CF contests along with useful metadata
                for contest in self._parse_allcontests(path):
                    # TODO some method to assert on mismatch if it exists? not sure
                    self.contests[contest.contest_id] = contest
            elif 'codeforces' in path.name:
                # these contain only contests the user participated in
                yield from self._parse_competitions(path)
            else:
                raise RuntimeError(f"shouldn't happen: {path.name}")
 def data() -> Iterator[Res[Competition]]:
    """Parse everything returned by inputs() and return the resulting iterator of competitions."""
    parser = Parser(inputs=inputs())
    return parser.parse()
--- a/my/coding/codeforces.py
+++ b/my/coding/codeforces.py
@ -0,0 +1,91 @@
 #!/usr/bin/env python3
 from my.config import codeforces as config  # type: ignore[attr-defined]
 from datetime import datetime, timezone
 from functools import cached_property
 import json
 from typing import NamedTuple, Dict, Iterator
 from ..core import get_files, Res, unwrap
 from ..core.konsume import ignore, wrap
 Cid = int
 class Contest(NamedTuple):
    """A contest: its id and its start time."""
    cid: Cid
    when: datetime
    @classmethod
    def make(cls, j) -> 'Contest':
        """Build a Contest from a JSON dict with 'id' and 'startTimeSeconds' keys."""
        contest_id = j['id']
        # the timestamp is interpreted as seconds since the epoch, in UTC
        started = datetime.fromtimestamp(j['startTimeSeconds'], tz=timezone.utc)
        return cls(cid=contest_id, when=started)
 Cmap = Dict[Cid, Contest]
 def get_contests() -> Cmap:
    """
    Build the contest id -> Contest mapping from the 'allcontests*.json' file that sorts last.
    If an id occurs more than once, the later entry wins.
    """
    latest = max(get_files(config.export_path, 'allcontests*.json'))
    parsed = json.loads(latest.read_text())
    contests = map(Contest.make, parsed['result'])
    return {contest.cid: contest for contest in contests}
 class Competition(NamedTuple):
    """
    A contest the user took part in.

    cmap is the contest id -> Contest lookup used to resolve the contest's start time.
    """
    contest_id: Cid
    contest: str
    cmap: Cmap
    # NOTE: these are plain properties on purpose. functools.cached_property stores its value
    # in the instance __dict__, which NamedTuple instances don't have, so it raises TypeError on access.
    # property also provides the .fget accessor.
    @property
    def uid(self) -> Cid:
        """Unique id of the entry (same as contest_id)."""
        return self.contest_id
    def __hash__(self):
        # hash by id only: the cmap field is a dict, so hashing the whole tuple would fail
        return hash(self.contest_id)
    @property
    def when(self) -> datetime:
        """Start time of the contest; raises KeyError if the contest is missing from cmap."""
        return self.cmap[self.uid].when
    @property
    def summary(self) -> str:
        """Short human readable description of the entry."""
        return f'participated in {self.contest}' # TODO
    @classmethod
    def make(cls, cmap, json) -> Iterator[Res['Competition']]:
        """
        Yield a Competition built from a wrapped JSON entry.

        NOTE(review): json is presumably a ..core.konsume wrapper (it is accessed via .zoom().value) -- confirm.
        """
        # TODO try here??
        contest_id = json['contestId'].zoom().value
        contest = json['contestName'].zoom().value
        yield cls(
            contest_id=contest_id,
            contest=contest,
            cmap=cmap,
        )
        # TODO ytry???
        # runs once the generator is resumed after the yield above
        ignore(json, 'rank', 'oldRating', 'newRating')
 def iter_data() -> Iterator[Res[Competition]]:
    """
    Yield competitions from the 'codeforces*.json' export file that sorts last (max).

    NOTE(review): wrap/ignore/zoom/consume come from ..core.konsume; presumably they track
    which parts of the JSON were handled, so the statement order here matters -- confirm before reordering.
    """
    # contest id -> Contest lookup, passed to every Competition
    cmap = get_contests()
    last = max(get_files(config.export_path, 'codeforces*.json'))
    with wrap(json.loads(last.read_text())) as j:
        j['status'].ignore()
        res = j['result'].zoom()
        for c in list(res): # TODO maybe we want 'iter' method??
            ignore(c, 'handle', 'ratingUpdateTimeSeconds')
            # yield from drains Competition.make completely before c.consume() is called
            yield from Competition.make(cmap=cmap, json=c)
            c.consume()
            # TODO maybe if they are all empty, no need to consume??
 def get_data():
    """Return everything produced by iter_data() as a list sorted by the entries' 'when' attribute."""
    # read the attribute from the instance rather than via Competition.when.fget:
    # .fget only exists on builtin property objects, not on functools.cached_property
    # sorted() already returns a list, no extra list() needed
    return sorted(iter_data(), key=lambda c: c.when)
--- a/my/coding/commits.py
+++ b/my/coding/commits.py
@ -1,32 +1,30 @@
 """
 Git commits data for repositories on your filesystem
 """
 from __future__ import annotations
 REQUIRES = [
    'gitpython',
 ]
 import shutil
 from collections.abc import Iterator, Sequence
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Optional, cast
-from my.core import LazyLogger, PathIsh, make_config
+import shutil
-from my.core.cachew import cache_dir, mcachew
+from pathlib import Path
 from datetime import datetime, timezone
 from dataclasses import dataclass, field
 from typing import List, Optional, Iterator, Set, Sequence, cast
 from my.core import PathIsh, LazyLogger, make_config
 from my.core.cachew import cache_dir
 from my.core.common import mcachew
 from my.core.warnings import high
 from my.config import commits as user_config  # isort: skip
 from my.config import commits as user_config
@dataclass
 class commits_cfg(user_config):
    roots: Sequence[PathIsh] = field(default_factory=list)
-    emails: Sequence[str] | None = None
+    emails: Optional[Sequence[str]] = None
-    names: Sequence[str] | None = None
+    names: Optional[Sequence[str]] = None
 # experiment to make it lazy?
@ -43,6 +41,7 @@ def config() -> commits_cfg:
 import git
 from git.repo.fun import is_git_dir
 log = LazyLogger(__name__, level='info')
@ -95,7 +94,7 @@ def _git_root(git_dir: PathIsh) -> Path:
        return gd # must be bare
-def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Commit]:
+def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Commit]:
    # without path might not handle pull heads properly
    for c in gr.iter_commits(rev=rev):
        if not by_me(c):
@ -122,7 +121,7 @@ def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Com
 def repo_commits(repo: PathIsh):
    gr = git.Repo(str(repo))
-    emitted: set[str] = set()
+    emitted: Set[str] = set()
    for r in gr.references:
        yield from _repo_commits_aux(gr=gr, rev=r.path, emitted=emitted)
@ -138,61 +137,61 @@ def canonical_name(repo: Path) -> str:
        # else:
        #     rname = r.name
    # if 'backups/github' in repo:
-    #     pass # TODO
+    #     pass # TODO 
 def _fd_path() -> str:
    # todo move it to core
-    fd_path: str | None = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
+    fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
    if fd_path is None:
        high("my.coding.commits requires 'fd' to be installed, See https://github.com/sharkdp/fd#installation")
    assert fd_path is not None
    return fd_path
-def git_repos_in(roots: list[Path]) -> list[Path]:
+def git_repos_in(roots: List[Path]) -> List[Path]:
    from subprocess import check_output
    outputs = check_output([
        _fd_path(),
        # '--follow', # right, not so sure about follow... make configurable?
        '--hidden',
        '--no-ignore',  # otherwise doesn't go inside .git directory (from fd v9)
        '--full-path',
        '--type', 'f',
        '/HEAD', # judging by is_git_dir, it should always be here..
        *roots,
    ]).decode('utf8').splitlines()
-    candidates = {Path(o).resolve().absolute().parent for o in outputs}
+    candidates = set(Path(o).resolve().absolute().parent for o in outputs)
    # exclude stuff within .git dirs (can happen for submodules?)
    candidates = {c for c in candidates if '.git' not in c.parts[:-1]}
    candidates = {c for c in candidates if is_git_dir(c)}
-    repos = sorted(map(_git_root, candidates))
+    repos = list(sorted(map(_git_root, candidates)))
    return repos
-def repos() -> list[Path]:
+def repos() -> List[Path]:
    return git_repos_in(list(map(Path, config().roots)))
 # returns modification time for an index to use as hash function
 def _repo_depends_on(_repo: Path) -> int:
-    for pp in [
+    for pp in {
        ".git/FETCH_HEAD",
        ".git/HEAD",
        "FETCH_HEAD",  # bare
        "HEAD",  # bare
-    ]:
+    }:
        ff = _repo / pp
        if ff.exists():
            return int(ff.stat().st_mtime)
-    raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")
+    else:
        raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")
-def _commits(_repos: list[Path]) -> Iterator[Commit]:
+def _commits(_repos: List[Path]) -> Iterator[Commit]:
    for r in _repos:
        yield from _cached_commits(r)
--- a/my/coding/github.py
+++ b/my/coding/github.py
@ -1,12 +1,9 @@
-from typing import TYPE_CHECKING
+import warnings
-from my.core import warnings
+warnings.warn('my.coding.github is deprecated! Please use my.github.all instead!')
 warnings.high('my.coding.github is deprecated! Please use my.github.all instead!')
 # todo why aren't DeprecationWarning shown by default??
-if not TYPE_CHECKING:
+from ..github.all import events, get_events
    from ..github.all import events, get_events  # noqa: F401
-    # todo deprecate properly
+# todo deprecate properly
-    iter_events = events
+iter_events = events
--- a/my/coding/topcoder.py
+++ b/my/coding/topcoder.py
@ -0,0 +1,83 @@
 #!/usr/bin/env python3
 from my.config import topcoder as config  # type: ignore[attr-defined]
 from datetime import datetime
 from functools import cached_property
 import json
 from typing import NamedTuple, Dict, Iterator
 from ..core import get_files, Res, unwrap, Json
 from ..core.error import Res, unwrap
 from ..core.konsume import zoom, wrap, ignore
 def _get_latest() -> Json:
    """Load the JSON from the file that sorts last (max) among the files for the configured export_path."""
    latest = max(get_files(config.export_path))
    raw = latest.read_text()
    return json.loads(raw)
 class Competition(NamedTuple):
    """
    A single match the user took part in.

    dates holds the raw timestamp string; use the when property for the parsed value.
    """
    contest_id: str
    contest: str
    percentile: float
    dates: str
    # NOTE: these are plain properties on purpose. functools.cached_property stores its value
    # in the instance __dict__, which NamedTuple instances don't have, so it raises TypeError on access.
    # property also provides the .fget accessor.
    @property
    def uid(self) -> str:
        """Unique id of the entry (same as contest_id)."""
        return self.contest_id
    def __hash__(self):
        return hash(self.contest_id)
    @property
    def when(self) -> datetime:
        """dates parsed with '%Y-%m-%dT%H:%M:%S.%fZ'; the result is a naive datetime."""
        return datetime.strptime(self.dates, '%Y-%m-%dT%H:%M:%S.%fZ')
    @property
    def summary(self) -> str:
        """Short human readable description, percentile rounded to a whole number."""
        return f'participated in {self.contest}: {self.percentile:.0f}'
    @classmethod
    def make(cls, json) -> Iterator[Res['Competition']]:
        """
        Yield a Competition built from a wrapped JSON entry.

        NOTE(review): json is presumably a ..core.konsume wrapper (it is accessed via .zoom().value) -- confirm.
        """
        ignore(json, 'rating', 'placement')
        cid = json['challengeId'].zoom().value
        cname = json['challengeName'].zoom().value
        percentile = json['percentile'].zoom().value
        dates = json['date'].zoom().value
        yield cls(
            contest_id=cid,
            contest=cname,
            percentile=percentile,
            dates=dates,
        )
 def iter_data() -> Iterator[Res[Competition]]:
    """
    Yield competitions from the 'MARATHON_MATCH' and 'SRM' histories of the latest export.

    NOTE(review): wrap/ignore/zoom/consume come from ..core.konsume; presumably they track
    which parts of the JSON were handled, so the statement order here matters -- confirm before reordering.
    """
    with wrap(_get_latest()) as j:
        ignore(j, 'id', 'version')
        res = j['result'].zoom()
        ignore(res, 'success', 'status', 'metadata')
        cont = res['content'].zoom()
        ignore(cont, 'handle', 'handleLower', 'userId', 'createdAt', 'updatedAt', 'createdBy', 'updatedBy')
        cont['DEVELOP'].ignore() # TODO handle it??
        ds = cont['DATA_SCIENCE'].zoom()
        mar, srm = zoom(ds, 'MARATHON_MATCH', 'SRM')
        # narrow both sections down to their 'history' entries
        mar = mar['history'].zoom()
        srm = srm['history'].zoom()
    # TODO right, I guess I could rely on pylint for unused variables??
        # marathon matches first, then SRMs
        for c in mar + srm:
            # yield from drains Competition.make completely before c.consume() is called
            yield from Competition.make(json=c)
            c.consume()
 def get_data():
    """Return everything produced by iter_data() as a list sorted by the entries' 'when' attribute."""
    # read the attribute from the instance rather than via Competition.when.fget:
    # .fget only exists on builtin property objects, not on functools.cached_property
    # sorted() already returns a list, no extra list() needed
    return sorted(iter_data(), key=lambda c: c.when)
--- a/my/common.py
+++ b/my/common.py
@ -1,6 +1,6 @@
 from .core.warnings import high
 high("DEPRECATED! Please use my.core.common instead.")
 from .core import __NOT_HPI_MODULE__
 from .core.common import *
--- a/my/config.py
+++ b/my/config.py
@ -9,18 +9,17 @@ This file is used for:
 - mypy: this file provides some type annotations
 - for loading the actual user config
 '''
 from __future__ import annotations
 #### NOTE: you won't need this line VVVV in your personal config
-from my.core import init  # noqa: F401  # isort: skip
+from my.core import init
 ###
 from datetime import tzinfo
 from pathlib import Path
 from typing import List
-from my.core import PathIsh, Paths
+
 from my.core import Paths, PathIsh
 class hypothesis:
@ -69,23 +68,17 @@ class pinboard:
    export_dir: Paths = ''
 class google:
    class maps:
        class android:
            export_path: Paths = ''
    takeout_path: Paths = ''
-from collections.abc import Sequence
+from typing import Sequence, Union, Tuple
-from datetime import date, datetime, timedelta
+from datetime import datetime, date, timedelta
 from typing import Union
 DateIsh = Union[datetime, date, str]
-LatLon = tuple[float, float]
+LatLon = Tuple[float, float]
 class location:
    # todo ugh, need to think about it... mypy wants the type here to be general, otherwise it can't deduce
    # and we can't import the types from the module itself, otherwise would be circular. common module?
-    home: LatLon | Sequence[tuple[DateIsh, LatLon]] = (1.0, -1.0)
+    home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
    home_accuracy = 30_000.0
    class via_ip:
@ -106,8 +99,6 @@ class location:
 from typing import Literal
 class time:
    class tz:
        policy: Literal['keep', 'convert', 'throw']
@ -126,9 +117,10 @@ class arbtt:
    logfiles: Paths
 from typing import Optional
 class commits:
-    emails: Sequence[str] | None
+    emails: Optional[Sequence[str]]
-    names: Sequence[str] | None
+    names: Optional[Sequence[str]]
    roots: Sequence[PathIsh]
@ -154,8 +146,8 @@ class tinder:
 class instagram:
    class android:
        export_path: Paths
-        username: str | None
+        username: Optional[str]
-        full_name: str | None
+        full_name: Optional[str]
    class gdpr:
        export_path: Paths
@ -173,7 +165,7 @@ class materialistic:
 class fbmessenger:
    class fbmessengerexport:
        export_db: PathIsh
-        facebook_id: str | None
+        facebook_id: Optional[str]
    class android:
        export_path: Paths
@ -185,8 +177,6 @@ class twitter_archive:
 class twitter:
    class talon:
        export_path: Paths
    class android:
        export_path: Paths
 class twint:
@ -251,7 +241,7 @@ class runnerup:
 class emfit:
    export_path: Path
    timezone: tzinfo
-    excluded_sids: list[str]
+    excluded_sids: List[str]
 class foursquare:
@ -274,13 +264,8 @@ class roamresearch:
 class whatsapp:
    class android:
        export_path: Paths
-        my_user_id: str | None
+        my_user_id: Optional[str]
 class harmonic:
    export_path: Paths
 class monzo:
    class monzoexport:
        export_path: Paths
--- a/my/core/init.py
+++ b/my/core/init.py
@ -1,53 +1,40 @@
 # this file only keeps the most common & critical types/utility functions
-from typing import TYPE_CHECKING
+from .common import get_files, PathIsh, Paths
 from .common import Json
 from .common import warn_if_empty
 from .common import stat, Stats
 from .common import datetime_naive, datetime_aware
 from .common import assert_never
 from .cfg import make_config
-from .common import PathIsh, Paths, get_files
+from .error import Res, unwrap
-from .compat import assert_never
+from .logging import make_logger, LazyLogger
 from .error import Res, notnone, unwrap
 from .logging import (
    make_logger,
 )
 from .stats import Stats, stat
 from .types import (
    Json,
    datetime_aware,
    datetime_naive,
 )
 from .util import __NOT_HPI_MODULE__
 from .utils.itertools import warn_if_empty
 LazyLogger = make_logger  # TODO deprecate this in favor of make_logger
-if not TYPE_CHECKING:
+# just for brevity in modules
-    # we used to keep these here for brevity, but feels like it only adds confusion,
+# todo not sure about these.. maybe best to rely on regular imports.. perhaps compare?
-    #  e.g. suggest that we perhaps somehow modify builtin behaviour or whatever
+from dataclasses import dataclass
-    #  so best to prefer explicit behaviour
+from pathlib import Path
    from dataclasses import dataclass
    from pathlib import Path
 __all__ = [
-    '__NOT_HPI_MODULE__',
+    'get_files', 'PathIsh', 'Paths',
    'Json',
    'LazyLogger',  # legacy import
    'Path',
    'PathIsh',
    'Paths',
    'Res',
    'Stats',
    'assert_never',  # TODO maybe deprecate from use in my.core? will be in stdlib soon
    'dataclass',
    'datetime_aware',
    'datetime_naive',
    'get_files',
    'make_config',
    'make_logger',
-    'notnone',
+    'LazyLogger',  # legacy import
    'stat',
    'unwrap',
    'warn_if_empty',
    'stat', 'Stats',
    'datetime_aware', 'datetime_naive',
    'assert_never',
    'make_config',
    '__NOT_HPI_MODULE__',
    'Res', 'unwrap',
    'dataclass', 'Path',
 ]
@ -55,7 +42,7 @@ __all__ = [
 # you could put _init_hook.py next to your private my/config
 # that way you can configure logging/warnings/env variables on every HPI import
 try:
-    import my._init_hook  # type: ignore[import-not-found]  # noqa: F401
+    import my._init_hook  # type: ignore[import-not-found]
 except:
    pass
 ##
--- a/my/core/main.py
+++ b/my/core/main.py
@ -1,26 +1,23 @@
-from __future__ import annotations
+from contextlib import ExitStack
 import functools
 import importlib
 import inspect
 from itertools import chain
 import os
 import shlex
 import shutil
 import sys
 import tempfile
 import traceback
-from collections.abc import Iterable, Sequence
+from typing import Optional, Sequence, Iterable, List, Type, Any, Callable
 from contextlib import ExitStack
 from itertools import chain
 from pathlib import Path
-from subprocess import PIPE, CompletedProcess, Popen, check_call, run
+from subprocess import check_call, run, PIPE, CompletedProcess, Popen
 from typing import Any, Callable
 import click
-@functools.lru_cache
+@functools.lru_cache()
-def mypy_cmd() -> Sequence[str] | None:
+def mypy_cmd() -> Optional[Sequence[str]]:
    try:
        # preferably, use mypy from current python env
        import mypy  # noqa: F401 fine not to use it
@ -35,7 +32,7 @@ def mypy_cmd() -> Sequence[str] | None:
    return None
-def run_mypy(cfg_path: Path) -> CompletedProcess | None:
+def run_mypy(cfg_path: Path) -> Optional[CompletedProcess]:
    # todo dunno maybe use the same mypy config in repository?
    # I'd need to install mypy.ini then??
    env = {**os.environ}
@ -46,7 +43,7 @@ def run_mypy(cfg_path: Path) -> CompletedProcess | None:
    cmd = mypy_cmd()
    if cmd is None:
        return None
-    mres = run([  # noqa: UP022,PLW1510
+    mres = run([
        *cmd,
        '--namespace-packages',
        '--color-output', # not sure if works??
@ -66,27 +63,21 @@ def eprint(x: str) -> None:
    # err=True prints to stderr
    click.echo(x, err=True)
 def indent(x: str) -> str:
    # todo use textwrap.indent?
    return ''.join('   ' + l for l in x.splitlines(keepends=True))
-OK = '✅'
+OK  = '✅'
 OFF = '🔲'
 def info(x: str) -> None:
    eprint(OK + ' ' + x)
 def error(x: str) -> None:
    eprint('❌ ' + x)
 def warning(x: str) -> None:
-    eprint('❗ ' + x)  # todo yellow?
+    eprint('❗ ' + x) # todo yellow?
 def tb(e: Exception) -> None:
    tb = ''.join(traceback.format_exception(Exception, e, e.__traceback__))
@ -95,7 +86,6 @@ def tb(e: Exception) -> None:
 def config_create() -> None:
    from .preinit import get_mycfg_dir
    mycfg_dir = get_mycfg_dir()
    created = False
@ -104,8 +94,7 @@ def config_create() -> None:
        my_config = mycfg_dir / 'my' / 'config' / '__init__.py'
        my_config.parent.mkdir(parents=True)
-        my_config.write_text(
+        my_config.write_text('''
            '''
 ### HPI personal config
 ## see
 # https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules
@ -128,8 +117,7 @@ class example:
 ### you can insert your own configuration below
 ### but feel free to delete the stuff above if you don't need ti
-'''.lstrip()
+'''.lstrip())
        )
        info(f'created empty config: {my_config}')
        created = True
    else:
@ -142,13 +130,12 @@ class example:
 # todo return the config as a result?
 def config_ok() -> bool:
-    errors: list[Exception] = []
+    errors: List[Exception] = []
    # at this point 'my' should already be imported, so doesn't hurt to extract paths from it
    import my
    try:
-        paths: list[str] = list(my.__path__)
+        paths: List[str] = list(my.__path__)
    except Exception as e:
        errors.append(e)
        error('failed to determine module import path')
@ -158,23 +145,19 @@ def config_ok() -> bool:
    # first try doing as much as possible without actually importing my.config
    from .preinit import get_mycfg_dir
    cfg_path = get_mycfg_dir()
    # alternative is importing my.config and then getting cfg_path from its __file__/__path__
    # not sure which is better tbh
    ## check we're not using stub config
    import my.core
    try:
        core_pkg_path = str(Path(my.core.__path__[0]).parent)
        if str(cfg_path).startswith(core_pkg_path):
-            error(
+            error(f'''
                f'''
 Seems that the stub config is used ({cfg_path}). This is likely not going to work.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules for more information
-'''.strip()
+'''.strip())
            )
            errors.append(RuntimeError('bad config path'))
    except Exception as e:
        errors.append(e)
@ -188,6 +171,8 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
        # use a temporary directory, useful because
        # - compileall ignores -B, so always craps with .pyc files (annoyng on RO filesystems)
        # - compileall isn't following symlinks, just silently ignores them
        # note: ugh, annoying that copytree requires a non-existing dir before 3.8.
        # once we have min version 3.8, can use dirs_exist_ok=True param
        tdir = Path(td) / 'cfg'
        # NOTE: compileall still returns code 0 if the path doesn't exist..
        # but in our case hopefully it's not an issue
@ -196,7 +181,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
        try:
            # this will resolve symlinks when copying
            # should be under try/catch since might fail if some symlinks are missing
-            shutil.copytree(cfg_path, tdir, dirs_exist_ok=True)
+            shutil.copytree(cfg_path, tdir)
            check_call(cmd)
            info('syntax check: ' + ' '.join(cmd))
        except Exception as e:
@ -206,7 +191,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
    ## check types
    mypy_res = run_mypy(cfg_path)
-    if mypy_res is not None:  # has mypy
+    if mypy_res is not None: # has mypy
        rc = mypy_res.returncode
        if rc == 0:
            info('mypy check  : success')
@ -229,16 +214,14 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
    if len(errors) > 0:
        error(f'config check: {len(errors)} errors')
        return False
-
+    else:
-    # note: shouldn't exit here, might run something else
+        # note: shouldn't exit here, might run something else
-    info('config check: success!')
+        info('config check: success!')
-    return True
+        return True
 from .util import HPIModule, modules
-
+def _modules(*, all: bool=False) -> Iterable[HPIModule]:
 def _modules(*, all: bool = False) -> Iterable[HPIModule]:
    skipped = []
    for m in modules():
        if not all and m.skip_reason is not None:
@ -249,7 +232,7 @@ def _modules(*, all: bool = False) -> Iterable[HPIModule]:
        warning(f'Skipped {len(skipped)} modules: {skipped}. Pass --all if you want to see them.')
-def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: list[str]) -> None:
+def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: List[str]) -> None:
    if len(for_modules) > 0:
        # if you're checking specific modules, show errors
        # hopefully makes sense?
@ -260,9 +243,10 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: li
    import contextlib
    from .common import quick_stats
    from .util import get_stats, HPIModule
    from .stats import guess_stats
    from .error import warn_my_config_import_error
    from .stats import get_stats, quick_stats
    from .util import HPIModule
    mods: Iterable[HPIModule]
    if len(for_modules) == 0:
@ -273,7 +257,7 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: li
    # todo add a --all argument to disregard is_active check?
    for mr in mods:
        skip = mr.skip_reason
-        m = mr.name
+        m    = mr.name
        if skip is not None:
            eprint(f'{OFF} {click.style("SKIP", fg="yellow")}: {m:<50} {skip}')
            continue
@ -292,8 +276,11 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: li
            continue
        info(f'{click.style("OK", fg="green")}  : {m:<50}')
-        # TODO add hpi 'stats'? instead of doctor? not sure
+        # first try explicitly defined stats function:
-        stats = get_stats(m, guess=True)
+        stats = get_stats(m)
        if stats is None:
            # then try guessing.. not sure if should log somehow?
            stats = guess_stats(m, quick=quick)
        if stats is None:
            eprint("       - no 'stats' function, can't check the data")
@ -304,7 +291,6 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: li
        try:
            kwargs = {}
            # todo hmm why wouldn't they be callable??
            if callable(stats) and 'quick' in inspect.signature(stats).parameters:
                kwargs['quick'] = quick
            with quick_context:
@ -323,8 +309,8 @@ def list_modules(*, list_all: bool) -> None:
    tabulate_warnings()
    for mr in _modules(all=list_all):
-        m = mr.name
+        m    = mr.name
-        sr = mr.skip_reason
+        sr   = mr.skip_reason
        if sr is None:
            pre = OK
            suf = ''
@ -340,20 +326,17 @@ def tabulate_warnings() -> None:
    Helper to avoid visual noise in hpi modules/doctor
    '''
    import warnings
    orig = warnings.formatwarning
    def override(*args, **kwargs) -> str:
        res = orig(*args, **kwargs)
        return ''.join('  ' + x for x in res.splitlines(keepends=True))
    warnings.formatwarning = override
    # TODO loggers as well?
 def _requires(modules: Sequence[str]) -> Sequence[str]:
    from .discovery_pure import module_by_name
    mods = [module_by_name(module) for module in modules]
    res = []
    for mod in mods:
@ -380,7 +363,7 @@ def module_requires(*, module: Sequence[str]) -> None:
        click.echo(x)
-def module_install(*, user: bool, module: Sequence[str], parallel: bool = False, break_system_packages: bool = False) -> None:
+def module_install(*, user: bool, module: Sequence[str], parallel: bool=False, break_system_packages: bool=False) -> None:
    if isinstance(module, str):
        # legacy behavior, used to take a since argument
        module = [module]
@ -391,9 +374,8 @@ def module_install(*, user: bool, module: Sequence[str], parallel: bool = False,
        warning('requirements list is empty, no need to install anything')
        return
    use_uv = 'HPI_MODULE_INSTALL_USE_UV' in os.environ
    pre_cmd = [
-        sys.executable, '-m', *(['uv'] if use_uv else []), 'pip',
+        sys.executable, '-m', 'pip',
        'install',
        *(['--user'] if user else []), # todo maybe instead, forward all the remaining args to pip?
        *(['--break-system-packages'] if break_system_packages else []), # https://peps.python.org/pep-0668/
@ -411,7 +393,7 @@ def module_install(*, user: bool, module: Sequence[str], parallel: bool = False,
        # I think it only helps for pypi artifacts (not git!),
        # and only if they weren't cached
        for r in requirements:
-            cmds.append([*pre_cmd, r])
+            cmds.append(pre_cmd + [r])
    else:
        if parallel:
            warning('parallel install is not supported on this platform, installing sequentially...')
@ -457,8 +439,8 @@ def _ui_getchar_pick(choices: Sequence[str], prompt: str = 'Select from: ') -> i
        return result_map[ch]
-def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = True) -> Iterable[Callable[..., Any]]:
+def _locate_functions_or_prompt(qualified_names: List[str], prompt: bool = True) -> Iterable[Callable[..., Any]]:
-    from .query import QueryException, locate_qualified_function
+    from .query import locate_qualified_function, QueryException
    from .stats import is_data_provider
    # if not connected to a terminal, can't prompt
@ -475,9 +457,9 @@ def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = Tr
            # user to select a 'data provider' like function
            try:
                mod = importlib.import_module(qualname)
-            except Exception as ie:
+            except Exception:
                eprint(f"During fallback, importing '{qualname}' as module failed")
-                raise qr_err from ie
+                raise qr_err
            # find data providers in this module
            data_providers = [f for _, f in inspect.getmembers(mod, inspect.isfunction) if is_data_provider(f)]
@ -506,9 +488,8 @@ def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = Tr
 def _warn_exceptions(exc: Exception) -> None:
-    from my.core import make_logger
+    from my.core.common import LazyLogger
-
+    logger = LazyLogger('CLI', level='warning')
    logger = make_logger('CLI', level='warning')
    logger.exception(f'hpi query: {exc}')
@ -519,28 +500,26 @@ def query_hpi_functions(
    *,
    output: str = 'json',
    stream: bool = False,
-    qualified_names: list[str],
+    qualified_names: List[str],
-    order_key: str | None,
+    order_key: Optional[str],
-    order_by_value_type: type | None,
+    order_by_value_type: Optional[Type],
    after: Any,
    before: Any,
    within: Any,
    reverse: bool = False,
-    limit: int | None,
+    limit: Optional[int],
    drop_unsorted: bool,
    wrap_unsorted: bool,
    warn_exceptions: bool,
    raise_exceptions: bool,
    drop_exceptions: bool,
 ) -> None:
-    from .query_range import RangeTuple, select_range
+    from .query_range import select_range, RangeTuple
    import my.core.error as err
    # chain list of functions from user, in the order they wrote them on the CLI
    input_src = chain(*(f() for f in _locate_functions_or_prompt(qualified_names)))
    # NOTE: if passing just one function to this which returns a single namedtuple/dataclass,
    # using both --order-key and --order-type will often be faster as it does not need to
    # duplicate the iterator in memory, or try to find the --order-type type on each object before sorting
    res = select_range(
        input_src,
        order_key=order_key,
@ -553,8 +532,7 @@ def query_hpi_functions(
        warn_exceptions=warn_exceptions,
        warn_func=_warn_exceptions,
        raise_exceptions=raise_exceptions,
-        drop_exceptions=drop_exceptions,
+        drop_exceptions=drop_exceptions)
    )
    if output == 'json':
        from .serialize import dumps
@ -588,7 +566,7 @@ def query_hpi_functions(
        # can ignore the mypy warning here, locations_to_gpx yields any errors
        # if you didnt pass it something that matches the LocationProtocol
-        for exc in locations_to_gpx(res, sys.stdout):  # type: ignore[arg-type]
+        for exc in locations_to_gpx(res, sys.stdout):   # type: ignore[arg-type]
            if warn_exceptions:
                _warn_exceptions(exc)
            elif raise_exceptions:
@ -601,11 +579,10 @@ def query_hpi_functions(
        # output == 'repl'
        eprint(f"\nInteract with the results by using the {click.style('res', fg='green')} variable\n")
        try:
-            import IPython  # type: ignore[import,unused-ignore]
+            import IPython  # type: ignore[import]
        except ModuleNotFoundError:
            eprint("'repl' typically uses ipython, install it with 'python3 -m pip install ipython'. falling back to stdlib...")
            import code
            code.interact(local=locals())
        else:
            IPython.embed()
@ -613,7 +590,7 @@ def query_hpi_functions(
@click.group()
@click.option("--debug", is_flag=True, default=False, help="Show debug logs")
-def main(*, debug: bool) -> None:
+def main(debug: bool) -> None:
    '''
    Human Programming Interface
@ -639,19 +616,20 @@ def main(*, debug: bool) -> None:
    # to run things at the end (would need to use a callback or pass context)
    # https://click.palletsprojects.com/en/7.x/commands/#nested-handling-and-contexts
-    tdir = Path(tempfile.gettempdir()) / 'hpi_temp_dir'
+    tdir: str = os.path.join(tempfile.gettempdir(), 'hpi_temp_dir')
-    tdir.mkdir(exist_ok=True)
+    if not os.path.exists(tdir):
        os.makedirs(tdir)
    os.chdir(tdir)
@functools.lru_cache(maxsize=1)
-def _all_mod_names() -> list[str]:
+def _all_mod_names() -> List[str]:
    """Should include all modules, in case user is trying to diagnose issues"""
    # sort this, so that the order doesn't change while tabbing through
    return sorted([m.name for m in modules()])
-def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> list[str]:
+def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> List[str]:
    return [m for m in _all_mod_names() if m.startswith(incomplete)]
@ -661,7 +639,7 @@ def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: st
@click.option('-q', '--quick', is_flag=True, help='Only run partial checks (first 100 items)')
@click.option('-S', '--skip-config-check', 'skip_conf', is_flag=True, help='Skip configuration check')
@click.argument('MODULE', nargs=-1, required=False, shell_complete=_module_autocomplete)
-def doctor_cmd(*, verbose: bool, list_all: bool, quick: bool, skip_conf: bool, module: Sequence[str]) -> None:
+def doctor_cmd(verbose: bool, list_all: bool, quick: bool, skip_conf: bool, module: Sequence[str]) -> None:
    '''
    Run various checks
@ -695,7 +673,7 @@ def config_create_cmd() -> None:
@main.command(name='modules', short_help='list available modules')
@click.option('--all', 'list_all', is_flag=True, help='List all modules, including disabled')
-def module_cmd(*, list_all: bool) -> None:
+def module_cmd(list_all: bool) -> None:
    '''List available modules'''
    list_modules(list_all=list_all)
@ -708,7 +686,7 @@ def module_grp() -> None:
@module_grp.command(name='requires', short_help='print module reqs')
@click.argument('MODULES', shell_complete=_module_autocomplete, nargs=-1, required=True)
-def module_requires_cmd(*, modules: Sequence[str]) -> None:
+def module_requires_cmd(modules: Sequence[str]) -> None:
    '''
    Print MODULES requirements
@ -725,7 +703,7 @@ def module_requires_cmd(*, modules: Sequence[str]) -> None:
              is_flag=True,
              help='Bypass PEP 668 and install dependencies into the system-wide python package directory.')
@click.argument('MODULES', shell_complete=_module_autocomplete, nargs=-1, required=True)
-def module_install_cmd(*, user: bool, parallel: bool, break_system_packages: bool, modules: Sequence[str]) -> None:
+def module_install_cmd(user: bool, parallel: bool, break_system_packages: bool, modules: Sequence[str]) -> None:
    '''
    Install dependencies for modules using pip
@ -806,18 +784,17 @@ def module_install_cmd(*, user: bool, parallel: bool, break_system_packages: boo
              help='ignore any errors returned as objects from the functions')
@click.argument('FUNCTION_NAME', nargs=-1, required=True, shell_complete=_module_autocomplete)
 def query_cmd(
    *,
    function_name: Sequence[str],
    output: str,
    stream: bool,
-    order_key: str | None,
+    order_key: Optional[str],
-    order_type: str | None,
+    order_type: Optional[str],
-    after: str | None,
+    after: Optional[str],
-    before: str | None,
+    before: Optional[str],
-    within: str | None,
+    within: Optional[str],
-    recent: str | None,
+    recent: Optional[str],
    reverse: bool,
-    limit: int | None,
+    limit: Optional[int],
    drop_unsorted: bool,
    wrap_unsorted: bool,
    warn_exceptions: bool,
@ -851,9 +828,9 @@ def query_cmd(
    hpi query --order-type datetime --after '2016-01-01' --before '2019-01-01' my.reddit.all.comments
    '''
-    from datetime import date, datetime
+    from datetime import datetime, date
-    chosen_order_type: type | None
+    chosen_order_type: Optional[Type]
    if order_type == "datetime":
        chosen_order_type = datetime
    elif order_type == "date":
@ -889,8 +866,7 @@ def query_cmd(
            wrap_unsorted=wrap_unsorted,
            warn_exceptions=warn_exceptions,
            raise_exceptions=raise_exceptions,
-            drop_exceptions=drop_exceptions,
+            drop_exceptions=drop_exceptions)
        )
    except QueryException as qe:
        eprint(str(qe))
        sys.exit(1)
@ -905,7 +881,6 @@ def query_cmd(
 def test_requires() -> None:
    from click.testing import CliRunner
    result = CliRunner().invoke(main, ['module', 'requires', 'my.github.ghexport', 'my.browser.export'])
    assert result.exit_code == 0
    assert "github.com/karlicoss/ghexport" in result.output
--- a/my/core/_cpu_pool.py
+++ b/my/core/_cpu_pool.py
@ -10,18 +10,16 @@ how many cores we want to dedicate to the DAL.
 Enabled by the env variable, specifying how many cores to dedicate
 e.g. "HPI_CPU_POOL=4 hpi query ..."
 """
 from __future__ import annotations
 import os
 from concurrent.futures import ProcessPoolExecutor
-from typing import cast
+import os
 from typing import cast, Optional
 _NOT_SET = cast(ProcessPoolExecutor, object())
-_INSTANCE: ProcessPoolExecutor | None = _NOT_SET
+_INSTANCE: Optional[ProcessPoolExecutor] = _NOT_SET
-def get_cpu_pool() -> ProcessPoolExecutor | None:
+def get_cpu_pool() -> Optional[ProcessPoolExecutor]:
    global _INSTANCE
    if _INSTANCE is _NOT_SET:
        use_cpu_pool = os.environ.get('HPI_CPU_POOL')
--- a/my/core/_deprecated/dataset.py
+++ b/my/core/_deprecated/dataset.py
@ -1,12 +0,0 @@
 from ..common import PathIsh
 from ..sqlite import sqlite_connect_immutable
 def connect_readonly(db: PathIsh):
    import dataset  # type: ignore
    # see https://github.com/pudo/dataset/issues/136#issuecomment-128693122
    # todo not sure if mode=ro has any benefit, but it doesn't work on read-only filesystems
    # maybe it should autodetect readonly filesystems and apply this? not sure
    creator = lambda: sqlite_connect_immutable(db)
    return dataset.connect('sqlite:///', engine_kwargs={'creator': creator})
--- a/my/core/_deprecated/kompress.py
+++ b/my/core/_deprecated/kompress.py
@ -1,17 +1,16 @@
 """
 Various helpers for compression
 """
 # fmt: off
 from __future__ import annotations
 import io
 import pathlib
 from collections.abc import Iterator, Sequence
 from datetime import datetime
 from functools import total_ordering
 import io
 import pathlib
 from pathlib import Path
-from typing import IO, Union
+import sys
 from typing import Union, IO, Sequence, Any, Iterator
 PathIsh = Union[Path, str]
@ -28,11 +27,11 @@ class Ext:
 def is_compressed(p: Path) -> bool:
    # todo kinda lame way for now.. use mime ideally?
    # should cooperate with kompress.kopen?
-    return any(p.name.endswith(ext) for ext in [Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz])
+    return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz})
 def _zstd_open(path: Path, *args, **kwargs) -> IO:
-    import zstandard as zstd  # type: ignore
+    import zstandard as zstd # type: ignore
    fh = path.open('rb')
    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(fh)
@ -86,9 +85,9 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
        # todo 'expected "BinaryIO"'??
        return io.TextIOWrapper(ifile, encoding=encoding)
    elif name.endswith(Ext.lz4):
-        import lz4.frame  # type: ignore
+        import lz4.frame # type: ignore
        return lz4.frame.open(str(pp), mode, *args, **kwargs)
-    elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):  # noqa: PIE810
+    elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):
        kwargs['mode'] = mode
        return _zstd_open(pp, *args, **kwargs)
    elif name.endswith(Ext.targz):
@ -102,8 +101,8 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
        return pp.open(mode, *args, **kwargs)
 import os
 import typing
 import os
 if typing.TYPE_CHECKING:
    # otherwise mypy can't figure out that BasePath is a type alias..
@ -121,7 +120,7 @@ class CPath(BasePath):
    Path only has _accessor and _closed slots, so can't directly set .open method
    _accessor.open has to return file descriptor, doesn't work for compressed stuff.
    """
-    def open(self, *args, **kwargs):  # noqa: ARG002
+    def open(self, *args, **kwargs):
        kopen_kwargs = {}
        mode = kwargs.get('mode')
        if mode is not None:
@ -142,16 +141,20 @@ open = kopen # TODO deprecate
 def kexists(path: PathIsh, subpath: str) -> bool:
    try:
        kopen(path, subpath)
        return True
    except Exception:
        return False
    else:
        return True
 import zipfile
-
+if sys.version_info[:2] >= (3, 8):
-# meh... zipfile.Path is not available on 3.7
+    # meh... zipfile.Path is not available on 3.7
-zipfile_Path = zipfile.Path
+    zipfile_Path = zipfile.Path
 else:
    if typing.TYPE_CHECKING:
        zipfile_Path = Any
    else:
        zipfile_Path = object
@total_ordering
@ -159,7 +162,7 @@ class ZipPath(zipfile_Path):
    # NOTE: is_dir/is_file might not behave as expected, the base class checks it only based on the slash in path
    # seems that root/at are not exposed in the docs, so might be an implementation detail
-    root: zipfile.ZipFile  # type: ignore[assignment]
+    root: zipfile.ZipFile
    at: str
    @property
@ -188,14 +191,14 @@ class ZipPath(zipfile_Path):
        # note: seems that zip always uses forward slash, regardless OS?
        return zipfile_Path(self.root, self.at + '/')
-    def rglob(self, glob: str) -> Iterator[ZipPath]:
+    def rglob(self, glob: str) -> Sequence[ZipPath]:
        # note: not 100% sure about the correctness, but seem fine?
        # Path.match() matches from the right, so need to
        rpaths = [p for p in self.root.namelist() if p.startswith(self.at)]
        rpaths = [p for p in rpaths if Path(p).match(glob)]
-        return (ZipPath(self.root, p) for p in rpaths)
+        return [ZipPath(self.root, p) for p in rpaths]
-    def relative_to(self, other: ZipPath) -> Path:  # type: ignore[override, unused-ignore]
+    def relative_to(self, other: ZipPath) -> Path:
        assert self.filepath == other.filepath, (self.filepath, other.filepath)
        return self.subpath.relative_to(other.subpath)
@ -211,7 +214,7 @@ class ZipPath(zipfile_Path):
    def iterdir(self) -> Iterator[ZipPath]:
        for s in self._as_dir().iterdir():
-            yield ZipPath(s.root, s.at)
+            yield ZipPath(s.root, s.at)  # type: ignore[attr-defined]
    @property
    def stem(self) -> str:
@ -240,7 +243,7 @@ class ZipPath(zipfile_Path):
        # see https://en.wikipedia.org/wiki/ZIP_(file_format)#Structure
        dt = datetime(*self.root.getinfo(self.at).date_time)
        ts = int(dt.timestamp())
-        params = dict(  # noqa: C408
+        params = dict(
            st_mode=0,
            st_ino=0,
            st_dev=0,
--- a/my/core/cachew.py
+++ b/my/core/cachew.py
@ -1,27 +1,13 @@
-from __future__ import annotations
+from .common import assert_subpackage; assert_subpackage(__name__)
 from .internal import assert_subpackage
 assert_subpackage(__name__)
 import logging
 import sys
 from collections.abc import Iterator
 from contextlib import contextmanager
 import logging
 from pathlib import Path
-from typing import (
+import sys
-    TYPE_CHECKING,
+from typing import Optional, Iterator, cast, TYPE_CHECKING, TypeVar, Callable, overload, Union, Any, Type
-    Any,
+import warnings
    Callable,
    TypeVar,
    Union,
    cast,
    overload,
 )
-import appdirs  # type: ignore[import-untyped]
+import appdirs
 from . import warnings
 PathIsh = Union[str, Path]  # avoid circular import from .common
@ -61,12 +47,12 @@ def _appdirs_cache_dir() -> Path:
 _CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
-def cache_dir(suffix: PathIsh | None = None) -> Path:
+def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
    from . import core_config as CC
    cdir_ = CC.config.get_cache_dir()
-    sp: Path | None = None
+    sp: Optional[Path] = None
    if suffix is not None:
        sp = Path(suffix)
        # guess if you do need absolute, better path it directly instead of as suffix?
@ -119,7 +105,7 @@ def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs):
    try:
        import cachew
    except ModuleNotFoundError:
-        warnings.high('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
+        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
        return lambda orig_func: orig_func
    else:
        kwargs['cache_path'] = cache_path
@ -136,7 +122,7 @@ if TYPE_CHECKING:
    CC = Callable[P, R]  # need to give it a name, if inlined into bound=, mypy runs in a bug
    PathProvider = Union[PathIsh, Callable[P, PathIsh]]
    # NOTE: in cachew, HashFunction type returns str
-    # however in practice, cachew always calls str for its result
+    # however in practice, cachew alwasy calls str for its result
    # so perhaps better to switch it to Any in cachew as well
    HashFunction = Callable[P, Any]
@ -145,19 +131,21 @@ if TYPE_CHECKING:
    # we need two versions due to @doublewrap
    # this is when we just annotate as @cachew without any args
    @overload  # type: ignore[no-overload-impl]
-    def mcachew(fun: F) -> F: ...
+    def mcachew(fun: F) -> F:
        ...
    @overload
    def mcachew(
-        cache_path: PathProvider | None = ...,
+        cache_path: Optional[PathProvider] = ...,
        *,
        force_file: bool = ...,
-        cls: type | None = ...,
+        cls: Optional[Type] = ...,
        depends_on: HashFunction = ...,
-        logger: logging.Logger | None = ...,
+        logger: Optional[logging.Logger] = ...,
        chunk_by: int = ...,
-        synthetic_key: str | None = ...,
+        synthetic_key: Optional[str] = ...,
-    ) -> Callable[[F], F]: ...
+    ) -> Callable[[F], F]:
        ...
 else:
    mcachew = _mcachew_impl
--- a/my/core/cfg.py
+++ b/my/core/cfg.py
@ -1,42 +1,34 @@
 from __future__ import annotations
-import importlib
+from typing import TypeVar, Type, Callable, Dict, Any
 import re
 import sys
 from collections.abc import Iterator
 from contextlib import ExitStack, contextmanager
 from typing import Any, Callable, TypeVar
-Attrs = dict[str, Any]
+Attrs = Dict[str, Any]
 C = TypeVar('C')
 # todo not sure about it, could be overthinking...
 # but short enough to change later
 # TODO document why it's necessary?
-def make_config(cls: type[C], migration: Callable[[Attrs], Attrs] = lambda x: x) -> C:
+def make_config(cls: Type[C], migration: Callable[[Attrs], Attrs]=lambda x: x) -> C:
    user_config = cls.__base__
    old_props = {
        # NOTE: deliberately use gettatr to 'force' class properties here
-        k: getattr(user_config, k)
+        k: getattr(user_config, k) for k in vars(user_config)
        for k in vars(user_config)
    }
    new_props = migration(old_props)
    from dataclasses import fields
    params = {
        k: v
        for k, v in new_props.items()
-        if k in {f.name for f in fields(cls)}  # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115
+        if k in {f.name for f in fields(cls)}   # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115
    }
    # todo maybe return type here?
    return cls(**params)
 F = TypeVar('F')
-
+from contextlib import contextmanager
-
+from typing import Iterator
@contextmanager
 def _override_config(config: F) -> Iterator[F]:
    '''
@ -54,9 +46,10 @@ def _override_config(config: F) -> Iterator[F]:
            delattr(config, k)
 import importlib
 import sys
 from typing import Optional
 ModuleRegex = str
@contextmanager
 def _reload_modules(modules: ModuleRegex) -> Iterator[None]:
    # need to use list here, otherwise reordering with set might mess things up
@ -86,15 +79,16 @@ def _reload_modules(modules: ModuleRegex) -> Iterator[None]:
                sys.modules.pop(m, None)
 from contextlib import ExitStack
 import re
@contextmanager
-def tmp_config(*, modules: ModuleRegex | None = None, config=None):
+def tmp_config(*, modules: Optional[ModuleRegex]=None, config=None):
    if modules is None:
        assert config is None
    if modules is not None:
        assert config is not None
    import my.config
    with ExitStack() as module_reload_stack, _override_config(my.config) as new_config:
        if config is not None:
            overrides = {k: v for k, v in vars(config).items() if not k.startswith('__')}
@ -109,7 +103,6 @@ def tmp_config(*, modules: ModuleRegex | None = None, config=None):
 def test_tmp_config() -> None:
    class extra:
        data_path = '/path/to/data'
    with tmp_config() as c:
        assert c.google != 'whatever'
        assert not hasattr(c, 'extra')
--- a/my/core/common.py
+++ b/my/core/common.py
@ -1,43 +1,199 @@
 from __future__ import annotations
 import os
 from collections.abc import Iterable, Sequence
 from glob import glob as do_glob
 from pathlib import Path
 from datetime import datetime
 import functools
 from contextlib import contextmanager
 import os
 import sys
 import types
 from typing import (
-    TYPE_CHECKING,
+    Any,
    Callable,
-    Generic,
+    Dict,
    Iterable,
    Iterator,
    List,
    NoReturn,
    Optional,
    Sequence,
    TYPE_CHECKING,
    Tuple,
    TypeVar,
    Union,
    cast,
    get_args,
    get_type_hints,
    get_origin,
 )
-
+import warnings
-from . import compat, warnings
+from . import warnings as core_warnings
 # some helper functions
 # TODO start deprecating this? soon we'd be able to use Path | str syntax which is shorter and more explicit
 PathIsh = Union[Path, str]
 # TODO only used in tests? not sure if useful at all.
 def import_file(p: PathIsh, name: Optional[str] = None) -> types.ModuleType:
    p = Path(p)
    if name is None:
        name = p.stem
    import importlib.util
    spec = importlib.util.spec_from_file_location(name, p)
    assert spec is not None, f"Fatal error; Could not create module spec from {name} {p}"
    foo = importlib.util.module_from_spec(spec)
    loader = spec.loader; assert loader is not None
    loader.exec_module(foo)
    return foo
 def import_from(path: PathIsh, name: str) -> types.ModuleType:
    path = str(path)
    try:
        sys.path.append(path)
        import importlib
        return importlib.import_module(name)
    finally:
        sys.path.remove(path)
 def import_dir(path: PathIsh, extra: str='') -> types.ModuleType:
    p = Path(path)
    if p.parts[0] == '~':
        p = p.expanduser() # TODO eh. not sure about this..
    return import_from(p.parent, p.name + extra)
 T = TypeVar('T')
 K = TypeVar('K')
 V = TypeVar('V')
 # TODO deprecate? more_itertools.one should be used
 def the(l: Iterable[T]) -> T:
    it = iter(l)
    try:
        first = next(it)
    except StopIteration:
        raise RuntimeError('Empty iterator?')
    assert all(e == first for e in it)
    return first
 # TODO more_itertools.bucket?
 def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
    res: Dict[K, List[T]] = {}
    for i in l:
        kk = key(i)
        lst = res.get(kk, [])
        lst.append(i)
        res[kk] = lst
    return res
 def _identity(v: T) -> V:  # type: ignore[type-var]
    return cast(V, v)
 # ugh. nothing in more_itertools?
 def ensure_unique(
        it: Iterable[T],
        *,
        key: Callable[[T], K],
        value: Callable[[T], V]=_identity,
        key2value: Optional[Dict[K, V]]=None
 ) -> Iterable[T]:
    if key2value is None:
        key2value = {}
    for i in it:
        k = key(i)
        v = value(i)
        pv = key2value.get(k, None)
        if pv is not None:
            raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}")
        key2value[k] = v
        yield i
 def test_ensure_unique() -> None:
    import pytest
    assert list(ensure_unique([1, 2, 3], key=lambda i: i)) == [1, 2, 3]
    dups = [1, 2, 1, 4]
    # this works because it's lazy
    it = ensure_unique(dups, key=lambda i: i)
    # but forcing throws
    with pytest.raises(RuntimeError, match='Duplicate key'):
        list(it)
    # hacky way to force distinct objects?
    list(ensure_unique(dups, key=lambda i: object()))
 def make_dict(
        it: Iterable[T],
        *,
        key: Callable[[T], K],
        value: Callable[[T], V]=_identity
 ) -> Dict[K, V]:
    res: Dict[K, V] = {}
    uniques = ensure_unique(it, key=key, value=value, key2value=res)
    for _ in uniques:
        pass  # force the iterator
    return res
 def test_make_dict() -> None:
    it = range(5)
    d = make_dict(it, key=lambda i: i, value=lambda i: i % 2)
    assert d == {0: 0, 1: 1, 2: 0, 3: 1, 4: 0}
    # check type inference
    d2: Dict[str, int ] = make_dict(it, key=lambda i: str(i))
    d3: Dict[str, bool] = make_dict(it, key=lambda i: str(i), value=lambda i: i % 2 == 0)
 # https://stackoverflow.com/a/12377059/706389
 def listify(fn=None, wrapper=list):
    """
    Wraps a function's return value in wrapper (e.g. list)
    Useful when an algorithm can be expressed more cleanly as a generator
    """
    def listify_return(fn):
        @functools.wraps(fn)
        def listify_helper(*args, **kw):
            return wrapper(fn(*args, **kw))
        return listify_helper
    if fn is None:
        return listify_return
    return listify_return(fn)
 # todo use in bluemaestro
 # def dictify(fn=None, key=None, value=None):
 #     def md(it):
 #         return make_dict(it, key=key, value=value)
 #     return listify(fn=fn, wrapper=md)
 from .logging import setup_logger, LazyLogger
 Paths = Union[Sequence[PathIsh], PathIsh]
 DEFAULT_GLOB = '*'
 def get_files(
-    pp: Paths,
+        pp: Paths,
-    glob: str = DEFAULT_GLOB,
+        glob: str=DEFAULT_GLOB,
-    *,
+        sort: bool=True,
-    sort: bool = True,
+        guess_compression: bool=True,
-    guess_compression: bool = True,
+) -> Tuple[Path, ...]:
 ) -> tuple[Path, ...]:
    """
    Helper function to avoid boilerplate.
    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
    """
    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
-    sources: list[Path]
+    sources: List[Path]
    if isinstance(pp, Path):
        sources = [pp]
    elif isinstance(pp, str):
@ -54,7 +210,7 @@ def get_files(
        # TODO ugh. very flaky... -3 because [<this function>, get_files(), <actual caller>]
        return traceback.extract_stack()[-3].filename
-    paths: list[Path] = []
+    paths: List[Path] = []
    for src in sources:
        if src.parts[0] == '~':
            src = src.expanduser()
@ -62,9 +218,9 @@ def get_files(
        gs = str(src)
        if '*' in gs:
            if glob != DEFAULT_GLOB:
-                warnings.medium(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
+                warnings.warn(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
-            paths.extend(map(Path, do_glob(gs)))  # noqa: PTH207
+            paths.extend(map(Path, do_glob(gs)))
-        elif os.path.isdir(str(src)):  # noqa: PTH112
+        elif os.path.isdir(str(src)):
            # NOTE: we're using os.path here on purpose instead of src.is_dir
            # the reason is is_dir for archives might return True and then
            # this clause would try globbing insize the archives
@ -80,11 +236,11 @@ def get_files(
            paths.append(src)
    if sort:
-        paths = sorted(paths)
+        paths = list(sorted(paths))
    if len(paths) == 0:
        # todo make it conditionally defensive based on some global settings
-        warnings.high(f'''
+        core_warnings.high(f'''
 {caller()}: no paths were matched against {pp}. This might result in missing data. Likely, the directory you passed is empty.
 '''.strip())
        # traceback is useful to figure out what config caused it?
@ -93,7 +249,7 @@ def get_files(
        traceback.print_stack()
    if guess_compression:
-        from .kompress import CPath, ZipPath, is_compressed
+        from .kompress import CPath, is_compressed, ZipPath
        # NOTE: wrap is just for backwards compat with vendorized kompress
        # with kompress library, only is_compressed check and Cpath should be enough
@ -110,33 +266,44 @@ def get_files(
    return tuple(paths)
@functools.lru_cache(1)
 def _magic():
    import magic # type: ignore
    return magic.Magic(mime=True)
 # TODO could reuse in pdf module?
 import mimetypes # todo do I need init()?
 # todo wtf? fastermime thinks it's mime is application/json even if the extension is xz??
 # whereas magic detects correctly: application/x-zstd and application/x-xz
 def fastermime(path: PathIsh) -> str:
    paths = str(path)
    # mimetypes is faster
    (mime, _) = mimetypes.guess_type(paths)
    if mime is not None:
        return mime
    # magic is slower but returns more stuff
    # TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
    return _magic().from_file(paths)
 Json = Dict[str, Any]
 from typing import TypeVar, Callable, Generic
 _C = TypeVar('_C')
 _R = TypeVar('_R')
 # https://stackoverflow.com/a/5192374/706389
 # NOTE: it was added to stdlib in 3.9 and then deprecated in 3.11
 # seems that the suggested solution is to use custom decorator?
 class classproperty(Generic[_R]):
-    def __init__(self, f: Callable[..., _R]) -> None:
+    def __init__(self, f: Callable[[_C], _R]) -> None:
        self.f = f
-    def __get__(self, obj, cls) -> _R:
+    def __get__(self, obj: None, cls: _C) -> _R:
        return self.f(cls)
 def test_classproperty() -> None:
    from .compat import assert_type
    class C:
        @classproperty
        def prop(cls) -> str:
            return 'hello'
    res = C.prop
    assert_type(res, str)
    assert res == 'hello'
 # hmm, this doesn't really work with mypy well..
 # https://github.com/python/mypy/issues/6244
 # class staticproperty(Generic[_R]):
@ -146,117 +313,394 @@ def test_classproperty() -> None:
 #     def __get__(self) -> _R:
 #         return self.f()
 # TODO deprecate in favor of datetime_aware
 tzdatetime = datetime
 # TODO doctests?
 def isoparse(s: str) -> tzdatetime:
    """
    Parses timestamps formatted like 2020-05-01T10:32:02.925961Z
    """
    # TODO could use dateutil? but it's quite slow as far as I remember..
    # TODO support non-utc.. somehow?
    assert s.endswith('Z'), s
    s = s[:-1] + '+00:00'
    return datetime.fromisoformat(s)
 import re
 # https://stackoverflow.com/a/295466/706389
 def get_valid_filename(s: str) -> str:
    s = str(s).strip().replace(' ', '_')
    return re.sub(r'(?u)[^-\w.]', '', s)
-# TODO deprecate and suggest to use one from my.core directly? not sure
+from typing import Generic, Sized, Callable
 from .utils.itertools import unique_everseen  # noqa: F401
 ### legacy imports, keeping them here for backwards compatibility
 ## hiding behind TYPE_CHECKING so it works in runtime
 ## in principle, warnings.deprecated decorator should cooperate with mypy, but doesn't look like it works atm?
 ## perhaps it doesn't work when it's used from typing_extensions
-if not TYPE_CHECKING:
+# X = TypeVar('X')
-    from .compat import deprecated
+def _warn_iterator(it, f: Any=None):
    emitted = False
    for i in it:
        yield i
        emitted = True
    if not emitted:
        warnings.warn(f"Function {f} didn't emit any data, make sure your config paths are correct")
    @deprecated('use my.core.compat.assert_never instead')
    def assert_never(*args, **kwargs):
        return compat.assert_never(*args, **kwargs)
-    @deprecated('use my.core.compat.fromisoformat instead')
+# TODO ugh, so I want to express something like:
-    def isoparse(*args, **kwargs):
+# X = TypeVar('X')
-        return compat.fromisoformat(*args, **kwargs)
+# C = TypeVar('C', bound=Iterable[X])
 # _warn_iterable(it: C) -> C
 # but apparently I can't??? ugh.
 # https://github.com/python/typing/issues/548
 # I guess for now overloads are fine...
-    @deprecated('use more_itertools.one instead')
+from typing import overload
-    def the(*args, **kwargs):
+X = TypeVar('X')
-        import more_itertools
+@overload
 def _warn_iterable(it: List[X]    , f: Any=None) -> List[X]    : ...
@overload
 def _warn_iterable(it: Iterable[X], f: Any=None) -> Iterable[X]: ...
 def _warn_iterable(it, f=None):
    if isinstance(it, Sized):
        sz = len(it)
        if sz == 0:
            warnings.warn(f"Function {f} returned empty container, make sure your config paths are correct")
        return it
    else:
        return _warn_iterator(it, f=f)
        return more_itertools.one(*args, **kwargs)
-    @deprecated('use functools.cached_property instead')
+# ok, this seems to work...
-    def cproperty(*args, **kwargs):
+# https://github.com/python/mypy/issues/1927#issue-167100413
-        import functools
+FL = TypeVar('FL', bound=Callable[..., List])
 FI = TypeVar('FI', bound=Callable[..., Iterable])
-        return functools.cached_property(*args, **kwargs)
+@overload
 def warn_if_empty(f: FL) -> FL: ...
@overload
 def warn_if_empty(f: FI) -> FI: ...
    @deprecated('use more_itertools.bucket instead')
    def group_by_key(l, key):
        res = {}
        for i in l:
            kk = key(i)
            lst = res.get(kk, [])
            lst.append(i)
            res[kk] = lst
        return res
-    @deprecated('use my.core.utils.itertools.make_dict instead')
+def warn_if_empty(f):
-    def make_dict(*args, **kwargs):
+    from functools import wraps
        from .utils import itertools as UI
-        return UI.make_dict(*args, **kwargs)
+    @wraps(f)
    def wrapped(*args, **kwargs):
        res = f(*args, **kwargs)
        return _warn_iterable(res, f=f)
    return wrapped
    @deprecated('use my.core.utils.itertools.listify instead')
    def listify(*args, **kwargs):
        from .utils import itertools as UI
-        return UI.listify(*args, **kwargs)
+# global state that turns on/off quick stats
 # can use the 'quick_stats' contextmanager
 # to enable/disable this in cli so that module 'stats'
 # functions don't have to implement custom 'quick' logic
 QUICK_STATS = False
    @deprecated('use my.core.warn_if_empty instead')
    def warn_if_empty(*args, **kwargs):
        from .utils import itertools as UI
-        return UI.listify(*args, **kwargs)
+# in case user wants to use the stats functions/quick option
 # elsewhere -- can use this decorator instead of editing
 # the global state directly
@contextmanager
def quick_stats():
    """Context manager that switches the module-level QUICK_STATS flag on.

    The flag is set to True for the duration of the ``with`` body and put back
    to whatever value it had on entry when the body exits (normally or not).
    """
    global QUICK_STATS
    saved = QUICK_STATS
    QUICK_STATS = True
    try:
        yield
    finally:
        # restore the previous value rather than hard-coding False, so nesting works
        QUICK_STATS = saved
    @deprecated('use my.core.stat instead')
    def stat(*args, **kwargs):
        from . import stats
-        return stats.stat(*args, **kwargs)
+C = TypeVar('C')
 Stats = Dict[str, Any]
 StatsFun = Callable[[], Stats]
 # todo not sure about return type...
 def stat(
    func: Union[Callable[[], Iterable[C]], Iterable[C]],
    *,
    quick: bool = False,
    name: Optional[str] = None,
 ) -> Stats:
    if callable(func):
        fr = func()
        if hasattr(fr, '__enter__') and hasattr(fr, '__exit__'):
            # context managers has Iterable type, but they aren't data providers
            # sadly doesn't look like there is a way to tell from typing annotations
            return {}
        fname = func.__name__
    else:
        # meh. means it's just a list.. not sure how to generate a name then
        fr = func
        fname = f'unnamed_{id(fr)}'
    type_name = type(fr).__name__
    if type_name == 'DataFrame':
        # dynamic, because pandas is an optional dependency..
        df = cast(Any, fr)  # todo ugh, not sure how to annotate properly
        res = dict(
            dtypes=df.dtypes.to_dict(),
            rows=len(df),
        )
    else:
        res = _stat_iterable(fr, quick=quick)
-    @deprecated('use my.core.make_logger instead')
+    stat_name = name if name is not None else fname
-    def LazyLogger(*args, **kwargs):
+    return {
-        from . import logging
+        stat_name: res,
    }
        return logging.LazyLogger(*args, **kwargs)
-    @deprecated('use my.core.types.asdict instead')
+def _stat_iterable(it: Iterable[C], quick: bool = False) -> Any:
-    def asdict(*args, **kwargs):
+    from more_itertools import ilen, take, first
        from . import types
-        return types.asdict(*args, **kwargs)
+    # todo not sure if there is something in more_itertools to compute this?
    total = 0
    errors = 0
    first_item = None
    last_item = None
-    # todo wrap these in deprecated decorator as well?
+    def funcit():
-    # TODO hmm how to deprecate these in runtime?
+        nonlocal errors, first_item, last_item, total
-    # tricky cause they are actually classes/types
+        for x in it:
-    from typing import Literal  # noqa: F401
+            total += 1
            if isinstance(x, Exception):
                errors += 1
            else:
                last_item = x
                if first_item is None:
                    first_item = x
            yield x
-    from .cachew import mcachew  # noqa: F401
+    eit = funcit()
    count: Any
    if quick or QUICK_STATS:
        initial = take(100, eit)
        count = len(initial)
        if first(eit, None) is not None: # todo can actually be none...
            # haven't exhausted
            count = f'{count}+'
    else:
        count = ilen(eit)
-    # this is kinda internal, should just use my.core.logging.setup_logger if necessary
+    res = {
-    from .logging import setup_logger
+        'count': count,
-    from .stats import Stats
+    }
    from .types import (
        Json,
        datetime_aware,
        datetime_naive,
    )
-    tzdatetime = datetime_aware
+    if total == 0:
-else:
+        # not sure but I guess a good balance? wouldn't want to throw early here?
-    from .compat import Never
+        res['warning'] = 'THE ITERABLE RETURNED NO DATA'
-    # make these invalid during type check while working in runtime
+    if errors > 0:
-    Stats = Never
+        res['errors'] = errors
-    tzdatetime = Never
+
-    Json = Never
+    def stat_item(item):
-    datetime_naive = Never
+        if item is None:
-    datetime_aware = Never
+            return None
-###
+        if isinstance(item, Path):
            return str(item)
        return guess_datetime(item)
    if (stat_first := stat_item(first_item)) is not None:
        res['first'] = stat_first
    if (stat_last := stat_item(last_item)) is not None:
        res['last'] = stat_last
    return res
 def test_stat_iterable() -> None:
    """Check _stat_iterable on a stream that mixes regular items with exceptions."""
    from datetime import datetime, timedelta
    from typing import NamedTuple

    base = datetime.utcfromtimestamp(123)
    step = timedelta(days=3)
    X = NamedTuple('X', [('x', int), ('d', datetime)])

    def entry(value: int, offset: int):
        # an item whose datetime is `offset` steps after the base timestamp
        return X(x=value, d=base + step * offset)

    def mixed():
        yield RuntimeError('oops!')
        yield from (entry(i, i) for i in range(2))
        yield RuntimeError('bad!')
        yield from (entry(i * 10, i * 10) for i in range(3))
        yield entry(123, 50)

    stats = _stat_iterable(mixed())
    # every yielded element is counted, errors included
    assert stats['count'] == 1 + 2 + 1 + 3 + 1
    assert stats['errors'] == 1 + 1
    # the last non-error item determines 'last'
    assert stats['last'] == base + step * 50
 # experimental, not sure about it..
 def guess_datetime(x: Any) -> Optional[datetime]:
    """Best-effort guess of a timestamp for ``x``.

    Converts ``x`` to a dict via ``asdict`` and returns the first value that is
    a ``datetime`` instance, in the dict's iteration order.

    Returns None when ``x`` cannot be converted to a dict, or when none of its
    values is a ``datetime``.
    """
    # todo hmm implement withoutexception..
    try:
        fields = asdict(x)
    except Exception:
        # deliberately best-effort: anything asdict can't handle has no guessable datetime.
        # Exception rather than a bare except, so KeyboardInterrupt/SystemExit still propagate.
        return None
    for value in fields.values():
        if isinstance(value, datetime):
            return value
    return None
 def test_guess_datetime() -> None:
    """Check guess_datetime against namedtuples and a dataclass."""
    from dataclasses import dataclass
    from datetime import datetime
    from typing import NamedTuple

    when = isoparse('2021-02-01T12:34:56Z')

    # ugh.. https://github.com/python/mypy/issues/7281
    A = NamedTuple('A', [('x', int)])
    B = NamedTuple('B', [('x', int), ('created', datetime)])

    @dataclass
    class C:
        a: datetime
        x: int

    # no datetime field at all -> nothing to guess
    without_dt = guess_datetime(A(x=4))
    assert without_dt is None

    # namedtuple with a datetime field
    from_tuple = guess_datetime(B(x=4, created=when))
    assert from_tuple == when

    # dataclass with a datetime field
    from_dataclass = guess_datetime(C(a=when, x=435))
    assert from_dataclass == when
    # TODO not sure what to return when multiple datetime fields?
    # TODO test @property?
 def is_namedtuple(thing: Any) -> bool:
    """Return True if ``thing`` looks like a namedtuple.

    This is only a basic duck-typing check: the object must expose a callable
    ``_asdict`` attribute.
    """
    # a missing attribute falls back to None, and None is not callable
    return callable(getattr(thing, '_asdict', None))
 def asdict(thing: Any) -> Json:
    """Convert ``thing`` to a dict.

    - a dict is returned as is (same object, no copy)
    - a dataclass goes through ``dataclasses.asdict``
    - a namedtuple-like object (see ``is_namedtuple``) goes through its ``_asdict()``

    Raises TypeError for anything else.
    """
    # todo primitive?
    # todo exception?
    import dataclasses as D

    if isinstance(thing, dict):
        converted = thing
    elif D.is_dataclass(thing):
        converted = D.asdict(thing)
    elif is_namedtuple(thing):
        converted = thing._asdict()
    else:
        raise TypeError(f'Could not convert object {thing} to dict')
    return converted
 # for now just serves documentation purposes... but one day might make it statically verifiable where possible?
 # TODO e.g. maybe use opaque mypy alias?
 datetime_naive = datetime
 datetime_aware = datetime
 def assert_subpackage(name: str) -> None:
    """Assert that a module's ``__name__`` is ``__main__`` or contains ``my.core``.

    Protects against modules living in the my/core directory being imported as
    top-level modules (e.g. a plain 'import cachew'), which can lead to
    unexpected issues.
    """
    # NOTE: if we use overlay, name can be smth like my.origg.my.core.cachew ...
    # hence a substring check rather than a strict prefix check
    is_expected = name == '__main__' or 'my.core' in name
    assert is_expected, f'Expected module __name__ ({name}) to be __main__ or start with my.core'
 from .compat import ParamSpec
 _P = ParamSpec('_P')
 _T = TypeVar('_T')
 # https://stackoverflow.com/a/10436851/706389
 from concurrent.futures import Future, Executor
 class DummyExecutor(Executor):
    """Executor that runs every submitted callable immediately, in the caller.

    ``submit`` calls the function right away and hands back an already
    completed Future holding either its result or the exception it raised.
    ``max_workers`` is only stored; nothing in this class reads it.
    """

    def __init__(self, max_workers: Optional[int]=1) -> None:
        self._max_workers = max_workers
        self._shutdown = False

    if TYPE_CHECKING:
        # typing-only stubs; the real implementation is in the runtime branch below
        if sys.version_info[:2] <= (3, 8):
            # 3.8 doesn't support ParamSpec as Callable arg :(
            # and any attempt to type results in incompatible supertype.. so whatever
            def submit(self, fn, *args, **kwargs): ...
        else:
            def submit(self, fn: Callable[_P, _T], /, *args: _P.args, **kwargs: _P.kwargs) -> Future[_T]: ...
    else:
        def submit(self, fn, *args, **kwargs):
            """Call ``fn(*args, **kwargs)`` now and return a finished Future.

            Raises RuntimeError if the executor has already been shut down.
            KeyboardInterrupt raised by ``fn`` propagates to the caller; any
            other exception is stored on the returned Future.
            """
            if self._shutdown:
                raise RuntimeError('cannot schedule new futures after shutdown')

            future: Future[Any] = Future()
            try:
                outcome = fn(*args, **kwargs)
            except KeyboardInterrupt:
                raise
            except BaseException as exc:
                future.set_exception(exc)
                return future
            future.set_result(outcome)
            return future

    def shutdown(self, wait: bool=True, **kwargs) -> None:
        """Mark the executor as shut down; ``wait`` and extra kwargs are accepted but unused."""
        self._shutdown = True
 # see https://hakibenita.com/python-mypy-exhaustive-checking#exhaustiveness-checking
 def assert_never(value: NoReturn) -> NoReturn:
    """Exhaustiveness-checking helper: always raises AssertionError.

    Intended for branches that should be unreachable; the error message
    includes the offending value and the name of its type.
    """
    # raise explicitly instead of `assert False`: assert statements are stripped
    # under `python -O`, which would make this NoReturn function silently return
    raise AssertionError(f'Unhandled value: {value} ({type(value).__name__})')
 def _check_all_hashable(fun):
    """Assert that the item types in ``fun``'s annotated return type are hashable.

    The return annotation is expected to be a generic with exactly one type
    argument (Iterator etc); that argument is either a single type or a union,
    in which case every member of the union is checked.
    """
    # TODO ok, take callable?
    # TODO needs to be defensive like in cachew?
    return_type = get_type_hints(fun).get('return')
    # TODO check if None
    (item_type,) = get_args(return_type)

    # options we wanna handle are simple type on the top level or union
    item_origin = get_origin(item_type)
    union_origins = [Union]
    if sys.version_info[:2] >= (3, 10):
        # the `X | Y` syntax produces types.UnionType rather than typing.Union
        union_origins.append(types.UnionType)

    if any(item_origin is marker for marker in union_origins):
        candidates = get_args(item_type)
    else:
        candidates = (item_type,)

    # seems that objects that have not overridden hash have the attribute but it's set to None
    unhashable = [t for t in candidates if getattr(t, '__hash__', None) is None]
    assert len(unhashable) == 0, f'Types {unhashable} are not hashable, this will result in significant performance downgrade for unique_everseen'
 _UET = TypeVar('_UET')
 _UEU = TypeVar('_UEU')
 def unique_everseen(
    fun: Callable[[], Iterable[_UET]],
    key: Optional[Callable[[_UET], _UEU]] = None,
 ) -> Iterator[_UET]:
    """Call ``fun`` and pass its result through ``more_itertools.unique_everseen``.

    When no ``key`` is given and the HPI_CHECK_UNIQUE_EVERSEEN environment
    variable is set, the return annotation of ``fun`` is first checked with
    ``_check_all_hashable``.
    """
    # TODO support normal iterable as well?
    import more_itertools

    # NOTE: it has to take original callable, because otherwise we don't have access to generator type annotations
    items = fun()
    # todo check key return type as well? but it's more likely to be hashable
    if key is None and os.environ.get('HPI_CHECK_UNIQUE_EVERSEEN') is not None:
        _check_all_hashable(fun)
    return more_itertools.unique_everseen(iterable=items, key=key)
 ## legacy imports, keeping them here for backwards compatibility
 from functools import cached_property as cproperty
 from typing import Literal
 from .cachew import mcachew
 ##
--- a/my/core/compat.py
+++ b/my/core/compat.py
@ -2,60 +2,57 @@
 Contains backwards compatibility helpers for different python versions.
 If something is relevant to HPI itself, please put it in .hpi_compat instead
 '''
-
+import os
 from __future__ import annotations
 import sys
 from typing import TYPE_CHECKING
-if sys.version_info[:2] >= (3, 13):
+
-    from warnings import deprecated
+windows = os.name == 'nt'
 else:
    from typing_extensions import deprecated
 # keeping just for backwards compatibility, used to have compat implementation for 3.6
-if not TYPE_CHECKING:
+import sqlite3
-    import sqlite3
+def sqlite_backup(*, source: sqlite3.Connection, dest: sqlite3.Connection, **kwargs) -> None:
    source.backup(dest, **kwargs)
    @deprecated('use .backup method on sqlite3.Connection directly instead')
    def sqlite_backup(*, source: sqlite3.Connection, dest: sqlite3.Connection, **kwargs) -> None:
        # TODO warn here?
        source.backup(dest, **kwargs)
-    # keeping for runtime backwards compatibility (added in 3.9)
+# can remove after python3.9 (although need to keep the method itself for bwd compat)
-    @deprecated('use .removeprefix method on string directly instead')
+def removeprefix(text: str, prefix: str) -> str:
-    def removeprefix(text: str, prefix: str) -> str:
+    if text.startswith(prefix):
-        return text.removeprefix(prefix)
+        return text[len(prefix):]
    return text
    @deprecated('use .removesuffix method on string directly instead')
    def removesuffix(text: str, suffix: str) -> str:
        return text.removesuffix(suffix)
-    ##
+## used to have compat function before 3.8 for these
-
+from functools import cached_property
-    ## used to have compat function before 3.8 for these, keeping for runtime back compatibility
+from typing import Literal, Protocol, TypedDict
    from functools import cached_property
    from typing import Literal, Protocol, TypedDict
 ##
 if sys.version_info[:2] >= (3, 10):
    from typing import ParamSpec
 else:
-    from typing_extensions import ParamSpec
+    if TYPE_CHECKING:
        from typing_extensions import ParamSpec
    else:
        from typing import NamedTuple, Any
        # erm.. I guess as long as it's not crashing, whatever...
        class _ParamSpec:
            def __call__(self, args):
                class _res:
                    args = None
                    kwargs = None
                return _res
        ParamSpec = _ParamSpec()
 # bisect_left doesn't have a 'key' parameter (which we use)
 # till python3.10
 if sys.version_info[:2] <= (3, 9):
-    from typing import Any, Callable, List, Optional, TypeVar  # noqa: UP035
+    from typing import List, TypeVar, Any, Optional, Callable
    X = TypeVar('X')
    # copied from python src
-    # fmt: off
+    def bisect_left(a: List[Any], x: Any, lo: int=0, hi: Optional[int]=None, *, key: Optional[Callable[..., Any]]=None) -> int:
    def bisect_left(a: list[Any], x: Any, lo: int=0, hi: int | None=None, *, key: Callable[..., Any] | None=None) -> int:
        if lo < 0:
            raise ValueError('lo must be non-negative')
        if hi is None:
@ -77,63 +74,5 @@ if sys.version_info[:2] <= (3, 9):
                else:
                    hi = mid
        return lo
    # fmt: on
 else:
    from bisect import bisect_left
 from datetime import datetime
 if sys.version_info[:2] < (3, 11):
    # fromisoformat didn't support Z as "utc" before 3.11
    # https://docs.python.org/3/library/datetime.html#datetime.datetime.fromisoformat
    def fromisoformat(date_string: str) -> datetime:
        """Parse an ISO timestamp, treating a trailing 'Z' as a '+00:00' offset."""
        normalized = date_string[:-1] + '+00:00' if date_string.endswith('Z') else date_string
        return datetime.fromisoformat(normalized)
 else:
    # the builtin handles the trailing 'Z' itself from 3.11 on
    fromisoformat = datetime.fromisoformat
 def test_fromisoformat() -> None:
    """Check fromisoformat on 'Z'-suffixed timestamps seen in real data sources."""
    from datetime import timezone

    utc = timezone.utc
    # fmt: off
    cases = [
        # feedbin has this format
        ('2020-05-01T10:32:02.925961Z', datetime(2020, 5, 1, 10, 32, 2, 925961, utc)),
        # polar has this format
        ('2018-11-28T22:04:01.304Z'   , datetime(2018, 11, 28, 22, 4, 1, 304000, utc)),
        # stackexchange, runnerup has this format
        ('2020-11-30T00:53:12Z'       , datetime(2020, 11, 30, 0, 53, 12, 0, utc)),
    ]
    # fmt: on
    for raw, expected in cases:
        assert fromisoformat(raw) == expected

    # arbtt has this format (sometimes less/more than 6 digits in milliseconds)
    # TODO doesn't work atm, not sure if really should be supported...
    # maybe should have flags for weird formats?
    # assert isoparse('2017-07-18T18:59:38.21731Z') == datetime(
    #     2017, 7, 18, 18, 59, 38, 217310, timezone.utc,
    # )
 if sys.version_info[:2] >= (3, 10):
    from types import NoneType
    from typing import TypeAlias
 else:
    NoneType = type(None)
    from typing_extensions import TypeAlias
 if sys.version_info[:2] >= (3, 11):
    from typing import Never, assert_never, assert_type
 else:
    from typing_extensions import Never, assert_never, assert_type
--- a/my/core/core_config.py
+++ b/my/core/core_config.py
@ -1,22 +1,16 @@
 '''
 Bindings for the 'core' HPI configuration
 '''
 from __future__ import annotations
 import re
-from collections.abc import Sequence
+from typing import Sequence, Optional
 from dataclasses import dataclass
 from pathlib import Path
-from . import warnings
+from . import warnings, PathIsh, Path
 try:
    from my.config import core as user_config  # type: ignore[attr-defined]
 except Exception as e:
    try:
-        from my.config import common as user_config  # type: ignore[attr-defined]
+        from my.config import common as user_config # type: ignore[attr-defined]
        warnings.high("'common' config section is deprecated. Please rename it to 'core'.")
    except Exception as e2:
        # make it defensive, because it's pretty commonly used and would be annoying if it breaks hpi doctor etc.
@ -27,7 +21,7 @@ except Exception as e:
 _HPI_CACHE_DIR_DEFAULT = ''
-
+from dataclasses import dataclass
@dataclass
 class Config(user_config):
    '''
@ -38,7 +32,7 @@ class Config(user_config):
        cache_dir = '/your/custom/cache/path'
    '''
-    cache_dir: Path | str | None = _HPI_CACHE_DIR_DEFAULT
+    cache_dir: Optional[PathIsh] = _HPI_CACHE_DIR_DEFAULT
    '''
    Base directory for cachew.
    - if None             , means cache is disabled
@ -48,7 +42,7 @@ class Config(user_config):
    NOTE: you shouldn't use this attribute in HPI modules directly, use Config.get_cache_dir()/cachew.cache_dir() instead
    '''
-    tmp_dir: Path | str | None = None
+    tmp_dir: Optional[PathIsh] = None
    '''
    Path to a temporary directory.
    This can be used temporarily while extracting zipfiles etc...
@ -56,36 +50,34 @@ class Config(user_config):
    - otherwise           , use the specified directory as the base temporary directory
    '''
-    enabled_modules: Sequence[str] | None = None
+    enabled_modules : Optional[Sequence[str]] = None
    '''
    list of regexes/globs
    - None means 'rely on disabled_modules'
    '''
-    disabled_modules: Sequence[str] | None = None
+    disabled_modules: Optional[Sequence[str]] = None
    '''
    list of regexes/globs
    - None means 'rely on enabled_modules'
    '''
-    def get_cache_dir(self) -> Path | None:
+    def get_cache_dir(self) -> Optional[Path]:
        cdir = self.cache_dir
        if cdir is None:
            return None
        if cdir == _HPI_CACHE_DIR_DEFAULT:
            from .cachew import _appdirs_cache_dir
            return _appdirs_cache_dir()
        else:
            return Path(cdir).expanduser()
    def get_tmp_dir(self) -> Path:
-        tdir: Path | str | None = self.tmp_dir
+        tdir: Optional[PathIsh] = self.tmp_dir
        tpath: Path
        # use tempfile if unset
        if tdir is None:
            import tempfile
            tpath = Path(tempfile.gettempdir()) / 'HPI'
        else:
            tpath = Path(tdir)
@ -93,10 +85,10 @@ class Config(user_config):
        tpath.mkdir(parents=True, exist_ok=True)
        return tpath
-    def _is_module_active(self, module: str) -> bool | None:
+    def _is_module_active(self, module: str) -> Optional[bool]:
        # None means the config doesn't specify anything
        # todo might be nice to return the 'reason' too? e.g. which option has matched
-        def matches(specs: Sequence[str]) -> str | None:
+        def matches(specs: Sequence[str]) -> Optional[str]:
            for spec in specs:
                # not sure because . (packages separate) matches anything, but I guess unlikely to clash
                if re.match(spec, module):
@ -112,25 +104,22 @@ class Config(user_config):
                return None
            else:
                return False
-        else:  # not None
+        else: # not None
            if off is None:
                return True
-            else:  # not None
+            else: # not None
                # fallback onto the 'enable everything', then the user will notice
                warnings.medium(f"[module]: conflicting regexes '{on}' and '{off}' are set in the config. Please only use one of them.")
                return True
 from .cfg import make_config
 config = make_config(Config)
 ### tests start
-from collections.abc import Iterator
+from typing import Iterator
 from contextlib import contextmanager as ctx
@ctx
 def _reset_config() -> Iterator[Config]:
    # todo maybe have this decorator for the whole of my.config?
@ -169,5 +158,4 @@ def test_active_modules() -> None:
            assert cc._is_module_active("my.body.exercise") is True
        assert len(record_warnings) == 1
 ### tests end
--- a/my/core/dataset.py
+++ b/my/core/dataset.py
@ -1,5 +1,31 @@
-from . import warnings
+from __future__ import annotations
 from .common import assert_subpackage; assert_subpackage(__name__)
-warnings.high(f"{__name__} is deprecated, please use dataset directly if you need or switch to my.core.sqlite")
+from .common import PathIsh
 from .sqlite import sqlite_connect_immutable
-from ._deprecated.dataset import *
+## sadly dataset doesn't have any type definitions
 from typing import Iterable, Iterator, Dict, Optional, Any, Protocol
 from contextlib import AbstractContextManager
 # NOTE: may not be true in general, but will be in the vast majority of cases
 row_type_T = Dict[str, Any]
 class TableT(Iterable, Protocol):
    """Structural type for a table object of the (untyped) `dataset` library: an iterable that also has `find`."""
    # yields rows typed as row_type_T; order_by is an optional keyword-only string
    # (presumably a column name to sort by -- TODO confirm against dataset docs)
    def find(self, *, order_by: Optional[str]=None) -> Iterator[row_type_T]: ...
 class DatabaseT(AbstractContextManager['DatabaseT'], Protocol):
    """Structural type for a database object of the (untyped) `dataset` library; usable as a context manager."""
    # indexing with a table name gives back a TableT
    def __getitem__(self, table: str) -> TableT: ...
 ##
 # TODO wonder if also need to open without WAL.. test this on read-only directory/db file
 def connect_readonly(db: PathIsh) -> DatabaseT:
    """Connect to the sqlite database at ``db`` through `dataset`.

    The underlying connections are created by ``sqlite_connect_immutable(db)``,
    passed to the engine as its 'creator' callback.
    """
    import dataset # type: ignore
    # see https://github.com/pudo/dataset/issues/136#issuecomment-128693122
    # todo not sure if mode=ro has any benefit, but it doesn't work on read-only filesystems
    # maybe it should autodetect readonly filesystems and apply this? not sure
    return dataset.connect(
        'sqlite:///',
        engine_kwargs={'creator': lambda: sqlite_connect_immutable(db)},
    )
--- a/my/core/denylist.py
+++ b/my/core/denylist.py
@ -5,25 +5,23 @@ A helper module for defining denylists for sources programmatically
 For docs, see doc/DENYLIST.md
 """
 from __future__ import annotations
 import functools
 import json
 import sys
 import json
 import functools
 from collections import defaultdict
-from collections.abc import Iterator, Mapping
+from typing import TypeVar, Set, Any, Mapping, Iterator, Dict, List
 from pathlib import Path
 from typing import Any, TypeVar
 import click
 from more_itertools import seekable
 from my.core.serialize import dumps
 from my.core.common import PathIsh
 from my.core.warnings import medium
 from .serialize import dumps
 from .warnings import medium
 T = TypeVar("T")
-DenyMap = Mapping[str, set[Any]]
+DenyMap = Mapping[str, Set[Any]]
 def _default_key_func(obj: T) -> str:
@ -31,9 +29,9 @@ def _default_key_func(obj: T) -> str:
 class DenyList:
-    def __init__(self, denylist_file: Path | str) -> None:
+    def __init__(self, denylist_file: PathIsh):
        self.file = Path(denylist_file).expanduser().absolute()
-        self._deny_raw_list: list[dict[str, Any]] = []
+        self._deny_raw_list: List[Dict[str, Any]] = []
        self._deny_map: DenyMap = defaultdict(set)
        # deny cli, user can override these
@ -47,7 +45,7 @@ class DenyList:
            return
        deny_map: DenyMap = defaultdict(set)
-        data: list[dict[str, Any]] = json.loads(self.file.read_text())
+        data: List[Dict[str, Any]]= json.loads(self.file.read_text())
        self._deny_raw_list = data
        for ignore in data:
@ -98,7 +96,6 @@ class DenyList:
    def filter(
        self,
        itr: Iterator[T],
        *,
        invert: bool = False,
    ) -> Iterator[T]:
        denyf = functools.partial(self._allow, deny_map=self.load())
@ -106,7 +103,7 @@ class DenyList:
            return filter(lambda x: not denyf(x), itr)
        return filter(denyf, itr)
-    def deny(self, key: str, value: Any, *, write: bool = False) -> None:
+    def deny(self, key: str, value: Any, write: bool = False) -> None:
        '''
        add a key/value pair to the denylist
        '''
@ -114,7 +111,7 @@ class DenyList:
            self._load()
        self._deny_raw({key: self._stringify_value(value)}, write=write)
-    def _deny_raw(self, data: dict[str, Any], *, write: bool = False) -> None:
+    def _deny_raw(self, data: Dict[str, Any], write: bool = False) -> None:
        self._deny_raw_list.append(data)
        if write:
            self.write()
@ -133,7 +130,7 @@ class DenyList:
    def _deny_cli_remember(
        self,
        items: Iterator[T],
-        mem: dict[str, T],
+        mem: Dict[str, T],
    ) -> Iterator[str]:
        keyf = self._deny_cli_key_func or _default_key_func
        # i.e., convert each item to a string, and map str -> item
@ -159,8 +156,10 @@ class DenyList:
            # reset the iterator
            sit.seek(0)
            # so we can map the selected string from fzf back to the original objects
-            memory_map: dict[str, T] = {}
+            memory_map: Dict[str, T] = {}
-            picker = FzfPrompt(executable_path=self.fzf_path, default_options="--no-multi")
+            picker = FzfPrompt(
                executable_path=self.fzf_path, default_options="--no-multi"
            )
            picked_l = picker.prompt(
                self._deny_cli_remember(itr, memory_map),
                "--read0",
--- a/my/core/discovery_pure.py
+++ b/my/core/discovery_pure.py
@ -10,20 +10,17 @@ This potentially allows it to be:
 It should be free of external modules, importlib, exec, etc. etc.
 '''
 from __future__ import annotations
 REQUIRES = 'REQUIRES'
 NOT_HPI_MODULE_VAR = '__NOT_HPI_MODULE__'
 ###
 import ast
 import logging
 import os
-import re
+from typing import Optional, Sequence, List, NamedTuple, Iterable, cast, Any
 from collections.abc import Iterable, Sequence
 from pathlib import Path
-from typing import Any, NamedTuple, Optional, cast
+import re
 import logging
 '''
 None means that requirements weren't defined (different from empty requirements)
@ -33,11 +30,11 @@ Requires = Optional[Sequence[str]]
 class HPIModule(NamedTuple):
    name: str
-    skip_reason: str | None
+    skip_reason: Optional[str]
-    doc: str | None = None
+    doc: Optional[str] = None
-    file: Path | None = None
+    file: Optional[Path] = None
    requires: Requires = None
-    legacy: str | None = None  # contains reason/deprecation warning
+    legacy: Optional[str] = None  # contains reason/deprecation warning
 def ignored(m: str) -> bool:
@ -58,13 +55,13 @@ def has_stats(src: Path) -> bool:
 def _has_stats(code: str) -> bool:
    a: ast.Module = ast.parse(code)
    for x in a.body:
-        try:  # maybe assign
+        try: # maybe assign
            [tg] = cast(Any, x).targets
            if tg.id == 'stats':
                return True
        except:
            pass
-        try:  # maybe def?
+        try: # maybe def?
            name = cast(Any, x).name
            if name == 'stats':
                return True
@ -147,7 +144,7 @@ def all_modules() -> Iterable[HPIModule]:
 def _iter_my_roots() -> Iterable[Path]:
    import my  # doesn't import any code, because of namespace package
-    paths: list[str] = list(my.__path__)
+    paths: List[str] = list(my.__path__)
    if len(paths) == 0:
        # should probably never happen?, if this code is running, it was imported
        # because something was added to __path__ to match this name
@ -245,7 +242,7 @@ def test_pure() -> None:
    src = Path(__file__).read_text()
    # 'import my' is allowed, but
    # dont allow anything other HPI modules
-    assert re.findall('import ' + r'my\.\S+', src, re.MULTILINE) == []
+    assert re.findall('import ' + r'my\.\S+', src, re.M) == []
    assert 'from ' + 'my' not in src
--- a/my/core/error.py
+++ b/my/core/error.py
@ -3,25 +3,12 @@ Various error handling helpers
 See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
 """
 from __future__ import annotations
 import traceback
 from collections.abc import Iterable, Iterator
 from datetime import datetime
 from itertools import tee
-from typing import (
+from typing import Union, TypeVar, Iterable, List, Tuple, Type, Optional, Callable, Any, cast, Iterator, Literal
    Any,
    Callable,
    Literal,
    TypeVar,
    Union,
    cast,
 )
 from .types import Json
 T = TypeVar('T')
-E = TypeVar('E', bound=Exception)  # TODO make covariant?
+E = TypeVar('E', bound=Exception) # TODO make covariant?
 ResT = Union[T, E]
@ -29,8 +16,7 @@ Res = ResT[T, Exception]
 ErrorPolicy = Literal["yield", "raise", "drop"]
-
+def notnone(x: Optional[T]) -> T:
 def notnone(x: T | None) -> T:
    assert x is not None
    return x
@ -38,8 +24,8 @@ def notnone(x: T | None) -> T:
 def unwrap(res: Res[T]) -> T:
    if isinstance(res, Exception):
        raise res
-    return res
+    else:
-
+        return res
 def drop_exceptions(itr: Iterator[Res[T]]) -> Iterator[T]:
    """Return non-errors from the iterable"""
@ -57,15 +43,13 @@ def raise_exceptions(itr: Iterable[Res[T]]) -> Iterator[T]:
        yield o
-def warn_exceptions(itr: Iterable[Res[T]], warn_func: Callable[[Exception], None] | None = None) -> Iterator[T]:
+def warn_exceptions(itr: Iterable[Res[T]], warn_func: Optional[Callable[[Exception], None]] = None) -> Iterator[T]:
    # if not provided, use the 'warnings' module
    if warn_func is None:
        from my.core.warnings import medium
        def _warn_func(e: Exception) -> None:
            # TODO: print traceback? but user could always --raise-exceptions as well
            medium(str(e))
        warn_func = _warn_func
    for o in itr:
@ -80,7 +64,7 @@ def echain(ex: E, cause: Exception) -> E:
    return ex
-def split_errors(l: Iterable[ResT[T, E]], ET: type[E]) -> tuple[Iterable[T], Iterable[E]]:
+def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
    # TODO would be nice to have ET=Exception default? but it causes some mypy complaints?
    vit, eit = tee(l)
    # TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
@ -98,9 +82,7 @@ def split_errors(l: Iterable[ResT[T, E]], ET: type[E]) -> tuple[Iterable[T], Ite
 K = TypeVar('K')
-
+def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]]:
 def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]]:
    """
    Sort a sequence potentially interleaved with errors/entries on which the key can't be computed.
    The general idea is: the error sticks to the non-error entry that follows it
@ -108,7 +90,7 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]
    group = []
    groups = []
    for i in items:
-        k: K | None
+        k: Optional[K]
        try:
            k = key(i)
        except Exception:  # error white computing key? dunno, might be nice to handle...
@ -118,10 +100,10 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]
            groups.append((k, group))
            group = []
-    results: list[Res[T]] = []
+    results: List[Res[T]] = []
-    for _v, grp in sorted(groups, key=lambda p: p[0]):  # type: ignore[return-value, arg-type] # TODO SupportsLessThan??
+    for v, grp in sorted(groups, key=lambda p: p[0]): # type: ignore[return-value, arg-type] # TODO SupportsLessThan??
        results.extend(grp)
-    results.extend(group)  # handle last group (it will always be errors only)
+    results.extend(group) # handle last group (it will always be errors only)
    return results
@ -153,7 +135,7 @@ def test_sort_res_by() -> None:
        Exc('last'),
    ]
-    results2 = sort_res_by([*ress, 0], lambda x: int(x))
+    results2 = sort_res_by(ress + [0], lambda x: int(x))
    assert results2 == [Exc('last'), 0] + results[:-1]
    assert sort_res_by(['caba', 'a', 'aba', 'daba'], key=lambda x: len(x)) == ['a', 'aba', 'caba', 'daba']
@ -162,23 +144,23 @@ def test_sort_res_by() -> None:
 # helpers to associate timestamps with the errors (so something meaningful could be displayed on the plots, for example)
 # todo document it under 'patterns' somewhere...
 # todo proper typevar?
-def set_error_datetime(e: Exception, dt: datetime | None) -> None:
+from datetime import datetime
 def set_error_datetime(e: Exception, dt: Optional[datetime]) -> None:
    if dt is None:
        return
-    e.args = (*e.args, dt)
+    e.args = e.args + (dt,)
    # todo not sure if should return new exception?
-
+def attach_dt(e: Exception, *, dt: Optional[datetime]) -> Exception:
 def attach_dt(e: Exception, *, dt: datetime | None) -> Exception:
    set_error_datetime(e, dt)
    return e
 # todo it might be problematic because might mess with timezones (when it's converted to string, it's converted to a shift)
-def extract_error_datetime(e: Exception) -> datetime | None:
+def extract_error_datetime(e: Exception) -> Optional[datetime]:
    import re
-
+    from datetime import datetime
    for x in reversed(e.args):
        if isinstance(x, datetime):
            return x
@ -193,6 +175,8 @@ def extract_error_datetime(e: Exception) -> datetime | None:
    return None
 import traceback
 from .common import Json
 def error_to_json(e: Exception) -> Json:
    estr = ''.join(traceback.format_exception(Exception, e, e.__traceback__))
    return {'error': estr}
@ -200,13 +184,7 @@ def error_to_json(e: Exception) -> Json:
 MODULE_SETUP_URL = 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#private-configuration-myconfig'
-
+def warn_my_config_import_error(err: Union[ImportError, AttributeError], help_url: Optional[str] = None) -> bool:
 def warn_my_config_import_error(
    err: ImportError | AttributeError,
    *,
    help_url: str | None = None,
    module_name: str | None = None,
 ) -> bool:
    """
    If the user tried to import something from my.config but it failed,
    possibly due to missing the config block in my.config?
@ -214,12 +192,10 @@ def warn_my_config_import_error(
    Returns True if it matched a possible config error
    """
    import re
    import click
    if help_url is None:
        help_url = MODULE_SETUP_URL
-    if type(err) is ImportError:
+    if type(err) == ImportError:
        if err.name != 'my.config':
            return False
        # parse name that user attempted to import
@ -231,31 +207,17 @@ You may be missing the '{section_name}' section from your config.
 See {help_url}\
 """, fg='yellow', err=True)
            return True
-    elif type(err) is AttributeError:
+    elif type(err) == AttributeError:
        # test if user had a nested config block missing
        # https://github.com/karlicoss/HPI/issues/223
        if hasattr(err, 'obj') and hasattr(err, "name"):
            config_obj = cast(object, getattr(err, 'obj'))  # the object that caused the attribute error
            # e.g. active_browser for my.browser
            nested_block_name = err.name
            errmsg = f"""You're likely missing the nested config block for '{getattr(config_obj, '__name__', str(config_obj))}.{nested_block_name}'.
 See {help_url} or check the corresponding module.py file for an example\
 """
            if config_obj.__module__ == 'my.config':
-                click.secho(errmsg, fg='yellow', err=True)
+                click.secho(f"""You're likely missing the nested config block for '{getattr(config_obj, '__name__', str(config_obj))}.{nested_block_name}'.
-                return True
+See {help_url} or check the corresponding module.py file for an example\
-            if module_name is not None and nested_block_name == module_name.split('.')[-1]:
+""", fg='yellow', err=True)
                # this tries to cover cases like these
                # user config:
                # class location:
                #     class via_ip:
                #         accuracy = 10_000
                # then when we import it, we do something like
                # from my.config import location
                # user_config = location.via_ip
                # so if location is present, but via_ip is not, we get
                # AttributeError: type object 'location' has no attribute 'via_ip'
                click.secho(errmsg, fg='yellow', err=True)
                return True
    else:
        click.echo(f"Unexpected error... {err}", err=True)
@ -263,10 +225,9 @@ See {help_url} or check the corresponding module.py file for an example\
 def test_datetime_errors() -> None:
-    import pytz  # noqa: I001
+    import pytz
    dt_notz = datetime.now()
-    dt_tz = datetime.now(tz=pytz.timezone('Europe/Amsterdam'))
+    dt_tz   = datetime.now(tz=pytz.timezone('Europe/Amsterdam'))
    for dt in [dt_tz, dt_notz]:
        e1 = RuntimeError('whatever')
        assert extract_error_datetime(e1) is None
--- a/my/core/experimental.py
+++ b/my/core/experimental.py
@ -1,8 +1,6 @@
 from __future__ import annotations
 import sys
 from typing import Any, Dict, Optional
 import types
 from typing import Any
 # The idea behind this one is to support accessing "overlaid/shadowed" modules from namespace packages
@ -22,7 +20,7 @@ def import_original_module(
    file: str,
    *,
    star: bool = False,
-    globals: dict[str, Any] | None = None,
+    globals: Optional[Dict[str, Any]] = None,
 ) -> types.ModuleType:
    module_to_restore = sys.modules[module_name]
--- a/my/core/freezer.py
+++ b/my/core/freezer.py
@ -1,29 +1,27 @@
-from __future__ import annotations
+from .common import assert_subpackage; assert_subpackage(__name__)
-from .internal import assert_subpackage
+import dataclasses as dcl
 assert_subpackage(__name__)
 import dataclasses
 import inspect
-from typing import Any, Generic, TypeVar
+from typing import TypeVar, Type, Any
 D = TypeVar('D')
-def _freeze_dataclass(Orig: type[D]):
+def _freeze_dataclass(Orig: Type[D]):
-    ofields = [(f.name, f.type, f) for f in dataclasses.fields(Orig)]  # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115
+    ofields = [(f.name, f.type, f) for f in dcl.fields(Orig)]   # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115
    # extract properties along with their types
-    props = list(inspect.getmembers(Orig, lambda o: isinstance(o, property)))
+    props   = list(inspect.getmembers(Orig, lambda o: isinstance(o, property)))
    pfields = [(name, inspect.signature(getattr(prop, 'fget')).return_annotation) for name, prop in props]
    # FIXME not sure about name?
    # NOTE: sadly passing bases=[Orig] won't work, python won't let us override properties with fields
-    RRR = dataclasses.make_dataclass('RRR', fields=[*ofields, *pfields])
+    RRR = dcl.make_dataclass('RRR', fields=[*ofields, *pfields])
    # todo maybe even declare as slots?
    return props, RRR
 # todo need some decorator thingie?
 from typing import Generic
 class Freezer(Generic[D]):
    '''
    Some magic which converts dataclass properties into fields.
@ -31,13 +29,13 @@ class Freezer(Generic[D]):
    For now only supports dataclasses.
    '''
-    def __init__(self, Orig: type[D]) -> None:
+    def __init__(self, Orig: Type[D]) -> None:
        self.Orig = Orig
        self.props, self.Frozen = _freeze_dataclass(Orig)
    def freeze(self, value: D) -> D:
        pvalues = {name: getattr(value, name) for name, _ in self.props}
-        return self.Frozen(**dataclasses.asdict(value), **pvalues)  # type: ignore[call-overload]  # see https://github.com/python/typing_extensions/issues/115
+        return self.Frozen(**dcl.asdict(value), **pvalues)  # type: ignore[call-overload]  # see https://github.com/python/typing_extensions/issues/115
 ### tests
@ -45,7 +43,7 @@ class Freezer(Generic[D]):
 # this needs to be defined here to prevent a mypy bug
 # see https://github.com/python/mypy/issues/7281
-@dataclasses.dataclass
+@dcl.dataclass
 class _A:
    x: Any
@ -60,10 +58,8 @@ class _A:
 def test_freezer() -> None:
-    val = _A(x={
+
-        'an_int': 123,
+    val = _A(x=dict(an_int=123, an_any=[1, 2, 3]))
        'an_any': [1, 2, 3],
    })
    af = Freezer(_A)
    fval = af.freeze(val)
@ -71,7 +67,6 @@ def test_freezer() -> None:
    assert fd['typed']   == 123
    assert fd['untyped'] == [1, 2, 3]
 ###
 # TODO shit. what to do with exceptions?
--- a/my/core/hpi_compat.py
+++ b/my/core/hpi_compat.py
@ -2,15 +2,11 @@
 Contains various backwards compatibility/deprecation helpers relevant to HPI itself.
 (as opposed to .compat module which implements compatibility between python versions)
 """
 from __future__ import annotations
 import inspect
 import os
 import inspect
 import re
 from collections.abc import Iterator, Sequence
 from types import ModuleType
-from typing import TypeVar
+from typing import Iterator, List, Optional, TypeVar
 from . import warnings
@ -18,7 +14,7 @@ from . import warnings
 def handle_legacy_import(
    parent_module_name: str,
    legacy_submodule_name: str,
-    parent_module_path: list[str],
+    parent_module_path: List[str],
 ) -> bool:
    ###
    # this is to trick mypy into treating this as a proper namespace package
@ -75,7 +71,7 @@ def pre_pip_dal_handler(
    name: str,
    e: ModuleNotFoundError,
    cfg,
-    requires: Sequence[str] = (),
+    requires=[],
 ) -> ModuleType:
    '''
    https://github.com/karlicoss/HPI/issues/79
@ -105,7 +101,7 @@ Please install {' '.join(requires)} as PIP packages (see the corresponding READM
 def _get_dal(cfg, module_name: str):
    mpath = getattr(cfg, module_name, None)
    if mpath is not None:
-        from .utils.imports import import_dir
+        from .common import import_dir
        return import_dir(mpath, '.dal')
    else:
@ -120,141 +116,32 @@ V = TypeVar('V')
 # named to be kinda consistent with more_itertools, e.g. more_itertools.always_iterable
 class always_supports_sequence(Iterator[V]):
    """
-    Helper to make migration from Sequence/List to Iterable/Iterator type backwards compatible in runtime
+    Helper to make migration from Sequence/List to Iterable/Iterator type backwards compatible
    """
    def __init__(self, it: Iterator[V]) -> None:
-        self._it = it
+        self.it = it
-        self._list: list[V] | None = None
+        self._list: Optional[List] = None
        self._lit: Iterator[V] | None = None
-    def __iter__(self) -> Iterator[V]:  # noqa: PYI034
+    def __iter__(self) -> Iterator[V]:
-        if self._list is not None:
+        return self.it.__iter__()
            self._lit = iter(self._list)
        return self
    def __next__(self) -> V:
-        if self._list is not None:
+        return self.it.__next__()
            assert self._lit is not None
            delegate = self._lit
        else:
            delegate = self._it
        return next(delegate)
    def __getattr__(self, name):
-        return getattr(self._it, name)
+        return getattr(self.it, name)
    @property
-    def _aslist(self) -> list[V]:
+    def aslist(self) -> List[V]:
        if self._list is None:
-            qualname = getattr(self._it, '__qualname__', '<no qualname>')  # defensive just in case
+            qualname = getattr(self.it, '__qualname__', '<no qualname>')  # defensive just in case
            warnings.medium(f'Using {qualname} as list is deprecated. Migrate to iterative processing or call list() explicitly.')
-            self._list = list(self._it)
+            self._list = list(self.it)
            # this is necessary for list constructor to work correctly
            # since it's __iter__ first, then tries to compute length and then starts iterating...
            self._lit = iter(self._list)
        return self._list
    def __len__(self) -> int:
-        return len(self._aslist)
+        return len(self.aslist)
    def __getitem__(self, i: int) -> V:
-        return self._aslist[i]
+        return self.aslist[i]
 def test_always_supports_sequence_list_constructor() -> None:
    exhausted = 0
    def it() -> Iterator[str]:
        nonlocal exhausted
        yield from ['a', 'b', 'c']
        exhausted += 1
    sit = always_supports_sequence(it())
    # list constructor is a bit special... it's trying to compute length if it's available to optimize memory allocation
    # so, what's happening in this case is
    # - sit.__iter__ is called
    # - sit.__len__  is called
    # - sit.__next__ is called
    res = list(sit)
    assert res == ['a', 'b', 'c']
    assert exhausted == 1
    res = list(sit)
    assert res == ['a', 'b', 'c']
    assert exhausted == 1  # this will iterate over 'cached' list now, so original generator is only exhausted once
 def test_always_supports_sequence_indexing() -> None:
    exhausted = 0
    def it() -> Iterator[str]:
        nonlocal exhausted
        yield from ['a', 'b', 'c']
        exhausted += 1
    sit = always_supports_sequence(it())
    assert len(sit) == 3
    assert exhausted == 1
    assert sit[2] == 'c'
    assert sit[1] == 'b'
    assert sit[0] == 'a'
    assert exhausted == 1
    # a few tests to make sure list-like operations are working..
    assert list(sit) == ['a', 'b', 'c']
    assert [x for x in sit] == ['a', 'b', 'c']  # noqa: C416
    assert list(sit) == ['a', 'b', 'c']
    assert [x for x in sit] == ['a', 'b', 'c']  # noqa: C416
    assert exhausted == 1
 def test_always_supports_sequence_next() -> None:
    exhausted = 0
    def it() -> Iterator[str]:
        nonlocal exhausted
        yield from ['a', 'b', 'c']
        exhausted += 1
    sit = always_supports_sequence(it())
    x = next(sit)
    assert x == 'a'
    assert exhausted == 0
    x = next(sit)
    assert x == 'b'
    assert exhausted == 0
 def test_always_supports_sequence_iter() -> None:
    exhausted = 0
    def it() -> Iterator[str]:
        nonlocal exhausted
        yield from ['a', 'b', 'c']
        exhausted += 1
    sit = always_supports_sequence(it())
    for x in sit:
        assert x == 'a'
        break
    x = next(sit)
    assert x == 'b'
    assert exhausted == 0
    x = next(sit)
    assert x == 'c'
    assert exhausted == 0
    for _ in sit:
        raise RuntimeError  # shouldn't trigger, just exhaust the iterator
    assert exhausted == 1
--- a/my/core/influxdb.py
+++ b/my/core/influxdb.py
@ -1,22 +1,14 @@
 '''
 TODO doesn't really belong to 'core' morally, but can think of moving out later
 '''
 from .common import assert_subpackage; assert_subpackage(__name__)
-from __future__ import annotations
+from typing import Iterable, Any, Optional, Dict
-from .internal import assert_subpackage
+from .common import LazyLogger, asdict, Json
 assert_subpackage(__name__)
-from collections.abc import Iterable
+logger = LazyLogger(__name__)
 from typing import Any
 import click
 from .logging import make_logger
 from .types import Json, asdict
 logger = make_logger(__name__)
 class config:
@ -26,7 +18,7 @@ class config:
 RESET_DEFAULT = False
-def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None:
+def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_col: str='dt') -> None:
    # todo infer dt column automatically, reuse in stat?
    # it doesn't like dots, ends up some syntax error?
    measurement = measurement.replace('.', '_')
@ -34,8 +26,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
    db = config.db
-    from influxdb import InfluxDBClient  # type: ignore
+    from influxdb import InfluxDBClient # type: ignore
    client = InfluxDBClient()
    # todo maybe create if not exists?
    # client.create_database(db)
@ -46,7 +37,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
        client.delete_series(database=db, measurement=measurement)
    # TODO need to take schema here...
-    cache: dict[str, bool] = {}
+    cache: Dict[str, bool] = {}
    def good(f, v) -> bool:
        c = cache.get(f)
@ -65,9 +56,9 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
    def dit() -> Iterable[Json]:
        for i in it:
            d = asdict(i)
-            tags: Json | None = None
+            tags: Optional[Json] = None
-            tags_ = d.get('tags')  # meh... handle in a more robust manner
+            tags_ = d.get('tags') # meh... handle in a more robust manner
-            if tags_ is not None and isinstance(tags_, dict):  # FIXME meh.
+            if tags_ is not None and isinstance(tags_, dict): # FIXME meh.
                del d['tags']
                tags = tags_
@ -78,19 +69,18 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
            fields = filter_dict(d)
-            yield {
+            yield dict(
-                'measurement': measurement,
+                measurement=measurement,
                # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
                # hmm, so tags are autoindexed and might be faster?
                # not sure what's the big difference though
                # "fields are data and tags are metadata"
-                'tags': tags,
+                tags=tags,
-                'time': dt,
+                time=dt,
-                'fields': fields,
+                fields=fields,
-            }
+            )
    from more_itertools import chunked
    # "The optimal batch size is 5000 lines of line protocol."
    # some chunking is def necessary, otherwise it fails
    inserted = 0
@ -104,9 +94,9 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
    # todo "Specify timestamp precision when writing to InfluxDB."?
-def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> None:
+def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> None:
    if name is None:
-        assert callable(it)  # generators have no name/module
+        assert callable(it) # generators have no name/module
        name = f'{it.__module__}:{it.__name__}'
    assert name is not None
@ -114,9 +104,7 @@ def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> N
        it = it()
    from itertools import tee
    from more_itertools import first, one
    it, x = tee(it)
    f = first(x, default=None)
    if f is None:
@ -126,17 +114,17 @@ def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> N
    # TODO can we reuse pandas code or something?
    #
    from .pandas import _as_columns
    schema = _as_columns(type(f))
    from datetime import datetime
    dtex = RuntimeError(f'expected single datetime field. schema: {schema}')
    dtf = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex)
    fill(it, measurement=name, reset=reset, dt_col=dtf)
 import click
@click.group()
 def main() -> None:
    pass
@ -145,9 +133,8 @@ def main() -> None:
@main.command(name='populate', short_help='populate influxdb')
@click.option('--reset', is_flag=True, help='Reset Influx measurements before inserting', show_default=True)
@click.argument('FUNCTION_NAME', type=str, required=True)
-def populate(*, function_name: str, reset: bool) -> None:
+def populate(function_name: str, reset: bool) -> None:
    from .__main__ import _locate_functions_or_prompt
    [provider] = list(_locate_functions_or_prompt([function_name]))
    # todo could have a non-interactive version which populates from all data sources for the provider?
    magic_fill(provider, reset=reset)
--- a/my/core/init.py
+++ b/my/core/init.py
@ -14,19 +14,18 @@ Please let me know if you are aware of a better way of dealing with this!
 # separate function to present namespace pollution
 def setup_config() -> None:
    from pathlib import Path
    import sys
    import warnings
    from pathlib import Path
    from .preinit import get_mycfg_dir
    mycfg_dir = get_mycfg_dir()
    if not mycfg_dir.exists():
        warnings.warn(f"""
 'my.config' package isn't found! (expected at '{mycfg_dir}'). This is likely to result in issues.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""".strip(), stacklevel=1)
+""".strip())
        return
    mpath = str(mycfg_dir)
@ -44,12 +43,11 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-mo
    except ImportError as ex:
        # just in case... who knows what crazy setup users have
        import logging
        logging.exception(ex)
        warnings.warn(f"""
 Importing 'my.config' failed! (error: {ex}). This is likely to result in issues.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""", stacklevel=1)
+""")
    else:
        # defensive just in case -- __file__ may not be present if there is some dynamic magic involved
        used_config_file = getattr(my.config, '__file__', None)
@ -65,7 +63,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-mo
 Expected my.config to be located at {mycfg_dir}, but instead its path is {used_config_path}.
 This will likely cause issues down the line -- double check {mycfg_dir} structure.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""", stacklevel=1
+""",
                    )
--- a/my/core/internal.py
+++ b/my/core/internal.py
@ -1,9 +0,0 @@
 """
 Utils specific to hpi core, shouldn't really be used by HPI modules
 """
 def assert_subpackage(name: str) -> None:
    # can lead to some unexpected issues if you 'import cachew' which being in my/core directory.. so let's protect against it
    # NOTE: if we use overlay, name can be smth like my.origg.my.core.cachew ...
    assert name == '__main__' or 'my.core' in name, f'Expected module __name__ ({name}) to be __main__ or start with my.core'
--- a/my/core/kompress.py
+++ b/my/core/kompress.py
@ -1,7 +1,4 @@
-from .internal import assert_subpackage
+from .common import assert_subpackage; assert_subpackage(__name__)
 assert_subpackage(__name__)
 from . import warnings
 # do this later -- for now need to transition modules to avoid using kompress directly (e.g. ZipPath)
@ -11,7 +8,10 @@ try:
    from kompress import *
 except ModuleNotFoundError as e:
    if e.name == 'kompress':
-        warnings.high('Please install kompress (pip3 install kompress). Falling onto vendorized kompress for now.')
+        warnings.high('Please install kompress (pip3 install kompress), it will be required in the future. Falling onto vendorized kompress for now.')
        from ._deprecated.kompress import *  # type: ignore[assignment]
    else:
        raise e
 # this is deprecated in compress, keep here for backwards compatibility
 open = kopen  # noqa: F405
--- a/my/core/konsume.py
+++ b/my/core/konsume.py
@ -5,21 +5,17 @@ This can potentially allow both for safer defensive parsing, and let you know if
 TODO perhaps need to get some inspiration from linear logic to decide on a nice API...
 '''
 from __future__ import annotations
 from collections import OrderedDict
-from typing import Any
+from typing import Any, List
 def ignore(w, *keys):
    for k in keys:
        w[k].ignore()
 def zoom(w, *keys):
    return [w[k].zoom() for k in keys]
 # TODO need to support lists
 class Zoomable:
    def __init__(self, parent, *args, **kwargs) -> None:
@ -44,7 +40,7 @@ class Zoomable:
        assert self.parent is not None
        self.parent._remove(self)
-    def zoom(self) -> Zoomable:
+    def zoom(self) -> 'Zoomable':
        self.consume()
        return self
@ -67,7 +63,6 @@ class Wdict(Zoomable, OrderedDict):
    def this_consumed(self):
        return len(self) == 0
    # TODO specify mypy type for the index special method?
@ -82,7 +77,6 @@ class Wlist(Zoomable, list):
    def this_consumed(self):
        return len(self) == 0
 class Wvalue(Zoomable):
    def __init__(self, parent, value: Any) -> None:
        super().__init__(parent)
@ -93,20 +87,21 @@ class Wvalue(Zoomable):
        return []
    def this_consumed(self):
-        return True  # TODO not sure..
+        return True # TODO not sure..
    def __repr__(self):
        return 'WValue{' + repr(self.value) + '}'
-def _wrap(j, parent=None) -> tuple[Zoomable, list[Zoomable]]:
+from typing import Tuple
 def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
    res: Zoomable
-    cc: list[Zoomable]
+    cc: List[Zoomable]
    if isinstance(j, dict):
        res = Wdict(parent)
        cc = [res]
        for k, v in j.items():
-            vv, c = _wrap(v, parent=res)
+            vv, c  = _wrap(v, parent=res)
            res[k] = vv
            cc.extend(c)
        return res, cc
@ -125,23 +120,21 @@ def _wrap(j, parent=None) -> tuple[Zoomable, list[Zoomable]]:
        raise RuntimeError(f'Unexpected type: {type(j)} {j}')
 from collections.abc import Iterator
 from contextlib import contextmanager
-
+from typing import Iterator
 class UnconsumedError(Exception):
    pass
 # TODO think about error policy later...
@contextmanager
-def wrap(j, *, throw=True) -> Iterator[Zoomable]:
+def wrap(j, throw=True) -> Iterator[Zoomable]:
    w, children = _wrap(j)
    yield w
    for c in children:
-        if not c.this_consumed():  # TODO hmm. how does it figure out if it's consumed???
+        if not c.this_consumed(): # TODO hmm. how does it figure out if it's consumed???
            if throw:
                # TODO need to keep a full path or something...
                raise UnconsumedError(f'''
@ -153,11 +146,8 @@ Expected {c} to be fully consumed by the parser.
 from typing import cast
 def test_unconsumed() -> None:
    import pytest
    with pytest.raises(UnconsumedError):
        with wrap({'a': 1234}) as w:
            w = cast(Wdict, w)
@ -168,7 +158,6 @@ def test_unconsumed() -> None:
            w = cast(Wdict, w)
            d = w['c']['d'].zoom()
 def test_consumed() -> None:
    with wrap({'a': 1234}) as w:
        w = cast(Wdict, w)
@ -179,7 +168,6 @@ def test_consumed() -> None:
        c = w['c'].zoom()
        d = c['d'].zoom()
 def test_types() -> None:
    # (string, number, object, array, boolean or nul
    with wrap({'string': 'string', 'number': 3.14, 'boolean': True, 'null': None, 'list': [1, 2, 3]}) as w:
@ -188,10 +176,9 @@ def test_types() -> None:
        w['number'].consume()
        w['boolean'].zoom()
        w['null'].zoom()
-        for x in list(w['list'].zoom()):  # TODO eh. how to avoid the extra list thing?
+        for x in list(w['list'].zoom()): # TODO eh. how to avoid the extra list thing?
            x.consume()
 def test_consume_all() -> None:
    with wrap({'aaa': {'bbb': {'hi': 123}}}) as w:
        w = cast(Wdict, w)
@ -201,9 +188,11 @@ def test_consume_all() -> None:
 def test_consume_few() -> None:
    import pytest
    pytest.skip('Will think about it later..')
-    with wrap({'important': 123, 'unimportant': 'whatever'}) as w:
+    with wrap({
            'important': 123,
            'unimportant': 'whatever'
    }) as w:
        w = cast(Wdict, w)
        w['important'].zoom()
        w.consume_all()
@ -212,7 +201,6 @@ def test_consume_few() -> None:
 def test_zoom() -> None:
    import pytest
    with wrap({'aaa': 'whatever'}) as w:
        w = cast(Wdict, w)
        with pytest.raises(KeyError):
@ -221,34 +209,3 @@ def test_zoom() -> None:
 # TODO type check this...
 # TODO feels like the whole thing kind of unnecessarily complex
 # - cons:
 #     - in most cases this is not even needed? who cares if we miss a few attributes?
 # - pro: on the other hand it could be interesting to know about new attributes in data,
 #        and without this kind of processing we wouldn't even know
 # alternatives
 # - manually process data
 #   e.g. use asserts, dict.pop and dict.values() methods to unpack things
 #   - pros:
 #     - very simple, since uses built in syntax
 #     - very performant, as fast as it gets
 #     - very flexible, easy to adjust behaviour
 #   - cons:
 #     - can forget to assert about extra entities etc, so error prone
 #     - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
 #     - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
 #     - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
 #       - TODO perhaps combine warnings somehow or at least only emit once per module?
 #       - hmm actually tbh if we carefully go through everything and don't make copies, then only requires one assert at the very end?
 #   - TODO this is kinda useful? https://discuss.python.org/t/syntax-for-dictionnary-unpacking-to-variables/18718
 #     operator.itemgetter?
 #   - TODO can use match operator in python for this? quite nice actually! and allows for dynamic behaviour
 #     only from 3.10 tho, and gonna be tricky to do dynamic defensive behaviour with this
 #   - TODO in a sense, blenser already would hint if some meaningful fields aren't being processed? only if they are changing though
 # - define a "schema" for data, then just recursively match data against the schema?
 #   possibly pydantic already does something like that? not sure about performance though
 #   pros:
 #     - much simpler to extend and understand what's going on
 #   cons:
 #     - more rigid, so it becomes tricky to do dynamic stuff (e.g. if schema actually changes)
--- a/my/core/logging.py
+++ b/my/core/logging.py
@ -1,11 +1,11 @@
 from __future__ import annotations
 from functools import lru_cache
 import logging
 import os
 import sys
 from typing import Union
 import warnings
 from functools import lru_cache
 from typing import TYPE_CHECKING, Union
 def test() -> None:
@ -15,7 +15,7 @@ def test() -> None:
    ## prepare exception for later
    try:
-        None.whatever  # type: ignore[attr-defined]  # noqa: B018
+        None.whatever  # type: ignore[attr-defined]
    except Exception as e:
        ex = e
    ##
@ -146,7 +146,7 @@ def _setup_handlers_and_formatters(name: str) -> None:
        # try colorlog first, so user gets nice colored logs
        import colorlog
    except ModuleNotFoundError:
-        warnings.warn("You might want to 'pip install colorlog' for nice colored logs", stacklevel=1)
+        warnings.warn("You might want to 'pip install colorlog' for nice colored logs")
        formatter = logging.Formatter(FORMAT_NOCOLOR)
    else:
        # log_color/reset are specific to colorlog
@ -222,9 +222,7 @@ def make_logger(name: str, *, level: LevelIsh = None) -> logging.Logger:
 # OK, when stdout is not a tty, enlighten doesn't log anything, good
 def get_enlighten():
    # TODO could add env variable to disable enlighten for a module?
-    from unittest.mock import (
+    from unittest.mock import Mock  # Mock to return stub so clients don't have to think about it
        Mock,  # Mock to return stub so cients don't have to think about it
    )
    # for now hidden behind the flag since it's a little experimental
    if os.environ.get('ENLIGHTEN_ENABLE', None) is None:
@ -233,7 +231,7 @@ def get_enlighten():
    try:
        import enlighten  # type: ignore[import-untyped]
    except ModuleNotFoundError:
-        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar", stacklevel=1)
+        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar")
        return Mock()
@ -250,17 +248,7 @@ if __name__ == '__main__':
    test()
-## legacy/deprecated methods for backwards compatibility
+## legacy/deprecated methods for backwards compatibility
-if not TYPE_CHECKING:
+LazyLogger = make_logger
-    from .compat import deprecated
+logger = make_logger
    @deprecated('use make_logger instead')
    def LazyLogger(*args, **kwargs):
        return make_logger(*args, **kwargs)
    @deprecated('use make_logger instead')
    def logger(*args, **kwargs):
        return make_logger(*args, **kwargs)
 ##
--- a/my/core/mime.py
+++ b/my/core/mime.py
@ -1,37 +0,0 @@
 """
 Utils for mime/filetype handling
 """
 from __future__ import annotations
 from .internal import assert_subpackage
 assert_subpackage(__name__)
 import functools
 from pathlib import Path
@functools.lru_cache(1)
 def _magic():
    import magic  # type: ignore
    # TODO also has uncompess=True? could be useful
    return magic.Magic(mime=True)
 # TODO could reuse in pdf module?
 import mimetypes  # todo do I need init()?
 # todo wtf? fastermime thinks it's mime is application/json even if the extension is xz??
 # whereas magic detects correctly: application/x-zstd and application/x-xz
 def fastermime(path: Path | str) -> str:
    paths = str(path)
    # mimetypes is faster, so try it first
    (mime, _) = mimetypes.guess_type(paths)
    if mime is not None:
        return mime
    # magic is slower but handles more types
    # TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
    return _magic().from_file(paths)
--- a/my/core/orgmode.py
+++ b/my/core/orgmode.py
@ -1,46 +1,37 @@
 """
 Various helpers for reading org-mode data
 """
 from datetime import datetime
 def parse_org_datetime(s: str) -> datetime:
    s = s.strip('[]')
-    for fmt, _cls in [
+    for fmt, cl in [
-        ("%Y-%m-%d %a %H:%M", datetime),
+            ("%Y-%m-%d %a %H:%M", datetime),
-        ("%Y-%m-%d %H:%M"   , datetime),
+            ("%Y-%m-%d %H:%M"   , datetime),
-        # todo not sure about these... fallback on 00:00?
+            # todo not sure about these... fallback on 00:00?
-        # ("%Y-%m-%d %a"      , date),
+            # ("%Y-%m-%d %a"      , date),
-        # ("%Y-%m-%d"         , date),
+            # ("%Y-%m-%d"         , date),
    ]:
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
-    raise RuntimeError(f"Bad datetime string {s}")
+    else:
        raise RuntimeError(f"Bad datetime string {s}")
 # TODO I guess want to borrow inspiration from bs4? element type <-> tag; and similar logic for find_one, find_all
 from collections.abc import Iterable
 from typing import Callable, TypeVar
 from orgparse import OrgNode
-
+from typing import Iterable, TypeVar, Callable
 V = TypeVar('V')
 def collect(n: OrgNode, cfun: Callable[[OrgNode], Iterable[V]]) -> Iterable[V]:
    yield from cfun(n)
    for c in n.children:
        yield from collect(c, cfun)
 from more_itertools import one
 from orgparse.extra import Table
 def one_table(o: OrgNode) -> Table:
    return one(collect(o, lambda n: (x for x in n.body_rich if isinstance(x, Table))))
@ -50,7 +41,7 @@ class TypedTable(Table):
        tt = super().__new__(TypedTable)
        tt.__dict__ = orig.__dict__
        blocks = list(orig.blocks)
-        header = blocks[0]  # first block is schema
+        header = blocks[0] # first block is schema
        if len(header) == 2:
            # TODO later interpret first line as types
            header = header[1:]
--- a/my/core/pandas.py
+++ b/my/core/pandas.py
@ -1,31 +1,23 @@
 '''
 Various pandas helpers and convenience functions
 '''
 from __future__ import annotations
 # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
 # NOTE: this file is meant to be importable without Pandas installed
 import dataclasses
 from collections.abc import Iterable, Iterator
 from datetime import datetime, timezone
 from pprint import pformat
-from typing import (
+from typing import TYPE_CHECKING, Any, Iterable, Type, Dict, Literal, Callable, TypeVar
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    TypeVar,
 )
 from decorator import decorator
-from . import warnings
+from . import warnings, Res
-from .error import Res, error_to_json, extract_error_datetime
+from .common import LazyLogger, Json, asdict
-from .logging import make_logger
+from .error import error_to_json, extract_error_datetime
 from .types import Json, asdict
-logger = make_logger(__name__)
+
 logger = LazyLogger(__name__)
 if TYPE_CHECKING:
@ -46,7 +38,7 @@ else:
    S1 = Any
-def _check_dateish(s: SeriesT[S1]) -> Iterable[str]:
+def check_dateish(s: SeriesT[S1]) -> Iterable[str]:
    import pandas as pd  # noqa: F811 not actually a redefinition
    ctype = s.dtype
@ -58,7 +50,7 @@ def _check_dateish(s: SeriesT[S1]) -> Iterable[str]:
    all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all()
    if not all_timestamps:
        return  # not sure why it would happen, but ok
-    tzs = s.map(lambda x: x.tzinfo).drop_duplicates()  # type: ignore[union-attr, var-annotated, arg-type, return-value, unused-ignore]
+    tzs = s.map(lambda x: x.tzinfo).drop_duplicates()
    examples = s[tzs.index]
    # todo not so sure this warning is that useful... except for stuff without tz
    yield f'''
@ -70,37 +62,9 @@ def _check_dateish(s: SeriesT[S1]) -> Iterable[str]:
 def test_check_dateish() -> None:
    import pandas as pd
-    from .compat import fromisoformat
+    # todo just a dummy test to check it doesn't crash, need something meaningful
-
+    s1 = pd.Series([1, 2, 3])
-    # empty series shouldn't warn
+    list(check_dateish(s1))
    assert list(_check_dateish(pd.Series([]))) == []
    # if no datetimes, shouldn't return any warnings
    assert list(_check_dateish(pd.Series([1, 2, 3]))) == []
    # all values are datetimes, shouldn't warn
    # fmt: off
    assert list(_check_dateish(pd.Series([
        fromisoformat('2024-08-19T01:02:03'),
        fromisoformat('2024-08-19T03:04:05'),
    ]))) == []
    # fmt: on
    # mixture of timezones -- should warn
    # fmt: off
    assert len(list(_check_dateish(pd.Series([
        fromisoformat('2024-08-19T01:02:03'),
        fromisoformat('2024-08-19T03:04:05Z'),
    ])))) == 1
    # fmt: on
    # TODO hmm. maybe this should actually warn?
    # fmt: off
    assert len(list(_check_dateish(pd.Series([
        'whatever',
        fromisoformat('2024-08-19T01:02:03'),
    ])))) == 0
    # fmt: on
 # fmt: off
@ -138,7 +102,7 @@ def check_dataframe(f: FuncT, error_col_policy: ErrorColPolicy = 'add_if_missing
    # makes sense to keep super defensive
    try:
        for col, data in df.reset_index().items():
-            for w in _check_dateish(data):
+            for w in check_dateish(data):
                warnings.low(f"{tag}, column '{col}': {w}")
    except Exception as e:
        logger.exception(e)
@ -162,7 +126,8 @@ def error_to_row(e: Exception, *, dt_col: str = 'dt', tz: timezone | None = None
    return err_dict
-def _to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
+# todo not sure about naming
 def to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
    for r in it:
        if isinstance(r, Exception):
            yield error_to_row(r)
@ -175,10 +140,10 @@ def _to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
 Schema = Any
-def _as_columns(s: Schema) -> dict[str, type]:
+def _as_columns(s: Schema) -> Dict[str, Type]:
    # todo would be nice to extract properties; add tests for this as well
    if dataclasses.is_dataclass(s):
-        return {f.name: f.type for f in dataclasses.fields(s)}  # type: ignore[misc]  # ugh, why mypy thinks f.type can return str??
+        return {f.name: f.type for f in dataclasses.fields(s)}
    # else must be NamedTuple??
    # todo assert my.core.common.is_namedtuple?
    return getattr(s, '_field_types')
@ -197,7 +162,7 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Schema | None = None) -> DataFr
    import pandas as pd  # noqa: F811 not actually a redefinition
    columns = None if schema is None else list(_as_columns(schema).keys())
-    return pd.DataFrame(_to_jsons(it), columns=columns)
+    return pd.DataFrame(to_jsons(it), columns=columns)
 # ugh. in principle this could be inside the test
@ -207,76 +172,20 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Schema | None = None) -> DataFr
 # see https://github.com/pytest-dev/pytest/issues/7856
@dataclasses.dataclass
 class _X:
    # FIXME try moving inside?
    x: int
 def test_as_dataframe() -> None:
    import numpy as np
    import pandas as pd
    import pytest
    from pandas.testing import assert_frame_equal
-    from .compat import fromisoformat
+    it = (dict(i=i, s=f'str{i}') for i in range(10))
    it = ({'i': i, 's': f'str{i}'} for i in range(5))
    with pytest.warns(UserWarning, match=r"No 'error' column") as record_warnings:  # noqa: F841
        df: DataFrameT = as_dataframe(it)
        # todo test other error col policies
    assert list(df.columns) == ['i', 's', 'error']
-    # fmt: off
+    assert len(as_dataframe([])) == 0
    assert_frame_equal(
        df,
        pd.DataFrame({
            'i'    : [0     , 1     , 2     , 3     , 4     ],
            's'    : ['str0', 'str1', 'str2', 'str3', 'str4'],
            # NOTE: error column is always added
            'error': [None  , None  , None  , None  , None  ],
        }),
    )
    # fmt: on
    assert_frame_equal(as_dataframe([]), pd.DataFrame(columns=['error']))
    # makes sense to specify the schema so the downstream program doesn't fail in case of empty iterable
    df2: DataFrameT = as_dataframe([], schema=_X)
-    assert_frame_equal(
+    assert list(df2.columns) == ['x', 'error']
        df2,
        # FIXME hmm. x column type should be an int?? and error should be string (or object??)
        pd.DataFrame(columns=['x', 'error']),
    )
    @dataclasses.dataclass
    class S:
        value: str
    def it2() -> Iterator[Res[S]]:
        yield S(value='test')
        yield RuntimeError('i failed')
    df = as_dataframe(it2())
    # fmt: off
    assert_frame_equal(
        df,
        pd.DataFrame(data={
            'value': ['test', np.nan                    ],
            'error': [np.nan, 'RuntimeError: i failed\n'],
            'dt'   : [np.nan, np.nan                    ],
        }).astype(dtype={'dt': 'float'}),  # FIXME should be datetime64 as below
    )
    # fmt: on
    def it3() -> Iterator[Res[S]]:
        yield S(value='aba')
        yield RuntimeError('whoops')
        yield S(value='cde')
        yield RuntimeError('exception with datetime', fromisoformat('2024-08-19T22:47:01Z'))
    df = as_dataframe(it3())
    # fmt: off
    assert_frame_equal(df, pd.DataFrame(data={
        'value': ['aba' , np.nan                  , 'cde' , np.nan                     ],
        'error': [np.nan, 'RuntimeError: whoops\n', np.nan, "RuntimeError: ('exception with datetime', datetime.datetime(2024, 8, 19, 22, 47, 1, tzinfo=datetime.timezone.utc))\n"],
        # note: dt column is added even if errors don't have an associated datetime
        'dt'   : [np.nan, np.nan                  , np.nan, '2024-08-19 22:47:01+00:00'],
    }).astype(dtype={'dt': 'datetime64[ns, UTC]'}))
    # fmt: on
--- a/my/core/preinit.py
+++ b/my/core/preinit.py
@ -1,14 +1,11 @@
 from pathlib import Path
 # todo preinit isn't really a good name? it's only in a separate file because
 # - it's imported from my.core.init (so we want to keep this file as small/reliable as possible, hence not common or something)
 # - we still need this function in __main__, so has to be separate from my/core/init.py
 def get_mycfg_dir() -> Path:
    import appdirs
    import os
    import appdirs  # type: ignore[import-untyped]
    # not sure if that's necessary, i.e. could rely on PYTHONPATH instead
    # on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path?
    mvar = os.environ.get('MY_CONFIG')
--- a/my/core/pytest.py
+++ b/my/core/pytest.py
@ -1,24 +0,0 @@
 """
 Helpers to prevent depending on pytest in runtime
 """
 from .internal import assert_subpackage
 assert_subpackage(__name__)
 import sys
 import typing
 under_pytest = 'pytest' in sys.modules
 if typing.TYPE_CHECKING or under_pytest:
    import pytest
    parametrize = pytest.mark.parametrize
 else:
    def parametrize(*_args, **_kwargs):
        def wrapper(f):
            return f
        return wrapper
--- a/my/core/query.py
+++ b/my/core/query.py
@ -5,29 +5,21 @@ The main entrypoint to this library is the 'select' function below; try:
 python3 -c "from my.core.query import select; help(select)"
 """
 from __future__ import annotations
 import dataclasses
 import importlib
 import inspect
 import itertools
 from collections.abc import Iterable, Iterator
 from datetime import datetime
-from typing import (
+from typing import TypeVar, Tuple, Optional, Union, Callable, Iterable, Iterator, Dict, Any, NamedTuple, List
    Any,
    Callable,
    NamedTuple,
    Optional,
    TypeVar,
 )
 import more_itertools
-from . import error as err
+import my.core.error as err
 from .common import is_namedtuple
 from .error import Res, unwrap
 from .types import is_namedtuple
 from .warnings import low
 T = TypeVar("T")
 ET = Res[T]
@ -48,7 +40,6 @@ class Unsortable(NamedTuple):
 class QueryException(ValueError):
    """Used to differentiate query-related errors, so the CLI interface is more expressive"""
    pass
@ -61,7 +52,7 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterab
    """
    try:
        mod = importlib.import_module(module_name)
-        for fname, f in inspect.getmembers(mod, inspect.isfunction):
+        for (fname, f) in inspect.getmembers(mod, inspect.isfunction):
            if fname == function_name:
                return f
        # in case the function is defined dynamically,
@ -70,7 +61,7 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterab
        if func is not None and callable(func):
            return func
    except Exception as e:
-        raise QueryException(str(e))  # noqa: B904
+        raise QueryException(str(e))
    raise QueryException(f"Could not find function '{function_name}' in '{module_name}'")
@ -81,10 +72,10 @@ def locate_qualified_function(qualified_name: str) -> Callable[[], Iterable[ET]]
    if "." not in qualified_name:
        raise QueryException("Could not find a '.' in the function name, e.g. my.reddit.rexport.comments")
    rdot_index = qualified_name.rindex(".")
-    return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1 :])
+    return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1:])
-def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc | None:
+def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optional[OrderFunc]:
    """
    Attempts to find an attribute which matches the 'where_function' on the object,
    using some getattr/dict checks. Returns a function which when called with
@ -112,7 +103,7 @@ def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc
            if where(v):
                return lambda o: o.get(k, default)  # type: ignore[union-attr]
    elif dataclasses.is_dataclass(obj):
-        for field_name in obj.__annotations__.keys():
+        for (field_name, _annotation) in obj.__annotations__.items():
            if where(getattr(obj, field_name)):
                return lambda o: getattr(o, field_name, default)
    elif is_namedtuple(obj):
@ -129,13 +120,12 @@ def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc
 def _generate_order_by_func(
-    obj_res: Res[T],
+        obj_res: Res[T],
-    *,
+        key: Optional[str] = None,
-    key: str | None = None,
+        where_function: Optional[Where] = None,
-    where_function: Where | None = None,
+        default: Optional[U] = None,
-    default: U | None = None,
+        force_unsortable: bool = False,
-    force_unsortable: bool = False,
+) -> Optional[OrderFunc]:
 ) -> OrderFunc | None:
    """
    Accepts an object Res[T] (Instance of some class or Exception)
@ -188,7 +178,7 @@ pass 'drop_exceptions' to ignore exceptions""")
                return lambda o: o.get(key, default)  # type: ignore[union-attr]
        else:
            if hasattr(obj, key):
-                return lambda o: getattr(o, key, default)
+                return lambda o: getattr(o, key, default)  # type: ignore[arg-type]
    # Note: if the attribute you're ordering by is an Optional type,
    # and on some objects it'll return None, the getattr(o, field_name, default) won't
@ -200,7 +190,7 @@ pass 'drop_exceptions' to ignore exceptions""")
    # user must provide either a key or a where predicate
    if where_function is not None:
-        func: OrderFunc | None = attribute_func(obj, where_function, default)
+        func: Optional[OrderFunc] = attribute_func(obj, where_function, default)
        if func is not None:
            return func
@ -216,13 +206,15 @@ pass 'drop_exceptions' to ignore exceptions""")
        return None  # couldn't compute a OrderFunc for this class/instance
 # currently using the 'key set' as a proxy for 'this is the same type of thing'
 def _determine_order_by_value_key(obj_res: ET) -> Any:
    """
    Returns either the class, or a tuple of the dictionary keys
    """
    key = obj_res.__class__
-    if key is dict:
+    if key == dict:
        # assuming same keys signify same way to determine ordering
        return tuple(obj_res.keys())  # type: ignore[union-attr]
    return key
@ -240,7 +232,7 @@ def _drop_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Iterator[ET]:
 # try getting the first value from the iterator
 # similar to my.core.common.warn_if_empty? this doesn't go through the whole iterator though
-def _peek_iter(itr: Iterator[ET]) -> tuple[ET | None, Iterator[ET]]:
+def _peek_iter(itr: Iterator[ET]) -> Tuple[Optional[ET], Iterator[ET]]:
    itr = more_itertools.peekable(itr)
    try:
        first_item = itr.peek()
@ -251,9 +243,9 @@ def _peek_iter(itr: Iterator[ET]) -> tuple[ET | None, Iterator[ET]]:
 # similar to 'my.core.error.sort_res_by'?
-def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> tuple[Iterator[Unsortable], Iterator[ET]]:
+def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
-    unsortable: list[Unsortable] = []
+    unsortable: List[Unsortable] = []
-    sortable: list[ET] = []
+    sortable: List[ET] = []
    for o in itr:
        # if input to select was another select
        if isinstance(o, Unsortable):
@ -271,11 +263,10 @@ def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> tuple[Iterator[Un
 # the second being items for which orderfunc returned a non-none value
 def _handle_unsorted(
    itr: Iterator[ET],
    *,
    orderfunc: OrderFunc,
    drop_unsorted: bool,
    wrap_unsorted: bool
-) -> tuple[Iterator[Unsortable], Iterator[ET]]:
+) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
    # prefer drop_unsorted to wrap_unsorted, if both were present
    if drop_unsorted:
        return iter([]), _drop_unsorted(itr, orderfunc)
@ -290,16 +281,16 @@ def _handle_unsorted(
 # different types. ***This consumes the iterator***, so
 # you should definitely itertools.tee it beforehand
 # as to not exhaust the values
-def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: U | None = None) -> OrderFunc:
+def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: Optional[U] = None) -> OrderFunc:
    # TODO: add a kwarg to force lookup for every item? would sort of be like core.common.guess_datetime then
-    order_by_lookup: dict[Any, OrderFunc] = {}
+    order_by_lookup: Dict[Any, OrderFunc] = {}
    # need to go through a copy of the whole iterator here to
    # pre-generate functions to support sorting mixed types
    for obj_res in itr:
        key: Any = _determine_order_by_value_key(obj_res)
        if key not in order_by_lookup:
-            keyfunc: OrderFunc | None = _generate_order_by_func(
+            keyfunc: Optional[OrderFunc] = _generate_order_by_func(
                obj_res,
                where_function=order_value,
                default=default,
@ -320,12 +311,12 @@ def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: U
 def _handle_generate_order_by(
    itr,
    *,
-    order_by: OrderFunc | None = None,
+    order_by: Optional[OrderFunc] = None,
-    order_key: str | None = None,
+    order_key: Optional[str] = None,
-    order_value: Where | None = None,
+    order_value: Optional[Where] = None,
-    default: U | None = None,
+    default: Optional[U] = None,
-) -> tuple[OrderFunc | None, Iterator[ET]]:
+) -> Tuple[Optional[OrderFunc], Iterator[ET]]:
-    order_by_chosen: OrderFunc | None = order_by  # if the user just supplied a function themselves
+    order_by_chosen: Optional[OrderFunc] = order_by  # if the user just supplied a function themselves
    if order_by is not None:
        return order_by, itr
    if order_key is not None:
@ -350,19 +341,19 @@ def _handle_generate_order_by(
 def select(
-    src: Iterable[ET] | Callable[[], Iterable[ET]],
+    src: Union[Iterable[ET], Callable[[], Iterable[ET]]],
    *,
-    where: Where | None = None,
+    where: Optional[Where] = None,
-    order_by: OrderFunc | None = None,
+    order_by: Optional[OrderFunc] = None,
-    order_key: str | None = None,
+    order_key: Optional[str] = None,
-    order_value: Where | None = None,
+    order_value: Optional[Where] = None,
-    default: U | None = None,
+    default: Optional[U] = None,
    reverse: bool = False,
-    limit: int | None = None,
+    limit: Optional[int] = None,
    drop_unsorted: bool = False,
    wrap_unsorted: bool = True,
    warn_exceptions: bool = False,
-    warn_func: Callable[[Exception], None] | None = None,
+    warn_func: Optional[Callable[[Exception], None]] = None,
    drop_exceptions: bool = False,
    raise_exceptions: bool = False,
 ) -> Iterator[ET]:
@ -464,7 +455,7 @@ Will attempt to call iter() on the value""")
    try:
        itr: Iterator[ET] = iter(it)
    except TypeError as t:
-        raise QueryException("Could not convert input src to an Iterator: " + str(t))  # noqa: B904
+        raise QueryException("Could not convert input src to an Iterator: " + str(t))
    # if both raise_exceptions and drop_exceptions are provided for some reason,
    # should raise exceptions before dropping them
@ -501,12 +492,7 @@ Will attempt to call iter() on the value""")
        # note: can't just attach sort unsortable values in the same iterable as the
        # other items because they don't have any lookups for order_key or functions
        # to handle items in the order_by_lookup dictionary
-        unsortable, itr = _handle_unsorted(
+        unsortable, itr = _handle_unsorted(itr, order_by_chosen, drop_unsorted, wrap_unsorted)
            itr,
            orderfunc=order_by_chosen,
            drop_unsorted=drop_unsorted,
            wrap_unsorted=wrap_unsorted,
        )
        # run the sort, with the computed order by function
        itr = iter(sorted(itr, key=order_by_chosen, reverse=reverse))  # type: ignore[arg-type]
@ -597,7 +583,7 @@ def test_couldnt_determine_order() -> None:
    res = list(select(iter([object()]), order_value=lambda o: isinstance(o, datetime)))
    assert len(res) == 1
    assert isinstance(res[0], Unsortable)
-    assert type(res[0].obj) is object
+    assert type(res[0].obj) == object
 # same value type, different keys, with clashing keys
@ -613,7 +599,7 @@ class _B(NamedTuple):
 # move these to tests/? They are re-used so much in the tests below,
 # not sure where the best place for these is
-def _mixed_iter() -> Iterator[_A | _B]:
+def _mixed_iter() -> Iterator[Union[_A, _B]]:
    yield _A(x=datetime(year=2009, month=5, day=10, hour=4, minute=10, second=1), y=5, z=10)
    yield _B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))
    yield _A(x=datetime(year=2005, month=5, day=10, hour=4, minute=10, second=1), y=10, z=2)
@ -622,7 +608,7 @@ def _mixed_iter() -> Iterator[_A | _B]:
    yield _A(x=datetime(year=2005, month=4, day=10, hour=4, minute=10, second=1), y=2, z=-5)
-def _mixed_iter_errors() -> Iterator[Res[_A | _B]]:
+def _mixed_iter_errors() -> Iterator[Res[Union[_A, _B]]]:
    m = _mixed_iter()
    yield from itertools.islice(m, 0, 3)
    yield RuntimeError("Unhandled error!")
@ -658,7 +644,7 @@ def test_wrap_unsortable() -> None:
    # by default, wrap unsortable
    res = list(select(_mixed_iter(), order_key="z"))
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 2})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 2})
 def test_disabled_wrap_unsorted() -> None:
@ -677,7 +663,7 @@ def test_drop_unsorted() -> None:
    # test drop unsortable, should remove them before the 'sorted' call
    res = list(select(_mixed_iter(), order_key="z", wrap_unsorted=False, drop_unsorted=True))
    assert len(res) == 4
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4})
 def test_drop_exceptions() -> None:
@ -701,16 +687,15 @@ def test_raise_exceptions() -> None:
 def test_wrap_unsortable_with_error_and_warning() -> None:
    from collections import Counter
    import pytest
    from collections import Counter
    # by default should wrap unsortable (error)
    with pytest.warns(UserWarning, match=r"encountered exception"):
        res = list(select(_mixed_iter_errors(), order_value=lambda o: isinstance(o, datetime)))
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
    # compare the returned error wrapped in the Unsortable
-    returned_error = next(o for o in res if isinstance(o, Unsortable)).obj
+    returned_error = next((o for o in res if isinstance(o, Unsortable))).obj
    assert "Unhandled error!" == str(returned_error)
@ -720,7 +705,7 @@ def test_order_key_unsortable() -> None:
    # both unsortable and items which dont match the order_by (order_key) in this case should be classified unsorted
    res = list(select(_mixed_iter_errors(), order_key="z"))
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 3})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 3})
 def test_order_default_param() -> None:
@ -740,7 +725,7 @@ def test_no_recursive_unsortables() -> None:
    # select to select as input, wrapping unsortables the first time, second should drop them
    # reverse=True to send errors to the end, so the below order_key works
    res = list(select(_mixed_iter_errors(), order_key="z", reverse=True))
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 3})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 3})
    # drop_unsorted
    dropped = list(select(res, order_key="z", drop_unsorted=True))
--- a/my/core/query_range.py
+++ b/my/core/query_range.py
@ -7,30 +7,27 @@ filtered iterator
 See the select_range function below
 """
 from __future__ import annotations
 import re
 import time
-from collections.abc import Iterator
+from functools import lru_cache
-from datetime import date, datetime, timedelta
+from datetime import datetime, timedelta, date
-from functools import cache
+from typing import Callable, Iterator, NamedTuple, Optional, Any, Type
 from typing import Any, Callable, NamedTuple
 import more_itertools
 from .compat import fromisoformat
 from .query import (
    ET,
    OrderFunc,
    QueryException,
    select,
    OrderFunc,
    Where,
    _handle_generate_order_by,
-    select,
+    ET,
 )
-timedelta_regex = re.compile(
+from .common import isoparse
-    r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$"
+
-)
+
 timedelta_regex = re.compile(r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$")
 # https://stackoverflow.com/a/51916936
@ -81,7 +78,7 @@ def parse_datetime_float(date_str: str) -> float:
    except ValueError:
        pass
    try:
-        return fromisoformat(ds).timestamp()
+        return isoparse(ds).timestamp()
    except (AssertionError, ValueError):
        pass
@ -93,7 +90,7 @@ def parse_datetime_float(date_str: str) -> float:
        # dateparser is a bit more lenient than the above, lets you type
        # all sorts of dates as inputs
        # https://github.com/scrapinghub/dateparser#how-to-use
-        res: datetime | None = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
+        res: Optional[datetime] = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
        if res is not None:
            return res.timestamp()
@ -103,7 +100,7 @@ def parse_datetime_float(date_str: str) -> float:
 # probably DateLike input? but a user could specify an order_key
 # which is an epoch timestamp or a float value which they
 # expect to be converted to a datetime to compare
-@cache
+@lru_cache(maxsize=None)
 def _datelike_to_float(dl: Any) -> float:
    if isinstance(dl, datetime):
        return dl.timestamp()
@ -114,7 +111,7 @@ def _datelike_to_float(dl: Any) -> float:
        try:
            return parse_datetime_float(dl)
        except QueryException as q:
-            raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q))  # noqa: B904
+            raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q))
 class RangeTuple(NamedTuple):
@ -135,12 +132,11 @@ class RangeTuple(NamedTuple):
            of the timeframe -- 'before'
        - before and after - anything after 'after' and before 'before', acts as a time range
    """
    # technically doesn't need to be Optional[Any],
    # just to make it more clear these can be None
-    after: Any | None
+    after: Optional[Any]
-    before: Any | None
+    before: Optional[Any]
-    within: Any | None
+    within: Optional[Any]
 Converter = Callable[[Any], Any]
@ -151,9 +147,9 @@ def _parse_range(
    unparsed_range: RangeTuple,
    end_parser: Converter,
    within_parser: Converter,
-    parsed_range: RangeTuple | None = None,
+    parsed_range: Optional[RangeTuple] = None,
-    error_message: str | None = None,
+    error_message: Optional[str] = None
-) -> RangeTuple | None:
+) -> Optional[RangeTuple]:
    if parsed_range is not None:
        return parsed_range
@ -182,11 +178,11 @@ def _create_range_filter(
    end_parser: Converter,
    within_parser: Converter,
    attr_func: Where,
-    parsed_range: RangeTuple | None = None,
+    parsed_range: Optional[RangeTuple] = None,
-    default_before: Any | None = None,
+    default_before: Optional[Any] = None,
-    value_coercion_func: Converter | None = None,
+    value_coercion_func: Optional[Converter] = None,
-    error_message: str | None = None,
+    error_message: Optional[str] = None,
-) -> Where | None:
+) -> Optional[Where]:
    """
    Handles:
        - parsing the user input into values that are comparable to items the iterable returns
@ -278,17 +274,17 @@ def _create_range_filter(
 def select_range(
    itr: Iterator[ET],
    *,
-    where: Where | None = None,
+    where: Optional[Where] = None,
-    order_key: str | None = None,
+    order_key: Optional[str] = None,
-    order_value: Where | None = None,
+    order_value: Optional[Where] = None,
-    order_by_value_type: type | None = None,
+    order_by_value_type: Optional[Type] = None,
-    unparsed_range: RangeTuple | None = None,
+    unparsed_range: Optional[RangeTuple] = None,
    reverse: bool = False,
-    limit: int | None = None,
+    limit: Optional[int] = None,
    drop_unsorted: bool = False,
    wrap_unsorted: bool = False,
    warn_exceptions: bool = False,
-    warn_func: Callable[[Exception], None] | None = None,
+    warn_func: Optional[Callable[[Exception], None]] = None,
    drop_exceptions: bool = False,
    raise_exceptions: bool = False,
 ) -> Iterator[ET]:
@ -323,22 +319,19 @@ def select_range(
            drop_exceptions=drop_exceptions,
            raise_exceptions=raise_exceptions,
            warn_exceptions=warn_exceptions,
-            warn_func=warn_func,
+            warn_func=warn_func)
        )
-    order_by_chosen: OrderFunc | None = None
+    order_by_chosen: Optional[OrderFunc] = None
    # if the user didn't specify an attribute to order value, but specified a type
    # we should search for on each value in the iterator
    if order_value is None and order_by_value_type is not None:
        # search for that type on the iterator object
-        order_value = lambda o: isinstance(o, order_by_value_type)
+        order_value = lambda o: isinstance(o, order_by_value_type)  # type: ignore
    # if the user supplied a order_key, and/or we've generated an order_value, create
    # the function that accesses that type on each value in the iterator
    if order_key is not None or order_value is not None:
        # _generate_order_value_func internally here creates a copy of the iterator, which has to
        # be consumed in-case we're sorting by mixed types
        order_by_chosen, itr = _handle_generate_order_by(itr, order_key=order_key, order_value=order_value)
        # signifies that itr is empty -- can early return here
        if order_by_chosen is None:
@ -350,39 +343,37 @@ def select_range(
        if order_by_chosen is None:
            raise QueryException("""Can't order by range if we have no way to order_by!
 Specify a type or a key to order the value by""")
        # force drop_unsorted=True so we can use _create_range_filter
        # sort the iterable by the generated order_by_chosen function
        itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
        filter_func: Where | None
        if order_by_value_type in [datetime, date]:
            filter_func = _create_range_filter(
                unparsed_range=unparsed_range,
                end_parser=parse_datetime_float,
                within_parser=parse_timedelta_float,
                attr_func=order_by_chosen,  # type: ignore[arg-type]
                default_before=time.time(),
                value_coercion_func=_datelike_to_float,
            )
        elif order_by_value_type in [int, float]:
            # allow primitives to be converted using the default int(), float() callables
            filter_func = _create_range_filter(
                unparsed_range=unparsed_range,
                end_parser=order_by_value_type,
                within_parser=order_by_value_type,
                attr_func=order_by_chosen,  # type: ignore[arg-type]
                default_before=None,
                value_coercion_func=order_by_value_type,
            )
        else:
-            # TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
+            # force drop_unsorted=True so we can use _create_range_filter
-            # would need to allow passing the end_parser, within parser, default before and value_coercion_func...
+            # sort the iterable by the generated order_by_chosen function
-            # (seems like a lot?)
+            itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
-            raise QueryException("Sorting by custom types is currently unsupported")
+            filter_func: Optional[Where]
            if order_by_value_type in [datetime, date]:
                filter_func = _create_range_filter(
                    unparsed_range=unparsed_range,
                    end_parser=parse_datetime_float,
                    within_parser=parse_timedelta_float,
                    attr_func=order_by_chosen,  # type: ignore[arg-type]
                    default_before=time.time(),
                    value_coercion_func=_datelike_to_float)
            elif order_by_value_type in [int, float]:
                # allow primitives to be converted using the default int(), float() callables
                filter_func = _create_range_filter(
                    unparsed_range=unparsed_range,
                    end_parser=order_by_value_type,
                    within_parser=order_by_value_type,
                    attr_func=order_by_chosen,  # type: ignore[arg-type]
                    default_before=None,
                    value_coercion_func=order_by_value_type)
            else:
                # TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
                # would need to allow passing the end_parser, within parser, default before and value_coercion_func...
                # (seems like a lot?)
                raise QueryException("Sorting by custom types is currently unsupported")
-        # use the created filter function
+            # use the created filter function
-        # we've already applied drop_exceptions and kwargs related to unsortable values above
+            # we've already applied drop_exceptions and kwargs related to unsortable values above
-        itr = select(itr, where=filter_func, limit=limit, reverse=reverse)
+            itr = select(itr, where=filter_func, limit=limit, reverse=reverse)
    else:
        # wrap_unsorted may be used here if the user specified an order_key,
        # or manually passed a order_value function
@ -400,7 +391,7 @@ Specify a type or a key to order the value by""")
    return itr
-# reuse items from query for testing
+# re-use items from query for testing
 from .query import _A, _B, _Float, _mixed_iter_errors
@ -480,8 +471,8 @@ def test_range_predicate() -> None:
    )
    # filter from 0 to 5
-    rn: RangeTuple = RangeTuple("0", "5", None)
+    rn: Optional[RangeTuple] = RangeTuple("0", "5", None)
-    zero_to_five_filter: Where | None = int_filter_func(unparsed_range=rn)
+    zero_to_five_filter: Optional[Where] = int_filter_func(unparsed_range=rn)
    assert zero_to_five_filter is not None
    # this is just a Where function, given some input it return True/False if the value is allowed
    assert zero_to_five_filter(3) is True
@ -494,7 +485,6 @@ def test_range_predicate() -> None:
    rn = RangeTuple(None, 3, "3.5")
    assert list(filter(int_filter_func(unparsed_range=rn, attr_func=identity), src())) == ["0", "1", "2"]
 def test_parse_range() -> None:
    from functools import partial
@ -538,8 +528,9 @@ def test_parse_timedelta_string() -> None:
 def test_parse_datetime_float() -> None:
    pnow = parse_datetime_float("now")
-    sec_diff = abs(pnow - datetime.now().timestamp())
+    sec_diff = abs((pnow - datetime.now().timestamp()))
    # should probably never fail? could mock time.time
    # but there seems to be issues with doing that use C-libraries (as time.time) does
    # https://docs.python.org/3/library/unittest.mock-examples.html#partial-mocking
--- a/my/core/serialize.py
+++ b/my/core/serialize.py
@ -1,15 +1,12 @@
 from __future__ import annotations
 import datetime
-from dataclasses import asdict, is_dataclass
+import dataclasses
 from decimal import Decimal
 from functools import cache
 from pathlib import Path
-from typing import Any, Callable, NamedTuple
+from decimal import Decimal
 from typing import Any, Optional, Callable, NamedTuple
 from functools import lru_cache
 from .common import is_namedtuple
 from .error import error_to_json
 from .pytest import parametrize
 from .types import is_namedtuple
 # note: it would be nice to combine the 'asdict' and _default_encode to some function
 # that takes a complex python object and returns JSON-compatible fields, while still
@ -19,8 +16,6 @@ from .types import is_namedtuple
 DefaultEncoder = Callable[[Any], Any]
 Dumps = Callable[[Any], str]
 def _default_encode(obj: Any) -> Any:
    """
@ -38,9 +33,8 @@ def _default_encode(obj: Any) -> Any:
    # convert paths to their string representation
    if isinstance(obj, Path):
        return str(obj)
-    if is_dataclass(obj):
+    if dataclasses.is_dataclass(obj):
-        assert not isinstance(obj, type)  # to help mypy
+        return dataclasses.asdict(obj)
        return asdict(obj)
    if isinstance(obj, Exception):
        return error_to_json(obj)
    # if something was stored as 'decimal', you likely
@ -59,12 +53,12 @@ def _default_encode(obj: Any) -> Any:
 # could possibly run multiple times/raise warning if you provide different 'default'
 # functions or change the kwargs? The alternative is to maintain all of this at the module
 # level, which is just as annoying
-@cache
+@lru_cache(maxsize=None)
 def _dumps_factory(**kwargs) -> Callable[[Any], str]:
    use_default: DefaultEncoder = _default_encode
    # if the user passed an additional 'default' parameter,
    # try using that to serialize before before _default_encode
-    _additional_default: DefaultEncoder | None = kwargs.get("default")
+    _additional_default: Optional[DefaultEncoder] = kwargs.get("default")
    if _additional_default is not None and callable(_additional_default):
        def wrapped_default(obj: Any) -> Any:
@ -80,29 +74,22 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
    kwargs["default"] = use_default
-    prefer_factory: str | None = kwargs.pop('_prefer_factory', None)
+    try:
-
+        import orjson
    def orjson_factory() -> Dumps | None:
        try:
            import orjson
        except ModuleNotFoundError:
            return None
        # todo: add orjson.OPT_NON_STR_KEYS? would require some bitwise ops
        # most keys are typically attributes from a NT/Dataclass,
        # so most seem to work: https://github.com/ijl/orjson#opt_non_str_keys
-        def _orjson_dumps(obj: Any) -> str:  # TODO rename?
+        def _orjson_dumps(obj: Any) -> str:
            # orjson returns json as bytes, encode to string
            return orjson.dumps(obj, **kwargs).decode('utf-8')
        return _orjson_dumps
    except ModuleNotFoundError:
        pass
-    def simplejson_factory() -> Dumps | None:
+    try:
-        try:
+        from simplejson import dumps as simplejson_dumps
            from simplejson import dumps as simplejson_dumps
        except ModuleNotFoundError:
            return None
        # if orjson couldn't be imported, try simplejson
        # This is included for compatibility reasons because orjson
        # is rust-based and compiling on rarer architectures may not work
@ -117,42 +104,23 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
        return _simplejson_dumps
-    def stdlib_factory() -> Dumps | None:
+    except ModuleNotFoundError:
-        import json
+        pass
-        from .warnings import high
+    import json
    from .warnings import high
-        high(
+    high("You might want to install 'orjson' to support serialization for lots more types! If that does not work for you, you can install 'simplejson' instead")
            "You might want to install 'orjson' to support serialization for lots more types! If that does not work for you, you can install 'simplejson' instead"
        )
-        def _stdlib_dumps(obj: Any) -> str:
+    def _stdlib_dumps(obj: Any) -> str:
-            return json.dumps(obj, **kwargs)
+        return json.dumps(obj, **kwargs)
-        return _stdlib_dumps
+    return _stdlib_dumps
    factories = {
        'orjson': orjson_factory,
        'simplejson': simplejson_factory,
        'stdlib': stdlib_factory,
    }
    if prefer_factory is not None:
        factory = factories[prefer_factory]
        res = factory()
        assert res is not None, prefer_factory
        return res
    for factory in factories.values():
        res = factory()
        if res is not None:
            return res
    raise RuntimeError("Should not happen!")
 def dumps(
    obj: Any,
-    default: DefaultEncoder | None = None,
+    default: Optional[DefaultEncoder] = None,
    **kwargs,
 ) -> str:
    """
@ -185,17 +153,8 @@ def dumps(
    return _dumps_factory(default=default, **kwargs)(obj)
-@parametrize('factory', ['orjson', 'simplejson', 'stdlib'])
+def test_serialize_fallback() -> None:
-def test_dumps(factory: str) -> None:
+    import json as jsn  # dont cause possible conflicts with module code
    import pytest
    orig_dumps = globals()['dumps']  # hack to prevent error from using local variable before declaring
    def dumps(*args, **kwargs) -> str:
        kwargs['_prefer_factory'] = factory
        return orig_dumps(*args, **kwargs)
    import json as json_builtin  # dont cause possible conflicts with module code
    # can't use a namedtuple here, since the default json.dump serializer
    # serializes namedtuples as tuples, which become arrays
@ -206,12 +165,36 @@ def test_dumps(factory: str) -> None:
    # the lru_cache'd warning may have already been sent,
    # so checking may be nondeterministic?
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
-        res = json_builtin.loads(dumps(X))
+        res = jsn.loads(dumps(X))
        assert res == [5, 5.0]
 # this needs to be defined here to prevent a mypy bug
 # see https://github.com/python/mypy/issues/7281
 class _A(NamedTuple):
    x: int
    y: float
 def test_nt_serialize() -> None:
    import json as jsn  # dont cause possible conflicts with module code
    import orjson  # import to make sure this is installed
    res: str = dumps(_A(x=1, y=2.0))
    assert res == '{"x":1,"y":2.0}'
    # test orjson option kwarg
    data = {datetime.date(year=1970, month=1, day=1): 5}
    res2 = jsn.loads(dumps(data, option=orjson.OPT_NON_STR_KEYS))
    assert res2 == {'1970-01-01': 5}
 def test_default_serializer() -> None:
    import pytest
    import json as jsn  # dont cause possible conflicts with module code
    class Unserializable:
        def __init__(self, x: int):
            self.x = x
@ -225,7 +208,7 @@ def test_dumps(factory: str) -> None:
        def _serialize(self) -> Any:
            return {"x": self.x, "y": self.y}
-    res = json_builtin.loads(dumps(WithUnderscoreSerialize(6)))
+    res = jsn.loads(dumps(WithUnderscoreSerialize(6)))
    assert res == {"x": 6, "y": 6.0}
    # test passing additional 'default' func
@ -237,25 +220,5 @@ def test_dumps(factory: str) -> None:
    # this serializes both Unserializable, which is a custom type otherwise
    # not handled, and timedelta, which is handled by the '_default_encode'
    # in the 'wrapped_default' function
-    res2 = json_builtin.loads(dumps(Unserializable(10), default=_serialize_with_default))
+    res2 = jsn.loads(dumps(Unserializable(10), default=_serialize_with_default))
    assert res2 == {"x": 10, "y": 10.0}
    if factory == 'orjson':
        import orjson
        # test orjson option kwarg
        data = {datetime.date(year=1970, month=1, day=1): 5}
        res2 = json_builtin.loads(dumps(data, option=orjson.OPT_NON_STR_KEYS))
        assert res2 == {'1970-01-01': 5}
@parametrize('factory', ['orjson', 'simplejson'])
 def test_dumps_namedtuple(factory: str) -> None:
    import json as json_builtin  # dont cause possible conflicts with module code
    class _A(NamedTuple):
        x: int
        y: float
    res: str = dumps(_A(x=1, y=2.0), _prefer_factory=factory)
    assert json_builtin.loads(res) == {'x': 1, 'y': 2.0}
--- a/my/core/source.py
+++ b/my/core/source.py
@ -3,12 +3,9 @@ Decorator to gracefully handle importing a data source, or warning
 and yielding nothing (or a default) when its not available
 """
 from __future__ import annotations
 import warnings
 from collections.abc import Iterable, Iterator
 from functools import wraps
-from typing import Any, Callable, TypeVar
+from typing import Any, Iterator, TypeVar, Callable, Optional, Iterable
 import warnings
 from .warnings import medium
@ -29,8 +26,8 @@ _DEFAULT_ITR = ()
 def import_source(
    *,
    default: Iterable[T] = _DEFAULT_ITR,
-    module_name: str | None = None,
+    module_name: Optional[str] = None,
-    help_url: str | None = None,
+    help_url: Optional[str] = None,
 ) -> Callable[..., Callable[..., Iterator[T]]]:
    """
    doesn't really play well with types, but is used to catch
@ -53,7 +50,6 @@ def import_source(
            except (ImportError, AttributeError) as err:
                from . import core_config as CC
                from .error import warn_my_config_import_error
                suppressed_in_conf = False
                if module_name is not None and CC.config._is_module_active(module_name) is False:
                    suppressed_in_conf = True
@ -65,18 +61,16 @@ def import_source(
                        warnings.warn(f"""If you don't want to use this module, to hide this message, add '{module_name}' to your core config disabled_modules in your config, like:
 class core:
-    disabled_modules = [{module_name!r}]
+    disabled_modules = [{repr(module_name)}]
-""", stacklevel=1)
+""")
                    # try to check if this is a config error or based on dependencies not being installed
                    if isinstance(err, (ImportError, AttributeError)):
-                        matched_config_err = warn_my_config_import_error(err, module_name=module_name, help_url=help_url)
+                        matched_config_err = warn_my_config_import_error(err, help_url=help_url)
                        # if we determined this wasn't a config error, and it was an attribute error
                        # it could be *any* attribute error -- we should raise this since its otherwise a fatal error
                        # from some code in the module failing
                        if not matched_config_err and isinstance(err, AttributeError):
                            raise err
                yield from default
        return wrapper
    return decorator
--- a/my/core/sqlite.py
+++ b/my/core/sqlite.py
@ -1,19 +1,15 @@
-from __future__ import annotations
+from .common import assert_subpackage; assert_subpackage(__name__)
 from .internal import assert_subpackage  # noqa: I001
 assert_subpackage(__name__)
 import shutil
 import sqlite3
 from collections.abc import Iterator
 from contextlib import contextmanager
 from pathlib import Path
 import shutil
 import sqlite3
 from tempfile import TemporaryDirectory
-from typing import Any, Callable, Literal, Union, overload
+from typing import Tuple, Any, Iterator, Callable, Optional, Union, Literal
-from .common import PathIsh
+
-from .compat import assert_never
+from .common import PathIsh, assert_never
 def sqlite_connect_immutable(db: PathIsh) -> sqlite3.Connection:
@ -26,7 +22,6 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None:
        conn.execute('CREATE TABLE testtable (col)')
    import pytest
    with pytest.raises(sqlite3.OperationalError, match='readonly database'):
        with sqlite_connect_immutable(db) as conn:
            conn.execute('DROP TABLE testtable')
@ -38,17 +33,15 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None:
 SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]
 def dict_factory(cursor, row):
    fields = [column[0] for column in cursor.description]
-    return dict(zip(fields, row))
+    return {key: value for key, value in zip(fields, row)}
 Factory = Union[SqliteRowFactory, Literal['row', 'dict']]
@contextmanager
-def sqlite_connection(db: PathIsh, *, immutable: bool = False, row_factory: Factory | None = None) -> Iterator[sqlite3.Connection]:
+def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Optional[Factory]=None) -> Iterator[sqlite3.Connection]:
    dbp = f'file:{db}'
    # https://www.sqlite.org/draft/uri.html#uriimmutable
    if immutable:
@ -104,76 +97,32 @@ def sqlite_copy_and_open(db: PathIsh) -> sqlite3.Connection:
 # and then the return type ends up as Iterator[Tuple[str, ...]], which isn't desirable :(
 # a bit annoying to have this copy-pasting, but hopefully not a big issue
-# fmt: off
+from typing import overload
@overload
-def select(cols: tuple[str                                   ], rest: str, *, db: sqlite3.Connection) -> \
+def select(cols: Tuple[str                                   ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any                                   ]]: ...
+        Iterator[Tuple[Any                                   ]]: ...
@overload
-def select(cols: tuple[str, str                              ], rest: str, *, db: sqlite3.Connection) -> \
+def select(cols: Tuple[str, str                              ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any                              ]]: ...
+        Iterator[Tuple[Any, Any                              ]]: ...
@overload
-def select(cols: tuple[str, str, str                         ], rest: str, *, db: sqlite3.Connection) -> \
+def select(cols: Tuple[str, str, str                         ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any                         ]]: ...
+        Iterator[Tuple[Any, Any, Any                         ]]: ...
@overload
-def select(cols: tuple[str, str, str, str                    ], rest: str, *, db: sqlite3.Connection) -> \
+def select(cols: Tuple[str, str, str, str                    ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any                    ]]: ...
+        Iterator[Tuple[Any, Any, Any, Any                    ]]: ...
@overload
-def select(cols: tuple[str, str, str, str, str               ], rest: str, *, db: sqlite3.Connection) -> \
+def select(cols: Tuple[str, str, str, str, str               ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any, Any               ]]: ...
+        Iterator[Tuple[Any, Any, Any, Any, Any               ]]: ...
@overload
-def select(cols: tuple[str, str, str, str, str, str          ], rest: str, *, db: sqlite3.Connection) -> \
+def select(cols: Tuple[str, str, str, str, str, str          ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any, Any, Any          ]]: ...
+        Iterator[Tuple[Any, Any, Any, Any, Any, Any          ]]: ...
@overload
-def select(cols: tuple[str, str, str, str, str, str, str     ], rest: str, *, db: sqlite3.Connection) -> \
+def select(cols: Tuple[str, str, str, str, str, str, str     ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any, Any, Any, Any     ]]: ...
+        Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any     ]]: ...
@overload
-def select(cols: tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \
+def select(cols: Tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
+        Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
 # fmt: on
 def select(cols, rest, *, db):
    # db arg is last cause that results in nicer code formatting..
    return db.execute('SELECT ' + ','.join(cols) + ' ' + rest)
 class SqliteTool:
    def __init__(self, connection: sqlite3.Connection) -> None:
        self.connection = connection
    def _get_sqlite_master(self) -> dict[str, str]:
        res = {}
        for c in self.connection.execute('SELECT name, type FROM sqlite_master'):
            [name, type_] = c
            assert type_ in {'table', 'index', 'view', 'trigger'}, (name, type_)  # just in case
            res[name] = type_
        return res
    def get_table_names(self) -> list[str]:
        master = self._get_sqlite_master()
        res = []
        for name, type_ in master.items():
            if type_ != 'table':
                continue
            res.append(name)
        return res
    def get_table_schema(self, name: str) -> dict[str, str]:
        """
        Returns map from column name to column type
        NOTE: Sometimes this doesn't work if the db has some extensions (e.g. happens for facebook apps)
              In this case you might still be able to use get_table_names
        """
        schema: dict[str, str] = {}
        for row in self.connection.execute(f'PRAGMA table_info(`{name}`)'):
            col   = row[1]
            type_ = row[2]
            # hmm, somewhere between 3.34.1 and 3.37.2, sqlite started normalising type names to uppercase
            # let's do this just in case since python < 3.10 are using the old version
            # e.g. it could have returned 'blob' and that would confuse blob check (see _check_allowed_blobs)
            type_ = type_.upper()
            schema[col] = type_
        return schema
    def get_table_schemas(self) -> dict[str, dict[str, str]]:
        return {name: self.get_table_schema(name) for name in self.get_table_names()}
--- a/my/core/stats.py
+++ b/my/core/stats.py
@ -1,178 +1,23 @@
 '''
 Helpers for hpi doctor/stats functionality.
 '''
-
+import collections
 from __future__ import annotations
 import collections.abc
 import importlib
 import inspect
 import typing
-from collections.abc import Iterable, Iterator, Sequence
+from typing import Optional, Callable, Any, Iterator, Sequence, Dict, List
 from contextlib import contextmanager
 from datetime import datetime
 from pathlib import Path
 from types import ModuleType
 from typing import (
    Any,
    Callable,
    Protocol,
    cast,
 )
-from .types import asdict
+from .common import StatsFun, Stats, stat
 Stats = dict[str, Any]
 class StatsFun(Protocol):
    def __call__(self, *, quick: bool = False) -> Stats: ...
 # global state that turns on/off quick stats
 # can use the 'quick_stats' contextmanager
 # to enable/disable this in cli so that module 'stats'
 # functions don't have to implement custom 'quick' logic
 QUICK_STATS = False
 # in case user wants to use the stats functions/quick option
 # elsewhere -- can use this decorator instead of editing
 # the global state directly
@contextmanager
 def quick_stats():
    global QUICK_STATS
    prev = QUICK_STATS
    try:
        QUICK_STATS = True
        yield
    finally:
        QUICK_STATS = prev
 def stat(
    func: Callable[[], Iterable[Any]] | Iterable[Any],
    *,
    quick: bool = False,
    name: str | None = None,
 ) -> Stats:
    """
    Extracts various statistics from a passed iterable/callable, e.g.:
    - number of items
    - first/last item
    - timestamps associated with first/last item
    If quick is set, then only first 100 items of the iterable will be processed
    """
    if callable(func):
        fr = func()
        if hasattr(fr, '__enter__') and hasattr(fr, '__exit__'):
            # context managers has Iterable type, but they aren't data providers
            #  sadly doesn't look like there is a way to tell from typing annotations
            # Ideally we'd detect this in is_data_provider...
            #  but there is no way of knowing without actually calling it first :(
            return {}
        fname = func.__name__
    else:
        # meh. means it's just a list.. not sure how to generate a name then
        fr = func
        fname = f'unnamed_{id(fr)}'
    type_name = type(fr).__name__
    extras = {}
    if type_name == 'DataFrame':
        # dynamic, because pandas is an optional dependency..
        df = cast(Any, fr)  # todo ugh, not sure how to annotate properly
        df = df.reset_index()
        fr = df.to_dict(orient='records')
        dtypes = df.dtypes.to_dict()
        extras['dtypes'] = dtypes
    res = _stat_iterable(fr, quick=quick)
    res.update(extras)
    stat_name = name if name is not None else fname
    return {
        stat_name: res,
    }
 def test_stat() -> None:
    # the bulk of testing is in test_stat_iterable
    # works with 'anonymous' lists
    res = stat([1, 2, 3])
    [(name, v)] = res.items()
    # note: name will be a little funny since anonymous list doesn't have one
    assert v == {'count': 3}
    #
    # works with functions:
    def fun():
        return [4, 5, 6]
    assert stat(fun) == {'fun': {'count': 3}}
    #
    # context managers are technically iterable
    #  , but usually we wouldn't want to compute stats for them
    # this is mainly intended for guess_stats,
    #  since it can't tell whether the function is a ctx manager without calling it
    @contextmanager
    def cm():
        yield 1
        yield 3
    assert stat(cm) == {}  # type: ignore[arg-type]
    #
    # works with pandas dataframes
    import numpy as np
    import pandas as pd
    def df() -> pd.DataFrame:
        dates = pd.date_range(start='2024-02-10 08:00', end='2024-02-11 16:00', freq='5h')
        return pd.DataFrame([f'value{i}' for i, _ in enumerate(dates)], index=dates, columns=['value'])
    assert stat(df) == {
        'df': {
            'count': 7,
            'dtypes': {
                'index': np.dtype('<M8[ns]'),
                'value': np.dtype('O'),
            },
            'first': pd.Timestamp('2024-02-10 08:00'),
            'last': pd.Timestamp('2024-02-11 14:00'),
        },
    }
    #
 def get_stats(module_name: str, *, guess: bool = False) -> StatsFun | None:
    stats: StatsFun | None = None
    try:
        module = importlib.import_module(module_name)
    except Exception:
        return None
    stats = getattr(module, 'stats', None)
    if stats is None:
        stats = guess_stats(module)
    return stats
 # TODO maybe could be enough to annotate OUTPUTS or something like that?
 # then stats could just use them as hints?
-def guess_stats(module: ModuleType) -> StatsFun | None:
+def guess_stats(module_name: str, quick: bool = False) -> Optional[StatsFun]:
-    """
+    providers = guess_data_providers(module_name)
    If the module doesn't have explicitly defined 'stat' function,
     this is used to try to guess what could be included in stats automatically
    """
    providers = _guess_data_providers(module)
    if len(providers) == 0:
        return None
-    def auto_stats(*, quick: bool = False) -> Stats:
+    def auto_stats() -> Stats:
        res = {}
        for k, v in providers.items():
            res.update(stat(v, quick=quick, name=k))
@ -182,11 +27,12 @@ def guess_stats(module: ModuleType) -> StatsFun | None:
 def test_guess_stats() -> None:
    from datetime import datetime
    import my.core.tests.auto_stats as M
-    auto_stats = guess_stats(M)
+    auto_stats = guess_stats(M.__name__)
    assert auto_stats is not None
-    res = auto_stats(quick=False)
+    res = auto_stats()
    assert res == {
        'inputs': {
@ -202,15 +48,15 @@ def test_guess_stats() -> None:
    }
-def _guess_data_providers(module: ModuleType) -> dict[str, Callable]:
+def guess_data_providers(module_name: str) -> Dict[str, Callable]:
    module = importlib.import_module(module_name)
    mfunctions = inspect.getmembers(module, inspect.isfunction)
    return {k: v for k, v in mfunctions if is_data_provider(v)}
-# todo how to exclude deprecated data providers?
+# todo how to exclude deprecated stuff?
 def is_data_provider(fun: Any) -> bool:
    """
    Criteria for being a "data provider":
    1. returns iterable or something like that
    2. takes no arguments? (otherwise not callable by stats anyway?)
    3. doesn't start with an underscore (those are probably helper functions?)
@ -226,7 +72,7 @@ def is_data_provider(fun: Any) -> bool:
        return False
    # has at least one argument without default values
-    if len(list(_sig_required_params(sig))) > 0:
+    if len(list(sig_required_params(sig))) > 0:
        return False
    if hasattr(fun, '__name__'):
@ -242,7 +88,7 @@ def is_data_provider(fun: Any) -> bool:
    if return_type is None:
        return False
-    return _type_is_iterable(return_type)
+    return type_is_iterable(return_type)
 def test_is_data_provider() -> None:
@ -253,42 +99,34 @@ def test_is_data_provider() -> None:
    def no_return_type():
        return [1, 2, 3]
    assert not idp(no_return_type)
    lam = lambda: [1, 2]
    assert not idp(lam)
-    def has_extra_args(count) -> list[int]:
+    def has_extra_args(count) -> List[int]:
        return list(range(count))
    assert not idp(has_extra_args)
    def has_return_type() -> Sequence[str]:
        return ['a', 'b', 'c']
    assert idp(has_return_type)
    def _helper_func() -> Iterator[Any]:
        yield 1
    assert not idp(_helper_func)
    def inputs() -> Iterator[Any]:
        yield 1
    assert idp(inputs)
    def producer_inputs() -> Iterator[Any]:
        yield 1
    assert idp(producer_inputs)
-def _sig_required_params(sig: inspect.Signature) -> Iterator[inspect.Parameter]:
+# return any parameters the user is required to provide - those which don't have default values
-    """
+def sig_required_params(sig: inspect.Signature) -> Iterator[inspect.Parameter]:
    Returns parameters the user is required to provide - e.g. ones that don't have default values
    """
    for param in sig.parameters.values():
        if param.default == inspect.Parameter.empty:
            yield param
@ -298,24 +136,21 @@ def test_sig_required_params() -> None:
    def x() -> int:
        return 5
-
+    assert len(list(sig_required_params(inspect.signature(x)))) == 0
    assert len(list(_sig_required_params(inspect.signature(x)))) == 0
    def y(arg: int) -> int:
        return arg
-
+    assert len(list(sig_required_params(inspect.signature(y)))) == 1
    assert len(list(_sig_required_params(inspect.signature(y)))) == 1
    # from stats perspective, this should be treated as a data provider as well
    # could be that the default value to the data provider is the 'default'
    # path to use for inputs/a function to provide input data
    def z(arg: int = 5) -> int:
        return arg
-
+    assert len(list(sig_required_params(inspect.signature(z)))) == 0
    assert len(list(_sig_required_params(inspect.signature(z)))) == 0
-def _type_is_iterable(type_spec) -> bool:
+def type_is_iterable(type_spec) -> bool:
    origin = typing.get_origin(type_spec)
    if origin is None:
        return False
@ -332,139 +167,14 @@ def _type_is_iterable(type_spec) -> bool:
 # todo docstring test?
 def test_type_is_iterable() -> None:
-    fun = _type_is_iterable
+    from typing import List, Sequence, Iterable, Dict, Any
    fun = type_is_iterable
    assert not fun(None)
    assert not fun(int)
    assert not fun(Any)
-    assert not fun(dict[int, int])
+    assert not fun(Dict[int, int])
-    assert fun(list[int])
+    assert fun(List[int])
-    assert fun(Sequence[dict[str, str]])
+    assert fun(Sequence[Dict[str, str]])
    assert fun(Iterable[Any])
 def _stat_item(item):
    if item is None:
        return None
    if isinstance(item, Path):
        return str(item)
    return _guess_datetime(item)
 def _stat_iterable(it: Iterable[Any], *, quick: bool = False) -> Stats:
    from more_itertools import first, ilen, take
    # todo not sure if there is something in more_itertools to compute this?
    total = 0
    errors = 0
    first_item = None
    last_item = None
    def funcit():
        nonlocal errors, first_item, last_item, total
        for x in it:
            total += 1
            if isinstance(x, Exception):
                errors += 1
            else:
                last_item = x
                if first_item is None:
                    first_item = x
            yield x
    eit = funcit()
    count: Any
    if quick or QUICK_STATS:
        initial = take(100, eit)
        count = len(initial)
        if first(eit, None) is not None:  # todo can actually be none...
            # haven't exhausted
            count = f'{count}+'
    else:
        count = ilen(eit)
    res = {
        'count': count,
    }
    if total == 0:
        # not sure but I guess a good balance? wouldn't want to throw early here?
        res['warning'] = 'THE ITERABLE RETURNED NO DATA'
    if errors > 0:
        res['errors'] = errors
    if (stat_first := _stat_item(first_item)) is not None:
        res['first'] = stat_first
    if (stat_last := _stat_item(last_item)) is not None:
        res['last'] = stat_last
    return res
 def test_stat_iterable() -> None:
    from datetime import datetime, timedelta, timezone
    from typing import NamedTuple
    dd = datetime.fromtimestamp(123, tz=timezone.utc)
    day = timedelta(days=3)
    class X(NamedTuple):
        x: int
        d: datetime
    def it():
        yield RuntimeError('oops!')
        for i in range(2):
            yield X(x=i, d=dd + day * i)
        yield RuntimeError('bad!')
        for i in range(3):
            yield X(x=i * 10, d=dd + day * (i * 10))
        yield X(x=123, d=dd + day * 50)
    res = _stat_iterable(it())
    assert res['count'] == 1 + 2 + 1 + 3 + 1
    assert res['errors'] == 1 + 1
    assert res['last'] == dd + day * 50
 # experimental, not sure about it..
 def _guess_datetime(x: Any) -> datetime | None:
    # todo hmm implement without exception..
    try:
        d = asdict(x)
    except:  # noqa: E722 bare except
        return None
    for v in d.values():
        if isinstance(v, datetime):
            return v
    return None
 def test_guess_datetime() -> None:
    from dataclasses import dataclass
    from typing import NamedTuple
    from .compat import fromisoformat
    dd = fromisoformat('2021-02-01T12:34:56Z')
    class A(NamedTuple):
        x: int
    class B(NamedTuple):
        x: int
        created: datetime
    assert _guess_datetime(A(x=4)) is None
    assert _guess_datetime(B(x=4, created=dd)) == dd
    @dataclass
    class C:
        a: datetime
        x: int
    assert _guess_datetime(C(a=dd, x=435)) == dd
    # TODO not sure what to return when multiple datetime fields?
    # TODO test @property?
--- a/my/core/structure.py
+++ b/my/core/structure.py
@ -1,22 +1,20 @@
 from __future__ import annotations
 import atexit
 import os
 import shutil
 import sys
 import tarfile
 import tempfile
 import zipfile
-from collections.abc import Generator, Sequence
+import atexit
 from typing import Sequence, Generator, List, Union, Tuple
 from contextlib import contextmanager
 from pathlib import Path
-from .logging import make_logger
+from .common import LazyLogger
 logger = make_logger(__name__, level="info")
-def _structure_exists(base_dir: Path, paths: Sequence[str], *, partial: bool = False) -> bool:
+logger = LazyLogger(__name__, level="info")
 def _structure_exists(base_dir: Path, paths: Sequence[str], partial: bool = False) -> bool:
    """
    Helper function for match_structure to check if
    all subpaths exist at some base directory
@ -38,18 +36,17 @@ def _structure_exists(base_dir: Path, paths: Sequence[str], *, partial: bool = F
 ZIP_EXT = {".zip"}
 TARGZ_EXT = {".tar.gz"}
@contextmanager
 def match_structure(
    base: Path,
-    expected: str | Sequence[str],
+    expected: Union[str, Sequence[str]],
    *,
    partial: bool = False,
-) -> Generator[tuple[Path, ...], None, None]:
+) -> Generator[Tuple[Path, ...], None, None]:
    """
-    Given a 'base' directory or archive (zip/tar.gz), recursively search for one or more paths that match the
+    Given a 'base' directory or zipfile, recursively search for one or more paths that match the
    pattern described in 'expected'. That can be a single string, or a list
    of relative paths (as strings) you expect at the same directory.
@ -57,12 +54,12 @@ def match_structure(
    expected be present, not all of them.
    This reduces the chances of the user misconfiguring gdpr exports, e.g.
-    if they archived the folders instead of the parent directory or vice-versa
+    if they zipped the folders instead of the parent directory or vice-versa
    When this finds a matching directory structure, it stops searching in that subdirectory
    and continues onto other possible subdirectories which could match
-    If base is an archive, this extracts it into a temporary directory
+    If base is a zipfile, this extracts the zipfile into a temporary directory
    (configured by core_config.config.get_tmp_dir), and then searches the extracted
    folder for matching structures
@ -72,21 +69,21 @@ def match_structure(
    export_dir
    ├── exp_2020
-    │   ├── channel_data
+    │   ├── channel_data
-    │   │   ├── data1
+    │   │   ├── data1
-    │   │   └── data2
+    │   │   └── data2
-    │   ├── index.json
+    │   ├── index.json
-    │   ├── messages
+    │   ├── messages
-    │   │   └── messages.csv
+    │   │   └── messages.csv
-    │   └── profile
+    │   └── profile
-    │       └── settings.json
+    │       └── settings.json
    └── exp_2021
        ├── channel_data
-        │   ├── data1
+        │   ├── data1
-        │   └── data2
+        │   └── data2
        ├── index.json
        ├── messages
-        │   └── messages.csv
+        │   └── messages.csv
        └── profile
            └── settings.json
@ -98,12 +95,12 @@ def match_structure(
    This doesn't require an exhaustive list of expected values, but its a good idea to supply
    a complete picture of the expected structure to avoid false-positives
-    This does not recursively decompress archives in the subdirectories,
+    This does not recursively unzip zipfiles in the subdirectories,
-    it only unpacks into a temporary directory if 'base' is an archive
+    it only unzips into a temporary directory if 'base' is a zipfile
    A common pattern for using this might be to use get_files to get a list
-    of archives or top-level gdpr export directories, and use match_structure
+    of zipfiles or top-level gdpr export directories, and use match_structure
-    to search the resulting paths for an export structure you're expecting
+    to search the resulting paths for a export structure you're expecting
    """
    from . import core_config as CC
@ -113,37 +110,29 @@ def match_structure(
        expected = (expected,)
    is_zip: bool = base.suffix in ZIP_EXT
    is_targz: bool = any(base.name.endswith(suffix) for suffix in TARGZ_EXT)
    searchdir: Path = base.absolute()
    try:
-        # if the file given by the user is an archive, create a temporary
+        # if the file given by the user is a zipfile, create a temporary
-        # directory and extract it to that temporary directory
+        # directory and extract the zipfile to that temporary directory
        #
        # this temporary directory is removed in the finally block
-        if is_zip or is_targz:
+        if is_zip:
            # sanity check before we start creating directories/rm-tree'ing things
-            assert base.exists(), f"archive at {base} doesn't exist"
+            assert base.exists(), f"zipfile at {base} doesn't exist"
            searchdir = Path(tempfile.mkdtemp(dir=tdir))
-            if is_zip:
+            # base might already be a ZipPath, and str(base) would end with /
-                # base might already be a ZipPath, and str(base) would end with /
+            zf = zipfile.ZipFile(str(base).rstrip('/'))
-                zf = zipfile.ZipFile(str(base).rstrip('/'))
+            zf.extractall(path=str(searchdir))
-                zf.extractall(path=str(searchdir))
+
            elif is_targz:
                with tarfile.open(str(base)) as tar:
                    # filter is a security feature, will be required param in later python version
                    mfilter = {'filter': 'data'} if sys.version_info[:2] >= (3, 12) else {}
                    tar.extractall(path=str(searchdir), **mfilter)  # type: ignore[arg-type]
            else:
                raise RuntimeError("can't happen")
        else:
            if not searchdir.is_dir():
-                raise NotADirectoryError(f"Expected either a zip/tar.gz archive or a directory, received {searchdir}")
+                raise NotADirectoryError(f"Expected either a zipfile or a directory, received {searchdir}")
-        matches: list[Path] = []
+        matches: List[Path] = []
-        possible_targets: list[Path] = [searchdir]
+        possible_targets: List[Path] = [searchdir]
        while len(possible_targets) > 0:
            p = possible_targets.pop(0)
@ -163,9 +152,9 @@ def match_structure(
    finally:
-        if is_zip or is_targz:
+        if is_zip:
            # make sure we're not mistakenly deleting data
-            assert str(searchdir).startswith(str(tdir)), f"Expected the temporary directory for extracting archive to start with the temporary directory prefix ({tdir}), found {searchdir}"
+            assert str(searchdir).startswith(str(tdir)), f"Expected the temporary directory for extracting zip to start with the temporary directory prefix ({tdir}), found {searchdir}"
            shutil.rmtree(str(searchdir))
@ -174,7 +163,7 @@ def warn_leftover_files() -> None:
    from . import core_config as CC
    base_tmp: Path = CC.config.get_tmp_dir()
-    leftover: list[Path] = list(base_tmp.iterdir())
+    leftover: List[Path] = list(base_tmp.iterdir())
    if leftover:
        logger.debug(f"at exit warning: Found leftover files in temporary directory '{leftover}'. this may be because you have multiple hpi processes running -- if so this can be ignored")
--- a/my/core/tests/auto_stats.py
+++ b/my/core/tests/auto_stats.py
@ -1,12 +1,11 @@
 """
 Helper 'module' for test_guess_stats
 """
 from collections.abc import Iterable, Iterator, Sequence
 from contextlib import contextmanager
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Iterable, Sequence, Iterator
@dataclass
--- a/my/core/tests/common.py
+++ b/my/core/tests/common.py
@ -1,32 +0,0 @@
 from __future__ import annotations
 import os
 from collections.abc import Iterator
 from contextlib import contextmanager
 import pytest
 V = 'HPI_TESTS_USES_OPTIONAL_DEPS'
 # TODO use it for serialize tests that are using simplejson/orjson?
 skip_if_uses_optional_deps = pytest.mark.skipif(
    V not in os.environ,
    reason=f'test only works when optional dependencies are installed. Set env variable {V}=true to override.',
 )
 # TODO maybe move to hpi core?
@contextmanager
 def tmp_environ_set(key: str, value: str | None) -> Iterator[None]:
    prev_value = os.environ.get(key)
    if value is None:
        os.environ.pop(key, None)
    else:
        os.environ[key] = value
    try:
        yield
    finally:
        if prev_value is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = prev_value
--- a/my/core/tests/denylist.py
+++ b/my/core/tests/denylist.py
@ -1,9 +1,8 @@
 import json
 import warnings
 from collections.abc import Iterator
 from datetime import datetime
 import json
 from pathlib import Path
-from typing import NamedTuple
+from typing import NamedTuple, Iterator
 import warnings
 from ..denylist import DenyList
@ -92,7 +91,8 @@ def test_denylist(tmp_path: Path) -> None:
        assert "59.40.113.87" not in [i.addr for i in filtered]
-        data_json = json.loads(tf.read_text())
+        with open(tf, "r") as f:
            data_json = json.loads(f.read())
        assert data_json == [
            {
--- a/my/core/tests/sqlite.py
+++ b/my/core/tests/sqlite.py
@ -1,7 +1,7 @@
 import shutil
 import sqlite3
 from concurrent.futures import ProcessPoolExecutor
 from pathlib import Path
 import shutil
 import sqlite3
 from tempfile import TemporaryDirectory
 from ..sqlite import sqlite_connect_immutable, sqlite_copy_and_open
--- a/my/core/tests/structure.py
+++ b/my/core/tests/structure.py
@ -1,8 +1,9 @@
 from pathlib import Path
 from ..structure import match_structure
 import pytest
 from ..structure import match_structure
 structure_data: Path = Path(__file__).parent / "structure_data"
@ -14,9 +15,8 @@ def test_gdpr_structure_exists() -> None:
        assert results == (structure_data / "gdpr_subdirs" / "gdpr_export",)
-@pytest.mark.parametrize("archive", ["gdpr_export.zip", "gdpr_export.tar.gz"])
+def test_gdpr_unzip() -> None:
-def test_gdpr_unpack(archive: str) -> None:
+    with match_structure(structure_data / "gdpr_export.zip", expected=gdpr_expected) as results:
    with match_structure(structure_data / archive, expected=gdpr_expected) as results:
        assert len(results) == 1
        extracted = results[0]
        index_file = extracted / "messages" / "index.csv"
@ -33,6 +33,6 @@ def test_match_partial() -> None:
 def test_not_directory() -> None:
-    with pytest.raises(NotADirectoryError, match=r"Expected either a zip/tar.gz archive or a directory"):
+    with pytest.raises(NotADirectoryError, match=r"Expected either a zipfile or a directory"):
        with match_structure(structure_data / "messages/index.csv", expected=gdpr_expected):
            pass
--- a/my/core/tests/structure_data/gdpr_export.tar.gz
+++ b/my/core/tests/structure_data/gdpr_export.tar.gz
--- a/my/core/tests/test_cachew.py
+++ b/my/core/tests/test_cachew.py
@ -1,52 +0,0 @@
 from __future__ import annotations
 from .common import skip_if_uses_optional_deps as pytestmark
 # TODO ugh, this is very messy.. need to sort out config overriding here
 def test_cachew() -> None:
    from cachew import settings
    settings.ENABLE = True  # by default it's off in tests (see conftest.py)
    from my.core.cachew import mcachew
    called = 0
    # TODO ugh. need doublewrap or something to avoid having to pass parens
    @mcachew()
    def cf() -> list[int]:
        nonlocal called
        called += 1
        return [1, 2, 3]
    list(cf())
    cc = called
    # todo ugh. how to clean cache?
    # assert called == 1 # precondition, to avoid turdes from previous tests
    assert list(cf()) == [1, 2, 3]
    assert called == cc
 def test_cachew_dir_none() -> None:
    from cachew import settings
    settings.ENABLE = True  # by default it's off in tests (see conftest.py)
    from my.core.cachew import cache_dir, mcachew
    from my.core.core_config import _reset_config as reset
    with reset() as cc:
        cc.cache_dir = None
        called = 0
        @mcachew(cache_path=cache_dir() / 'ctest')
        def cf() -> list[int]:
            nonlocal called
            called += 1
            return [called, called, called]
        assert list(cf()) == [1, 1, 1]
        assert list(cf()) == [2, 2, 2]
--- a/my/core/tests/test_cli.py
+++ b/my/core/tests/test_cli.py
@ -1,6 +1,6 @@
 import os
 import sys
 from subprocess import check_call
 import sys
 def test_lists_modules() -> None:
--- a/my/core/tests/test_config.py
+++ b/my/core/tests/test_config.py
@ -1,178 +0,0 @@
 """
 Various tests that are checking behaviour of user config wrt to various things
 """
 import os
 import sys
 from pathlib import Path
 import pytest
 import pytz
 import my.config
 from my.core import notnone
 from my.demo import items, make_config
 from .common import tmp_environ_set
 # TODO would be nice to randomize test order here to catch various config issues
 # run the same test multiple times to make sure there are not issues with import order etc
@pytest.mark.parametrize('run_id', ['1', '2'])
 def test_override_config(tmp_path: Path, run_id: str) -> None:
    class user_config:
        username = f'user_{run_id}'
        data_path = f'{tmp_path}/*.json'
    my.config.demo = user_config  # type: ignore[misc, assignment]
    [item1, item2] = items()
    assert item1.username == f'user_{run_id}'
    assert item2.username == f'user_{run_id}'
@pytest.mark.skip(reason="won't work at the moment because of inheritance")
 def test_dynamic_config_simplenamespace(tmp_path: Path) -> None:
    from types import SimpleNamespace
    user_config = SimpleNamespace(
        username='user3',
        data_path=f'{tmp_path}/*.json',
    )
    my.config.demo = user_config  # type: ignore[misc, assignment]
    cfg = make_config()
    assert cfg.username == 'user3'
 def test_mixin_attribute_handling(tmp_path: Path) -> None:
    """
    Tests that arbitrary mixin attributes work with our config handling pattern
    """
    nytz = pytz.timezone('America/New_York')
    class user_config:
        # check that override is taken into the account
        timezone = nytz
        irrelevant = 'hello'
        username = 'UUU'
        data_path = f'{tmp_path}/*.json'
    my.config.demo = user_config  # type: ignore[misc, assignment]
    cfg = make_config()
    assert cfg.username == 'UUU'
    # mypy doesn't know about it, but the attribute is there
    assert getattr(cfg, 'irrelevant') == 'hello'
    # check that overridden default attribute is actually getting overridden
    assert cfg.timezone == nytz
    [item1, item2] = items()
    assert item1.username == 'UUU'
    assert notnone(item1.dt.tzinfo).zone == nytz.zone  # type: ignore[attr-defined]
    assert item2.username == 'UUU'
    assert notnone(item2.dt.tzinfo).zone == nytz.zone  # type: ignore[attr-defined]
 # use multiple identical tests to make sure there are no issues with cached imports etc
@pytest.mark.parametrize('run_id', ['1', '2'])
 def test_dynamic_module_import(tmp_path: Path, run_id: str) -> None:
    """
    Test for dynamic hackery in config properties
     e.g. importing some external modules
    """
    ext = tmp_path / 'external'
    ext.mkdir()
    (ext / '__init__.py').write_text(
        '''
 def transform(x):
    from .submodule import do_transform
    return do_transform(x)
 '''
    )
    (ext / 'submodule.py').write_text(
        f'''
 def do_transform(x):
    return {{"total_{run_id}": sum(x.values())}}
 '''
    )
    class user_config:
        username = 'someuser'
        data_path = f'{tmp_path}/*.json'
        external = f'{ext}'
    my.config.demo = user_config  # type: ignore[misc, assignment]
    [item1, item2] = items()
    assert item1.raw == {f'total_{run_id}': 1 + 123}, item1
    assert item2.raw == {f'total_{run_id}': 2 + 456}, item2
    # need to reset these modules, otherwise they get cached
    # kind of relevant to my.core.cfg.tmp_config
    sys.modules.pop('external', None)
    sys.modules.pop('external.submodule', None)
@pytest.mark.parametrize('run_id', ['1', '2'])
 def test_my_config_env_variable(tmp_path: Path, run_id: str) -> None:
    """
    Tests handling of MY_CONFIG variable
    """
    # ugh. so by this point, my.config is already loaded (default stub), so we need to unload it
    sys.modules.pop('my.config', None)
    # but my.config itself relies on my.core.init hook, so unless it's reloaded too it wouldn't help
    sys.modules.pop('my.core', None)
    sys.modules.pop('my.core.init', None)
    # it's a bit of a mouthful of course, but in most cases MY_CONFIG would be set once
    #  , and before hpi runs, so hopefully it's not a huge deal
    cfg_dir = tmp_path / 'my'
    cfg_file = cfg_dir / 'config.py'
    cfg_dir.mkdir()
    cfg_file.write_text(
        f'''
 # print("IMPORTING CONFIG {run_id}")
 class demo:
    username = 'xxx_{run_id}'
    data_path = r'{tmp_path}{os.sep}*.json'  # need raw string for windows...
 '''
    )
    with tmp_environ_set('MY_CONFIG', str(tmp_path)):
        [item1, item2] = items()
        assert item1.username == f'xxx_{run_id}'
        assert item2.username == f'xxx_{run_id}'
        # sigh.. so this is cached in sys.path
        #  so it takes precedence later during next import, not giving the MY_CONFIG hook
        #  (imported from builtin my.config) to kick in
        sys.path.remove(str(tmp_path))
        # FIXME ideally this shouldn't be necessary?
        #  remove this after we fixup my.tests.reddit and my.tests.commits
        #  (they were failing ci when running all tests)
        sys.modules.pop('my.config', None)
@pytest.fixture(autouse=True)
 def prepare_data(tmp_path: Path):
    (tmp_path / 'data.json').write_text(
        '''
 [
    {"key": 1, "value": 123},
    {"key": 2, "value": 456}
 ]
 '''
    )
--- a/my/core/tests/test_get_files.py
+++ b/my/core/tests/test_get_files.py
@ -1,15 +1,16 @@
 import os
 from pathlib import Path
 import shutil
 import tempfile
 import zipfile
 from pathlib import Path
 from typing import TYPE_CHECKING
-
+import zipfile
 import pytest
 from ..common import get_files
 from ..compat import windows
 from ..kompress import CPath, ZipPath
 import pytest
 # hack to replace all /tmp with 'real' tmp dir
 # not ideal, but makes tests more concise
@ -55,9 +56,8 @@ def test_single_file() -> None:
    '''
    assert get_files('/tmp/hpi_test/file.ext') == (Path('/tmp/hpi_test/file.ext'),)
    is_windows = os.name == 'nt'
    "if the path starts with ~, we expand it"
-    if not is_windows:  # windows doesn't have bashrc.. ugh
+    if not windows:  # windows doesn't have bashrc.. ugh
        assert get_files('~/.bashrc') == (Path('~').expanduser() / '.bashrc',)
@ -175,17 +175,12 @@ TMP = tempfile.gettempdir()
 test_path = Path(TMP) / 'hpi_test'
-@pytest.fixture(autouse=True)
+def setup():
 def prepare():
    teardown()
    test_path.mkdir()
    try:
        yield
    finally:
        teardown()
-def teardown() -> None:
+def teardown():
    if test_path.is_dir():
        shutil.rmtree(test_path)
--- a/my/core/tests/test_tmp_config.py
+++ b/my/core/tests/test_tmp_config.py
@ -12,7 +12,7 @@ def _init_default_config() -> None:
 def test_tmp_config() -> None:
    ## ugh. ideally this would be on the top level (would be a better test)
-    ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up
+    ## but pytest imports eveything first, executes hooks, and some reset_modules() fictures mess stuff up
    ## later would be nice to be a bit more careful about them
    _init_default_config()
    from my.simple import items
--- a/my/core/time.py
+++ b/my/core/time.py
@ -1,11 +1,9 @@
-from __future__ import annotations
+from functools import lru_cache
-
+from typing import Sequence, Dict
 from collections.abc import Sequence
 from functools import cache, lru_cache
 import pytz
-from .types import datetime_aware, datetime_naive
+from .common import datetime_aware, datetime_naive
 def user_forced() -> Sequence[str]:
@ -13,24 +11,22 @@ def user_forced() -> Sequence[str]:
    # https://stackoverflow.com/questions/36067621/python-all-possible-timezone-abbreviations-for-given-timezone-name-and-vise-ve
    try:
        from my.config import time as user_config
-
+        return user_config.tz.force_abbreviations # type: ignore[attr-defined]
        return user_config.tz.force_abbreviations  # type: ignore[attr-defined]  # noqa: TRY300
        # note: noqa since we're catching case where config doesn't have attribute here as well
    except:
        # todo log/apply policy
        return []
@lru_cache(1)
-def _abbr_to_timezone_map() -> dict[str, pytz.BaseTzInfo]:
+def _abbr_to_timezone_map() -> Dict[str, pytz.BaseTzInfo]:
    # also force UTC to always correspond to utc
    # this makes more sense than Zulu it ends up by default
-    timezones = [*pytz.all_timezones, 'UTC', *user_forced()]
+    timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
-    res: dict[str, pytz.BaseTzInfo] = {}
+    res: Dict[str, pytz.BaseTzInfo] = {}
    for tzname in timezones:
        tz = pytz.timezone(tzname)
-        infos = getattr(tz, '_tzinfos', [])  # not sure if can rely on attr always present?
+        infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
        for info in infos:
            abbr = info[-1]
            # todo could support this with a better error handling strategy?
@ -46,7 +42,7 @@ def _abbr_to_timezone_map() -> dict[str, pytz.BaseTzInfo]:
    return res
-@cache
+@lru_cache(maxsize=None)
 def abbr_to_timezone(abbr: str) -> pytz.BaseTzInfo:
    return _abbr_to_timezone_map()[abbr]
--- a/Show more
+++ b/Show more