diff --git a/.ci/run b/.ci/run
index 7656575..fe2719e 100755
--- a/.ci/run
+++ b/.ci/run
@@ -11,8 +11,6 @@ if ! command -v sudo; then
}
fi
-# --parallel-live to show outputs while it's running
-tox_cmd='run-parallel --parallel-live'
if [ -n "${CI-}" ]; then
# install OS specific stuff here
case "$OSTYPE" in
@@ -22,8 +20,7 @@ if [ -n "${CI-}" ]; then
;;
cygwin* | msys* | win*)
# windows
- # ugh. parallel stuff seems super flaky under windows, some random failures, "file used by other process" and crap like that
- tox_cmd='run'
+ :
;;
*)
# must be linux?
@@ -40,9 +37,5 @@ if ! command -v python3 &> /dev/null; then
PY_BIN="python"
fi
-
-# TODO hmm for some reason installing uv with pip and then running
-# "$PY_BIN" -m uv tool fails with missing setuptools error??
-# just uvx directly works, but it's not present in PATH...
-"$PY_BIN" -m pip install --user pipx
-"$PY_BIN" -m pipx run uv tool run --with=tox-uv tox $tox_cmd "$@"
+"$PY_BIN" -m pip install --user tox
+"$PY_BIN" -m tox --parallel --parallel-live "$@"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 111d0e9..53d8e53 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -21,20 +21,19 @@ on:
jobs:
build:
strategy:
- fail-fast: false
matrix:
platform: [ubuntu-latest, macos-latest, windows-latest]
- python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+ python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
exclude: [
# windows runners are pretty scarce, so let's only run lowest and highest python version
+ {platform: windows-latest, python-version: '3.9' },
{platform: windows-latest, python-version: '3.10'},
{platform: windows-latest, python-version: '3.11'},
- {platform: windows-latest, python-version: '3.12'},
# same, macos is a bit too slow and ubuntu covers python quirks well
+ {platform: macos-latest , python-version: '3.9' },
{platform: macos-latest , python-version: '3.10' },
{platform: macos-latest , python-version: '3.11' },
- {platform: macos-latest , python-version: '3.12' },
]
runs-on: ${{ matrix.platform }}
@@ -64,13 +63,11 @@ jobs:
- if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms
uses: actions/upload-artifact@v4
with:
- include-hidden-files: true
name: .coverage.mypy-misc_${{ matrix.platform }}_${{ matrix.python-version }}
path: .coverage.mypy-misc/
- if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms
uses: actions/upload-artifact@v4
with:
- include-hidden-files: true
name: .coverage.mypy-core_${{ matrix.platform }}_${{ matrix.python-version }}
path: .coverage.mypy-core/
@@ -84,7 +81,7 @@ jobs:
- uses: actions/setup-python@v5
with:
- python-version: '3.10'
+ python-version: '3.8'
- uses: actions/checkout@v4
with:
diff --git a/.gitignore b/.gitignore
index 65ba630..19c3380 100644
--- a/.gitignore
+++ b/.gitignore
@@ -155,9 +155,6 @@ celerybeat-schedule
.dmypy.json
dmypy.json
-# linters
-.ruff_cache/
-
# Pyre type checker
.pyre/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d60ef35..3dd19df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,7 +20,7 @@ General/my.core changes:
- e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test
Modules:
-- some initial work on filling **InfluxDB** with HPI data
+- some initial work on filling **InfluxDB** with HPI data
- pinboard
- 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly
diff --git a/README.org b/README.org
index 79621a5..c065a0c 100644
--- a/README.org
+++ b/README.org
@@ -723,10 +723,10 @@ If you want to write modules for personal use but don't want to merge them into
Other HPI Repositories:
-- [[https://github.com/purarue/HPI][purarue/HPI]]
+- [[https://github.com/seanbreckenridge/HPI][seanbreckenridge/HPI]]
- [[https://github.com/madelinecameron/hpi][madelinecameron/HPI]]
-If you want to create your own to create your own modules/override something here, you can use the [[https://github.com/purarue/HPI-template][template]].
+If you want to create your own modules/override something here, you can use the [[https://github.com/seanbreckenridge/HPI-template][template]].
* Related links
:PROPERTIES:
diff --git a/doc/DENYLIST.md b/doc/DENYLIST.md
index 3d8dea0..440715c 100644
--- a/doc/DENYLIST.md
+++ b/doc/DENYLIST.md
@@ -76,7 +76,7 @@ This would typically be used in an overridden `all.py` file, or in a one-off scr
which you may want to filter out some items from a source, progressively adding more
items to the denylist as you go.
-A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/purarue/HPI)):
+A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/seanbreckenridge/HPI)):
```python
from typing import Iterator
@@ -119,9 +119,9 @@ python3 -c 'from my.ip import all; all.deny.deny_cli(all.ips())'
To edit the `all.py`, you could either:
- install it as editable (`python3 -m pip install --user -e ./HPI`), and then edit the file directly
-- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/purarue/reorder_editable), and possibly the [`HPI-template`](https://github.com/purarue/HPI-template) to create your own HPI namespace package to create your own `all.py` file.
+- or, create a namespace package, which splits the package across multiple directories. For info on that, see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/seanbreckenridge/reorder_editable), and possibly the [`HPI-template`](https://github.com/seanbreckenridge/HPI-template) to create your own HPI namespace package with your own `all.py` file.
-For a real example of this see, [purarue/HPI-personal](https://github.com/purarue/HPI-personal/blob/master/my/ip/all.py)
+For a real example of this, see [seanbreckenridge/HPI-personal](https://github.com/seanbreckenridge/HPI-personal/blob/master/my/ip/all.py)
Sidenote: the reason why we want to specifically override
the all.py and not just create a script that filters out the items you're
diff --git a/doc/MODULES.org b/doc/MODULES.org
index 347d88d..9f48024 100644
--- a/doc/MODULES.org
+++ b/doc/MODULES.org
@@ -76,7 +76,7 @@ The config snippets below are meant to be modified accordingly and *pasted into
You don't have to set up all modules at once, it's recommended to do it gradually, to get the feel of how HPI works.
-For an extensive/complex example, you can check out ~@purarue~'s [[https://github.com/purarue/dotfiles/blob/master/.config/my/my/config/__init__.py][config]]
+For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py][config]]
# Nested Configurations before the doc generation using the block below
** [[file:../my/reddit][my.reddit]]
@@ -96,7 +96,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
class pushshift:
'''
- Uses [[https://github.com/purarue/pushshift_comment_export][pushshift]] to get access to old comments
+ Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments
'''
# path[s]/glob to the exported JSON data
@@ -106,7 +106,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
** [[file:../my/browser/][my.browser]]
- Parses browser history using [[http://github.com/purarue/browserexport][browserexport]]
+ Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
#+begin_src python
class browser:
@@ -132,7 +132,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
You might also be able to use [[file:../my/location/via_ip.py][my.location.via_ip]] which uses =my.ip.all= to
provide geolocation data for an IPs (though no IPs are provided from any
- of the sources here). For an example of usage, see [[https://github.com/purarue/HPI/tree/master/my/ip][here]]
+ of the sources here). For an example of usage, see [[https://github.com/seanbreckenridge/HPI/tree/master/my/ip][here]]
#+begin_src python
class location:
@@ -256,9 +256,9 @@ for cls, p in modules:
** [[file:../my/google/takeout/parser.py][my.google.takeout.parser]]
- Parses Google Takeout using [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]]
+ Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]
- See [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts
+ See [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts
If the =DISABLE_TAKEOUT_CACHE= environment variable is set, this won't
cache individual exports in =~/.cache/google_takeout_parser=
diff --git a/doc/MODULE_DESIGN.org b/doc/MODULE_DESIGN.org
index 442dbf2..7aedf2f 100644
--- a/doc/MODULE_DESIGN.org
+++ b/doc/MODULE_DESIGN.org
@@ -67,7 +67,7 @@ If you want to disable a source, you have a few options.
... that suppresses the warning message and lets you use ~my.location.all~ without having to change any lines of code
-Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/purarue/HPI#partially-in-usewith-overrides][purarue]]s location and ip modules.
+Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/seanbreckenridge/HPI#partially-in-usewith-overrides][seanbreckenridge]]'s location and ip modules.
This is of course not required for personal or single file modules, its just the pattern that seems to have the least amount of friction for the user, while being extendable, and without using a bulky plugin system to let users add additional sources.
@@ -208,13 +208,13 @@ Where ~lastfm.py~ is your version of ~my.lastfm~, which you've copied from this
Then, running ~python3 -m pip install -e .~ in that directory would install that as part of the namespace package, and assuming (see below for possible issues) this appears on ~sys.path~ before the upstream repository, your ~lastfm.py~ file overrides the upstream. Adding more files, like ~my.some_new_module~ into that directory immediately updates the global ~my~ package -- allowing you to quickly add new modules without having to re-install.
-If you install both directories as editable packages (which has the benefit of any changes you making in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/purarue/reorder_editable][reorder_editable]] repository.
+If you install both directories as editable packages (which has the benefit of any changes you make in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/seanbreckenridge/reorder_editable][reorder_editable]] repository.
There is no limit to how many directories you could install into a single namespace package, which could be a possible way for people to install additional HPI modules, without worrying about the module count here becoming too large to manage.
-There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/purarue/HPI-template][template]] to get started.
+There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/seanbreckenridge/HPI-template][template]] to get started.
-Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/purarue/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example.
+Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/seanbreckenridge/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example.
You may use the other modules or [[https://github.com/karlicoss/hpi-personal-overlay][my overlay]] as reference, but python packaging is already a complicated issue, before adding complexities like namespace packages and editable installs on top of it... If you're having trouble extending HPI in this fashion, you can open an issue here, preferably with a link to your code/repository and/or ~setup.py~ you're trying to use.
diff --git a/doc/OVERLAYS.org b/doc/OVERLAYS.org
index 7bafa48..1e6cf8f 100644
--- a/doc/OVERLAYS.org
+++ b/doc/OVERLAYS.org
@@ -10,7 +10,7 @@ Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102
# You can see them TODO in overlays dir
-Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.
+Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.
- =main= package structure
# TODO do links
@@ -19,7 +19,7 @@ Consider a toy package/module structure with minimal code, without any actual da
Extracts Twitter data from GDPR archive.
- =my/twitter/all.py=
Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used.
- This will be overridden by =overlay=.
+ This will be overridden by =overlay=.
- =my/twitter/common.py=
Contains helper function to merge data, so they can be reused by overlay's =all.py=.
- =my/reddit.py=
@@ -66,7 +66,7 @@ This basically means that modules will be searched in both paths, with overlay t
** Installing with =--use-pep517=
-See here for discussion https://github.com/purarue/reorder_editable/issues/2, but TLDR it should work similarly.
+See here for discussion https://github.com/seanbreckenridge/reorder_editable/issues/2, but TLDR it should work similarly.
* Testing runtime behaviour (editable install)
@@ -126,7 +126,7 @@ https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/myp
For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683
-But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
+But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
Let's see what's going on with imports:
: $ mypy --namespace-packages --strict -p my --follow-imports=error
diff --git a/doc/QUERY.md b/doc/QUERY.md
index 9a5d9d3..b672dff 100644
--- a/doc/QUERY.md
+++ b/doc/QUERY.md
@@ -97,9 +97,9 @@ By default, this just returns the items in the order they were returned by the f
hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream
Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
- message='sources.smscalls: propagate errors if there are breaking '
+ message='sources.smscalls: propagate errors if there are breaking '
'schema changes',
- repo='/home/username/Repos/promnesia-fork',
+ repo='/home/sean/Repos/promnesia-fork',
sha='22a434fca9a28df9b0915ccf16368df129d2c9ce',
ref='refs/heads/smscalls-handle-result')
```
@@ -195,7 +195,7 @@ To preview, you can use something like [`qgis`](https://qgis.org/en/site/) or fo
-(Sidenote: this is [`@purarue`](https://github.com/purarue/)s locations, on a trip to Chicago)
+(Sidenote: this is [`@seanbreckenridge`](https://github.com/seanbreckenridge/)'s locations, on a trip to Chicago)
## Python reference
@@ -301,4 +301,4 @@ The `hpi query` command is a CLI wrapper around the code in [`query.py`](../my/c
If you specify a range, drop_unsorted is forced to be True
```
-Those can be imported and accept any sort of iterator, `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/purarue/HPI-personal/blob/master/scripts/listens) which just passes an generator (iterator) as the first argument to `query_range`
+Those can be imported and accept any sort of iterator, `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/seanbreckenridge/HPI-personal/blob/master/scripts/listens) which just passes a generator (iterator) as the first argument to `query_range`
diff --git a/doc/SETUP.org b/doc/SETUP.org
index ee9571c..0fced62 100644
--- a/doc/SETUP.org
+++ b/doc/SETUP.org
@@ -387,7 +387,7 @@ But there is an extra caveat: rexport is already coming with nice [[https://gith
Several other HPI modules are following a similar pattern: hypothesis, instapaper, pinboard, kobo, etc.
-Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/purarue/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments=
+Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/seanbreckenridge/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments=
** Twitter
diff --git a/doc/experiments_with_config/run b/doc/experiments_with_config/run
new file mode 100755
index 0000000..8e77e8a
--- /dev/null
+++ b/doc/experiments_with_config/run
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -eu
+cd "$(dirname "0")"
+
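+# usage: ./run <name>  e.g. './run via_dataclass' -- type checks and runs src/pkg/<name>.py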
+WHAT="$1"
+
+export PYTHONPATH=src
+
+ERROR=0
+python3 -m mypy -p "pkg.$WHAT" || ERROR=1
+python3 -c "import pkg.$WHAT as M; M.run()" || ERROR=1
+exit "$ERROR"
diff --git a/doc/experiments_with_config/src/pkg/config.py b/doc/experiments_with_config/src/pkg/config.py
new file mode 100644
index 0000000..8bc30e2
--- /dev/null
+++ b/doc/experiments_with_config/src/pkg/config.py
@@ -0,0 +1,74 @@
+from dataclasses import dataclass
+
+
+# 'bare' config, no typing annotations even
+# current_impl : works both mypy and runtime
+# if we comment out export_path, mypy DOES NOT fail (bad!)
+# via_dataclass : FAILS both mypy and runtime
+# via_properties: works both mypy and runtime
+# if we comment out export_path, mypy fails (good!)
+# src/pkg/via_properties.py:32:12:32:28: error: Cannot instantiate abstract class "combined_config" with abstract attribute "export_path" [abstract]
+# return combined_config()
+class module_config_1:
+ custom_setting = 'adhoc setting'
+ export_path = '/path/to/data'
+
+
+# config defined with @dataclass annotation
+# current_impl : works in runtime
+# mypy DOES NOT pass
+# seems like it doesn't like that non-default attributes (export_path: str) in module config
+# are following default attributes (export_path in this config)
+# via_dataclass : works both mypy and runtime
+# if we comment out export_path, mypy fails (good!)
+# src/pkg/via_dataclass.py:56:12:56:28: error: Missing positional argument "export_path" in call to "combined_config" [call-arg]
+# return combined_config()
+# via_properties: works both mypy and runtime
+# if we comment out export_path, mypy fails (good!)
+# same error as above
+
+@dataclass
+class module_config_2:
+ custom_setting: str = 'adhoc setting'
+ export_path: str = '/path/to/data'
+
+
+# NOTE: ok, if a config attribute happened to be a classproperty, then it fails mypy
+# but it still works in runtime, which is good, easy to migrate if necessary
+
+
+# mixed style config, some attributes are defined via property
+# this is quite useful if you want to defer some computations from config import time
+# current_impl : works both mypy and runtime
+# if we comment out export_path, mypy DOES NOT fail (bad!)
+# via_dataclass : FAILS both mypy and runtime
+# via_properties: works both mypy and runtime
+# if we comment out export_path, mypy fails (good!)
+# same error as above
+class module_config_3:
+ custom_setting: str = 'adhoc setting'
+
+ @property
+ def export_path(self) -> str:
+ return '/path/to/data'
+
+
+# same mixed style as above, but also a @dataclass annotation
+# via_dataclass: FAILS both mypy and runtime
+# src/pkg/via_dataclass.py: note: In function "make_config":
+# src/pkg/via_dataclass.py:53:5:54:12: error: Definition of "export_path" in base class "module_config" is incompatible with definition in base class "config" [misc]
+# class combined_config(user_config, config):
+# ^
+# src/pkg/via_dataclass.py:56:12:56:28: error: Missing positional argument "export_path" in call to "combined_config" [call-arg]
+# return combined_config()
+# ^~~~~~~~~~~~~~~~~
+# via_properties: works both mypy and runtime
+# if we comment out export_path, mypy fails (good!)
+# same error as above
+# NOTE: 'classproperty' is not a builtin -- a minimal descriptor stand-in is
+# defined here so this example file is self-contained (assumption; HPI proper ships its own)
+class classproperty:
+    def __init__(self, f):
+        self.f = f
+
+    def __get__(self, obj, cls):
+        return self.f(cls)
+
+
+@dataclass
+class module_config_4:
+ custom_setting: str = 'adhoc setting'
+
+ @classproperty
+ def export_path(self) -> str:
+ return '/path/to/data'
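+
+
+# NOTE: the runner modules (current_impl / via_dataclass / via_properties) import
+# 'module_config' from here; alias it to whichever style you want to experiment with
+# (assumed glue so the example package runs end to end)
+module_config = module_config_1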
diff --git a/doc/experiments_with_config/src/pkg/current_impl.py b/doc/experiments_with_config/src/pkg/current_impl.py
new file mode 100644
index 0000000..dbb4756
--- /dev/null
+++ b/doc/experiments_with_config/src/pkg/current_impl.py
@@ -0,0 +1,29 @@
+"""
+Currently 'preferred' way of defining configs as of 20240818
+"""
+from dataclasses import dataclass
+
+from pkg.config import module_config as user_config
+
+
+@dataclass
+class config(user_config):
+ export_path: str
+
+ cache_path: str | None = None
+
+
+def run() -> None:
+ print('hello from', __name__)
+
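+    # note: the class itself acts as the config -- attributes are looked up at class level, no instantiation needed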
+ cfg = config
+
+ # check a required attribute
+ print(f'{cfg.export_path=}')
+
+ # check a non-required attribute with default value
+ print(f'{cfg.cache_path=}')
+
+ # check a 'dynamically' defined attribute in user config
+ print(f'{cfg.custom_setting=}')
+
diff --git a/doc/experiments_with_config/src/pkg/py.typed b/doc/experiments_with_config/src/pkg/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/doc/experiments_with_config/src/pkg/via_dataclass.py b/doc/experiments_with_config/src/pkg/via_dataclass.py
new file mode 100644
index 0000000..b2135d8
--- /dev/null
+++ b/doc/experiments_with_config/src/pkg/via_dataclass.py
@@ -0,0 +1,38 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class config:
+ export_path: str
+ cache_path: str | None = None
+
+
+def make_config() -> config:
+ from pkg.config import module_config as user_config
+
+ # NOTE: order is important -- attributes would be added in reverse order
+ # e.g. first from config, then from user_config -- just what we want
+ # NOTE: in theory, this works without @dataclass annotation on combined_config
+ # however, having @dataclass adds extra type checks about missing required attributes
+ # when we instantiate combined_config
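+    # (MRO: combined_config -> user_config -> config, so user_config attributes take precedence)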
+ @dataclass
+ class combined_config(user_config, config): ...
+
+ return combined_config()
+
+
+def run() -> None:
+ print('hello from', __name__)
+
+ cfg = make_config()
+
+ # check a required attribute
+ print(f'{cfg.export_path=}')
+
+ # check a non-required attribute with default value
+ print(f'{cfg.cache_path=}')
+
+ # check a 'dynamically' defined attribute in user config
+ # NOTE: mypy fails as it has no static knowledge of the attribute
+ # but kinda expected, not much we can do
+ print(f'{cfg.custom_setting=}') # type: ignore[attr-defined]
diff --git a/doc/experiments_with_config/src/pkg/via_properties.py b/doc/experiments_with_config/src/pkg/via_properties.py
new file mode 100644
index 0000000..b40b498
--- /dev/null
+++ b/doc/experiments_with_config/src/pkg/via_properties.py
@@ -0,0 +1,38 @@
+from abc import abstractmethod
+
+class config:
+ @property
+ @abstractmethod
+ def export_path(self) -> str:
+ raise NotImplementedError
+
+ @property
+ def cache_path(self) -> str | None:
+ return None
+
+
+def make_config() -> config:
+ from pkg.config import module_config as user_config
+
+ # NOTE: order is important -- attributes would be added in reverse order
+ # e.g. first from config, then from user_config -- just what we want
+ class combined_config(user_config, config): ...
+
+ return combined_config()
+
+
+def run() -> None:
+ print('hello from', __name__)
+
+ cfg = make_config()
+
+ # check a required attribute
+ print(f'{cfg.export_path=}')
+
+ # check a non-required attribute with default value
+ print(f'{cfg.cache_path=}')
+
+ # check a 'dynamically' defined attribute in user config
+ # NOTE: mypy fails as it has no static knowledge of the attribute
+ # but kinda expected, not much we can do
+ print(f'{cfg.custom_setting=}') # type: ignore[attr-defined]
diff --git a/misc/.flake8-karlicoss b/misc/.flake8-karlicoss
index 5933253..3c98b96 100644
--- a/misc/.flake8-karlicoss
+++ b/misc/.flake8-karlicoss
@@ -32,6 +32,6 @@ ignore =
#
# as a reference:
-# https://github.com/purarue/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
+# https://github.com/seanbreckenridge/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
# and this https://github.com/karlicoss/HPI/pull/151
# find ./my | entr flake8 --ignore=E402,E501,E741,W503,E266,E302,E305,E203,E261,E252,E251,E221,W291,E225,E303,E702,E202,F841,E731,E306,E127 E722,E231 my | grep -v __NOT_HPI_MODULE__
diff --git a/my/arbtt.py b/my/arbtt.py
index 5d4bf8e..6de8cb2 100644
--- a/my/arbtt.py
+++ b/my/arbtt.py
@@ -2,22 +2,20 @@
[[https://github.com/nomeata/arbtt#arbtt-the-automatic-rule-based-time-tracker][Arbtt]] time tracking
'''
-from __future__ import annotations
-
REQUIRES = ['ijson', 'cffi']
# NOTE likely also needs libyajl2 from apt or elsewhere?
-from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from pathlib import Path
+from typing import Sequence, Iterable, List, Optional
def inputs() -> Sequence[Path]:
try:
from my.config import arbtt as user_config
except ImportError:
- from my.core.warnings import low
+ from .core.warnings import low
low("Couldn't find 'arbtt' config section, falling back to the default capture.log (usually in HOME dir). Add 'arbtt' section with logfiles = '' to suppress this warning.")
return []
else:
@@ -57,7 +55,7 @@ class Entry:
return fromisoformat(ds)
@property
- def active(self) -> str | None:
+ def active(self) -> Optional[str]:
# NOTE: WIP, might change this in the future...
ait = (w for w in self.json['windows'] if w['active'])
a = next(ait, None)
@@ -76,18 +74,17 @@ class Entry:
def entries() -> Iterable[Entry]:
inps = list(inputs())
- base: list[PathIsh] = ['arbtt-dump', '--format=json']
+ base: List[PathIsh] = ['arbtt-dump', '--format=json']
- cmds: list[list[PathIsh]]
+ cmds: List[List[PathIsh]]
if len(inps) == 0:
cmds = [base] # rely on default
else:
# otherwise, 'merge' them
- cmds = [[*base, '--logfile', f] for f in inps]
+ cmds = [base + ['--logfile', f] for f in inps]
- from subprocess import PIPE, Popen
-
- import ijson.backends.yajl2_cffi as ijson # type: ignore
+ import ijson.backends.yajl2_cffi as ijson # type: ignore
+ from subprocess import Popen, PIPE
for cmd in cmds:
with Popen(cmd, stdout=PIPE) as p:
out = p.stdout; assert out is not None
@@ -96,8 +93,8 @@ def entries() -> Iterable[Entry]:
def fill_influxdb() -> None:
- from .core.freezer import Freezer
from .core.influxdb import magic_fill
+ from .core.freezer import Freezer
freezer = Freezer(Entry)
fit = (freezer.freeze(e) for e in entries())
# TODO crap, influxdb doesn't like None https://github.com/influxdata/influxdb/issues/7722
@@ -109,8 +106,6 @@ def fill_influxdb() -> None:
magic_fill(fit, name=f'{entries.__module__}:{entries.__name__}')
-from .core import Stats, stat
-
-
+from .core import stat, Stats
def stats() -> Stats:
return stat(entries)
diff --git a/my/bluemaestro.py b/my/bluemaestro.py
index 8c739f0..3e25cae 100644
--- a/my/bluemaestro.py
+++ b/my/bluemaestro.py
@@ -1,63 +1,39 @@
+#!/usr/bin/python3
"""
[[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor
"""
-from __future__ import annotations
-
# todo most of it belongs to DAL... but considering so few people use it I didn't bother for now
-import re
-import sqlite3
-from abc import abstractmethod
-from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
-from typing import Protocol
+import re
+import sqlite3
+from typing import Iterable, Sequence, Set, Optional
import pytz
from my.core import (
- Paths,
- Res,
- Stats,
get_files,
make_logger,
+ Res,
stat,
- unwrap,
+ Stats,
+ influxdb,
)
from my.core.cachew import mcachew
+from my.core.error import unwrap
from my.core.pandas import DataFrameT, as_dataframe
from my.core.sqlite import sqlite_connect_immutable
-
-class config(Protocol):
- @property
- @abstractmethod
- def export_path(self) -> Paths:
- raise NotImplementedError
-
- @property
- def tz(self) -> pytz.BaseTzInfo:
- # fixme: later, rely on the timezone provider
- # NOTE: the timezone should be set with respect to the export date!!!
- return pytz.timezone('Europe/London')
- # TODO when I change tz, check the diff
-
-
-def make_config() -> config:
- from my.config import bluemaestro as user_config
-
- class combined_config(user_config, config): ...
-
- return combined_config()
+from my.config import bluemaestro as config
logger = make_logger(__name__)
def inputs() -> Sequence[Path]:
- cfg = make_config()
- return get_files(cfg.export_path)
+ return get_files(config.export_path)
Celsius = float
@@ -74,6 +50,12 @@ class Measurement:
dewpoint: Celsius
+# fixme: later, rely on the timezone provider
+# NOTE: the timezone should be set with respect to the export date!!!
+tz = pytz.timezone('Europe/London')
+# TODO when I change tz, check the diff
+
+
def is_bad_table(name: str) -> bool:
# todo hmm would be nice to have a hook that can patch any module up to
delegate = getattr(config, 'is_bad_table', None)
@@ -82,31 +64,28 @@ def is_bad_table(name: str) -> bool:
@mcachew(depends_on=inputs)
def measurements() -> Iterable[Res[Measurement]]:
- cfg = make_config()
- tz = cfg.tz
-
# todo ideally this would be via arguments... but needs to be lazy
paths = inputs()
total = len(paths)
width = len(str(total))
- last: datetime | None = None
+ last: Optional[datetime] = None
# tables are immutable, so can save on processing..
- processed_tables: set[str] = set()
+ processed_tables: Set[str] = set()
for idx, path in enumerate(paths):
logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
tot = 0
new = 0
# todo assert increasing timestamp?
with sqlite_connect_immutable(path) as db:
- db_dt: datetime | None = None
+ db_dt: Optional[datetime] = None
try:
datas = db.execute(
f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
)
oldfmt = True
- [(db_dts,)] = db.execute('SELECT last_download FROM info')
+ db_dts = list(db.execute('SELECT last_download FROM info'))[0][0]
if db_dts == 'N/A':
# ??? happens for 20180923-20180928
continue
@@ -139,7 +118,7 @@ def measurements() -> Iterable[Res[Measurement]]:
processed_tables |= set(log_tables)
# todo use later?
- frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables] # noqa: RUF015
+ frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]
# todo could just filter out the older datapoints?? dunno.
@@ -155,7 +134,7 @@ def measurements() -> Iterable[Res[Measurement]]:
oldfmt = False
db_dt = None
- for (name, tsc, temp, hum, pres, dewp) in datas:
+ for i, (name, tsc, temp, hum, pres, dewp) in enumerate(datas):
if is_bad_table(name):
continue
@@ -232,8 +211,6 @@ def dataframe() -> DataFrameT:
def fill_influxdb() -> None:
- from my.core import influxdb
-
influxdb.fill(measurements(), measurement=__name__)
diff --git a/my/body/blood.py b/my/body/blood.py
index 867568c..fb035eb 100644
--- a/my/body/blood.py
+++ b/my/body/blood.py
@@ -2,42 +2,41 @@
Blood tracking (manual org-mode entries)
"""
-from __future__ import annotations
-
-from collections.abc import Iterable
from datetime import datetime
-from typing import NamedTuple
-
-import orgparse
-import pandas as pd
-
-from my.config import blood as config # type: ignore[attr-defined]
+from typing import Iterable, NamedTuple, Optional
from ..core.error import Res
-from ..core.orgmode import one_table, parse_org_datetime
+from ..core.orgmode import parse_org_datetime, one_table
+
+
+import pandas as pd
+import orgparse
+
+
+from my.config import blood as config # type: ignore[attr-defined]
class Entry(NamedTuple):
dt: datetime
- ketones : float | None=None
- glucose : float | None=None
+ ketones : Optional[float]=None
+ glucose : Optional[float]=None
- vitamin_d : float | None=None
- vitamin_b12 : float | None=None
+ vitamin_d : Optional[float]=None
+ vitamin_b12 : Optional[float]=None
- hdl : float | None=None
- ldl : float | None=None
- triglycerides: float | None=None
+ hdl : Optional[float]=None
+ ldl : Optional[float]=None
+ triglycerides: Optional[float]=None
- source : str | None=None
- extra : str | None=None
+ source : Optional[str]=None
+ extra : Optional[str]=None
Result = Res[Entry]
-def try_float(s: str) -> float | None:
+def try_float(s: str) -> Optional[float]:
l = s.split()
if len(l) == 0:
return None
@@ -106,7 +105,6 @@ def blood_tests_data() -> Iterable[Result]:
def data() -> Iterable[Result]:
from itertools import chain
-
from ..core.error import sort_res_by
datas = chain(glucose_ketones_data(), blood_tests_data())
return sort_res_by(datas, key=lambda e: e.dt)
diff --git a/my/body/exercise/all.py b/my/body/exercise/all.py
index d0df747..e86a5af 100644
--- a/my/body/exercise/all.py
+++ b/my/body/exercise/all.py
@@ -7,10 +7,10 @@ from ...core.pandas import DataFrameT, check_dataframe
@check_dataframe
def dataframe() -> DataFrameT:
# this should be somehow more flexible...
- import pandas as pd
-
from ...endomondo import dataframe as EDF
- from ...runnerup import dataframe as RDF
+ from ...runnerup import dataframe as RDF
+
+ import pandas as pd
return pd.concat([
EDF(),
RDF(),
diff --git a/my/body/exercise/cardio.py b/my/body/exercise/cardio.py
index d8a6afd..083b972 100644
--- a/my/body/exercise/cardio.py
+++ b/my/body/exercise/cardio.py
@@ -3,6 +3,7 @@ Cardio data, filtered from various data sources
'''
from ...core.pandas import DataFrameT, check_dataframe
+
CARDIO = {
'Running',
'Running, treadmill',
diff --git a/my/body/exercise/cross_trainer.py b/my/body/exercise/cross_trainer.py
index 30f96f9..d073f43 100644
--- a/my/body/exercise/cross_trainer.py
+++ b/my/body/exercise/cross_trainer.py
@@ -5,18 +5,16 @@ This is probably too specific to my needs, so later I will move it away to a per
For now it's worth keeping it here as an example and perhaps utility functions might be useful for other HPI modules.
'''
-from __future__ import annotations
-
from datetime import datetime, timedelta
+from typing import Optional
-import pytz
+from ...core.pandas import DataFrameT, check_dataframe as cdf
+from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable
from my.config import exercise as config
-from ...core.orgmode import Table, TypedTable, collect, parse_org_datetime
-from ...core.pandas import DataFrameT
-from ...core.pandas import check_dataframe as cdf
+import pytz
# FIXME how to attach it properly?
tz = pytz.timezone('Europe/London')
@@ -107,7 +105,7 @@ def dataframe() -> DataFrameT:
rows = []
idxs = [] # type: ignore[var-annotated]
NO_ENDOMONDO = 'no endomondo matches'
- for _i, row in mdf.iterrows():
+ for i, row in mdf.iterrows():
rd = row.to_dict()
mdate = row['date']
if pd.isna(mdate):
@@ -116,7 +114,7 @@ def dataframe() -> DataFrameT:
rows.append(rd) # presumably has an error set
continue
- idx: int | None
+ idx: Optional[int]
close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < _DELTA]
if len(close) == 0:
idx = None
@@ -165,9 +163,7 @@ def dataframe() -> DataFrameT:
# TODO wtf?? where is speed coming from??
-from ...core import Stats, stat
-
-
+from ...core import stat, Stats
def stats() -> Stats:
return stat(cross_trainer_data)
diff --git a/my/body/sleep/common.py b/my/body/sleep/common.py
index fc288e5..e84c8d5 100644
--- a/my/body/sleep/common.py
+++ b/my/body/sleep/common.py
@@ -1,6 +1,5 @@
-from ...core import Stats, stat
-from ...core.pandas import DataFrameT
-from ...core.pandas import check_dataframe as cdf
+from ...core import stat, Stats
+from ...core.pandas import DataFrameT, check_dataframe as cdf
class Combine:
@@ -8,7 +7,7 @@ class Combine:
self.modules = modules
@cdf
- def dataframe(self, *, with_temperature: bool=True) -> DataFrameT:
+ def dataframe(self, with_temperature: bool=True) -> DataFrameT:
import pandas as pd
# todo include 'source'?
df = pd.concat([m.dataframe() for m in self.modules])
diff --git a/my/body/sleep/main.py b/my/body/sleep/main.py
index 2460e03..29b12a7 100644
--- a/my/body/sleep/main.py
+++ b/my/body/sleep/main.py
@@ -1,6 +1,7 @@
-from ... import emfit, jawbone
-from .common import Combine
+from ... import jawbone
+from ... import emfit
+from .common import Combine
_combined = Combine([
jawbone,
emfit,
diff --git a/my/body/weight.py b/my/body/weight.py
index d5478ef..def3e87 100644
--- a/my/body/weight.py
+++ b/my/body/weight.py
@@ -2,29 +2,21 @@
Weight data (manually logged)
'''
-from collections.abc import Iterator
-from dataclasses import dataclass
from datetime import datetime
-from typing import Any
+from typing import NamedTuple, Iterator
-from my import orgmode
-from my.core import make_logger
-from my.core.error import Res, extract_error_datetime, set_error_datetime
+from ..core import LazyLogger
+from ..core.error import Res, set_error_datetime, extract_error_datetime
-config = Any
+from .. import orgmode
+
+from my.config import weight as config # type: ignore[attr-defined]
-def make_config() -> config:
- from my.config import weight as user_config # type: ignore[attr-defined]
-
- return user_config()
+log = LazyLogger('my.body.weight')
-log = make_logger(__name__)
-
-
-@dataclass
-class Entry:
+class Entry(NamedTuple):
dt: datetime
value: float
# TODO comment??
@@ -34,8 +26,6 @@ Result = Res[Entry]
def from_orgmode() -> Iterator[Result]:
- cfg = make_config()
-
orgs = orgmode.query()
for o in orgmode.query().all():
if 'weight' not in o.tags:
@@ -56,8 +46,8 @@ def from_orgmode() -> Iterator[Result]:
yield e
continue
# FIXME use timezone provider
- created = cfg.default_timezone.localize(created)
- assert created is not None # ??? somehow mypy wasn't happy?
+ created = config.default_timezone.localize(created)
+ assert created is not None # ??? somehow mypy wasn't happy?
yield Entry(
dt=created,
value=w,
@@ -67,23 +57,21 @@ def from_orgmode() -> Iterator[Result]:
def make_dataframe(data: Iterator[Result]):
import pandas as pd
-
def it():
for e in data:
if isinstance(e, Exception):
dt = extract_error_datetime(e)
yield {
- 'dt': dt,
+ 'dt' : dt,
'error': str(e),
}
else:
yield {
- 'dt': e.dt,
+ 'dt' : e.dt,
'weight': e.value,
}
-
df = pd.DataFrame(it())
- df = df.set_index('dt')
+ df.set_index('dt', inplace=True)
# TODO not sure about UTC??
df.index = pd.to_datetime(df.index, utc=True)
return df
@@ -93,7 +81,6 @@ def dataframe():
entries = from_orgmode()
return make_dataframe(entries)
-
# TODO move to a submodule? e.g. my.body.weight.orgmode?
# so there could be more sources
# not sure about my.body thing though
diff --git a/my/books/kobo.py b/my/books/kobo.py
index 40b7ed7..2a469d0 100644
--- a/my/books/kobo.py
+++ b/my/books/kobo.py
@@ -1,6 +1,7 @@
-from my.core import warnings
+from ..core import warnings
warnings.high('my.books.kobo is deprecated! Please use my.kobo instead!')
-from my.core.util import __NOT_HPI_MODULE__
-from my.kobo import *
+from ..core.util import __NOT_HPI_MODULE__
+
+from ..kobo import * # type: ignore[no-redef]
diff --git a/my/browser/active_browser.py b/my/browser/active_browser.py
index 1686fc5..6f335bd 100644
--- a/my/browser/active_browser.py
+++ b/my/browser/active_browser.py
@@ -1,5 +1,5 @@
"""
-Parses active browser history by backing it up with [[http://github.com/purarue/sqlite_backup][sqlite_backup]]
+Parses active browser history by backing it up with [[http://github.com/seanbreckenridge/sqlite_backup][sqlite_backup]]
"""
REQUIRES = ["browserexport", "sqlite_backup"]
@@ -19,18 +19,16 @@ class config(user_config.active_browser):
export_path: Paths
-from collections.abc import Iterator, Sequence
from pathlib import Path
+from typing import Sequence, Iterator
-from browserexport.merge import Visit, read_visits
+from my.core import get_files, Stats, make_logger
+from browserexport.merge import read_visits, Visit
from sqlite_backup import sqlite_backup
-from my.core import Stats, get_files, make_logger
-
logger = make_logger(__name__)
from .common import _patch_browserexport_logs
-
_patch_browserexport_logs(logger.level)
diff --git a/my/browser/all.py b/my/browser/all.py
index feb973a..a7d12b4 100644
--- a/my/browser/all.py
+++ b/my/browser/all.py
@@ -1,9 +1,9 @@
-from collections.abc import Iterator
-
-from browserexport.merge import Visit, merge_visits
+from typing import Iterator
from my.core import Stats
from my.core.source import import_source
+from browserexport.merge import merge_visits, Visit
+
src_export = import_source(module_name="my.browser.export")
src_active = import_source(module_name="my.browser.active_browser")
diff --git a/my/browser/export.py b/my/browser/export.py
index 52ade0e..1b428b5 100644
--- a/my/browser/export.py
+++ b/my/browser/export.py
@@ -1,15 +1,14 @@
"""
-Parses browser history using [[http://github.com/purarue/browserexport][browserexport]]
+Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
"""
REQUIRES = ["browserexport"]
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
+from typing import Iterator, Sequence
-from browserexport.merge import Visit, read_and_merge
-
+import my.config
from my.core import (
Paths,
Stats,
@@ -19,9 +18,9 @@ from my.core import (
)
from my.core.cachew import mcachew
-from .common import _patch_browserexport_logs
+from browserexport.merge import read_and_merge, Visit
-import my.config # isort: skip
+from .common import _patch_browserexport_logs
@dataclass
diff --git a/my/bumble/android.py b/my/bumble/android.py
index 3f9fa13..54a0441 100644
--- a/my/bumble/android.py
+++ b/my/bumble/android.py
@@ -3,24 +3,24 @@ Bumble data from Android app database (in =/data/data/com.bumble.app/databases/C
"""
from __future__ import annotations
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime
-from pathlib import Path
+from typing import Iterator, Sequence, Optional, Dict
from more_itertools import unique_everseen
-from my.core import Paths, get_files
-
-from my.config import bumble as user_config # isort: skip
+from my.config import bumble as user_config
+from ..core import Paths
@dataclass
class config(user_config.android):
# paths[s]/glob to the exported sqlite databases
export_path: Paths
+from ..core import get_files
+from pathlib import Path
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
@@ -43,23 +43,21 @@ class _BaseMessage:
@dataclass(unsafe_hash=True)
class _Message(_BaseMessage):
conversation_id: str
- reply_to_id: str | None
+ reply_to_id: Optional[str]
@dataclass(unsafe_hash=True)
class Message(_BaseMessage):
person: Person
- reply_to: Message | None
+ reply_to: Optional[Message]
import json
-import sqlite3
from typing import Union
-
-from my.core.compat import assert_never
-
from ..core import Res
-from ..core.sqlite import select, sqlite_connect_immutable
+import sqlite3
+from ..core.sqlite import sqlite_connect_immutable, select
+from my.core.compat import assert_never
EntitiesRes = Res[Union[Person, _Message]]
@@ -122,8 +120,8 @@ _UNKNOWN_PERSON = "UNKNOWN_PERSON"
def messages() -> Iterator[Res[Message]]:
- id2person: dict[str, Person] = {}
- id2msg: dict[str, Message] = {}
+ id2person: Dict[str, Person] = {}
+ id2msg: Dict[str, Message] = {}
for x in unique_everseen(_entities(), key=_key):
if isinstance(x, Exception):
yield x
diff --git a/my/calendar/holidays.py b/my/calendar/holidays.py
index 522672e..f73bf70 100644
--- a/my/calendar/holidays.py
+++ b/my/calendar/holidays.py
@@ -15,12 +15,11 @@ from my.core.time import zone_to_countrycode
@lru_cache(1)
def _calendar():
- from workalendar.registry import registry # type: ignore
-
+ from workalendar.registry import registry # type: ignore
# todo switch to using time.tz.main once _get_tz stabilizes?
from ..time.tz import via_location as LTZ
# TODO would be nice to do it dynamically depending on the past timezones...
- tz = LTZ.get_tz(datetime.now())
+ tz = LTZ._get_tz(datetime.now())
assert tz is not None
zone = tz.zone; assert zone is not None
code = zone_to_countrycode(zone)
diff --git a/my/cfg.py b/my/cfg.py
index 9331e8a..e4020b4 100644
--- a/my/cfg.py
+++ b/my/cfg.py
@@ -1,6 +1,7 @@
import my.config as config
from .core import __NOT_HPI_MODULE__
+
from .core import warnings as W
# still used in Promnesia, maybe in dashboard?
diff --git a/my/codeforces.py b/my/codeforces.py
index 9c6b7c9..f2d150a 100644
--- a/my/codeforces.py
+++ b/my/codeforces.py
@@ -1,12 +1,13 @@
-import json
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from functools import cached_property
+import json
from pathlib import Path
+from typing import Dict, Iterator, Sequence
+
+from my.core import get_files, Res, datetime_aware
from my.config import codeforces as config # type: ignore[attr-defined]
-from my.core import Res, datetime_aware, get_files
def inputs() -> Sequence[Path]:
@@ -38,7 +39,7 @@ class Competition:
class Parser:
def __init__(self, *, inputs: Sequence[Path]) -> None:
self.inputs = inputs
- self.contests: dict[ContestId, Contest] = {}
+ self.contests: Dict[ContestId, Contest] = {}
def _parse_allcontests(self, p: Path) -> Iterator[Contest]:
j = json.loads(p.read_text())
diff --git a/my/coding/commits.py b/my/coding/commits.py
index fe17dee..dac3b1f 100644
--- a/my/coding/commits.py
+++ b/my/coding/commits.py
@@ -1,32 +1,29 @@
"""
Git commits data for repositories on your filesystem
"""
-
-from __future__ import annotations
-
REQUIRES = [
'gitpython',
]
-import shutil
-from collections.abc import Iterator, Sequence
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Optional, cast
-from my.core import LazyLogger, PathIsh, make_config
+import shutil
+from pathlib import Path
+from datetime import datetime, timezone
+from dataclasses import dataclass, field
+from typing import List, Optional, Iterator, Set, Sequence, cast
+
+
+from my.core import PathIsh, LazyLogger, make_config
from my.core.cachew import cache_dir, mcachew
from my.core.warnings import high
-from my.config import commits as user_config # isort: skip
-
+from my.config import commits as user_config
@dataclass
class commits_cfg(user_config):
roots: Sequence[PathIsh] = field(default_factory=list)
- emails: Sequence[str] | None = None
- names: Sequence[str] | None = None
+ emails: Optional[Sequence[str]] = None
+ names: Optional[Sequence[str]] = None
# experiment to make it lazy?
@@ -43,6 +40,7 @@ def config() -> commits_cfg:
import git
from git.repo.fun import is_git_dir
+
log = LazyLogger(__name__, level='info')
@@ -95,7 +93,7 @@ def _git_root(git_dir: PathIsh) -> Path:
return gd # must be bare
-def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Commit]:
+def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Commit]:
# without path might not handle pull heads properly
for c in gr.iter_commits(rev=rev):
if not by_me(c):
@@ -122,7 +120,7 @@ def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Com
def repo_commits(repo: PathIsh):
gr = git.Repo(str(repo))
- emitted: set[str] = set()
+ emitted: Set[str] = set()
for r in gr.references:
yield from _repo_commits_aux(gr=gr, rev=r.path, emitted=emitted)
@@ -138,19 +136,19 @@ def canonical_name(repo: Path) -> str:
# else:
# rname = r.name
# if 'backups/github' in repo:
- # pass # TODO
+ # pass # TODO
def _fd_path() -> str:
# todo move it to core
- fd_path: str | None = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
+ fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
if fd_path is None:
high("my.coding.commits requires 'fd' to be installed, See https://github.com/sharkdp/fd#installation")
assert fd_path is not None
return fd_path
-def git_repos_in(roots: list[Path]) -> list[Path]:
+def git_repos_in(roots: List[Path]) -> List[Path]:
from subprocess import check_output
outputs = check_output([
_fd_path(),
@@ -163,36 +161,37 @@ def git_repos_in(roots: list[Path]) -> list[Path]:
*roots,
]).decode('utf8').splitlines()
- candidates = {Path(o).resolve().absolute().parent for o in outputs}
+ candidates = set(Path(o).resolve().absolute().parent for o in outputs)
# exclude stuff within .git dirs (can happen for submodules?)
candidates = {c for c in candidates if '.git' not in c.parts[:-1]}
candidates = {c for c in candidates if is_git_dir(c)}
- repos = sorted(map(_git_root, candidates))
+ repos = list(sorted(map(_git_root, candidates)))
return repos
-def repos() -> list[Path]:
+def repos() -> List[Path]:
return git_repos_in(list(map(Path, config().roots)))
# returns modification time for an index to use as hash function
def _repo_depends_on(_repo: Path) -> int:
- for pp in [
+ for pp in {
".git/FETCH_HEAD",
".git/HEAD",
"FETCH_HEAD", # bare
"HEAD", # bare
- ]:
+ }:
ff = _repo / pp
if ff.exists():
return int(ff.stat().st_mtime)
- raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")
+ else:
+ raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")
-def _commits(_repos: list[Path]) -> Iterator[Commit]:
+def _commits(_repos: List[Path]) -> Iterator[Commit]:
for r in _repos:
yield from _cached_commits(r)
diff --git a/my/coding/github.py b/my/coding/github.py
index c495554..9358b04 100644
--- a/my/coding/github.py
+++ b/my/coding/github.py
@@ -1,12 +1,9 @@
-from typing import TYPE_CHECKING
+import warnings
-from my.core import warnings
-
-warnings.high('my.coding.github is deprecated! Please use my.github.all instead!')
+warnings.warn('my.coding.github is deprecated! Please use my.github.all instead!')
# todo why aren't DeprecationWarning shown by default??
-if not TYPE_CHECKING:
- from ..github.all import events, get_events # noqa: F401
+from ..github.all import events, get_events
- # todo deprecate properly
- iter_events = events
+# todo deprecate properly
+iter_events = events
diff --git a/my/common.py b/my/common.py
index 22e9487..1b56fb5 100644
--- a/my/common.py
+++ b/my/common.py
@@ -1,6 +1,6 @@
from .core.warnings import high
-
high("DEPRECATED! Please use my.core.common instead.")
from .core import __NOT_HPI_MODULE__
+
from .core.common import *
diff --git a/my/config.py b/my/config.py
index 301bf49..a92b2bc 100644
--- a/my/config.py
+++ b/my/config.py
@@ -9,18 +9,17 @@ This file is used for:
- mypy: this file provides some type annotations
- for loading the actual user config
'''
-
-from __future__ import annotations
-
#### NOTE: you won't need this line VVVV in your personal config
-from my.core import init # noqa: F401 # isort: skip
+from my.core import init
###
from datetime import tzinfo
from pathlib import Path
+from typing import List
-from my.core import PathIsh, Paths
+
+from my.core import Paths, PathIsh
class hypothesis:
@@ -76,16 +75,14 @@ class google:
takeout_path: Paths = ''
-from collections.abc import Sequence
-from datetime import date, datetime, timedelta
-from typing import Union
-
+from typing import Sequence, Union, Tuple
+from datetime import datetime, date, timedelta
DateIsh = Union[datetime, date, str]
-LatLon = tuple[float, float]
+LatLon = Tuple[float, float]
class location:
# todo ugh, need to think about it... mypy wants the type here to be general, otherwise it can't deduce
# and we can't import the types from the module itself, otherwise would be circular. common module?
- home: LatLon | Sequence[tuple[DateIsh, LatLon]] = (1.0, -1.0)
+ home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
home_accuracy = 30_000.0
class via_ip:
@@ -106,8 +103,6 @@ class location:
from typing import Literal
-
-
class time:
class tz:
policy: Literal['keep', 'convert', 'throw']
@@ -126,9 +121,10 @@ class arbtt:
logfiles: Paths
+from typing import Optional
class commits:
- emails: Sequence[str] | None
- names: Sequence[str] | None
+ emails: Optional[Sequence[str]]
+ names: Optional[Sequence[str]]
roots: Sequence[PathIsh]
@@ -154,8 +150,8 @@ class tinder:
class instagram:
class android:
export_path: Paths
- username: str | None
- full_name: str | None
+ username: Optional[str]
+ full_name: Optional[str]
class gdpr:
export_path: Paths
@@ -173,7 +169,7 @@ class materialistic:
class fbmessenger:
class fbmessengerexport:
export_db: PathIsh
- facebook_id: str | None
+ facebook_id: Optional[str]
class android:
export_path: Paths
@@ -251,7 +247,7 @@ class runnerup:
class emfit:
export_path: Path
timezone: tzinfo
- excluded_sids: list[str]
+ excluded_sids: List[str]
class foursquare:
@@ -274,7 +270,7 @@ class roamresearch:
class whatsapp:
class android:
export_path: Paths
- my_user_id: str | None
+ my_user_id: Optional[str]
class harmonic:
diff --git a/my/core/__init__.py b/my/core/__init__.py
index a8a41f4..19be7fe 100644
--- a/my/core/__init__.py
+++ b/my/core/__init__.py
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING
from .cfg import make_config
from .common import PathIsh, Paths, get_files
from .compat import assert_never
-from .error import Res, notnone, unwrap
+from .error import Res, unwrap
from .logging import (
make_logger,
)
@@ -29,25 +29,22 @@ if not TYPE_CHECKING:
__all__ = [
- '__NOT_HPI_MODULE__',
+ 'get_files', 'PathIsh', 'Paths',
'Json',
- 'LazyLogger', # legacy import
- 'Path',
- 'PathIsh',
- 'Paths',
- 'Res',
- 'Stats',
- 'assert_never', # TODO maybe deprecate from use in my.core? will be in stdlib soon
- 'dataclass',
- 'datetime_aware',
- 'datetime_naive',
- 'get_files',
- 'make_config',
'make_logger',
- 'notnone',
- 'stat',
- 'unwrap',
+ 'LazyLogger', # legacy import
'warn_if_empty',
+ 'stat', 'Stats',
+ 'datetime_aware', 'datetime_naive',
+ 'assert_never', # TODO maybe deprecate from use in my.core? will be in stdlib soon
+
+ 'make_config',
+
+ '__NOT_HPI_MODULE__',
+
+ 'Res', 'unwrap',
+
+ 'dataclass', 'Path',
]
@@ -55,7 +52,7 @@ __all__ = [
# you could put _init_hook.py next to your private my/config
# that way you can configure logging/warnings/env variables on every HPI import
try:
- import my._init_hook # type: ignore[import-not-found] # noqa: F401
+ import my._init_hook # type: ignore[import-not-found]
except:
pass
##
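
Since `my._init_hook` is imported on every `import my.core` (and silently skipped when absent), it is a convenient place for per-user global tweaks. A hedged sketch of what such a hook could contain; everything in it is illustrative:

```python
# hypothetical my/_init_hook.py, kept next to the private my/config;
# imported automatically by my.core if present
import logging
import warnings

# quieten a chatty third-party library before any HPI module loads
logging.getLogger('urllib3').setLevel(logging.WARNING)

# surface deprecation warnings from HPI modules during development
warnings.filterwarnings('default', category=DeprecationWarning, module=r'my\..*')
```
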
diff --git a/my/core/__main__.py b/my/core/__main__.py
index 7e2d8f9..276de26 100644
--- a/my/core/__main__.py
+++ b/my/core/__main__.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
import functools
import importlib
import inspect
@@ -9,18 +7,17 @@ import shutil
import sys
import tempfile
import traceback
-from collections.abc import Iterable, Sequence
from contextlib import ExitStack
from itertools import chain
from pathlib import Path
from subprocess import PIPE, CompletedProcess, Popen, check_call, run
-from typing import Any, Callable
+from typing import Any, Callable, Iterable, List, Optional, Sequence, Type
import click
-@functools.lru_cache
-def mypy_cmd() -> Sequence[str] | None:
+@functools.lru_cache()
+def mypy_cmd() -> Optional[Sequence[str]]:
try:
# preferably, use mypy from current python env
import mypy # noqa: F401 fine not to use it
@@ -35,7 +32,7 @@ def mypy_cmd() -> Sequence[str] | None:
return None
-def run_mypy(cfg_path: Path) -> CompletedProcess | None:
+def run_mypy(cfg_path: Path) -> Optional[CompletedProcess]:
# todo dunno maybe use the same mypy config in repository?
# I'd need to install mypy.ini then??
env = {**os.environ}
@@ -46,7 +43,7 @@ def run_mypy(cfg_path: Path) -> CompletedProcess | None:
cmd = mypy_cmd()
if cmd is None:
return None
- mres = run([ # noqa: UP022,PLW1510
+ mres = run([
*cmd,
'--namespace-packages',
'--color-output', # not sure if works??
@@ -66,27 +63,21 @@ def eprint(x: str) -> None:
# err=True prints to stderr
click.echo(x, err=True)
-
def indent(x: str) -> str:
- # todo use textwrap.indent?
return ''.join(' ' + l for l in x.splitlines(keepends=True))
-OK = '✅'
+OK  = '✅'
OFF = '🔲'
-
def info(x: str) -> None:
eprint(OK + ' ' + x)
-
def error(x: str) -> None:
eprint('❌ ' + x)
-
def warning(x: str) -> None:
- eprint('❗ ' + x) # todo yellow?
-
+ eprint('❗ ' + x) # todo yellow?
def tb(e: Exception) -> None:
tb = ''.join(traceback.format_exception(Exception, e, e.__traceback__))
@@ -95,7 +86,6 @@ def tb(e: Exception) -> None:
def config_create() -> None:
from .preinit import get_mycfg_dir
-
mycfg_dir = get_mycfg_dir()
created = False
@@ -104,8 +94,7 @@ def config_create() -> None:
my_config = mycfg_dir / 'my' / 'config' / '__init__.py'
my_config.parent.mkdir(parents=True)
- my_config.write_text(
- '''
+ my_config.write_text('''
### HPI personal config
## see
# https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules
@@ -128,8 +117,7 @@ class example:
### you can insert your own configuration below
### but feel free to delete the stuff above if you don't need it
-'''.lstrip()
- )
+'''.lstrip())
info(f'created empty config: {my_config}')
created = True
else:
@@ -142,13 +130,12 @@ class example:
# todo return the config as a result?
def config_ok() -> bool:
- errors: list[Exception] = []
+ errors: List[Exception] = []
# at this point 'my' should already be imported, so doesn't hurt to extract paths from it
import my
-
try:
- paths: list[str] = list(my.__path__)
+ paths: List[str] = list(my.__path__)
except Exception as e:
errors.append(e)
error('failed to determine module import path')
@@ -158,23 +145,19 @@ def config_ok() -> bool:
# first try doing as much as possible without actually importing my.config
from .preinit import get_mycfg_dir
-
cfg_path = get_mycfg_dir()
# alternative is importing my.config and then getting cfg_path from its __file__/__path__
# not sure which is better tbh
## check we're not using stub config
import my.core
-
try:
core_pkg_path = str(Path(my.core.__path__[0]).parent)
if str(cfg_path).startswith(core_pkg_path):
- error(
- f'''
+ error(f'''
Seems that the stub config is used ({cfg_path}). This is likely not going to work.
See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules for more information
-'''.strip()
- )
+'''.strip())
errors.append(RuntimeError('bad config path'))
except Exception as e:
errors.append(e)
@@ -188,6 +171,8 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
# use a temporary directory, useful because
# - compileall ignores -B, so always craps with .pyc files (annoying on RO filesystems)
# - compileall isn't following symlinks, just silently ignores them
+ # note: ugh, annoying that copytree requires a non-existing dir before 3.8.
+ # once we have min version 3.8, can use dirs_exist_ok=True param
tdir = Path(td) / 'cfg'
# NOTE: compileall still returns code 0 if the path doesn't exist..
# but in our case hopefully it's not an issue
@@ -196,7 +181,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
try:
# this will resolve symlinks when copying
# should be under try/catch since might fail if some symlinks are missing
- shutil.copytree(cfg_path, tdir, dirs_exist_ok=True)
+ shutil.copytree(cfg_path, tdir)
check_call(cmd)
info('syntax check: ' + ' '.join(cmd))
except Exception as e:
@@ -206,7 +191,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
## check types
mypy_res = run_mypy(cfg_path)
- if mypy_res is not None: # has mypy
+ if mypy_res is not None: # has mypy
rc = mypy_res.returncode
if rc == 0:
info('mypy check : success')
@@ -229,16 +214,16 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
if len(errors) > 0:
error(f'config check: {len(errors)} errors')
return False
-
- # note: shouldn't exit here, might run something else
- info('config check: success!')
- return True
+ else:
+ # note: shouldn't exit here, might run something else
+ info('config check: success!')
+ return True
from .util import HPIModule, modules
-def _modules(*, all: bool = False) -> Iterable[HPIModule]:
+def _modules(*, all: bool=False) -> Iterable[HPIModule]:
skipped = []
for m in modules():
if not all and m.skip_reason is not None:
@@ -249,7 +234,7 @@ def _modules(*, all: bool = False) -> Iterable[HPIModule]:
warning(f'Skipped {len(skipped)} modules: {skipped}. Pass --all if you want to see them.')
-def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: list[str]) -> None:
+def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: List[str]) -> None:
if len(for_modules) > 0:
# if you're checking specific modules, show errors
# hopefully makes sense?
@@ -273,7 +258,7 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: li
# todo add a --all argument to disregard is_active check?
for mr in mods:
skip = mr.skip_reason
- m = mr.name
+ m    = mr.name
if skip is not None:
eprint(f'{OFF} {click.style("SKIP", fg="yellow")}: {m:<50} {skip}')
continue
@@ -323,8 +308,8 @@ def list_modules(*, list_all: bool) -> None:
tabulate_warnings()
for mr in _modules(all=list_all):
- m = mr.name
- sr = mr.skip_reason
+ m  = mr.name
+ sr = mr.skip_reason
if sr is None:
pre = OK
suf = ''
@@ -340,20 +325,17 @@ def tabulate_warnings() -> None:
Helper to avoid visual noise in hpi modules/doctor
'''
import warnings
-
orig = warnings.formatwarning
def override(*args, **kwargs) -> str:
res = orig(*args, **kwargs)
return ''.join(' ' + x for x in res.splitlines(keepends=True))
-
warnings.formatwarning = override
# TODO loggers as well?
def _requires(modules: Sequence[str]) -> Sequence[str]:
from .discovery_pure import module_by_name
-
mods = [module_by_name(module) for module in modules]
res = []
for mod in mods:
@@ -380,7 +362,7 @@ def module_requires(*, module: Sequence[str]) -> None:
click.echo(x)
-def module_install(*, user: bool, module: Sequence[str], parallel: bool = False, break_system_packages: bool = False) -> None:
+def module_install(*, user: bool, module: Sequence[str], parallel: bool=False, break_system_packages: bool=False) -> None:
if isinstance(module, str):
# legacy behavior, used to take a since argument
module = [module]
@@ -391,9 +373,8 @@ def module_install(*, user: bool, module: Sequence[str], parallel: bool = False,
warning('requirements list is empty, no need to install anything')
return
- use_uv = 'HPI_MODULE_INSTALL_USE_UV' in os.environ
pre_cmd = [
- sys.executable, '-m', *(['uv'] if use_uv else []), 'pip',
+ sys.executable, '-m', 'pip',
'install',
*(['--user'] if user else []), # todo maybe instead, forward all the remaining args to pip?
*(['--break-system-packages'] if break_system_packages else []), # https://peps.python.org/pep-0668/
@@ -411,7 +392,7 @@ def module_install(*, user: bool, module: Sequence[str], parallel: bool = False,
# I think it only helps for pypi artifacts (not git!),
# and only if they weren't cached
for r in requirements:
- cmds.append([*pre_cmd, r])
+ cmds.append(pre_cmd + [r])
else:
if parallel:
warning('parallel install is not supported on this platform, installing sequentially...')
@@ -457,7 +438,7 @@ def _ui_getchar_pick(choices: Sequence[str], prompt: str = 'Select from: ') -> i
return result_map[ch]
-def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = True) -> Iterable[Callable[..., Any]]:
+def _locate_functions_or_prompt(qualified_names: List[str], prompt: bool = True) -> Iterable[Callable[..., Any]]:
from .query import QueryException, locate_qualified_function
from .stats import is_data_provider
@@ -475,9 +456,9 @@ def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = Tr
# user to select a 'data provider' like function
try:
mod = importlib.import_module(qualname)
- except Exception as ie:
+ except Exception:
eprint(f"During fallback, importing '{qualname}' as module failed")
- raise qr_err from ie
+ raise qr_err
# find data providers in this module
data_providers = [f for _, f in inspect.getmembers(mod, inspect.isfunction) if is_data_provider(f)]
@@ -507,7 +488,6 @@ def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = Tr
def _warn_exceptions(exc: Exception) -> None:
from my.core import make_logger
-
logger = make_logger('CLI', level='warning')
logger.exception(f'hpi query: {exc}')
@@ -519,14 +499,14 @@ def query_hpi_functions(
*,
output: str = 'json',
stream: bool = False,
- qualified_names: list[str],
- order_key: str | None,
- order_by_value_type: type | None,
+ qualified_names: List[str],
+ order_key: Optional[str],
+ order_by_value_type: Optional[Type],
after: Any,
before: Any,
within: Any,
reverse: bool = False,
- limit: int | None,
+ limit: Optional[int],
drop_unsorted: bool,
wrap_unsorted: bool,
warn_exceptions: bool,
@@ -538,9 +518,6 @@ def query_hpi_functions(
# chain list of functions from user, in the order they wrote them on the CLI
input_src = chain(*(f() for f in _locate_functions_or_prompt(qualified_names)))
- # NOTE: if passing just one function to this which returns a single namedtuple/dataclass,
- # using both --order-key and --order-type will often be faster as it does not need to
- # duplicate the iterator in memory, or try to find the --order-type type on each object before sorting
res = select_range(
input_src,
order_key=order_key,
@@ -553,8 +530,7 @@ def query_hpi_functions(
warn_exceptions=warn_exceptions,
warn_func=_warn_exceptions,
raise_exceptions=raise_exceptions,
- drop_exceptions=drop_exceptions,
- )
+ drop_exceptions=drop_exceptions)
if output == 'json':
from .serialize import dumps
@@ -588,7 +564,7 @@ def query_hpi_functions(
# can ignore the mypy warning here, locations_to_gpx yields any errors
# if you didn't pass it something that matches the LocationProtocol
- for exc in locations_to_gpx(res, sys.stdout): # type: ignore[arg-type]
+ for exc in locations_to_gpx(res, sys.stdout): # type: ignore[arg-type]
if warn_exceptions:
_warn_exceptions(exc)
elif raise_exceptions:
@@ -601,11 +577,10 @@ def query_hpi_functions(
# output == 'repl'
eprint(f"\nInteract with the results by using the {click.style('res', fg='green')} variable\n")
try:
- import IPython # type: ignore[import,unused-ignore]
+ import IPython # type: ignore[import]
except ModuleNotFoundError:
eprint("'repl' typically uses ipython, install it with 'python3 -m pip install ipython'. falling back to stdlib...")
import code
-
code.interact(local=locals())
else:
IPython.embed()
@@ -613,7 +588,7 @@ def query_hpi_functions(
@click.group()
@click.option("--debug", is_flag=True, default=False, help="Show debug logs")
-def main(*, debug: bool) -> None:
+def main(debug: bool) -> None:
'''
Human Programming Interface
@@ -639,19 +614,20 @@ def main(*, debug: bool) -> None:
# to run things at the end (would need to use a callback or pass context)
# https://click.palletsprojects.com/en/7.x/commands/#nested-handling-and-contexts
- tdir = Path(tempfile.gettempdir()) / 'hpi_temp_dir'
- tdir.mkdir(exist_ok=True)
+ tdir: str = os.path.join(tempfile.gettempdir(), 'hpi_temp_dir')
+ if not os.path.exists(tdir):
+ os.makedirs(tdir)
os.chdir(tdir)
@functools.lru_cache(maxsize=1)
-def _all_mod_names() -> list[str]:
+def _all_mod_names() -> List[str]:
"""Should include all modules, in case user is trying to diagnose issues"""
# sort this, so that the order doesn't change while tabbing through
return sorted([m.name for m in modules()])
-def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> list[str]:
+def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> List[str]:
return [m for m in _all_mod_names() if m.startswith(incomplete)]
@@ -661,7 +637,7 @@ def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: st
@click.option('-q', '--quick', is_flag=True, help='Only run partial checks (first 100 items)')
@click.option('-S', '--skip-config-check', 'skip_conf', is_flag=True, help='Skip configuration check')
@click.argument('MODULE', nargs=-1, required=False, shell_complete=_module_autocomplete)
-def doctor_cmd(*, verbose: bool, list_all: bool, quick: bool, skip_conf: bool, module: Sequence[str]) -> None:
+def doctor_cmd(verbose: bool, list_all: bool, quick: bool, skip_conf: bool, module: Sequence[str]) -> None:
'''
Run various checks
@@ -695,7 +671,7 @@ def config_create_cmd() -> None:
@main.command(name='modules', short_help='list available modules')
@click.option('--all', 'list_all', is_flag=True, help='List all modules, including disabled')
-def module_cmd(*, list_all: bool) -> None:
+def module_cmd(list_all: bool) -> None:
'''List available modules'''
list_modules(list_all=list_all)
@@ -708,7 +684,7 @@ def module_grp() -> None:
@module_grp.command(name='requires', short_help='print module reqs')
@click.argument('MODULES', shell_complete=_module_autocomplete, nargs=-1, required=True)
-def module_requires_cmd(*, modules: Sequence[str]) -> None:
+def module_requires_cmd(modules: Sequence[str]) -> None:
'''
Print MODULES requirements
@@ -725,7 +701,7 @@ def module_requires_cmd(*, modules: Sequence[str]) -> None:
is_flag=True,
help='Bypass PEP 668 and install dependencies into the system-wide python package directory.')
@click.argument('MODULES', shell_complete=_module_autocomplete, nargs=-1, required=True)
-def module_install_cmd(*, user: bool, parallel: bool, break_system_packages: bool, modules: Sequence[str]) -> None:
+def module_install_cmd(user: bool, parallel: bool, break_system_packages: bool, modules: Sequence[str]) -> None:
'''
Install dependencies for modules using pip
@@ -806,18 +782,17 @@ def module_install_cmd(*, user: bool, parallel: bool, break_system_packages: boo
help='ignore any errors returned as objects from the functions')
@click.argument('FUNCTION_NAME', nargs=-1, required=True, shell_complete=_module_autocomplete)
def query_cmd(
- *,
function_name: Sequence[str],
output: str,
stream: bool,
- order_key: str | None,
- order_type: str | None,
- after: str | None,
- before: str | None,
- within: str | None,
- recent: str | None,
+ order_key: Optional[str],
+ order_type: Optional[str],
+ after: Optional[str],
+ before: Optional[str],
+ within: Optional[str],
+ recent: Optional[str],
reverse: bool,
- limit: int | None,
+ limit: Optional[int],
drop_unsorted: bool,
wrap_unsorted: bool,
warn_exceptions: bool,
@@ -853,7 +828,7 @@ def query_cmd(
from datetime import date, datetime
- chosen_order_type: type | None
+ chosen_order_type: Optional[Type]
if order_type == "datetime":
chosen_order_type = datetime
elif order_type == "date":
@@ -889,8 +864,7 @@ def query_cmd(
wrap_unsorted=wrap_unsorted,
warn_exceptions=warn_exceptions,
raise_exceptions=raise_exceptions,
- drop_exceptions=drop_exceptions,
- )
+ drop_exceptions=drop_exceptions)
except QueryException as qe:
eprint(str(qe))
sys.exit(1)
@@ -905,7 +879,6 @@ def query_cmd(
def test_requires() -> None:
from click.testing import CliRunner
-
result = CliRunner().invoke(main, ['module', 'requires', 'my.github.ghexport', 'my.browser.export'])
assert result.exit_code == 0
assert "github.com/karlicoss/ghexport" in result.output
diff --git a/my/core/_cpu_pool.py b/my/core/_cpu_pool.py
index 6b107a7..2369075 100644
--- a/my/core/_cpu_pool.py
+++ b/my/core/_cpu_pool.py
@@ -10,18 +10,15 @@ how many cores we want to dedicate to the DAL.
Enabled by the env variable, specifying how many cores to dedicate
e.g. "HPI_CPU_POOL=4 hpi query ..."
"""
-
-from __future__ import annotations
-
import os
from concurrent.futures import ProcessPoolExecutor
-from typing import cast
+from typing import Optional, cast
_NOT_SET = cast(ProcessPoolExecutor, object())
-_INSTANCE: ProcessPoolExecutor | None = _NOT_SET
+_INSTANCE: Optional[ProcessPoolExecutor] = _NOT_SET
-def get_cpu_pool() -> ProcessPoolExecutor | None:
+def get_cpu_pool() -> Optional[ProcessPoolExecutor]:
global _INSTANCE
if _INSTANCE is _NOT_SET:
use_cpu_pool = os.environ.get('HPI_CPU_POOL')
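
A hedged sketch of a call site for the pool above: consumers check for `None` and fall back to sequential processing, and `HPI_CPU_POOL` has to be set before the first call since the instance is created only once:

```python
# illustrative call site; _process and the data are made up
from concurrent.futures import ProcessPoolExecutor
from typing import List, Optional

from my.core._cpu_pool import get_cpu_pool


def _process(x: int) -> int:
    return x * x


def compute(xs: List[int]) -> List[int]:
    pool: Optional[ProcessPoolExecutor] = get_cpu_pool()
    if pool is None:
        # HPI_CPU_POOL not set: process sequentially
        return [_process(x) for x in xs]
    return list(pool.map(_process, xs))
```
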
diff --git a/my/core/_deprecated/kompress.py b/my/core/_deprecated/kompress.py
index c3f333f..7eb9b37 100644
--- a/my/core/_deprecated/kompress.py
+++ b/my/core/_deprecated/kompress.py
@@ -1,17 +1,16 @@
"""
Various helpers for compression
"""
-
# fmt: off
from __future__ import annotations
import io
import pathlib
-from collections.abc import Iterator, Sequence
+import sys
+import typing
from datetime import datetime
from functools import total_ordering
from pathlib import Path
-from typing import IO, Union
+from typing import IO, Any, Iterator, Sequence, Union
PathIsh = Union[Path, str]
@@ -28,7 +27,7 @@ class Ext:
def is_compressed(p: Path) -> bool:
# todo kinda lame way for now.. use mime ideally?
# should cooperate with kompress.kopen?
- return any(p.name.endswith(ext) for ext in [Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz])
+ return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz})
def _zstd_open(path: Path, *args, **kwargs) -> IO:
@@ -88,7 +87,7 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
elif name.endswith(Ext.lz4):
import lz4.frame # type: ignore
return lz4.frame.open(str(pp), mode, *args, **kwargs)
- elif name.endswith(Ext.zstd) or name.endswith(Ext.zst): # noqa: PIE810
+ elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):
kwargs['mode'] = mode
return _zstd_open(pp, *args, **kwargs)
elif name.endswith(Ext.targz):
@@ -121,7 +120,7 @@ class CPath(BasePath):
Path only has _accessor and _closed slots, so can't directly set .open method
_accessor.open has to return file descriptor, doesn't work for compressed stuff.
"""
- def open(self, *args, **kwargs): # noqa: ARG002
+ def open(self, *args, **kwargs):
kopen_kwargs = {}
mode = kwargs.get('mode')
if mode is not None:
@@ -142,16 +141,21 @@ open = kopen # TODO deprecate
def kexists(path: PathIsh, subpath: str) -> bool:
try:
kopen(path, subpath)
+ return True
except Exception:
return False
- else:
- return True
import zipfile
-# meh... zipfile.Path is not available on 3.7
-zipfile_Path = zipfile.Path
+if sys.version_info[:2] >= (3, 8):
+ # meh... zipfile.Path is not available on 3.7
+ zipfile_Path = zipfile.Path
+else:
+ if typing.TYPE_CHECKING:
+ zipfile_Path = Any
+ else:
+ zipfile_Path = object
@total_ordering
@@ -211,7 +215,7 @@ class ZipPath(zipfile_Path):
def iterdir(self) -> Iterator[ZipPath]:
for s in self._as_dir().iterdir():
- yield ZipPath(s.root, s.at)
+ yield ZipPath(s.root, s.at) # type: ignore[attr-defined]
@property
def stem(self) -> str:
@@ -240,7 +244,7 @@ class ZipPath(zipfile_Path):
# see https://en.wikipedia.org/wiki/ZIP_(file_format)#Structure
dt = datetime(*self.root.getinfo(self.at).date_time)
ts = int(dt.timestamp())
- params = dict( # noqa: C408
+ params = dict(
st_mode=0,
st_ino=0,
st_dev=0,
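
For reference, a sketch of how `kopen`/`kexists` above are meant to be used. Dispatch is purely by filename extension, and the paths and archive members here are placeholders:

```python
# illustrative only: paths and archive members are placeholders
from my.core._deprecated.kompress import kexists, kopen

# transparently opens .xz/.zip/.lz4/.zstd/.tar.gz based on the extension
with kopen('/path/to/logs.txt.xz') as fo:
    first_line = fo.readline()

# probe for a member inside an archive by attempting to open it
if kexists('/path/to/takeout.zip', 'Takeout/My Activity/index.html'):
    with kopen('/path/to/takeout.zip', 'Takeout/My Activity/index.html') as fo:
        html = fo.read()
```
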
diff --git a/my/core/cachew.py b/my/core/cachew.py
index 8ce2f2b..e0e7adf 100644
--- a/my/core/cachew.py
+++ b/my/core/cachew.py
@@ -1,18 +1,17 @@
-from __future__ import annotations
-
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
+from .internal import assert_subpackage; assert_subpackage(__name__)
import logging
import sys
-from collections.abc import Iterator
+import warnings
from contextlib import contextmanager
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Callable,
+ Iterator,
+ Optional,
+ Type,
TypeVar,
Union,
cast,
@@ -21,8 +20,6 @@ from typing import (
import appdirs # type: ignore[import-untyped]
-from . import warnings
-
PathIsh = Union[str, Path] # avoid circular import from .common
@@ -61,12 +58,12 @@ def _appdirs_cache_dir() -> Path:
_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
-def cache_dir(suffix: PathIsh | None = None) -> Path:
+def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
from . import core_config as CC
cdir_ = CC.config.get_cache_dir()
- sp: Path | None = None
+ sp: Optional[Path] = None
if suffix is not None:
sp = Path(suffix)
# guess if you do need absolute, better path it directly instead of as suffix?
@@ -119,7 +116,7 @@ def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs):
try:
import cachew
except ModuleNotFoundError:
- warnings.high('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
+ warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
return lambda orig_func: orig_func
else:
kwargs['cache_path'] = cache_path
@@ -136,7 +133,7 @@ if TYPE_CHECKING:
CC = Callable[P, R] # need to give it a name, if inlined into bound=, mypy runs in a bug
PathProvider = Union[PathIsh, Callable[P, PathIsh]]
# NOTE: in cachew, HashFunction type returns str
- # however in practice, cachew always calls str for its result
+ # however in practice, cachew always calls str for its result
# so perhaps better to switch it to Any in cachew as well
HashFunction = Callable[P, Any]
@@ -145,19 +142,21 @@ if TYPE_CHECKING:
# we need two versions due to @doublewrap
# this is when we just annotate as @cachew without any args
@overload # type: ignore[no-overload-impl]
- def mcachew(fun: F) -> F: ...
+ def mcachew(fun: F) -> F:
+ ...
@overload
def mcachew(
- cache_path: PathProvider | None = ...,
+ cache_path: Optional[PathProvider] = ...,
*,
force_file: bool = ...,
- cls: type | None = ...,
+ cls: Optional[Type] = ...,
depends_on: HashFunction = ...,
- logger: logging.Logger | None = ...,
+ logger: Optional[logging.Logger] = ...,
chunk_by: int = ...,
- synthetic_key: str | None = ...,
- ) -> Callable[[F], F]: ...
+ synthetic_key: Optional[str] = ...,
+ ) -> Callable[[F], F]:
+ ...
else:
mcachew = _mcachew_impl
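
A sketch of how a module might apply `mcachew` from above: under `TYPE_CHECKING` the overloads give it a precise signature, while at runtime it degrades to a no-op decorator when cachew isn't installed. The `Visit` type and the parsing are made up for illustration:

```python
from datetime import datetime
from pathlib import Path
from typing import Iterator, NamedTuple

from my.core.cachew import mcachew


class Visit(NamedTuple):
    dt: datetime
    url: str


# cache key depends on the input file's mtime, so edits invalidate the cache
@mcachew(depends_on=lambda path: path.stat().st_mtime)
def visits(path: Path) -> Iterator[Visit]:
    for line in path.read_text().splitlines():
        ts, url = line.split(' ', 1)
        yield Visit(dt=datetime.fromisoformat(ts), url=url)
```
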
diff --git a/my/core/cfg.py b/my/core/cfg.py
index 9851443..a71a7e3 100644
--- a/my/core/cfg.py
+++ b/my/core/cfg.py
@@ -3,32 +3,28 @@ from __future__ import annotations
import importlib
import re
import sys
-from collections.abc import Iterator
from contextlib import ExitStack, contextmanager
-from typing import Any, Callable, TypeVar
+from typing import Any, Callable, Dict, Iterator, Optional, Type, TypeVar
-Attrs = dict[str, Any]
+Attrs = Dict[str, Any]
C = TypeVar('C')
-
# todo not sure about it, could be overthinking...
# but short enough to change later
# TODO document why it's necessary?
-def make_config(cls: type[C], migration: Callable[[Attrs], Attrs] = lambda x: x) -> C:
+def make_config(cls: Type[C], migration: Callable[[Attrs], Attrs]=lambda x: x) -> C:
user_config = cls.__base__
old_props = {
# NOTE: deliberately use gettatr to 'force' class properties here
- k: getattr(user_config, k)
- for k in vars(user_config)
+ k: getattr(user_config, k) for k in vars(user_config)
}
new_props = migration(old_props)
from dataclasses import fields
-
params = {
k: v
for k, v in new_props.items()
- if k in {f.name for f in fields(cls)} # type: ignore[arg-type] # see https://github.com/python/typing_extensions/issues/115
+ if k in {f.name for f in fields(cls)} # type: ignore[arg-type] # see https://github.com/python/typing_extensions/issues/115
}
# todo maybe return type here?
return cls(**params)
@@ -55,8 +51,6 @@ def _override_config(config: F) -> Iterator[F]:
ModuleRegex = str
-
-
@contextmanager
def _reload_modules(modules: ModuleRegex) -> Iterator[None]:
# need to use list here, otherwise reordering with set might mess things up
@@ -87,14 +81,13 @@ def _reload_modules(modules: ModuleRegex) -> Iterator[None]:
@contextmanager
-def tmp_config(*, modules: ModuleRegex | None = None, config=None):
+def tmp_config(*, modules: Optional[ModuleRegex]=None, config=None):
if modules is None:
assert config is None
if modules is not None:
assert config is not None
import my.config
-
with ExitStack() as module_reload_stack, _override_config(my.config) as new_config:
if config is not None:
overrides = {k: v for k, v in vars(config).items() if not k.startswith('__')}
@@ -109,7 +102,6 @@ def tmp_config(*, modules: ModuleRegex | None = None, config=None):
def test_tmp_config() -> None:
class extra:
data_path = '/path/to/data'
-
with tmp_config() as c:
assert c.google != 'whatever'
assert not hasattr(c, 'extra')
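
A sketch of the `migration` hook on `make_config` above: it receives the raw attributes collected from the user's config class and can rename legacy fields before the dataclass is instantiated. `export_dir` below is a made-up legacy name:

```python
from dataclasses import dataclass

from my.core.cfg import Attrs, make_config


class user_config:  # stand-in for a class coming from my.config
    export_dir = '/path/to/data'  # hypothetical legacy attribute name


@dataclass
class config(user_config):
    export_path: str = ''


def _migration(attrs: Attrs) -> Attrs:
    if 'export_dir' in attrs:  # rename the legacy attribute to the new name
        attrs['export_path'] = attrs.pop('export_dir')
    return attrs


cfg = make_config(config, migration=_migration)
assert cfg.export_path == '/path/to/data'
```
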
diff --git a/my/core/common.py b/my/core/common.py
index aa994ea..dcd1074 100644
--- a/my/core/common.py
+++ b/my/core/common.py
@@ -1,18 +1,21 @@
-from __future__ import annotations
-
import os
-from collections.abc import Iterable, Sequence
+import warnings
from glob import glob as do_glob
from pathlib import Path
from typing import (
TYPE_CHECKING,
Callable,
Generic,
+ Iterable,
+ List,
+ Sequence,
+ Tuple,
TypeVar,
Union,
)
-from . import compat, warnings
+from . import compat
+from . import warnings as core_warnings
# some helper functions
# TODO start deprecating this? soon we'd be able to use Path | str syntax which is shorter and more explicit
@@ -22,22 +25,19 @@ Paths = Union[Sequence[PathIsh], PathIsh]
DEFAULT_GLOB = '*'
-
-
def get_files(
- pp: Paths,
- glob: str = DEFAULT_GLOB,
- *,
- sort: bool = True,
- guess_compression: bool = True,
-) -> tuple[Path, ...]:
+ pp: Paths,
+ glob: str=DEFAULT_GLOB,
+ sort: bool=True,
+ guess_compression: bool=True,
+) -> Tuple[Path, ...]:
"""
Helper function to avoid boilerplate.
Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
"""
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
- sources: list[Path]
+ sources: List[Path]
if isinstance(pp, Path):
sources = [pp]
elif isinstance(pp, str):
@@ -54,7 +54,7 @@ def get_files(
# TODO ugh. very flaky... -3 because [, get_files(), ]
return traceback.extract_stack()[-3].filename
- paths: list[Path] = []
+ paths: List[Path] = []
for src in sources:
if src.parts[0] == '~':
src = src.expanduser()
@@ -62,9 +62,9 @@ def get_files(
gs = str(src)
if '*' in gs:
if glob != DEFAULT_GLOB:
- warnings.medium(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
- paths.extend(map(Path, do_glob(gs))) # noqa: PTH207
- elif os.path.isdir(str(src)): # noqa: PTH112
+ warnings.warn(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
+ paths.extend(map(Path, do_glob(gs)))
+ elif os.path.isdir(str(src)):
# NOTE: we're using os.path here on purpose instead of src.is_dir
# the reason is is_dir for archives might return True and then
# this clause would try globbing inside the archives
@@ -80,11 +80,11 @@ def get_files(
paths.append(src)
if sort:
- paths = sorted(paths)
+ paths = list(sorted(paths))
if len(paths) == 0:
# todo make it conditionally defensive based on some global settings
- warnings.high(f'''
+ core_warnings.high(f'''
{caller()}: no paths were matched against {pp}. This might result in missing data. Likely, the directory you passed is empty.
'''.strip())
# traceback is useful to figure out what config caused it?
@@ -133,8 +133,8 @@ def test_classproperty() -> None:
return 'hello'
res = C.prop
- assert_type(res, str)
assert res == 'hello'
+ assert_type(res, str)
# hmm, this doesn't really work with mypy well..
@@ -157,7 +157,7 @@ def get_valid_filename(s: str) -> str:
# TODO deprecate and suggest to use one from my.core directly? not sure
-from .utils.itertools import unique_everseen # noqa: F401
+from .utils.itertools import unique_everseen
### legacy imports, keeping them here for backwards compatibility
## hiding behind TYPE_CHECKING so it works in runtime
@@ -234,14 +234,16 @@ if not TYPE_CHECKING:
return types.asdict(*args, **kwargs)
# todo wrap these in deprecated decorator as well?
- # TODO hmm how to deprecate these in runtime?
- # tricky cause they are actually classes/types
- from typing import Literal # noqa: F401
-
from .cachew import mcachew # noqa: F401
# this is kinda internal, should just use my.core.logging.setup_logger if necessary
from .logging import setup_logger
+
+ # TODO hmm how to deprecate these in runtime?
+ # tricky cause they are actually classes/types
+
+ from typing import Literal # noqa: F401
+
from .stats import Stats
from .types import (
Json,
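
To recap the `get_files` behavior touched above: it accepts a single path, a glob string, or a sequence of either, expands `~`, and returns a sorted tuple (friendlier for hashing/caching). A few illustrative calls, with placeholder paths:

```python
from my.core.common import get_files

# a directory: its contents are matched against the default '*' glob
everything = get_files('~/data/hypothesis/')

# an explicit glob, applied inside the directory
jsons = get_files('~/data/hypothesis/', glob='*.json')

# a string already containing '*' is treated as a glob path itself;
# an explicit glob= argument would be ignored with a warning
exports = get_files('~/data/takeout/*.zip')

assert isinstance(jsons, tuple)  # sorted tuple, not a list
```
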
diff --git a/my/core/compat.py b/my/core/compat.py
index 8f719a8..4372a01 100644
--- a/my/core/compat.py
+++ b/my/core/compat.py
@@ -3,8 +3,6 @@ Contains backwards compatibility helpers for different python versions.
If something is relevant to HPI itself, please put it in .hpi_compat instead
'''
-from __future__ import annotations
-
import sys
from typing import TYPE_CHECKING
@@ -23,20 +21,20 @@ if not TYPE_CHECKING:
# TODO warn here?
source.backup(dest, **kwargs)
- # keeping for runtime backwards compatibility (added in 3.9)
- @deprecated('use .removeprefix method on string directly instead')
- def removeprefix(text: str, prefix: str) -> str:
- return text.removeprefix(prefix)
- @deprecated('use .removesuffix method on string directly instead')
- def removesuffix(text: str, suffix: str) -> str:
- return text.removesuffix(suffix)
+# can remove after python3.9 (although need to keep the method itself for bwd compat)
+def removeprefix(text: str, prefix: str) -> str:
+ if text.startswith(prefix):
+ return text[len(prefix) :]
+ return text
- ##
- ## used to have compat function before 3.8 for these, keeping for runtime back compatibility
+## used to have compat function before 3.8 for these, keeping for runtime back compatibility
+if not TYPE_CHECKING:
from functools import cached_property
from typing import Literal, Protocol, TypedDict
+else:
+ from typing_extensions import Literal, Protocol, TypedDict
##
@@ -49,13 +47,13 @@ else:
# bisect_left doesn't have a 'key' parameter (which we use)
# till python3.10
if sys.version_info[:2] <= (3, 9):
- from typing import Any, Callable, List, Optional, TypeVar # noqa: UP035
+ from typing import Any, Callable, List, Optional, TypeVar
X = TypeVar('X')
# copied from python src
# fmt: off
- def bisect_left(a: list[Any], x: Any, lo: int=0, hi: int | None=None, *, key: Callable[..., Any] | None=None) -> int:
+ def bisect_left(a: List[Any], x: Any, lo: int=0, hi: Optional[int]=None, *, key: Optional[Callable[..., Any]]=None) -> int:
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
@@ -127,10 +125,8 @@ def test_fromisoformat() -> None:
if sys.version_info[:2] >= (3, 10):
from types import NoneType
- from typing import TypeAlias
else:
NoneType = type(None)
- from typing_extensions import TypeAlias
if sys.version_info[:2] >= (3, 11):
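
The vendored `bisect_left` above exists because the stdlib version only grew its `key=` parameter in Python 3.10. A quick sketch of the use it supports, assuming the module resolves to the stdlib implementation on newer interpreters:

```python
# behaves like bisect.bisect_left(..., key=...) from python 3.10+
from my.core.compat import bisect_left

pairs = [(1, 'a'), (3, 'b'), (5, 'c')]
# insertion point for 4, comparing only the first element of each pair
idx = bisect_left(pairs, 4, key=lambda p: p[0])
assert idx == 2
```
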
diff --git a/my/core/core_config.py b/my/core/core_config.py
index 3f26c03..9036971 100644
--- a/my/core/core_config.py
+++ b/my/core/core_config.py
@@ -2,21 +2,18 @@
Bindings for the 'core' HPI configuration
'''
-from __future__ import annotations
-
import re
-from collections.abc import Sequence
from dataclasses import dataclass
from pathlib import Path
+from typing import Optional, Sequence
-from . import warnings
+from . import PathIsh, warnings
try:
from my.config import core as user_config # type: ignore[attr-defined]
except Exception as e:
try:
from my.config import common as user_config # type: ignore[attr-defined]
-
warnings.high("'common' config section is deprecated. Please rename it to 'core'.")
except Exception as e2:
# make it defensive, because it's pretty commonly used and would be annoying if it breaks hpi doctor etc.
@@ -27,7 +24,6 @@ except Exception as e:
_HPI_CACHE_DIR_DEFAULT = ''
-
@dataclass
class Config(user_config):
'''
@@ -38,7 +34,7 @@ class Config(user_config):
cache_dir = '/your/custom/cache/path'
'''
- cache_dir: Path | str | None = _HPI_CACHE_DIR_DEFAULT
+ cache_dir: Optional[PathIsh] = _HPI_CACHE_DIR_DEFAULT
'''
Base directory for cachew.
- if None , means cache is disabled
@@ -48,7 +44,7 @@ class Config(user_config):
NOTE: you shouldn't use this attribute in HPI modules directly, use Config.get_cache_dir()/cachew.cache_dir() instead
'''
- tmp_dir: Path | str | None = None
+ tmp_dir: Optional[PathIsh] = None
'''
Path to a temporary directory.
This can be used temporarily while extracting zipfiles etc...
@@ -56,36 +52,34 @@ class Config(user_config):
- otherwise , use the specified directory as the base temporary directory
'''
- enabled_modules: Sequence[str] | None = None
+ enabled_modules: Optional[Sequence[str]] = None
'''
list of regexes/globs
- None means 'rely on disabled_modules'
'''
- disabled_modules: Sequence[str] | None = None
+ disabled_modules: Optional[Sequence[str]] = None
'''
list of regexes/globs
- None means 'rely on enabled_modules'
'''
- def get_cache_dir(self) -> Path | None:
+ def get_cache_dir(self) -> Optional[Path]:
cdir = self.cache_dir
if cdir is None:
return None
if cdir == _HPI_CACHE_DIR_DEFAULT:
from .cachew import _appdirs_cache_dir
-
return _appdirs_cache_dir()
else:
return Path(cdir).expanduser()
def get_tmp_dir(self) -> Path:
- tdir: Path | str | None = self.tmp_dir
+ tdir: Optional[PathIsh] = self.tmp_dir
tpath: Path
# use tempfile if unset
if tdir is None:
import tempfile
-
tpath = Path(tempfile.gettempdir()) / 'HPI'
else:
tpath = Path(tdir)
@@ -93,10 +87,10 @@ class Config(user_config):
tpath.mkdir(parents=True, exist_ok=True)
return tpath
- def _is_module_active(self, module: str) -> bool | None:
+ def _is_module_active(self, module: str) -> Optional[bool]:
# None means the config doesn't specify anything
# todo might be nice to return the 'reason' too? e.g. which option has matched
- def matches(specs: Sequence[str]) -> str | None:
+ def matches(specs: Sequence[str]) -> Optional[str]:
for spec in specs:
# not sure because . (packages separate) matches anything, but I guess unlikely to clash
if re.match(spec, module):
@@ -112,10 +106,10 @@ class Config(user_config):
return None
else:
return False
- else: # not None
+ else: # not None
if off is None:
return True
- else: # not None
+ else: # not None
# fallback onto the 'enable everything', then the user will notice
warnings.medium(f"[module]: conflicting regexes '{on}' and '{off}' are set in the config. Please only use one of them.")
return True
@@ -127,8 +121,8 @@ config = make_config(Config)
### tests start
-from collections.abc import Iterator
from contextlib import contextmanager as ctx
+from typing import Iterator
@ctx
@@ -169,5 +163,4 @@ def test_active_modules() -> None:
assert cc._is_module_active("my.body.exercise") is True
assert len(record_warnings) == 1
-
### tests end
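
For reference, a sketch of the `core` block in a user config that these options describe; the module names are placeholders, and the patterns are matched with `re.match` as in `_is_module_active` above:

```python
# hypothetical 'core' section of a personal my/config/__init__.py
class core:
    cache_dir = '~/.cache/my'  # None would disable cachew entirely
    tmp_dir = None             # None -> a subdirectory of tempfile.gettempdir()

    # regexes/globs matched against module names via re.match;
    # None for either one means 'rely on the other setting'
    enabled_modules = ['my.hypothesis', 'my.github.*']
    disabled_modules = ['my.rss.*']
```
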
diff --git a/my/core/denylist.py b/my/core/denylist.py
index c92f9a0..7ca0ddf 100644
--- a/my/core/denylist.py
+++ b/my/core/denylist.py
@@ -5,25 +5,23 @@ A helper module for defining denylists for sources programmatically
For docs, see doc/DENYLIST.md
"""
-from __future__ import annotations
-
import functools
import json
import sys
from collections import defaultdict
-from collections.abc import Iterator, Mapping
from pathlib import Path
-from typing import Any, TypeVar
+from typing import Any, Dict, Iterator, List, Mapping, Set, TypeVar
import click
from more_itertools import seekable
-from .serialize import dumps
-from .warnings import medium
+from my.core.common import PathIsh
+from my.core.serialize import dumps
+from my.core.warnings import medium
T = TypeVar("T")
-DenyMap = Mapping[str, set[Any]]
+DenyMap = Mapping[str, Set[Any]]
def _default_key_func(obj: T) -> str:
@@ -31,9 +29,9 @@ def _default_key_func(obj: T) -> str:
class DenyList:
- def __init__(self, denylist_file: Path | str) -> None:
+ def __init__(self, denylist_file: PathIsh):
self.file = Path(denylist_file).expanduser().absolute()
- self._deny_raw_list: list[dict[str, Any]] = []
+ self._deny_raw_list: List[Dict[str, Any]] = []
self._deny_map: DenyMap = defaultdict(set)
# deny cli, user can override these
@@ -47,7 +45,7 @@ class DenyList:
return
deny_map: DenyMap = defaultdict(set)
- data: list[dict[str, Any]] = json.loads(self.file.read_text())
+ data: List[Dict[str, Any]] = json.loads(self.file.read_text())
self._deny_raw_list = data
for ignore in data:
@@ -98,7 +96,6 @@ class DenyList:
def filter(
self,
itr: Iterator[T],
- *,
invert: bool = False,
) -> Iterator[T]:
denyf = functools.partial(self._allow, deny_map=self.load())
@@ -106,7 +103,7 @@ class DenyList:
return filter(lambda x: not denyf(x), itr)
return filter(denyf, itr)
- def deny(self, key: str, value: Any, *, write: bool = False) -> None:
+ def deny(self, key: str, value: Any, write: bool = False) -> None:
'''
add a key/value pair to the denylist
'''
@@ -114,7 +111,7 @@ class DenyList:
self._load()
self._deny_raw({key: self._stringify_value(value)}, write=write)
- def _deny_raw(self, data: dict[str, Any], *, write: bool = False) -> None:
+ def _deny_raw(self, data: Dict[str, Any], write: bool = False) -> None:
self._deny_raw_list.append(data)
if write:
self.write()
@@ -133,7 +130,7 @@ class DenyList:
def _deny_cli_remember(
self,
items: Iterator[T],
- mem: dict[str, T],
+ mem: Dict[str, T],
) -> Iterator[str]:
keyf = self._deny_cli_key_func or _default_key_func
# i.e., convert each item to a string, and map str -> item
@@ -159,8 +156,10 @@ class DenyList:
# reset the iterator
sit.seek(0)
# so we can map the selected string from fzf back to the original objects
- memory_map: dict[str, T] = {}
- picker = FzfPrompt(executable_path=self.fzf_path, default_options="--no-multi")
+ memory_map: Dict[str, T] = {}
+ picker = FzfPrompt(
+ executable_path=self.fzf_path, default_options="--no-multi"
+ )
picked_l = picker.prompt(
self._deny_cli_remember(itr, memory_map),
"--read0",
diff --git a/my/core/discovery_pure.py b/my/core/discovery_pure.py
index 18a19c4..63d9922 100644
--- a/my/core/discovery_pure.py
+++ b/my/core/discovery_pure.py
@@ -10,8 +10,6 @@ This potentially allows it to be:
It should be free of external modules, importlib, exec, etc. etc.
'''
-from __future__ import annotations
-
REQUIRES = 'REQUIRES'
NOT_HPI_MODULE_VAR = '__NOT_HPI_MODULE__'
@@ -21,9 +19,8 @@ import ast
import logging
import os
import re
-from collections.abc import Iterable, Sequence
from pathlib import Path
-from typing import Any, NamedTuple, Optional, cast
+from typing import Any, Iterable, List, NamedTuple, Optional, Sequence, cast
'''
None means that requirements weren't defined (different from empty requirements)
@@ -33,11 +30,11 @@ Requires = Optional[Sequence[str]]
class HPIModule(NamedTuple):
name: str
- skip_reason: str | None
- doc: str | None = None
- file: Path | None = None
+ skip_reason: Optional[str]
+ doc: Optional[str] = None
+ file: Optional[Path] = None
requires: Requires = None
- legacy: str | None = None # contains reason/deprecation warning
+ legacy: Optional[str] = None # contains reason/deprecation warning
def ignored(m: str) -> bool:
@@ -58,13 +55,13 @@ def has_stats(src: Path) -> bool:
def _has_stats(code: str) -> bool:
a: ast.Module = ast.parse(code)
for x in a.body:
- try: # maybe assign
+ try: # maybe assign
[tg] = cast(Any, x).targets
if tg.id == 'stats':
return True
except:
pass
- try: # maybe def?
+ try: # maybe def?
name = cast(Any, x).name
if name == 'stats':
return True
@@ -147,7 +144,7 @@ def all_modules() -> Iterable[HPIModule]:
def _iter_my_roots() -> Iterable[Path]:
import my # doesn't import any code, because of namespace package
- paths: list[str] = list(my.__path__)
+ paths: List[str] = list(my.__path__)
if len(paths) == 0:
# should probably never happen?, if this code is running, it was imported
# because something was added to __path__ to match this name
@@ -245,7 +242,7 @@ def test_pure() -> None:
src = Path(__file__).read_text()
# 'import my' is allowed, but
# dont allow anything other HPI modules
- assert re.findall('import ' + r'my\.\S+', src, re.MULTILINE) == []
+ assert re.findall('import ' + r'my\.\S+', src, re.M) == []
assert 'from ' + 'my' not in src
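
The `_has_stats` helper above detects a `stats` definition purely from the ast, without importing the module. A small sketch of the two shapes it recognizes (it is an internal helper, used here just for illustration):

```python
from my.core.discovery_pure import _has_stats

as_function = '''
def stats():
    return {'events': 42}
'''

as_assignment = '''
stats = lambda: {'events': 42}
'''

assert _has_stats(as_function)    # the 'maybe def?' branch
assert _has_stats(as_assignment)  # the 'maybe assign' branch
assert not _has_stats('def not_stats(): pass')
```
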
diff --git a/my/core/error.py b/my/core/error.py
index b308869..c4dff07 100644
--- a/my/core/error.py
+++ b/my/core/error.py
@@ -3,16 +3,19 @@ Various error handling helpers
See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
"""
-from __future__ import annotations
-
import traceback
-from collections.abc import Iterable, Iterator
from datetime import datetime
from itertools import tee
from typing import (
Any,
Callable,
+ Iterable,
+ Iterator,
+ List,
Literal,
+ Optional,
+ Tuple,
+ Type,
TypeVar,
Union,
cast,
@@ -30,7 +33,7 @@ Res = ResT[T, Exception]
ErrorPolicy = Literal["yield", "raise", "drop"]
-def notnone(x: T | None) -> T:
+def notnone(x: Optional[T]) -> T:
assert x is not None
return x
@@ -38,7 +41,8 @@ def notnone(x: T | None) -> T:
def unwrap(res: Res[T]) -> T:
if isinstance(res, Exception):
raise res
- return res
+ else:
+ return res
def drop_exceptions(itr: Iterator[Res[T]]) -> Iterator[T]:
@@ -57,15 +61,13 @@ def raise_exceptions(itr: Iterable[Res[T]]) -> Iterator[T]:
yield o
-def warn_exceptions(itr: Iterable[Res[T]], warn_func: Callable[[Exception], None] | None = None) -> Iterator[T]:
+def warn_exceptions(itr: Iterable[Res[T]], warn_func: Optional[Callable[[Exception], None]] = None) -> Iterator[T]:
# if not provided, use the 'warnings' module
if warn_func is None:
from my.core.warnings import medium
-
def _warn_func(e: Exception) -> None:
# TODO: print traceback? but user could always --raise-exceptions as well
medium(str(e))
-
warn_func = _warn_func
for o in itr:
@@ -80,7 +82,7 @@ def echain(ex: E, cause: Exception) -> E:
return ex
-def split_errors(l: Iterable[ResT[T, E]], ET: type[E]) -> tuple[Iterable[T], Iterable[E]]:
+def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
# TODO would be nice to have ET=Exception default? but it causes some mypy complaints?
vit, eit = tee(l)
# TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
@@ -98,9 +100,7 @@ def split_errors(l: Iterable[ResT[T, E]], ET: type[E]) -> tuple[Iterable[T], Ite
K = TypeVar('K')
-
-
-def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]]:
+def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]]:
"""
Sort a sequence potentially interleaved with errors/entries on which the key can't be computed.
The general idea is: the error sticks to the non-error entry that follows it
@@ -108,7 +108,7 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]
group = []
groups = []
for i in items:
- k: K | None
+ k: Optional[K]
try:
k = key(i)
except Exception: # error while computing key? dunno, might be nice to handle...
@@ -118,10 +118,10 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]
groups.append((k, group))
group = []
- results: list[Res[T]] = []
- for _v, grp in sorted(groups, key=lambda p: p[0]): # type: ignore[return-value, arg-type] # TODO SupportsLessThan??
+ results: List[Res[T]] = []
+ for v, grp in sorted(groups, key=lambda p: p[0]): # type: ignore[return-value, arg-type] # TODO SupportsLessThan??
results.extend(grp)
- results.extend(group) # handle last group (it will always be errors only)
+ results.extend(group) # handle last group (it will always be errors only)
return results
@@ -153,7 +153,7 @@ def test_sort_res_by() -> None:
Exc('last'),
]
- results2 = sort_res_by([*ress, 0], lambda x: int(x))
+ results2 = sort_res_by(ress + [0], lambda x: int(x))
assert results2 == [Exc('last'), 0] + results[:-1]
assert sort_res_by(['caba', 'a', 'aba', 'daba'], key=lambda x: len(x)) == ['a', 'aba', 'caba', 'daba']
@@ -163,20 +163,20 @@ def test_sort_res_by() -> None:
# helpers to associate timestamps with the errors (so something meaningful could be displayed on the plots, for example)
# todo document it under 'patterns' somewhere...
# todo proper typevar?
-def set_error_datetime(e: Exception, dt: datetime | None) -> None:
+def set_error_datetime(e: Exception, dt: Optional[datetime]) -> None:
if dt is None:
return
- e.args = (*e.args, dt)
+ e.args = e.args + (dt,)
# todo not sure if should return new exception?
-def attach_dt(e: Exception, *, dt: datetime | None) -> Exception:
+def attach_dt(e: Exception, *, dt: Optional[datetime]) -> Exception:
set_error_datetime(e, dt)
return e
# todo it might be problematic because might mess with timezones (when it's converted to string, it's converted to a shift)
-def extract_error_datetime(e: Exception) -> datetime | None:
+def extract_error_datetime(e: Exception) -> Optional[datetime]:
import re
for x in reversed(e.args):
@@ -201,12 +201,7 @@ def error_to_json(e: Exception) -> Json:
MODULE_SETUP_URL = 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#private-configuration-myconfig'
-def warn_my_config_import_error(
- err: ImportError | AttributeError,
- *,
- help_url: str | None = None,
- module_name: str | None = None,
-) -> bool:
+def warn_my_config_import_error(err: Union[ImportError, AttributeError], help_url: Optional[str] = None) -> bool:
"""
If the user tried to import something from my.config but it failed,
possibly due to missing the config block in my.config?
@@ -238,24 +233,10 @@ See {help_url}\
config_obj = cast(object, getattr(err, 'obj')) # the object that caused the attribute error
# e.g. active_browser for my.browser
nested_block_name = err.name
- errmsg = f"""You're likely missing the nested config block for '{getattr(config_obj, '__name__', str(config_obj))}.{nested_block_name}'.
-See {help_url} or check the corresponding module.py file for an example\
-"""
if config_obj.__module__ == 'my.config':
- click.secho(errmsg, fg='yellow', err=True)
- return True
- if module_name is not None and nested_block_name == module_name.split('.')[-1]:
- # this tries to cover cases like these
- # user config:
- # class location:
- # class via_ip:
- # accuracy = 10_000
- # then when we import it, we do something like
- # from my.config import location
- # user_config = location.via_ip
- # so if location is present, but via_ip is not, we get
- # AttributeError: type object 'location' has no attribute 'via_ip'
- click.secho(errmsg, fg='yellow', err=True)
+ click.secho(f"""You're likely missing the nested config block for '{getattr(config_obj, '__name__', str(config_obj))}.{nested_block_name}'.
+See {help_url} or check the corresponding module.py file for an example\
+""", fg='yellow', err=True)
return True
else:
click.echo(f"Unexpected error... {err}", err=True)
@@ -266,7 +247,7 @@ def test_datetime_errors() -> None:
import pytz # noqa: I001
dt_notz = datetime.now()
- dt_tz = datetime.now(tz=pytz.timezone('Europe/Amsterdam'))
+ dt_tz   = datetime.now(tz=pytz.timezone('Europe/Amsterdam'))
for dt in [dt_tz, dt_notz]:
e1 = RuntimeError('whatever')
assert extract_error_datetime(e1) is None
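
To recap the pattern this module supports: providers yield values interleaved with exceptions (`Res[T]`), and consumers pick a policy. A small sketch with made-up data:

```python
from typing import Iterator

from my.core.error import Res, drop_exceptions, unwrap


def events() -> Iterator[Res[int]]:
    yield 1
    yield RuntimeError('parse failure')  # yielded, not raised
    yield 2


assert list(drop_exceptions(events())) == [1, 2]

assert unwrap(1) == 1  # plain values pass through
try:
    unwrap(RuntimeError('boom'))  # wrapped errors are raised
except RuntimeError:
    pass
```
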
diff --git a/my/core/experimental.py b/my/core/experimental.py
index 0a1c3b4..1a78272 100644
--- a/my/core/experimental.py
+++ b/my/core/experimental.py
@@ -1,8 +1,6 @@
-from __future__ import annotations
-
import sys
import types
-from typing import Any
+from typing import Any, Dict, Optional
# The idea behind this one is to support accessing "overlaid/shadowed" modules from namespace packages
@@ -22,7 +20,7 @@ def import_original_module(
file: str,
*,
star: bool = False,
- globals: dict[str, Any] | None = None,
+ globals: Optional[Dict[str, Any]] = None,
) -> types.ModuleType:
module_to_restore = sys.modules[module_name]
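
A hedged sketch of the intended call site for `import_original_module`: an overlay module that shadows an upstream one but still wants to execute it. The module name below is illustrative:

```python
# hypothetical overlay my/twitter/all.py shadowing the upstream module
from my.core.experimental import import_original_module

# re-executes the shadowed upstream my.twitter.all; star=True copies its
# public names into the provided globals so they stay importable from here
import_original_module('my.twitter.all', __file__, star=True, globals=globals())
```
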
diff --git a/my/core/freezer.py b/my/core/freezer.py
index 4fb0e25..e46525b 100644
--- a/my/core/freezer.py
+++ b/my/core/freezer.py
@@ -1,29 +1,29 @@
-from __future__ import annotations
+from .internal import assert_subpackage; assert_subpackage(__name__)
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
-
-import dataclasses
+import dataclasses as dcl
import inspect
-from typing import Any, Generic, TypeVar
+from typing import Any, Type, TypeVar
D = TypeVar('D')
-def _freeze_dataclass(Orig: type[D]):
- ofields = [(f.name, f.type, f) for f in dataclasses.fields(Orig)] # type: ignore[arg-type] # see https://github.com/python/typing_extensions/issues/115
+def _freeze_dataclass(Orig: Type[D]):
+ ofields = [(f.name, f.type, f) for f in dcl.fields(Orig)] # type: ignore[arg-type] # see https://github.com/python/typing_extensions/issues/115
# extract properties along with their types
- props = list(inspect.getmembers(Orig, lambda o: isinstance(o, property)))
+ props   = list(inspect.getmembers(Orig, lambda o: isinstance(o, property)))
pfields = [(name, inspect.signature(getattr(prop, 'fget')).return_annotation) for name, prop in props]
# FIXME not sure about name?
# NOTE: sadly passing bases=[Orig] won't work, python won't let us override properties with fields
- RRR = dataclasses.make_dataclass('RRR', fields=[*ofields, *pfields])
+ RRR = dcl.make_dataclass('RRR', fields=[*ofields, *pfields])
# todo maybe even declare as slots?
return props, RRR
+# todo need some decorator thingie?
+from typing import Generic
+
+
class Freezer(Generic[D]):
'''
Some magic which converts dataclass properties into fields.
@@ -31,13 +31,13 @@ class Freezer(Generic[D]):
For now only supports dataclasses.
'''
- def __init__(self, Orig: type[D]) -> None:
+ def __init__(self, Orig: Type[D]) -> None:
self.Orig = Orig
self.props, self.Frozen = _freeze_dataclass(Orig)
def freeze(self, value: D) -> D:
pvalues = {name: getattr(value, name) for name, _ in self.props}
- return self.Frozen(**dataclasses.asdict(value), **pvalues) # type: ignore[call-overload] # see https://github.com/python/typing_extensions/issues/115
+ return self.Frozen(**dcl.asdict(value), **pvalues) # type: ignore[call-overload] # see https://github.com/python/typing_extensions/issues/115
### tests
@@ -45,7 +45,7 @@ class Freezer(Generic[D]):
# this needs to be defined here to prevent a mypy bug
# see https://github.com/python/mypy/issues/7281
-@dataclasses.dataclass
+@dcl.dataclass
class _A:
x: Any
@@ -60,10 +60,8 @@ class _A:
def test_freezer() -> None:
- val = _A(x={
- 'an_int': 123,
- 'an_any': [1, 2, 3],
- })
+
+ val = _A(x=dict(an_int=123, an_any=[1, 2, 3]))
af = Freezer(_A)
fval = af.freeze(val)
@@ -71,7 +69,6 @@ def test_freezer() -> None:
assert fd['typed'] == 123
assert fd['untyped'] == [1, 2, 3]
-
###
# TODO shit. what to do with exceptions?
diff --git a/my/core/hpi_compat.py b/my/core/hpi_compat.py
index 3687483..bad0b17 100644
--- a/my/core/hpi_compat.py
+++ b/my/core/hpi_compat.py
@@ -2,15 +2,11 @@
Contains various backwards compatibility/deprecation helpers relevant to HPI itself.
(as opposed to .compat module which implements compatibility between python versions)
"""
-
-from __future__ import annotations
-
import inspect
import os
import re
-from collections.abc import Iterator, Sequence
from types import ModuleType
-from typing import TypeVar
+from typing import Iterator, List, Optional, TypeVar
from . import warnings
@@ -18,7 +14,7 @@ from . import warnings
def handle_legacy_import(
parent_module_name: str,
legacy_submodule_name: str,
- parent_module_path: list[str],
+ parent_module_path: List[str],
) -> bool:
###
# this is to trick mypy into treating this as a proper namespace package
@@ -75,7 +71,7 @@ def pre_pip_dal_handler(
name: str,
e: ModuleNotFoundError,
cfg,
- requires: Sequence[str] = (),
+ requires=[],
) -> ModuleType:
'''
https://github.com/karlicoss/HPI/issues/79
@@ -120,141 +116,32 @@ V = TypeVar('V')
# named to be kinda consistent with more_itertools, e.g. more_itertools.always_iterable
class always_supports_sequence(Iterator[V]):
"""
- Helper to make migration from Sequence/List to Iterable/Iterator type backwards compatible in runtime
+ Helper to make migration from Sequence/List to Iterable/Iterator type backwards compatible
"""
def __init__(self, it: Iterator[V]) -> None:
- self._it = it
- self._list: list[V] | None = None
- self._lit: Iterator[V] | None = None
+ self.it = it
+ self._list: Optional[List] = None
- def __iter__(self) -> Iterator[V]: # noqa: PYI034
- if self._list is not None:
- self._lit = iter(self._list)
- return self
+ def __iter__(self) -> Iterator[V]:
+ return self.it.__iter__()
def __next__(self) -> V:
- if self._list is not None:
- assert self._lit is not None
- delegate = self._lit
- else:
- delegate = self._it
- return next(delegate)
+ return self.it.__next__()
def __getattr__(self, name):
- return getattr(self._it, name)
+ return getattr(self.it, name)
@property
- def _aslist(self) -> list[V]:
+ def aslist(self) -> List[V]:
if self._list is None:
- qualname = getattr(self._it, '__qualname__', '') # defensive just in case
+ qualname = getattr(self.it, '__qualname__', '') # defensive just in case
warnings.medium(f'Using {qualname} as list is deprecated. Migrate to iterative processing or call list() explicitly.')
- self._list = list(self._it)
-
- # this is necessary for list constructor to work correctly
- # since it's __iter__ first, then tries to compute length and then starts iterating...
- self._lit = iter(self._list)
+ self._list = list(self.it)
return self._list
def __len__(self) -> int:
- return len(self._aslist)
+ return len(self.aslist)
def __getitem__(self, i: int) -> V:
- return self._aslist[i]
-
-
-def test_always_supports_sequence_list_constructor() -> None:
- exhausted = 0
-
- def it() -> Iterator[str]:
- nonlocal exhausted
- yield from ['a', 'b', 'c']
- exhausted += 1
-
- sit = always_supports_sequence(it())
-
- # list constructor is a bit special... it's trying to compute length if it's available to optimize memory allocation
- # so, what's happening in this case is
- # - sit.__iter__ is called
- # - sit.__len__ is called
- # - sit.__next__ is called
- res = list(sit)
- assert res == ['a', 'b', 'c']
- assert exhausted == 1
-
- res = list(sit)
- assert res == ['a', 'b', 'c']
- assert exhausted == 1 # this will iterate over 'cached' list now, so original generator is only exhausted once
-
-
-def test_always_supports_sequence_indexing() -> None:
- exhausted = 0
-
- def it() -> Iterator[str]:
- nonlocal exhausted
- yield from ['a', 'b', 'c']
- exhausted += 1
-
- sit = always_supports_sequence(it())
-
- assert len(sit) == 3
- assert exhausted == 1
-
- assert sit[2] == 'c'
- assert sit[1] == 'b'
- assert sit[0] == 'a'
- assert exhausted == 1
-
- # a few tests to make sure list-like operations are working..
- assert list(sit) == ['a', 'b', 'c']
- assert [x for x in sit] == ['a', 'b', 'c'] # noqa: C416
- assert list(sit) == ['a', 'b', 'c']
- assert [x for x in sit] == ['a', 'b', 'c'] # noqa: C416
- assert exhausted == 1
-
-
-def test_always_supports_sequence_next() -> None:
- exhausted = 0
-
- def it() -> Iterator[str]:
- nonlocal exhausted
- yield from ['a', 'b', 'c']
- exhausted += 1
-
- sit = always_supports_sequence(it())
-
- x = next(sit)
- assert x == 'a'
- assert exhausted == 0
-
- x = next(sit)
- assert x == 'b'
- assert exhausted == 0
-
-
-def test_always_supports_sequence_iter() -> None:
- exhausted = 0
-
- def it() -> Iterator[str]:
- nonlocal exhausted
- yield from ['a', 'b', 'c']
- exhausted += 1
-
- sit = always_supports_sequence(it())
-
- for x in sit:
- assert x == 'a'
- break
-
- x = next(sit)
- assert x == 'b'
-
- assert exhausted == 0
-
- x = next(sit)
- assert x == 'c'
- assert exhausted == 0
-
- for _ in sit:
- raise RuntimeError # shouldn't trigger, just exhaust the iterator
- assert exhausted == 1
+ return self.aslist[i]
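
A sketch of the restored wrapper in use: providers that now return iterators keep working for legacy callers relying on `len()`/indexing, at the cost of a deprecation warning and materializing whatever the iterator has left into a list:

```python
from typing import Iterator

from my.core.hpi_compat import always_supports_sequence


def _events() -> Iterator[str]:
    yield from ['a', 'b', 'c']


events = always_supports_sequence(_events())

assert next(events) == 'a'  # plain iterator protocol, no warning
# list-like access: warns and materializes the remaining items
assert len(events) == 2
assert events[1] == 'c'
```
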
diff --git a/my/core/influxdb.py b/my/core/influxdb.py
index 78a439a..c39f6af 100644
--- a/my/core/influxdb.py
+++ b/my/core/influxdb.py
@@ -2,14 +2,9 @@
TODO doesn't really belong to 'core' morally, but can think of moving out later
'''
-from __future__ import annotations
+from .internal import assert_subpackage; assert_subpackage(__name__)
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
-
-from collections.abc import Iterable
-from typing import Any
+from typing import Any, Dict, Iterable, Optional
import click
@@ -26,7 +21,7 @@ class config:
RESET_DEFAULT = False
-def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None:
+def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_col: str='dt') -> None:
# todo infer dt column automatically, reuse in stat?
# it doesn't like dots, ends up with some syntax error?
measurement = measurement.replace('.', '_')
@@ -35,7 +30,6 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
db = config.db
from influxdb import InfluxDBClient # type: ignore
-
client = InfluxDBClient()
# todo maybe create if not exists?
# client.create_database(db)
@@ -46,7 +40,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
client.delete_series(database=db, measurement=measurement)
# TODO need to take schema here...
- cache: dict[str, bool] = {}
+ cache: Dict[str, bool] = {}
def good(f, v) -> bool:
c = cache.get(f)
@@ -65,9 +59,9 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
def dit() -> Iterable[Json]:
for i in it:
d = asdict(i)
- tags: Json | None = None
- tags_ = d.get('tags') # meh... handle in a more robust manner
- if tags_ is not None and isinstance(tags_, dict): # FIXME meh.
+ tags: Optional[Json] = None
+ tags_ = d.get('tags') # meh... handle in a more robust manner
+ if tags_ is not None and isinstance(tags_, dict): # FIXME meh.
del d['tags']
tags = tags_
@@ -78,19 +72,18 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
fields = filter_dict(d)
- yield {
- 'measurement': measurement,
+ yield dict(
+ measurement=measurement,
# TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
# hmm, so tags are autoindexed and might be faster?
# not sure what's the big difference though
# "fields are data and tags are metadata"
- 'tags': tags,
- 'time': dt,
- 'fields': fields,
- }
+ tags=tags,
+ time=dt,
+ fields=fields,
+ )
from more_itertools import chunked
-
# "The optimal batch size is 5000 lines of line protocol."
# some chunking is def necessary, otherwise it fails
inserted = 0
@@ -104,9 +97,9 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt
# todo "Specify timestamp precision when writing to InfluxDB."?
-def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> None:
+def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> None:
if name is None:
- assert callable(it) # generators have no name/module
+ assert callable(it) # generators have no name/module
name = f'{it.__module__}:{it.__name__}'
assert name is not None
@@ -116,7 +109,6 @@ def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> N
from itertools import tee
from more_itertools import first, one
-
it, x = tee(it)
f = first(x, default=None)
if f is None:
@@ -126,11 +118,9 @@ def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> N
# TODO can we reuse pandas code or something?
#
from .pandas import _as_columns
-
schema = _as_columns(type(f))
from datetime import datetime
-
dtex = RuntimeError(f'expected single datetime field. schema: {schema}')
dtf = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex)
@@ -145,9 +135,8 @@ def main() -> None:
@main.command(name='populate', short_help='populate influxdb')
@click.option('--reset', is_flag=True, help='Reset Influx measurements before inserting', show_default=True)
@click.argument('FUNCTION_NAME', type=str, required=True)
-def populate(*, function_name: str, reset: bool) -> None:
+def populate(function_name: str, reset: bool) -> None:
from .__main__ import _locate_functions_or_prompt
-
[provider] = list(_locate_functions_or_prompt([function_name]))
# todo could have a non-interactive version which populates from all data sources for the provider?
magic_fill(provider, reset=reset)
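
For reference, the shape fill() gives each point, plus the batching it applies (the values below are made up for illustration):

    from datetime import datetime, timezone
    from more_itertools import chunked

    # one point, as constructed in dit() above
    point = {
        'measurement': 'my_sleep',  # dots get replaced with underscores
        'tags': {'source': 'example'},  # indexed metadata
        'time': datetime(2020, 1, 1, tzinfo=timezone.utc).isoformat(),
        'fields': {'hours': 7.5},  # the actual data
    }

    # "the optimal batch size is 5000 lines of line protocol"
    batches = list(chunked([point] * 12000, 5000))
    assert [len(b) for b in batches] == [5000, 5000, 2000]
    # each batch then goes to client.write_points(batch, database=db)
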
diff --git a/my/core/init.py b/my/core/init.py
index 644c7b4..49148de 100644
--- a/my/core/init.py
+++ b/my/core/init.py
@@ -19,14 +19,13 @@ def setup_config() -> None:
from pathlib import Path
from .preinit import get_mycfg_dir
-
mycfg_dir = get_mycfg_dir()
if not mycfg_dir.exists():
warnings.warn(f"""
'my.config' package isn't found! (expected at '{mycfg_dir}'). This is likely to result in issues.
See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""".strip(), stacklevel=1)
+""".strip())
return
mpath = str(mycfg_dir)
@@ -44,12 +43,11 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-mo
except ImportError as ex:
# just in case... who knows what crazy setup users have
import logging
-
logging.exception(ex)
warnings.warn(f"""
Importing 'my.config' failed! (error: {ex}). This is likely to result in issues.
See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""", stacklevel=1)
+""")
else:
# defensive just in case -- __file__ may not be present if there is some dynamic magic involved
used_config_file = getattr(my.config, '__file__', None)
@@ -65,7 +63,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-mo
Expected my.config to be located at {mycfg_dir}, but instead its path is {used_config_path}.
This will likely cause issues down the line -- double check {mycfg_dir} structure.
See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""", stacklevel=1
+""",
)
diff --git a/my/core/kompress.py b/my/core/kompress.py
index 8accb2d..6ab3228 100644
--- a/my/core/kompress.py
+++ b/my/core/kompress.py
@@ -1,7 +1,4 @@
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
-
+from .internal import assert_subpackage; assert_subpackage(__name__)
from . import warnings
# do this later -- for now need to transition modules to avoid using kompress directly (e.g. ZipPath)
@@ -11,7 +8,10 @@ try:
from kompress import *
except ModuleNotFoundError as e:
if e.name == 'kompress':
- warnings.high('Please install kompress (pip3 install kompress). Falling onto vendorized kompress for now.')
+ warnings.high('Please install kompress (pip3 install kompress), it will be required in the future. Falling back on vendorized kompress for now.')
from ._deprecated.kompress import * # type: ignore[assignment]
else:
raise e
+
+# this is deprecated in kompress, keep here for backwards compatibility
+open = kopen # noqa: F405
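
The try/except above is the usual optional-dependency dance; note the e.name check, which makes sure we only fall back when kompress itself is missing, not when one of its own imports failed. A generic sketch of the same pattern (the module names here are just examples):

    import importlib
    import warnings

    def import_with_fallback(preferred: str, fallback: str):
        try:
            return importlib.import_module(preferred)
        except ModuleNotFoundError as e:
            if e.name != preferred:
                raise  # a transitive import failed -- don't mask that
            warnings.warn(f"please install {preferred}; falling back on {fallback}")
            return importlib.import_module(fallback)

    json = import_with_fallback('ujson', 'json')  # works whether or not ujson is present
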
diff --git a/my/core/konsume.py b/my/core/konsume.py
index 41b5a4e..ac1b100 100644
--- a/my/core/konsume.py
+++ b/my/core/konsume.py
@@ -5,21 +5,17 @@ This can potentially allow both for safer defensive parsing, and let you know if
TODO perhaps need to get some inspiration from linear logic to decide on a nice API...
'''
-from __future__ import annotations
-
from collections import OrderedDict
-from typing import Any
+from typing import Any, List
def ignore(w, *keys):
for k in keys:
w[k].ignore()
-
def zoom(w, *keys):
return [w[k].zoom() for k in keys]
-
# TODO need to support lists
class Zoomable:
def __init__(self, parent, *args, **kwargs) -> None:
@@ -44,7 +40,7 @@ class Zoomable:
assert self.parent is not None
self.parent._remove(self)
- def zoom(self) -> Zoomable:
+ def zoom(self) -> 'Zoomable':
self.consume()
return self
@@ -67,7 +63,6 @@ class Wdict(Zoomable, OrderedDict):
def this_consumed(self):
return len(self) == 0
-
# TODO specify mypy type for the index special method?
@@ -82,7 +77,6 @@ class Wlist(Zoomable, list):
def this_consumed(self):
return len(self) == 0
-
class Wvalue(Zoomable):
def __init__(self, parent, value: Any) -> None:
super().__init__(parent)
@@ -93,20 +87,23 @@ class Wvalue(Zoomable):
return []
def this_consumed(self):
- return True # TODO not sure..
+ return True # TODO not sure..
def __repr__(self):
return 'WValue{' + repr(self.value) + '}'
-def _wrap(j, parent=None) -> tuple[Zoomable, list[Zoomable]]:
+from typing import Tuple
+
+
+def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
res: Zoomable
- cc: list[Zoomable]
+ cc: List[Zoomable]
if isinstance(j, dict):
res = Wdict(parent)
cc = [res]
for k, v in j.items():
- vv, c = _wrap(v, parent=res)
+ vv, c = _wrap(v, parent=res)
res[k] = vv
cc.extend(c)
return res, cc
@@ -125,23 +122,22 @@ def _wrap(j, parent=None) -> tuple[Zoomable, list[Zoomable]]:
raise RuntimeError(f'Unexpected type: {type(j)} {j}')
-from collections.abc import Iterator
from contextlib import contextmanager
+from typing import Iterator
class UnconsumedError(Exception):
pass
-
# TODO think about error policy later...
@contextmanager
-def wrap(j, *, throw=True) -> Iterator[Zoomable]:
+def wrap(j, throw=True) -> Iterator[Zoomable]:
w, children = _wrap(j)
yield w
for c in children:
- if not c.this_consumed(): # TODO hmm. how does it figure out if it's consumed???
+ if not c.this_consumed(): # TODO hmm. how does it figure out if it's consumed???
if throw:
# TODO need to keep a full path or something...
raise UnconsumedError(f'''
@@ -157,7 +153,6 @@ from typing import cast
def test_unconsumed() -> None:
import pytest
-
with pytest.raises(UnconsumedError):
with wrap({'a': 1234}) as w:
w = cast(Wdict, w)
@@ -168,7 +163,6 @@ def test_unconsumed() -> None:
w = cast(Wdict, w)
d = w['c']['d'].zoom()
-
def test_consumed() -> None:
with wrap({'a': 1234}) as w:
w = cast(Wdict, w)
@@ -179,7 +173,6 @@ def test_consumed() -> None:
c = w['c'].zoom()
d = c['d'].zoom()
-
def test_types() -> None:
# (string, number, object, array, boolean or null)
with wrap({'string': 'string', 'number': 3.14, 'boolean': True, 'null': None, 'list': [1, 2, 3]}) as w:
@@ -188,10 +181,9 @@ def test_types() -> None:
w['number'].consume()
w['boolean'].zoom()
w['null'].zoom()
- for x in list(w['list'].zoom()): # TODO eh. how to avoid the extra list thing?
+ for x in list(w['list'].zoom()): # TODO eh. how to avoid the extra list thing?
x.consume()
-
def test_consume_all() -> None:
with wrap({'aaa': {'bbb': {'hi': 123}}}) as w:
w = cast(Wdict, w)
@@ -201,9 +193,11 @@ def test_consume_all() -> None:
def test_consume_few() -> None:
import pytest
-
pytest.skip('Will think about it later..')
- with wrap({'important': 123, 'unimportant': 'whatever'}) as w:
+ with wrap({
+ 'important': 123,
+ 'unimportant': 'whatever'
+ }) as w:
w = cast(Wdict, w)
w['important'].zoom()
w.consume_all()
@@ -212,7 +206,6 @@ def test_consume_few() -> None:
def test_zoom() -> None:
import pytest
-
with wrap({'aaa': 'whatever'}) as w:
w = cast(Wdict, w)
with pytest.raises(KeyError):
@@ -236,7 +229,7 @@ def test_zoom() -> None:
# - very flexible, easy to adjust behaviour
# - cons:
# - can forget to assert about extra entities etc, so error prone
-# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
+# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
# - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
# - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
# - TODO perhaps combine warnings somehow or at least only emit once per module?
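
A quick usage sketch of the API above, assuming my.core.konsume is importable: every value has to be zoomed or consumed, otherwise wrap() raises UnconsumedError on exit:

    from my.core.konsume import wrap

    data = {'status': 200, 'body': {'value': 42}}
    with wrap(data) as w:
        assert w['status'].zoom().value == 200
        body = w['body'].zoom()
        assert body['value'].zoom().value == 42
    # forgetting e.g. w['status'] would raise UnconsumedError here
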
diff --git a/my/core/logging.py b/my/core/logging.py
index 167a167..734c1e0 100644
--- a/my/core/logging.py
+++ b/my/core/logging.py
@@ -15,7 +15,7 @@ def test() -> None:
## prepare exception for later
try:
- None.whatever # type: ignore[attr-defined] # noqa: B018
+ None.whatever # type: ignore[attr-defined]
except Exception as e:
ex = e
##
@@ -146,7 +146,7 @@ def _setup_handlers_and_formatters(name: str) -> None:
# try colorlog first, so user gets nice colored logs
import colorlog
except ModuleNotFoundError:
- warnings.warn("You might want to 'pip install colorlog' for nice colored logs", stacklevel=1)
+ warnings.warn("You might want to 'pip install colorlog' for nice colored logs")
formatter = logging.Formatter(FORMAT_NOCOLOR)
else:
# log_color/reset are specific to colorlog
@@ -233,7 +233,7 @@ def get_enlighten():
try:
import enlighten # type: ignore[import-untyped]
except ModuleNotFoundError:
- warnings.warn("You might want to 'pip install enlighten' for a nice progress bar", stacklevel=1)
+ warnings.warn("You might want to 'pip install enlighten' for a nice progress bar")
return Mock()
@@ -250,7 +250,7 @@ if __name__ == '__main__':
test()
-## legacy/deprecated methods for backwards compatibility
+## legacy/deprecated methods for backwards compatibility
if not TYPE_CHECKING:
from .compat import deprecated
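
The Mock() fallback above is worth copying: if the optional progress-bar library is missing, call sites keep working unchanged because the mock silently absorbs all attribute access. A sketch:

    from unittest.mock import Mock

    try:
        import enlighten  # type: ignore
        manager = enlighten.get_manager()
    except ModuleNotFoundError:
        manager = Mock()  # counter()/update() calls become harmless no-ops

    counter = manager.counter(total=10, desc='items')  # works either way
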
diff --git a/my/core/mime.py b/my/core/mime.py
index 8235960..cf5bdf5 100644
--- a/my/core/mime.py
+++ b/my/core/mime.py
@@ -2,14 +2,11 @@
Utils for mime/filetype handling
"""
-from __future__ import annotations
-
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
+from .internal import assert_subpackage; assert_subpackage(__name__)
import functools
-from pathlib import Path
+
+from .common import PathIsh
@functools.lru_cache(1)
@@ -26,7 +23,7 @@ import mimetypes # todo do I need init()?
# todo wtf? fastermime thinks it's mime is application/json even if the extension is xz??
# whereas magic detects correctly: application/x-zstd and application/x-xz
-def fastermime(path: Path | str) -> str:
+def fastermime(path: PathIsh) -> str:
paths = str(path)
# mimetypes is faster, so try it first
(mime, _) = mimetypes.guess_type(paths)
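
Why the two-tier lookup: the extension-based guess is nearly free, but it knows nothing without a recognizable extension -- that's when fastermime falls back to contents-based detection (python-magic). For instance:

    import mimetypes

    assert mimetypes.guess_type('photo.jpg')[0] == 'image/jpeg'  # cheap, common case
    assert mimetypes.guess_type('mystery_blob')[0] is None  # no extension -> need magic
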
diff --git a/my/core/orgmode.py b/my/core/orgmode.py
index 96c09a4..d9a254c 100644
--- a/my/core/orgmode.py
+++ b/my/core/orgmode.py
@@ -1,42 +1,39 @@
"""
Various helpers for reading org-mode data
"""
-
from datetime import datetime
def parse_org_datetime(s: str) -> datetime:
s = s.strip('[]')
- for fmt, _cls in [
- ("%Y-%m-%d %a %H:%M", datetime),
- ("%Y-%m-%d %H:%M" , datetime),
- # todo not sure about these... fallback on 00:00?
- # ("%Y-%m-%d %a" , date),
- # ("%Y-%m-%d" , date),
+ for fmt, cl in [
+ ("%Y-%m-%d %a %H:%M", datetime),
+ ("%Y-%m-%d %H:%M" , datetime),
+ # todo not sure about these... fallback on 00:00?
+ # ("%Y-%m-%d %a" , date),
+ # ("%Y-%m-%d" , date),
]:
try:
return datetime.strptime(s, fmt)
except ValueError:
continue
- raise RuntimeError(f"Bad datetime string {s}")
+ else:
+ raise RuntimeError(f"Bad datetime string {s}")
# TODO I guess want to borrow inspiration from bs4? element type <-> tag; and similar logic for find_one, find_all
-from collections.abc import Iterable
-from typing import Callable, TypeVar
+from typing import Callable, Iterable, TypeVar
from orgparse import OrgNode
V = TypeVar('V')
-
def collect(n: OrgNode, cfun: Callable[[OrgNode], Iterable[V]]) -> Iterable[V]:
yield from cfun(n)
for c in n.children:
yield from collect(c, cfun)
-
from more_itertools import one
from orgparse.extra import Table
@@ -50,7 +47,7 @@ class TypedTable(Table):
tt = super().__new__(TypedTable)
tt.__dict__ = orig.__dict__
blocks = list(orig.blocks)
-        header = blocks[0] # first block is schema
+        header = blocks[0]  # first block is schema
if len(header) == 2:
# TODO later interpret first line as types
header = header[1:]
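
For reference, what parse_org_datetime does for an active-format org timestamp -- strip the brackets, then try the formats in order:

    from datetime import datetime

    s = '[2024-08-19 Mon 22:47]'
    parsed = datetime.strptime(s.strip('[]'), "%Y-%m-%d %a %H:%M")
    assert parsed == datetime(2024, 8, 19, 22, 47)
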
diff --git a/my/core/pandas.py b/my/core/pandas.py
index d444965..8abbb1f 100644
--- a/my/core/pandas.py
+++ b/my/core/pandas.py
@@ -1,27 +1,19 @@
'''
Various pandas helpers and convenience functions
'''
-
from __future__ import annotations
# todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
# NOTE: this file is meant to be importable without Pandas installed
import dataclasses
-from collections.abc import Iterable, Iterator
from datetime import datetime, timezone
from pprint import pformat
-from typing import (
- TYPE_CHECKING,
- Any,
- Callable,
- Literal,
- TypeVar,
-)
+from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Literal, Type, TypeVar
from decorator import decorator
-from . import warnings
-from .error import Res, error_to_json, extract_error_datetime
+from . import Res, warnings
+from .error import error_to_json, extract_error_datetime
from .logging import make_logger
from .types import Json, asdict
@@ -46,7 +38,7 @@ else:
S1 = Any
-def _check_dateish(s: SeriesT[S1]) -> Iterable[str]:
+def check_dateish(s: SeriesT[S1]) -> Iterable[str]:
import pandas as pd # noqa: F811 not actually a redefinition
ctype = s.dtype
@@ -70,37 +62,9 @@ def _check_dateish(s: SeriesT[S1]) -> Iterable[str]:
def test_check_dateish() -> None:
import pandas as pd
- from .compat import fromisoformat
-
- # empty series shouldn't warn
- assert list(_check_dateish(pd.Series([]))) == []
-
- # if no dateimes, shouldn't return any warnings
- assert list(_check_dateish(pd.Series([1, 2, 3]))) == []
-
- # all values are datetimes, shouldn't warn
- # fmt: off
- assert list(_check_dateish(pd.Series([
- fromisoformat('2024-08-19T01:02:03'),
- fromisoformat('2024-08-19T03:04:05'),
- ]))) == []
- # fmt: on
-
- # mixture of timezones -- should warn
- # fmt: off
- assert len(list(_check_dateish(pd.Series([
- fromisoformat('2024-08-19T01:02:03'),
- fromisoformat('2024-08-19T03:04:05Z'),
- ])))) == 1
- # fmt: on
-
- # TODO hmm. maybe this should actually warn?
- # fmt: off
- assert len(list(_check_dateish(pd.Series([
- 'whatever',
- fromisoformat('2024-08-19T01:02:03'),
- ])))) == 0
- # fmt: on
+ # todo just a dummy test to check it doesn't crash, need something meaningful
+ s1 = pd.Series([1, 2, 3])
+ list(check_dateish(s1))
# fmt: off
@@ -138,7 +102,7 @@ def check_dataframe(f: FuncT, error_col_policy: ErrorColPolicy = 'add_if_missing
# makes sense to keep super defensive
try:
for col, data in df.reset_index().items():
- for w in _check_dateish(data):
+ for w in check_dateish(data):
warnings.low(f"{tag}, column '{col}': {w}")
except Exception as e:
logger.exception(e)
@@ -162,7 +126,8 @@ def error_to_row(e: Exception, *, dt_col: str = 'dt', tz: timezone | None = None
return err_dict
-def _to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
+# todo not sure about naming
+def to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
for r in it:
if isinstance(r, Exception):
yield error_to_row(r)
@@ -175,10 +140,10 @@ def _to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
Schema = Any
-def _as_columns(s: Schema) -> dict[str, type]:
+def _as_columns(s: Schema) -> Dict[str, Type]:
# todo would be nice to extract properties; add tests for this as well
if dataclasses.is_dataclass(s):
- return {f.name: f.type for f in dataclasses.fields(s)} # type: ignore[misc] # ugh, why mypy thinks f.type can return str??
+ return {f.name: f.type for f in dataclasses.fields(s)}
# else must be NamedTuple??
# todo assert my.core.common.is_namedtuple?
return getattr(s, '_field_types')
@@ -197,7 +162,7 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Schema | None = None) -> DataFr
import pandas as pd # noqa: F811 not actually a redefinition
columns = None if schema is None else list(_as_columns(schema).keys())
- return pd.DataFrame(_to_jsons(it), columns=columns)
+ return pd.DataFrame(to_jsons(it), columns=columns)
# ugh. in principle this could be inside the test
@@ -207,76 +172,20 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Schema | None = None) -> DataFr
# see https://github.com/pytest-dev/pytest/issues/7856
@dataclasses.dataclass
class _X:
- # FIXME try moving inside?
x: int
def test_as_dataframe() -> None:
- import numpy as np
- import pandas as pd
import pytest
- from pandas.testing import assert_frame_equal
- from .compat import fromisoformat
-
- it = ({'i': i, 's': f'str{i}'} for i in range(5))
+ it = (dict(i=i, s=f'str{i}') for i in range(10))
with pytest.warns(UserWarning, match=r"No 'error' column") as record_warnings: # noqa: F841
df: DataFrameT = as_dataframe(it)
# todo test other error col policies
+ assert list(df.columns) == ['i', 's', 'error']
- # fmt: off
- assert_frame_equal(
- df,
- pd.DataFrame({
- 'i' : [0 , 1 , 2 , 3 , 4 ],
- 's' : ['str0', 'str1', 'str2', 'str3', 'str4'],
- # NOTE: error column is always added
- 'error': [None , None , None , None , None ],
- }),
- )
- # fmt: on
- assert_frame_equal(as_dataframe([]), pd.DataFrame(columns=['error']))
+ assert len(as_dataframe([])) == 0
+ # makes sense to specify the schema so the downstream program doesn't fail in case of empty iterable
df2: DataFrameT = as_dataframe([], schema=_X)
- assert_frame_equal(
- df2,
- # FIXME hmm. x column type should be an int?? and error should be string (or object??)
- pd.DataFrame(columns=['x', 'error']),
- )
-
- @dataclasses.dataclass
- class S:
- value: str
-
- def it2() -> Iterator[Res[S]]:
- yield S(value='test')
- yield RuntimeError('i failed')
-
- df = as_dataframe(it2())
- # fmt: off
- assert_frame_equal(
- df,
- pd.DataFrame(data={
- 'value': ['test', np.nan ],
- 'error': [np.nan, 'RuntimeError: i failed\n'],
- 'dt' : [np.nan, np.nan ],
- }).astype(dtype={'dt': 'float'}), # FIXME should be datetime64 as below
- )
- # fmt: on
-
- def it3() -> Iterator[Res[S]]:
- yield S(value='aba')
- yield RuntimeError('whoops')
- yield S(value='cde')
- yield RuntimeError('exception with datetime', fromisoformat('2024-08-19T22:47:01Z'))
-
- df = as_dataframe(it3())
-
- # fmt: off
- assert_frame_equal(df, pd.DataFrame(data={
- 'value': ['aba' , np.nan , 'cde' , np.nan ],
- 'error': [np.nan, 'RuntimeError: whoops\n', np.nan, "RuntimeError: ('exception with datetime', datetime.datetime(2024, 8, 19, 22, 47, 1, tzinfo=datetime.timezone.utc))\n"],
- # note: dt column is added even if errors don't have an associated datetime
- 'dt' : [np.nan, np.nan , np.nan, '2024-08-19 22:47:01+00:00'],
- }).astype(dtype={'dt': 'datetime64[ns, UTC]'}))
- # fmt: on
+ assert list(df2.columns) == ['x', 'error']
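
A minimal usage sketch of as_dataframe (assuming pandas is installed): exceptions in the input stream become rows rather than aborting, and the 'error' column is always present so downstream code can rely on it:

    from my.core.pandas import as_dataframe

    items = [{'x': 1}, RuntimeError('boom'), {'x': 2}]
    df = as_dataframe(items)
    assert 'error' in df.columns
    assert len(df) == 3  # the error is kept as a row, not silently dropped
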
diff --git a/my/core/preinit.py b/my/core/preinit.py
index eb3a34f..be5477b 100644
--- a/my/core/preinit.py
+++ b/my/core/preinit.py
@@ -8,7 +8,6 @@ def get_mycfg_dir() -> Path:
import os
import appdirs # type: ignore[import-untyped]
-
# not sure if that's necessary, i.e. could rely on PYTHONPATH instead
# on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path?
mvar = os.environ.get('MY_CONFIG')
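
The lookup order here: the MY_CONFIG env var wins, otherwise we fall back to the appdirs per-user config location. Roughly (a sketch, not the verbatim function -- the 'my' app name is an assumption):

    import os
    from pathlib import Path

    mvar = os.environ.get('MY_CONFIG')
    if mvar is not None:
        mycfg_dir = Path(mvar)
    else:
        import appdirs
        mycfg_dir = Path(appdirs.user_config_dir('my'))  # e.g. ~/.config/my on Linux
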
diff --git a/my/core/pytest.py b/my/core/pytest.py
index ad9e7d7..c73c71a 100644
--- a/my/core/pytest.py
+++ b/my/core/pytest.py
@@ -2,9 +2,7 @@
Helpers to prevent depending on pytest in runtime
"""
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
+from .internal import assert_subpackage; assert_subpackage(__name__)
import sys
import typing
@@ -17,7 +15,7 @@ if typing.TYPE_CHECKING or under_pytest:
parametrize = pytest.mark.parametrize
else:
- def parametrize(*_args, **_kwargs):
+ def parametrize(*args, **kwargs):
def wrapper(f):
return f
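
Outside pytest the stub above turns @parametrize into a no-op decorator, so modules with inline tests stay importable without pytest installed. Effectively:

    def parametrize(*args, **kwargs):
        def wrapper(f):
            return f  # nothing to expand without pytest around
        return wrapper

    @parametrize('n', [1, 2, 3])
    def double(n: int) -> int:
        return n * 2

    assert double(2) == 4  # still an ordinary function after decoration
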
diff --git a/my/core/query.py b/my/core/query.py
index 50724a7..cf85b1b 100644
--- a/my/core/query.py
+++ b/my/core/query.py
@@ -5,20 +5,23 @@ The main entrypoint to this library is the 'select' function below; try:
python3 -c "from my.core.query import select; help(select)"
"""
-from __future__ import annotations
-
import dataclasses
import importlib
import inspect
import itertools
-from collections.abc import Iterable, Iterator
from datetime import datetime
from typing import (
Any,
Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
NamedTuple,
Optional,
+ Tuple,
TypeVar,
+ Union,
)
import more_itertools
@@ -48,7 +51,6 @@ class Unsortable(NamedTuple):
class QueryException(ValueError):
"""Used to differentiate query-related errors, so the CLI interface is more expressive"""
-
pass
@@ -61,7 +63,7 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterab
"""
try:
mod = importlib.import_module(module_name)
- for fname, f in inspect.getmembers(mod, inspect.isfunction):
+ for (fname, f) in inspect.getmembers(mod, inspect.isfunction):
if fname == function_name:
return f
# in case the function is defined dynamically,
@@ -70,7 +72,7 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterab
if func is not None and callable(func):
return func
except Exception as e:
- raise QueryException(str(e)) # noqa: B904
+ raise QueryException(str(e))
raise QueryException(f"Could not find function '{function_name}' in '{module_name}'")
@@ -81,10 +83,10 @@ def locate_qualified_function(qualified_name: str) -> Callable[[], Iterable[ET]]
if "." not in qualified_name:
raise QueryException("Could not find a '.' in the function name, e.g. my.reddit.rexport.comments")
rdot_index = qualified_name.rindex(".")
- return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1 :])
+ return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1:])
-def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc | None:
+def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optional[OrderFunc]:
"""
Attempts to find an attribute which matches the 'where_function' on the object,
using some getattr/dict checks. Returns a function which when called with
@@ -112,7 +114,7 @@ def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc
if where(v):
return lambda o: o.get(k, default) # type: ignore[union-attr]
elif dataclasses.is_dataclass(obj):
- for field_name in obj.__annotations__.keys():
+ for (field_name, _annotation) in obj.__annotations__.items():
if where(getattr(obj, field_name)):
return lambda o: getattr(o, field_name, default)
elif is_namedtuple(obj):
@@ -129,13 +131,12 @@ def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc
def _generate_order_by_func(
- obj_res: Res[T],
- *,
- key: str | None = None,
- where_function: Where | None = None,
- default: U | None = None,
- force_unsortable: bool = False,
-) -> OrderFunc | None:
+ obj_res: Res[T],
+ key: Optional[str] = None,
+ where_function: Optional[Where] = None,
+ default: Optional[U] = None,
+ force_unsortable: bool = False,
+) -> Optional[OrderFunc]:
"""
Accepts an object Res[T] (Instance of some class or Exception)
@@ -200,7 +201,7 @@ pass 'drop_exceptions' to ignore exceptions""")
# user must provide either a key or a where predicate
if where_function is not None:
- func: OrderFunc | None = attribute_func(obj, where_function, default)
+ func: Optional[OrderFunc] = attribute_func(obj, where_function, default)
if func is not None:
return func
@@ -216,6 +217,8 @@ pass 'drop_exceptions' to ignore exceptions""")
return None # couldn't compute a OrderFunc for this class/instance
+
+
# currently using the 'key set' as a proxy for 'this is the same type of thing'
def _determine_order_by_value_key(obj_res: ET) -> Any:
"""
@@ -240,7 +243,7 @@ def _drop_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Iterator[ET]:
# try getting the first value from the iterator
# similar to my.core.common.warn_if_empty? this doesn't go through the whole iterator though
-def _peek_iter(itr: Iterator[ET]) -> tuple[ET | None, Iterator[ET]]:
+def _peek_iter(itr: Iterator[ET]) -> Tuple[Optional[ET], Iterator[ET]]:
itr = more_itertools.peekable(itr)
try:
first_item = itr.peek()
@@ -251,9 +254,9 @@ def _peek_iter(itr: Iterator[ET]) -> tuple[ET | None, Iterator[ET]]:
# similar to 'my.core.error.sort_res_by'?
-def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> tuple[Iterator[Unsortable], Iterator[ET]]:
- unsortable: list[Unsortable] = []
- sortable: list[ET] = []
+def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
+ unsortable: List[Unsortable] = []
+ sortable: List[ET] = []
for o in itr:
# if input to select was another select
if isinstance(o, Unsortable):
@@ -271,11 +274,10 @@ def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> tuple[Iterator[Un
# the second being items for which orderfunc returned a non-none value
def _handle_unsorted(
itr: Iterator[ET],
- *,
orderfunc: OrderFunc,
drop_unsorted: bool,
wrap_unsorted: bool
-) -> tuple[Iterator[Unsortable], Iterator[ET]]:
+) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
# prefer drop_unsorted to wrap_unsorted, if both were present
if drop_unsorted:
return iter([]), _drop_unsorted(itr, orderfunc)
@@ -290,16 +292,16 @@ def _handle_unsorted(
# different types. ***This consumes the iterator***, so
# you should definitely itertools.tee it beforehand
# as to not exhaust the values
-def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: U | None = None) -> OrderFunc:
+def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: Optional[U] = None) -> OrderFunc:
# TODO: add a kwarg to force lookup for every item? would sort of be like core.common.guess_datetime then
- order_by_lookup: dict[Any, OrderFunc] = {}
+ order_by_lookup: Dict[Any, OrderFunc] = {}
# need to go through a copy of the whole iterator here to
# pre-generate functions to support sorting mixed types
for obj_res in itr:
key: Any = _determine_order_by_value_key(obj_res)
if key not in order_by_lookup:
- keyfunc: OrderFunc | None = _generate_order_by_func(
+ keyfunc: Optional[OrderFunc] = _generate_order_by_func(
obj_res,
where_function=order_value,
default=default,
@@ -320,12 +322,12 @@ def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: U
def _handle_generate_order_by(
itr,
*,
- order_by: OrderFunc | None = None,
- order_key: str | None = None,
- order_value: Where | None = None,
- default: U | None = None,
-) -> tuple[OrderFunc | None, Iterator[ET]]:
- order_by_chosen: OrderFunc | None = order_by # if the user just supplied a function themselves
+ order_by: Optional[OrderFunc] = None,
+ order_key: Optional[str] = None,
+ order_value: Optional[Where] = None,
+ default: Optional[U] = None,
+) -> Tuple[Optional[OrderFunc], Iterator[ET]]:
+ order_by_chosen: Optional[OrderFunc] = order_by # if the user just supplied a function themselves
if order_by is not None:
return order_by, itr
if order_key is not None:
@@ -350,19 +352,19 @@ def _handle_generate_order_by(
def select(
- src: Iterable[ET] | Callable[[], Iterable[ET]],
+ src: Union[Iterable[ET], Callable[[], Iterable[ET]]],
*,
- where: Where | None = None,
- order_by: OrderFunc | None = None,
- order_key: str | None = None,
- order_value: Where | None = None,
- default: U | None = None,
+ where: Optional[Where] = None,
+ order_by: Optional[OrderFunc] = None,
+ order_key: Optional[str] = None,
+ order_value: Optional[Where] = None,
+ default: Optional[U] = None,
reverse: bool = False,
- limit: int | None = None,
+ limit: Optional[int] = None,
drop_unsorted: bool = False,
wrap_unsorted: bool = True,
warn_exceptions: bool = False,
- warn_func: Callable[[Exception], None] | None = None,
+ warn_func: Optional[Callable[[Exception], None]] = None,
drop_exceptions: bool = False,
raise_exceptions: bool = False,
) -> Iterator[ET]:
@@ -464,7 +466,7 @@ Will attempt to call iter() on the value""")
try:
itr: Iterator[ET] = iter(it)
except TypeError as t:
- raise QueryException("Could not convert input src to an Iterator: " + str(t)) # noqa: B904
+ raise QueryException("Could not convert input src to an Iterator: " + str(t))
# if both drop_exceptions and raise_exceptions are provided for some reason,
# should raise exceptions before dropping them
@@ -501,12 +503,7 @@ Will attempt to call iter() on the value""")
# note: can't just attach sort unsortable values in the same iterable as the
# other items because they don't have any lookups for order_key or functions
# to handle items in the order_by_lookup dictionary
- unsortable, itr = _handle_unsorted(
- itr,
- orderfunc=order_by_chosen,
- drop_unsorted=drop_unsorted,
- wrap_unsorted=wrap_unsorted,
- )
+ unsortable, itr = _handle_unsorted(itr, order_by_chosen, drop_unsorted, wrap_unsorted)
# run the sort, with the computed order by function
itr = iter(sorted(itr, key=order_by_chosen, reverse=reverse)) # type: ignore[arg-type]
@@ -613,7 +610,7 @@ class _B(NamedTuple):
# move these to tests/? They are re-used so much in the tests below,
# not sure where the best place for these is
-def _mixed_iter() -> Iterator[_A | _B]:
+def _mixed_iter() -> Iterator[Union[_A, _B]]:
yield _A(x=datetime(year=2009, month=5, day=10, hour=4, minute=10, second=1), y=5, z=10)
yield _B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))
yield _A(x=datetime(year=2005, month=5, day=10, hour=4, minute=10, second=1), y=10, z=2)
@@ -622,7 +619,7 @@ def _mixed_iter() -> Iterator[_A | _B]:
yield _A(x=datetime(year=2005, month=4, day=10, hour=4, minute=10, second=1), y=2, z=-5)
-def _mixed_iter_errors() -> Iterator[Res[_A | _B]]:
+def _mixed_iter_errors() -> Iterator[Res[Union[_A, _B]]]:
m = _mixed_iter()
yield from itertools.islice(m, 0, 3)
yield RuntimeError("Unhandled error!")
@@ -658,7 +655,7 @@ def test_wrap_unsortable() -> None:
# by default, wrap unsortable
res = list(select(_mixed_iter(), order_key="z"))
- assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 2})
+ assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 2})
def test_disabled_wrap_unsorted() -> None:
@@ -677,7 +674,7 @@ def test_drop_unsorted() -> None:
# test drop unsortable, should remove them before the 'sorted' call
res = list(select(_mixed_iter(), order_key="z", wrap_unsorted=False, drop_unsorted=True))
assert len(res) == 4
- assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4})
+ assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4})
def test_drop_exceptions() -> None:
@@ -708,9 +705,9 @@ def test_wrap_unsortable_with_error_and_warning() -> None:
# by default should wrap unsortable (error)
with pytest.warns(UserWarning, match=r"encountered exception"):
res = list(select(_mixed_iter_errors(), order_value=lambda o: isinstance(o, datetime)))
- assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
+ assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
# compare the returned error wrapped in the Unsortable
- returned_error = next(o for o in res if isinstance(o, Unsortable)).obj
+ returned_error = next((o for o in res if isinstance(o, Unsortable))).obj
assert "Unhandled error!" == str(returned_error)
@@ -720,7 +717,7 @@ def test_order_key_unsortable() -> None:
# both unsortable and items which don't match the order_by (order_key) in this case should be classified unsorted
res = list(select(_mixed_iter_errors(), order_key="z"))
- assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 3})
+ assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 3})
def test_order_default_param() -> None:
@@ -740,7 +737,7 @@ def test_no_recursive_unsortables() -> None:
# select to select as input, wrapping unsortables the first time, second should drop them
# reverse=True to send errors to the end, so the below order_key works
res = list(select(_mixed_iter_errors(), order_key="z", reverse=True))
- assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 3})
+ assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 3})
# drop_unsorted
dropped = list(select(res, order_key="z", drop_unsorted=True))
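
A small end-to-end sketch of select() with mixed data: items missing the order key get wrapped as Unsortable instead of blowing up the sort:

    from my.core.query import select, Unsortable

    data = [{'z': 3}, {'z': 1}, {'other': 'no z here'}, {'z': 2}]
    res = list(select(data, order_key='z'))
    assert [o['z'] for o in res if not isinstance(o, Unsortable)] == [1, 2, 3]
    assert sum(isinstance(o, Unsortable) for o in res) == 1  # wrapped, not dropped
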
diff --git a/my/core/query_range.py b/my/core/query_range.py
index 83728bf..d077225 100644
--- a/my/core/query_range.py
+++ b/my/core/query_range.py
@@ -7,14 +7,11 @@ filtered iterator
See the select_range function below
"""
-from __future__ import annotations
-
import re
import time
-from collections.abc import Iterator
from datetime import date, datetime, timedelta
-from functools import cache
-from typing import Any, Callable, NamedTuple
+from functools import lru_cache
+from typing import Any, Callable, Iterator, NamedTuple, Optional, Type
import more_itertools
@@ -28,9 +25,7 @@ from .query import (
select,
)
-timedelta_regex = re.compile(
-    r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$"
-)
+timedelta_regex = re.compile(r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$")
# https://stackoverflow.com/a/51916936
@@ -93,7 +88,7 @@ def parse_datetime_float(date_str: str) -> float:
# dateparser is a bit more lenient than the above, lets you type
# all sorts of dates as inputs
# https://github.com/scrapinghub/dateparser#how-to-use
- res: datetime | None = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
+ res: Optional[datetime] = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
if res is not None:
return res.timestamp()
@@ -103,7 +98,7 @@ def parse_datetime_float(date_str: str) -> float:
# probably DateLike input? but a user could specify an order_key
# which is an epoch timestamp or a float value which they
# expect to be converted to a datetime to compare
-@cache
+@lru_cache(maxsize=None)
def _datelike_to_float(dl: Any) -> float:
if isinstance(dl, datetime):
return dl.timestamp()
@@ -114,7 +109,7 @@ def _datelike_to_float(dl: Any) -> float:
try:
return parse_datetime_float(dl)
except QueryException as q:
- raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q)) # noqa: B904
+ raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q))
class RangeTuple(NamedTuple):
@@ -135,12 +130,11 @@ class RangeTuple(NamedTuple):
of the timeframe -- 'before'
- before and after - anything after 'after' and before 'before', acts as a time range
"""
-
# technically doesn't need to be Optional[Any],
# just to make it more clear these can be None
- after: Any | None
- before: Any | None
- within: Any | None
+ after: Optional[Any]
+ before: Optional[Any]
+ within: Optional[Any]
Converter = Callable[[Any], Any]
@@ -151,9 +145,9 @@ def _parse_range(
unparsed_range: RangeTuple,
end_parser: Converter,
within_parser: Converter,
- parsed_range: RangeTuple | None = None,
- error_message: str | None = None,
-) -> RangeTuple | None:
+ parsed_range: Optional[RangeTuple] = None,
+ error_message: Optional[str] = None
+) -> Optional[RangeTuple]:
if parsed_range is not None:
return parsed_range
@@ -182,11 +176,11 @@ def _create_range_filter(
end_parser: Converter,
within_parser: Converter,
attr_func: Where,
- parsed_range: RangeTuple | None = None,
- default_before: Any | None = None,
- value_coercion_func: Converter | None = None,
- error_message: str | None = None,
-) -> Where | None:
+ parsed_range: Optional[RangeTuple] = None,
+ default_before: Optional[Any] = None,
+ value_coercion_func: Optional[Converter] = None,
+ error_message: Optional[str] = None,
+) -> Optional[Where]:
"""
Handles:
- parsing the user input into values that are comparable to items the iterable returns
@@ -278,17 +272,17 @@ def _create_range_filter(
def select_range(
itr: Iterator[ET],
*,
- where: Where | None = None,
- order_key: str | None = None,
- order_value: Where | None = None,
- order_by_value_type: type | None = None,
- unparsed_range: RangeTuple | None = None,
+ where: Optional[Where] = None,
+ order_key: Optional[str] = None,
+ order_value: Optional[Where] = None,
+ order_by_value_type: Optional[Type] = None,
+ unparsed_range: Optional[RangeTuple] = None,
reverse: bool = False,
- limit: int | None = None,
+ limit: Optional[int] = None,
drop_unsorted: bool = False,
wrap_unsorted: bool = False,
warn_exceptions: bool = False,
- warn_func: Callable[[Exception], None] | None = None,
+ warn_func: Optional[Callable[[Exception], None]] = None,
drop_exceptions: bool = False,
raise_exceptions: bool = False,
) -> Iterator[ET]:
@@ -323,10 +317,9 @@ def select_range(
drop_exceptions=drop_exceptions,
raise_exceptions=raise_exceptions,
warn_exceptions=warn_exceptions,
- warn_func=warn_func,
- )
+ warn_func=warn_func)
- order_by_chosen: OrderFunc | None = None
+ order_by_chosen: Optional[OrderFunc] = None
# if the user didn't specify an attribute to order value, but specified a type
# we should search for on each value in the iterator
@@ -337,8 +330,6 @@ def select_range(
# if the user supplied a order_key, and/or we've generated an order_value, create
# the function that accesses that type on each value in the iterator
if order_key is not None or order_value is not None:
- # _generate_order_value_func internally here creates a copy of the iterator, which has to
- # be consumed in-case we're sorting by mixed types
order_by_chosen, itr = _handle_generate_order_by(itr, order_key=order_key, order_value=order_value)
# signifies that itr is empty -- can early return here
if order_by_chosen is None:
@@ -350,39 +341,37 @@ def select_range(
if order_by_chosen is None:
raise QueryException("""Can't order by range if we have no way to order_by!
Specify a type or a key to order the value by""")
-
- # force drop_unsorted=True so we can use _create_range_filter
- # sort the iterable by the generated order_by_chosen function
- itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
- filter_func: Where | None
- if order_by_value_type in [datetime, date]:
- filter_func = _create_range_filter(
- unparsed_range=unparsed_range,
- end_parser=parse_datetime_float,
- within_parser=parse_timedelta_float,
- attr_func=order_by_chosen, # type: ignore[arg-type]
- default_before=time.time(),
- value_coercion_func=_datelike_to_float,
- )
- elif order_by_value_type in [int, float]:
- # allow primitives to be converted using the default int(), float() callables
- filter_func = _create_range_filter(
- unparsed_range=unparsed_range,
- end_parser=order_by_value_type,
- within_parser=order_by_value_type,
- attr_func=order_by_chosen, # type: ignore[arg-type]
- default_before=None,
- value_coercion_func=order_by_value_type,
- )
else:
- # TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
- # would need to allow passing the end_parser, within parser, default before and value_coercion_func...
- # (seems like a lot?)
- raise QueryException("Sorting by custom types is currently unsupported")
+ # force drop_unsorted=True so we can use _create_range_filter
+ # sort the iterable by the generated order_by_chosen function
+ itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
+ filter_func: Optional[Where]
+ if order_by_value_type in [datetime, date]:
+ filter_func = _create_range_filter(
+ unparsed_range=unparsed_range,
+ end_parser=parse_datetime_float,
+ within_parser=parse_timedelta_float,
+ attr_func=order_by_chosen, # type: ignore[arg-type]
+ default_before=time.time(),
+ value_coercion_func=_datelike_to_float)
+ elif order_by_value_type in [int, float]:
+ # allow primitives to be converted using the default int(), float() callables
+ filter_func = _create_range_filter(
+ unparsed_range=unparsed_range,
+ end_parser=order_by_value_type,
+ within_parser=order_by_value_type,
+ attr_func=order_by_chosen, # type: ignore[arg-type]
+ default_before=None,
+ value_coercion_func=order_by_value_type)
+ else:
+ # TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
+ # would need to allow passing the end_parser, within parser, default before and value_coercion_func...
+ # (seems like a lot?)
+ raise QueryException("Sorting by custom types is currently unsupported")
- # use the created filter function
- # we've already applied drop_exceptions and kwargs related to unsortable values above
- itr = select(itr, where=filter_func, limit=limit, reverse=reverse)
+ # use the created filter function
+ # we've already applied drop_exceptions and kwargs related to unsortable values above
+ itr = select(itr, where=filter_func, limit=limit, reverse=reverse)
else:
# wrap_unsorted may be used here if the user specified an order_key,
# or manually passed a order_value function
@@ -400,7 +389,7 @@ Specify a type or a key to order the value by""")
return itr
-# reuse items from query for testing
+# re-use items from query for testing
from .query import _A, _B, _Float, _mixed_iter_errors
@@ -481,7 +470,7 @@ def test_range_predicate() -> None:
# filter from 0 to 5
rn: RangeTuple = RangeTuple("0", "5", None)
- zero_to_five_filter: Where | None = int_filter_func(unparsed_range=rn)
+ zero_to_five_filter: Optional[Where] = int_filter_func(unparsed_range=rn)
assert zero_to_five_filter is not None
# this is just a Where function, given some input it return True/False if the value is allowed
assert zero_to_five_filter(3) is True
@@ -494,7 +483,6 @@ def test_range_predicate() -> None:
rn = RangeTuple(None, 3, "3.5")
assert list(filter(int_filter_func(unparsed_range=rn, attr_func=identity), src())) == ["0", "1", "2"]
-
def test_parse_range() -> None:
from functools import partial
@@ -538,8 +526,9 @@ def test_parse_timedelta_string() -> None:
def test_parse_datetime_float() -> None:
+
pnow = parse_datetime_float("now")
- sec_diff = abs(pnow - datetime.now().timestamp())
+ sec_diff = abs((pnow - datetime.now().timestamp()))
# should probably never fail? could mock time.time
# but there seem to be issues with mocking C functions (which time.time is)
# https://docs.python.org/3/library/unittest.mock-examples.html#partial-mocking
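
For reference, the named groups in the timedelta regex map straight onto timedelta kwargs, so strings like '5d8h' parse in one step:

    import re
    from datetime import timedelta

    timedelta_regex = re.compile(
        r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?"
        r"((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$"
    )

    parts = timedelta_regex.match("5d8h").groupdict()
    delta = timedelta(**{unit: float(v) for unit, v in parts.items() if v})
    assert delta == timedelta(days=5, hours=8)
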
diff --git a/my/core/serialize.py b/my/core/serialize.py
index e36da8f..b196d47 100644
--- a/my/core/serialize.py
+++ b/my/core/serialize.py
@@ -1,11 +1,9 @@
-from __future__ import annotations
-
import datetime
from dataclasses import asdict, is_dataclass
from decimal import Decimal
-from functools import cache
+from functools import lru_cache
from pathlib import Path
-from typing import Any, Callable, NamedTuple
+from typing import Any, Callable, NamedTuple, Optional
from .error import error_to_json
from .pytest import parametrize
@@ -59,12 +57,12 @@ def _default_encode(obj: Any) -> Any:
# could possibly run multiple times/raise warning if you provide different 'default'
# functions or change the kwargs? The alternative is to maintain all of this at the module
# level, which is just as annoying
-@cache
+@lru_cache(maxsize=None)
def _dumps_factory(**kwargs) -> Callable[[Any], str]:
use_default: DefaultEncoder = _default_encode
# if the user passed an additional 'default' parameter,
# try using that to serialize before falling back to _default_encode
- _additional_default: DefaultEncoder | None = kwargs.get("default")
+ _additional_default: Optional[DefaultEncoder] = kwargs.get("default")
if _additional_default is not None and callable(_additional_default):
def wrapped_default(obj: Any) -> Any:
@@ -80,9 +78,9 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
kwargs["default"] = use_default
- prefer_factory: str | None = kwargs.pop('_prefer_factory', None)
+ prefer_factory: Optional[str] = kwargs.pop('_prefer_factory', None)
- def orjson_factory() -> Dumps | None:
+ def orjson_factory() -> Optional[Dumps]:
try:
import orjson
except ModuleNotFoundError:
@@ -97,7 +95,7 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
return _orjson_dumps
- def simplejson_factory() -> Dumps | None:
+ def simplejson_factory() -> Optional[Dumps]:
try:
from simplejson import dumps as simplejson_dumps
except ModuleNotFoundError:
@@ -117,7 +115,7 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
return _simplejson_dumps
- def stdlib_factory() -> Dumps | None:
+ def stdlib_factory() -> Optional[Dumps]:
import json
from .warnings import high
@@ -147,12 +145,13 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
res = factory()
if res is not None:
return res
- raise RuntimeError("Should not happen!")
+ else:
+ raise RuntimeError("Should not happen!")
def dumps(
obj: Any,
- default: DefaultEncoder | None = None,
+ default: Optional[DefaultEncoder] = None,
**kwargs,
) -> str:
"""
diff --git a/my/core/source.py b/my/core/source.py
index a309d13..9488ae2 100644
--- a/my/core/source.py
+++ b/my/core/source.py
@@ -3,12 +3,9 @@ Decorator to gracefully handle importing a data source, or warning
and yielding nothing (or a default) when it's not available
"""
-from __future__ import annotations
-
import warnings
-from collections.abc import Iterable, Iterator
from functools import wraps
-from typing import Any, Callable, TypeVar
+from typing import Any, Callable, Iterable, Iterator, Optional, TypeVar
from .warnings import medium
@@ -29,8 +26,8 @@ _DEFAULT_ITR = ()
def import_source(
*,
default: Iterable[T] = _DEFAULT_ITR,
- module_name: str | None = None,
- help_url: str | None = None,
+ module_name: Optional[str] = None,
+ help_url: Optional[str] = None,
) -> Callable[..., Callable[..., Iterator[T]]]:
"""
doesn't really play well with types, but is used to catch
@@ -53,7 +50,6 @@ def import_source(
except (ImportError, AttributeError) as err:
from . import core_config as CC
from .error import warn_my_config_import_error
-
suppressed_in_conf = False
if module_name is not None and CC.config._is_module_active(module_name) is False:
suppressed_in_conf = True
@@ -65,18 +61,16 @@ def import_source(
warnings.warn(f"""If you don't want to use this module, to hide this message, add '{module_name}' to your core config disabled_modules in your config, like:
class core:
- disabled_modules = [{module_name!r}]
-""", stacklevel=1)
+ disabled_modules = [{repr(module_name)}]
+""")
# try to check if this is a config error or based on dependencies not being installed
if isinstance(err, (ImportError, AttributeError)):
- matched_config_err = warn_my_config_import_error(err, module_name=module_name, help_url=help_url)
+ matched_config_err = warn_my_config_import_error(err, help_url=help_url)
# if we determined this wasn't a config error, and it was an attribute error
# it could be *any* attribute error -- we should raise this since its otherwise a fatal error
# from some code in the module failing
if not matched_config_err and isinstance(err, AttributeError):
raise err
yield from default
-
return wrapper
-
return decorator
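
Usage sketch of import_source: if the decorated provider's imports fail (data source not installed or configured), it warns and yields the default instead of taking down the whole pipeline. The module name below is deliberately bogus:

    from my.core.source import import_source

    @import_source(default=[])
    def events():
        import module_that_is_not_installed  # hypothetical missing dependency
        yield from module_that_is_not_installed.events()

    assert list(events()) == []  # degraded gracefully instead of raising
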
diff --git a/my/core/sqlite.py b/my/core/sqlite.py
index 6167d2e..47bd78b 100644
--- a/my/core/sqlite.py
+++ b/my/core/sqlite.py
@@ -1,16 +1,12 @@
-from __future__ import annotations
+from .internal import assert_subpackage; assert_subpackage(__name__)
-from .internal import assert_subpackage # noqa: I001
-
-assert_subpackage(__name__)
import shutil
import sqlite3
-from collections.abc import Iterator
from contextlib import contextmanager
from pathlib import Path
from tempfile import TemporaryDirectory
-from typing import Any, Callable, Literal, Union, overload
+from typing import Any, Callable, Iterator, Literal, Optional, Tuple, Union, overload
from .common import PathIsh
from .compat import assert_never
@@ -26,7 +22,6 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None:
conn.execute('CREATE TABLE testtable (col)')
import pytest
-
with pytest.raises(sqlite3.OperationalError, match='readonly database'):
with sqlite_connect_immutable(db) as conn:
conn.execute('DROP TABLE testtable')
@@ -38,17 +33,15 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None:
SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]
-
def dict_factory(cursor, row):
fields = [column[0] for column in cursor.description]
- return dict(zip(fields, row))
+ return {key: value for key, value in zip(fields, row)}
Factory = Union[SqliteRowFactory, Literal['row', 'dict']]
-
@contextmanager
-def sqlite_connection(db: PathIsh, *, immutable: bool = False, row_factory: Factory | None = None) -> Iterator[sqlite3.Connection]:
+def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Optional[Factory]=None) -> Iterator[sqlite3.Connection]:
dbp = f'file:{db}'
# https://www.sqlite.org/draft/uri.html#uriimmutable
if immutable:
@@ -104,76 +97,31 @@ def sqlite_copy_and_open(db: PathIsh) -> sqlite3.Connection:
# and then the return type ends up as Iterator[Tuple[str, ...]], which isn't desirable :(
# a bit annoying to have this copy-pasting, but hopefully not a big issue
-# fmt: off
@overload
-def select(cols: tuple[str ], rest: str, *, db: sqlite3.Connection) -> \
- Iterator[tuple[Any ]]: ...
+def select(cols: Tuple[str ], rest: str, *, db: sqlite3.Connection) -> \
+ Iterator[Tuple[Any ]]: ...
@overload
-def select(cols: tuple[str, str ], rest: str, *, db: sqlite3.Connection) -> \
- Iterator[tuple[Any, Any ]]: ...
+def select(cols: Tuple[str, str ], rest: str, *, db: sqlite3.Connection) -> \
+ Iterator[Tuple[Any, Any ]]: ...
@overload
-def select(cols: tuple[str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
- Iterator[tuple[Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+ Iterator[Tuple[Any, Any, Any ]]: ...
@overload
-def select(cols: tuple[str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
- Iterator[tuple[Any, Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+ Iterator[Tuple[Any, Any, Any, Any ]]: ...
@overload
-def select(cols: tuple[str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
- Iterator[tuple[Any, Any, Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+ Iterator[Tuple[Any, Any, Any, Any, Any ]]: ...
@overload
-def select(cols: tuple[str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
- Iterator[tuple[Any, Any, Any, Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+ Iterator[Tuple[Any, Any, Any, Any, Any, Any ]]: ...
@overload
-def select(cols: tuple[str, str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
- Iterator[tuple[Any, Any, Any, Any, Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+ Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any ]]: ...
@overload
-def select(cols: tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \
- Iterator[tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
-# fmt: on
+def select(cols: Tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \
+ Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
def select(cols, rest, *, db):
# db arg is last cause that results in nicer code formatting..
return db.execute('SELECT ' + ','.join(cols) + ' ' + rest)
-
-
-class SqliteTool:
- def __init__(self, connection: sqlite3.Connection) -> None:
- self.connection = connection
-
- def _get_sqlite_master(self) -> dict[str, str]:
- res = {}
- for c in self.connection.execute('SELECT name, type FROM sqlite_master'):
- [name, type_] = c
- assert type_ in {'table', 'index', 'view', 'trigger'}, (name, type_) # just in case
- res[name] = type_
- return res
-
- def get_table_names(self) -> list[str]:
- master = self._get_sqlite_master()
- res = []
- for name, type_ in master.items():
- if type_ != 'table':
- continue
- res.append(name)
- return res
-
- def get_table_schema(self, name: str) -> dict[str, str]:
- """
- Returns map from column name to column type
-
- NOTE: Sometimes this doesn't work if the db has some extensions (e.g. happens for facebook apps)
- In this case you might still be able to use get_table_names
- """
- schema: dict[str, str] = {}
- for row in self.connection.execute(f'PRAGMA table_info(`{name}`)'):
- col = row[1]
- type_ = row[2]
- # hmm, somewhere between 3.34.1 and 3.37.2, sqlite started normalising type names to uppercase
- # let's do this just in case since python < 3.10 are using the old version
- # e.g. it could have returned 'blob' and that would confuse blob check (see _check_allowed_blobs)
- type_ = type_.upper()
- schema[col] = type_
- return schema
-
- def get_table_schemas(self) -> dict[str, dict[str, str]]:
- return {name: self.get_table_schema(name) for name in self.get_table_names()}
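
Usage sketch of the overloaded select() helper: the width of the column tuple pins the width of the yielded row tuples for the type checker, while at runtime it's just a plain SELECT:

    from my.core.sqlite import select, sqlite_connection

    with sqlite_connection(':memory:') as conn:
        conn.execute('CREATE TABLE t (a, b)')
        conn.execute("INSERT INTO t VALUES (1, 'x'), (2, 'y')")
        rows = list(select(('a', 'b'), 'FROM t ORDER BY a', db=conn))
    assert rows == [(1, 'x'), (2, 'y')]
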
diff --git a/my/core/stats.py b/my/core/stats.py
index a553db3..08821a2 100644
--- a/my/core/stats.py
+++ b/my/core/stats.py
@@ -2,13 +2,10 @@
Helpers for hpi doctor/stats functionality.
'''
-from __future__ import annotations
-
-import collections.abc
+import collections
import importlib
import inspect
import typing
-from collections.abc import Iterable, Iterator, Sequence
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
@@ -16,17 +13,24 @@ from types import ModuleType
from typing import (
Any,
Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
Protocol,
+ Sequence,
+ Union,
cast,
)
from .types import asdict
-Stats = dict[str, Any]
+Stats = Dict[str, Any]
class StatsFun(Protocol):
- def __call__(self, *, quick: bool = False) -> Stats: ...
+ def __call__(self, quick: bool = False) -> Stats: ...
# global state that turns on/off quick stats
@@ -51,10 +55,10 @@ def quick_stats():
def stat(
- func: Callable[[], Iterable[Any]] | Iterable[Any],
+ func: Union[Callable[[], Iterable[Any]], Iterable[Any]],
*,
quick: bool = False,
- name: str | None = None,
+ name: Optional[str] = None,
) -> Stats:
"""
Extracts various statistics from a passed iterable/callable, e.g.:
@@ -149,8 +153,8 @@ def test_stat() -> None:
#
-def get_stats(module_name: str, *, guess: bool = False) -> StatsFun | None:
- stats: StatsFun | None = None
+def get_stats(module_name: str, *, guess: bool = False) -> Optional[StatsFun]:
+ stats: Optional[StatsFun] = None
try:
module = importlib.import_module(module_name)
except Exception:
@@ -163,7 +167,7 @@ def get_stats(module_name: str, *, guess: bool = False) -> StatsFun | None:
# TODO maybe could be enough to annotate OUTPUTS or something like that?
# then stats could just use them as hints?
-def guess_stats(module: ModuleType) -> StatsFun | None:
+def guess_stats(module: ModuleType) -> Optional[StatsFun]:
"""
If the module doesn't have explicitly defined 'stat' function,
this is used to try to guess what could be included in stats automatically
@@ -172,7 +176,7 @@ def guess_stats(module: ModuleType) -> StatsFun | None:
if len(providers) == 0:
return None
- def auto_stats(*, quick: bool = False) -> Stats:
+ def auto_stats(quick: bool = False) -> Stats:
res = {}
for k, v in providers.items():
res.update(stat(v, quick=quick, name=k))
@@ -202,7 +206,7 @@ def test_guess_stats() -> None:
}
-def _guess_data_providers(module: ModuleType) -> dict[str, Callable]:
+def _guess_data_providers(module: ModuleType) -> Dict[str, Callable]:
mfunctions = inspect.getmembers(module, inspect.isfunction)
return {k: v for k, v in mfunctions if is_data_provider(v)}
@@ -259,7 +263,7 @@ def test_is_data_provider() -> None:
lam = lambda: [1, 2]
assert not idp(lam)
- def has_extra_args(count) -> list[int]:
+ def has_extra_args(count) -> List[int]:
return list(range(count))
assert not idp(has_extra_args)
@@ -336,10 +340,10 @@ def test_type_is_iterable() -> None:
assert not fun(None)
assert not fun(int)
assert not fun(Any)
- assert not fun(dict[int, int])
+ assert not fun(Dict[int, int])
- assert fun(list[int])
- assert fun(Sequence[dict[str, str]])
+ assert fun(List[int])
+ assert fun(Sequence[Dict[str, str]])
assert fun(Iterable[Any])
@@ -351,7 +355,7 @@ def _stat_item(item):
return _guess_datetime(item)
-def _stat_iterable(it: Iterable[Any], *, quick: bool = False) -> Stats:
+def _stat_iterable(it: Iterable[Any], quick: bool = False) -> Stats:
from more_itertools import first, ilen, take
# todo not sure if there is something in more_itertools to compute this?
@@ -410,9 +414,7 @@ def test_stat_iterable() -> None:
dd = datetime.fromtimestamp(123, tz=timezone.utc)
day = timedelta(days=3)
- class X(NamedTuple):
- x: int
- d: datetime
+ X = NamedTuple('X', [('x', int), ('d', datetime)])
def it():
yield RuntimeError('oops!')
@@ -430,13 +432,13 @@ def test_stat_iterable() -> None:
# experimental, not sure about it..
-def _guess_datetime(x: Any) -> datetime | None:
+def _guess_datetime(x: Any) -> Optional[datetime]:
# todo hmm implement without exception..
try:
d = asdict(x)
except: # noqa: E722 bare except
return None
- for v in d.values():
+ for k, v in d.items():
if isinstance(v, datetime):
return v
return None
@@ -450,12 +452,9 @@ def test_guess_datetime() -> None:
dd = fromisoformat('2021-02-01T12:34:56Z')
- class A(NamedTuple):
- x: int
-
- class B(NamedTuple):
- x: int
- created: datetime
+ # ugh.. https://github.com/python/mypy/issues/7281
+ A = NamedTuple('A', [('x', int)])
+ B = NamedTuple('B', [('x', int), ('created', datetime)])
assert _guess_datetime(A(x=4)) is None
assert _guess_datetime(B(x=4, created=dd)) == dd
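
To illustrate the stat()/Stats helpers above, a minimal sketch of how a module could define its stats function; items() is a hypothetical data provider:

from typing import Iterator

from my.core.stats import Stats, stat

def items() -> Iterator[int]:  # hypothetical provider
    yield from [1, 2, 3]

def stats() -> Stats:
    # stat() accepts either an iterable or a callable returning one;
    # passing the callable lets it derive the stat name from the function
    return stat(items)
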
diff --git a/my/core/structure.py b/my/core/structure.py
index bb049e4..df25e37 100644
--- a/my/core/structure.py
+++ b/my/core/structure.py
@@ -1,22 +1,18 @@
-from __future__ import annotations
-
import atexit
import os
import shutil
-import sys
-import tarfile
import tempfile
import zipfile
-from collections.abc import Generator, Sequence
from contextlib import contextmanager
from pathlib import Path
+from typing import Generator, List, Sequence, Tuple, Union
from .logging import make_logger
logger = make_logger(__name__, level="info")
-def _structure_exists(base_dir: Path, paths: Sequence[str], *, partial: bool = False) -> bool:
+def _structure_exists(base_dir: Path, paths: Sequence[str], partial: bool = False) -> bool:
"""
Helper function for match_structure to check if
all subpaths exist at some base directory
@@ -38,18 +34,17 @@ def _structure_exists(base_dir: Path, paths: Sequence[str], *, partial: bool = F
ZIP_EXT = {".zip"}
-TARGZ_EXT = {".tar.gz"}
@contextmanager
def match_structure(
base: Path,
- expected: str | Sequence[str],
+ expected: Union[str, Sequence[str]],
*,
partial: bool = False,
-) -> Generator[tuple[Path, ...], None, None]:
+) -> Generator[Tuple[Path, ...], None, None]:
"""
- Given a 'base' directory or archive (zip/tar.gz), recursively search for one or more paths that match the
+ Given a 'base' directory or zipfile, recursively search for one or more paths that match the
pattern described in 'expected'. That can be a single string, or a list
of relative paths (as strings) you expect at the same directory.
@@ -57,12 +52,12 @@ def match_structure(
expected be present, not all of them.
This reduces the chances of the user misconfiguring gdpr exports, e.g.
- if they archived the folders instead of the parent directory or vice-versa
+ if they zipped the folders instead of the parent directory or vice-versa
When this finds a matching directory structure, it stops searching in that subdirectory
and continues onto other possible subdirectories which could match
- If base is an archive, this extracts it into a temporary directory
+ If base is a zipfile, this extracts the zipfile into a temporary directory
(configured by core_config.config.get_tmp_dir), and then searches the extracted
folder for matching structures
@@ -72,21 +67,21 @@ def match_structure(
export_dir
├── exp_2020
- │ ├── channel_data
- │ │ ├── data1
- │ │ └── data2
- │ ├── index.json
- │ ├── messages
- │ │ └── messages.csv
- │ └── profile
- │ └── settings.json
+ │ ├── channel_data
+ │ │ ├── data1
+ │ │ └── data2
+ │ ├── index.json
+ │ ├── messages
+ │ │ └── messages.csv
+ │ └── profile
+ │ └── settings.json
└── exp_2021
├── channel_data
- │ ├── data1
- │ └── data2
+ │ ├── data1
+ │ └── data2
├── index.json
├── messages
- │ └── messages.csv
+ │ └── messages.csv
└── profile
└── settings.json
@@ -98,12 +93,12 @@ def match_structure(
This doesn't require an exhaustive list of expected values, but it's a good idea to supply
a complete picture of the expected structure to avoid false-positives
- This does not recursively decompress archives in the subdirectories,
- it only unpacks into a temporary directory if 'base' is an archive
+ This does not recursively unzip zipfiles in the subdirectories,
+ it only unzips into a temporary directory if 'base' is a zipfile
A common pattern for using this might be to use get_files to get a list
- of archives or top-level gdpr export directories, and use match_structure
- to search the resulting paths for an export structure you're expecting
+ of zipfiles or top-level gdpr export directories, and use match_structure
+ to search the resulting paths for an export structure you're expecting
"""
from . import core_config as CC
@@ -113,37 +108,29 @@ def match_structure(
expected = (expected,)
is_zip: bool = base.suffix in ZIP_EXT
- is_targz: bool = any(base.name.endswith(suffix) for suffix in TARGZ_EXT)
searchdir: Path = base.absolute()
try:
- # if the file given by the user is an archive, create a temporary
- # directory and extract it to that temporary directory
+ # if the file given by the user is a zipfile, create a temporary
+ # directory and extract the zipfile to that temporary directory
#
# this temporary directory is removed in the finally block
- if is_zip or is_targz:
+ if is_zip:
# sanity check before we start creating directories/rm-tree'ing things
- assert base.exists(), f"archive at {base} doesn't exist"
+ assert base.exists(), f"zipfile at {base} doesn't exist"
searchdir = Path(tempfile.mkdtemp(dir=tdir))
- if is_zip:
- # base might already be a ZipPath, and str(base) would end with /
- zf = zipfile.ZipFile(str(base).rstrip('/'))
- zf.extractall(path=str(searchdir))
- elif is_targz:
- with tarfile.open(str(base)) as tar:
- # filter is a security feature, will be required param in later python version
- mfilter = {'filter': 'data'} if sys.version_info[:2] >= (3, 12) else {}
- tar.extractall(path=str(searchdir), **mfilter) # type: ignore[arg-type]
- else:
- raise RuntimeError("can't happen")
+ # base might already be a ZipPath, and str(base) would end with /
+ zf = zipfile.ZipFile(str(base).rstrip('/'))
+ zf.extractall(path=str(searchdir))
+
else:
if not searchdir.is_dir():
- raise NotADirectoryError(f"Expected either a zip/tar.gz archive or a directory, received {searchdir}")
+ raise NotADirectoryError(f"Expected either a zipfile or a directory, received {searchdir}")
- matches: list[Path] = []
- possible_targets: list[Path] = [searchdir]
+ matches: List[Path] = []
+ possible_targets: List[Path] = [searchdir]
while len(possible_targets) > 0:
p = possible_targets.pop(0)
@@ -163,9 +150,9 @@ def match_structure(
finally:
- if is_zip or is_targz:
+ if is_zip:
# make sure we're not mistakenly deleting data
- assert str(searchdir).startswith(str(tdir)), f"Expected the temporary directory for extracting archive to start with the temporary directory prefix ({tdir}), found {searchdir}"
+ assert str(searchdir).startswith(str(tdir)), f"Expected the temporary directory for extracting zip to start with the temporary directory prefix ({tdir}), found {searchdir}"
shutil.rmtree(str(searchdir))
@@ -174,7 +161,7 @@ def warn_leftover_files() -> None:
from . import core_config as CC
base_tmp: Path = CC.config.get_tmp_dir()
- leftover: list[Path] = list(base_tmp.iterdir())
+ leftover: List[Path] = list(base_tmp.iterdir())
if leftover:
logger.debug(f"at exit warning: Found leftover files in temporary directory '{leftover}'. this may be because you have multiple hpi processes running -- if so this can be ignored")
diff --git a/my/core/tests/auto_stats.py b/my/core/tests/auto_stats.py
index fc49e03..d10d4c4 100644
--- a/my/core/tests/auto_stats.py
+++ b/my/core/tests/auto_stats.py
@@ -2,11 +2,11 @@
Helper 'module' for test_guess_stats
"""
-from collections.abc import Iterable, Iterator, Sequence
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
+from typing import Iterable, Iterator, Sequence
@dataclass
diff --git a/my/core/tests/common.py b/my/core/tests/common.py
index 073ea5f..22a74d7 100644
--- a/my/core/tests/common.py
+++ b/my/core/tests/common.py
@@ -1,8 +1,6 @@
-from __future__ import annotations
-
import os
-from collections.abc import Iterator
from contextlib import contextmanager
+from typing import Iterator, Optional
import pytest
@@ -17,7 +15,7 @@ skip_if_uses_optional_deps = pytest.mark.skipif(
# TODO maybe move to hpi core?
@contextmanager
-def tmp_environ_set(key: str, value: str | None) -> Iterator[None]:
+def tmp_environ_set(key: str, value: Optional[str]) -> Iterator[None]:
prev_value = os.environ.get(key)
if value is None:
os.environ.pop(key, None)
diff --git a/my/core/tests/denylist.py b/my/core/tests/denylist.py
index 73c3165..8016282 100644
--- a/my/core/tests/denylist.py
+++ b/my/core/tests/denylist.py
@@ -1,9 +1,8 @@
import json
import warnings
-from collections.abc import Iterator
from datetime import datetime
from pathlib import Path
-from typing import NamedTuple
+from typing import Iterator, NamedTuple
from ..denylist import DenyList
@@ -92,7 +91,8 @@ def test_denylist(tmp_path: Path) -> None:
assert "59.40.113.87" not in [i.addr for i in filtered]
- data_json = json.loads(tf.read_text())
+ with open(tf, "r") as f:
+ data_json = json.loads(f.read())
assert data_json == [
{
diff --git a/my/core/tests/structure.py b/my/core/tests/structure.py
index 741e0ea..6a94fc4 100644
--- a/my/core/tests/structure.py
+++ b/my/core/tests/structure.py
@@ -14,9 +14,8 @@ def test_gdpr_structure_exists() -> None:
assert results == (structure_data / "gdpr_subdirs" / "gdpr_export",)
-@pytest.mark.parametrize("archive", ["gdpr_export.zip", "gdpr_export.tar.gz"])
-def test_gdpr_unpack(archive: str) -> None:
- with match_structure(structure_data / archive, expected=gdpr_expected) as results:
+def test_gdpr_unzip() -> None:
+ with match_structure(structure_data / "gdpr_export.zip", expected=gdpr_expected) as results:
assert len(results) == 1
extracted = results[0]
index_file = extracted / "messages" / "index.csv"
@@ -33,6 +32,6 @@ def test_match_partial() -> None:
def test_not_directory() -> None:
- with pytest.raises(NotADirectoryError, match=r"Expected either a zip/tar.gz archive or a directory"):
+ with pytest.raises(NotADirectoryError, match=r"Expected either a zipfile or a directory"):
with match_structure(structure_data / "messages/index.csv", expected=gdpr_expected):
pass
diff --git a/my/core/tests/structure_data/gdpr_export.tar.gz b/my/core/tests/structure_data/gdpr_export.tar.gz
deleted file mode 100644
index 4f0597c..0000000
Binary files a/my/core/tests/structure_data/gdpr_export.tar.gz and /dev/null differ
diff --git a/my/core/tests/test_cachew.py b/my/core/tests/test_cachew.py
index a0d2267..70ac76f 100644
--- a/my/core/tests/test_cachew.py
+++ b/my/core/tests/test_cachew.py
@@ -1,7 +1,7 @@
-from __future__ import annotations
-
from .common import skip_if_uses_optional_deps as pytestmark
+from typing import List
+
# TODO ugh, this is very messy.. need to sort out config overriding here
@@ -16,7 +16,7 @@ def test_cachew() -> None:
# TODO ugh. need doublewrap or something to avoid having to pass parens
@mcachew()
- def cf() -> list[int]:
+ def cf() -> List[int]:
nonlocal called
called += 1
return [1, 2, 3]
@@ -43,7 +43,7 @@ def test_cachew_dir_none() -> None:
called = 0
@mcachew(cache_path=cache_dir() / 'ctest')
- def cf() -> list[int]:
+ def cf() -> List[int]:
nonlocal called
called += 1
return [called, called, called]
diff --git a/my/core/tests/test_config.py b/my/core/tests/test_config.py
deleted file mode 100644
index f6d12ba..0000000
--- a/my/core/tests/test_config.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""
-Various tests that are checking behaviour of user config wrt to various things
-"""
-
-import os
-import sys
-from pathlib import Path
-
-import pytest
-import pytz
-
-import my.config
-from my.core import notnone
-from my.demo import items, make_config
-
-from .common import tmp_environ_set
-
-# TODO would be nice to randomize test order here to catch various config issues
-
-
-# run the same test multiple times to make sure there are not issues with import order etc
-@pytest.mark.parametrize('run_id', ['1', '2'])
-def test_override_config(tmp_path: Path, run_id: str) -> None:
- class user_config:
- username = f'user_{run_id}'
- data_path = f'{tmp_path}/*.json'
-
- my.config.demo = user_config # type: ignore[misc, assignment]
-
- [item1, item2] = items()
- assert item1.username == f'user_{run_id}'
- assert item2.username == f'user_{run_id}'
-
-
-@pytest.mark.skip(reason="won't work at the moment because of inheritance")
-def test_dynamic_config_simplenamespace(tmp_path: Path) -> None:
- from types import SimpleNamespace
-
- user_config = SimpleNamespace(
- username='user3',
- data_path=f'{tmp_path}/*.json',
- )
- my.config.demo = user_config # type: ignore[misc, assignment]
-
- cfg = make_config()
-
- assert cfg.username == 'user3'
-
-
-def test_mixin_attribute_handling(tmp_path: Path) -> None:
- """
- Tests that arbitrary mixin attributes work with our config handling pattern
- """
-
- nytz = pytz.timezone('America/New_York')
-
- class user_config:
- # check that override is taken into the account
- timezone = nytz
-
- irrelevant = 'hello'
-
- username = 'UUU'
- data_path = f'{tmp_path}/*.json'
-
- my.config.demo = user_config # type: ignore[misc, assignment]
-
- cfg = make_config()
-
- assert cfg.username == 'UUU'
-
- # mypy doesn't know about it, but the attribute is there
- assert getattr(cfg, 'irrelevant') == 'hello'
-
- # check that overridden default attribute is actually getting overridden
- assert cfg.timezone == nytz
-
- [item1, item2] = items()
- assert item1.username == 'UUU'
- assert notnone(item1.dt.tzinfo).zone == nytz.zone # type: ignore[attr-defined]
- assert item2.username == 'UUU'
- assert notnone(item2.dt.tzinfo).zone == nytz.zone # type: ignore[attr-defined]
-
-
-# use multiple identical tests to make sure there are no issues with cached imports etc
-@pytest.mark.parametrize('run_id', ['1', '2'])
-def test_dynamic_module_import(tmp_path: Path, run_id: str) -> None:
- """
- Test for dynamic hackery in config properties
- e.g. importing some external modules
- """
-
- ext = tmp_path / 'external'
- ext.mkdir()
- (ext / '__init__.py').write_text(
- '''
-def transform(x):
- from .submodule import do_transform
- return do_transform(x)
-
-'''
- )
- (ext / 'submodule.py').write_text(
- f'''
-def do_transform(x):
- return {{"total_{run_id}": sum(x.values())}}
-'''
- )
-
- class user_config:
- username = 'someuser'
- data_path = f'{tmp_path}/*.json'
- external = f'{ext}'
-
- my.config.demo = user_config # type: ignore[misc, assignment]
-
- [item1, item2] = items()
- assert item1.raw == {f'total_{run_id}': 1 + 123}, item1
- assert item2.raw == {f'total_{run_id}': 2 + 456}, item2
-
- # need to reset these modules, otherwise they get cached
- # kind of relevant to my.core.cfg.tmp_config
- sys.modules.pop('external', None)
- sys.modules.pop('external.submodule', None)
-
-
-@pytest.mark.parametrize('run_id', ['1', '2'])
-def test_my_config_env_variable(tmp_path: Path, run_id: str) -> None:
- """
- Tests handling of MY_CONFIG variable
- """
-
- # ugh. so by this point, my.config is already loaded (default stub), so we need to unload it
- sys.modules.pop('my.config', None)
- # but my.config itself relies on my.core.init hook, so unless it's reloaded too it wouldn't help
- sys.modules.pop('my.core', None)
- sys.modules.pop('my.core.init', None)
- # it's a bit of a mouthful of course, but in most cases MY_CONFIG would be set once
- # , and before hpi runs, so hopefully it's not a huge deal
- cfg_dir = tmp_path / 'my'
- cfg_file = cfg_dir / 'config.py'
- cfg_dir.mkdir()
-
- cfg_file.write_text(
- f'''
-# print("IMPORTING CONFIG {run_id}")
-class demo:
- username = 'xxx_{run_id}'
- data_path = r'{tmp_path}{os.sep}*.json' # need raw string for windows...
-'''
- )
-
- with tmp_environ_set('MY_CONFIG', str(tmp_path)):
- [item1, item2] = items()
- assert item1.username == f'xxx_{run_id}'
- assert item2.username == f'xxx_{run_id}'
-
- # sigh.. so this is cached in sys.path
- # so it takes precedence later during next import, not giving the MY_CONFIG hook
- # (imported from builtin my.config) to kick in
- sys.path.remove(str(tmp_path))
-
- # FIXME ideally this shouldn't be necessary?
- # remove this after we fixup my.tests.reddit and my.tests.commits
- # (they were failing ci when running all tests)
- sys.modules.pop('my.config', None)
-
-
-@pytest.fixture(autouse=True)
-def prepare_data(tmp_path: Path):
- (tmp_path / 'data.json').write_text(
- '''
-[
- {"key": 1, "value": 123},
- {"key": 2, "value": 456}
-]
-'''
- )
diff --git a/my/core/tests/test_tmp_config.py b/my/core/tests/test_tmp_config.py
index d99621d..e5a24cc 100644
--- a/my/core/tests/test_tmp_config.py
+++ b/my/core/tests/test_tmp_config.py
@@ -12,7 +12,7 @@ def _init_default_config() -> None:
def test_tmp_config() -> None:
## ugh. ideally this would be on the top level (would be a better test)
- ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up
+ ## but pytest imports everything first, executes hooks, and some reset_modules() fixtures mess stuff up
## later would be nice to be a bit more careful about them
_init_default_config()
from my.simple import items
diff --git a/my/core/time.py b/my/core/time.py
index a9b180d..83a407b 100644
--- a/my/core/time.py
+++ b/my/core/time.py
@@ -1,7 +1,5 @@
-from __future__ import annotations
-
-from collections.abc import Sequence
-from functools import cache, lru_cache
+from functools import lru_cache
+from typing import Dict, Sequence
import pytz
@@ -13,24 +11,22 @@ def user_forced() -> Sequence[str]:
# https://stackoverflow.com/questions/36067621/python-all-possible-timezone-abbreviations-for-given-timezone-name-and-vise-ve
try:
from my.config import time as user_config
-
- return user_config.tz.force_abbreviations # type: ignore[attr-defined] # noqa: TRY300
- # note: noqa since we're catching case where config doesn't have attribute here as well
+ return user_config.tz.force_abbreviations # type: ignore[attr-defined]
except:
# todo log/apply policy
return []
@lru_cache(1)
-def _abbr_to_timezone_map() -> dict[str, pytz.BaseTzInfo]:
+def _abbr_to_timezone_map() -> Dict[str, pytz.BaseTzInfo]:
# also force UTC to always correspond to utc
# this makes more sense than Zulu, which it would end up as by default
- timezones = [*pytz.all_timezones, 'UTC', *user_forced()]
+ timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
- res: dict[str, pytz.BaseTzInfo] = {}
+ res: Dict[str, pytz.BaseTzInfo] = {}
for tzname in timezones:
tz = pytz.timezone(tzname)
- infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
+ infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
for info in infos:
abbr = info[-1]
# todo could support this with a better error handling strategy?
@@ -46,7 +42,7 @@ def _abbr_to_timezone_map() -> dict[str, pytz.BaseTzInfo]:
return res
-@cache
+@lru_cache(maxsize=None)
def abbr_to_timezone(abbr: str) -> pytz.BaseTzInfo:
return _abbr_to_timezone_map()[abbr]
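
A quick sketch of the abbreviation lookup defined above; the exact result for an abbreviation like 'EST' depends on the pytz database:

from my.core.time import abbr_to_timezone

print(abbr_to_timezone('UTC'))  # forced to plain UTC by _abbr_to_timezone_map
print(abbr_to_timezone('EST'))  # some timezone carrying the EST abbreviation
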
diff --git a/my/core/types.py b/my/core/types.py
index dc19c19..b1cf103 100644
--- a/my/core/types.py
+++ b/my/core/types.py
@@ -1,15 +1,14 @@
-from __future__ import annotations
-
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
+from .internal import assert_subpackage; assert_subpackage(__name__)
from dataclasses import asdict as dataclasses_asdict
from dataclasses import is_dataclass
from datetime import datetime
-from typing import Any
+from typing import (
+ Any,
+ Dict,
+)
-Json = dict[str, Any]
+Json = Dict[str, Any]
# for now just serves documentation purposes... but one day might make it statically verifiable where possible?
diff --git a/my/core/util.py b/my/core/util.py
index 74e71e1..b48a450 100644
--- a/my/core/util.py
+++ b/my/core/util.py
@@ -1,12 +1,10 @@
-from __future__ import annotations
-
import os
import pkgutil
import sys
-from collections.abc import Iterable
from itertools import chain
from pathlib import Path
from types import ModuleType
+from typing import Iterable, List, Optional
from .discovery_pure import HPIModule, _is_not_module_src, has_stats, ignored
@@ -14,7 +12,8 @@ from .discovery_pure import HPIModule, _is_not_module_src, has_stats, ignored
def modules() -> Iterable[HPIModule]:
import my
- yield from _iter_all_importables(my)
+ for m in _iter_all_importables(my):
+ yield m
__NOT_HPI_MODULE__ = 'Import this to mark a python file as a helper, not an actual HPI module'
@@ -22,14 +21,13 @@ from .discovery_pure import NOT_HPI_MODULE_VAR
assert NOT_HPI_MODULE_VAR in globals() # check name consistency
-
-def is_not_hpi_module(module: str) -> str | None:
+def is_not_hpi_module(module: str) -> Optional[str]:
'''
None if a module, otherwise returns reason
'''
- import importlib.util
+ import importlib
- path: str | None = None
+ path: Optional[str] = None
try:
# TODO annoying, this can cause import of the parent module?
spec = importlib.util.find_spec(module)
@@ -38,7 +36,7 @@ def is_not_hpi_module(module: str) -> str | None:
except Exception as e:
# todo a bit misleading.. it actually shouldn't import in most cases, it's just the weird parent module import thing
return "import error (possibly missing config entry)" # todo add exc message?
- assert path is not None # not sure if can happen?
+ assert path is not None # not sure if can happen?
if _is_not_module_src(Path(path)):
return f"marked explicitly (via {NOT_HPI_MODULE_VAR})"
@@ -60,10 +58,9 @@ def _iter_all_importables(pkg: ModuleType) -> Iterable[HPIModule]:
def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModule]:
+ from .core_config import config
+
"""Yield all importables under a given path and package."""
-
- from .core_config import config # noqa: F401
-
for dir_path, dirs, file_names in os.walk(pkg_pth):
file_names.sort()
# NOTE: sorting dirs in place is intended, it's the way you're supposed to do it with os.walk
@@ -78,7 +75,7 @@ def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModu
continue
rel_pt = pkg_dir_path.relative_to(pkg_pth)
- pkg_pref = '.'.join((pkg_name, *rel_pt.parts))
+ pkg_pref = '.'.join((pkg_name, ) + rel_pt.parts)
yield from _walk_packages(
(str(pkg_dir_path), ), prefix=f'{pkg_pref}.',
@@ -86,7 +83,6 @@ def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModu
# TODO might need to make it defensive and yield Exception (otherwise hpi doctor might fail for no good reason)
# use onerror=?
-
# ignored explicitly -> not HPI
# if enabled in config -> HPI
# if disabled in config -> HPI
@@ -95,17 +91,17 @@ def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModu
# TODO when do we need to recurse?
-def _walk_packages(path: Iterable[str], prefix: str = '', onerror=None) -> Iterable[HPIModule]:
+def _walk_packages(path: Iterable[str], prefix: str='', onerror=None) -> Iterable[HPIModule]:
"""
Modified version of https://github.com/python/cpython/blob/d50a0700265536a20bcce3fb108c954746d97625/Lib/pkgutil.py#L53,
- to avoid importing modules that are skipped
+ to avoid importing modules that are skipped
"""
from .core_config import config
- def seen(p, m={}): # noqa: B006
+ def seen(p, m={}):
if p in m:
return True
- m[p] = True # noqa: RET503
+ m[p] = True
for info in pkgutil.iter_modules(path, prefix):
mname = info.name
@@ -158,9 +154,8 @@ def _walk_packages(path: Iterable[str], prefix: str = '', onerror=None) -> Itera
path = [p for p in path if not seen(p)]
yield from _walk_packages(path, mname + '.', onerror)
-
# deprecate?
-def get_modules() -> list[HPIModule]:
+def get_modules() -> List[HPIModule]:
return list(modules())
@@ -175,14 +170,14 @@ def test_module_detection() -> None:
with reset() as cc:
cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*']
mods = {m.name: m for m in modules()}
- assert mods['my.demo'].skip_reason == "has no 'stats()' function"
+ assert mods['my.demo'] .skip_reason == "has no 'stats()' function"
with reset() as cc:
cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*', 'my.lastfm']
- cc.enabled_modules = ['my.demo']
+ cc.enabled_modules = ['my.demo']
mods = {m.name: m for m in modules()}
- assert mods['my.demo'].skip_reason is None # not skipped
+ assert mods['my.demo'] .skip_reason is None # not skipped
assert mods['my.lastfm'].skip_reason == "suppressed in the user config"
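
A small sketch of how the discovery helpers above are meant to be consumed; skip reasons come from the user's core config, as exercised in test_module_detection:

from my.core.util import modules

for m in modules():
    status = 'ok' if m.skip_reason is None else f'skipped: {m.skip_reason}'
    print(m.name, '-', status)
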
diff --git a/my/core/utils/concurrent.py b/my/core/utils/concurrent.py
index 515c3f1..3553cd9 100644
--- a/my/core/utils/concurrent.py
+++ b/my/core/utils/concurrent.py
@@ -1,7 +1,6 @@
-from __future__ import annotations
-
+import sys
from concurrent.futures import Executor, Future
-from typing import Any, Callable, TypeVar
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
from ..compat import ParamSpec
@@ -16,25 +15,37 @@ class DummyExecutor(Executor):
but also want to provide an option to run the code serially (e.g. for debugging)
"""
- def __init__(self, max_workers: int | None = 1) -> None:
+ def __init__(self, max_workers: Optional[int] = 1) -> None:
self._shutdown = False
self._max_workers = max_workers
- def submit(self, fn: Callable[_P, _T], /, *args: _P.args, **kwargs: _P.kwargs) -> Future[_T]:
- if self._shutdown:
- raise RuntimeError('cannot schedule new futures after shutdown')
+ if TYPE_CHECKING:
+ if sys.version_info[:2] <= (3, 8):
+ # 3.8 doesn't support ParamSpec as Callable arg :(
+ # and any attempt to type results in incompatible supertype.. so whatever
+ def submit(self, fn, *args, **kwargs): ...
- f: Future[Any] = Future()
- try:
- result = fn(*args, **kwargs)
- except KeyboardInterrupt:
- raise
- except BaseException as e:
- f.set_exception(e)
else:
- f.set_result(result)
- return f
+ def submit(self, fn: Callable[_P, _T], /, *args: _P.args, **kwargs: _P.kwargs) -> Future[_T]: ...
- def shutdown(self, wait: bool = True, **kwargs) -> None: # noqa: FBT001,FBT002,ARG002
+ else:
+
+ def submit(self, fn, *args, **kwargs):
+ if self._shutdown:
+ raise RuntimeError('cannot schedule new futures after shutdown')
+
+ f: Future[Any] = Future()
+ try:
+ result = fn(*args, **kwargs)
+ except KeyboardInterrupt:
+ raise
+ except BaseException as e:
+ f.set_exception(e)
+ else:
+ f.set_result(result)
+
+ return f
+
+ def shutdown(self, wait: bool = True, **kwargs) -> None:
self._shutdown = True
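
A minimal sketch of swapping DummyExecutor in for debugging, as its docstring suggests; DEBUG is a hypothetical flag and ThreadPoolExecutor is the concurrent counterpart:

from concurrent.futures import ThreadPoolExecutor

from my.core.utils.concurrent import DummyExecutor

DEBUG = True  # hypothetical flag

Executor = DummyExecutor if DEBUG else ThreadPoolExecutor
with Executor(max_workers=4) as pool:
    future = pool.submit(sum, [1, 2, 3])
    # with DummyExecutor the work has already run, serially, inside submit()
    print(future.result())  # 6
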
diff --git a/my/core/utils/imports.py b/my/core/utils/imports.py
index e0fb01d..4666a5e 100644
--- a/my/core/utils/imports.py
+++ b/my/core/utils/imports.py
@@ -1,27 +1,27 @@
-from __future__ import annotations
-
import importlib
import importlib.util
import sys
from pathlib import Path
from types import ModuleType
+from typing import Optional
+
+from ..common import PathIsh
# TODO only used in tests? not sure if useful at all.
-def import_file(p: Path | str, name: str | None = None) -> ModuleType:
+def import_file(p: PathIsh, name: Optional[str] = None) -> ModuleType:
p = Path(p)
if name is None:
name = p.stem
spec = importlib.util.spec_from_file_location(name, p)
assert spec is not None, f"Fatal error; Could not create module spec from {name} {p}"
foo = importlib.util.module_from_spec(spec)
- loader = spec.loader
- assert loader is not None
+ loader = spec.loader; assert loader is not None
loader.exec_module(foo)
return foo
-def import_from(path: Path | str, name: str) -> ModuleType:
+def import_from(path: PathIsh, name: str) -> ModuleType:
path = str(path)
sys.path.append(path)
try:
@@ -30,7 +30,7 @@ def import_from(path: Path | str, name: str) -> ModuleType:
sys.path.remove(path)
-def import_dir(path: Path | str, extra: str = '') -> ModuleType:
+def import_dir(path: PathIsh, extra: str = '') -> ModuleType:
p = Path(path)
if p.parts[0] == '~':
p = p.expanduser() # TODO eh. not sure about this..
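
A minimal sketch of import_file as defined above, with a hypothetical path:

from my.core.utils.imports import import_file

# loads the file as a standalone module, named after its stem by default
cfg = import_file('/tmp/myconfig.py')
print(cfg.__name__)  # 'myconfig'
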
diff --git a/my/core/utils/itertools.py b/my/core/utils/itertools.py
index 42b2b77..023484d 100644
--- a/my/core/utils/itertools.py
+++ b/my/core/utils/itertools.py
@@ -4,13 +4,17 @@ Various helpers/transforms of iterators
Ideally this should be as small as possible and we should rely on stdlib itertools or more_itertools
"""
-from __future__ import annotations
-
import warnings
-from collections.abc import Hashable, Iterable, Iterator, Sized
+from collections.abc import Hashable
from typing import (
TYPE_CHECKING,
Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Sized,
TypeVar,
Union,
cast,
@@ -19,7 +23,6 @@ from typing import (
import more_itertools
from decorator import decorator
-from .. import warnings as core_warnings
from ..compat import ParamSpec
T = TypeVar('T')
@@ -34,7 +37,7 @@ def _identity(v: T) -> V: # type: ignore[type-var]
# ugh. nothing in more_itertools?
# perhaps duplicates_everseen? but it doesn't yield non-unique elements?
def ensure_unique(it: Iterable[T], *, key: Callable[[T], K]) -> Iterable[T]:
- key2item: dict[K, T] = {}
+ key2item: Dict[K, T] = {}
for i in it:
k = key(i)
pi = key2item.get(k, None)
@@ -58,7 +61,7 @@ def test_ensure_unique() -> None:
list(it)
# hacky way to force distinct objects?
- list(ensure_unique(dups, key=lambda _: object()))
+ list(ensure_unique(dups, key=lambda i: object()))
def make_dict(
@@ -67,10 +70,10 @@ def make_dict(
key: Callable[[T], K],
# TODO make value optional instead? but then will need a typing override for it?
value: Callable[[T], V] = _identity,
-) -> dict[K, V]:
+) -> Dict[K, V]:
with_keys = ((key(i), i) for i in it)
uniques = ensure_unique(with_keys, key=lambda p: p[0])
- res: dict[K, V] = {}
+ res: Dict[K, V] = {}
for k, i in uniques:
res[k] = i if value is None else value(i)
return res
@@ -88,8 +91,8 @@ def test_make_dict() -> None:
d = make_dict(it, key=lambda i: i % 2, value=lambda i: i)
# check type inference
- d2: dict[str, int] = make_dict(it, key=lambda i: str(i))
- d3: dict[str, bool] = make_dict(it, key=lambda i: str(i), value=lambda i: i % 2 == 0)
+ d2: Dict[str, int] = make_dict(it, key=lambda i: str(i))
+ d3: Dict[str, bool] = make_dict(it, key=lambda i: str(i), value=lambda i: i % 2 == 0)
LFP = ParamSpec('LFP')
@@ -97,7 +100,7 @@ LV = TypeVar('LV')
@decorator
-def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.kwargs) -> list[LV]:
+def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.kwargs) -> List[LV]:
"""
Wraps a function's return value in wrapper (e.g. list)
Useful when an algorithm can be expressed more cleanly as a generator
@@ -110,7 +113,7 @@ def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.k
# so seems easiest to just use specialize instantiations of decorator instead
if TYPE_CHECKING:
- def listify(func: Callable[LFP, Iterable[LV]]) -> Callable[LFP, list[LV]]: ... # noqa: ARG001
+ def listify(func: Callable[LFP, Iterable[LV]]) -> Callable[LFP, List[LV]]: ...
else:
listify = _listify
@@ -125,7 +128,7 @@ def test_listify() -> None:
yield 2
res = it()
- assert_type(res, list[int])
+ assert_type(res, List[int])
assert res == [1, 2]
@@ -139,7 +142,8 @@ def _warn_if_empty(func, *args, **kwargs):
if isinstance(iterable, Sized):
sz = len(iterable)
if sz == 0:
- core_warnings.medium(f"Function {func} returned empty container, make sure your config paths are correct")
+ # todo use hpi warnings here?
+ warnings.warn(f"Function {func} returned empty container, make sure your config paths are correct")
return iterable
else: # must be an iterator
@@ -149,7 +153,7 @@ def _warn_if_empty(func, *args, **kwargs):
yield i
empty = False
if empty:
- core_warnings.medium(f"Function {func} didn't emit any data, make sure your config paths are correct")
+ warnings.warn(f"Function {func} didn't emit any data, make sure your config paths are correct")
return wit()
@@ -157,7 +161,7 @@ def _warn_if_empty(func, *args, **kwargs):
if TYPE_CHECKING:
FF = TypeVar('FF', bound=Callable[..., Iterable])
- def warn_if_empty(func: FF) -> FF: ... # noqa: ARG001
+ def warn_if_empty(f: FF) -> FF: ...
else:
warn_if_empty = _warn_if_empty
@@ -196,24 +200,24 @@ def test_warn_if_empty_list() -> None:
ll = [1, 2, 3]
@warn_if_empty
- def nonempty() -> list[int]:
+ def nonempty() -> List[int]:
return ll
with warnings.catch_warnings(record=True) as w:
res1 = nonempty()
assert len(w) == 0
- assert_type(res1, list[int])
+ assert_type(res1, List[int])
assert isinstance(res1, list)
assert res1 is ll # object should be unchanged!
@warn_if_empty
- def empty() -> list[str]:
+ def empty() -> List[str]:
return []
with warnings.catch_warnings(record=True) as w:
res2 = empty()
assert len(w) == 1
- assert_type(res2, list[str])
+ assert_type(res2, List[str])
assert isinstance(res2, list)
assert res2 == []
@@ -237,7 +241,7 @@ def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]:
"""
NOTE: Despite Hashable bound, typing annotation doesn't guarantee runtime safety
Consider hashable type X, and Y that inherits from X, but not hashable
- Then l: list[X] = [Y(...)] is a valid expression, and type checks against Hashable,
+ Then l: List[X] = [Y(...)] is a valid expression, and type checks against Hashable,
but isn't runtime hashable
"""
# Sadly this doesn't work 100% correctly with dataclasses atm...
@@ -263,27 +267,30 @@ def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]:
# TODO different policies -- error/warn/ignore?
def test_check_if_hashable() -> None:
from dataclasses import dataclass
+ from typing import Set, Tuple
import pytest
from ..compat import assert_type
- x1: list[int] = [1, 2]
+ x1: List[int] = [1, 2]
r1 = check_if_hashable(x1)
- assert_type(r1, Iterable[int])
+ # tgype: ignore[comparison-overlap] # object should be unchanged
assert r1 is x1
+ assert_type(r1, Iterable[int])
- x2: Iterator[int | str] = iter((123, 'aba'))
+ x2: Iterator[Union[int, str]] = iter((123, 'aba'))
r2 = check_if_hashable(x2)
- assert_type(r2, Iterable[Union[int, str]])
assert list(r2) == [123, 'aba']
+ assert_type(r2, Iterable[Union[int, str]])
- x3: tuple[object, ...] = (789, 'aba')
+ x3: Tuple[object, ...] = (789, 'aba')
r3 = check_if_hashable(x3)
+ # ttype: ignore[comparison-overlap] # object should be unchanged
+ assert r3 is x3
assert_type(r3, Iterable[object])
- assert r3 is x3 # object should be unchanged
- x4: list[set[int]] = [{1, 2, 3}, {4, 5, 6}]
+ x4: List[Set[int]] = [{1, 2, 3}, {4, 5, 6}]
with pytest.raises(Exception):
# should be rejected by mypy since set isn't Hashable, but also throw at runtime
r4 = check_if_hashable(x4) # type: ignore[type-var]
@@ -301,7 +308,7 @@ def test_check_if_hashable() -> None:
class X:
a: int
- x6: list[X] = [X(a=123)]
+ x6: List[X] = [X(a=123)]
r6 = check_if_hashable(x6)
assert x6 is r6
@@ -310,7 +317,7 @@ def test_check_if_hashable() -> None:
class Y(X):
b: str
- x7: list[Y] = [Y(a=123, b='aba')]
+ x7: List[Y] = [Y(a=123, b='aba')]
with pytest.raises(Exception):
# ideally that would also be rejected by mypy, but currently there is a bug
# which treats all dataclasses as hashable: https://github.com/python/mypy/issues/11463
@@ -321,12 +328,15 @@ _UET = TypeVar('_UET')
_UEU = TypeVar('_UEU')
-# NOTE: for historic reasons, this function had to accept Callable that returns iterator
+# NOTE: for historic reasons, this function had to accept Callable that returns iterator
# instead of just iterator
# TODO maybe deprecated Callable support? not sure
def unique_everseen(
- fun: Callable[[], Iterable[_UET]] | Iterable[_UET],
- key: Callable[[_UET], _UEU] | None = None,
+ fun: Union[
+ Callable[[], Iterable[_UET]],
+ Iterable[_UET]
+ ],
+ key: Optional[Callable[[_UET], _UEU]] = None,
) -> Iterator[_UET]:
import os
@@ -358,7 +368,7 @@ def test_unique_everseen() -> None:
assert list(unique_everseen(fun_good)) == [123]
with pytest.raises(Exception):
- # since function returns a list rather than iterator, check happens immediately
+ # since function returns a list rather than iterator, check happens immediately
# , even without advancing the iterator
unique_everseen(fun_bad)
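
A short sketch of the make_dict and unique_everseen helpers above, on throwaway data:

from my.core.utils.itertools import make_dict, unique_everseen

d = make_dict([1, 2, 3], key=lambda i: str(i), value=lambda i: i * 10)
print(d)  # {'1': 10, '2': 20, '3': 30}

# accepts an iterable directly, or a no-argument callable returning one
print(list(unique_everseen([1, 1, 2, 1, 3])))  # [1, 2, 3]
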
diff --git a/my/core/warnings.py b/my/core/warnings.py
index d67ec7d..82e539b 100644
--- a/my/core/warnings.py
+++ b/my/core/warnings.py
@@ -5,16 +5,14 @@ since who looks at the terminal output?
E.g. would be nice to propagate the warnings in the UI (it's even a subclass of Exception!)
'''
-from __future__ import annotations
-
import sys
import warnings
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional
import click
-def _colorize(x: str, color: str | None = None) -> str:
+def _colorize(x: str, color: Optional[str]=None) -> str:
if color is None:
return x
@@ -26,10 +24,10 @@ def _colorize(x: str, color: str | None = None) -> str:
return click.style(x, fg=color)
-def _warn(message: str, *args, color: str | None = None, **kwargs) -> None:
+def _warn(message: str, *args, color: Optional[str]=None, **kwargs) -> None:
stacklevel = kwargs.get('stacklevel', 1)
- kwargs['stacklevel'] = stacklevel + 2 # +1 for this function, +1 for medium/high wrapper
- warnings.warn(_colorize(message, color=color), *args, **kwargs) # noqa: B028
+ kwargs['stacklevel'] = stacklevel + 2 # +1 for this function, +1 for medium/high wrapper
+ warnings.warn(_colorize(message, color=color), *args, **kwargs)
def low(message: str, *args, **kwargs) -> None:
@@ -57,4 +55,4 @@ if not TYPE_CHECKING:
def warn(*args, **kwargs):
import warnings
- return warnings.warn(*args, **kwargs) # noqa: B028
+ return warnings.warn(*args, **kwargs)
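
A minimal sketch of the severity helpers this module exposes; low emits plain text, while medium/high are colorized via _colorize:

import my.core.warnings as W

W.low('fallback used')                   # low-priority note
W.medium('check your config paths')      # colorized so it stands out
W.high('DEPRECATED! use my.core.error')  # loudest level, e.g. for deprecations
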
diff --git a/my/demo.py b/my/demo.py
index fa80b2a..645be4f 100644
--- a/my/demo.py
+++ b/my/demo.py
@@ -1,77 +1,69 @@
'''
Just a demo module for testing and documentation purposes
'''
-from __future__ import annotations
-import json
-from collections.abc import Iterable, Sequence
+from .core import Paths, PathIsh
+
+from typing import Optional
+from datetime import tzinfo, timezone
+
+from my.config import demo as user_config
from dataclasses import dataclass
-from datetime import datetime, timezone, tzinfo
-from pathlib import Path
-from typing import Protocol
-
-from my.core import Json, PathIsh, Paths, get_files
-class config(Protocol):
+@dataclass
+class demo(user_config):
data_path: Paths
-
- # this is to check required attribute handling
username: str
-
- # this is to check optional attribute handling
timezone: tzinfo = timezone.utc
- external: PathIsh | None = None
+ external: Optional[PathIsh] = None
@property
def external_module(self):
rpath = self.external
if rpath is not None:
- from my.core.utils.imports import import_dir
-
+ from .core.utils.imports import import_dir
return import_dir(rpath)
- import my.config.repos.external as m # type: ignore
-
+ import my.config.repos.external as m # type: ignore
return m
-def make_config() -> config:
- from my.config import demo as user_config
+from .core import make_config
+config = make_config(demo)
- class combined_config(user_config, config): ...
+# TODO not sure about type checking?
+external = config.external_module
- return combined_config()
+from pathlib import Path
+from typing import Sequence, Iterable
+from datetime import datetime
+from .core import Json, get_files
@dataclass
class Item:
'''
Some completely arbitrary artificial stuff, just for testing
'''
-
username: str
raw: Json
dt: datetime
def inputs() -> Sequence[Path]:
- cfg = make_config()
- return get_files(cfg.data_path)
+ return get_files(config.data_path)
+import json
def items() -> Iterable[Item]:
- cfg = make_config()
-
- transform = (lambda i: i) if cfg.external is None else cfg.external_module.transform
-
for f in inputs():
- dt = datetime.fromtimestamp(f.stat().st_mtime, tz=cfg.timezone)
+ dt = datetime.fromtimestamp(f.stat().st_mtime, tz=config.timezone)
j = json.loads(f.read_text())
for raw in j:
yield Item(
- username=cfg.username,
- raw=transform(raw),
+ username=config.username,
+ raw=external.identity(raw),
dt=dt,
)
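
For reference, a sketch of the user-config side that the pattern above expects, i.e. roughly what would live under my.config for this module (values are hypothetical):

class demo:
    data_path = '/path/to/demo/*.json'  # glob consumed via get_files
    username = 'testuser'
    # timezone and external are optional -- they fall back to the
    # dataclass defaults above (timezone.utc and None respectively)
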
diff --git a/my/emfit/__init__.py b/my/emfit/__init__.py
index 0d50b06..7fae8ea 100644
--- a/my/emfit/__init__.py
+++ b/my/emfit/__init__.py
@@ -4,33 +4,30 @@
Consumes data exported by https://github.com/karlicoss/emfitexport
"""
-from __future__ import annotations
-
REQUIRES = [
'git+https://github.com/karlicoss/emfitexport',
]
-import dataclasses
-import inspect
-from collections.abc import Iterable, Iterator
from contextlib import contextmanager
+import dataclasses
from datetime import datetime, time, timedelta
+import inspect
from pathlib import Path
-from typing import Any
-
-import emfitexport.dal as dal
+from typing import Any, Dict, Iterable, Iterator, List, Optional
from my.core import (
- Res,
- Stats,
get_files,
stat,
+ Res,
+ Stats,
)
from my.core.cachew import cache_dir, mcachew
-from my.core.error import extract_error_datetime, set_error_datetime
+from my.core.error import set_error_datetime, extract_error_datetime
from my.core.pandas import DataFrameT
-from my.config import emfit as config # isort: skip
+from my.config import emfit as config
+
+import emfitexport.dal as dal
Emfit = dal.Emfit
@@ -88,7 +85,7 @@ def datas() -> Iterable[Res[Emfit]]:
# TODO should be used for jawbone data as well?
def pre_dataframe() -> Iterable[Res[Emfit]]:
# TODO shit. I need some sort of interrupted sleep detection?
- g: list[Emfit] = []
+ g: List[Emfit] = []
def flush() -> Iterable[Res[Emfit]]:
if len(g) == 0:
@@ -115,10 +112,10 @@ def pre_dataframe() -> Iterable[Res[Emfit]]:
def dataframe() -> DataFrameT:
- dicts: list[dict[str, Any]] = []
- last: Emfit | None = None
+ dicts: List[Dict[str, Any]] = []
+ last: Optional[Emfit] = None
for s in pre_dataframe():
- d: dict[str, Any]
+ d: Dict[str, Any]
if isinstance(s, Exception):
edt = extract_error_datetime(s)
d = {
@@ -158,9 +155,9 @@ def dataframe() -> DataFrameT:
last = s # meh
dicts.append(d)
- import pandas as pd
+ import pandas
- return pd.DataFrame(dicts)
+ return pandas.DataFrame(dicts)
def stats() -> Stats:
@@ -169,12 +166,11 @@ def stats() -> Stats:
@contextmanager
def fake_data(nights: int = 500) -> Iterator:
+ from my.core.cfg import tmp_config
from tempfile import TemporaryDirectory
import pytz
- from my.core.cfg import tmp_config
-
with TemporaryDirectory() as td:
tdir = Path(td)
gen = dal.FakeData()
@@ -191,9 +187,9 @@ def fake_data(nights: int = 500) -> Iterator:
# TODO remove/deprecate it? I think used by timeline
-def get_datas() -> list[Emfit]:
+def get_datas() -> List[Emfit]:
# todo ugh. run lint properly
- return sorted(datas(), key=lambda e: e.start) # type: ignore
+ return list(sorted(datas(), key=lambda e: e.start)) # type: ignore
# TODO move away old entries if there is a diff??
diff --git a/my/endomondo.py b/my/endomondo.py
index 7732c00..d314e97 100644
--- a/my/endomondo.py
+++ b/my/endomondo.py
@@ -7,14 +7,13 @@ REQUIRES = [
]
# todo use ast in setup.py or doctor to extract the corresponding pip packages?
-from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from pathlib import Path
-
-from my.config import endomondo as user_config
+from typing import Sequence, Iterable
from .core import Paths, get_files
+from my.config import endomondo as user_config
@dataclass
class endomondo(user_config):
@@ -32,22 +31,20 @@ def inputs() -> Sequence[Path]:
# todo add a doctor check for pip endoexport module
import endoexport.dal as dal
-from endoexport.dal import Point, Workout # noqa: F401
+from endoexport.dal import Point, Workout
+
from .core import Res
-
-
# todo cachew?
def workouts() -> Iterable[Res[Workout]]:
_dal = dal.DAL(inputs())
yield from _dal.workouts()
-from .core.pandas import DataFrameT, check_dataframe
-
+from .core.pandas import check_dataframe, DataFrameT
@check_dataframe
-def dataframe(*, defensive: bool=True) -> DataFrameT:
+def dataframe(defensive: bool=True) -> DataFrameT:
def it():
for w in workouts():
if isinstance(w, Exception):
@@ -78,9 +75,7 @@ def dataframe(*, defensive: bool=True) -> DataFrameT:
return df
-from .core import Stats, stat
-
-
+from .core import stat, Stats
def stats() -> Stats:
return {
# todo pretty print stats?
@@ -91,16 +86,13 @@ def stats() -> Stats:
# TODO make sure it's possible to 'advise' functions and override stuff
-from collections.abc import Iterator
from contextlib import contextmanager
-
-
+from typing import Iterator
@contextmanager
def fake_data(count: int=100) -> Iterator:
- import json
- from tempfile import TemporaryDirectory
-
from my.core.cfg import tmp_config
+ from tempfile import TemporaryDirectory
+ import json
with TemporaryDirectory() as td:
tdir = Path(td)
fd = dal.FakeData()
diff --git a/my/error.py b/my/error.py
index e3c1e11..c0b734c 100644
--- a/my/error.py
+++ b/my/error.py
@@ -1,6 +1,6 @@
from .core.warnings import high
-
high("DEPRECATED! Please use my.core.error instead.")
from .core import __NOT_HPI_MODULE__
+
from .core.error import *
diff --git a/my/experimental/destructive_parsing.py b/my/experimental/destructive_parsing.py
index 0c4092a..05c5920 100644
--- a/my/experimental/destructive_parsing.py
+++ b/my/experimental/destructive_parsing.py
@@ -1,6 +1,5 @@
-from collections.abc import Iterator
from dataclasses import dataclass
-from typing import Any
+from typing import Any, Iterator, List, Tuple
from my.core.compat import NoneType, assert_never
@@ -10,7 +9,7 @@ from my.core.compat import NoneType, assert_never
class Helper:
manager: 'Manager'
item: Any # todo realistically, list or dict? could at least type as indexable or something
- path: tuple[str, ...]
+ path: Tuple[str, ...]
def pop_if_primitive(self, *keys: str) -> None:
"""
@@ -27,7 +26,7 @@ class Helper:
assert actual == expected, (key, actual, expected)
def zoom(self, key: str) -> 'Helper':
- return self.manager.helper(item=self.item.pop(key), path=(*self.path, key))
+ return self.manager.helper(item=self.item.pop(key), path=self.path + (key,))
def is_empty(x) -> bool:
@@ -36,14 +35,14 @@ def is_empty(x) -> bool:
elif isinstance(x, list):
return all(map(is_empty, x))
else:
- assert_never(x) # noqa: RET503
+ assert_never(x)
class Manager:
def __init__(self) -> None:
- self.helpers: list[Helper] = []
+ self.helpers: List[Helper] = []
- def helper(self, item: Any, *, path: tuple[str, ...] = ()) -> Helper:
+ def helper(self, item: Any, *, path: Tuple[str, ...] = ()) -> Helper:
res = Helper(manager=self, item=item, path=path)
self.helpers.append(res)
return res
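
A tiny sketch of the destructive parsing helpers above: pop the fields you've handled, then check that nothing unexpected is left (the input data is hypothetical, and this assumes is_empty treats an empty dict as empty):

from my.experimental.destructive_parsing import Manager, is_empty

m = Manager()
h = m.helper({'name': 'karlicoss', 'settings': {'theme': 'dark'}})
h.pop_if_primitive('name')
theme = h.zoom('settings').item.pop('theme')
# a helper whose item still has leftover keys indicates unhandled data
assert all(is_empty(x.item) for x in m.helpers)
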
diff --git a/my/fbmessenger/__init__.py b/my/fbmessenger/__init__.py
index e5e417c..40fb235 100644
--- a/my/fbmessenger/__init__.py
+++ b/my/fbmessenger/__init__.py
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
"""
-# prevent it from appearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
from ..core import __NOT_HPI_MODULE__
# kinda annoying to keep it, but it's so legacy 'hpi module install my.fbmessenger' works
@@ -20,7 +20,6 @@ REQUIRES = [
from my.core.hpi_compat import handle_legacy_import
-
is_legacy_import = handle_legacy_import(
parent_module_name=__name__,
legacy_submodule_name='export',
diff --git a/my/fbmessenger/all.py b/my/fbmessenger/all.py
index a057dca..13689db 100644
--- a/my/fbmessenger/all.py
+++ b/my/fbmessenger/all.py
@@ -1,10 +1,10 @@
-from collections.abc import Iterator
-
-from my.core import Res, Stats
+from typing import Iterator
+from my.core import Res, stat, Stats
from my.core.source import import_source
from .common import Message, _merge_messages
+
src_export = import_source(module_name='my.fbmessenger.export')
src_android = import_source(module_name='my.fbmessenger.android')
diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py
index f6fdb82..bc06114 100644
--- a/my/fbmessenger/android.py
+++ b/my/fbmessenger/android.py
@@ -4,20 +4,19 @@ Messenger data from Android app database (in =/data/data/com.facebook.orca/datab
from __future__ import annotations
-import sqlite3
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
-from typing import Union
+import sqlite3
+from typing import Iterator, Sequence, Optional, Dict, Union, List
-from my.core import LazyLogger, Paths, Res, datetime_aware, get_files, make_config
+from my.core import get_files, Paths, datetime_aware, Res, LazyLogger, make_config
from my.core.common import unique_everseen
from my.core.compat import assert_never
from my.core.error import echain
-from my.core.sqlite import sqlite_connection, SqliteTool
+from my.core.sqlite import sqlite_connection
-from my.config import fbmessenger as user_config # isort: skip
+from my.config import fbmessenger as user_config
logger = LazyLogger(__name__)
@@ -28,7 +27,7 @@ class Config(user_config.android):
# paths[s]/glob to the exported sqlite databases
export_path: Paths
- facebook_id: str | None = None
+ facebook_id: Optional[str] = None
# hmm. this is necessary for default value (= None) to work
@@ -43,13 +42,13 @@ def inputs() -> Sequence[Path]:
@dataclass(unsafe_hash=True)
class Sender:
id: str
- name: str | None
+ name: Optional[str]
@dataclass(unsafe_hash=True)
class Thread:
id: str
- name: str | None # isn't set for groups or one to one messages
+ name: Optional[str] # isn't set for groups or one to one messages
# todo not sure about order of fields...
@@ -57,14 +56,14 @@ class Thread:
class _BaseMessage:
id: str
dt: datetime_aware
- text: str | None
+ text: Optional[str]
@dataclass(unsafe_hash=True)
class _Message(_BaseMessage):
thread_id: str
sender_id: str
- reply_to_id: str | None
+ reply_to_id: Optional[str]
# todo hmm, on the one hand would be kinda nice to inherit common.Message protocol here
@@ -73,7 +72,7 @@ class _Message(_BaseMessage):
class Message(_BaseMessage):
thread: Thread
sender: Sender
- reply_to: Message | None
+ reply_to: Optional[Message]
Entity = Union[Sender, Thread, _Message]
@@ -86,8 +85,8 @@ def _entities() -> Iterator[Res[Entity]]:
for idx, path in enumerate(paths):
logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
with sqlite_connection(path, immutable=True, row_factory='row') as db:
- use_msys = "logging_events_v2" in SqliteTool(db).get_table_names()
try:
+ use_msys = len(list(db.execute('SELECT * FROM sqlite_master WHERE name = "logging_events_v2"'))) > 0
if use_msys:
yield from _process_db_msys(db)
else:
@@ -111,7 +110,7 @@ def _normalise_thread_id(key) -> str:
# NOTE: this is sort of copy pasted from other _process_db method
# maybe later could unify them
def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
- senders: dict[str, Sender] = {}
+ senders: Dict[str, Sender] = {}
for r in db.execute('SELECT CAST(id AS TEXT) AS id, name FROM contacts'):
s = Sender(
id=r['id'], # looks like it's server id? same used on facebook site
@@ -128,7 +127,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
# TODO can we get it from db? could infer as the most common id perhaps?
self_id = config.facebook_id
- thread_users: dict[str, list[Sender]] = {}
+ thread_users: Dict[str, List[Sender]] = {}
for r in db.execute('SELECT CAST(thread_key AS TEXT) AS thread_key, CAST(contact_id AS TEXT) AS contact_id FROM participants'):
thread_key = r['thread_key']
user_key = r['contact_id']
@@ -169,15 +168,6 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
CAST(sender_id AS TEXT) AS sender_id,
reply_source_id
FROM messages
- WHERE
- /* Regular message_id conforms to mid.* regex.
- However seems that when message is not sent yet it doesn't have this server id yet
- (happened only once, but could be just luck of course!)
- We exclude these messages to avoid duplication.
- However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if message ids change or something
- So instead this excludes only such unsent messages.
- */
- message_id != offline_threading_id
ORDER BY timestamp_ms /* they aren't in order in the database, so need to sort */
'''
):
@@ -194,7 +184,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
- senders: dict[str, Sender] = {}
+ senders: Dict[str, Sender] = {}
for r in db.execute('''SELECT * FROM thread_users'''):
# for messaging_actor_type == 'REDUCED_MESSAGING_ACTOR', name is None
# but they are still referenced, so need to keep
@@ -208,7 +198,7 @@ def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
yield s
self_id = config.facebook_id
- thread_users: dict[str, list[Sender]] = {}
+ thread_users: Dict[str, List[Sender]] = {}
for r in db.execute('SELECT * from thread_participants'):
thread_key = r['thread_key']
user_key = r['user_key']
@@ -238,7 +228,7 @@ def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
for r in db.execute(
'''
- SELECT *, json_extract(sender, "$.user_key") AS user_key FROM messages
+ SELECT *, json_extract(sender, "$.user_key") AS user_key FROM messages
WHERE msg_type NOT IN (
-1, /* these don't have any data at all, likely immediately deleted or something? */
2 /* these are 'left group' system messages, also a bit annoying since they might reference nonexistent users */
@@ -268,9 +258,9 @@ def contacts() -> Iterator[Res[Sender]]:
def messages() -> Iterator[Res[Message]]:
- senders: dict[str, Sender] = {}
- msgs: dict[str, Message] = {}
- threads: dict[str, Thread] = {}
+ senders: Dict[str, Sender] = {}
+ msgs: Dict[str, Message] = {}
+ threads: Dict[str, Thread] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x
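
The schema detection above boils down to probing sqlite_master for a marker table; a standalone sketch of the same check, with a hypothetical database path:

import sqlite3

db = sqlite3.connect('file:threads_db2?mode=ro', uri=True)  # hypothetical path
rows = db.execute(
    'SELECT * FROM sqlite_master WHERE name = "logging_events_v2"'
).fetchall()
use_msys = len(rows) > 0  # newer 'msys' style databases carry this table
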
diff --git a/my/fbmessenger/common.py b/my/fbmessenger/common.py
index 0f5a374..33d1b20 100644
--- a/my/fbmessenger/common.py
+++ b/my/fbmessenger/common.py
@@ -1,9 +1,6 @@
-from __future__ import annotations
+from my.core import __NOT_HPI_MODULE__
-from my.core import __NOT_HPI_MODULE__ # isort: skip
-
-from collections.abc import Iterator
-from typing import Protocol
+from typing import Iterator, Optional, Protocol
from my.core import datetime_aware
@@ -13,7 +10,7 @@ class Thread(Protocol):
def id(self) -> str: ...
@property
- def name(self) -> str | None: ...
+ def name(self) -> Optional[str]: ...
class Sender(Protocol):
@@ -21,7 +18,7 @@ class Sender(Protocol):
def id(self) -> str: ...
@property
- def name(self) -> str | None: ...
+ def name(self) -> Optional[str]: ...
class Message(Protocol):
@@ -32,7 +29,7 @@ class Message(Protocol):
def dt(self) -> datetime_aware: ...
@property
- def text(self) -> str | None: ...
+ def text(self) -> Optional[str]: ...
@property
def thread(self) -> Thread: ...
@@ -42,11 +39,8 @@ class Message(Protocol):
from itertools import chain
-
from more_itertools import unique_everseen
-
-from my.core import Res, warn_if_empty
-
+from my.core import warn_if_empty, Res
@warn_if_empty
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
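
For context, a minimal sketch of what a _merge_messages-style helper does with its sources; the (id, dt) dedup key is an illustrative assumption, not necessarily the module's exact key:

from itertools import chain
from typing import Iterator
from more_itertools import unique_everseen

def merge(*sources: Iterator) -> Iterator:
    # chain the per-source iterators and collapse repeats to their first
    # occurrence; exceptions pass through as-is so errors aren't swallowed
    yield from unique_everseen(
        chain(*sources),
        key=lambda m: m if isinstance(m, Exception) else (m.id, m.dt),
    )
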
diff --git a/my/fbmessenger/export.py b/my/fbmessenger/export.py
index 3b06618..201fad8 100644
--- a/my/fbmessenger/export.py
+++ b/my/fbmessenger/export.py
@@ -7,15 +7,16 @@ REQUIRES = [
'git+https://github.com/karlicoss/fbmessengerexport',
]
-from collections.abc import Iterator
from contextlib import ExitStack, contextmanager
from dataclasses import dataclass
+from typing import Iterator
+
+from my.core import PathIsh, Res, stat, Stats
+from my.core.warnings import high
+from my.config import fbmessenger as user_config
import fbmessengerexport.dal as messenger
-from my.config import fbmessenger as user_config
-from my.core import PathIsh, Res, Stats, stat
-from my.core.warnings import high
###
# support old style config
diff --git a/my/foursquare.py b/my/foursquare.py
index 3b418aa..63e1837 100644
--- a/my/foursquare.py
+++ b/my/foursquare.py
@@ -2,14 +2,16 @@
Foursquare/Swarm checkins
'''
-import json
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timezone, timedelta
from itertools import chain
-
-from my.config import foursquare as config
+from pathlib import Path
+import json
# TODO pytz for timezone???
+
from my.core import get_files, make_logger
+from my.config import foursquare as config
+
logger = make_logger(__name__)
diff --git a/my/github/all.py b/my/github/all.py
index f5e13cf..f885dde 100644
--- a/my/github/all.py
+++ b/my/github/all.py
@@ -3,7 +3,8 @@ Unified Github data (merged from GDPR export and periodic API updates)
"""
from . import gdpr, ghexport
-from .common import Results, merge_events
+
+from .common import merge_events, Results
def events() -> Results:
diff --git a/my/github/common.py b/my/github/common.py
index 22ba47e..e54bc4d 100644
--- a/my/github/common.py
+++ b/my/github/common.py
@@ -1,27 +1,24 @@
"""
Github events and their metadata: comments/issues/pull requests
"""
-
-from __future__ import annotations
-
-from my.core import __NOT_HPI_MODULE__ # isort: skip
+from ..core import __NOT_HPI_MODULE__
-from collections.abc import Iterable
from datetime import datetime, timezone
-from typing import NamedTuple, Optional
+from typing import Optional, NamedTuple, Iterable, Set, Tuple
-from my.core import make_logger, warn_if_empty
-from my.core.error import Res
+from ..core import warn_if_empty, LazyLogger
+from ..core.error import Res
-logger = make_logger(__name__)
+
+logger = LazyLogger(__name__)
class Event(NamedTuple):
dt: datetime
summary: str
eid: str
link: Optional[str]
- body: Optional[str] = None
+ body: Optional[str] = None
is_bot: bool = False
@@ -30,7 +27,7 @@ Results = Iterable[Res[Event]]
@warn_if_empty
def merge_events(*sources: Results) -> Results:
from itertools import chain
- emitted: set[tuple[datetime, str]] = set()
+ emitted: Set[Tuple[datetime, str]] = set()
for e in chain(*sources):
if isinstance(e, Exception):
yield e
@@ -55,7 +52,7 @@ def parse_dt(s: str) -> datetime:
# experimental way of supporting event ids... not sure
class EventIds:
@staticmethod
- def repo_created(*, dts: str, name: str, ref_type: str, ref: str | None) -> str:
+ def repo_created(*, dts: str, name: str, ref_type: str, ref: Optional[str]) -> str:
return f'{dts}_repocreated_{name}_{ref_type}_{ref}'
@staticmethod
diff --git a/my/github/gdpr.py b/my/github/gdpr.py
index be56454..1fde7c9 100644
--- a/my/github/gdpr.py
+++ b/my/github/gdpr.py
@@ -1,43 +1,36 @@
"""
Github data (uses [[https://github.com/settings/admin][official GDPR export]])
"""
-
-from __future__ import annotations
-
+from dataclasses import dataclass
import json
-from abc import abstractmethod
-from collections.abc import Iterator, Sequence
from pathlib import Path
-from typing import Any
+import tarfile
+from typing import Iterable, Any, Sequence, Dict, Optional
-from my.core import Paths, Res, Stats, get_files, make_logger, stat, warnings
-from my.core.error import echain
+from my.core import get_files, Res, PathIsh, stat, Stats, make_logger
+from my.core.cfg import make_config
+from my.core.error import notnone, echain
+
+from .common import Event, parse_dt, EventIds
+
+# TODO later, use a separate user config? (github_gdpr)
+from my.config import github as user_config
+
+
+@dataclass
+class github(user_config):
+ gdpr_dir: PathIsh # path to unpacked GDPR archive
+
+
+config = make_config(github)
-from .common import Event, EventIds, parse_dt
logger = make_logger(__name__)
-class config:
- @property
- @abstractmethod
- def gdpr_dir(self) -> Paths:
- raise NotImplementedError
-
-
-def make_config() -> config:
- # TODO later, use a separate user config? (github_gdpr)
- from my.config import github as user_config
-
- class combined_config(user_config, config):
- pass
-
- return combined_config()
-
-
def inputs() -> Sequence[Path]:
- gdpr_dir = make_config().gdpr_dir
- res = get_files(gdpr_dir)
+ gdir = config.gdpr_dir
+ res = get_files(gdir)
schema_json = [f for f in res if f.name == 'schema.json']
was_unpacked = len(schema_json) > 0
if was_unpacked:
@@ -50,37 +43,22 @@ def inputs() -> Sequence[Path]:
return res
-def events() -> Iterator[Res[Event]]:
+def events() -> Iterable[Res[Event]]:
last = max(inputs())
logger.info(f'extracting data from {last}')
- root: Path | None = None
-
- if last.is_dir(): # if it's already CPath, this will match it
- root = last
+ # a bit naughty and ad-hoc, but we will generify reading from tar.gz once we have more examples
+ # another one is zulip archive
+ if last.is_dir():
+ files = list(sorted(last.glob('*.json'))) # looks like all files are in the root
+ open_file = lambda f: f.open()
else:
- try:
- from kompress import CPath
-
- root = CPath(last)
- assert len(list(root.iterdir())) > 0 # trigger to check if we have the kompress version with targz support
- except Exception as e:
- logger.exception(e)
- warnings.high("Upgrade 'kompress' to latest version with native .tar.gz support. Falling back to unpacking to tmp dir.")
-
- if root is None:
- from my.core.structure import match_structure
-
- with match_structure(last, expected=()) as res: # expected=() matches it regardless any patterns
- [root] = res
- yield from _process_one(root)
- else:
- yield from _process_one(root)
-
-
-def _process_one(root: Path) -> Iterator[Res[Event]]:
- files = sorted(root.glob('*.json')) # looks like all files are in the root
+ # treat as .tar.gz
+ tfile = tarfile.open(last)
+ files = list(sorted(map(Path, tfile.getnames())))
+ files = [p for p in files if len(p.parts) == 1 and p.suffix == '.json']
+ open_file = lambda p: notnone(tfile.extractfile(f'./{p}')) # NOTE odd, doesn't work without ./
# fmt: off
handler_map = {
@@ -122,7 +100,8 @@ def _process_one(root: Path) -> Iterator[Res[Event]]:
# ignored
continue
- j = json.loads(f.read_text())
+ with open_file(f) as fo:
+ j = json.load(fo)
for r in j:
try:
yield handler(r)
@@ -137,7 +116,7 @@ def stats() -> Stats:
# TODO typing.TypedDict could be handy here..
-def _parse_common(d: dict) -> dict:
+def _parse_common(d: Dict) -> Dict:
url = d['url']
body = d.get('body')
return {
@@ -147,7 +126,7 @@ def _parse_common(d: dict) -> dict:
}
-def _parse_repository(d: dict) -> Event:
+def _parse_repository(d: Dict) -> Event:
pref = 'https://github.com/'
url = d['url']
dts = d['created_at']
@@ -163,13 +142,13 @@ def _parse_repository(d: dict) -> Event:
# user may be None if the user was deleted
-def _is_bot(user: str | None) -> bool:
+def _is_bot(user: Optional[str]) -> bool:
if user is None:
return False
- return "[bot]" in user
+ return "[bot]" in "user"
-def _parse_issue_comment(d: dict) -> Event:
+def _parse_issue_comment(d: Dict) -> Event:
url = d['url']
return Event(
**_parse_common(d),
@@ -179,7 +158,7 @@ def _parse_issue_comment(d: dict) -> Event:
)
-def _parse_issue(d: dict) -> Event:
+def _parse_issue(d: Dict) -> Event:
url = d['url']
title = d['title']
return Event(
@@ -190,7 +169,7 @@ def _parse_issue(d: dict) -> Event:
)
-def _parse_pull_request(d: dict) -> Event:
+def _parse_pull_request(d: Dict) -> Event:
dts = d['created_at']
url = d['url']
title = d['title']
@@ -204,7 +183,7 @@ def _parse_pull_request(d: dict) -> Event:
)
-def _parse_project(d: dict) -> Event:
+def _parse_project(d: Dict) -> Event:
url = d['url']
title = d['name']
is_bot = "[bot]" in d["creator"]
@@ -219,7 +198,7 @@ def _parse_project(d: dict) -> Event:
)
-def _parse_release(d: dict) -> Event:
+def _parse_release(d: Dict) -> Event:
tag = d['tag_name']
return Event(
**_parse_common(d),
@@ -228,7 +207,7 @@ def _parse_release(d: dict) -> Event:
)
-def _parse_commit_comment(d: dict) -> Event:
+def _parse_commit_comment(d: Dict) -> Event:
url = d['url']
return Event(
**_parse_common(d),
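
The events() rewrite above normalizes both layouts (unpacked directory vs .tar.gz) to a files list plus an open_file callable. A standalone sketch of that shape, error handling omitted:

import tarfile
from pathlib import Path

def json_inputs(last: Path):
    if last.is_dir():
        files = sorted(last.glob('*.json'))       # exports keep json in the root
        open_file = lambda p: p.open()
    else:
        tf = tarfile.open(last)                   # assume .tar.gz otherwise
        names = sorted(Path(n) for n in tf.getnames())
        files = [p for p in names if len(p.parts) == 1 and p.suffix == '.json']
        # members are stored with a './' prefix, hence the f'./{p}' lookup
        open_file = lambda p: tf.extractfile(f'./{p}')
    return files, open_file
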
diff --git a/my/github/ghexport.py b/my/github/ghexport.py
index 3e17c10..80106a5 100644
--- a/my/github/ghexport.py
+++ b/my/github/ghexport.py
@@ -1,17 +1,13 @@
"""
Github data: events, comments, etc. (API data)
"""
-
-from __future__ import annotations
-
REQUIRES = [
'git+https://github.com/karlicoss/ghexport',
]
-
from dataclasses import dataclass
-from my.config import github as user_config
from my.core import Paths
+from my.config import github as user_config
@dataclass
@@ -25,9 +21,7 @@ class github(user_config):
###
-from my.core.cfg import Attrs, make_config
-
-
+from my.core.cfg import make_config, Attrs
def migration(attrs: Attrs) -> Attrs:
export_dir = 'export_dir'
if export_dir in attrs: # legacy name
@@ -47,14 +41,15 @@ except ModuleNotFoundError as e:
############################
-from collections.abc import Sequence
from functools import lru_cache
from pathlib import Path
+from typing import Tuple, Dict, Sequence, Optional
-from my.core import LazyLogger, get_files
+from my.core import get_files, LazyLogger
from my.core.cachew import mcachew
-from .common import Event, EventIds, Results, parse_dt
+from .common import Event, parse_dt, Results, EventIds
+
logger = LazyLogger(__name__)
@@ -87,9 +82,7 @@ def _events() -> Results:
yield e
-from my.core import Stats, stat
-
-
+from my.core import stat, Stats
def stats() -> Stats:
return {
**stat(events),
@@ -106,7 +99,7 @@ def _log_if_unhandled(e) -> None:
Link = str
EventId = str
Body = str
-def _get_summary(e) -> tuple[str, Link | None, EventId | None, Body | None]:
+def _get_summary(e) -> Tuple[str, Optional[Link], Optional[EventId], Optional[Body]]:
# TODO would be nice to give access to raw event within timeline
dts = e['created_at']
eid = e['id']
@@ -202,7 +195,7 @@ def _get_summary(e) -> tuple[str, Link | None, EventId | None, Body | None]:
return tp, None, None, None
-def _parse_event(d: dict) -> Event:
+def _parse_event(d: Dict) -> Event:
summary, link, eid, body = _get_summary(d)
if eid is None:
eid = d['id'] # meh
diff --git a/my/goodreads.py b/my/goodreads.py
index 559efda..864bd64 100644
--- a/my/goodreads.py
+++ b/my/goodreads.py
@@ -7,18 +7,15 @@ REQUIRES = [
from dataclasses import dataclass
-
+from my.core import datetime_aware, Paths
from my.config import goodreads as user_config
-from my.core import Paths, datetime_aware
-
@dataclass
class goodreads(user_config):
# paths[s]/glob to the exported JSON data
export_path: Paths
-from my.core.cfg import Attrs, make_config
-
+from my.core.cfg import make_config, Attrs
def _migration(attrs: Attrs) -> Attrs:
export_dir = 'export_dir'
@@ -32,19 +29,18 @@ config = make_config(goodreads, migration=_migration)
#############################
-from collections.abc import Iterator, Sequence
-from pathlib import Path
-
from my.core import get_files
-
+from typing import Sequence, Iterator
+from pathlib import Path
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
from datetime import datetime
-
import pytz
+
+
from goodrexport import dal
diff --git a/my/google/maps/_android_protobuf.py b/my/google/maps/_android_protobuf.py
index 615623d..1d43ae0 100644
--- a/my/google/maps/_android_protobuf.py
+++ b/my/google/maps/_android_protobuf.py
@@ -1,8 +1,8 @@
-from my.core import __NOT_HPI_MODULE__ # isort: skip
+from my.core import __NOT_HPI_MODULE__
# NOTE: this tool was quite useful https://github.com/aj3423/aproto
-from google.protobuf import descriptor_pb2, descriptor_pool, message_factory
+from google.protobuf import descriptor_pool, descriptor_pb2, message_factory
TYPE_STRING = descriptor_pb2.FieldDescriptorProto.TYPE_STRING
TYPE_BYTES = descriptor_pb2.FieldDescriptorProto.TYPE_BYTES
diff --git a/my/google/maps/android.py b/my/google/maps/android.py
index 95ecacf..279231a 100644
--- a/my/google/maps/android.py
+++ b/my/google/maps/android.py
@@ -7,20 +7,20 @@ REQUIRES = [
"protobuf", # for parsing blobs from the database
]
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
-from typing import Any
+from typing import Any, Iterator, Optional, Sequence
from urllib.parse import quote
-from my.core import LazyLogger, Paths, Res, datetime_aware, get_files
+from my.core import datetime_aware, get_files, LazyLogger, Paths, Res
from my.core.common import unique_everseen
from my.core.sqlite import sqlite_connection
+import my.config
+
from ._android_protobuf import parse_labeled, parse_list, parse_place
-import my.config # isort: skip
logger = LazyLogger(__name__)
@@ -59,8 +59,8 @@ class Place:
updated_at: datetime_aware # TODO double check it's utc?
title: str
location: Location
- address: str | None
- note: str | None
+ address: Optional[str]
+ note: Optional[str]
@property
def place_url(self) -> str:
diff --git a/my/google/takeout/html.py b/my/google/takeout/html.py
index 3f2b5db..d393957 100644
--- a/my/google/takeout/html.py
+++ b/my/google/takeout/html.py
@@ -2,22 +2,19 @@
Google Takeout exports: browsing history, search/youtube/google play activity
'''
-from __future__ import annotations
-
-from my.core import __NOT_HPI_MODULE__ # isort: skip
-
-import re
-from collections.abc import Iterable
-from datetime import datetime
from enum import Enum
-from html.parser import HTMLParser
+import re
from pathlib import Path
-from typing import Any, Callable
+from datetime import datetime
+from html.parser import HTMLParser
+from typing import List, Optional, Any, Callable, Iterable, Tuple
+from collections import OrderedDict
from urllib.parse import unquote
import pytz
-from my.core.time import abbr_to_timezone
+from ...core.time import abbr_to_timezone
+
# NOTE: https://bugs.python.org/issue22377 %Z doesn't work properly
_TIME_FORMATS = [
@@ -40,7 +37,7 @@ def parse_dt(s: str) -> datetime:
s, tzabbr = s.rsplit(maxsplit=1)
tz = abbr_to_timezone(tzabbr)
- dt: datetime | None = None
+ dt: Optional[datetime] = None
for fmt in _TIME_FORMATS:
try:
dt = datetime.strptime(s, fmt)
@@ -77,7 +74,7 @@ class State(Enum):
Url = str
Title = str
-Parsed = tuple[datetime, Url, Title]
+Parsed = Tuple[datetime, Url, Title]
Callback = Callable[[datetime, Url, Title], None]
@@ -87,9 +84,9 @@ class TakeoutHTMLParser(HTMLParser):
super().__init__()
self.state: State = State.OUTSIDE
- self.title_parts: list[str] = []
- self.title: str | None = None
- self.url: str | None = None
+ self.title_parts: List[str] = []
+ self.title: Optional[str] = None
+ self.url: Optional[str] = None
self.callback = callback
@@ -97,8 +94,8 @@ class TakeoutHTMLParser(HTMLParser):
def handle_starttag(self, tag, attrs):
if self.state == State.INSIDE and tag == 'a':
self.state = State.PARSING_LINK
- [hr] = (v for k, v in attrs if k == 'href')
- assert hr is not None
+ attrs = OrderedDict(attrs)
+ hr = attrs['href']
# sometimes it starts with this prefix, it's apparently clicks from google search? or visits from chrome address line? who knows...
# TODO handle http?
@@ -126,7 +123,7 @@ class TakeoutHTMLParser(HTMLParser):
# JamiexxVEVO
# Jun 21, 2018, 5:48:34 AM
# Products:
- # YouTube
+ # YouTube
def handle_data(self, data):
if self.state == State.OUTSIDE:
if data[:-1].strip() in ("Watched", "Visited"):
@@ -152,7 +149,7 @@ class TakeoutHTMLParser(HTMLParser):
def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
- results: list[Parsed] = []
+ results: List[Parsed] = []
def cb(dt: datetime, url: Url, title: Title) -> None:
results.append((dt, url, title))
parser = TakeoutHTMLParser(callback=cb)
@@ -160,3 +157,5 @@ def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
data = fo.read()
parser.feed(data)
return results
+
+from ...core import __NOT_HPI_MODULE__
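
Since %Z can't round-trip timezone abbreviations (bpo-22377), parse_dt above splits the abbreviation off and tries each candidate format in turn. A trimmed sketch of the strategy; the format list is abbreviated and tz_for_abbr stands in for abbr_to_timezone:

from datetime import datetime
import pytz

_FORMATS = ['%b %d, %Y, %I:%M:%S %p', '%d %b %Y, %H:%M:%S']

def parse_dt(s: str, tz_for_abbr) -> datetime:
    s, tzabbr = s.rsplit(maxsplit=1)      # '... 5:48:34 AM UTC' -> ('...', 'UTC')
    tz = tz_for_abbr(tzabbr)              # abbreviation -> pytz timezone
    for fmt in _FORMATS:
        try:
            return tz.localize(datetime.strptime(s, fmt))
        except ValueError:
            continue                      # try the next known format
    raise RuntimeError(f'none of {_FORMATS} matched {s!r}')
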
diff --git a/my/google/takeout/parser.py b/my/google/takeout/parser.py
index 13fd04a..c4e5682 100644
--- a/my/google/takeout/parser.py
+++ b/my/google/takeout/parser.py
@@ -1,7 +1,7 @@
"""
-Parses Google Takeout using [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]]
+Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]
-See [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] for more information
+See [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] for more information
about how to export and organize your takeouts
If the DISABLE_TAKEOUT_CACHE environment variable is set, this won't cache individual
@@ -12,31 +12,27 @@ zip files of the exports, which are temporarily unpacked while creating
the cachew cache
"""
-REQUIRES = ["git+https://github.com/purarue/google_takeout_parser"]
+REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
-import os
-from collections.abc import Sequence
from contextlib import ExitStack
from dataclasses import dataclass
+import os
+from typing import List, Sequence, cast
from pathlib import Path
-from typing import cast
-
-from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
-
-from my.core import Paths, Stats, get_files, make_config, make_logger, stat
+from my.core import make_config, stat, Stats, get_files, Paths, make_logger
from my.core.cachew import mcachew
from my.core.error import ErrorPolicy
from my.core.structure import match_structure
-from my.core.time import user_forced
+from my.core.time import user_forced
+from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
ABBR_TIMEZONES.extend(user_forced())
import google_takeout_parser
-from google_takeout_parser.merge import CacheResults, GoogleEventSet
-from google_takeout_parser.models import BaseEvent
from google_takeout_parser.path_dispatch import TakeoutParser
+from google_takeout_parser.merge import GoogleEventSet, CacheResults
-# see https://github.com/purarue/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
+# see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
from my.config import google as user_config
@@ -59,7 +55,6 @@ logger = make_logger(__name__, level="warning")
# patch the takeout parser logger to match the computed loglevel
from google_takeout_parser.log import setup as setup_takeout_logger
-
setup_takeout_logger(logger.level)
@@ -87,7 +82,7 @@ except ImportError:
google_takeout_version = str(getattr(google_takeout_parser, '__version__', 'unknown'))
-def _cachew_depends_on() -> list[str]:
+def _cachew_depends_on() -> List[str]:
exports = sorted([str(p) for p in inputs()])
# add google takeout parser pip version to hash, so this re-creates on breaking changes
exports.insert(0, f"google_takeout_version: {google_takeout_version}")
@@ -96,21 +91,10 @@ def _cachew_depends_on() -> list[str]:
# ResultsType is a Union of all of the models in google_takeout_parser
@mcachew(depends_on=_cachew_depends_on, logger=logger, force_file=True)
-def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults: # noqa: FBT001
+def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
error_policy = config.error_policy
count = 0
emitted = GoogleEventSet()
-
- try:
- emitted_add = emitted.add_if_not_present
- except AttributeError:
- # compat for older versions of google_takeout_parser which didn't have this method
- def emitted_add(other: BaseEvent) -> bool:
- if other in emitted:
- return False
- emitted.add(other)
- return True
-
# reversed shouldn't really matter? but logic is to use newer
# takeouts if they're named according to date, since JSON Activity
# is nicer than HTML Activity
@@ -123,7 +107,7 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
else:
results = exit_stack.enter_context(match_structure(path, expected=EXPECTED, partial=True))
for m in results:
- # e.g. /home/username/data/google_takeout/Takeout-1634932457.zip") -> 'Takeout-1634932457'
+ # e.g. /home/sean/data/google_takeout/Takeout-1634932457.zip") -> 'Takeout-1634932457'
# means that zipped takeouts have nice filenames from cachew
cw_id, _, _ = path.name.rpartition(".")
# each takeout result is cached as well, in individual databases per-type
@@ -139,9 +123,10 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
elif error_policy == 'drop':
pass
continue
-
- if emitted_add(event):
- yield event # type: ignore[misc]
+ if event in emitted:
+ continue
+ emitted.add(event)
+ yield event # type: ignore[misc]
logger.debug(
f"HPI Takeout merge: from a total of {count} events, removed {count - len(emitted)} duplicates"
)
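
The merge loop above walks takeouts newest-first and relies on GoogleEventSet so an event already seen in a newer export is dropped. A minimal sketch with a plain set standing in, and a hypothetical key() for event identity:

def merged_events(takeouts, parse, key):
    emitted = set()
    for takeout in reversed(takeouts):    # prefer newer takeouts for dupes
        for event in parse(takeout):
            k = key(event)
            if k in emitted:
                continue                  # already yielded from a newer export
            emitted.add(k)
            yield event
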
diff --git a/my/google/takeout/paths.py b/my/google/takeout/paths.py
index 6a523e2..5b53149 100644
--- a/my/google/takeout/paths.py
+++ b/my/google/takeout/paths.py
@@ -2,57 +2,44 @@
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
'''
-from __future__ import annotations
+from dataclasses import dataclass
+from ...core.common import Paths, get_files
+from ...core.util import __NOT_HPI_MODULE__
-from my.core import __NOT_HPI_MODULE__ # isort: skip
-
-from abc import abstractmethod
-from collections.abc import Iterable
-from pathlib import Path
+from my.config import google as user_config
from more_itertools import last
-from my.core import Paths, get_files
-
-
-class config:
- """
- path/paths/glob for the takeout zips
- """
-
- @property
- @abstractmethod
- def takeout_path(self) -> Paths:
- raise NotImplementedError
-
+@dataclass
+class google(user_config):
+ takeout_path: Paths # path/paths/glob for the takeout zips
+###
# TODO rename 'google' to 'takeout'? not sure
+from ...core.cfg import make_config
+config = make_config(google)
-def make_config() -> config:
- from my.config import google as user_config
-
- class combined_config(user_config, config): ...
-
- return combined_config()
+from pathlib import Path
+from typing import Optional, Iterable
-def get_takeouts(*, path: str | None = None) -> Iterable[Path]:
+def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
"""
Sometimes google splits takeout into multiple archives, so we need to detect the ones that contain the path we need
"""
- # TODO zip is not great..
+ # TODO FIXME zip is not great..
# allow a lambda expression? that way the user could restrict it
- cfg = make_config()
- for takeout in get_files(cfg.takeout_path, glob='*.zip'):
+ for takeout in get_files(config.takeout_path, glob='*.zip'):
if path is None or (takeout / path).exists():
yield takeout
-def get_last_takeout(*, path: str | None = None) -> Path | None:
+def get_last_takeout(*, path: Optional[str]=None) -> Optional[Path]:
return last(get_takeouts(path=path), default=None)
# TODO might be a good idea to merge across multiple takeouts...
# perhaps even a special takeout module that deals with all of this automatically?
# e.g. accumulate, filter and maybe report useless takeouts?
+
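
get_takeouts above keeps only archives that contain the requested sub-path, since one Takeout can be split across several zips. A stdlib-only sketch of the same check (the real code goes through get_files and path-like zip access instead):

from pathlib import Path
from zipfile import ZipFile

def takeouts_containing(zips, subpath: str):
    for z in zips:
        with ZipFile(z) as zf:
            # keep the archive if any member lives under the wanted sub-path
            if any(name.startswith(subpath) for name in zf.namelist()):
                yield Path(z)
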
diff --git a/my/hackernews/common.py b/my/hackernews/common.py
index 6990987..0c5ff9b 100644
--- a/my/hackernews/common.py
+++ b/my/hackernews/common.py
@@ -1,6 +1,6 @@
from typing import Protocol
-from my.core import datetime_aware
+from my.core import datetime_aware, Json
def hackernews_link(id: str) -> str:
diff --git a/my/hackernews/dogsheep.py b/my/hackernews/dogsheep.py
index 8303284..de6c58d 100644
--- a/my/hackernews/dogsheep.py
+++ b/my/hackernews/dogsheep.py
@@ -3,14 +3,14 @@ Hackernews data via Dogsheep [[hacker-news-to-sqlite][https://github.com/dogshee
"""
from __future__ import annotations
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
+from typing import Iterator, Sequence, Optional
-import my.config
-from my.core import Paths, Res, datetime_aware, get_files
+from my.core import get_files, Paths, Res, datetime_aware
from my.core.sqlite import sqlite_connection
+import my.config
from .common import hackernews_link
@@ -33,9 +33,9 @@ class Item:
id: str
type: str
created: datetime_aware # checked and it's utc
- title: str | None # only present for Story
- text_html: str | None # should be present for Comment and might for Story
- url: str | None # might be present for Story
+ title: Optional[str] # only present for Story
+ text_html: Optional[str] # should be present for Comment and might for Story
+ url: Optional[str] # might be present for Story
# todo process 'deleted'? fields?
# todo process 'parent'?
diff --git a/my/hackernews/harmonic.py b/my/hackernews/harmonic.py
index 08a82e6..3b4ae61 100644
--- a/my/hackernews/harmonic.py
+++ b/my/hackernews/harmonic.py
@@ -1,22 +1,17 @@
"""
[[https://play.google.com/store/apps/details?id=com.simon.harmonichackernews][Harmonic]] app for Hackernews
"""
-
-from __future__ import annotations
-
REQUIRES = ['lxml', 'orjson']
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, TypedDict, cast
-
import orjson
+from pathlib import Path
+from typing import Any, Dict, Iterator, List, Optional, Sequence, TypedDict, cast
+
from lxml import etree
from more_itertools import one
-import my.config
from my.core import (
Paths,
Res,
@@ -27,10 +22,8 @@ from my.core import (
stat,
)
from my.core.common import unique_everseen
-
-from .common import SavedBase, hackernews_link
-
-import my.config # isort: skip
+import my.config
+from .common import hackernews_link, SavedBase
logger = make_logger(__name__)
@@ -50,7 +43,7 @@ class Cached(TypedDict):
created_at_i: int
id: str
points: int
- test: str | None
+ test: Optional[str]
title: str
type: str # TODO Literal['story', 'comment']? comments are only in 'children' field tho
url: str
@@ -101,16 +94,16 @@ def _saved() -> Iterator[Res[Saved]]:
# TODO defensive for each item!
tr = etree.parse(path)
- res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
+ res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
cached_ids = [x.text.split('-')[0] for x in res]
- cached: dict[str, Cached] = {}
+ cached: Dict[str, Cached] = {}
for sid in cached_ids:
- res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORY{sid}"]')))
+ res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORY{sid}"]')))
j = orjson.loads(res.text)
cached[sid] = j
- res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_BOOKMARKS"]')))
+ res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_BOOKMARKS"]')))
for x in res.text.split('-'):
ids, item_timestamp = x.split('q')
# not sure if the timestamp is of any use?
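
_saved() above reads Android shared-preferences XML: one key holds the '-'-separated list of cached story ids, and each story's JSON sits under its own key. A condensed sketch of that lookup pattern:

import orjson
from lxml import etree
from more_itertools import one

def cached_stories(path, prefix: str):
    tr = etree.parse(str(path))
    ids_node = one(tr.xpath(f'//*[@name="{prefix}_CACHED_STORIES_STRINGS"]'))
    sids = [x.text.split('-')[0] for x in ids_node]   # children carry 'id-…' strings
    for sid in sids:
        node = one(tr.xpath(f'//*[@name="{prefix}_CACHED_STORY{sid}"]'))
        yield sid, orjson.loads(node.text)            # per-story JSON payload
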
diff --git a/my/hackernews/materialistic.py b/my/hackernews/materialistic.py
index ccf285b..4d5cd47 100644
--- a/my/hackernews/materialistic.py
+++ b/my/hackernews/materialistic.py
@@ -1,20 +1,19 @@
"""
[[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews
"""
-from collections.abc import Iterator, Sequence
from datetime import datetime, timezone
from pathlib import Path
-from typing import Any, NamedTuple
+from typing import Any, Dict, Iterator, NamedTuple, Sequence
from more_itertools import unique_everseen
-from my.core import datetime_aware, get_files, make_logger
+from my.core import get_files, datetime_aware, make_logger
from my.core.sqlite import sqlite_connection
+from my.config import materialistic as config # todo migrate config to my.hackernews.materialistic
+
from .common import hackernews_link
-# todo migrate config to my.hackernews.materialistic
-from my.config import materialistic as config # isort: skip
logger = make_logger(__name__)
@@ -23,7 +22,7 @@ def inputs() -> Sequence[Path]:
return get_files(config.export_path)
-Row = dict[str, Any]
+Row = Dict[str, Any]
class Saved(NamedTuple):
diff --git a/my/hypothesis.py b/my/hypothesis.py
index 15e854b..55fff64 100644
--- a/my/hypothesis.py
+++ b/my/hypothesis.py
@@ -4,22 +4,20 @@
REQUIRES = [
'git+https://github.com/karlicoss/hypexport',
]
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import Iterator, Sequence, TYPE_CHECKING
from my.core import (
+ get_files,
+ stat,
Paths,
Res,
Stats,
- get_files,
- stat,
)
from my.core.cfg import make_config
from my.core.hpi_compat import always_supports_sequence
-
-import my.config # isort: skip
+import my.config
@dataclass
@@ -43,7 +41,6 @@ except ModuleNotFoundError as e:
dal = pre_pip_dal_handler('hypexport', e, config, requires=REQUIRES)
-DAL = dal.DAL
Highlight = dal.Highlight
Page = dal.Page
@@ -52,8 +49,8 @@ def inputs() -> Sequence[Path]:
return get_files(config.export_path)
-def _dal() -> DAL:
- return DAL(inputs())
+def _dal() -> dal.DAL:
+ return dal.DAL(inputs())
# TODO they are in reverse chronological order...
diff --git a/my/instagram/all.py b/my/instagram/all.py
index ce78409..8007399 100644
--- a/my/instagram/all.py
+++ b/my/instagram/all.py
@@ -1,10 +1,11 @@
-from collections.abc import Iterator
+from typing import Iterator
-from my.core import Res, Stats, stat
+from my.core import Res, stat, Stats
from my.core.source import import_source
from .common import Message, _merge_messages
+
src_gdpr = import_source(module_name='my.instagram.gdpr')
@src_gdpr
def _messages_gdpr() -> Iterator[Res[Message]]:
@@ -23,7 +24,7 @@ def messages() -> Iterator[Res[Message]]:
# TODO in general best to prefer android, it has more data
# - message ids
# - usernames are correct for Android data
- # - thread ids more meaningful?
+ # - thread ids more meaningful?
# but for now prefer gdpr prefix since it makes things a bit more consistent?
# e.g. a new batch of android exports can throw off ids if we rely on it for mapping
yield from _merge_messages(
diff --git a/my/instagram/android.py b/my/instagram/android.py
index 12c11d3..96b75d2 100644
--- a/my/instagram/android.py
+++ b/my/instagram/android.py
@@ -3,29 +3,30 @@ Instagram data from Android app database (in =/data/data/com.instagram.android/data
"""
from __future__ import annotations
-import json
-import sqlite3
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime
+import json
from pathlib import Path
+import sqlite3
+from typing import Iterator, Sequence, Optional, Dict, Union
from my.core import (
- Json,
- Paths,
- Res,
- assert_never,
- datetime_naive,
get_files,
+ Paths,
make_config,
make_logger,
+ datetime_naive,
+ Json,
+ Res,
+ assert_never,
)
-from my.core.cachew import mcachew
from my.core.common import unique_everseen
+from my.core.cachew import mcachew
from my.core.error import echain
-from my.core.sqlite import select, sqlite_connect_immutable
+from my.core.sqlite import sqlite_connect_immutable, select
+
+from my.config import instagram as user_config
-from my.config import instagram as user_config # isort: skip
logger = make_logger(__name__)
@@ -37,8 +38,8 @@ class instagram_android_config(user_config.android):
# sadly doesn't seem easy to extract user's own handle/name from the db...
# todo maybe makes more sense to keep in parent class? not sure...
- username: str | None = None
- full_name: str | None = None
+ username: Optional[str] = None
+ full_name: Optional[str] = None
config = make_config(instagram_android_config)
@@ -100,13 +101,13 @@ class MessageError(RuntimeError):
return self.rest == other.rest
-def _parse_message(j: Json) -> _Message | None:
+def _parse_message(j: Json) -> Optional[_Message]:
id = j['item_id']
t = j['item_type']
tid = j['thread_key']['thread_id']
uid = j['user_id']
created = datetime.fromtimestamp(int(j['timestamp']) / 1_000_000)
- text: str | None = None
+ text: Optional[str] = None
if t == 'text':
text = j['text']
elif t == 'reel_share':
@@ -132,7 +133,7 @@ def _parse_message(j: Json) -> _Message | None:
)
-def _process_db(db: sqlite3.Connection) -> Iterator[Res[User | _Message]]:
+def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
# TODO ugh. seems like no way to extract username?
# sometimes messages (e.g. media_share) contain it in message field
# but generally it's not present. ugh
@@ -174,7 +175,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[User | _Message]]:
yield e
-def _entities() -> Iterator[Res[User | _Message]]:
+def _entities() -> Iterator[Res[Union[User, _Message]]]:
# NOTE: definitely need to merge multiple, app seems to recycle old messages
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
# todo use TypedDict?
@@ -193,7 +194,7 @@ def _entities() -> Iterator[Res[User | _Message]]:
@mcachew(depends_on=inputs)
def messages() -> Iterator[Res[Message]]:
- id2user: dict[str, User] = {}
+ id2user: Dict[str, User] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x
diff --git a/my/instagram/common.py b/my/instagram/common.py
index 17d130f..4df07a1 100644
--- a/my/instagram/common.py
+++ b/my/instagram/common.py
@@ -1,10 +1,9 @@
-from collections.abc import Iterator
from dataclasses import replace
from datetime import datetime
from itertools import chain
-from typing import Any, Protocol
+from typing import Iterator, Dict, Any, Protocol
-from my.core import Res, warn_if_empty
+from my.core import warn_if_empty, Res
class User(Protocol):
@@ -41,7 +40,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
# ugh. seems that GDPR thread ids are completely uncorrelated to any android ids (tried searching over all sqlite dump)
# so the only way to correlate is to try and match messages
# we also can't use unique_everseen here, otherwise will never get a chance to unify threads
- mmap: dict[str, Message] = {}
+ mmap: Dict[str, Message] = {}
thread_map = {}
user_map = {}
@@ -61,7 +60,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
user_map[m.user.id] = mm.user
else:
# not emitted yet, need to emit
- repls: dict[str, Any] = {}
+ repls: Dict[str, Any] = {}
tid = thread_map.get(m.thread_id)
if tid is not None:
repls['thread_id'] = tid
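
Since the thread ids can't be correlated directly, the merge above matches the messages themselves and then rewrites ids through mapping dicts. A minimal sketch of that unification, assuming Message is a dataclass and using a (dt, text) match key purely for illustration:

from dataclasses import replace

def unify(messages):
    seen = {}        # (dt, text) -> first message emitted with that key
    thread_map = {}  # later thread id -> canonical (first seen) thread id
    for m in messages:
        key = (m.dt, m.text)
        prev = seen.get(key)
        if prev is not None:
            thread_map[m.thread_id] = prev.thread_id   # same message, unify threads
            continue
        canonical = thread_map.get(m.thread_id)
        if canonical is not None:
            m = replace(m, thread_id=canonical)        # rewrite to the stable id
        seen[key] = m
        yield m
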
diff --git a/my/instagram/gdpr.py b/my/instagram/gdpr.py
index d417fdb..1415d55 100644
--- a/my/instagram/gdpr.py
+++ b/my/instagram/gdpr.py
@@ -2,27 +2,26 @@
Instagram data (uses [[https://www.instagram.com/download/request][official GDPR export]])
"""
-from __future__ import annotations
-
-import json
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime
+import json
from pathlib import Path
+from typing import Iterator, Sequence, Dict, Union
from more_itertools import bucket
from my.core import (
+ get_files,
Paths,
+ datetime_naive,
Res,
assert_never,
- datetime_naive,
- get_files,
make_logger,
)
from my.core.common import unique_everseen
-from my.config import instagram as user_config # isort: skip
+from my.config import instagram as user_config
+
logger = make_logger(__name__)
@@ -71,12 +70,12 @@ def _decode(s: str) -> str:
return s.encode('latin-1').decode('utf8')
-def _entities() -> Iterator[Res[User | _Message]]:
+def _entities() -> Iterator[Res[Union[User, _Message]]]:
# it's worth processing all previous export -- sometimes instagram removes some metadata from newer ones
# NOTE: here there are basically two options
# - process inputs as is (from oldest to newest)
# this would be more stable wrt newer exports (e.g. existing thread ids won't change)
- # the downside is that newer exports seem to have better thread ids, so might be preferable to use them
+ # the downside is that newer exports seem to have better thread ids, so might be preferable to use them
# - process inputs reversed (from newest to oldest)
# the upside is that thread ids/usernames might be better
# the downside is that if for example the user renames, thread ids will change _a lot_, might be undesirable..
@@ -85,7 +84,7 @@ def _entities() -> Iterator[Res[User | _Message]]:
yield from _entitites_from_path(path)
-def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
+def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]:
# TODO make sure it works both with plain directory
# ideally get_files should return the right thing, and we won't have to force ZipPath/match_structure here
# e.g. possible options are:
@@ -137,7 +136,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
j = json.loads(ffile.read_text())
id_len = 10
- # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation
+ # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation
# but I stared a bit at these ids vs database ids and can't see any way to find the correspondence :(
# so basically the only way to merge is to actually try some magic and correlate timestamps/message texts?
# another option is perhaps to query user id from username with some free API
@@ -203,7 +202,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
# TODO basically copy pasted from android.py... hmm
def messages() -> Iterator[Res[Message]]:
- id2user: dict[str, User] = {}
+ id2user: Dict[str, User] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x
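
The trade-off discussed above (oldest-first keeps ids stable, newest-first gets better ids) comes down to which occurrence the dedup pass sees first. A sketch of the oldest-first option, with a hypothetical per-export parse():

from more_itertools import unique_everseen

def messages(paths, parse):
    def entities():
        for path in sorted(paths):         # oldest export first
            yield from parse(path)
    # first-seen wins, so ids from older exports stay stable across reruns
    yield from unique_everseen(entities(), key=lambda m: m.id)
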
diff --git a/my/instapaper.py b/my/instapaper.py
index d79e7e4..df1f70b 100644
--- a/my/instapaper.py
+++ b/my/instapaper.py
@@ -7,10 +7,10 @@ REQUIRES = [
from dataclasses import dataclass
-from my.config import instapaper as user_config
-
from .core import Paths
+from my.config import instapaper as user_config
+
@dataclass
class instapaper(user_config):
@@ -22,7 +22,6 @@ class instapaper(user_config):
from .core.cfg import make_config
-
config = make_config(instapaper)
@@ -40,12 +39,9 @@ Bookmark = dal.Bookmark
Page = dal.Page
-from collections.abc import Iterable, Sequence
+from typing import Sequence, Iterable
from pathlib import Path
-
from .core import get_files
-
-
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
diff --git a/my/ip/all.py b/my/ip/all.py
index c267383..46c1fec 100644
--- a/my/ip/all.py
+++ b/my/ip/all.py
@@ -3,15 +3,16 @@ An example all.py stub module that provides ip data
To use this, you'd add IP providers that yield IPs to the 'ips' function
-For an example of how this could be used, see https://github.com/purarue/HPI/tree/master/my/ip
+For an example of how this could be used, see https://github.com/seanbreckenridge/HPI/tree/master/my/ip
"""
-REQUIRES = ["git+https://github.com/purarue/ipgeocache"]
+REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
-from collections.abc import Iterator
+from typing import Iterator
from my.core import Stats, warn_if_empty
+
from my.ip.common import IP
diff --git a/my/ip/common.py b/my/ip/common.py
index b551281..244ddc5 100644
--- a/my/ip/common.py
+++ b/my/ip/common.py
@@ -1,13 +1,12 @@
"""
-Provides location/timezone data from IP addresses, using [[https://github.com/purarue/ipgeocache][ipgeocache]]
+Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]]
"""
-from my.core import __NOT_HPI_MODULE__ # isort: skip
+from my.core import __NOT_HPI_MODULE__
import ipaddress
-from collections.abc import Iterator
+from typing import NamedTuple, Iterator, Tuple
from datetime import datetime
-from typing import NamedTuple
import ipgeocache
@@ -23,7 +22,7 @@ class IP(NamedTuple):
return ipgeocache.get(self.addr)
@property
- def latlon(self) -> tuple[float, float]:
+ def latlon(self) -> Tuple[float, float]:
loc: str = self.ipgeocache()["loc"]
lat, _, lon = loc.partition(",")
return float(lat), float(lon)
diff --git a/my/jawbone/__init__.py b/my/jawbone/__init__.py
index 463d735..5d43296 100644
--- a/my/jawbone/__init__.py
+++ b/my/jawbone/__init__.py
@@ -1,11 +1,10 @@
from __future__ import annotations
+from typing import Dict, Any, List, Iterable
import json
-from collections.abc import Iterable
-from datetime import date, datetime, time, timedelta
from functools import lru_cache
+from datetime import datetime, date, time, timedelta
from pathlib import Path
-from typing import Any
import pytz
@@ -15,6 +14,7 @@ logger = make_logger(__name__)
from my.config import jawbone as config # type: ignore[attr-defined]
+
BDIR = config.export_dir
PHASES_FILE = BDIR / 'phases.json'
SLEEPS_FILE = BDIR / 'sleeps.json'
@@ -24,7 +24,7 @@ GRAPHS_DIR = BDIR / 'graphs'
XID = str # TODO how to shared with backup thing?
-Phases = dict[XID, Any]
+Phases = Dict[XID, Any]
@lru_cache(1)
def get_phases() -> Phases:
return json.loads(PHASES_FILE.read_text())
@@ -89,7 +89,7 @@ class SleepEntry:
# TODO might be useful to cache these??
@property
- def phases(self) -> list[datetime]:
+ def phases(self) -> List[datetime]:
# TODO make sure they are consistent with emfit?
return [self._fromts(i['time']) for i in get_phases()[self.xid]]
@@ -100,13 +100,12 @@ class SleepEntry:
return str(self)
-def load_sleeps() -> list[SleepEntry]:
+def load_sleeps() -> List[SleepEntry]:
sleeps = json.loads(SLEEPS_FILE.read_text())
return [SleepEntry(js) for js in sleeps]
-from ..core.error import Res, extract_error_datetime, set_error_datetime
-
+from ..core.error import Res, set_error_datetime, extract_error_datetime
def pre_dataframe() -> Iterable[Res[SleepEntry]]:
from more_itertools import bucket
@@ -130,9 +129,9 @@ def pre_dataframe() -> Iterable[Res[SleepEntry]]:
def dataframe():
- dicts: list[dict[str, Any]] = []
+ dicts: List[Dict[str, Any]] = []
for s in pre_dataframe():
- d: dict[str, Any]
+ d: Dict[str, Any]
if isinstance(s, Exception):
dt = extract_error_datetime(s)
d = {
@@ -175,14 +174,14 @@ def hhmm(time: datetime):
# return fromstart / tick
-def plot_one(sleep: SleepEntry, fig, axes, xlims=None, *, showtext=True):
+def plot_one(sleep: SleepEntry, fig, axes, xlims=None, showtext=True):
import matplotlib.dates as mdates # type: ignore[import-not-found]
span = sleep.completed - sleep.created
print(f"{sleep.xid} span: {span}")
# pip install imageio
- from imageio import imread # type: ignore
+ from imageio import imread # type: ignore
img = imread(sleep.graph)
# all of them are 300x300 images apparently
@@ -240,7 +239,7 @@ def plot_one(sleep: SleepEntry, fig, axes, xlims=None, *, showtext=True):
# axes.title.set_size(10)
if showtext:
- axes.text(xlims[1] - timedelta(hours=1.5), 20, str(sleep))
+ axes.text(xlims[1] - timedelta(hours=1.5), 20, str(sleep),)
# plt.text(sleep.asleep(), 0, hhmm(sleep.asleep()))
@@ -261,8 +260,8 @@ def predicate(sleep: SleepEntry):
# TODO move to dashboard
def plot() -> None:
- import matplotlib.pyplot as plt # type: ignore[import-not-found]
from matplotlib.figure import Figure # type: ignore[import-not-found]
+ import matplotlib.pyplot as plt # type: ignore[import-not-found]
# TODO FIXME melatonin data
melatonin_data = {} # type: ignore[var-annotated]
@@ -275,7 +274,7 @@ def plot() -> None:
fig: Figure = plt.figure(figsize=(15, sleeps_count * 1))
axarr = fig.subplots(nrows=len(sleeps))
- for (sleep, axes) in zip(sleeps, axarr):
+ for i, (sleep, axes) in enumerate(zip(sleeps, axarr)):
plot_one(sleep, fig, axes, showtext=True)
used = melatonin_data.get(sleep.date_, None)
sused: str
diff --git a/my/jawbone/plots.py b/my/jawbone/plots.py
index 5968412..5dcb63d 100755
--- a/my/jawbone/plots.py
+++ b/my/jawbone/plots.py
@@ -1,11 +1,11 @@
#!/usr/bin/env python3
# TODO this should be in dashboard
+from pathlib import Path
# from kython.plotting import *
from csv import DictReader
-from pathlib import Path
-from typing import Any, NamedTuple
+from itertools import islice
-import matplotlib.pylab as pylab # type: ignore
+from typing import Dict, Any, NamedTuple
# sleep = []
# with open('2017.csv', 'r') as fo:
@@ -13,14 +13,16 @@ import matplotlib.pylab as pylab # type: ignore
# for line in islice(reader, 0, 10):
# sleep
# print(line)
-import matplotlib.pyplot as plt # type: ignore
+
+import matplotlib.pyplot as plt # type: ignore
from numpy import genfromtxt
+import matplotlib.pylab as pylab # type: ignore
pylab.rcParams['figure.figsize'] = (32.0, 24.0)
pylab.rcParams['font.size'] = 10
jawboneDataFeatures = Path(__file__).parent / 'features.csv' # Data File Path
-featureDesc: dict[str, str] = {}
+featureDesc: Dict[str, str] = {}
for x in genfromtxt(jawboneDataFeatures, dtype='unicode', delimiter=','):
featureDesc[x[0]] = x[1]
@@ -51,7 +53,7 @@ class SleepData(NamedTuple):
quality: float # ???
@classmethod
- def from_jawbone_dict(cls, d: dict[str, Any]):
+ def from_jawbone_dict(cls, d: Dict[str, Any]):
return cls(
date=d['DATE'],
asleep_time=_safe_mins(_safe_float(d['s_asleep_time'])),
@@ -74,7 +76,7 @@ class SleepData(NamedTuple):
def iter_useful(data_file: str):
- with Path(data_file).open() as fo:
+ with open(data_file) as fo:
reader = DictReader(fo)
for d in reader:
dt = SleepData.from_jawbone_dict(d)
@@ -94,7 +96,6 @@ files = [
]
from kython import concat, parse_date # type: ignore
-
useful = concat(*(list(iter_useful(str(f))) for f in files))
# for u in useful:
@@ -108,7 +109,6 @@ dates = [parse_date(u.date, yearfirst=True, dayfirst=False) for u in useful]
# TODO don't need this anymore? it's gonna be in dashboards package
from kython.plotting import plot_timestamped # type: ignore
-
for attr, lims, mavg, fig in [
('light', (0, 400), 5, None),
('deep', (0, 600), 5, None),
diff --git a/my/kobo.py b/my/kobo.py
index b4a1575..85bc50f 100644
--- a/my/kobo.py
+++ b/my/kobo.py
@@ -7,22 +7,21 @@ REQUIRES = [
'kobuddy',
]
-from collections.abc import Iterator
from dataclasses import dataclass
-
-import kobuddy
-from kobuddy import *
-from kobuddy import Highlight, get_highlights
+from typing import Iterator
from my.core import (
- Paths,
- Stats,
get_files,
stat,
+ Paths,
+ Stats,
)
from my.core.cfg import make_config
+import my.config
-import my.config # isort: skip
+import kobuddy
+from kobuddy import Highlight, get_highlights
+from kobuddy import *
@dataclass
@@ -52,7 +51,7 @@ def stats() -> Stats:
## TODO hmm. not sure if all this really belongs here?... perhaps orger?
-from typing import Callable, Union
+from typing import Callable, Union, List
# TODO maybe type over T?
_Predicate = Callable[[str], bool]
@@ -70,17 +69,17 @@ def from_predicatish(p: Predicatish) -> _Predicate:
return p
-def by_annotation(predicatish: Predicatish, **kwargs) -> list[Highlight]:
+def by_annotation(predicatish: Predicatish, **kwargs) -> List[Highlight]:
pred = from_predicatish(predicatish)
- res: list[Highlight] = []
+ res: List[Highlight] = []
for h in get_highlights(**kwargs):
if pred(h.annotation):
res.append(h)
return res
-def get_todos() -> list[Highlight]:
+def get_todos() -> List[Highlight]:
def with_todo(ann):
if ann is None:
ann = ''
diff --git a/my/kython/kompress.py b/my/kython/kompress.py
index a5d9c29..01e24e4 100644
--- a/my/kython/kompress.py
+++ b/my/kython/kompress.py
@@ -1,4 +1,5 @@
-from my.core import __NOT_HPI_MODULE__, warnings
+from my.core import __NOT_HPI_MODULE__
+from my.core import warnings
warnings.high('my.kython.kompress is deprecated, please use "kompress" library directly. See https://github.com/karlicoss/kompress')
diff --git a/my/lastfm.py b/my/lastfm.py
index cd9fa8b..6618738 100644
--- a/my/lastfm.py
+++ b/my/lastfm.py
@@ -3,9 +3,9 @@ Last.fm scrobbles
'''
from dataclasses import dataclass
-
+from my.core import Paths, Json, make_logger, get_files
from my.config import lastfm as user_config
-from my.core import Json, Paths, get_files, make_logger
+
logger = make_logger(__name__)
@@ -19,15 +19,13 @@ class lastfm(user_config):
from my.core.cfg import make_config
-
config = make_config(lastfm)
-import json
-from collections.abc import Iterable, Sequence
from datetime import datetime, timezone
+import json
from pathlib import Path
-from typing import NamedTuple
+from typing import NamedTuple, Sequence, Iterable
from my.core.cachew import mcachew
@@ -78,19 +76,16 @@ def scrobbles() -> Iterable[Scrobble]:
yield Scrobble(raw=raw)
-from my.core import Stats, stat
-
-
+from my.core import stat, Stats
def stats() -> Stats:
return stat(scrobbles)
def fill_influxdb() -> None:
from my.core import influxdb
-
# todo needs to be more automatic
- sd = ({
- 'dt': x.dt,
- 'track': x.track,
- } for x in scrobbles())
+ sd = (dict(
+ dt=x.dt,
+ track=x.track,
+ ) for x in scrobbles())
influxdb.fill(sd, measurement=__name__)
diff --git a/my/location/all.py b/my/location/all.py
index c6e8cab..fd88721 100644
--- a/my/location/all.py
+++ b/my/location/all.py
@@ -2,13 +2,14 @@
Merges location data from multiple sources
"""
-from collections.abc import Iterator
+from typing import Iterator
-from my.core import LazyLogger, Stats
+from my.core import Stats, LazyLogger
from my.core.source import import_source
from .common import Location
+
logger = LazyLogger(__name__, level="warning")
diff --git a/my/location/common.py b/my/location/common.py
index 4c47ef0..510e005 100644
--- a/my/location/common.py
+++ b/my/location/common.py
@@ -1,13 +1,12 @@
-from my.core import __NOT_HPI_MODULE__ # isort: skip
-
-from collections.abc import Iterable, Iterator
-from dataclasses import dataclass
from datetime import date, datetime
-from typing import Optional, Protocol, TextIO, Union
+from typing import Union, Tuple, Optional, Iterable, TextIO, Iterator, Protocol
+from dataclasses import dataclass
+
+from my.core import __NOT_HPI_MODULE__
DateIsh = Union[datetime, date, str]
-LatLon = tuple[float, float]
+LatLon = Tuple[float, float]
class LocationProtocol(Protocol):
@@ -71,7 +70,7 @@ def locations_to_gpx(locations: Iterable[LocationProtocol], buffer: TextIO) -> I
)
except AttributeError:
yield TypeError(
- f"Expected a Location or Location-like object, got {type(location)} {location!r}"
+ f"Expected a Location or Location-like object, got {type(location)} {repr(location)}"
)
continue
gpx_segment.points.append(point)
diff --git a/my/location/fallback/all.py b/my/location/fallback/all.py
index d340148..0c7b8cd 100644
--- a/my/location/fallback/all.py
+++ b/my/location/fallback/all.py
@@ -1,16 +1,14 @@
# TODO: add config here which passes kwargs to estimate_from (under_accuracy)
# overwritable by passing the kwarg name here to the top-level estimate_location
-from __future__ import annotations
-
-from collections.abc import Iterator
+from typing import Iterator, Optional
from my.core.source import import_source
from my.location.fallback.common import (
- DateExact,
- FallbackLocation,
- LocationEstimator,
estimate_from,
+ FallbackLocation,
+ DateExact,
+ LocationEstimator,
)
@@ -26,7 +24,7 @@ def fallback_estimators() -> Iterator[LocationEstimator]:
yield _home_estimate
-def estimate_location(dt: DateExact, *, first_match: bool=False, under_accuracy: int | None = None) -> FallbackLocation:
+def estimate_location(dt: DateExact, first_match: bool=False, under_accuracy: Optional[int] = None) -> FallbackLocation:
loc = estimate_from(dt, estimators=list(fallback_estimators()), first_match=first_match, under_accuracy=under_accuracy)
# should never happen if the user has home configured
if loc is None:
diff --git a/my/location/fallback/common.py b/my/location/fallback/common.py
index 622b2f5..fd508c6 100644
--- a/my/location/fallback/common.py
+++ b/my/location/fallback/common.py
@@ -1,12 +1,9 @@
from __future__ import annotations
-
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
+from typing import Optional, Callable, Sequence, Iterator, List, Union
from datetime import datetime, timedelta, timezone
-from typing import Callable, Union
-
-from ..common import Location, LocationProtocol
+from ..common import LocationProtocol, Location
DateExact = Union[datetime, float, int] # float/int as epoch timestamps
Second = float
@@ -16,12 +13,12 @@ class FallbackLocation(LocationProtocol):
lat: float
lon: float
dt: datetime
- duration: Second | None = None
- accuracy: float | None = None
- elevation: float | None = None
- datasource: str | None = None # which module provided this, useful for debugging
+ duration: Optional[Second] = None
+ accuracy: Optional[float] = None
+ elevation: Optional[float] = None
+ datasource: Optional[str] = None # which module provided this, useful for debugging
- def to_location(self, *, end: bool = False) -> Location:
+ def to_location(self, end: bool = False) -> Location:
'''
by default the start date is used for the location
If end is True, the start date + duration is used
@@ -46,9 +43,9 @@ class FallbackLocation(LocationProtocol):
lon: float,
dt: datetime,
end_dt: datetime,
- accuracy: float | None = None,
- elevation: float | None = None,
- datasource: str | None = None,
+ accuracy: Optional[float] = None,
+ elevation: Optional[float] = None,
+ datasource: Optional[str] = None,
) -> FallbackLocation:
'''
Create FallbackLocation from a start date and an end date
@@ -96,13 +93,13 @@ def estimate_from(
estimators: LocationEstimators,
*,
first_match: bool = False,
- under_accuracy: int | None = None,
-) -> FallbackLocation | None:
+ under_accuracy: Optional[int] = None,
+) -> Optional[FallbackLocation]:
'''
first_match: if True, return the first location found
under_accuracy: if set, only return locations with accuracy under this value
'''
- found: list[FallbackLocation] = []
+ found: List[FallbackLocation] = []
for loc in _iter_estimate_from(dt, estimators):
if under_accuracy is not None and loc.accuracy is not None and loc.accuracy > under_accuracy:
continue
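
To make the estimate_from contract above concrete: first_match short-circuits on the first surviving candidate, under_accuracy filters out coarse ones. A sketch of the selection, where the 'most precise wins' tie-break for the non-first_match case is an assumption about the unshown tail of the function:

from typing import Iterable, Optional

def pick(locations: Iterable, *, first_match: bool = False,
         under_accuracy: Optional[int] = None):
    found = []
    for loc in locations:
        if under_accuracy is not None and loc.accuracy is not None and loc.accuracy > under_accuracy:
            continue                       # radius too coarse, skip
        if first_match:
            return loc                     # short-circuit on the first survivor
        found.append(loc)
    # otherwise prefer the tightest accuracy radius among survivors
    return min(found, key=lambda l: l.accuracy or float('inf'), default=None)
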
diff --git a/my/location/fallback/via_home.py b/my/location/fallback/via_home.py
index f88fee0..259dcaa 100644
--- a/my/location/fallback/via_home.py
+++ b/my/location/fallback/via_home.py
@@ -2,22 +2,25 @@
Simple location provider, serving as a fallback when more detailed data isn't available
'''
-from __future__ import annotations
-
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, time, timezone
-from functools import cache
-from typing import cast
+from functools import lru_cache
+from typing import Sequence, Tuple, Union, cast, List, Iterator
from my.config import location as user_config
-from my.location.common import DateIsh, LatLon
-from my.location.fallback.common import DateExact, FallbackLocation
+from my.location.common import LatLon, DateIsh
+from my.location.fallback.common import FallbackLocation, DateExact
@dataclass
class Config(user_config):
- home: LatLon | Sequence[tuple[DateIsh, LatLon]]
+ home: Union[
+ LatLon, # either single, 'current' location
+ Sequence[Tuple[ # or, a sequence of location history
+ DateIsh, # date when you moved to
+ LatLon, # the location
+ ]]
+ ]
# default ~30km accuracy
# this is called 'home_accuracy' since it lives on the base location.config object,
@@ -26,13 +29,13 @@ class Config(user_config):
# TODO could make current Optional and somehow determine from system settings?
@property
- def _history(self) -> Sequence[tuple[datetime, LatLon]]:
+ def _history(self) -> Sequence[Tuple[datetime, LatLon]]:
home1 = self.home
# todo ugh, can't test for isinstance LatLon, it's a tuple itself
- home2: Sequence[tuple[DateIsh, LatLon]]
+ home2: Sequence[Tuple[DateIsh, LatLon]]
if isinstance(home1[0], tuple):
# already a sequence
- home2 = cast(Sequence[tuple[DateIsh, LatLon]], home1)
+ home2 = cast(Sequence[Tuple[DateIsh, LatLon]], home1)
else:
# must be a pair of coordinates. also doesn't really matter which date to pick?
loc = cast(LatLon, home1)
@@ -52,16 +55,15 @@ class Config(user_config):
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
res.append((dt, loc))
- res = sorted(res, key=lambda p: p[0])
+ res = list(sorted(res, key=lambda p: p[0]))
return res
from ...core.cfg import make_config
-
config = make_config(Config)
-@cache
+@lru_cache(maxsize=None)
def get_location(dt: datetime) -> LatLon:
'''
Interpolates the location at dt
@@ -72,8 +74,8 @@ def get_location(dt: datetime) -> LatLon:
# TODO: in python3.8, use functools.cached_property instead?
-@cache
-def homes_cached() -> list[tuple[datetime, LatLon]]:
+@lru_cache(maxsize=None)
+def homes_cached() -> List[Tuple[datetime, LatLon]]:
return list(config._history)
@@ -90,12 +92,13 @@ def estimate_location(dt: DateExact) -> Iterator[FallbackLocation]:
dt=datetime.fromtimestamp(d, timezone.utc),
datasource='via_home')
return
-
- # I guess the most reasonable is to fallback on the first location
- lat, lon = hist[-1][1]
- yield FallbackLocation(
- lat=lat,
- lon=lon,
- accuracy=config.home_accuracy,
- dt=datetime.fromtimestamp(d, timezone.utc),
- datasource='via_home')
+ else:
+ # I guess the most reasonable is to fallback on the first location
+ lat, lon = hist[-1][1]
+ yield FallbackLocation(
+ lat=lat,
+ lon=lon,
+ accuracy=config.home_accuracy,
+ dt=datetime.fromtimestamp(d, timezone.utc),
+ datasource='via_home')
+ return
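
For reference, both shapes of `home` that the Union in Config above accepts, as a hedged user-config sketch (coordinates invented; per the _history property, DateIsh strings are parsed and naive datetimes are treated as UTC):

class location:
    # either a single 'current' home...
    home = (51.5074, -0.1278)
    # ...or a dated history of moves:
    # home = [
    #     ('2005-12-04', (40.7128, -74.0060)),
    #     ('2017-07-15', (51.5074, -0.1278)),
    # ]
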
diff --git a/my/location/fallback/via_ip.py b/my/location/fallback/via_ip.py
index 8b50878..db03c7c 100644
--- a/my/location/fallback/via_ip.py
+++ b/my/location/fallback/via_ip.py
@@ -2,13 +2,13 @@
Converts IP addresses provided by my.location.ip to estimated locations
"""
-REQUIRES = ["git+https://github.com/purarue/ipgeocache"]
+REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from dataclasses import dataclass
from datetime import timedelta
-from my.config import location
from my.core import Stats, make_config
+from my.config import location
from my.core.warnings import medium
@@ -24,21 +24,19 @@ class ip_config(location.via_ip):
config = make_config(ip_config)
-from collections.abc import Iterator
from functools import lru_cache
+from typing import Iterator, List
from my.core import make_logger
from my.core.compat import bisect_left
+from my.ip.all import ips
from my.location.common import Location
-from my.location.fallback.common import DateExact, FallbackLocation, _datetime_timestamp
+from my.location.fallback.common import FallbackLocation, DateExact, _datetime_timestamp
logger = make_logger(__name__, level="warning")
def fallback_locations() -> Iterator[FallbackLocation]:
- # prefer late import since ips get overridden in tests
- from my.ip.all import ips
-
dur = config.for_duration.total_seconds()
for ip in ips():
lat, lon = ip.latlon
@@ -60,7 +58,7 @@ def locations() -> Iterator[Location]:
@lru_cache(1)
-def _sorted_fallback_locations() -> list[FallbackLocation]:
+def _sorted_fallback_locations() -> List[FallbackLocation]:
fl = list(filter(lambda l: l.duration is not None, fallback_locations()))
logger.debug(f"Fallback locations: {len(fl)}, sorting...:")
fl.sort(key=lambda l: l.dt.timestamp())
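
The lru_cache(1) plus pre-sorting above exists so repeated lookups can use binary search; a minimal sketch of that pattern with stdlib bisect and toy data (the module itself goes through my.core.compat.bisect_left):

from bisect import bisect_left
from typing import List, Tuple

points: List[Tuple[float, str]] = [(100.0, 'a'), (200.0, 'b'), (300.0, 'c')]  # sorted by timestamp
ts = 250.0
idx = bisect_left(points, (ts,))          # index of first point with timestamp >= ts
window = points[max(0, idx - 1):idx + 1]  # the neighbouring fixes around ts
assert [p[1] for p in window] == ['b', 'c']
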
diff --git a/my/location/google.py b/my/location/google.py
index 750c847..a7a92d3 100644
--- a/my/location/google.py
+++ b/my/location/google.py
@@ -3,28 +3,28 @@ Location data from Google Takeout
DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead
"""
-from __future__ import annotations
REQUIRES = [
'geopy', # checking that coordinates are valid
'ijson',
]
-import re
-from collections.abc import Iterable, Sequence
from datetime import datetime, timezone
from itertools import islice
from pathlib import Path
-from subprocess import PIPE, Popen
-from typing import IO, NamedTuple, Optional
+from subprocess import Popen, PIPE
+from typing import Iterable, NamedTuple, Optional, Sequence, IO, Tuple
+import re
# pip3 install geopy
-import geopy # type: ignore
+import geopy # type: ignore
-from my.core import Stats, make_logger, stat, warnings
+from my.core import stat, Stats, make_logger
from my.core.cachew import cache_dir, mcachew
-warnings.high("Please set up my.google.takeout.parser module for better takeout support")
+from my.core.warnings import high
+
+high("Please set up my.google.takeout.parser module for better takeout support")
# otherwise uses ijson
@@ -42,7 +42,7 @@ class Location(NamedTuple):
alt: Optional[float]
-TsLatLon = tuple[int, int, int]
+TsLatLon = Tuple[int, int, int]
def _iter_via_ijson(fo) -> Iterable[TsLatLon]:
@@ -50,10 +50,11 @@ def _iter_via_ijson(fo) -> Iterable[TsLatLon]:
# todo extract to common?
try:
# pip3 install ijson cffi
- import ijson.backends.yajl2_cffi as ijson # type: ignore
+ import ijson.backends.yajl2_cffi as ijson # type: ignore
except:
- warnings.medium("Falling back to default ijson because 'cffi' backend isn't found. It's up to 2x faster, you might want to check it out")
- import ijson # type: ignore
+ import warnings
+ warnings.warn("Falling back to default ijson because 'cffi' backend isn't found. It's up to 2x faster, you might want to check it out")
+ import ijson # type: ignore
for d in ijson.items(fo, 'locations.item'):
yield (
@@ -104,8 +105,7 @@ def _iter_locations_fo(fit) -> Iterable[Location]:
errors += 1
if float(errors) / total > 0.01:
# todo make defensive?
- # todo exceptiongroup?
- raise RuntimeError('too many errors! aborting') # noqa: B904
+ raise RuntimeError('too many errors! aborting')
else:
continue
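
A self-contained sketch of the ijson streaming in _iter_via_ijson; the 'locations.item' prefix comes from the code above, while the E7 field names are the usual Takeout ones and are assumed here rather than quoted from this file:

import io
import ijson  # pip3 install ijson

raw = io.BytesIO(b'{"locations": [{"timestampMs": "1577836800000", "latitudeE7": 515074000, "longitudeE7": -1278000}]}')
for d in ijson.items(raw, 'locations.item'):
    ts = int(d['timestampMs']) // 1000
    lat, lon = d['latitudeE7'] / 1e7, d['longitudeE7'] / 1e7
    print(ts, lat, lon)  # 1577836800 51.5074 -0.1278
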
diff --git a/my/location/google_takeout.py b/my/location/google_takeout.py
index 8613257..eb757ce 100644
--- a/my/location/google_takeout.py
+++ b/my/location/google_takeout.py
@@ -2,16 +2,15 @@
Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google
"""
-REQUIRES = ["git+https://github.com/purarue/google_takeout_parser"]
+REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
-from collections.abc import Iterator
+from typing import Iterator
+from my.google.takeout.parser import events, _cachew_depends_on
from google_takeout_parser.models import Location as GoogleLocation
-from my.core import LazyLogger, Stats, stat
+from my.core import stat, Stats, LazyLogger
from my.core.cachew import mcachew
-from my.google.takeout.parser import _cachew_depends_on, events
-
from .common import Location
logger = LazyLogger(__name__)
diff --git a/my/location/google_takeout_semantic.py b/my/location/google_takeout_semantic.py
index e84a932..5f2c055 100644
--- a/my/location/google_takeout_semantic.py
+++ b/my/location/google_takeout_semantic.py
@@ -5,26 +5,23 @@ Extracts semantic location history using google_takeout_parser
# This is a separate module to prevent ImportError and a new config block from breaking
# previously functional my.location.google_takeout locations
-REQUIRES = ["git+https://github.com/purarue/google_takeout_parser"]
+REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
-from collections.abc import Iterator
from dataclasses import dataclass
+from typing import Iterator, List
+from my.google.takeout.parser import events, _cachew_depends_on as _parser_cachew_depends_on
from google_takeout_parser.models import PlaceVisit as SemanticLocation
-from my.core import LazyLogger, Stats, make_config, stat
+from my.core import make_config, stat, LazyLogger, Stats
from my.core.cachew import mcachew
from my.core.error import Res
-from my.google.takeout.parser import _cachew_depends_on as _parser_cachew_depends_on
-from my.google.takeout.parser import events
-
from .common import Location
logger = LazyLogger(__name__)
from my.config import location as user_config
-
@dataclass
class semantic_locations_config(user_config.google_takeout_semantic):
# a value between 0 and 100, 100 being the most confident
@@ -39,7 +36,7 @@ config = make_config(semantic_locations_config)
# add config to cachew dependency so it recomputes on config changes
-def _cachew_depends_on() -> list[str]:
+def _cachew_depends_on() -> List[str]:
dep = _parser_cachew_depends_on()
dep.insert(0, f"require_confidence={config.require_confidence} accuracy={config.accuracy}")
return dep
diff --git a/my/location/gpslogger.py b/my/location/gpslogger.py
index bbbf70e..6d158a0 100644
--- a/my/location/gpslogger.py
+++ b/my/location/gpslogger.py
@@ -20,21 +20,21 @@ class config(location.gpslogger):
accuracy: float = 50.0
-from collections.abc import Iterator, Sequence
-from datetime import datetime, timezone
from itertools import chain
+from datetime import datetime, timezone
from pathlib import Path
+from typing import Iterator, Sequence, List
import gpxpy
from gpxpy.gpx import GPXXMLSyntaxException
from more_itertools import unique_everseen
-from my.core import LazyLogger, Stats
+from my.core import Stats, LazyLogger
from my.core.cachew import mcachew
from my.core.common import get_files
-
from .common import Location
+
logger = LazyLogger(__name__, level="warning")
def _input_sort_key(path: Path) -> str:
@@ -49,7 +49,7 @@ def inputs() -> Sequence[Path]:
return sorted(get_files(config.export_path, glob="*.gpx", sort=False), key=_input_sort_key)
-def _cachew_depends_on() -> list[float]:
+def _cachew_depends_on() -> List[float]:
return [p.stat().st_mtime for p in inputs()]
diff --git a/my/location/home.py b/my/location/home.py
index c82dda7..f6e6978 100644
--- a/my/location/home.py
+++ b/my/location/home.py
@@ -1,7 +1,7 @@
-from my.core.warnings import high
-
from .fallback.via_home import *
+from my.core.warnings import high
+
high(
"my.location.home is deprecated, use my.location.fallback.via_home instead, or estimate locations using the higher-level my.location.fallback.all.estimate_location"
)
diff --git a/my/location/via_ip.py b/my/location/via_ip.py
index 240ec5f..df48f8b 100644
--- a/my/location/via_ip.py
+++ b/my/location/via_ip.py
@@ -1,7 +1,7 @@
-REQUIRES = ["git+https://github.com/purarue/ipgeocache"]
-
-from my.core.warnings import high
+REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from .fallback.via_ip import *
+from my.core.warnings import high
+
high("my.location.via_ip is deprecated, use my.location.fallback.via_ip instead")
diff --git a/my/materialistic.py b/my/materialistic.py
index 45af3f9..8a6a997 100644
--- a/my/materialistic.py
+++ b/my/materialistic.py
@@ -1,5 +1,4 @@
from .core.warnings import high
-
high("DEPRECATED! Please use my.hackernews.materialistic instead.")
from .hackernews.materialistic import *
diff --git a/my/media/imdb.py b/my/media/imdb.py
index 131f6a7..b7ecbde 100644
--- a/my/media/imdb.py
+++ b/my/media/imdb.py
@@ -1,12 +1,11 @@
+#!/usr/bin/env python3
import csv
-from collections.abc import Iterator
from datetime import datetime
-from typing import NamedTuple
+from typing import Iterator, List, NamedTuple
-from my.core import get_files
-
-from my.config import imdb as config # isort: skip
+from ..core import get_files
+from my.config import imdb as config
def _get_last():
return max(get_files(config.export_path))
@@ -23,7 +22,7 @@ def iter_movies() -> Iterator[Movie]:
with last.open() as fo:
reader = csv.DictReader(fo)
- for line in reader:
+ for i, line in enumerate(reader):
# TODO extract directors??
title = line['Title']
rating = int(line['You rated'])
@@ -33,8 +32,8 @@ def iter_movies() -> Iterator[Movie]:
yield Movie(created=created, title=title, rating=rating)
-def get_movies() -> list[Movie]:
- return sorted(iter_movies(), key=lambda m: m.created)
+def get_movies() -> List[Movie]:
+ return list(sorted(iter_movies(), key=lambda m: m.created))
def test():
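
A tiny sketch of the DictReader parsing in iter_movies; the 'Title'/'You rated' column names come from the hunk above, the sample row is made up:

import csv
import io

fo = io.StringIO('Title,You rated\nAlien,9\n')
for line in csv.DictReader(fo):
    print(line['Title'], int(line['You rated']))  # Alien 9
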
diff --git a/my/media/youtube.py b/my/media/youtube.py
index 9a38c43..efaa74b 100644
--- a/my/media/youtube.py
+++ b/my/media/youtube.py
@@ -1,10 +1,5 @@
-from my.core import __NOT_HPI_MODULE__ # isort: skip
-
-from typing import TYPE_CHECKING
-
-from my.core.warnings import high
-
+from ..core.warnings import high
high("DEPRECATED! Please use my.youtube.takeout instead.")
+from ..core.util import __NOT_HPI_MODULE__
-if not TYPE_CHECKING:
- from my.youtube.takeout import *
+from ..youtube.takeout import *
diff --git a/my/monzo/monzoexport.py b/my/monzo/monzoexport.py
index f5e1cd1..3aa0cf5 100644
--- a/my/monzo/monzoexport.py
+++ b/my/monzo/monzoexport.py
@@ -5,17 +5,16 @@ REQUIRES = [
'git+https://github.com/karlicoss/monzoexport',
]
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
+from typing import Sequence, Iterator
from my.core import (
Paths,
get_files,
make_logger,
)
-
-import my.config # isort: skip
+import my.config
@dataclass
diff --git a/my/orgmode.py b/my/orgmode.py
index 10f53c0..c27f5a7 100644
--- a/my/orgmode.py
+++ b/my/orgmode.py
@@ -1,53 +1,38 @@
'''
Programmatic access and queries to org-mode files on the filesystem
'''
-from __future__ import annotations
REQUIRES = [
'orgparse',
]
-import re
-from collections.abc import Iterable, Sequence
from datetime import datetime
from pathlib import Path
-from typing import NamedTuple, Optional
+import re
+from typing import List, Sequence, Iterable, NamedTuple, Optional, Tuple
-import orgparse
-
-from my.core import Paths, Stats, get_files, stat
+from my.core import get_files
from my.core.cachew import cache_dir, mcachew
from my.core.orgmode import collect
+from my.config import orgmode as user_config
-class config:
- paths: Paths
-
-
-def make_config() -> config:
- from my.config import orgmode as user_config
-
- class combined_config(user_config, config): ...
-
- return combined_config()
+import orgparse
# temporary? hack to cache org-mode notes
class OrgNote(NamedTuple):
created: Optional[datetime]
heading: str
- tags: list[str]
+ tags: List[str]
def inputs() -> Sequence[Path]:
- cfg = make_config()
- return get_files(cfg.paths)
+ return get_files(user_config.paths)
_rgx = re.compile(orgparse.date.gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
-
-
-def _created(n: orgparse.OrgNode) -> tuple[datetime | None, str]:
+def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]:
heading = n.heading
# meh.. support in orgparse?
pp = {} if n.is_root() else n.properties
@@ -56,7 +41,7 @@ def _created(n: orgparse.OrgNode) -> tuple[datetime | None, str]:
# try to guess from heading
m = _rgx.search(heading)
if m is not None:
- createds = m.group(0) # could be None
+ createds = m.group(0) # could be None
if createds is None:
return (None, heading)
assert isinstance(createds, str)
@@ -70,7 +55,7 @@ def _created(n: orgparse.OrgNode) -> tuple[datetime | None, str]:
def to_note(x: orgparse.OrgNode) -> OrgNote:
# ugh. hack to merely make it cacheable
heading = x.heading
- created: datetime | None
+ created: Optional[datetime]
try:
c, heading = _created(x)
if isinstance(c, datetime):
@@ -82,7 +67,7 @@ def to_note(x: orgparse.OrgNode) -> OrgNote:
created = None
return OrgNote(
created=created,
- heading=heading, # todo include the body?
+ heading=heading, # todo include the body?
tags=list(x.tags),
)
@@ -99,15 +84,14 @@ def _cachew_cache_path(_self, f: Path) -> Path:
def _cachew_depends_on(_self, f: Path):
return (f, f.stat().st_mtime)
-
+
class Query:
def __init__(self, files: Sequence[Path]) -> None:
self.files = files
# TODO yield errors?
@mcachew(
- cache_path=_cachew_cache_path,
- force_file=True,
+ cache_path=_cachew_cache_path, force_file=True,
depends_on=_cachew_depends_on,
)
def _iterate(self, f: Path) -> Iterable[OrgNote]:
@@ -130,8 +114,8 @@ def query() -> Query:
return Query(files=inputs())
+from my.core import Stats, stat
def stats() -> Stats:
def outlines():
return query().all()
-
return stat(outlines)
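
A quick orgparse sketch matching how to_note consumes headings and tags above (the sample org text is invented):

import orgparse  # see REQUIRES above

root = orgparse.loads('* buy milk :errand:\n')
node = root.children[0]
print(node.heading, list(node.tags))  # buy milk ['errand']
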
diff --git a/my/pdfs.py b/my/pdfs.py
index eefd573..0ab4af3 100644
--- a/my/pdfs.py
+++ b/my/pdfs.py
@@ -1,66 +1,64 @@
'''
PDF documents and annotations on your filesystem
'''
-from __future__ import annotations as _annotations
-
REQUIRES = [
'git+https://github.com/0xabu/pdfannots',
# todo not sure if should use pypi version?
]
-import time
-from collections.abc import Iterator, Sequence
from datetime import datetime
+from dataclasses import dataclass
+import io
from pathlib import Path
-from typing import TYPE_CHECKING, NamedTuple, Optional, Protocol
+import time
+from typing import NamedTuple, List, Optional, Iterator, Sequence
-import pdfannots
-from more_itertools import bucket
-from my.core import PathIsh, Paths, Stats, get_files, make_logger, stat
+from my.core import LazyLogger, get_files, Paths, PathIsh
from my.core.cachew import mcachew
+from my.core.cfg import Attrs, make_config
from my.core.error import Res, split_errors
-class config(Protocol):
- @property
- def paths(self) -> Paths:
- return () # allowed to be empty for 'filelist' logic
+from more_itertools import bucket
+import pdfannots
- def is_ignored(self, p: Path) -> bool: # noqa: ARG002
+
+from my.config import pdfs as user_config
+
+@dataclass
+class pdfs(user_config):
+ paths: Paths = () # allowed to be empty for 'filelist' logic
+
+ def is_ignored(self, p: Path) -> bool:
"""
- You can override this in user config if you want to ignore some files that are too heavy
+ Used to ignore some extremely heavy files
+ the is_ignored function is taken from the user config if defined,
+ otherwise it defaults to returning False
"""
+ user_ignore = getattr(user_config, 'is_ignored', None)
+ if user_ignore is not None:
+ return user_ignore(p)
+
return False
-
-def make_config() -> config:
- from my.config import pdfs as user_config
-
- class migration:
- @property
- def paths(self) -> Paths:
- roots = getattr(user_config, 'roots', None)
- if roots is not None:
- from my.core.warnings import high
-
- high('"roots" is deprecated! Use "paths" instead.')
- return roots
- else:
- return ()
-
- class combined_config(user_config, migration, config): ...
-
- return combined_config()
+ @staticmethod
+ def _migration(attrs: Attrs) -> Attrs:
+ roots = 'roots'
+ if roots in attrs: # legacy name
+ attrs['paths'] = attrs[roots]
+ from my.core.warnings import high
+ high(f'"{roots}" is deprecated! Use "paths" instead.')
+ return attrs
-logger = make_logger(__name__)
+config = make_config(pdfs, migration=pdfs._migration)
+logger = LazyLogger(__name__)
def inputs() -> Sequence[Path]:
- cfg = make_config()
- all_files = get_files(cfg.paths, glob='**/*.pdf')
- return [p for p in all_files if not cfg.is_ignored(p)]
+ all_files = get_files(config.paths, glob='**/*.pdf')
+ return [p for p in all_files if not config.is_ignored(p)]
# TODO canonical names/fingerprinting?
@@ -74,7 +72,7 @@ class Annotation(NamedTuple):
created: Optional[datetime] # note: can be tz unaware in some bad pdfs...
@property
- def date(self) -> datetime | None:
+ def date(self) -> Optional[datetime]:
# legacy name
return self.created
@@ -95,11 +93,11 @@ def _as_annotation(*, raw: pdfannots.Annotation, path: str) -> Annotation:
)
-def get_annots(p: Path) -> list[Annotation]:
+def get_annots(p: Path) -> List[Annotation]:
b = time.time()
with p.open('rb') as fo:
doc = pdfannots.process_file(fo, emit_progress_to=None)
- annots = list(doc.iter_annots())
+ annots = [a for a in doc.iter_annots()]
# also has outlines are kinda like TOC, I don't really need them
a = time.time()
took = a - b
@@ -123,13 +121,14 @@ def _iter_annotations(pdfs: Sequence[Path]) -> Iterator[Res[Annotation]]:
# todo how to print to stdout synchronously?
# todo global config option not to use pools? useful for debugging..
from concurrent.futures import ProcessPoolExecutor
-
from my.core.utils.concurrent import DummyExecutor
-
workers = None # use 0 for debugging
Pool = DummyExecutor if workers == 0 else ProcessPoolExecutor
with Pool(workers) as pool:
- futures = [pool.submit(get_annots, pdf) for pdf in pdfs]
+ futures = [
+ pool.submit(get_annots, pdf)
+ for pdf in pdfs
+ ]
for f, pdf in zip(futures, pdfs):
try:
yield from f.result()
@@ -152,23 +151,21 @@ class Pdf(NamedTuple):
annotations: Sequence[Annotation]
@property
- def created(self) -> datetime | None:
+ def created(self) -> Optional[datetime]:
annots = self.annotations
return None if len(annots) == 0 else annots[-1].created
@property
- def date(self) -> datetime | None:
+ def date(self) -> Optional[datetime]:
# legacy
return self.created
-def annotated_pdfs(*, filelist: Sequence[PathIsh] | None = None) -> Iterator[Res[Pdf]]:
+def annotated_pdfs(*, filelist: Optional[Sequence[PathIsh]]=None) -> Iterator[Res[Pdf]]:
if filelist is not None:
# hacky... keeping it backwards compatible
# https://github.com/karlicoss/HPI/pull/74
- from my.config import pdfs as user_config
-
- user_config.paths = filelist
+ config.paths = filelist
ait = annotations()
vit, eit = split_errors(ait, ET=Exception)
@@ -179,14 +176,17 @@ def annotated_pdfs(*, filelist: Sequence[PathIsh] | None = None) -> Iterator[Res
yield from eit
+from my.core import stat, Stats
def stats() -> Stats:
return {
- **stat(annotations),
+ **stat(annotations) ,
**stat(annotated_pdfs),
}
### legacy/misc stuff
-if not TYPE_CHECKING:
- iter_annotations = annotations
+iter_annotations = annotations # for backwards compatibility
###
+
+# can use 'hpi query my.pdfs.annotations -o pprint' to test
+#
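
The _migration staticmethod above uses make_config's attrs-rewriting hook; a standalone sketch of the rename it performs (Attrs is just a Dict alias, as in my.core.cfg):

from typing import Any, Dict

Attrs = Dict[str, Any]

def migrate(attrs: Attrs) -> Attrs:
    if 'roots' in attrs:  # legacy field name
        attrs['paths'] = attrs['roots']
    return attrs

assert migrate({'roots': ['~/pdfs']})['paths'] == ['~/pdfs']
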
diff --git a/my/photos/main.py b/my/photos/main.py
index f98cb15..6262eac 100644
--- a/my/photos/main.py
+++ b/my/photos/main.py
@@ -1,30 +1,27 @@
"""
Photos and videos on your filesystem, their GPS and timestamps
"""
-
-from __future__ import annotations
-
REQUIRES = [
'geopy',
'magic',
]
# NOTE: also uses fdfind to search photos
-import json
-from collections.abc import Iterable, Iterator
from concurrent.futures import ProcessPoolExecutor as Pool
from datetime import datetime
+import json
from pathlib import Path
-from typing import NamedTuple, Optional
+from typing import Optional, NamedTuple, Iterator, Iterable, List
from geopy.geocoders import Nominatim # type: ignore
from my.core import LazyLogger
-from my.core.cachew import cache_dir, mcachew
from my.core.error import Res, sort_res_by
+from my.core.cachew import cache_dir, mcachew
from my.core.mime import fastermime
-from my.config import photos as config # type: ignore[attr-defined] # isort: skip
+from my.config import photos as config # type: ignore[attr-defined]
+
logger = LazyLogger(__name__)
@@ -46,7 +43,8 @@ class Photo(NamedTuple):
for bp in config.paths:
if self.path.startswith(bp):
return self.path[len(bp):]
- raise RuntimeError(f"Weird path {self.path}, can't match against anything")
+ else:
+ raise RuntimeError(f"Weird path {self.path}, can't match against anything")
@property
def name(self) -> str:
@@ -58,17 +56,17 @@ class Photo(NamedTuple):
return f'{config.base_url}{self._basename}'
-from .utils import Exif, ExifTags, convert_ref, dt_from_path, get_exif_from_file
+from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref
Result = Res[Photo]
-def _make_photo_aux(*args, **kwargs) -> list[Result]:
+def _make_photo_aux(*args, **kwargs) -> List[Result]:
# for the process pool..
return list(_make_photo(*args, **kwargs))
-def _make_photo(photo: Path, mtype: str, *, parent_geo: LatLon | None) -> Iterator[Result]:
+def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Iterator[Result]:
exif: Exif
- if any(x in mtype for x in ['image/png', 'image/x-ms-bmp', 'video']):
+ if any(x in mtype for x in {'image/png', 'image/x-ms-bmp', 'video'}):
# TODO don't remember why..
logger.debug(f"skipping exif extraction for {photo} due to mime {mtype}")
exif = {}
@@ -80,7 +78,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: LatLon | None) -> Iterat
yield e
exif = {}
- def _get_geo() -> LatLon | None:
+ def _get_geo() -> Optional[LatLon]:
meta = exif.get(ExifTags.GPSINFO, {})
if ExifTags.LAT in meta and ExifTags.LON in meta:
return LatLon(
@@ -90,7 +88,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: LatLon | None) -> Iterat
return parent_geo
# TODO aware on unaware?
- def _get_dt() -> datetime | None:
+ def _get_dt() -> Optional[datetime]:
edt = exif.get(ExifTags.DATETIME, None)
if edt is not None:
dtimes = edt.replace(' 24', ' 00') # jeez maybe log it?
@@ -126,7 +124,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: LatLon | None) -> Iterat
def _candidates() -> Iterable[Res[str]]:
# TODO that could be a bit slow if there are too many extra files?
- from subprocess import PIPE, Popen
+ from subprocess import Popen, PIPE
# TODO could extract this to common?
# TODO would be nice to reuse get_files (or even let it use find)
# that way would be easier to exclude
@@ -165,7 +163,7 @@ def _photos(candidates: Iterable[Res[str]]) -> Iterator[Result]:
from functools import lru_cache
@lru_cache(None)
- def get_geo(d: Path) -> LatLon | None:
+ def get_geo(d: Path) -> Optional[LatLon]:
geof = d / 'geo.json'
if not geof.exists():
if d == d.parent:
@@ -211,13 +209,11 @@ def print_all() -> None:
if isinstance(p, Exception):
print('ERROR!', p)
else:
- print(f"{p.dt!s:25} {p.path} {p.geo}")
+ print(f"{str(p.dt):25} {p.path} {p.geo}")
# todo cachew -- improve AttributeError: type object 'tuple' has no attribute '__annotations__' -- improve errors?
# todo cachew -- invalidate if function code changed?
from ..core import Stats, stat
-
-
def stats() -> Stats:
return stat(photos)
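
On the _get_dt quirk above: some cameras write hour '24' into the EXIF datetime, which strptime rejects, hence the replace. A minimal sketch (the sample value is invented):

from datetime import datetime

edt = '2015:02:03 24:05:06'         # as emitted by a buggy camera
dtimes = edt.replace(' 24', ' 00')  # same workaround as _get_dt
print(datetime.strptime(dtimes, '%Y:%m:%d %H:%M:%S'))  # 2015-02-03 00:05:06
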
diff --git a/my/photos/utils.py b/my/photos/utils.py
index e88def2..c614c4a 100644
--- a/my/photos/utils.py
+++ b/my/photos/utils.py
@@ -1,13 +1,11 @@
-from __future__ import annotations
-
-from ..core import __NOT_HPI_MODULE__ # isort: skip
-
from pathlib import Path
+from typing import Dict
import PIL.Image
-from PIL.ExifTags import GPSTAGS, TAGS
+from PIL.ExifTags import TAGS, GPSTAGS
-Exif = dict
+
+Exif = Dict
# TODO PIL.ExifTags.TAGS
@@ -64,15 +62,18 @@ def convert_ref(cstr, ref: str) -> float:
import re
from datetime import datetime
+from typing import Optional
# TODO surely there is a library that does it??
# TODO this belongs to a private overlay or something
# basically have a function that patches up dates after the files were yielded..
_DT_REGEX = re.compile(r'\D(\d{8})\D*(\d{6})\D')
-def dt_from_path(p: Path) -> datetime | None:
+def dt_from_path(p: Path) -> Optional[datetime]:
name = p.stem
mm = _DT_REGEX.search(name)
if mm is None:
return None
dates = mm.group(1) + mm.group(2)
return datetime.strptime(dates, "%Y%m%d%H%M%S")
+
+from ..core import __NOT_HPI_MODULE__
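
Usage sketch for dt_from_path above (assuming the function is in scope); note the regex requires a non-digit on both sides of the timestamp, so a stem ending in bare digits won't match. File names invented:

from pathlib import Path

print(dt_from_path(Path('IMG_20190412_123456_1.jpg')))  # 2019-04-12 12:34:56
print(dt_from_path(Path('nodate.jpg')))                 # None
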
diff --git a/my/pinboard.py b/my/pinboard.py
index e98dc78..ef4ca36 100644
--- a/my/pinboard.py
+++ b/my/pinboard.py
@@ -5,16 +5,15 @@ REQUIRES = [
'git+https://github.com/karlicoss/pinbexport',
]
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from pathlib import Path
+from typing import Iterator, Sequence
+
+from my.core import get_files, Paths, Res
+import my.config
import pinbexport.dal as pinbexport
-from my.core import Paths, Res, get_files
-
-import my.config # isort: skip
-
@dataclass
class config(my.config.pinboard): # TODO rename to pinboard.pinbexport?
diff --git a/my/pocket.py b/my/pocket.py
index ff9a788..b638fba 100644
--- a/my/pocket.py
+++ b/my/pocket.py
@@ -7,10 +7,10 @@ REQUIRES = [
from dataclasses import dataclass
from typing import TYPE_CHECKING
-from my.config import pocket as user_config
-
from .core import Paths
+from my.config import pocket as user_config
+
@dataclass
class pocket(user_config):
@@ -23,7 +23,6 @@ class pocket(user_config):
from .core.cfg import make_config
-
config = make_config(pocket)
@@ -38,7 +37,7 @@ except ModuleNotFoundError as e:
Article = dal.Article
-from collections.abc import Iterable, Sequence
+from typing import Sequence, Iterable
# todo not sure if should be defensive against empty?
@@ -52,12 +51,9 @@ def articles() -> Iterable[Article]:
yield from _dal().articles()
-from .core import Stats, stat
-
-
+from .core import stat, Stats
def stats() -> Stats:
from itertools import chain
-
from more_itertools import ilen
return {
**stat(articles),
diff --git a/my/polar.py b/my/polar.py
index 2172014..cd2c719 100644
--- a/my/polar.py
+++ b/my/polar.py
@@ -1,12 +1,11 @@
"""
[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
"""
-from __future__ import annotations
-
from pathlib import Path
-from typing import TYPE_CHECKING, cast
+from typing import cast, TYPE_CHECKING
-import my.config # isort: skip
+
+import my.config
# todo use something similar to tz.via_location for config fallback
if not TYPE_CHECKING:
@@ -21,36 +20,32 @@ if user_config is None:
pass
-from dataclasses import dataclass
-
from .core import PathIsh
-
-
+from dataclasses import dataclass
@dataclass
class polar(user_config):
'''
Polar config is optional, you only need it if you want to specify custom 'polar_dir'
'''
- polar_dir: PathIsh = Path('~/.polar').expanduser() # noqa: RUF009
+ polar_dir: PathIsh = Path('~/.polar').expanduser()
defensive: bool = True # pass False if you want it to fail faster on errors (useful for debugging)
from .core import make_config
-
config = make_config(polar)
# todo not sure where it keeps stuff on Windows?
# https://github.com/burtonator/polar-bookshelf/issues/296
-import json
-from collections.abc import Iterable, Sequence
from datetime import datetime
-from typing import NamedTuple
+from typing import List, Dict, Iterable, NamedTuple, Sequence, Optional
+import json
-from .core import Json, LazyLogger, Res
+from .core import LazyLogger, Json, Res
from .core.compat import fromisoformat
from .core.error import echain, sort_res_by
-from .core.konsume import Wdict, Zoomable, wrap
+from .core.konsume import wrap, Zoomable, Wdict
+
logger = LazyLogger(__name__)
@@ -70,7 +65,7 @@ class Highlight(NamedTuple):
comments: Sequence[Comment]
tags: Sequence[str]
page: int # 1-indexed
- color: str | None = None
+ color: Optional[str] = None
Uid = str
@@ -78,7 +73,7 @@ class Book(NamedTuple):
created: datetime
uid: Uid
path: Path
- title: str | None
+ title: Optional[str]
# TODO hmmm. I think this needs to be defensive as well...
# think about it later.
items: Sequence[Highlight]
@@ -134,7 +129,7 @@ class Loader:
pi['dimensions'].consume_all()
# TODO how to make it nicer?
- cmap: dict[Hid, list[Comment]] = {}
+ cmap: Dict[Hid, List[Comment]] = {}
vals = list(comments)
for v in vals:
cid = v['id'].zoom()
@@ -168,10 +163,10 @@ class Loader:
h['rects'].ignore()
# TODO make it more generic..
- htags: list[str] = []
+ htags: List[str] = []
if 'tags' in h:
ht = h['tags'].zoom()
- for _k, v in list(ht.items()):
+ for k, v in list(ht.items()):
ctag = v.zoom()
ctag['id'].consume()
ct = ctag['label'].zoom()
@@ -204,7 +199,7 @@ class Loader:
def load_items(self, metas: Json) -> Iterable[Highlight]:
- for _p, meta in metas.items(): # noqa: PERF102
+ for p, meta in metas.items():
with wrap(meta, throw=not config.defensive) as meta:
yield from self.load_item(meta)
@@ -247,7 +242,7 @@ def iter_entries() -> Iterable[Result]:
yield err
-def get_entries() -> list[Result]:
+def get_entries() -> List[Result]:
# sorting by first annotation is reasonable I guess???
# todo perhaps worth making it a pattern? X() returns iterable, get_X returns reasonably sorted list?
return list(sort_res_by(iter_entries(), key=lambda e: e.created))
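
The cmap construction in Loader above is a group-by-id accumulation; stripped of the konsume wrappers, a plausible reading of what the elided body does (toy ids):

from typing import Dict, List

cmap: Dict[str, List[str]] = {}
for cid, comment in [('h1', 'nice'), ('h1', 'agreed'), ('h2', 'hmm')]:
    cmap.setdefault(cid, []).append(comment)
assert cmap == {'h1': ['nice', 'agreed'], 'h2': ['hmm']}
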
diff --git a/my/reddit/__init__.py b/my/reddit/__init__.py
index 982901a..e81aaf9 100644
--- a/my/reddit/__init__.py
+++ b/my/reddit/__init__.py
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
"""
-# prevent it from appearing in modules list/doctor
+# prevent it from appearing in modules list/doctor
from ..core import __NOT_HPI_MODULE__
# kinda annoying to keep it, but it's so legacy 'hpi module install my.reddit' works
@@ -20,7 +20,6 @@ REQUIRES = [
from my.core.hpi_compat import handle_legacy_import
-
is_legacy_import = handle_legacy_import(
parent_module_name=__name__,
legacy_submodule_name='rexport',
diff --git a/my/reddit/all.py b/my/reddit/all.py
index 27e22df..daedba1 100644
--- a/my/reddit/all.py
+++ b/my/reddit/all.py
@@ -1,9 +1,8 @@
-from collections.abc import Iterator
-
-from my.core import Stats, stat
+from typing import Iterator
+from my.core import stat, Stats
from my.core.source import import_source
-from .common import Comment, Save, Submission, Upvote, _merge_comments
+from .common import Save, Upvote, Comment, Submission, _merge_comments
# Man... ideally an all.py file isn't this verbose, but
# reddit just feels like that much of a complicated source and
diff --git a/my/reddit/common.py b/my/reddit/common.py
index 40f9f6e..c01258b 100644
--- a/my/reddit/common.py
+++ b/my/reddit/common.py
@@ -2,14 +2,12 @@
This defines Protocol classes, which make sure that each different
type of shared model has a standardized interface
"""
+from my.core import __NOT_HPI_MODULE__
-from my.core import __NOT_HPI_MODULE__ # isort: skip
-
-from collections.abc import Iterator
+from typing import Set, Iterator, Protocol
from itertools import chain
-from typing import Protocol
-from my.core import Json, datetime_aware
+from my.core import datetime_aware, Json
# common fields across all the Protocol classes, so generic code can be written
@@ -51,7 +49,7 @@ class Submission(RedditBase, Protocol):
def _merge_comments(*sources: Iterator[Comment]) -> Iterator[Comment]:
#from .rexport import logger
#ignored = 0
- emitted: set[str] = set()
+ emitted: Set[str] = set()
for e in chain(*sources):
uid = e.id
if uid in emitted:
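
_merge_comments above dedupes by id while preserving source order; a self-contained sketch of that pattern:

from itertools import chain
from typing import Iterator, Set

def merge(*sources) -> Iterator[str]:
    emitted: Set[str] = set()
    for e in chain(*sources):
        if e in emitted:
            continue  # already yielded by an earlier source
        emitted.add(e)
        yield e

assert list(merge(['a', 'b'], ['b', 'c'])) == ['a', 'b', 'c']
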
diff --git a/my/reddit/pushshift.py b/my/reddit/pushshift.py
index 12f592b..9580005 100644
--- a/my/reddit/pushshift.py
+++ b/my/reddit/pushshift.py
@@ -1,27 +1,27 @@
"""
Gives you access to older comments possibly not accessible with rexport
using pushshift
-See https://github.com/purarue/pushshift_comment_export
+See https://github.com/seanbreckenridge/pushshift_comment_export
"""
REQUIRES = [
- "git+https://github.com/purarue/pushshift_comment_export",
+ "git+https://github.com/seanbreckenridge/pushshift_comment_export",
]
from dataclasses import dataclass
-# note: keeping pushshift import before config import, so it's handled gracefully by import_source
-from pushshift_comment_export.dal import PComment, read_file
-
-from my.config import reddit as uconfig
from my.core import Paths, Stats, stat
from my.core.cfg import make_config
+# note: keeping pushshift import before config import, so it's handled gracefully by import_source
+from pushshift_comment_export.dal import read_file, PComment
+
+from my.config import reddit as uconfig
@dataclass
class pushshift_config(uconfig.pushshift):
'''
- Uses [[https://github.com/purarue/pushshift_comment_export][pushshift]] to get access to old comments
+ Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments
'''
# path[s]/glob to the exported JSON data
@@ -29,10 +29,10 @@ class pushshift_config(uconfig.pushshift):
config = make_config(pushshift_config)
-from collections.abc import Iterator, Sequence
+from my.core import get_files
+from typing import Sequence, Iterator
from pathlib import Path
-from my.core import get_files
def inputs() -> Sequence[Path]:
diff --git a/my/reddit/rexport.py b/my/reddit/rexport.py
index 262635b..6a6be61 100644
--- a/my/reddit/rexport.py
+++ b/my/reddit/rexport.py
@@ -7,24 +7,23 @@ REQUIRES = [
'git+https://github.com/karlicoss/rexport',
]
-import inspect
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
+import inspect
from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Iterator, Sequence
from my.core import (
- Paths,
- Stats,
get_files,
make_logger,
- stat,
warnings,
+ stat,
+ Paths,
+ Stats,
)
from my.core.cachew import mcachew
-from my.core.cfg import Attrs, make_config
+from my.core.cfg import make_config, Attrs
-from my.config import reddit as uconfig # isort: skip
+from my.config import reddit as uconfig
logger = make_logger(__name__)
@@ -145,9 +144,9 @@ if not TYPE_CHECKING:
try:
# here we just check that types are available, we don't actually want to import them
# fmt: off
- dal.Subreddit # noqa: B018
- dal.Profile # noqa: B018
- dal.Multireddit # noqa: B018
+ dal.Subreddit
+ dal.Profile
+ dal.Multireddit
# fmt: on
except AttributeError as ae:
warnings.high(f'{ae} : please update "rexport" installation')
diff --git a/my/rescuetime.py b/my/rescuetime.py
index 0c9fd28..c493e8e 100644
--- a/my/rescuetime.py
+++ b/my/rescuetime.py
@@ -5,15 +5,16 @@ REQUIRES = [
'git+https://github.com/karlicoss/rescuexport',
]
-from collections.abc import Iterable, Sequence
-from datetime import timedelta
from pathlib import Path
+from datetime import timedelta
+from typing import Sequence, Iterable
-from my.core import Stats, get_files, make_logger, stat
+from my.core import get_files, make_logger, stat, Stats
from my.core.cachew import mcachew
from my.core.error import Res, split_errors
-from my.config import rescuetime as config # isort: skip
+from my.config import rescuetime as config
+
logger = make_logger(__name__)
@@ -23,7 +24,6 @@ def inputs() -> Sequence[Path]:
import rescuexport.dal as dal
-
DAL = dal.DAL
Entry = dal.Entry
@@ -43,8 +43,6 @@ def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]:
# todo automatic dataframe interface?
from .core.pandas import DataFrameT, as_dataframe
-
-
def dataframe() -> DataFrameT:
return as_dataframe(entries())
@@ -58,19 +56,16 @@ def stats() -> Stats:
# basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this?
-from collections.abc import Iterator
from contextlib import contextmanager
-
-
+from typing import Iterator
# todo take seed, or what?
@contextmanager
def fake_data(rows: int=1000) -> Iterator:
# todo also disable cachew automatically for such things?
- import json
- from tempfile import TemporaryDirectory
-
- from my.core.cachew import disabled_cachew
from my.core.cfg import tmp_config
+ from my.core.cachew import disabled_cachew
+ from tempfile import TemporaryDirectory
+ import json
with disabled_cachew(), TemporaryDirectory() as td:
tdir = Path(td)
f = tdir / 'rescuetime.json'
@@ -87,13 +82,12 @@ def fake_data(rows: int=1000) -> Iterator:
def fill_influxdb() -> None:
- from my.core import influxdb
-
- it = ({
- 'dt': e.dt,
- 'duration_d': e.duration_s,
- 'tags': {'activity': e.activity},
- } for e in entries() if isinstance(e, Entry)) # TODO handle errors in core.influxdb
+ from .core import influxdb
+ it = (dict(
+ dt=e.dt,
+ duration_d=e.duration_s,
+ tags=dict(activity=e.activity),
+ ) for e in entries() if isinstance(e, Entry)) # TODO handle errors in core.influxdb
influxdb.fill(it, measurement=__name__)
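
groups() earlier in this file (its body is elided in the hunk) buckets entries into sessions separated by more than `gap`; a hedged standalone sketch of that idea:

from datetime import datetime, timedelta
from typing import Iterable, Iterator, List

def group_by_gap(times: Iterable[datetime], gap: timedelta) -> Iterator[List[datetime]]:
    cur: List[datetime] = []
    for t in times:
        if cur and t - cur[-1] > gap:
            yield cur  # gap exceeded: close the current session
            cur = []
        cur.append(t)
    if cur:
        yield cur
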
diff --git a/my/roamresearch.py b/my/roamresearch.py
index 7322774..2fe06d4 100644
--- a/my/roamresearch.py
+++ b/my/roamresearch.py
@@ -1,19 +1,16 @@
"""
[[https://roamresearch.com][Roam]] data
"""
-from __future__ import annotations
-
-import re
-from collections.abc import Iterator
from datetime import datetime, timezone
-from itertools import chain
from pathlib import Path
-from typing import NamedTuple
+from itertools import chain
+import re
+from typing import NamedTuple, Iterator, List, Optional
+
+from .core import get_files, LazyLogger, Json
from my.config import roamresearch as config
-from .core import Json, LazyLogger, get_files
-
logger = LazyLogger(__name__)
@@ -60,15 +57,15 @@ class Node(NamedTuple):
return datetime.fromtimestamp(rt / 1000, tz=timezone.utc)
@property
- def title(self) -> str | None:
+ def title(self) -> Optional[str]:
return self.raw.get(Keys.TITLE)
@property
- def body(self) -> str | None:
+ def body(self) -> Optional[str]:
return self.raw.get(Keys.STRING)
@property
- def children(self) -> list[Node]:
+ def children(self) -> List['Node']:
# TODO cache? needs a key argument (because of Json)
ch = self.raw.get(Keys.CHILDREN, [])
return list(map(Node, ch))
@@ -98,7 +95,7 @@ class Node(NamedTuple):
# - heading -- notes that haven't been created yet
return len(self.body or '') == 0 and len(self.children) == 0
- def traverse(self) -> Iterator[Node]:
+ def traverse(self) -> Iterator['Node']:
# not sure about __iter__, because might be a bit unintuitive that it's recursive..
yield self
for c in self.children:
@@ -123,7 +120,7 @@ class Node(NamedTuple):
return f'Node(created={self.created}, title={self.title}, body={self.body})'
@staticmethod
- def make(raw: Json) -> Iterator[Node]:
+ def make(raw: Json) -> Iterator['Node']:
is_empty = set(raw.keys()) == {Keys.EDITED, Keys.EDIT_EMAIL, Keys.TITLE}
# not sure about that... but daily notes end up like that
if is_empty:
@@ -133,11 +130,11 @@ class Node(NamedTuple):
class Roam:
- def __init__(self, raw: list[Json]) -> None:
+ def __init__(self, raw: List[Json]) -> None:
self.raw = raw
@property
- def notes(self) -> list[Node]:
+ def notes(self) -> List[Node]:
return list(chain.from_iterable(map(Node.make, self.raw)))
def traverse(self) -> Iterator[Node]:
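
Node.traverse above is the standard recursive pre-order generator; a self-contained sketch:

from typing import Iterator, List

class N:
    def __init__(self, name: str, children: List['N']) -> None:
        self.name, self.children = name, children
    def traverse(self) -> Iterator['N']:
        yield self  # pre-order: node first, then subtrees
        for c in self.children:
            yield from c.traverse()

root = N('a', [N('b', []), N('c', [N('d', [])])])
assert [n.name for n in root.traverse()] == ['a', 'b', 'c', 'd']
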
diff --git a/my/rss/all.py b/my/rss/all.py
index e10e4d2..b4dbdbd 100644
--- a/my/rss/all.py
+++ b/my/rss/all.py
@@ -3,9 +3,9 @@ Unified RSS data, merged from different services I used historically
'''
# NOTE: you can comment out the sources you're not using
-from collections.abc import Iterable
-
from . import feedbin, feedly
+
+from typing import Iterable
from .common import Subscription, compute_subscriptions
diff --git a/my/rss/common.py b/my/rss/common.py
index bf9506e..54067d6 100644
--- a/my/rss/common.py
+++ b/my/rss/common.py
@@ -1,12 +1,10 @@
-from __future__ import annotations
+from my.core import __NOT_HPI_MODULE__
-from my.core import __NOT_HPI_MODULE__ # isort: skip
-
-from collections.abc import Iterable, Sequence
from dataclasses import dataclass, replace
from itertools import chain
+from typing import Optional, List, Dict, Iterable, Tuple, Sequence
-from my.core import datetime_aware, warn_if_empty
+from my.core import warn_if_empty, datetime_aware
@dataclass
@@ -15,16 +13,16 @@ class Subscription:
url: str
id: str # TODO not sure about it...
# eh, not all of them got reasonable 'created' time
- created_at: datetime_aware | None
+ created_at: Optional[datetime_aware]
subscribed: bool = True
# snapshot of subscriptions at time
-SubscriptionState = tuple[datetime_aware, Sequence[Subscription]]
+SubscriptionState = Tuple[datetime_aware, Sequence[Subscription]]
@warn_if_empty
-def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> list[Subscription]:
+def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
"""
Keeps track of everything I ever subscribed to.
In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed)
@@ -32,9 +30,9 @@ def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> list[Subscri
states = list(chain.from_iterable(sources))
# TODO keep 'source'/'provider'/'service' attribute?
- by_url: dict[str, Subscription] = {}
+ by_url: Dict[str, Subscription] = {}
# ah. dates are used for sorting
- for _when, state in sorted(states):
+ for when, state in sorted(states):
# TODO use 'when'?
for feed in state:
if feed.url not in by_url:
diff --git a/my/rss/feedbin.py b/my/rss/feedbin.py
index 5f4da0a..dc13a17 100644
--- a/my/rss/feedbin.py
+++ b/my/rss/feedbin.py
@@ -3,15 +3,15 @@ Feedbin RSS reader
"""
import json
-from collections.abc import Iterator, Sequence
from pathlib import Path
+from typing import Iterator, Sequence
-from my.core import Stats, get_files, stat
+from my.core import get_files, stat, Stats
from my.core.compat import fromisoformat
-
from .common import Subscription, SubscriptionState
-from my.config import feedbin as config # isort: skip
+from my.config import feedbin as config
+
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
diff --git a/my/rss/feedly.py b/my/rss/feedly.py
index 9bf5429..55bcf9b 100644
--- a/my/rss/feedly.py
+++ b/my/rss/feedly.py
@@ -2,36 +2,19 @@
Feedly RSS reader
"""
-import json
-from abc import abstractmethod
-from collections.abc import Iterator, Sequence
+from my.config import feedly as config
+
from datetime import datetime, timezone
+import json
from pathlib import Path
-from typing import Protocol
-
-from my.core import Paths, get_files
+from typing import Iterator, Sequence
+from my.core import get_files
from .common import Subscription, SubscriptionState
-class config(Protocol):
- @property
- @abstractmethod
- def export_path(self) -> Paths:
- raise NotImplementedError
-
-
-def make_config() -> config:
- from my.config import feedly as user_config
-
- class combined_config(user_config, config): ...
-
- return combined_config()
-
-
def inputs() -> Sequence[Path]:
- cfg = make_config()
- return get_files(cfg.export_path)
+ return get_files(config.export_path)
def parse_file(f: Path) -> Iterator[Subscription]:
diff --git a/my/rtm.py b/my/rtm.py
index 217c969..22752fe 100644
--- a/my/rtm.py
+++ b/my/rtm.py
@@ -6,19 +6,21 @@ REQUIRES = [
'icalendar',
]
-import re
-from collections.abc import Iterator
from datetime import datetime
from functools import cached_property
+import re
+from typing import Dict, List, Iterator
-import icalendar # type: ignore
-from icalendar.cal import Todo # type: ignore
-from more_itertools import bucket
-
-from my.core import get_files, make_logger
+from my.core import make_logger, get_files
from my.core.utils.itertools import make_dict
-from my.config import rtm as config # isort: skip
+from my.config import rtm as config
+
+
+from more_itertools import bucket
+import icalendar # type: ignore
+from icalendar.cal import Todo # type: ignore
+
logger = make_logger(__name__)
@@ -30,14 +32,14 @@ class MyTodo:
self.revision = revision
@cached_property
- def notes(self) -> list[str]:
+ def notes(self) -> List[str]:
# TODO can there be multiple??
desc = self.todo['DESCRIPTION']
notes = re.findall(r'---\n\n(.*?)\n\nUpdated:', desc, flags=re.DOTALL)
return notes
@cached_property
- def tags(self) -> list[str]:
+ def tags(self) -> List[str]:
desc = self.todo['DESCRIPTION']
[tags_str] = re.findall(r'\nTags: (.*?)\n', desc, flags=re.DOTALL)
if tags_str == 'none':
@@ -56,7 +58,7 @@ class MyTodo:
def get_status(self) -> str:
if 'STATUS' not in self.todo:
return None # type: ignore
- # TODO 'COMPLETED'?
+ # TODO 'COMPLETED'?
return str(self.todo['STATUS'])
# TODO tz?
@@ -90,11 +92,11 @@ class DAL:
for t in self.cal.walk('VTODO'):
yield MyTodo(t, self.revision)
- def get_todos_by_uid(self) -> dict[str, MyTodo]:
+ def get_todos_by_uid(self) -> Dict[str, MyTodo]:
todos = self.all_todos()
return make_dict(todos, key=lambda t: t.uid)
- def get_todos_by_title(self) -> dict[str, list[MyTodo]]:
+ def get_todos_by_title(self) -> Dict[str, List[MyTodo]]:
todos = self.all_todos()
bucketed = bucket(todos, lambda todo: todo.title)
return {k: list(bucketed[k]) for k in bucketed}
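
get_todos_by_title above leans on more_itertools.bucket; a toy demo of that grouping (sample strings invented):

from more_itertools import bucket

todos = ['pay rent', 'pay tax', 'buy milk']
b = bucket(todos, key=lambda t: t.split()[0])
print({k: list(b[k]) for k in b})  # {'pay': ['pay rent', 'pay tax'], 'buy': ['buy milk']}
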
diff --git a/my/runnerup.py b/my/runnerup.py
index f5d7d1e..a21075a 100644
--- a/my/runnerup.py
+++ b/my/runnerup.py
@@ -6,15 +6,17 @@ REQUIRES = [
'python-tcxparser',
]
-from collections.abc import Iterable
from datetime import timedelta
from pathlib import Path
+from typing import Iterable
+
+from my.core import Res, get_files, Json
+from my.core.compat import fromisoformat
import tcxparser # type: ignore[import-untyped]
from my.config import runnerup as config
-from my.core import Json, Res, get_files
-from my.core.compat import fromisoformat
+
# TODO later, use a proper namedtuple?
Workout = Json
@@ -68,8 +70,6 @@ def workouts() -> Iterable[Res[Workout]]:
from .core.pandas import DataFrameT, check_dataframe, error_to_row
-
-
@check_dataframe
def dataframe() -> DataFrameT:
def it():
@@ -85,8 +85,6 @@ def dataframe() -> DataFrameT:
return df
-from .core import Stats, stat
-
-
+from .core import stat, Stats
def stats() -> Stats:
return stat(dataframe)
diff --git a/my/simple.py b/my/simple.py
index b7f25cd..7462291 100644
--- a/my/simple.py
+++ b/my/simple.py
@@ -1,11 +1,12 @@
'''
Just a demo module for testing and documentation purposes
'''
-from collections.abc import Iterator
from dataclasses import dataclass
+from typing import Iterator
+
+from my.core import make_config
from my.config import simple as user_config
-from my.core import make_config
@dataclass
diff --git a/my/smscalls.py b/my/smscalls.py
index 27d08be..f436709 100644
--- a/my/smscalls.py
+++ b/my/smscalls.py
@@ -2,7 +2,6 @@
Phone calls and SMS messages
Exported using https://play.google.com/store/apps/details?id=com.riteshsahu.SMSBackupRestore&hl=en_US
"""
-from __future__ import annotations
# See: https://www.synctech.com.au/sms-backup-restore/fields-in-xml-backup-files/ for schema
@@ -10,9 +9,8 @@ REQUIRES = ['lxml']
from dataclasses import dataclass
+from my.core import get_files, stat, Paths, Stats
from my.config import smscalls as user_config
-from my.core import Paths, Stats, get_files, stat
-
@dataclass
class smscalls(user_config):
@@ -20,15 +18,13 @@ class smscalls(user_config):
export_path: Paths
from my.core.cfg import make_config
-
config = make_config(smscalls)
-from collections.abc import Iterator
from datetime import datetime, timezone
from pathlib import Path
-from typing import Any, NamedTuple
+from typing import NamedTuple, Iterator, Set, Tuple, Optional, Any, Dict, List
-import lxml.etree as etree
+from lxml import etree
from my.core.error import Res
@@ -37,8 +33,7 @@ class Call(NamedTuple):
dt: datetime
dt_readable: str
duration_s: int
- phone_number: str
- who: str | None
+ who: Optional[str]
# type - 1 = Incoming, 2 = Outgoing, 3 = Missed, 4 = Voicemail, 5 = Rejected, 6 = Refused List.
call_type: int
@@ -55,27 +50,23 @@ class Call(NamedTuple):
# All the field values are read as-is from the underlying database and no conversion is done by the app in most cases.
#
# The '(Unknown)' is just what my android phone does, not sure if there are others
-UNKNOWN: set[str] = {'(Unknown)'}
-
-def _parse_xml(xml: Path) -> Any:
- return etree.parse(str(xml), parser=etree.XMLParser(huge_tree=True))
+UNKNOWN: Set[str] = {'(Unknown)'}
def _extract_calls(path: Path) -> Iterator[Res[Call]]:
- tr = _parse_xml(path)
+ tr = etree.parse(str(path))
for cxml in tr.findall('call'):
dt = cxml.get('date')
dt_readable = cxml.get('readable_date')
duration = cxml.get('duration')
who = cxml.get('contact_name')
call_type = cxml.get('type')
- number = cxml.get('number')
# if name is missing, it's not None (it's some string), depends on the phone/message app
if who is not None and who in UNKNOWN:
who = None
- if dt is None or dt_readable is None or duration is None or call_type is None or number is None:
+ if dt is None or dt_readable is None or duration is None or call_type is None:
call_str = etree.tostring(cxml).decode('utf-8')
- yield RuntimeError(f"Missing one or more required attributes [date, readable_date, duration, type, number] in {call_str}")
+ yield RuntimeError(f"Missing one or more required attributes [date, readable_date, duration, type] in {call_str}")
continue
# TODO we've got local tz here, not sure if useful..
# ok, so readable date is local datetime, changing throughout the backup
@@ -83,7 +74,6 @@ def _extract_calls(path: Path) -> Iterator[Res[Call]]:
dt=_parse_dt_ms(dt),
dt_readable=dt_readable,
duration_s=int(duration),
- phone_number=number,
who=who,
call_type=int(call_type),
)
@@ -93,7 +83,7 @@ def calls() -> Iterator[Res[Call]]:
files = get_files(config.export_path, glob='calls-*.xml')
# TODO always replacing with the latter is good, we get better contact names??
- emitted: set[datetime] = set()
+ emitted: Set[datetime] = set()
for p in files:
for c in _extract_calls(p):
if isinstance(c, Exception):
@@ -108,7 +98,7 @@ def calls() -> Iterator[Res[Call]]:
class Message(NamedTuple):
dt: datetime
dt_readable: str
- who: str | None
+ who: Optional[str]
message: str
phone_number: str
# type - 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox, 5 = Failed, 6 = Queued
@@ -122,7 +112,7 @@ class Message(NamedTuple):
def messages() -> Iterator[Res[Message]]:
files = get_files(config.export_path, glob='sms-*.xml')
- emitted: set[tuple[datetime, str | None, bool]] = set()
+ emitted: Set[Tuple[datetime, Optional[str], bool]] = set()
for p in files:
for c in _extract_messages(p):
if isinstance(c, Exception):
@@ -136,7 +126,7 @@ def messages() -> Iterator[Res[Message]]:
def _extract_messages(path: Path) -> Iterator[Res[Message]]:
- tr = _parse_xml(path)
+ tr = etree.parse(str(path))
for mxml in tr.findall('sms'):
dt = mxml.get('date')
dt_readable = mxml.get('readable_date')
@@ -165,20 +155,20 @@ class MMSContentPart(NamedTuple):
sequence_index: int
content_type: str
filename: str
- text: str | None
- data: str | None
+ text: Optional[str]
+ data: Optional[str]
class MMS(NamedTuple):
dt: datetime
dt_readable: str
- parts: list[MMSContentPart]
+ parts: List[MMSContentPart]
# NOTE: these is often something like 'Name 1, Name 2', but might be different depending on your client
- who: str | None
+ who: Optional[str]
# NOTE: This can be a single phone number, or multiple, split by '~' or ','. It's better to think
# of this as a 'key' or 'conversation ID', phone numbers are also present in 'addresses'
phone_number: str
- addresses: list[tuple[str, int]]
+ addresses: List[Tuple[str, int]]
# 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox
message_type: int
@@ -192,9 +182,10 @@ class MMS(NamedTuple):
for (addr, _type) in self.addresses:
if _type == 137:
return addr
- # hmm, maybe return instead? but this probably shouldn't happen, means
- # something is very broken
- raise RuntimeError(f'No from address matching 137 found in {self.addresses}')
+ else:
+ # hmm, maybe return instead? but this probably shouldn't happen, means
+ # something is very broken
+ raise RuntimeError(f'No from address matching 137 found in {self.addresses}')
@property
def from_me(self) -> bool:
@@ -204,7 +195,7 @@ class MMS(NamedTuple):
def mms() -> Iterator[Res[MMS]]:
files = get_files(config.export_path, glob='sms-*.xml')
- emitted: set[tuple[datetime, str | None, str]] = set()
+ emitted: Set[Tuple[datetime, Optional[str], str]] = set()
for p in files:
for c in _extract_mms(p):
if isinstance(c, Exception):
@@ -217,10 +208,10 @@ def mms() -> Iterator[Res[MMS]]:
yield c
-def _resolve_null_str(value: str | None) -> str | None:
+def _resolve_null_str(value: Optional[str]) -> Optional[str]:
if value is None:
return None
- # hmm.. there's some risk of the text actually being 'null', but there's
+ # hmm.. there's some risk of the text actually being 'null', but there's
# no way to distinguish that from XML values
if value == 'null':
return None
@@ -228,7 +219,8 @@ def _resolve_null_str(value: str | None) -> str | None:
def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
- tr = _parse_xml(path)
+ tr = etree.parse(str(path))
+
for mxml in tr.findall('mms'):
dt = mxml.get('date')
dt_readable = mxml.get('readable_date')
@@ -244,7 +236,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
yield RuntimeError(f'Missing one or more required attributes [date, readable_date, msg_box, address] in {mxml_str}')
continue
- addresses: list[tuple[str, int]] = []
+ addresses: List[Tuple[str, int]] = []
for addr_parent in mxml.findall('addrs'):
for addr in addr_parent.findall('addr'):
addr_data = addr.attrib
@@ -259,7 +251,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
continue
addresses.append((user_address, int(user_type)))
- content: list[MMSContentPart] = []
+ content: List[MMSContentPart] = []
for part_root in mxml.findall('parts'):
@@ -273,8 +265,11 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
#
# This seems pretty useless, so we should try and skip it, and just return the
# text/images/data
- part_data: dict[str, Any] = part.attrib
- seq: str | None = part_data.get('seq')
+ #
+ # man, attrib is some internal cpython ._Attrib type which can't
+ # be typed by any sort of mappingproxy. maybe a protocol could work..?
+ part_data: Dict[str, Any] = part.attrib # type: ignore
+ seq: Optional[str] = part_data.get('seq')
if seq == '-1':
continue
@@ -282,13 +277,13 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
yield RuntimeError(f'seq must be a number, was seq={seq} {type(seq)} in {part_data}')
continue
- charset_type: str | None = _resolve_null_str(part_data.get('ct'))
- filename: str | None = _resolve_null_str(part_data.get('name'))
+ charset_type: Optional[str] = _resolve_null_str(part_data.get('ct'))
+ filename: Optional[str] = _resolve_null_str(part_data.get('name'))
# in some cases (images, cards), the filename is set in 'cl' instead
if filename is None:
filename = _resolve_null_str(part_data.get('cl'))
- text: str | None = _resolve_null_str(part_data.get('text'))
- data: str | None = _resolve_null_str(part_data.get('data'))
+ text: Optional[str] = _resolve_null_str(part_data.get('text'))
+ data: Optional[str] = _resolve_null_str(part_data.get('data'))
if charset_type is None or filename is None or (text is None and data is None):
yield RuntimeError(f'Missing one or more required attributes [ct, name, (text, data)] must be present in {part_data}')
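
_parse_dt_ms, used when constructing Call/Message above, converts the XML 'date' attribute from epoch milliseconds; its body isn't shown in these hunks, so this is a sketch of the likely implementation:

from datetime import datetime, timezone

def parse_dt_ms(ms: str) -> datetime:
    return datetime.fromtimestamp(int(ms) / 1000, tz=timezone.utc)

print(parse_dt_ms('1577836800000'))  # 2020-01-01 00:00:00+00:00
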
diff --git a/my/stackexchange/gdpr.py b/my/stackexchange/gdpr.py
index 8ed0d30..5292bef 100644
--- a/my/stackexchange/gdpr.py
+++ b/my/stackexchange/gdpr.py
@@ -6,11 +6,8 @@ Stackexchange data (uses [[https://stackoverflow.com/legal/gdpr/request][officia
### config
from dataclasses import dataclass
-
from my.config import stackexchange as user_config
-from my.core import Json, PathIsh, get_files, make_config
-
-
+from my.core import PathIsh, make_config, get_files, Json
@dataclass
class stackexchange(user_config):
gdpr_path: PathIsh # path to GDPR zip file
@@ -20,13 +17,9 @@ config = make_config(stackexchange)
# TODO just merge all of them and then filter?.. not sure
-from collections.abc import Iterable
-from datetime import datetime
-from typing import NamedTuple
-
from my.core.compat import fromisoformat
-
-
+from typing import NamedTuple, Iterable
+from datetime import datetime
class Vote(NamedTuple):
j: Json
# todo ip?
@@ -49,7 +42,7 @@ class Vote(NamedTuple):
# hmm, this loads very raw comments without the rest of the page?
# - https://meta.stackexchange.com/posts/27319/comments#comment-57475
#
- # parentPostId is the original question
+ # parentPostId is the original question
# TODO is not always present? fucking hell
# seems like there is no way to get a hierarchical comment link.. guess this needs to be handled in Promnesia normalisation...
# postId is the answer
@@ -69,10 +62,7 @@ class Vote(NamedTuple):
# todo expose vote type?
import json
-
from ..core.error import Res
-
-
def votes() -> Iterable[Res[Vote]]:
# TODO there is also some site specific stuff in qa/ directory.. not sure if its' more detailed
# todo should be defensive? not sure if present when user has no votes
@@ -84,8 +74,6 @@ def votes() -> Iterable[Res[Vote]]:
yield Vote(r)
-from ..core import Stats, stat
-
-
+from ..core import stat, Stats
def stats() -> Stats:
return stat(votes)
diff --git a/my/stackexchange/stexport.py b/my/stackexchange/stexport.py
index 111ed28..812a155 100644
--- a/my/stackexchange/stexport.py
+++ b/my/stackexchange/stexport.py
@@ -16,8 +16,7 @@ from my.core import (
make_config,
stat,
)
-
-import my.config # isort: skip
+import my.config
@dataclass
diff --git a/my/taplog.py b/my/taplog.py
index 5e64a72..51eeb72 100644
--- a/my/taplog.py
+++ b/my/taplog.py
@@ -1,26 +1,24 @@
'''
[[https://play.google.com/store/apps/details?id=com.waterbear.taglog][Taplog]] app data
'''
-from __future__ import annotations
-
-from collections.abc import Iterable
from datetime import datetime
-from typing import NamedTuple
+from typing import NamedTuple, Dict, Optional, Iterable
+
+from my.core import get_files, stat, Stats
+from my.core.sqlite import sqlite_connection
from my.config import taplog as user_config
-from my.core import Stats, get_files, stat
-from my.core.sqlite import sqlite_connection
class Entry(NamedTuple):
- row: dict
+ row: Dict
@property
def id(self) -> str:
return str(self.row['_id'])
@property
- def number(self) -> float | None:
+ def number(self) -> Optional[float]:
ns = self.row['number']
# TODO ??
if isinstance(ns, str):
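
A minimal sketch of the row-wrapper pattern Entry uses: keep the raw sqlite row as a dict and expose typed accessors as properties. The number coercion below is an assumption for illustration; the real property handles more cases.

    from typing import Dict, NamedTuple, Optional

    class Entry(NamedTuple):
        row: Dict

        @property
        def number(self) -> Optional[float]:
            ns = self.row['number']
            # simplified coercion, just for the sketch
            return float(ns) if ns not in (None, '') else None

    assert Entry(row={'_id': 1, 'number': '61.5'}).number == 61.5
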
diff --git a/my/telegram/telegram_backup.py b/my/telegram/telegram_backup.py
index eea7e50..0617501 100644
--- a/my/telegram/telegram_backup.py
+++ b/my/telegram/telegram_backup.py
@@ -1,39 +1,39 @@
"""
Telegram data via [fabianonline/telegram_backup](https://github.com/fabianonline/telegram_backup) tool
"""
-from __future__ import annotations
-import sqlite3
-from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime, timezone
-from struct import calcsize, unpack_from
+from struct import unpack_from, calcsize
+import sqlite3
+from typing import Dict, Iterator, Optional
+
+from my.core import datetime_aware, PathIsh
+from my.core.sqlite import sqlite_connection
from my.config import telegram as user_config
-from my.core import PathIsh, datetime_aware
-from my.core.sqlite import sqlite_connection
@dataclass
class config(user_config.telegram_backup):
# path to the export database.sqlite
export_path: PathIsh
-
+
@dataclass
class Chat:
id: str
- name: str | None
+ name: Optional[str]
# not all users have short handle + groups don't have them either?
# TODO hmm some groups have it -- it's just the tool doesn't dump them??
- handle: str | None
+ handle: Optional[str]
# not sure if need type?
@dataclass
class User:
id: str
- name: str | None
+ name: Optional[str]
@dataclass
@@ -44,7 +44,7 @@ class Message:
chat: Chat
sender: User
text: str
- extra_media_info: str | None = None
+ extra_media_info: Optional[str] = None
@property
def permalink(self) -> str:
@@ -61,7 +61,7 @@ class Message:
-Chats = dict[str, Chat]
+Chats = Dict[str, Chat]
def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bool) -> Message:
ts = r['time']
# desktop export uses UTC (checked by exporting in winter time vs summer time)
@@ -70,7 +70,7 @@ def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bo
chat = chats[r['source_id']]
sender = chats[r['sender_id']]
- extra_media_info: str | None = None
+ extra_media_info: Optional[str] = None
if with_extra_media_info and r['has_media'] == 1:
# also it's quite hacky, so at least for now it's just an optional attribute behind the flag
# defensive because it's a bit tricky to correctly parse without a proper api parser..
@@ -90,7 +90,7 @@ def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bo
)
-def messages(*, extra_where: str | None=None, with_extra_media_info: bool=False) -> Iterator[Message]:
+def messages(*, extra_where: Optional[str]=None, with_extra_media_info: bool=False) -> Iterator[Message]:
messages_query = 'SELECT * FROM messages WHERE message_type NOT IN ("service_message", "empty_message")'
if extra_where is not None:
messages_query += ' AND ' + extra_where
@@ -106,7 +106,7 @@ def messages(*, extra_where: str | None=None, with_extra_media_info: bool=False)
for r in db.execute('SELECT * FROM users ORDER BY id'):
first = r["first_name"]
last = r["last_name"]
- name: str | None
+ name: Optional[str]
if first is not None and last is not None:
name = f'{first} {last}'
else:
@@ -121,7 +121,7 @@ def messages(*, extra_where: str | None=None, with_extra_media_info: bool=False)
yield _message_from_row(r, chats=chats, with_extra_media_info=with_extra_media_info)
-def _extract_extra_media_info(data: bytes) -> str | None:
+def _extract_extra_media_info(data: bytes) -> Optional[str]:
# ugh... very hacky, but it does manage to extract from 90% of messages that have media
pos = 0
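
_extract_extra_media_info scans a binary blob with unpack_from/calcsize; a rough sketch of that scanning style under an assumed layout (a 4-byte header followed by a length-prefixed utf8 string -- the real Telegram blob format is more involved):

    from struct import calcsize, unpack_from

    blob = b'\x00\x00\x00\x00\x05hello'
    pos = 4                                  # skip a hypothetical 4-byte header
    (strlen,) = unpack_from('B', blob, pos)  # 1-byte length prefix
    pos += calcsize('B')
    assert blob[pos:pos + strlen].decode('utf8') == 'hello'
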
diff --git a/my/tests/body/weight.py b/my/tests/body/weight.py
deleted file mode 100644
index f26ccf2..0000000
--- a/my/tests/body/weight.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from pathlib import Path
-
-import pytest
-import pytz
-
-from my.body.weight import from_orgmode
-from my.core.cfg import tmp_config
-
-
-def test_body_weight() -> None:
- weights = [0.0 if isinstance(x, Exception) else x.value for x in from_orgmode()]
-
- assert weights == [
- 0.0,
- 62.0,
- 0.0,
- 61.0,
- 62.0,
- 0.0,
- ]
-
-
-@pytest.fixture(autouse=True)
-def prepare(tmp_path: Path):
- ndir = tmp_path / 'notes'
- ndir.mkdir()
- logs = ndir / 'logs.org'
- logs.write_text(
- '''
-#+TITLE: Stuff I'm logging
-
-* Weight (org-capture) :weight:
-** [2020-05-01 Fri 09:00] 62
-** 63
- this should be ignored, got no timestamp
-** [2020-05-03 Sun 08:00] 61
-** [2020-05-04 Mon 10:00] 62
-'''
- )
- misc = ndir / 'misc.org'
- misc.write_text(
- '''
-Some misc stuff
-
-* unrelated note :weight:whatever:
-'''
- )
-
- class orgmode:
- paths = [ndir]
-
- class weight:
- # TODO ugh. this belongs to tz provider or global config or something
- default_timezone = pytz.timezone('Europe/London')
-
- with tmp_config() as cfg:
- cfg.orgmode = orgmode
- cfg.weight = weight
- yield
diff --git a/my/tests/calendar.py b/my/tests/calendar.py
deleted file mode 100644
index b5f856c..0000000
--- a/my/tests/calendar.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from my.calendar.holidays import is_holiday
-
-from .shared_tz_config import config # autoused fixture
-
-
-def test_is_holiday() -> None:
- assert is_holiday('20190101')
- assert not is_holiday('20180601')
- assert is_holiday('20200906') # national holiday in Bulgaria
diff --git a/my/tests/commits.py b/my/tests/commits.py
index 48e349f..c967027 100644
--- a/my/tests/commits.py
+++ b/my/tests/commits.py
@@ -1,11 +1,14 @@
import os
from pathlib import Path
-import pytest
from more_itertools import bucket
+import pytest
+
+
+from my.core.cfg import tmp_config
from my.coding.commits import commits
-from my.core.cfg import tmp_config
+
pytestmark = pytest.mark.skipif(
os.name == 'nt',
diff --git a/my/tests/common.py b/my/tests/common.py
index cf5c632..e3060e1 100644
--- a/my/tests/common.py
+++ b/my/tests/common.py
@@ -1,5 +1,7 @@
import os
from pathlib import Path
+import re
+import sys
import pytest
@@ -11,11 +13,17 @@ skip_if_not_karlicoss = pytest.mark.skipif(
)
+def reset_modules() -> None:
+ '''
+ A hack to 'unload' HPI modules, otherwise some modules might cache the config
+ TODO: a bit crap, need a better way..
+ '''
+ to_unload = [m for m in sys.modules if re.match(r'my[.]?', m)]
+ for m in to_unload:
+ del sys.modules[m]
+
+
def testdata() -> Path:
d = Path(__file__).absolute().parent.parent.parent / 'testdata'
assert d.exists(), d
return d
-
-
-# prevent pytest from treating this as test
-testdata.__test__ = False # type: ignore[attr-defined]
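
A hypothetical usage of the new reset_modules helper: forget cached my.* modules before re-importing under a temporary config, so a module imported earlier doesn't keep the stale config around.

    from my.core.cfg import tmp_config
    from my.tests.common import reset_modules

    def test_fresh_config() -> None:
        reset_modules()  # drop my.* modules cached with a previous config
        with tmp_config() as cfg:
            cfg.some_section = object()  # whatever the test needs (hypothetical)
            import my.core  # noqa -- fresh import, no stale state from earlier tests
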
diff --git a/my/tests/conftest.py b/my/tests/conftest.py
deleted file mode 100644
index cc7bb7e..0000000
--- a/my/tests/conftest.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import pytest
-
-
-# I guess makes sense by default
-@pytest.fixture(autouse=True)
-def without_cachew():
- from my.core.cachew import disabled_cachew
-
- with disabled_cachew():
- yield
diff --git a/my/tests/location/google.py b/my/tests/location/google.py
deleted file mode 100644
index 43b8646..0000000
--- a/my/tests/location/google.py
+++ /dev/null
@@ -1,55 +0,0 @@
-"""
-Tests for LEGACY location provider
-
-Keeping for now for backwards compatibility
-"""
-
-from pathlib import Path
-
-import pytest
-from more_itertools import one
-
-from my.core.cfg import tmp_config
-from my.location.google import locations
-
-
-def test_google_locations() -> None:
- locs = list(locations())
- assert len(locs) == 3810, len(locs)
-
- last = locs[-1]
- assert last.dt.strftime('%Y%m%d %H:%M:%S') == '20170802 13:01:56' # should be utc
- # todo approx
- assert last.lat == 46.5515350
- assert last.lon == 16.4742742
- # todo check altitude
-
-
-@pytest.fixture(autouse=True)
-def prepare(tmp_path: Path):
-
- # TODO could just pick a part of shared config? not sure
- _takeout_path = _prepare_takeouts_dir(tmp_path)
-
- class google:
- takeout_path = _takeout_path
-
- with tmp_config() as config:
- config.google = google
- yield
-
-
-def _prepare_takeouts_dir(tmp_path: Path) -> Path:
- from ..common import testdata
-
- try:
- track = one(testdata().rglob('italy-slovenia-2017-07-29.json'))
- except ValueError as e:
- raise RuntimeError('testdata not found, setup git submodules?') from e
-
- # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
- import zipfile
-
- with zipfile.ZipFile(tmp_path / 'takeout.zip', 'w') as zf:
- zf.writestr('Takeout/Location History/Location History.json', track.read_bytes())
- return tmp_path
diff --git a/my/tests/reddit.py b/my/tests/reddit.py
index 4ddccf8..fb8d6d2 100644
--- a/my/tests/reddit.py
+++ b/my/tests/reddit.py
@@ -1,15 +1,16 @@
-import pytest
-from more_itertools import consume
-
-# deliberately use mixed style imports on the top level and inside the methods to test tmp_config stuff
-# todo won't really be necessary once we migrate to lazy user config
-import my.reddit.all as my_reddit_all
-import my.reddit.rexport as my_reddit_rexport
from my.core.cfg import tmp_config
from my.core.utils.itertools import ensure_unique
+# todo ugh, it's discovered as a test???
from .common import testdata
+from more_itertools import consume
+import pytest
+
+# deliberately use mixed style imports on the top level and inside the methods to test tmp_config stuff
+import my.reddit.rexport as my_reddit_rexport
+import my.reddit.all as my_reddit_all
+
def test_basic_1() -> None:
# todo maybe this should call stat or something instead?
diff --git a/my/tests/tz.py b/my/tests/tz.py
deleted file mode 100644
index 92d8f3b..0000000
--- a/my/tests/tz.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import sys
-from datetime import datetime, timedelta
-
-import pytest
-import pytz
-
-import my.time.tz.main as tz_main
-import my.time.tz.via_location as tz_via_location
-from my.core import notnone
-from my.core.compat import fromisoformat
-
-from .shared_tz_config import config # autoused fixture
-
-
-def getzone(dt: datetime) -> str:
- tz = notnone(dt.tzinfo)
- return getattr(tz, 'zone')
-
-
-@pytest.mark.parametrize('fast', [False, True])
-def test_iter_tzs(*, fast: bool, config) -> None:
- # TODO hmm.. maybe need to make sure we start with empty config?
- config.time.tz.via_location.fast = fast
-
- ll = list(tz_via_location._iter_tzs())
- zones = [x.zone for x in ll]
-
- if fast:
- assert zones == [
- 'Europe/Rome',
- 'Europe/Rome',
- 'Europe/Vienna',
- 'Europe/Vienna',
- 'Europe/Vienna',
- ]
- else:
- assert zones == [
- 'Europe/Rome',
- 'Europe/Rome',
- 'Europe/Ljubljana',
- 'Europe/Ljubljana',
- 'Europe/Ljubljana',
- ]
-
-
-def test_past() -> None:
- """
- Should fallback to the 'home' location provider
- """
- dt = fromisoformat('2000-01-01 12:34:45')
- dt = tz_main.localize(dt)
- assert getzone(dt) == 'America/New_York'
-
-
-def test_future() -> None:
- """
- For locations in the future should rely on 'home' location
- """
- fut = datetime.now() + timedelta(days=100)
- fut = tz_main.localize(fut)
- assert getzone(fut) == 'Europe/Moscow'
-
-
-def test_get_tz(config) -> None:
- # todo hmm, the way it's implemented at the moment, never returns None?
- get_tz = tz_via_location.get_tz
-
- # not present in the test data
- tz = get_tz(fromisoformat('2020-01-01 10:00:00'))
- assert notnone(tz).zone == 'Europe/Sofia'
-
- tz = get_tz(fromisoformat('2017-08-01 11:00:00'))
- assert notnone(tz).zone == 'Europe/Vienna'
-
- tz = get_tz(fromisoformat('2017-07-30 10:00:00'))
- assert notnone(tz).zone == 'Europe/Rome'
-
- tz = get_tz(fromisoformat('2020-10-01 14:15:16'))
- assert tz is not None
-
- on_windows = sys.platform == 'win32'
- if not on_windows:
- tz = get_tz(datetime.min)
- assert tz is not None
- else:
- # seems this fails because windows doesn't support same date ranges
- # https://stackoverflow.com/a/41400321/
- with pytest.raises(OSError):
- get_tz(datetime.min)
-
-
-def test_policies() -> None:
- naive = fromisoformat('2017-07-30 10:00:00')
- assert naive.tzinfo is None # just in case
-
- # actual timezone at the time
- assert getzone(tz_main.localize(naive)) == 'Europe/Rome'
-
- z = pytz.timezone('America/New_York')
- aware = z.localize(naive)
-
- assert getzone(tz_main.localize(aware)) == 'America/New_York'
-
- assert getzone(tz_main.localize(aware, policy='convert')) == 'Europe/Rome'
-
- with pytest.raises(RuntimeError):
- assert tz_main.localize(aware, policy='throw')
diff --git a/my/time/tz/common.py b/my/time/tz/common.py
index c0dd262..89150c7 100644
--- a/my/time/tz/common.py
+++ b/my/time/tz/common.py
@@ -3,6 +3,7 @@ from typing import Callable, Literal, cast
from my.core import datetime_aware
+
'''
Depending on the specific data provider and your level of paranoia you might expect different behaviour.. E.g.:
- if your objects already have tz info, you might not need to call localize() at all
@@ -32,7 +33,7 @@ def default_policy() -> TzPolicy:
def localize_with_policy(
lfun: Callable[[datetime], datetime_aware],
dt: datetime,
- policy: TzPolicy=default_policy() # noqa: B008
+ policy: TzPolicy=default_policy()
) -> datetime_aware:
tz = dt.tzinfo
if tz is None:
diff --git a/my/time/tz/main.py b/my/time/tz/main.py
index bdd36b1..fafc5fe 100644
--- a/my/time/tz/main.py
+++ b/my/time/tz/main.py
@@ -6,7 +6,6 @@ from datetime import datetime
from my.core import datetime_aware
-
# todo hmm, kwargs isn't mypy friendly.. but specifying types would require duplicating default args. uhoh
def localize(dt: datetime, **kwargs) -> datetime_aware:
# todo document patterns for combining multiple data sources
diff --git a/my/time/tz/via_location.py b/my/time/tz/via_location.py
index 1b2275b..b66ff8a 100644
--- a/my/time/tz/via_location.py
+++ b/my/time/tz/via_location.py
@@ -1,39 +1,52 @@
'''
Timezone data provider, guesses timezone based on location data (e.g. GPS)
'''
-
-from __future__ import annotations
-
REQUIRES = [
# for determining timezone by coordinate
'timezonefinder',
]
-import heapq
-import os
from collections import Counter
-from collections.abc import Iterable, Iterator
from dataclasses import dataclass
from datetime import date, datetime
from functools import lru_cache
+import heapq
from itertools import groupby
-from typing import (
- TYPE_CHECKING,
- Any,
- Protocol,
-)
+import os
+from typing import Iterator, Optional, Tuple, Any, List, Iterable, Set, Dict
import pytz
-from my.core import Stats, datetime_aware, make_logger, stat
from my.core.cachew import mcachew
-from my.core.compat import TypeAlias
+from my.core import make_logger, stat, Stats, datetime_aware
from my.core.source import import_source
from my.core.warnings import high
+
from my.location.common import LatLon
-class config(Protocol):
+## user might not have tz config section, so makes sense to be more defensive about it
+# todo might be useful to extract a helper for this
+try:
+ from my.config import time
+except ImportError as ie:
+ if ie.name != 'time':
+ raise ie
+else:
+ try:
+ user_config = time.tz.via_location
+ except AttributeError as ae:
+ if not ("'tz'" in str(ae) or "'via_location'"):
+ raise ae
+
+# deliberately dynamic to prevent confusing mypy
+if 'user_config' not in globals():
+ globals()['user_config'] = object
+##
+
+
+@dataclass
+class config(user_config):
# less precise, but faster
fast: bool = True
@@ -49,46 +62,11 @@ class config(Protocol):
_iter_tz_refresh_time: int = 6
-def _get_user_config():
- ## user might not have tz config section, so makes sense to be more defensive about it
-
- class empty_config: ...
-
- try:
- from my.config import time
- except ImportError as ie:
- if "'time'" not in str(ie):
- raise ie
- return empty_config
-
- try:
- user_config = time.tz.via_location
- except AttributeError as ae:
- if not ("'tz'" in str(ae) or "'via_location'" in str(ae)):
- raise ae
- return empty_config
-
- return user_config
-
-
-def make_config() -> config:
- if TYPE_CHECKING:
- import my.config
-
- user_config: TypeAlias = my.config.time.tz.via_location
- else:
- user_config = _get_user_config()
-
- class combined_config(user_config, config): ...
-
- return combined_config()
-
-
logger = make_logger(__name__)
@lru_cache(None)
-def _timezone_finder(*, fast: bool) -> Any:
+def _timezone_finder(fast: bool) -> Any:
if fast:
# less precise, but faster
from timezonefinder import TimezoneFinderL as Finder
@@ -98,7 +76,7 @@ def _timezone_finder(*, fast: bool) -> Any:
# for backwards compatibility
-def _locations() -> Iterator[tuple[LatLon, datetime_aware]]:
+def _locations() -> Iterator[Tuple[LatLon, datetime_aware]]:
try:
import my.location.all
@@ -121,7 +99,7 @@ def _locations() -> Iterator[tuple[LatLon, datetime_aware]]:
# TODO: could use heapmerge or sort the underlying iterators somehow?
# see https://github.com/karlicoss/HPI/pull/237#discussion_r858372934
-def _sorted_locations() -> list[tuple[LatLon, datetime_aware]]:
+def _sorted_locations() -> List[Tuple[LatLon, datetime_aware]]:
return sorted(_locations(), key=lambda x: x[1])
@@ -136,7 +114,7 @@ class DayWithZone:
zone: Zone
-def _find_tz_for_locs(finder: Any, locs: Iterable[tuple[LatLon, datetime]]) -> Iterator[DayWithZone]:
+def _find_tz_for_locs(finder: Any, locs: Iterable[Tuple[LatLon, datetime]]) -> Iterator[DayWithZone]:
for (lat, lon), dt in locs:
# TODO right. its _very_ slow...
zone = finder.timezone_at(lat=lat, lng=lon)
@@ -162,14 +140,13 @@ def _find_tz_for_locs(finder: Any, locs: Iterable[tuple[LatLon, datetime]]) -> I
# Note: this takes a while, as the upstream since _locations isn't sorted, so this
# has to do an iterative sort of the entire my.locations.all list
def _iter_local_dates() -> Iterator[DayWithZone]:
- cfg = make_config()
- finder = _timezone_finder(fast=cfg.fast) # rely on the default
+ finder = _timezone_finder(fast=config.fast) # rely on the default
# pdt = None
# TODO: warnings doesn't actually warn?
# warnings = []
- locs: Iterable[tuple[LatLon, datetime]]
- locs = _sorted_locations() if cfg.sort_locations else _locations()
+ locs: Iterable[Tuple[LatLon, datetime]]
+ locs = _sorted_locations() if config.sort_locations else _locations()
yield from _find_tz_for_locs(finder, locs)
@@ -181,13 +158,11 @@ def _iter_local_dates() -> Iterator[DayWithZone]:
def _iter_local_dates_fallback() -> Iterator[DayWithZone]:
from my.location.fallback.all import fallback_locations as flocs
- cfg = make_config()
-
- def _fallback_locations() -> Iterator[tuple[LatLon, datetime]]:
+ def _fallback_locations() -> Iterator[Tuple[LatLon, datetime]]:
for loc in sorted(flocs(), key=lambda x: x.dt):
yield ((loc.lat, loc.lon), loc.dt)
- yield from _find_tz_for_locs(_timezone_finder(fast=cfg.fast), _fallback_locations())
+ yield from _find_tz_for_locs(_timezone_finder(fast=config.fast), _fallback_locations())
def most_common(lst: Iterator[DayWithZone]) -> DayWithZone:
@@ -205,13 +180,12 @@ def _iter_tz_depends_on() -> str:
2022-04-26_12
2022-04-26_18
"""
- cfg = make_config()
- mod = cfg._iter_tz_refresh_time
+ mod = config._iter_tz_refresh_time
assert mod >= 1
day = str(date.today())
hr = datetime.now().hour
hr_truncated = hr // mod * mod
- return f"{day}_{hr_truncated}"
+ return "{}_{}".format(day, hr_truncated)
# refresh _iter_tzs every few hours -- don't think a better depends_on is possible dynamically
@@ -221,14 +195,14 @@ def _iter_tzs() -> Iterator[DayWithZone]:
# we need to sort them first before we can do a groupby
by_day = lambda p: p.day
- local_dates: list[DayWithZone] = sorted(_iter_local_dates(), key=by_day)
+ local_dates: List[DayWithZone] = sorted(_iter_local_dates(), key=by_day)
logger.debug(f"no. of items using exact locations: {len(local_dates)}")
- local_dates_fallback: list[DayWithZone] = sorted(_iter_local_dates_fallback(), key=by_day)
+ local_dates_fallback: List[DayWithZone] = sorted(_iter_local_dates_fallback(), key=by_day)
# find days that are in fallback but not in local_dates (i.e., missing days)
- local_dates_set: set[date] = {d.day for d in local_dates}
- use_fallback_days: list[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set]
+ local_dates_set: Set[date] = {d.day for d in local_dates}
+ use_fallback_days: List[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set]
logger.debug(f"no. of items being used from fallback locations: {len(use_fallback_days)}")
# combine local_dates and missing days from fallback into a sorted list
@@ -242,20 +216,20 @@ def _iter_tzs() -> Iterator[DayWithZone]:
@lru_cache(1)
-def _day2zone() -> dict[date, pytz.BaseTzInfo]:
+def _day2zone() -> Dict[date, pytz.BaseTzInfo]:
# NOTE: kinda unfortunate that this will have to process all days before returning result for just one
# however otherwise cachew cache might never be initialized properly
- # so we'll always end up recomputing everything during subsequent runs
+ # so we'll always end up recomputing everything during subsequent runs
return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()}
-def _get_day_tz(d: date) -> pytz.BaseTzInfo | None:
+def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
return _day2zone().get(d)
# ok to cache, there are only a few home locations?
@lru_cache(None)
-def _get_home_tz(loc: LatLon) -> pytz.BaseTzInfo | None:
+def _get_home_tz(loc: LatLon) -> Optional[pytz.BaseTzInfo]:
(lat, lng) = loc
finder = _timezone_finder(fast=False) # ok to use slow here for better precision
zone = finder.timezone_at(lat=lat, lng=lng)
@@ -266,7 +240,7 @@ def _get_home_tz(loc: LatLon) -> pytz.BaseTzInfo | None:
return pytz.timezone(zone)
-def get_tz(dt: datetime) -> pytz.BaseTzInfo | None:
+def get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
'''
Given a datetime, returns the timezone for that date.
'''
@@ -297,7 +271,7 @@ def localize(dt: datetime) -> datetime_aware:
return tz.localize(dt)
-def stats(*, quick: bool = False) -> Stats:
+def stats(quick: bool = False) -> Stats:
if quick:
prev, config.sort_locations = config.sort_locations, False
res = {'first': next(_iter_local_dates())}
@@ -319,13 +293,5 @@ def stats(*, quick: bool = False) -> Stats:
return stat(localized_years)
-## deprecated -- keeping for now as might be used in other modules?
-if not TYPE_CHECKING:
- from my.core.compat import deprecated
-
- @deprecated('use get_tz function instead')
- def _get_tz(*args, **kwargs):
- return get_tz(*args, **kwargs)
-
-
-##
+# deprecated -- still used in some other modules so need to keep
+_get_tz = get_tz
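
The _iter_tz_depends_on string above drives cachew invalidation; a worked example of the hour truncation with the default _iter_tz_refresh_time of 6, where hours 12..17 all collapse to 12, so the key changes only four times a day:

    from datetime import date

    mod = 6  # default config._iter_tz_refresh_time
    day = str(date.today())
    for hr in (12, 14, 17, 18):
        print("{}_{}".format(day, hr // mod * mod))  # ..._12, _12, _12, _18
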
diff --git a/my/tinder/android.py b/my/tinder/android.py
index 5a5d887..d9b256b 100644
--- a/my/tinder/android.py
+++ b/my/tinder/android.py
@@ -3,22 +3,20 @@ Tinder data from Android app database (in =/data/data/com.tinder/databases/tinde
"""
from __future__ import annotations
-import sqlite3
-from collections import Counter, defaultdict
-from collections.abc import Iterator, Mapping, Sequence
+from collections import defaultdict, Counter
from dataclasses import dataclass
from datetime import datetime, timezone
from itertools import chain
from pathlib import Path
-from typing import Union
+import sqlite3
+from typing import Sequence, Iterator, Union, Dict, List, Mapping
-from my.core import Paths, Res, Stats, datetime_aware, get_files, make_logger, stat
+from my.core import Paths, get_files, Res, stat, Stats, datetime_aware, make_logger
from my.core.common import unique_everseen
from my.core.compat import assert_never
from my.core.error import echain
from my.core.sqlite import sqlite_connection
-
-import my.config # isort: skip
+import my.config
logger = make_logger(__name__)
@@ -106,7 +104,7 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]:
user_profile_rows = list(db.execute('SELECT * FROM profile_user_view'))
if len(user_profile_rows) == 0:
- # shit, sometime in 2023 profile_user_view stopped containing user profile..
+ # shit, sometime in 2023 profile_user_view stopped containing user profile..
# presumably the most common from_id/to_id would be our own username
counter = Counter([id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')])
if len(counter) > 0: # this might happen if db is empty (e.g. user got logged out)
@@ -166,8 +164,8 @@ def _parse_msg(row: sqlite3.Row) -> _Message:
# todo maybe it's rich_entities method?
def entities() -> Iterator[Res[Entity]]:
- id2person: dict[str, Person] = {}
- id2match: dict[str, Match] = {}
+ id2person: Dict[str, Person] = {}
+ id2match: Dict[str, Match] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x
@@ -219,7 +217,7 @@ def messages() -> Iterator[Res[Message]]:
# todo not sure, maybe it's not fundamental enough to keep here...
def match2messages() -> Iterator[Res[Mapping[Match, Sequence[Message]]]]:
- res: dict[Match, list[Message]] = defaultdict(list)
+ res: Dict[Match, List[Message]] = defaultdict(list)
for x in entities():
if isinstance(x, Exception):
yield x
diff --git a/my/topcoder.py b/my/topcoder.py
index 40df77c..8e39252 100644
--- a/my/topcoder.py
+++ b/my/topcoder.py
@@ -1,14 +1,14 @@
-import json
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from functools import cached_property
+import json
from pathlib import Path
+from typing import Iterator, Sequence
-from my.core import Res, datetime_aware, get_files
+from my.core import get_files, Res, datetime_aware
from my.core.compat import fromisoformat
from my.experimental.destructive_parsing import Manager
-from my.config import topcoder as config # type: ignore[attr-defined] # isort: skip
+from my.config import topcoder as config # type: ignore[attr-defined]
def inputs() -> Sequence[Path]:
@@ -58,7 +58,7 @@ def _parse_one(p: Path) -> Iterator[Res[Competition]]:
h.pop_if_primitive('version', 'id')
h = h.zoom('result')
- h.check('success', expected=True)
+ h.check('success', True)
h.check('status', 200)
h.pop_if_primitive('metadata')
@@ -81,7 +81,7 @@ def _parse_one(p: Path) -> Iterator[Res[Competition]]:
# but also expects cooperation from .make method (e.g. popping items from the dict)
# could also wrap in helper and pass to .make .. not sure
# an argument could be made that .make isn't really a class methond..
- # it's pretty specific to this parser only
+ # it's pretty specific to this parser only
yield from Competition.make(j=c)
yield from m.check()
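
The zoom/check/pop_if_primitive calls above come from my.experimental.destructive_parsing: pop every key as you consume it, so whatever remains at m.check() surfaces as unhandled data. A toy sketch of that discipline (not the actual Manager API):

    j = {'version': 1, 'result': {'success': True, 'status': 200}}
    j.pop('version')
    result = j.pop('result')
    assert result.pop('success') is True
    assert result.pop('status') == 200
    assert not j and not result, (j, result)  # leftovers would mean unhandled data
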
diff --git a/my/twitter/all.py b/my/twitter/all.py
index c2c471e..4714021 100644
--- a/my/twitter/all.py
+++ b/my/twitter/all.py
@@ -1,11 +1,11 @@
"""
Unified Twitter data (merged from the archive and periodic updates)
"""
-from collections.abc import Iterator
-
+from typing import Iterator
from ..core import Res
from ..core.source import import_source
-from .common import Tweet, merge_tweets
+from .common import merge_tweets, Tweet
+
# NOTE: you can comment out the sources you don't need
src_twint = import_source(module_name='my.twitter.twint')
diff --git a/my/twitter/android.py b/my/twitter/android.py
index 7e8f170..f40ad0e 100644
--- a/my/twitter/android.py
+++ b/my/twitter/android.py
@@ -4,20 +4,20 @@ Twitter data from official app for Android
from __future__ import annotations
-import re
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
+import re
from struct import unpack_from
+from typing import Iterator, Sequence, Set
-from my.core import LazyLogger, Paths, Res, datetime_aware, get_files
+from my.core import datetime_aware, get_files, LazyLogger, Paths, Res
from my.core.common import unique_everseen
from my.core.sqlite import sqlite_connect_immutable
-from .common import permalink
+import my.config
-import my.config # isort: skip
+from .common import permalink
logger = LazyLogger(__name__)
@@ -155,31 +155,16 @@ _SELECT_OWN_TWEETS = '_SELECT_OWN_TWEETS'
def get_own_user_id(conn) -> str:
# unclear what's the reliable way to query it, so we use multiple different ones and arbitrate
# NOTE: 'SELECT DISTINCT ev_owner_id FROM lists' doesn't work, might include lists from other people?
- res: set[str] = set()
- # need to cast as it's int by default
+ res = set()
for q in [
- 'SELECT DISTINCT CAST(list_mapping_user_id AS TEXT) FROM list_mapping',
- 'SELECT DISTINCT CAST(owner_id AS TEXT) FROM cursors',
- 'SELECT DISTINCT CAST(user_id AS TEXT) FROM users WHERE _id == 1',
- # ugh, sometimes all of the above are empty...
- # for the rest it seems:
- # - is_active_creator is NULL
- # - is_graduated is NULL
- # - profile_highlighted_info is NULL
- 'SELECT DISTINCT CAST(user_id AS TEXT) FROM users WHERE is_active_creator == 0 AND is_graduated == 1 AND profile_highlights_info IS NOT NULL',
+ 'SELECT DISTINCT list_mapping_user_id FROM list_mapping',
+ 'SELECT DISTINCT owner_id FROM cursors',
+ 'SELECT DISTINCT user_id FROM users WHERE _id == 1',
]:
- res |= {r for (r,) in conn.execute(q)}
-
- assert len(res) <= 1, res
- if len(res) == 0:
- # sometimes even all of the above doesn't help...
- # last resort is trying to get from status_groups table
- # however we can't always use it because it might contain multiple different owner_id?
- # not sure, maybe it will break as well and we'll need to fallback on the most common or something..
- res |= {r for (r,) in conn.execute('SELECT DISTINCT CAST(owner_id AS TEXT) FROM status_groups')}
+ for (r,) in conn.execute(q):
+ res.add(r)
assert len(res) == 1, res
- [r] = res
- return r
+ return str(list(res)[0])
# NOTE:
@@ -205,7 +190,7 @@ def get_own_user_id(conn) -> str:
# - timeline_data_type
# 1 : the bulk of tweets, but also some notifications etc??
# 2 : who-to-follow/community-to-join. contains a couple of tweets, but their corresponding status_id is NULL
-# 8 : who-to-follow/notification
+# 8 : who-to-follow/notification
# 13: semantic-core/who-to-follow
# 14: cursor
# 17: trends
@@ -252,7 +237,7 @@ def _process_one(f: Path, *, where: str) -> Iterator[Res[Tweet]]:
NOT (statuses.in_r_user_id == -1 AND statuses.in_r_status_id == -1 AND statuses.conversation_id == 0)
'''
- def _query_one(*, where: str, quoted: set[int]) -> Iterator[Res[Tweet]]:
+ def _query_one(*, where: str, quoted: Set[int]) -> Iterator[Res[Tweet]]:
for (
tweet_id,
user_username,
@@ -276,7 +261,7 @@ def _process_one(f: Path, *, where: str) -> Iterator[Res[Tweet]]:
text=content,
)
- quoted: set[int] = set()
+ quoted: Set[int] = set()
yield from _query_one(where=db_where, quoted=quoted)
# get quoted tweets 'recursively'
# TODO maybe do it for favs/bookmarks too? not sure
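
A condensed, self-contained sketch of the arbitration idea in get_own_user_id: collect candidates from several heuristic queries and require that they all agree (in-memory db standing in for the real schema):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE cursors (owner_id TEXT)')
    conn.executemany('INSERT INTO cursors VALUES (?)', [('42',), ('42',)])

    res = set()
    for q in ['SELECT DISTINCT owner_id FROM cursors']:
        for (r,) in conn.execute(q):
            res.add(r)
    assert len(res) == 1, res  # disagreement between heuristics fails loudly
    print(str(list(res)[0]))   # -> 42
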
diff --git a/my/twitter/archive.py b/my/twitter/archive.py
index c9d2dbc..0ea6b24 100644
--- a/my/twitter/archive.py
+++ b/my/twitter/archive.py
@@ -2,74 +2,73 @@
Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
"""
-from __future__ import annotations
-import html
-import json # hmm interesting enough, orjson didn't give much speedup here?
-from abc import abstractmethod
-from collections.abc import Iterator, Sequence
+# before this config was named 'twitter', doesn't make too much sense for archive
+# todo unify with other code like this, e.g. time.tz.via_location
+try:
+ from my.config import twitter_archive as user_config
+except ImportError as ie:
+ if not (ie.name == 'my.config' and 'twitter_archive' in str(ie)):
+ # must be caused by something else
+ raise ie
+ try:
+ from my.config import twitter as user_config # type: ignore[assignment]
+ except ImportError:
+ raise ie # raise the original exception.. must be something else
+ else:
+ from ..core import warnings
+ warnings.high('my.config.twitter is deprecated! Please rename it to my.config.twitter_archive in your config')
+##
+
+
from dataclasses import dataclass
from datetime import datetime
-from functools import cached_property
from itertools import chain
+import json # hmm interesting enough, orjson didn't give much speedup here?
from pathlib import Path
+from functools import cached_property
+import html
from typing import (
- TYPE_CHECKING,
+ Iterator,
+ List,
+ Optional,
+ Sequence,
)
from more_itertools import unique_everseen
from my.core import (
- Json,
- Paths,
- Res,
- Stats,
datetime_aware,
get_files,
make_logger,
stat,
- warnings,
+ Json,
+ Paths,
+ Res,
+ Stats,
)
+from my.core import warnings
+from my.core.cfg import make_config
from my.core.serialize import dumps as json_dumps
from .common import TweetId, permalink
+
+@dataclass
+class twitter_archive(user_config):
+ export_path: Paths # path[s]/glob to the twitter archive takeout
+
+
+###
+
+config = make_config(twitter_archive)
+
+
logger = make_logger(__name__)
-class config:
- @property
- @abstractmethod
- def export_path(self) -> Paths:
- """path[s]/glob to the twitter archive takeout"""
- raise NotImplementedError
-
-
-def make_config() -> config:
- # before this config was named 'twitter', doesn't make too much sense for archive
- # todo unify with other code like this, e.g. time.tz.via_location
- try:
- from my.config import twitter_archive as user_config
- except ImportError as ie:
- if not (ie.name == 'my.config' and 'twitter_archive' in str(ie)):
- # must be caused by something else
- raise ie
- try:
- from my.config import twitter as user_config # type: ignore[assignment]
- except ImportError:
- raise ie # raise the original exception.. must be something else # noqa: B904
- else:
- warnings.high('my.config.twitter is deprecated! Please rename it to my.config.twitter_archive in your config')
- ##
-
- class combined_config(user_config, config):
- pass
-
- return combined_config()
-
-
def inputs() -> Sequence[Path]:
- return get_files(make_config().export_path)
+ return get_files(config.export_path)
# TODO make sure it's not used anywhere else and simplify interface
@@ -106,7 +105,7 @@ class Tweet:
repls.append((fr, to, me['display_url']))
# todo not sure, maybe use media_url_https instead?
# for now doing this for compatibility with twint
- repls = sorted(repls)
+ repls = list(sorted(repls))
parts = []
idx = 0
for fr, to, what in repls:
@@ -122,7 +121,7 @@ class Tweet:
return res
@property
- def urls(self) -> list[str]:
+ def urls(self) -> List[str]:
ents = self.entities
us = ents['urls']
return [u['expanded_url'] for u in us]
@@ -163,10 +162,10 @@ class Like:
return self.raw['tweetId']
@property
- def text(self) -> str | None:
+ def text(self) -> Optional[str]:
# NOTE: likes basically don't have anything except text and url
# ugh. I think none means that tweet was deleted?
- res: str | None = self.raw.get('fullText')
+ res: Optional[str] = self.raw.get('fullText')
if res is None:
return None
res = html.unescape(res)
@@ -187,7 +186,7 @@ class ZipExport:
if not (self.zpath / 'Your archive.html').exists():
self.old_format = True
- def raw(self, what: str, *, fname: str | None = None) -> Iterator[Json]:
+ def raw(self, what: str, *, fname: Optional[str] = None) -> Iterator[Json]:
logger.info(f'{self.zpath} : processing {what}')
path = fname or what
@@ -227,80 +226,11 @@ class ZipExport:
yield Like(r, screen_name=self.screen_name)
-def _cleanup_tweet_json(rj: Json) -> None:
- # note: for now this isn't used, was just an attempt to normalise raw data...
-
- rj.pop('edit_info', None) # useless for downstream processing, but results in dupes, so let's remove it
-
- ## could probably just take the last one? dunno
- rj.pop('retweet_count', None)
- rj.pop('favorite_count', None)
- ##
-
- entities = rj.get('entities', {})
- ext_entities = rj.get('extended_entities', {})
-
- # TODO shit. unclear how to 'merge' changes to these
- # links sometimes change for no apparent reason -- and sometimes old one is still valid but not the new one???
- for m in entities.get('media', {}):
- m.pop('media_url', None)
- m.pop('media_url_https', None)
- for m in ext_entities.get('media', {}):
- m.pop('media_url', None)
- m.pop('media_url_https', None)
- ##
-
- for m in entities.get('user_mentions', {}):
- # changes if user renames themselves...
- m.pop('name', None)
-
- # hmm so can change to -1? maybe if user was deleted?
- # but also can change to actually something else?? second example
- entities.pop('user_mentions', None)
-
- # TODO figure out what else is changing there later...
- rj.pop('entities', None)
- rj.pop('extended_entities', None)
-
- ## useless attributes which should be fine to exclude
- rj.pop('possibly_sensitive', None) # not sure what is this.. sometimes appears with False value??
- rj.pop('withheld_in_countries', None)
- rj.pop('lang', None)
- ##
-
- # ugh. might change if the Twitter client was deleted or description renamed??
- rj.pop('source', None)
-
- ## ugh. sometimes trailing 0 after decimal point is present?
- rj.pop('coordinates', None)
- rj.get('geo', {}).pop('coordinates', None)
- ##
-
- # ugh. this changes if user changed their name...
- # or disappears if account was deleted?
- rj.pop('in_reply_to_screen_name', None)
-
-
# todo not sure about list and sorting? although can't hurt considering json is not iterative?
def tweets() -> Iterator[Res[Tweet]]:
_all = chain.from_iterable(ZipExport(i).tweets() for i in inputs())
-
- # NOTE raw json data in archived tweets changes all the time even for same tweets
- # there is an attempt to clean it up... but it's tricky since users rename themselves, twitter stats are changing
- # so it's unclear how to pick up
- # we should probably 'merge' tweets into a canonical version, e.g.
- # - pick latest tweet stats
- # - keep history of usernames we were replying to that share the same user id
- # - pick 'best' media url somehow??
- # - normalise coordinates data
- def key(t: Tweet):
- # NOTE: not using t.text, since it actually changes if entities in tweet are changing...
- # whereas full_text seems stable
- text = t.raw['full_text']
- return (t.created_at, t.id_str, text)
-
- res = unique_everseen(_all, key=key)
- yield from sorted(res, key=lambda t: t.created_at)
+ res = unique_everseen(_all, key=json_dumps)
+ yield from sorted(res, key=lambda t: t.dt)
def likes() -> Iterator[Res[Like]]:
@@ -318,5 +248,4 @@ def stats() -> Stats:
## Deprecated stuff
-if not TYPE_CHECKING:
- Tid = TweetId
+Tid = TweetId
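
The restored tweets() dedupes by serializing each raw tweet, so only byte-identical payloads collapse -- which is exactly the limitation the removed comment block describes (archived json drifts when users rename themselves, stats change, etc). A minimal sketch with stdlib json standing in for my.core.serialize.dumps:

    import json
    from more_itertools import unique_everseen

    items = [
        {'id_str': '1', 'full_text': 'hi'},
        {'id_str': '1', 'full_text': 'hi'},  # byte-identical dupe: dropped
        {'id_str': '2', 'full_text': 'yo'},
    ]
    assert len(list(unique_everseen(items, key=json.dumps))) == 2
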
diff --git a/my/twitter/common.py b/my/twitter/common.py
index 8c346f6..258216f 100644
--- a/my/twitter/common.py
+++ b/my/twitter/common.py
@@ -1,19 +1,17 @@
-from my.core import __NOT_HPI_MODULE__ # isort: skip
+from my.core import __NOT_HPI_MODULE__
-from collections.abc import Iterator
from itertools import chain
-from typing import Any
+from typing import Iterator, Any
from more_itertools import unique_everseen
+
# TODO add proper Protocol for Tweet
Tweet = Any
TweetId = str
-from my.core import Res, warn_if_empty
-
-
+from my.core import warn_if_empty, Res
@warn_if_empty
def merge_tweets(*sources: Iterator[Res[Tweet]]) -> Iterator[Res[Tweet]]:
def key(r: Res[Tweet]):
diff --git a/my/twitter/talon.py b/my/twitter/talon.py
index dbf2e2e..306a735 100644
--- a/my/twitter/talon.py
+++ b/my/twitter/talon.py
@@ -1,17 +1,13 @@
"""
Twitter data from Talon app database (in =/data/data/com.klinker.android.twitter_l/databases/=)
"""
-
from __future__ import annotations
-import re
-import sqlite3
-from abc import abstractmethod
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
-from pathlib import Path
-from typing import Union
+import re
+import sqlite3
+from typing import Iterator, Sequence, Union
from my.core import Paths, Res, datetime_aware, get_files
from my.core.common import unique_everseen
@@ -19,25 +15,18 @@ from my.core.sqlite import sqlite_connection
from .common import TweetId, permalink
-
-class config:
- @property
- @abstractmethod
- def export_path(self) -> Paths:
- raise NotImplementedError
+from my.config import twitter as user_config
-def make_config() -> config:
- from my.config import twitter as user_config
-
- class combined_config(user_config.talon, config):
- pass
-
- return combined_config()
+@dataclass
+class config(user_config.talon):
+ # paths[s]/glob to the exported sqlite databases
+ export_path: Paths
+from pathlib import Path
def inputs() -> Sequence[Path]:
- return get_files(make_config().export_path)
+ return get_files(config.export_path)
@dataclass(unsafe_hash=True)
@@ -57,16 +46,12 @@ class Tweet:
@dataclass(unsafe_hash=True)
class _IsTweet:
tweet: Tweet
-
-
@dataclass(unsafe_hash=True)
class _IsFavorire:
tweet: Tweet
Entity = Union[_IsTweet, _IsFavorire]
-
-
def _entities() -> Iterator[Res[Entity]]:
for f in inputs():
yield from _process_one(f)
@@ -74,7 +59,7 @@ def _entities() -> Iterator[Res[Entity]]:
def _process_one(f: Path) -> Iterator[Res[Entity]]:
handlers = {
- 'user_tweets.db': _process_user_tweets,
+ 'user_tweets.db' : _process_user_tweets,
'favorite_tweets.db': _process_favorite_tweets,
}
fname = f.name
diff --git a/my/twitter/twint.py b/my/twitter/twint.py
index 9d36a93..ceb5406 100644
--- a/my/twitter/twint.py
+++ b/my/twitter/twint.py
@@ -1,17 +1,17 @@
"""
Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export.
"""
-from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
-from typing import NamedTuple
+from typing import NamedTuple, Iterator, List
-from my.core import Json, LazyLogger, Paths, Res, Stats, datetime_aware, get_files, stat
+
+from my.core import Paths, Res, get_files, LazyLogger, Json, datetime_aware, stat, Stats
from my.core.cfg import make_config
from my.core.sqlite import sqlite_connection
-from my.config import twint as user_config # isort: skip
+from my.config import twint as user_config
# TODO move to twitter.twint config structure
@@ -54,7 +54,7 @@ class Tweet(NamedTuple):
# https://github.com/thomasancheriyil/Red-Tide-Detection-based-on-Twitter/blob/beb200be60cc66dcbc394e670513715509837812/python/twitterGapParse.py#L61-L62
#
# twint is also saving 'timezone', but this is local machine timezone at the time of scraping?
- # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keeping lots on unnecessary stuff in the db)
+ # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keeping lots of unnecessary stuff in the db)
return datetime.fromtimestamp(seconds, tz=tz)
@property
@@ -76,7 +76,7 @@ class Tweet(NamedTuple):
return text
@property
- def urls(self) -> list[str]:
+ def urls(self) -> List[str]:
ustr = self.row['urls']
if len(ustr) == 0:
return []
diff --git a/my/util/hpi_heartbeat.py b/my/util/hpi_heartbeat.py
index 6dcac7e..84790a4 100644
--- a/my/util/hpi_heartbeat.py
+++ b/my/util/hpi_heartbeat.py
@@ -5,13 +5,12 @@ In particular the behaviour of import_original_module function
The idea of testing is that overlays extend this module, and add their own
items to items(), and the checker asserts all overlays have contributed.
"""
+from my.core import __NOT_HPI_MODULE__
-from my.core import __NOT_HPI_MODULE__ # isort: skip
-
-import sys
-from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime
+import sys
+from typing import Iterator, List
NOW = datetime.now()
@@ -20,10 +19,10 @@ NOW = datetime.now()
class Item:
dt: datetime
message: str
- path: list[str]
+ path: List[str]
-def get_pkg_path() -> list[str]:
+def get_pkg_path() -> List[str]:
pkg = sys.modules[__package__]
return list(pkg.__path__)
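
get_pkg_path returns a list because overlays extend my.util as a namespace package, so __path__ may hold one directory per overlay; a sketch of what the checker inspects (assuming HPI and at least one overlay are installed):

    import sys

    import my.util  # ensure the package is in sys.modules

    pkg = sys.modules['my.util']
    print(list(pkg.__path__))  # e.g. one entry for core HPI plus one per overlay
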
diff --git a/my/vk/favorites.py b/my/vk/favorites.py
index 5f278ff..9caae6d 100644
--- a/my/vk/favorites.py
+++ b/my/vk/favorites.py
@@ -1,21 +1,20 @@
# todo: uses my private export script?, timezone
-from __future__ import annotations
-
-import json
-from collections.abc import Iterable, Iterator
from dataclasses import dataclass
from datetime import datetime, timezone
+import json
+from typing import Iterator, Iterable, Optional
+
+from my.core import Json, datetime_aware, stat, Stats
+from my.core.error import Res
from my.config import vk as config # type: ignore[attr-defined]
-from my.core import Json, Stats, datetime_aware, stat
-from my.core.error import Res
@dataclass
class Favorite:
dt: datetime_aware
title: str
- url: str | None
+ url: Optional[str]
text: str
diff --git a/my/vk/vk_messages_backup.py b/my/vk/vk_messages_backup.py
index 4f593c8..c73587f 100644
--- a/my/vk/vk_messages_backup.py
+++ b/my/vk/vk_messages_backup.py
@@ -2,17 +2,19 @@
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
'''
# note: could reuse the original repo, but little point I guess since VK closed their API
-import json
-from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime
+import json
+from typing import Dict, Iterator
import pytz
-from my.config import vk_messages_backup as config
-from my.core import Json, Res, Stats, datetime_aware, get_files, stat
+from my.core import stat, Stats, Json, Res, datetime_aware, get_files
from my.core.common import unique_everseen
+from my.config import vk_messages_backup as config
+
+
# I think vk_messages_backup used this tz?
# not sure if vk actually used to return this tz in api?
TZ = pytz.timezone('Europe/Moscow')
@@ -43,7 +45,7 @@ class Message:
body: str
-Users = dict[Uid, User]
+Users = Dict[Uid, User]
def users() -> Users:
diff --git a/my/whatsapp/android.py b/my/whatsapp/android.py
index a8dbe8d..58ac612 100644
--- a/my/whatsapp/android.py
+++ b/my/whatsapp/android.py
@@ -1,37 +1,41 @@
"""
Whatsapp data from Android app database (in =/data/data/com.whatsapp/databases/msgstore.db=)
"""
-
from __future__ import annotations
-import sqlite3
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
-from typing import Union
+import sqlite3
+from typing import Union, Sequence, Iterator, Optional, Protocol
-from my.core import Paths, Res, datetime_aware, get_files, make_config, make_logger
+from my.core import get_files, Paths, datetime_aware, Res, make_logger
from my.core.common import unique_everseen
from my.core.error import echain, notnone
from my.core.sqlite import sqlite_connection
+import my.config
-import my.config # isort: skip
logger = make_logger(__name__)
-@dataclass
-class Config(my.config.whatsapp.android):
+class Config(Protocol):
# paths[s]/glob to the exported sqlite databases
export_path: Paths
- my_user_id: str | None = None
+
+ my_user_id: Optional[str] = None
-config = make_config(Config)
+def make_config() -> Config:
+ import my.config as user_config
+
+ class combined_config(user_config.whatsapp.android, Config): ...
+
+ return combined_config()
def inputs() -> Sequence[Path]:
+ config = make_config()
return get_files(config.export_path)
@@ -40,13 +44,13 @@ class Chat:
id: str
# todo not sure how to support renames?
# could change Chat object itself, but this won't work well with incremental processing..
- name: str | None
+ name: Optional[str]
@dataclass(unsafe_hash=True)
class Sender:
id: str
- name: str | None
+ name: Optional[str]
@dataclass(unsafe_hash=True)
@@ -55,7 +59,7 @@ class Message:
id: str
dt: datetime_aware
sender: Sender
- text: str | None
+ text: Optional[str]
Entity = Union[Chat, Sender, Message]
@@ -64,27 +68,13 @@ Entity = Union[Chat, Sender, Message]
def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
# TODO later, split out Chat/Sender objects separately to safe on object creation, similar to other android data sources
- try:
- db.execute('SELECT jid_row_id FROM chat_view')
- except sqlite3.OperationalError as oe:
- if 'jid_row_id' not in str(oe):
- raise oe
- new_version_202410 = False
- else:
- new_version_202410 = True
-
- if new_version_202410:
- chat_id_col = 'jid.raw_string'
- jid_join = 'JOIN jid ON jid._id == chat_view.jid_row_id'
- else:
- chat_id_col = 'chat_view.raw_string_jid'
- jid_join = ''
+ config = make_config()
chats = {}
for r in db.execute(
- f'''
- SELECT {chat_id_col} AS chat_id, subject
- FROM chat_view {jid_join}
+ '''
+ SELECT raw_string_jid AS chat_id, subject
+ FROM chat_view
WHERE chat_id IS NOT NULL /* seems that it might be null for chats that are 'recycled' (the db is more like an LRU cache) */
'''
):
@@ -106,7 +96,6 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
):
# TODO seems that msgstore.db doesn't have contact names
# perhaps should extract from wa.db and match against wa_contacts.jid?
- # TODO these can also be chats? not sure if need to include...
s = Sender(
id=r['raw_string'],
name=None,
@@ -118,9 +107,9 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
# so even if it seems as if it has a column (e.g. for attachment path), there is actually no such data
# so makes more sense to just query message column directly
for r in db.execute(
- f'''
+ '''
SELECT
- {chat_id_col} AS chat_id,
+ C.raw_string_jid AS chat_id,
M.key_id, M.timestamp,
sender_jid_row_id,
M.from_me,
@@ -129,9 +118,8 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
MM.file_size,
M.message_type
FROM message AS M
- LEFT JOIN chat_view ON M.chat_row_id = chat_view._id
- {jid_join}
- left JOIN message_media AS MM ON M._id = MM.message_row_id
+ LEFT JOIN chat_view AS C ON M.chat_row_id = C._id
+ LEFT JOIN message_media AS MM ON M._id = MM.message_row_id
WHERE M.key_id != -1 /* key_id -1 is some sort of fake message where everything is null */
/* type 7 seems to be some dummy system message.
sometimes contain chat name, but usually null, so ignore them
@@ -145,9 +133,9 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
ts: int = notnone(r['timestamp'])
dt = datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
- text: str | None = r['text_data']
- media_file_path: str | None = r['file_path']
- media_file_size: int | None = r['file_size']
+ text: Optional[str] = r['text_data']
+ media_file_path: Optional[str] = r['file_path']
+ media_file_size: Optional[int] = r['file_size']
message_type = r['message_type']
@@ -199,7 +187,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
sender_row_id = r['sender_jid_row_id']
if sender_row_id == 0:
# seems that it's always 0 for 1-1 chats
- # for group chats our own id is still 0, but other ids are properly set
+ # for group chats our own id is still 0, but other ids are properly set
if from_me:
myself_user_id = config.my_user_id or 'MYSELF_USER_ID'
sender = Sender(id=myself_user_id, name=None) # TODO set my own name as well?
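
A minimal sketch of the Protocol-based config pattern introduced above: the module declares the attributes it needs, and make_config mixes the user's class in at runtime (user_config below is a stand-in for my.config.whatsapp.android):

    from typing import Optional, Protocol

    class Config(Protocol):
        export_path: str                   # path[s]/glob to the sqlite databases
        my_user_id: Optional[str] = None

    class user_config:                     # stand-in for my.config.whatsapp.android
        export_path = '/path/to/msgstore*.db'

    class combined_config(user_config, Config): ...

    cfg: Config = combined_config()
    assert cfg.my_user_id is None          # default comes from the declaration
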
diff --git a/my/youtube/takeout.py b/my/youtube/takeout.py
index 8eca328..8fe8f2c 100644
--- a/my/youtube/takeout.py
+++ b/my/youtube/takeout.py
@@ -1,17 +1,13 @@
-from __future__ import annotations
+from typing import NamedTuple, List, Iterable, TYPE_CHECKING
-from collections.abc import Iterable, Iterator
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
-
-from my.core import Res, Stats, datetime_aware, make_logger, stat, warnings
-from my.core.compat import deprecated
-
-logger = make_logger(__name__)
+from ..core import datetime_aware, Res, LazyLogger
+from ..core.compat import removeprefix
-@dataclass
-class Watched:
+logger = LazyLogger(__name__)
+
+
+class Watched(NamedTuple):
url: str
title: str
when: datetime_aware
@@ -20,57 +16,19 @@ class Watched:
def eid(self) -> str:
return f'{self.url}-{self.when.isoformat()}'
- def is_deleted(self) -> bool:
- return self.title == self.url
-
# todo define error policy?
# although it has one from google takeout module.. so not sure
-
-def watched() -> Iterator[Res[Watched]]:
- emitted: dict[Any, Watched] = {}
- for w in _watched():
- if isinstance(w, Exception):
- yield w # TODO also make unique?
- continue
-
- # older exports (e.g. html) didn't have microseconds
- # whereas newer json ones do have them
- # seconds resolution is enough to distinguish watched videos
- # also we're processing takeouts in HPI in reverse order, so first seen watch would contain microseconds, resulting in better data
- without_microsecond = w.when.replace(microsecond=0)
-
- key = w.url, without_microsecond
- prev = emitted.get(key, None)
- if prev is not None:
- # NOTE: some video titles start with 'Liked ' for liked videos activity
- # but they'd have different timestamp, so fine not to handle them as a special case here
- if w.title in prev.title:
- # often more stuff added to the title, like 'Official Video'
- # in this case not worth emitting the change
- # also handles the case when titles match
- continue
- # otherwise if title changed completely, just emit the change... not sure what else we could do?
- # could merge titles in the 'titles' field and update dynamically? but a bit complicated, maybe later..
-
- # TODO would also be nice to handle is_deleted here somehow...
- # but for that would need to process data in direct order vs reversed..
- # not sure, maybe this could use a special mode or something?
-
- emitted[key] = w
- yield w
-
-
-def _watched() -> Iterator[Res[Watched]]:
+def watched() -> Iterable[Res[Watched]]:
try:
- from google_takeout_parser.models import Activity
-
from ..google.takeout.parser import events
+ from google_takeout_parser.models import Activity
except ModuleNotFoundError as ex:
logger.exception(ex)
- warnings.high("Please set up my.google.takeout.parser module for better youtube support. Falling back to legacy implementation.")
- yield from _watched_legacy() # type: ignore[name-defined]
+ from ..core.warnings import high
+ high("Please set up my.google.takeout.parser module for better youtube support. Falling back to legacy implementation.")
+ yield from _watched_legacy()
return
YOUTUBE_VIDEO_LINK = '://www.youtube.com/watch?v='
@@ -85,12 +43,12 @@ def _watched() -> Iterator[Res[Watched]]:
continue
url = e.titleUrl
+ header = e.header
+ title = e.title
if url is None:
continue
- header = e.header
-
if header in {'Image Search', 'Search', 'Chrome'}:
# sometimes results in youtube links.. but definitely not watch history
continue
@@ -103,8 +61,6 @@ def _watched() -> Iterator[Res[Watched]]:
pass
continue
- title = e.title
-
if header == 'youtube.com' and title.startswith('Visited '):
continue
@@ -118,34 +74,18 @@ def _watched() -> Iterator[Res[Watched]]:
# all titles contain it, so pointless to include 'Watched '
# also compatible with legacy titles
- title = title.removeprefix('Watched ')
-
- # watches originating from some activity end with this, remove it for consistency
- title = title.removesuffix(' - YouTube')
+ title = removeprefix(title, 'Watched ')
if YOUTUBE_VIDEO_LINK not in url:
- if 'youtube.com/post/' in url:
- # some sort of channel updates?
+ if e.details == ['From Google Ads']:
+            # weird, sometimes results in odd urls
continue
- if 'youtube.com/playlist' in url:
- # 'saved playlist' actions
- continue
- if 'music.youtube.com' in url:
- # todo maybe allow it?
- continue
- if any('From Google Ads' in d for d in e.details):
- # weird, sometimes results in odd urls
- continue
-
- if title == 'Used YouTube':
+ if title == 'Used YouTube' and e.products == ['Android']:
continue
yield RuntimeError(f'Unexpected url: {e}')
continue
- # TODO contribute to takeout parser? seems that these still might happen in json data
- title = title.replace("\xa0", " ")
-
yield Watched(
url=url,
title=title,
@@ -153,6 +93,7 @@ def _watched() -> Iterator[Res[Watched]]:
)
+from ..core import stat, Stats
def stats() -> Stats:
return stat(watched)
@@ -160,24 +101,23 @@ def stats() -> Stats:
### deprecated stuff (keep in my.media.youtube)
if not TYPE_CHECKING:
+ # "deprecate" by hiding from mypy
+ get_watched = watched
- @deprecated("use 'watched' instead")
- def get_watched(*args, **kwargs):
- return watched(*args, **kwargs)
- def _watched_legacy() -> Iterable[Watched]:
- from ..google.takeout.html import read_html
- from ..google.takeout.paths import get_last_takeout
+def _watched_legacy() -> Iterable[Watched]:
+ from ..google.takeout.html import read_html
+ from ..google.takeout.paths import get_last_takeout
- # todo looks like this one doesn't have retention? so enough to use the last
- path = 'Takeout/My Activity/YouTube/MyActivity.html'
- last = get_last_takeout(path=path)
- if last is None:
- return []
+ # todo looks like this one doesn't have retention? so enough to use the last
+ path = 'Takeout/My Activity/YouTube/MyActivity.html'
+ last = get_last_takeout(path=path)
+ if last is None:
+ return []
- watches: list[Watched] = []
- for dt, url, title in read_html(last, path):
- watches.append(Watched(url=url, title=title, when=dt))
+ watches: List[Watched] = []
+ for dt, url, title in read_html(last, path):
+ watches.append(Watched(url=url, title=title, when=dt))
- # todo hmm they already come sorted.. wonder if should just rely on it..
- return sorted(watches, key=lambda e: e.when)
+ # todo hmm they already come sorted.. wonder if should just rely on it..
+ return list(sorted(watches, key=lambda e: e.when))
diff --git a/my/zotero.py b/my/zotero.py
index 8eb34ba..4440aae 100644
--- a/my/zotero.py
+++ b/my/zotero.py
@@ -1,16 +1,14 @@
-from __future__ import annotations as _annotations
-
-import json
-import sqlite3
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
+import json
+from typing import Iterator, Optional, Dict, Any, Sequence
from pathlib import Path
-from typing import Any
+import sqlite3
-from my.core import Res, datetime_aware, make_logger
+from my.core import make_logger, Res, datetime_aware
from my.core.sqlite import sqlite_copy_and_open
+
logger = make_logger(__name__)
@@ -28,7 +26,7 @@ class Item:
"""Corresponds to 'Zotero item'"""
file: Path
title: str
- url: Url | None
+ url: Optional[Url]
tags: Sequence[str]
@@ -41,8 +39,8 @@ class Annotation:
page: int
"""0-indexed"""
- text: str | None
- comment: str | None
+ text: Optional[str]
+ comment: Optional[str]
tags: Sequence[str]
color_hex: str
"""Original hex-encoded color in zotero"""
@@ -99,7 +97,7 @@ WHERE ID.fieldID = 13 AND IA.itemID = ?
# TODO maybe exclude 'private' methods from detection?
-def _query_raw() -> Iterator[Res[dict[str, Any]]]:
+def _query_raw() -> Iterator[Res[Dict[str, Any]]]:
[db] = inputs()
with sqlite_copy_and_open(db) as conn:
@@ -159,7 +157,7 @@ def _hex2human(color_hex: str) -> str:
}.get(color_hex, color_hex)
-def _parse_annotation(r: dict) -> Annotation:
+def _parse_annotation(r: Dict) -> Annotation:
text = r['text']
comment = r['comment']
# todo use json query for this?
diff --git a/my/zulip/organization.py b/my/zulip/organization.py
index d0cfcb7..8725411 100644
--- a/my/zulip/organization.py
+++ b/my/zulip/organization.py
@@ -1,55 +1,38 @@
"""
Zulip data from [[https://memex.zulipchat.com/help/export-your-organization][Organization export]]
"""
-
-from __future__ import annotations
-
-import json
-from abc import abstractmethod
-from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime, timezone
from itertools import count
+import json
from pathlib import Path
+from typing import Sequence, Iterator, Dict, Union
from my.core import (
+ assert_never,
+ datetime_aware,
+ get_files,
+ stat,
Json,
Paths,
Res,
Stats,
- assert_never,
- datetime_aware,
- get_files,
- make_logger,
- stat,
- warnings,
)
-
-logger = make_logger(__name__)
+from my.core.error import notnone
+import my.config
-class config:
- @property
- @abstractmethod
- def export_path(self) -> Paths:
- """paths[s]/glob to the exported JSON data"""
- raise NotImplementedError
-
-
-def make_config() -> config:
- from my.config import zulip as user_config
-
- class combined_config(user_config.organization, config):
- pass
-
- return combined_config()
+@dataclass
+class organization(my.config.zulip.organization):
+    # path[s]/glob to the exported JSON data
+ export_path: Paths
def inputs() -> Sequence[Path]:
# TODO: seems like export ids are kinda random..
# not sure what's the best way to figure out the last without renaming?
# could use mtime perhaps?
- return get_files(make_config().export_path, sort=False)
+ return get_files(organization.export_path, sort=False)
@dataclass(frozen=True)
@@ -102,39 +85,19 @@ class Message:
# todo cache it
-def _entities() -> Iterator[Res[Server | Sender | _Message]]:
+def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]:
last = max(inputs())
- logger.info(f'extracting data from {last}')
+ # todo would be nice to switch it to unpacked dirs as well, similar to ZipPath
+    # I guess it makes sense to have a special implementation for .tar.gz considering how common they are
+ import tarfile
- root: Path | None = None
+ tfile = tarfile.open(last)
- if last.is_dir(): # if it's already CPath, this will match it
- root = last
- else:
- try:
- from kompress import CPath
+ subdir = tfile.getnames()[0] # there is a directory inside tar file, first name should be that
- root = CPath(last)
- assert len(list(root.iterdir())) > 0 # trigger to check if we have the kompress version with targz support
- except Exception as e:
- logger.exception(e)
- warnings.high("Upgrade 'kompress' to latest version with native .tar.gz support. Falling back to unpacking to tmp dir.")
-
- if root is None:
- from my.core.structure import match_structure
-
- with match_structure(last, expected=()) as res: # expected=() matches it regardless any patterns
- [root] = res
- yield from _process_one(root)
- else:
- yield from _process_one(root)
-
-
-def _process_one(root: Path) -> Iterator[Res[Server | Sender | _Message]]:
- [subdir] = root.iterdir() # there is a directory inside tar file, first name should be that
-
- rj = json.loads((subdir / 'realm.json').read_text())
+ with notnone(tfile.extractfile(f'{subdir}/realm.json')) as fo:
+ rj = json.load(fo)
[sj] = rj['zerver_realm']
server = Server(
@@ -173,10 +136,12 @@ def _process_one(root: Path) -> Iterator[Res[Server | Sender | _Message]]:
for idx in count(start=1, step=1):
fname = f'messages-{idx:06}.json'
- fpath = subdir / fname
- if not fpath.exists():
+ fpath = f'{subdir}/{fname}'
+ if fpath not in tfile.getnames():
+ # tarfile doesn't have .exists?
break
- mj = json.loads(fpath.read_text())
+ with notnone(tfile.extractfile(fpath)) as fo:
+ mj = json.load(fo)
# TODO handle zerver_usermessage
for j in mj['zerver_message']:
try:
@@ -186,8 +151,8 @@ def _process_one(root: Path) -> Iterator[Res[Server | Sender | _Message]]:
def messages() -> Iterator[Res[Message]]:
- id2sender: dict[int, Sender] = {}
- id2server: dict[int, Server] = {}
+ id2sender: Dict[int, Sender] = {}
+ id2server: Dict[int, Server] = {}
for x in _entities():
if isinstance(x, Exception):
yield x
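
The hunk above trades the kompress/CPath and match_structure handling for direct `tarfile` access. The access pattern itself is simple; a rough self-contained sketch, assuming only the layout visible in the hunk (one top-level directory holding `realm.json` and consecutively numbered `messages-NNNNNN.json` files):

```python
import json
import tarfile
from itertools import count
from typing import Any, Dict, Iterator


def _json_members(archive: str) -> Iterator[Dict[str, Any]]:
    with tarfile.open(archive) as tf:
        names = set(tf.getnames())
        subdir = tf.getnames()[0]  # export has a single top-level directory
        for idx in count(start=1):
            fname = f'{subdir}/messages-{idx:06}.json'
            if fname not in names:
                break
            fo = tf.extractfile(fname)
            assert fo is not None  # extractfile returns None for non-regular members
            with fo:
                yield json.load(fo)
```

Since `TarFile` has no `.exists()`, membership is checked against a precomputed set of `getnames()` here, rather than rescanning the archive listing on every iteration as the hunk does.
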
diff --git a/mypy.ini b/mypy.ini
index 9c34fcc..ebc81a5 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,13 +1,18 @@
[mypy]
+namespace_packages = True
pretty = True
show_error_context = True
+show_error_codes = True
show_column_numbers = True
show_error_end = True
-warn_redundant_casts = True
warn_unused_ignores = True
check_untyped_defs = True
-strict_equality = True
enable_error_code = possibly-undefined
+strict_equality = True
+
+# a bit annoying, it has an optional ipython import which should be ignored in the mypy-core configuration..
+[mypy-my.core.__main__]
+warn_unused_ignores = False
# todo ok, maybe it wasn't such a good idea..
# mainly because then tox picks it up and runs against the user config, not the repository config
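
The `[mypy-my.core.__main__]` section above uses mypy's per-module overrides: with `warn_unused_ignores = True` globally, a `# type: ignore` guarding an optional import gets flagged as unused in any environment where the package happens to resolve, so the warning is disabled for just that module. Roughly the shape of code this accommodates (a hypothetical sketch, not the module's actual source):

```python
# hypothetical optional-import guard, as in my/core/__main__.py:
try:
    import IPython  # type: ignore  # package/stubs may or may not be installed
except ModuleNotFoundError:
    IPython = None  # feature degrades gracefully when ipython is absent
```
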
diff --git a/ruff.toml b/ruff.toml
index 3d803e7..54f621c 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -1,55 +1,4 @@
-target-version = "py39" # NOTE: inferred from pyproject.toml if present
-
-lint.extend-select = [
- "F", # flakes rules -- default, but extend just in case
- "E", # pycodestyle -- default, but extend just in case
- "W", # various warnings
-
- "B", # 'bugbear' set -- various possible bugs
- "C4", # flake8-comprehensions -- unnecessary list/map/dict calls
- "COM", # trailing commas
- "EXE", # various checks wrt executable files
- # "I", # sort imports
- "ICN", # various import conventions
- "FBT", # detect use of boolean arguments
- "FURB", # various rules
- "PERF", # various potential performance speedups
- "PD", # pandas rules
- "PIE", # 'misc' lints
- "PLC", # pylint convention rules
- "PLR", # pylint refactor rules
- "PLW", # pylint warnings
- "PT", # pytest stuff
- "PYI", # various type hinting rules
- "RET", # early returns
- "RUF", # various ruff-specific rules
- "TID", # various imports suggestions
- "TRY", # various exception handling rules
- "UP", # detect deprecated python stdlib stuff
- "FA", # suggest using from __future__ import annotations
- "PTH", # pathlib migration
- "ARG", # unused argument checks
- # "A", # builtin shadowing -- TODO handle later
- # "EM", # TODO hmm could be helpful to prevent duplicate err msg in traceback.. but kinda annoying
-
- # "ALL", # uncomment this to check for new rules!
-]
-
-# Preserve types, even if a file imports `from __future__ import annotations`
-# we need this for cachew to work with HPI types on 3.9
-# can probably remove after 3.10?
-lint.pyupgrade.keep-runtime-typing = true
-
lint.ignore = [
- "D", # annoying nags about docstrings
- "N", # pep naming
- "TCH", # type checking rules, mostly just suggests moving imports under TYPE_CHECKING
- "S", # bandit (security checks) -- tends to be not very useful, lots of nitpicks
- "DTZ", # datetimes checks -- complaining about missing tz and mostly false positives
- "FIX", # complains about fixmes/todos -- annoying
- "TD", # complains about todo formatting -- too annoying
- "ANN", # missing type annotations? seems way to strict though
-
### too opinionated style checks
"E501", # too long lines
"E702", # Multiple statements on one line (semicolon)
@@ -68,84 +17,9 @@ lint.ignore = [
"E402", # Module level import not at top of file
### maybe consider these soon
-    # sometimes it's useful to give a variable a name even if we don't use it, as documentation
-    # on the other hand, often it's a sign of an error
+# sometimes it's useful to give a variable a name even if we don't use it, as documentation
+# on the other hand, often it's a sign of an error
"F841", # Local variable `count` is assigned to but never used
+ "F401", # imported but unused
###
-
- "RUF100", # unused noqa -- handle later
- "RUF012", # mutable class attrs should be annotated with ClassVar... ugh pretty annoying for user configs
-
-### these are just nitpicky, we usually know better
- "PLR0911", # too many return statements
- "PLR0912", # too many branches
- "PLR0913", # too many function arguments
- "PLR0915", # too many statements
- "PLR1714", # consider merging multiple comparisons
- "PLR2044", # line with empty comment
- "PLR5501", # use elif instead of else if
- "PLR2004", # magic value in comparison -- super annoying in tests
-###
- "PLR0402", # import X.Y as Y -- TODO maybe consider enabling it, but double check
-
- "B009", # calling gettattr with constant attribute -- this is useful to convince mypy
- "B010", # same as above, but setattr
- "B011", # complains about assert False
- "B017", # pytest.raises(Exception)
- "B023", # seems to result in false positives?
- "B028", # suggest using explicit stacklevel? TODO double check later, but not sure it's useful
-
- # complains about useless pass, but has sort of a false positive if the function has a docstring?
- # this is common for click entrypoints (e.g. in __main__), so disable
- "PIE790",
-
- # a bit too annoying, offers to convert for loops to list comprehension
-    # , which may hurt readability
- "PERF401",
-
-    # suggests not using exceptions in for loops
- # we do use this technique a lot, plus in 3.11 happy path exception handling is "zero-cost"
- "PERF203",
-
- "RET504", # unnecessary assignment before returning -- that can be useful for readability
- "RET505", # unnecessary else after return -- can hurt readability
-
- "PLW0603", # global variable update.. we usually know why we are doing this
- "PLW2901", # for loop variable overwritten, usually this is intentional
-
- "PT004", # deprecated rule, will be removed later
- "PT011", # pytest raises should is too broad
- "PT012", # pytest raises should contain a single statement
-
- "COM812", # trailing comma missing -- mostly just being annoying with long multiline strings
-
- "PD901", # generic variable name df
-
- "TRY003", # suggests defining exception messages in exception class -- kinda annoying
- "TRY004", # prefer TypeError -- don't see the point
- "TRY201", # raise without specifying exception name -- sometimes hurts readability
- "TRY400", # TODO double check this, might be useful
- "TRY401", # redundant exception in logging.exception call? TODO double check, might result in excessive logging
-
- "PGH", # TODO force error code in mypy instead
-
- "TID252", # Prefer absolute imports over relative imports from parent modules
-
- "UP038", # suggests using | (union) in isisntance checks.. but it results in slower code
-
- ## too annoying
- "T20", # just complains about prints and pprints
- "Q", # flake quotes, too annoying
- "C90", # some complexity checking
- "G004", # logging statement uses f string
- "ERA001", # commented out code
- "SLF001", # private member accessed
- "BLE001", # do not catch 'blind' Exception
- "INP001", # complains about implicit namespace packages
- "SIM", # some if statements crap
- "RSE102", # complains about missing parens in exceptions
- ##
-
- "ARG001", # ugh, kinda annoying when using pytest fixtures
- "F401" , # TODO nice to have, but annoying with NOT_HPI_MODULE thing
]
diff --git a/setup.py b/setup.py
index 385c810..cf4b79f 100644
--- a/setup.py
+++ b/setup.py
@@ -4,13 +4,13 @@
from setuptools import setup, find_namespace_packages # type: ignore
INSTALL_REQUIRES = [
- 'pytz' , # even though it's not needed by the core, it's so common anyway...
- 'typing-extensions' , # one of the most common pypi packages, ok to depend for core
- 'appdirs' , # very common, and makes it portable
- 'more-itertools' , # it's just too useful and very common anyway
- 'decorator' , # less pain in writing correct decorators. very mature and stable, so worth keeping in core
- 'click>=8.1' , # for the CLI, printing colors, decorator-based - may allow extensions to CLI
- 'kompress>=0.2.20240918' , # for transparent access to compressed files via pathlib.Path
+ 'pytz', # even though it's not needed by the core, it's so common anyway...
+ 'typing-extensions', # one of the most common pypi packages, ok to depend for core
+ 'appdirs', # very common, and makes it portable
+ 'more-itertools', # it's just too useful and very common anyway
+ 'decorator' , # less pain in writing correct decorators. very mature and stable, so worth keeping in core
+ 'click>=8.1' , # for the CLI, printing colors, decorator-based - may allow extensions to CLI
+ 'kompress' , # for transparent access to compressed files via pathlib.Path
]
@@ -44,7 +44,7 @@ def main() -> None:
author_email='karlicoss@gmail.com',
description='A Python interface to my life',
- python_requires='>=3.9',
+ python_requires='>=3.8',
install_requires=INSTALL_REQUIRES,
extras_require={
'testing': [
@@ -58,16 +58,6 @@ def main() -> None:
'orjson', # for my.core.serialize and denylist
'simplejson', # for my.core.serialize
-
- ##
-    # ideally we'd use --install-types in mypy
- # , but looks like it doesn't respect uv venv if it's running in it :(
- 'types-pytz' , # for my.core
- 'types-decorator' , # for my.core.compat
- 'pandas-stubs' , # for my.core.pandas
- 'types-dateparser', # for my.core.query_range
- 'types-simplejson', # for my.core.serialize
- ##
],
'optional': [
# todo document these?
diff --git a/my/tests/bluemaestro.py b/tests/bluemaestro.py
similarity index 78%
rename from my/tests/bluemaestro.py
rename to tests/bluemaestro.py
index d139a8f..84d3eb0 100644
--- a/my/tests/bluemaestro.py
+++ b/tests/bluemaestro.py
@@ -1,15 +1,19 @@
-from collections.abc import Iterator
+from pathlib import Path
+from typing import TYPE_CHECKING, Iterator, Any
-import pytest
from more_itertools import one
-from my.bluemaestro import Measurement, measurements
-from my.core.cfg import tmp_config
+import pytest
-from .common import testdata
+
+if TYPE_CHECKING:
+ from my.bluemaestro import Measurement
+else:
+ Measurement = Any
def ok_measurements() -> Iterator[Measurement]:
+ from my.bluemaestro import measurements
for m in measurements():
assert not isinstance(m, Exception)
yield m
@@ -26,7 +30,7 @@ def test() -> None:
# check that timezone is set properly
assert dts == '20200824 22'
- assert len(tp) == 1 # should be unique
+ assert len(tp) == 1 # should be unique
# 2.5 K + 4 K datapoints, somewhat overlapping
assert len(res2020) < 6000
@@ -46,12 +50,14 @@ def test_old_db() -> None:
@pytest.fixture(autouse=True)
def prepare():
+ from my.tests.common import testdata
bmdata = testdata() / 'hpi-testdata' / 'bluemaestro'
assert bmdata.exists(), bmdata
class bluemaestro:
export_path = bmdata
+ from my.core.cfg import tmp_config
with tmp_config() as config:
config.bluemaestro = bluemaestro
yield
diff --git a/tests/calendar.py b/tests/calendar.py
new file mode 100644
index 0000000..3435da3
--- /dev/null
+++ b/tests/calendar.py
@@ -0,0 +1,19 @@
+from pathlib import Path
+
+import pytest
+
+from my.calendar.holidays import is_holiday
+
+
+def test() -> None:
+ assert is_holiday('20190101')
+ assert not is_holiday('20180601')
+ assert is_holiday('20200906') # national holiday in Bulgaria
+
+
+@pytest.fixture(autouse=True)
+def prepare(tmp_path: Path):
+ from . import tz
+ # todo meh. fixtures can't be called directly?
+ orig = tz.prepare.__wrapped__ # type: ignore
+ yield from orig(tmp_path)
diff --git a/tests/config.py b/tests/config.py
new file mode 100644
index 0000000..101f7df
--- /dev/null
+++ b/tests/config.py
@@ -0,0 +1,126 @@
+from pathlib import Path
+
+
+def test_dynamic_configuration(notes: Path) -> None:
+ import pytz
+ from types import SimpleNamespace as NS
+
+ from my.core.cfg import tmp_config
+ with tmp_config() as C:
+ C.orgmode = NS(paths=[notes])
+ # TODO ugh. this belongs to tz provider or global config or something
+ C.weight = NS(default_timezone=pytz.timezone('Europe/London'))
+
+ from my.body.weight import from_orgmode
+ weights = [0.0 if isinstance(x, Exception) else x.value for x in from_orgmode()]
+
+ assert weights == [
+ 0.0,
+ 62.0,
+ 0.0,
+ 61.0,
+ 62.0,
+ 0.0,
+ ]
+
+import pytest
+
+
+def test_environment_variable(tmp_path: Path) -> None:
+ cfg_dir = tmp_path / 'my'
+ cfg_file = cfg_dir / 'config.py'
+ cfg_dir.mkdir()
+ cfg_file.write_text('''
+class feedly:
+ pass
+class just_for_test:
+ pass
+''')
+
+ import os
+ oenv = dict(os.environ)
+ try:
+ os.environ['MY_CONFIG'] = str(tmp_path)
+ # should not raise at least
+ import my.rss.feedly
+
+ import my.config as c
+ assert hasattr(c, 'just_for_test')
+ finally:
+ os.environ.clear()
+ os.environ.update(oenv)
+
+ import sys
+ # TODO wtf??? doesn't work without unlink... is it caching something?
+ cfg_file.unlink()
+ del sys.modules['my.config'] # meh..
+
+ import my.config as c
+ assert not hasattr(c, 'just_for_test')
+
+
+from dataclasses import dataclass
+
+
+def test_user_config() -> None:
+ from my.core.common import classproperty
+ class user_config:
+ param1 = 'abacaba'
+ # TODO fuck. properties don't work here???
+ @classproperty
+ def param2(cls) -> int:
+ return 456
+
+ extra = 'extra!'
+
+ @dataclass
+ class test_config(user_config):
+ param1: str
+ param2: int # type: ignore[assignment] # TODO need to figure out how to trick mypy for @classproperty
+ param3: str = 'default'
+
+ assert test_config.param1 == 'abacaba'
+ assert test_config.param2 == 456
+ assert test_config.param3 == 'default'
+ assert test_config.extra == 'extra!'
+
+ from my.core.cfg import make_config
+ c = make_config(test_config)
+ assert c.param1 == 'abacaba'
+ assert c.param2 == 456
+ assert c.param3 == 'default'
+ assert c.extra == 'extra!'
+
+
+@pytest.fixture
+def notes(tmp_path: Path):
+ ndir = tmp_path / 'notes'
+ ndir.mkdir()
+ logs = ndir / 'logs.org'
+ logs.write_text('''
+#+TITLE: Stuff I'm logging
+
+* Weight (org-capture) :weight:
+** [2020-05-01 Fri 09:00] 62
+** 63
+ this should be ignored, got no timestamp
+** [2020-05-03 Sun 08:00] 61
+** [2020-05-04 Mon 10:00] 62
+ ''')
+ misc = ndir / 'misc.org'
+ misc.write_text('''
+Some misc stuff
+
+* unrelated note :weight:whatever:
+ ''')
+ try:
+ yield ndir
+ finally:
+ pass
+
+
+@pytest.fixture(autouse=True)
+def prepare():
+ from my.tests.common import reset_modules
+ reset_modules()
+ yield
diff --git a/tests/demo.py b/tests/demo.py
new file mode 100644
index 0000000..73a6c65
--- /dev/null
+++ b/tests/demo.py
@@ -0,0 +1,118 @@
+import sys
+from pathlib import Path
+from more_itertools import ilen
+
+# TODO NOTE: this wouldn't work because of an early my.config.demo import
+# from my.demo import items
+
+def test_dynamic_config_1(tmp_path: Path) -> None:
+ import my.config
+
+ class user_config:
+ username = 'user'
+ data_path = f'{tmp_path}/*.json'
+ external = f'{tmp_path}/external'
+ my.config.demo = user_config # type: ignore[misc, assignment]
+
+ from my.demo import items
+ [item1, item2] = items()
+ assert item1.username == 'user'
+
+
+# exactly the same test, but using a different config, to test out the behaviour w.r.t. import order
+def test_dynamic_config_2(tmp_path: Path) -> None:
+ # doesn't work without it!
+    # because the config from test_dynamic_config_1 is cached in my.demo.demo
+ del sys.modules['my.demo']
+
+ import my.config
+
+ class user_config:
+ username = 'user2'
+ data_path = f'{tmp_path}/*.json'
+ external = f'{tmp_path}/external'
+ my.config.demo = user_config # type: ignore[misc, assignment]
+
+ from my.demo import items
+ [item1, item2] = items()
+ assert item1.username == 'user2'
+
+
+import pytest
+
+@pytest.mark.skip(reason="won't work at the moment because of inheritance")
+def test_dynamic_config_simplenamespace(tmp_path: Path) -> None:
+ # doesn't work without it!
+    # because the config from test_dynamic_config_1 is cached in my.demo.demo
+ del sys.modules['my.demo']
+
+ import my.config
+ from types import SimpleNamespace
+
+ user_config = SimpleNamespace(
+ username='user3',
+ data_path=f'{tmp_path}/*.json',
+ )
+ my.config.demo = user_config # type: ignore[misc, assignment]
+
+ from my.demo import config
+ assert config.username == 'user3'
+
+
+# make sure our config handling pattern does it as expected
+def test_attribute_handling(tmp_path: Path) -> None:
+ # doesn't work without it!
+    # because the config from test_dynamic_config_1 is cached in my.demo.demo
+ del sys.modules['my.demo']
+
+ import pytz
+ nytz = pytz.timezone('America/New_York')
+
+ import my.config
+ class user_config:
+ # check that override is taken into the account
+ timezone = nytz
+
+ irrelevant = 'hello'
+
+ username = 'UUU'
+ data_path = f'{tmp_path}/*.json'
+ external = f'{tmp_path}/external'
+
+
+ my.config.demo = user_config # type: ignore[misc, assignment]
+
+ from my.demo import config
+
+ assert config.username == 'UUU'
+
+ # mypy doesn't know about it, but the attribute is there
+ assert getattr(config, 'irrelevant') == 'hello'
+
+ # check that overridden default attribute is actually getting overridden
+ assert config.timezone == nytz
+
+
+
+@pytest.fixture(autouse=True)
+def prepare(tmp_path: Path):
+ (tmp_path / 'data.json').write_text('''
+[
+ {"key1": 1},
+ {"key2": 2}
+]
+''')
+ ext = tmp_path / 'external'
+ ext.mkdir()
+ (ext / '__init__.py').write_text('''
+def identity(x):
+ from .submodule import hello
+ hello(x)
+ return x
+
+''')
+ (ext / 'submodule.py').write_text('hello = lambda x: print("hello " + str(x))')
+ yield
+ ex = 'my.config.repos.external'
+ if ex in sys.modules:
+ del sys.modules[ex]
diff --git a/tests/github.py b/tests/github.py
index ed89053..6b7df23 100644
--- a/tests/github.py
+++ b/tests/github.py
@@ -5,13 +5,11 @@ from more_itertools import ilen
def test_gdpr() -> None:
import my.github.gdpr as gdpr
-
assert ilen(gdpr.events()) > 100
def test() -> None:
- from my.github.all import get_events
-
+ from my.coding.github import get_events
events = get_events()
assert ilen(events) > 100
for e in events:
diff --git a/tests/location.py b/tests/location.py
new file mode 100644
index 0000000..2597d5e
--- /dev/null
+++ b/tests/location.py
@@ -0,0 +1,28 @@
+from pathlib import Path
+
+import pytest
+
+
+def test() -> None:
+ from my.location.google import locations
+ locs = list(locations())
+ assert len(locs) == 3810
+
+ last = locs[-1]
+ assert last.dt.strftime('%Y%m%d %H:%M:%S') == '20170802 13:01:56' # should be utc
+ # todo approx
+ assert last.lat == 46.5515350
+ assert last.lon == 16.4742742
+ # todo check altitude
+
+
+@pytest.fixture(autouse=True)
+def prepare(tmp_path: Path):
+ from .shared_config import temp_config
+ user_config = temp_config(tmp_path)
+
+ import my.core.cfg as C
+ with C.tmp_config() as config:
+ config.google = user_config.google
+ yield
+
diff --git a/my/tests/location/fallback.py b/tests/location_fallback.py
similarity index 85%
rename from my/tests/location/fallback.py
rename to tests/location_fallback.py
index c09b902..aad33ee 100644
--- a/my/tests/location/fallback.py
+++ b/tests/location_fallback.py
@@ -2,23 +2,32 @@
To test my.location.fallback_location.all
"""
-from collections.abc import Iterator
-from datetime import datetime, timedelta, timezone
+from typing import Iterator
+from datetime import datetime, timezone, timedelta
-import pytest
from more_itertools import ilen
-import my.ip.all as ip_module
from my.ip.common import IP
+
+def data() -> Iterator[IP]:
+ # random IP addresses
+ yield IP(addr="67.98.113.0", dt=datetime(2020, 1, 1, 12, 0, 0, tzinfo=timezone.utc))
+ yield IP(addr="67.98.112.0", dt=datetime(2020, 1, 15, 12, 0, 0, tzinfo=timezone.utc))
+ yield IP(addr="59.40.113.87", dt=datetime(2020, 2, 1, 12, 0, 0, tzinfo=timezone.utc))
+ yield IP(addr="59.40.139.87", dt=datetime(2020, 2, 1, 16, 0, 0, tzinfo=timezone.utc))
+ yield IP(addr="161.235.192.228", dt=datetime(2020, 3, 1, 12, 0, 0, tzinfo=timezone.utc))
+
+# redefine the my.ip.all function using data for testing
+import my.ip.all as ip_module
+ip_module.ips = data
+
from my.location.fallback import via_ip
-from ..shared_tz_config import config # autoused fixture
-
-
# these are all tests for the bisect algorithm defined in via_ip.py
# to make sure we can correctly find IPs that are within the 'for_duration' of a given datetime
+
def test_ip_fallback() -> None:
- # precondition, make sure that the data override works
+ # make sure that the data override works
assert ilen(ip_module.ips()) == ilen(data())
assert ilen(ip_module.ips()) == ilen(via_ip.fallback_locations())
assert ilen(via_ip.fallback_locations()) == 5
@@ -38,9 +47,7 @@ def test_ip_fallback() -> None:
assert len(est) == 1
# right after the 'for_duration' for an IP
- est = list(
- via_ip.estimate_location(datetime(2020, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + via_ip.config.for_duration + timedelta(seconds=1))
- )
+ est = list(via_ip.estimate_location(datetime(2020, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + via_ip.config.for_duration + timedelta(seconds=1)))
assert len(est) == 0
    # on 2/1/2020, there's one IP if before 16:30
@@ -68,8 +75,8 @@ def test_ip_fallback() -> None:
#
# redefine fallback_estimators to prevent possible namespace packages the user
# may have installed from having side effects testing this
- from my.location.fallback import all, via_home
-
+ from my.location.fallback import all
+ from my.location.fallback import via_home
def _fe() -> Iterator[all.LocationEstimator]:
yield via_ip.estimate_location
yield via_home.estimate_location
@@ -81,7 +88,6 @@ def test_ip_fallback() -> None:
#
# just passing via_ip should give one IP
from my.location.fallback.common import _iter_estimate_from
-
raw_est = list(_iter_estimate_from(use_dt, (via_ip.estimate_location,)))
assert len(raw_est) == 1
assert raw_est[0].datasource == "via_ip"
@@ -104,7 +110,7 @@ def test_ip_fallback() -> None:
# should have used the IP from via_ip since it was more accurate
assert all_est.datasource == "via_ip"
- # test that a home defined in shared_tz_config.py is used if no IP is found
+ # test that a home defined in shared_config.py is used if no IP is found
loc = all.estimate_location(datetime(2021, 1, 1, 12, 30, 0, tzinfo=timezone.utc))
assert loc.datasource == "via_home"
@@ -115,21 +121,5 @@ def test_ip_fallback() -> None:
assert (loc.lat, loc.lon) != (bulgaria.lat, bulgaria.lon)
-def data() -> Iterator[IP]:
- # random IP addresses
- yield IP(addr="67.98.113.0", dt=datetime(2020, 1, 1, 12, 0, 0, tzinfo=timezone.utc))
- yield IP(addr="67.98.112.0", dt=datetime(2020, 1, 15, 12, 0, 0, tzinfo=timezone.utc))
- yield IP(addr="59.40.113.87", dt=datetime(2020, 2, 1, 12, 0, 0, tzinfo=timezone.utc))
- yield IP(addr="59.40.139.87", dt=datetime(2020, 2, 1, 16, 0, 0, tzinfo=timezone.utc))
- yield IP(addr="161.235.192.228", dt=datetime(2020, 3, 1, 12, 0, 0, tzinfo=timezone.utc))
-
-
-@pytest.fixture(autouse=True)
-def prepare(config):
- before = ip_module.ips
- # redefine the my.ip.all function using data for testing
- ip_module.ips = data
- try:
- yield
- finally:
- ip_module.ips = before
+# re-use prepare fixture for overriding config from shared_config.py
+from .tz import prepare
diff --git a/tests/orgmode.py b/tests/orgmode.py
index 9b5cc59..37d783e 100644
--- a/tests/orgmode.py
+++ b/tests/orgmode.py
@@ -1,9 +1,10 @@
from my.tests.common import skip_if_not_karlicoss as pytestmark
-def test() -> None:
- from my import orgmode
- from my.core.orgmode import collect
+from my import orgmode
+from my.core.orgmode import collect
+
+def test() -> None:
# meh
results = list(orgmode.query().collect_all(lambda n: [n] if 'python' in n.tags else []))
assert len(results) > 5
diff --git a/my/tests/pdfs.py b/tests/pdfs.py
similarity index 77%
rename from my/tests/pdfs.py
rename to tests/pdfs.py
index 3702424..63b1319 100644
--- a/my/tests/pdfs.py
+++ b/tests/pdfs.py
@@ -1,15 +1,17 @@
-import inspect
from pathlib import Path
-import pytest
from more_itertools import ilen
-from my.core.cfg import tmp_config
-from my.pdfs import annotated_pdfs, annotations, get_annots
+import pytest
+
from my.tests.common import testdata
def test_module(with_config) -> None:
+    # TODO crap. if module is imported too early (on the top level), it makes it super hard to override config
+ # need to at least detect it...
+ from my.pdfs import annotations, annotated_pdfs
+
# todo check types etc as well
assert ilen(annotations()) >= 3
assert ilen(annotated_pdfs()) >= 1
@@ -20,13 +22,12 @@ def test_with_error(with_config, tmp_path: Path) -> None:
root = tmp_path
g = root / 'garbage.pdf'
g.write_text('garbage')
-
from my.config import pdfs
-
# meh. otherwise legacy config value 'wins'
del pdfs.roots # type: ignore[attr-defined]
pdfs.paths = (root,)
+ from my.pdfs import annotations
annots = list(annotations())
[annot] = annots
assert isinstance(annot, Exception)
@@ -34,6 +35,9 @@ def test_with_error(with_config, tmp_path: Path) -> None:
@pytest.fixture
def with_config():
+ from my.tests.common import reset_modules
+ reset_modules() # todo ugh.. getting boilerplaty.. need to make it a bit more automatic..
+
# extra_data = Path(__file__).absolute().parent / 'extra/data/polar'
# assert extra_data.exists(), extra_data
# todo hmm, turned out no annotations in these ones.. whatever
@@ -43,9 +47,13 @@ def with_config():
testdata(),
]
- with tmp_config() as config:
+ import my.core.cfg as C
+ with C.tmp_config() as config:
config.pdfs = user_config
- yield
+ try:
+ yield
+ finally:
+ reset_modules()
EXPECTED_HIGHLIGHTS = {
@@ -60,9 +68,11 @@ def test_get_annots() -> None:
Test get_annots, with a real PDF file
get_annots should return a list of three Annotation objects
"""
+ from my.pdfs import get_annots
+
annotations = get_annots(testdata() / 'pdfs' / 'Information Architecture for the World Wide Web.pdf')
assert len(annotations) == 3
- assert {a.highlight for a in annotations} == EXPECTED_HIGHLIGHTS
+ assert set([a.highlight for a in annotations]) == EXPECTED_HIGHLIGHTS
def test_annotated_pdfs_with_filelist() -> None:
@@ -70,9 +80,12 @@ def test_annotated_pdfs_with_filelist() -> None:
Test annotated_pdfs, with a real PDF file
annotated_pdfs should return a list of one Pdf object, with three Annotations
"""
+ from my.pdfs import annotated_pdfs
+
filelist = [testdata() / 'pdfs' / 'Information Architecture for the World Wide Web.pdf']
annotations_generator = annotated_pdfs(filelist=filelist)
+ import inspect
assert inspect.isgeneratorfunction(annotated_pdfs)
highlights_from_pdfs = []
diff --git a/my/tests/shared_tz_config.py b/tests/shared_config.py
similarity index 54%
rename from my/tests/shared_tz_config.py
rename to tests/shared_config.py
index 810d989..c2f6973 100644
--- a/my/tests/shared_tz_config.py
+++ b/tests/shared_config.py
@@ -1,26 +1,47 @@
-"""
-Helper to test various timezone/location dependent things
-"""
+# Defines some shared config for tests
-from datetime import date, datetime, timezone
+from datetime import datetime, date, timezone
from pathlib import Path
-import pytest
+from typing import Any, NamedTuple
+import my.time.tz.via_location as LTZ
from more_itertools import one
-from my.core.cfg import tmp_config
+
+class SharedConfig(NamedTuple):
+ google: Any
+ location: Any
+ time: Any
-@pytest.fixture(autouse=True)
-def config(tmp_path: Path):
- # TODO could just pick a part of shared config? not sure
- _takeout_path = _prepare_takeouts_dir(tmp_path)
+def _prepare_google_config(tmp_path: Path):
+ from my.tests.common import testdata
+ try:
+ track = one(testdata().rglob('italy-slovenia-2017-07-29.json'))
+ except ValueError:
+ raise RuntimeError('testdata not found, setup git submodules?')
- class google:
- takeout_path = _takeout_path
+
+ # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
+ import zipfile
+ with zipfile.ZipFile(tmp_path / 'takeout.zip', 'w') as zf:
+ zf.writestr('Takeout/Location History/Location History.json', track.read_bytes())
+
+ class google_config:
+ takeout_path = tmp_path
+ return google_config
+
+
+# pass tmp_path from pytest to this helper function
+# see tests/tz.py as an example
+def temp_config(temp_path: Path) -> Any:
+ from my.tests.common import reset_modules
+ reset_modules()
+
+ LTZ.config.fast = True
class location:
- # fmt: off
+ home_accuracy = 30_000
home = (
# supports ISO strings
('2005-12-04' , (42.697842, 23.325973)), # Bulgaria, Sofia
@@ -29,32 +50,16 @@ def config(tmp_path: Path):
# check tz handling..
(datetime.fromtimestamp(1600000000, tz=timezone.utc), (55.7558 , 37.6173 )), # Moscow, Russia
)
- # fmt: on
# note: order doesn't matter, will be sorted in the data provider
+ class via_ip:
+ accuracy = 15_000
+ class gpslogger:
+ pass
class time:
class tz:
class via_location:
- fast = True # some tests rely on it
-
- with tmp_config() as cfg:
- cfg.google = google
- cfg.location = location
- cfg.time = time
- yield cfg
+ pass # just rely on the defaults...
-def _prepare_takeouts_dir(tmp_path: Path) -> Path:
- from .common import testdata
-
- try:
- track = one(testdata().rglob('italy-slovenia-2017-07-29.json'))
- except ValueError as e:
- raise RuntimeError('testdata not found, setup git submodules?') from e
-
- # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
- import zipfile
-
- with zipfile.ZipFile(tmp_path / 'takeout.zip', 'w') as zf:
- zf.writestr('Takeout/Location History/Location History.json', track.read_bytes())
- return tmp_path
+ return SharedConfig(google=_prepare_google_config(temp_path), location=location, time=time)
diff --git a/tests/takeout.py b/tests/takeout.py
index 47d405b..cddc684 100644
--- a/tests/takeout.py
+++ b/tests/takeout.py
@@ -1,4 +1,4 @@
-from my.tests.common import skip_if_not_karlicoss as pytestmark
+#!/usr/bin/env python3
from datetime import datetime, timezone
from itertools import islice
import pytz
diff --git a/tests/tz.py b/tests/tz.py
new file mode 100644
index 0000000..d86c5cb
--- /dev/null
+++ b/tests/tz.py
@@ -0,0 +1,95 @@
+import sys
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import pytest
+import pytz
+
+from my.core.error import notnone
+
+import my.time.tz.main as TZ
+import my.time.tz.via_location as LTZ
+
+
+def test_iter_tzs() -> None:
+ ll = list(LTZ._iter_tzs())
+ assert len(ll) > 3
+
+
+def test_past() -> None:
+ # should fallback to the home location provider
+ dt = D('20000101 12:34:45')
+ dt = TZ.localize(dt)
+ tz = dt.tzinfo
+ assert tz is not None
+ assert getattr(tz, 'zone') == 'America/New_York'
+
+
+def test_future() -> None:
+ fut = datetime.now() + timedelta(days=100)
+ # shouldn't crash at least
+ assert TZ.localize(fut) is not None
+
+
+def test_tz() -> None:
+ # todo hmm, the way it's implemented at the moment, never returns None?
+
+ # not present in the test data
+ tz = LTZ._get_tz(D('20200101 10:00:00'))
+ assert notnone(tz).zone == 'Europe/Sofia'
+
+ tz = LTZ._get_tz(D('20170801 11:00:00'))
+ assert notnone(tz).zone == 'Europe/Vienna'
+
+ tz = LTZ._get_tz(D('20170730 10:00:00'))
+ assert notnone(tz).zone == 'Europe/Rome'
+
+ tz = LTZ._get_tz(D('20201001 14:15:16'))
+ assert tz is not None
+
+ on_windows = sys.platform == 'win32'
+ if not on_windows:
+ tz = LTZ._get_tz(datetime.min)
+ assert tz is not None
+ else:
+        # seems this fails because windows doesn't support the same date ranges
+ # https://stackoverflow.com/a/41400321/
+ with pytest.raises(OSError):
+ LTZ._get_tz(datetime.min)
+
+
+def test_policies() -> None:
+ getzone = lambda dt: getattr(dt.tzinfo, 'zone')
+
+ naive = D('20170730 10:00:00')
+ # actual timezone at the time
+ assert getzone(TZ.localize(naive)) == 'Europe/Rome'
+
+ z = pytz.timezone('America/New_York')
+ aware = z.localize(naive)
+
+ assert getzone(TZ.localize(aware)) == 'America/New_York'
+
+ assert getzone(TZ.localize(aware, policy='convert')) == 'Europe/Rome'
+
+
+ with pytest.raises(RuntimeError):
+ assert TZ.localize(aware, policy='throw')
+
+
+def D(dstr: str) -> datetime:
+ return datetime.strptime(dstr, '%Y%m%d %H:%M:%S')
+
+
+
+@pytest.fixture(autouse=True)
+def prepare(tmp_path: Path):
+ from .shared_config import temp_config
+ conf = temp_config(tmp_path)
+
+ import my.core.cfg as C
+ with C.tmp_config() as config:
+ config.google = conf.google
+ config.time = conf.time
+ config.location = conf.location
+ yield
diff --git a/tox.ini b/tox.ini
index d202bd2..248469e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -17,9 +17,6 @@ passenv =
PYTHONPYCACHEPREFIX
MYPY_CACHE_DIR
RUFF_CACHE_DIR
-setenv =
- HPI_MODULE_INSTALL_USE_UV=true
-uv_seed = true # seems necessary so uv creates separate venvs per tox env?
# note: --use-pep517 below is necessary for tox --parallel flag to work properly
@@ -27,17 +24,16 @@ uv_seed = true # seems necessary so uv creates separate venvs per tox env?
[testenv:ruff]
-deps =
- -e .[testing]
commands =
+ {envpython} -m pip install --use-pep517 -e .[testing]
{envpython} -m ruff check my/
# just the very core tests with minimal dependencies
[testenv:tests-core]
-deps =
- -e .[testing]
commands =
+ {envpython} -m pip install --use-pep517 -e .[testing]
+
{envpython} -m pytest \
# importlib is the new suggested import-mode
# without it test package names end up as core.tests.* instead of my.core.tests.*
@@ -57,26 +53,31 @@ setenv =
# TODO not sure if need it?
MY_CONFIG=nonexistent
HPI_TESTS_USES_OPTIONAL_DEPS=true
-deps =
- -e .[testing]
- uv # for hpi module install
- cachew
- ijson # optional dependency for various modules
commands =
- {envpython} -m my.core module install \
- ## tz/location
- my.location.google \
- my.time.tz.via_location \
- my.ip.all \
- my.location.gpslogger \
- my.location.fallback.via_ip \
- my.google.takeout.parser \
- ##
- my.calendar.holidays \
- my.orgmode \ # my.body.weight dep
- my.coding.commits \
- my.pdfs \
- my.reddit.rexport
+ {envpython} -m pip install --use-pep517 -e .[testing]
+
+ {envpython} -m pip install cachew
+
+ {envpython} -m my.core module install my.location.google
+ {envpython} -m pip install ijson # optional dependency
+
+ # tz/location
+ {envpython} -m my.core module install my.time.tz.via_location
+ {envpython} -m my.core module install my.ip.all
+ {envpython} -m my.core module install my.location.gpslogger
+ {envpython} -m my.core module install my.location.fallback.via_ip
+ {envpython} -m my.core module install my.google.takeout.parser
+
+ {envpython} -m my.core module install my.calendar.holidays
+
+ # my.body.weight dep
+ {envpython} -m my.core module install my.orgmode
+
+ {envpython} -m my.core module install my.coding.commits
+
+ {envpython} -m my.core module install my.pdfs
+
+ {envpython} -m my.core module install my.reddit.rexport
{envpython} -m pytest \
# importlib is the new suggested import-mode
@@ -85,21 +86,26 @@ commands =
--pyargs {[testenv]package_name}.core {[testenv]package_name}.tests \
{posargs}
+ {envpython} -m pytest tests \
+ # ignore some tests which might take a while to run on ci..
+ --ignore tests/takeout.py \
+        --ignore tests/extra/polar.py \
+        {posargs}
+
[testenv:demo]
-deps =
- git+https://github.com/karlicoss/hypexport
commands =
+ {envpython} -m pip install git+https://github.com/karlicoss/hypexport
{envpython} ./demo.py
[testenv:mypy-core]
-deps =
- -e .[testing,optional]
- orgparse # for core.orgmode
- gpxpy # for hpi query --output gpx
commands =
- {envpython} -m mypy --no-install-types \
+ {envpython} -m pip install --use-pep517 -e .[testing,optional]
+    {envpython} -m pip install orgparse # used in core.orgmode?
+ {envpython} -m pip install gpxpy # for hpi query --output gpx
+
+ {envpython} -m mypy --install-types --non-interactive \
-p {[testenv]package_name}.core \
--txt-report .coverage.mypy-core \
--html-report .coverage.mypy-core \
@@ -109,13 +115,9 @@ commands =
# specific modules that are known to be mypy compliant (to avoid false negatives)
# todo maybe split into separate jobs? need to add comment how to run
[testenv:mypy-misc]
-deps =
- -e .[testing,optional]
- uv # for hpi module install
- lxml-stubs # for my.smscalls
- types-protobuf # for my.google.maps.android
- types-Pillow # for my.photos
commands =
+ {envpython} -m pip install --use-pep517 -e .[testing,optional]
+
{envpython} -m my.core module install \
my.arbtt \
my.browser.export \
@@ -147,13 +149,13 @@ commands =
my.time.tz.via_location
- {envpython} -m mypy --no-install-types \
+ {envpython} -m mypy --install-types --non-interactive \
-p {[testenv]package_name} \
--txt-report .coverage.mypy-misc \
--html-report .coverage.mypy-misc \
{posargs}
- {envpython} -m mypy --no-install-types \
+ {envpython} -m mypy --install-types --non-interactive \
tests
# note: this comment doesn't seem relevant anymore, but keeping it in case the issue happens again