Compare commits

No commits in common. "master" and "v0.5.20240824" have entirely different histories.

master ... v0.5.20240824

202 changed files with 2435 additions and 3408 deletions
.ci/run (13 changes)
@@ -11,8 +11,6 @@ if ! command -v sudo; then
 }
 fi
 
-# --parallel-live to show outputs while it's running
-tox_cmd='run-parallel --parallel-live'
 if [ -n "${CI-}" ]; then
     # install OS specific stuff here
     case "$OSTYPE" in
@@ -22,8 +20,7 @@ if [ -n "${CI-}" ]; then
         ;;
     cygwin* | msys* | win*)
         # windows
-        # ugh. parallel stuff seems super flaky under windows, some random failures, "file used by other process" and crap like that
-        tox_cmd='run'
+        :
         ;;
     *)
         # must be linux?
@@ -40,9 +37,5 @@ if ! command -v python3 &> /dev/null; then
     PY_BIN="python"
 fi
 
-# TODO hmm for some reason installing uv with pip and then running
-# "$PY_BIN" -m uv tool fails with missing setuptools error??
-# just uvx directly works, but it's not present in PATH...
-"$PY_BIN" -m pip install --user pipx
-"$PY_BIN" -m pipx run uv tool run --with=tox-uv tox $tox_cmd "$@"
-
+"$PY_BIN" -m pip install --user tox
+"$PY_BIN" -m tox --parallel --parallel-live "$@"
.github/workflows/main.yml (11 changes, vendored)
@@ -21,20 +21,19 @@ on:
 jobs:
   build:
     strategy:
-      fail-fast: false
       matrix:
         platform: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
         exclude: [
           # windows runners are pretty scarce, so let's only run lowest and highest python version
+          {platform: windows-latest, python-version: '3.9' },
           {platform: windows-latest, python-version: '3.10'},
           {platform: windows-latest, python-version: '3.11'},
-          {platform: windows-latest, python-version: '3.12'},
 
           # same, macos is a bit too slow and ubuntu covers python quirks well
+          {platform: macos-latest , python-version: '3.9' },
           {platform: macos-latest , python-version: '3.10' },
           {platform: macos-latest , python-version: '3.11' },
-          {platform: macos-latest , python-version: '3.12' },
         ]
 
     runs-on: ${{ matrix.platform }}
@@ -64,13 +63,11 @@ jobs:
     - if: matrix.platform == 'ubuntu-latest'  # no need to compute coverage for other platforms
       uses: actions/upload-artifact@v4
       with:
-        include-hidden-files: true
         name: .coverage.mypy-misc_${{ matrix.platform }}_${{ matrix.python-version }}
         path: .coverage.mypy-misc/
     - if: matrix.platform == 'ubuntu-latest'  # no need to compute coverage for other platforms
       uses: actions/upload-artifact@v4
       with:
-        include-hidden-files: true
        name: .coverage.mypy-core_${{ matrix.platform }}_${{ matrix.python-version }}
        path: .coverage.mypy-core/
 
@@ -84,7 +81,7 @@ jobs:
 
     - uses: actions/setup-python@v5
       with:
-        python-version: '3.10'
+        python-version: '3.8'
 
     - uses: actions/checkout@v4
       with:
.gitignore (3 changes, vendored)
@@ -155,9 +155,6 @@ celerybeat-schedule
 .dmypy.json
 dmypy.json
 
-# linters
-.ruff_cache/
-
 # Pyre type checker
 .pyre/
 
@@ -20,7 +20,7 @@ General/my.core changes:
 - e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test
 
 Modules:
-- some initial work on filling **InfluxDB** with HPI data
+- some innitial work on filling **InfluxDB** with HPI data
 
 - pinboard
   - 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly
@@ -723,10 +723,10 @@ If you want to write modules for personal use but don't want to merge them into
 
 Other HPI Repositories:
 
-- [[https://github.com/purarue/HPI][purarue/HPI]]
+- [[https://github.com/seanbreckenridge/HPI][seanbreckenridge/HPI]]
 - [[https://github.com/madelinecameron/hpi][madelinecameron/HPI]]
 
-If you want to create your own to create your own modules/override something here, you can use the [[https://github.com/purarue/HPI-template][template]].
+If you want to create your own to create your own modules/override something here, you can use the [[https://github.com/seanbreckenridge/HPI-template][template]].
 
 * Related links
 :PROPERTIES:
@@ -76,7 +76,7 @@ This would typically be used in an overridden `all.py` file, or in a one-off scr
 which you may want to filter out some items from a source, progressively adding more
 items to the denylist as you go.
 
-A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/purarue/HPI)):
+A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/seanbreckenridge/HPI)):
 
 ```python
 from typing import Iterator
@@ -119,9 +119,9 @@ python3 -c 'from my.ip import all; all.deny.deny_cli(all.ips())'
 To edit the `all.py`, you could either:
 
 - install it as editable (`python3 -m pip install --user -e ./HPI`), and then edit the file directly
-- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/purarue/reorder_editable), and possibly the [`HPI-template`](https://github.com/purarue/HPI-template) to create your own HPI namespace package to create your own `all.py` file.
+- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/seanbreckenridge/reorder_editable), and possibly the [`HPI-template`](https://github.com/seanbreckenridge/HPI-template) to create your own HPI namespace package to create your own `all.py` file.
 
-For a real example of this see, [purarue/HPI-personal](https://github.com/purarue/HPI-personal/blob/master/my/ip/all.py)
+For a real example of this see, [seanbreckenridge/HPI-personal](https://github.com/seanbreckenridge/HPI-personal/blob/master/my/ip/all.py)
 
 Sidenote: the reason why we want to specifically override
 the all.py and not just create a script that filters out the items you're
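Side note on this file: the overridden `all.py` the doc keeps referring to is easier to follow with the pieces in one place. A minimal sketch, assuming the `discord` source module and the denyfile path from the doc's own example (both placeholders, not part of this diff):

```python
# hypothetical my/ip/all.py, condensed from doc/DENYLIST.md
from typing import Iterator

from my.ip.common import IP  # IP model shared by the concrete sources

from my.core.denylist import DenyList

deny = DenyList("~/data/ips/denylist.json")  # assumed denyfile location


def ips() -> Iterator[IP]:
    from my.ip import discord  # assumed source, as in the doc's sidenote

    yield from deny.filter(discord.ips())
```

The `all.deny.deny_cli(all.ips())` one-liner quoted in the hunk above then lets you interactively pick items to append to that denyfile.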
@@ -76,7 +76,7 @@ The config snippets below are meant to be modified accordingly and *pasted into
 
 You don't have to set up all modules at once, it's recommended to do it gradually, to get the feel of how HPI works.
 
-For an extensive/complex example, you can check out ~@purarue~'s [[https://github.com/purarue/dotfiles/blob/master/.config/my/my/config/__init__.py][config]]
+For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py][config]]
 
 # Nested Configurations before the doc generation using the block below
 ** [[file:../my/reddit][my.reddit]]
@@ -96,7 +96,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
 
     class pushshift:
         '''
-        Uses [[https://github.com/purarue/pushshift_comment_export][pushshift]] to get access to old comments
+        Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments
         '''
 
         # path[s]/glob to the exported JSON data
@@ -106,7 +106,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
 
 ** [[file:../my/browser/][my.browser]]
 
-Parses browser history using [[http://github.com/purarue/browserexport][browserexport]]
+Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
 
 #+begin_src python
 class browser:
@@ -132,7 +132,7 @@ For an extensive/complex example, you can check out ~@purarue~'s [[https://githu
 
 You might also be able to use [[file:../my/location/via_ip.py][my.location.via_ip]] which uses =my.ip.all= to
 provide geolocation data for an IPs (though no IPs are provided from any
-of the sources here). For an example of usage, see [[https://github.com/purarue/HPI/tree/master/my/ip][here]]
+of the sources here). For an example of usage, see [[https://github.com/seanbreckenridge/HPI/tree/master/my/ip][here]]
 
 #+begin_src python
 class location:
@@ -256,9 +256,9 @@ for cls, p in modules:
 
 ** [[file:../my/google/takeout/parser.py][my.google.takeout.parser]]
 
-Parses Google Takeout using [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]]
+Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]
 
-See [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts
+See [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts
 
 If the =DISABLE_TAKEOUT_CACHE= environment variable is set, this won't
 cache individual exports in =~/.cache/google_takeout_parser=
@@ -67,7 +67,7 @@ If you want to disable a source, you have a few options.
 
 ... that suppresses the warning message and lets you use ~my.location.all~ without having to change any lines of code
 
-Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/purarue/HPI#partially-in-usewith-overrides][purarue]]s location and ip modules.
+Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/seanbreckenridge/HPI#partially-in-usewith-overrides][seanbreckenridge]]s location and ip modules.
 
 This is of course not required for personal or single file modules, its just the pattern that seems to have the least amount of friction for the user, while being extendable, and without using a bulky plugin system to let users add additional sources.
 
@@ -208,13 +208,13 @@ Where ~lastfm.py~ is your version of ~my.lastfm~, which you've copied from this
 
 Then, running ~python3 -m pip install -e .~ in that directory would install that as part of the namespace package, and assuming (see below for possible issues) this appears on ~sys.path~ before the upstream repository, your ~lastfm.py~ file overrides the upstream. Adding more files, like ~my.some_new_module~ into that directory immediately updates the global ~my~ package -- allowing you to quickly add new modules without having to re-install.
 
-If you install both directories as editable packages (which has the benefit of any changes you making in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/purarue/reorder_editable][reorder_editable]] repository.
+If you install both directories as editable packages (which has the benefit of any changes you making in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/seanbreckenridge/reorder_editable][reorder_editable]] repository.
 
 There is no limit to how many directories you could install into a single namespace package, which could be a possible way for people to install additional HPI modules, without worrying about the module count here becoming too large to manage.
 
-There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/purarue/HPI-template][template]] to get started.
+There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/seanbreckenridge/HPI-template][template]] to get started.
 
-Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/purarue/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example.
+Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/seanbreckenridge/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example.
 
 You may use the other modules or [[https://github.com/karlicoss/hpi-personal-overlay][my overlay]] as reference, but python packaging is already a complicated issue, before adding complexities like namespace packages and editable installs on top of it... If you're having trouble extending HPI in this fashion, you can open an issue here, preferably with a link to your code/repository and/or ~setup.py~ you're trying to use.
 
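Side note on this file: the editable namespace-package setup described above is compact enough to sketch. A minimal overlay repository (names are illustrative, not from this diff) would contain just my/lastfm.py, crucially no my/__init__.py, and a setup script along these lines:

```python
# setup.py for a hypothetical ~/my-hpi-overlay namespace package
from setuptools import find_namespace_packages, setup

setup(
    name="my-hpi-overlay",  # placeholder distribution name
    # picks up my/ as a PEP 420 namespace package, merged with upstream HPI
    packages=find_namespace_packages(include=["my*"]),
)
```

After `python3 -m pip install -e ~/my-hpi-overlay`, `my.lastfm` resolves from whichever install appears first on sys.path, which is exactly why the reorder_editable caveat above matters.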
@@ -10,7 +10,7 @@ Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102
 
 # You can see them TODO in overlays dir
 
-Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.
+Consider a toy package/module structure with minimal code, wihout any actual data parsing, just for demonstration purposes.
 
 - =main= package structure
   # TODO do links
@@ -19,7 +19,7 @@ Consider a toy package/module structure with minimal code, without any actual da
     Extracts Twitter data from GDPR archive.
   - =my/twitter/all.py=
     Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used.
-    This will be overridden by =overlay=.
+    This will be overriden by =overlay=.
   - =my/twitter/common.py=
     Contains helper function to merge data, so they can be reused by overlay's =all.py=.
   - =my/reddit.py=
@@ -66,7 +66,7 @@ This basically means that modules will be searched in both paths, with overlay t
 
 ** Installing with =--use-pep517=
 
-See here for discussion https://github.com/purarue/reorder_editable/issues/2, but TLDR it should work similarly.
+See here for discussion https://github.com/seanbreckenridge/reorder_editable/issues/2, but TLDR it should work similarly.
 
 * Testing runtime behaviour (editable install)
 
@@ -126,7 +126,7 @@ https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/myp
 
 For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683
 
-But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
+But ok, maybe mypy treats =main= as an external package somhow but still type checks it properly?
 Let's see what's going on with imports:
 
 : $ mypy --namespace-packages --strict -p my --follow-imports=error
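Side note: when reproducing the runtime experiments in this file, a quick way to see which copy of a module "won" is to inspect the namespace package directly, e.g.:

```python
# minimal resolution check for an editable namespace-package overlay
import my
print(list(my.__path__))  # overlay dir should precede the main repo

import my.twitter.all
print(my.twitter.all.__file__)  # shows which all.py actually got imported
```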
@@ -97,9 +97,9 @@ By default, this just returns the items in the order they were returned by the f
 hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream
 Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
        authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
-       message='sources.smscalls: propagate errors if there are breaking '
+       message='sources.smscalls: propogate errors if there are breaking '
                'schema changes',
-       repo='/home/username/Repos/promnesia-fork',
+       repo='/home/sean/Repos/promnesia-fork',
        sha='22a434fca9a28df9b0915ccf16368df129d2c9ce',
        ref='refs/heads/smscalls-handle-result')
 ```
@@ -195,7 +195,7 @@ To preview, you can use something like [`qgis`](https://qgis.org/en/site/) or fo
 
 <img src="https://user-images.githubusercontent.com/7804791/232249184-7e203ee6-a3ec-4053-800c-751d2c28e690.png" width=500 alt="chicago trip" />
 
-(Sidenote: this is [`@purarue`](https://github.com/purarue/)s locations, on a trip to Chicago)
+(Sidenote: this is [`@seanbreckenridge`](https://github.com/seanbreckenridge/)s locations, on a trip to Chicago)
 
 ## Python reference
 
@@ -301,4 +301,4 @@ The `hpi query` command is a CLI wrapper around the code in [`query.py`](../my/c
 If you specify a range, drop_unsorted is forced to be True
 ```
 
-Those can be imported and accept any sort of iterator, `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/purarue/HPI-personal/blob/master/scripts/listens) which just passes an generator (iterator) as the first argument to `query_range`
+Those can be imported and accept any sort of iterator, `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/seanbreckenridge/HPI-personal/blob/master/scripts/listens) which just passes an generator (iterator) as the first argument to `query_range`
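Side note: the flags shown in the `hpi query` example above compose like ordinary iterator operations; roughly (a simplification, not the actual my.core.query implementation):

```python
# approximate semantics of --order-key / --reverse / --limit
from itertools import islice
from typing import Any, Iterable, Iterator, Optional


def query(items: Iterable[Any], order_key: str, *, reverse: bool = False, limit: Optional[int] = None) -> Iterator[Any]:
    # sort by the named attribute, then lazily cap the number of results
    res = sorted(items, key=lambda x: getattr(x, order_key), reverse=reverse)
    return islice(res, limit)
```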
@@ -387,7 +387,7 @@ But there is an extra caveat: rexport is already coming with nice [[https://gith
 
 Several other HPI modules are following a similar pattern: hypothesis, instapaper, pinboard, kobo, etc.
 
-Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/purarue/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments=
+Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/seanbreckenridge/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments=
 
 ** Twitter
 
@@ -32,6 +32,6 @@ ignore =
 #
 
 # as a reference:
-# https://github.com/purarue/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
+# https://github.com/seanbreckenridge/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
 # and this https://github.com/karlicoss/HPI/pull/151
 # find ./my | entr flake8 --ignore=E402,E501,E741,W503,E266,E302,E305,E203,E261,E252,E251,E221,W291,E225,E303,E702,E202,F841,E731,E306,E127 E722,E231 my | grep -v __NOT_HPI_MODULE__
my/arbtt.py (25 changes)
@@ -2,22 +2,20 @@
 [[https://github.com/nomeata/arbtt#arbtt-the-automatic-rule-based-time-tracker][Arbtt]] time tracking
 '''
 
-from __future__ import annotations
-
 REQUIRES = ['ijson', 'cffi']
 # NOTE likely also needs libyajl2 from apt or elsewhere?
 
 
-from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Sequence, Iterable, List, Optional
 
 
 def inputs() -> Sequence[Path]:
     try:
         from my.config import arbtt as user_config
     except ImportError:
-        from my.core.warnings import low
+        from .core.warnings import low
         low("Couldn't find 'arbtt' config section, falling back to the default capture.log (usually in HOME dir). Add 'arbtt' section with logfiles = '' to suppress this warning.")
         return []
     else:
@@ -57,7 +55,7 @@ class Entry:
         return fromisoformat(ds)
 
     @property
-    def active(self) -> str | None:
+    def active(self) -> Optional[str]:
         # NOTE: WIP, might change this in the future...
         ait = (w for w in self.json['windows'] if w['active'])
         a = next(ait, None)
@@ -76,18 +74,17 @@ class Entry:
 def entries() -> Iterable[Entry]:
     inps = list(inputs())
 
-    base: list[PathIsh] = ['arbtt-dump', '--format=json']
+    base: List[PathIsh] = ['arbtt-dump', '--format=json']
 
-    cmds: list[list[PathIsh]]
+    cmds: List[List[PathIsh]]
     if len(inps) == 0:
         cmds = [base]  # rely on default
     else:
         # otherwise, 'merge' them
-        cmds = [[*base, '--logfile', f] for f in inps]
+        cmds = [base + ['--logfile', f] for f in inps]
 
-    from subprocess import PIPE, Popen
-    import ijson.backends.yajl2_cffi as ijson  # type: ignore
+    import ijson.backends.yajl2_cffi as ijson # type: ignore
+    from subprocess import Popen, PIPE
     for cmd in cmds:
         with Popen(cmd, stdout=PIPE) as p:
             out = p.stdout; assert out is not None
@@ -96,8 +93,8 @@ def entries() -> Iterable[Entry]:
 
 
 def fill_influxdb() -> None:
-    from .core.freezer import Freezer
     from .core.influxdb import magic_fill
+    from .core.freezer import Freezer
     freezer = Freezer(Entry)
     fit = (freezer.freeze(e) for e in entries())
     # TODO crap, influxdb doesn't like None https://github.com/influxdata/influxdb/issues/7722
@@ -109,8 +106,6 @@ def fill_influxdb() -> None:
     magic_fill(fit, name=f'{entries.__module__}:{entries.__name__}')
 
 
-from .core import Stats, stat
-
-
+from .core import stat, Stats
 def stats() -> Stats:
     return stat(entries)
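Side note on this file (and most of the python files below): the bulk of the churn is the same typing back-and-forth. The master side targets python >= 3.9, so it can use builtin generics and PEP 604 unions; the v0.5.20240824 side still supports 3.8 and spells the same types via the typing module. Condensed:

```python
# master style: builtin generics + PEP 604 unions
from __future__ import annotations  # needed at runtime on older pythons


def active(windows: list[dict]) -> str | None:
    return next((w['title'] for w in windows if w.get('active')), None)


# v0.5.20240824 style: equivalent, but python 3.8 compatible
from typing import Dict, List, Optional


def active_38(windows: List[Dict]) -> Optional[str]:
    return next((w['title'] for w in windows if w.get('active')), None)
```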
@@ -1,63 +1,39 @@
+#!/usr/bin/python3
 """
 [[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor
 """
 
-from __future__ import annotations
-
 # todo most of it belongs to DAL... but considering so few people use it I didn't bother for now
-import re
-import sqlite3
-from abc import abstractmethod
-from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Protocol
+import re
+import sqlite3
+from typing import Iterable, Sequence, Set, Optional
 
 import pytz
 
 from my.core import (
-    Paths,
-    Res,
-    Stats,
     get_files,
     make_logger,
+    Res,
     stat,
-    unwrap,
+    Stats,
+    influxdb,
 )
 from my.core.cachew import mcachew
+from my.core.error import unwrap
 from my.core.pandas import DataFrameT, as_dataframe
 from my.core.sqlite import sqlite_connect_immutable
 
-
-class config(Protocol):
-    @property
-    @abstractmethod
-    def export_path(self) -> Paths:
-        raise NotImplementedError
-
-    @property
-    def tz(self) -> pytz.BaseTzInfo:
-        # fixme: later, rely on the timezone provider
-        # NOTE: the timezone should be set with respect to the export date!!!
-        return pytz.timezone('Europe/London')
-        # TODO when I change tz, check the diff
-
-
-def make_config() -> config:
-    from my.config import bluemaestro as user_config
-
-    class combined_config(user_config, config): ...
-
-    return combined_config()
+from my.config import bluemaestro as config
 
 
 logger = make_logger(__name__)
 
 
 def inputs() -> Sequence[Path]:
-    cfg = make_config()
-    return get_files(cfg.export_path)
+    return get_files(config.export_path)
 
 
 Celsius = float
@@ -74,6 +50,12 @@ class Measurement:
     dewpoint: Celsius
 
 
+# fixme: later, rely on the timezone provider
+# NOTE: the timezone should be set with respect to the export date!!!
+tz = pytz.timezone('Europe/London')
+# TODO when I change tz, check the diff
+
+
 def is_bad_table(name: str) -> bool:
     # todo hmm would be nice to have a hook that can patch any module up to
     delegate = getattr(config, 'is_bad_table', None)
@@ -82,31 +64,28 @@ def is_bad_table(name: str) -> bool:
 
 @mcachew(depends_on=inputs)
 def measurements() -> Iterable[Res[Measurement]]:
-    cfg = make_config()
-    tz = cfg.tz
-
     # todo ideally this would be via arguments... but needs to be lazy
     paths = inputs()
     total = len(paths)
     width = len(str(total))
 
-    last: datetime | None = None
+    last: Optional[datetime] = None
 
     # tables are immutable, so can save on processing..
-    processed_tables: set[str] = set()
+    processed_tables: Set[str] = set()
    for idx, path in enumerate(paths):
        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
        tot = 0
        new = 0
        # todo assert increasing timestamp?
        with sqlite_connect_immutable(path) as db:
-            db_dt: datetime | None = None
+            db_dt: Optional[datetime] = None
            try:
                datas = db.execute(
                    f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
                )
                oldfmt = True
-                [(db_dts,)] = db.execute('SELECT last_download FROM info')
+                db_dts = list(db.execute('SELECT last_download FROM info'))[0][0]
                if db_dts == 'N/A':
                    # ??? happens for 20180923-20180928
                    continue
@@ -139,7 +118,7 @@ def measurements() -> Iterable[Res[Measurement]]:
             processed_tables |= set(log_tables)
 
             # todo use later?
-            frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]  # noqa: RUF015
+            frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]
 
             # todo could just filter out the older datapoints?? dunno.
 
@@ -155,7 +134,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                 oldfmt = False
                 db_dt = None
 
-            for (name, tsc, temp, hum, pres, dewp) in datas:
+            for i, (name, tsc, temp, hum, pres, dewp) in enumerate(datas):
                 if is_bad_table(name):
                     continue
 
@@ -232,8 +211,6 @@ def dataframe() -> DataFrameT:
 
 
 def fill_influxdb() -> None:
-    from my.core import influxdb
-
     influxdb.fill(measurements(), measurement=__name__)
 
 
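Side note on this file: the biggest non-mechanical change here is master's move from a module-level `from my.config import bluemaestro as config` to a Protocol describing the expected config shape. The pattern, condensed from the hunk above:

```python
# sketch of the Protocol-based config pattern used on the master side
from abc import abstractmethod
from typing import Protocol


class config(Protocol):
    @property
    @abstractmethod
    def export_path(self) -> str:  # the real module uses my.core.Paths here
        raise NotImplementedError


def make_config() -> config:
    from my.config import bluemaestro as user_config  # user's config section

    # user attributes override the Protocol's defaults/abstract members
    class combined_config(user_config, config): ...

    return combined_config()
```

The payoff is that defaults (like the `tz` property) live next to the module while required fields stay visible to the type checker, and the user config is only touched lazily, at call time.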
@@ -2,42 +2,41 @@
 Blood tracking (manual org-mode entries)
 """
 
-from __future__ import annotations
-
-from collections.abc import Iterable
 from datetime import datetime
-from typing import NamedTuple
+from typing import Iterable, NamedTuple, Optional
 
-import orgparse
-import pandas as pd
-
-from my.config import blood as config  # type: ignore[attr-defined]
-
 from ..core.error import Res
-from ..core.orgmode import one_table, parse_org_datetime
+from ..core.orgmode import parse_org_datetime, one_table
 
 
+import pandas as pd
+import orgparse
 
+
+from my.config import blood as config  # type: ignore[attr-defined]
 
 
 class Entry(NamedTuple):
     dt: datetime
 
-    ketones      : float | None=None
-    glucose      : float | None=None
+    ketones      : Optional[float]=None
+    glucose      : Optional[float]=None
 
-    vitamin_d    : float | None=None
-    vitamin_b12  : float | None=None
+    vitamin_d    : Optional[float]=None
+    vitamin_b12  : Optional[float]=None
 
-    hdl          : float | None=None
-    ldl          : float | None=None
-    triglycerides: float | None=None
+    hdl          : Optional[float]=None
+    ldl          : Optional[float]=None
+    triglycerides: Optional[float]=None
 
-    source       : str | None=None
-    extra        : str | None=None
+    source       : Optional[str]=None
+    extra        : Optional[str]=None
 
 
 Result = Res[Entry]
 
 
-def try_float(s: str) -> float | None:
+def try_float(s: str) -> Optional[float]:
     l = s.split()
     if len(l) == 0:
         return None
@@ -106,7 +105,6 @@ def blood_tests_data() -> Iterable[Result]:
 
 def data() -> Iterable[Result]:
     from itertools import chain
-
     from ..core.error import sort_res_by
     datas = chain(glucose_ketones_data(), blood_tests_data())
     return sort_res_by(datas, key=lambda e: e.dt)
@@ -7,10 +7,10 @@ from ...core.pandas import DataFrameT, check_dataframe
 @check_dataframe
 def dataframe() -> DataFrameT:
     # this should be somehow more flexible...
-    import pandas as pd
-
     from ...endomondo import dataframe as EDF
     from ...runnerup import dataframe as RDF
 
+    import pandas as pd
     return pd.concat([
         EDF(),
         RDF(),
@@ -3,6 +3,7 @@ Cardio data, filtered from various data sources
 '''
 from ...core.pandas import DataFrameT, check_dataframe
 
+
 CARDIO = {
     'Running',
     'Running, treadmill',
@@ -5,18 +5,16 @@ This is probably too specific to my needs, so later I will move it away to a per
 For now it's worth keeping it here as an example and perhaps utility functions might be useful for other HPI modules.
 '''
 
-from __future__ import annotations
-
 from datetime import datetime, timedelta
+from typing import Optional
 
-import pytz
+from ...core.pandas import DataFrameT, check_dataframe as cdf
+from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable
 
 from my.config import exercise as config
 
-from ...core.orgmode import Table, TypedTable, collect, parse_org_datetime
-from ...core.pandas import DataFrameT
-from ...core.pandas import check_dataframe as cdf
 
+import pytz
 # FIXME how to attach it properly?
 tz = pytz.timezone('Europe/London')
 
@@ -107,7 +105,7 @@ def dataframe() -> DataFrameT:
     rows = []
     idxs = []  # type: ignore[var-annotated]
     NO_ENDOMONDO = 'no endomondo matches'
-    for _i, row in mdf.iterrows():
+    for i, row in mdf.iterrows():
         rd = row.to_dict()
         mdate = row['date']
         if pd.isna(mdate):
@@ -116,7 +114,7 @@ def dataframe() -> DataFrameT:
             rows.append(rd)  # presumably has an error set
             continue
 
-        idx: int | None
+        idx: Optional[int]
         close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < _DELTA]
         if len(close) == 0:
             idx = None
@@ -165,9 +163,7 @@ def dataframe() -> DataFrameT:
 # TODO wtf?? where is speed coming from??
 
 
-from ...core import Stats, stat
-
-
+from ...core import stat, Stats
 def stats() -> Stats:
     return stat(cross_trainer_data)
 
@@ -1,6 +1,5 @@
-from ...core import Stats, stat
-from ...core.pandas import DataFrameT
-from ...core.pandas import check_dataframe as cdf
+from ...core import stat, Stats
+from ...core.pandas import DataFrameT, check_dataframe as cdf
 
 
 class Combine:
@@ -8,7 +7,7 @@ class Combine:
         self.modules = modules
 
     @cdf
-    def dataframe(self, *, with_temperature: bool=True) -> DataFrameT:
+    def dataframe(self, with_temperature: bool=True) -> DataFrameT:
         import pandas as pd
         # todo include 'source'?
         df = pd.concat([m.dataframe() for m in self.modules])
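Side note: the only behavioural change in this file is the bare `*` in the `dataframe` signature, which makes `with_temperature` keyword-only:

```python
# keyword-only argument, as on the master side
def dataframe(*, with_temperature: bool = True) -> None: ...


dataframe(with_temperature=False)  # fine on both sides
# dataframe(False)  # TypeError on master; the v0.5 signature accepted this
```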
@@ -1,6 +1,7 @@
-from ... import emfit, jawbone
-from .common import Combine
+from ... import jawbone
+from ... import emfit
 
+from .common import Combine
 _combined = Combine([
     jawbone,
     emfit,
@@ -2,29 +2,21 @@
 Weight data (manually logged)
 '''
 
-from collections.abc import Iterator
-from dataclasses import dataclass
 from datetime import datetime
-from typing import Any
+from typing import NamedTuple, Iterator
 
-from my import orgmode
-from my.core import make_logger
-from my.core.error import Res, extract_error_datetime, set_error_datetime
+from ..core import LazyLogger
+from ..core.error import Res, set_error_datetime, extract_error_datetime
 
-config = Any
+from .. import orgmode
 
+from my.config import weight as config  # type: ignore[attr-defined]
 
-def make_config() -> config:
-    from my.config import weight as user_config  # type: ignore[attr-defined]
-
-    return user_config()
 
+log = LazyLogger('my.body.weight')
 
-log = make_logger(__name__)
 
-
-@dataclass
-class Entry:
+class Entry(NamedTuple):
     dt: datetime
     value: float
     # TODO comment??
@@ -34,8 +26,6 @@ Result = Res[Entry]
 
 
 def from_orgmode() -> Iterator[Result]:
-    cfg = make_config()
-
     orgs = orgmode.query()
     for o in orgmode.query().all():
         if 'weight' not in o.tags:
@@ -56,8 +46,8 @@ def from_orgmode() -> Iterator[Result]:
             yield e
             continue
         # FIXME use timezone provider
-        created = cfg.default_timezone.localize(created)
-        assert created is not None  # ??? somehow mypy wasn't happy?
+        created = config.default_timezone.localize(created)
+        assert created is not None #??? somehow mypy wasn't happy?
         yield Entry(
             dt=created,
             value=w,
@@ -67,23 +57,21 @@ def from_orgmode() -> Iterator[Result]:
 
 def make_dataframe(data: Iterator[Result]):
     import pandas as pd
 
     def it():
         for e in data:
             if isinstance(e, Exception):
                 dt = extract_error_datetime(e)
                 yield {
-                    'dt': dt,
+                    'dt'    : dt,
                     'error': str(e),
                 }
             else:
                 yield {
-                    'dt': e.dt,
+                    'dt'    : e.dt,
                     'weight': e.value,
                 }
 
     df = pd.DataFrame(it())
-    df = df.set_index('dt')
+    df.set_index('dt', inplace=True)
     # TODO not sure about UTC??
     df.index = pd.to_datetime(df.index, utc=True)
     return df
@@ -93,7 +81,6 @@ def dataframe():
     entries = from_orgmode()
     return make_dataframe(entries)
 
-
 # TODO move to a submodule? e.g. my.body.weight.orgmode?
 # so there could be more sources
 # not sure about my.body thing though
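Side note: beyond the NamedTuple/dataclass and config swaps, this file also trades `inplace=True` for reassignment in `make_dataframe`. Both produce the same frame; the reassigning form is the direction pandas has been steering users:

```python
# equivalent results; master uses the first form, v0.5.20240824 the second
import pandas as pd

df = pd.DataFrame({'dt': [1, 2], 'weight': [70.0, 71.5]})

df = df.set_index('dt')             # returns a new frame (master)
# df.set_index('dt', inplace=True)  # mutates in place (v0.5.20240824)
```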
@@ -1,6 +1,7 @@
-from my.core import warnings
+from ..core import warnings
 
 warnings.high('my.books.kobo is deprecated! Please use my.kobo instead!')
 
-from my.core.util import __NOT_HPI_MODULE__
-from my.kobo import *
+from ..core.util import __NOT_HPI_MODULE__
+
+from ..kobo import *  # type: ignore[no-redef]
@@ -1,5 +1,5 @@
 """
-Parses active browser history by backing it up with [[http://github.com/purarue/sqlite_backup][sqlite_backup]]
+Parses active browser history by backing it up with [[http://github.com/seanbreckenridge/sqlite_backup][sqlite_backup]]
 """
 
 REQUIRES = ["browserexport", "sqlite_backup"]
@@ -19,18 +19,16 @@ class config(user_config.active_browser):
     export_path: Paths
 
 
-from collections.abc import Iterator, Sequence
 from pathlib import Path
+from typing import Sequence, Iterator
 
-from browserexport.merge import Visit, read_visits
+from my.core import get_files, Stats, make_logger
+from browserexport.merge import read_visits, Visit
 from sqlite_backup import sqlite_backup
 
-from my.core import Stats, get_files, make_logger
-
 logger = make_logger(__name__)
 
 from .common import _patch_browserexport_logs
 
 _patch_browserexport_logs(logger.level)
 
@@ -1,9 +1,9 @@
-from collections.abc import Iterator
+from typing import Iterator
 
-from browserexport.merge import Visit, merge_visits
-
 from my.core import Stats
 from my.core.source import import_source
+from browserexport.merge import merge_visits, Visit
 
+
 src_export = import_source(module_name="my.browser.export")
 src_active = import_source(module_name="my.browser.active_browser")
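Side note: `import_source` (unchanged here, but central to this file) wraps each source so that a missing optional module degrades into an empty iterator plus a warning instead of an ImportError. Typical usage, sketched from this file's structure (the decorated function body is an assumption, not shown in the hunk):

```python
from typing import Iterator

from browserexport.merge import Visit
from my.core.source import import_source

src_export = import_source(module_name="my.browser.export")


@src_export
def _history_from_export() -> Iterator[Visit]:
    from my.browser import export

    return export.history()
```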
@@ -1,15 +1,14 @@
 """
-Parses browser history using [[http://github.com/purarue/browserexport][browserexport]]
+Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
 """
 
 REQUIRES = ["browserexport"]
 
-from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Iterator, Sequence
 
-from browserexport.merge import Visit, read_and_merge
+import my.config
 
 from my.core import (
     Paths,
     Stats,
@@ -19,9 +18,9 @@ from my.core import (
 )
 from my.core.cachew import mcachew
 
-from .common import _patch_browserexport_logs
+from browserexport.merge import read_and_merge, Visit
 
-import my.config  # isort: skip
+from .common import _patch_browserexport_logs
 
 
 @dataclass
@ -3,24 +3,24 @@ Bumble data from Android app database (in =/data/data/com.bumble.app/databases/C
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from collections.abc import Iterator, Sequence
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from typing import Iterator, Sequence, Optional, Dict
|
||||||
|
|
||||||
from more_itertools import unique_everseen
|
from more_itertools import unique_everseen
|
||||||
|
|
||||||
from my.core import Paths, get_files
|
from my.config import bumble as user_config
|
||||||
|
|
||||||
from my.config import bumble as user_config # isort: skip
|
|
||||||
|
|
||||||
|
|
||||||
|
from ..core import Paths
|
||||||
@dataclass
|
@dataclass
|
||||||
class config(user_config.android):
|
class config(user_config.android):
|
||||||
# paths[s]/glob to the exported sqlite databases
|
# paths[s]/glob to the exported sqlite databases
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
|
|
||||||
|
from ..core import get_files
|
||||||
|
from pathlib import Path
|
||||||
def inputs() -> Sequence[Path]:
|
def inputs() -> Sequence[Path]:
|
||||||
return get_files(config.export_path)
|
return get_files(config.export_path)
|
||||||
|
|
||||||
|
@@ -43,23 +43,21 @@ class _BaseMessage:
 @dataclass(unsafe_hash=True)
 class _Message(_BaseMessage):
     conversation_id: str
-    reply_to_id: str | None
+    reply_to_id: Optional[str]


 @dataclass(unsafe_hash=True)
 class Message(_BaseMessage):
     person: Person
-    reply_to: Message | None
+    reply_to: Optional[Message]


 import json
-import sqlite3
 from typing import Union

-from my.core.compat import assert_never

 from ..core import Res
-from ..core.sqlite import select, sqlite_connect_immutable
+import sqlite3
+from ..core.sqlite import sqlite_connect_immutable, select
+from my.core.compat import assert_never

 EntitiesRes = Res[Union[Person, _Message]]

@@ -122,8 +120,8 @@ _UNKNOWN_PERSON = "UNKNOWN_PERSON"


 def messages() -> Iterator[Res[Message]]:
-    id2person: dict[str, Person] = {}
-    id2msg: dict[str, Message] = {}
+    id2person: Dict[str, Person] = {}
+    id2msg: Dict[str, Message] = {}
     for x in unique_everseen(_entities(), key=_key):
         if isinstance(x, Exception):
             yield x
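Note: a recurring theme in this comparison is master's PEP 604/585 annotation syntax (str | None, dict[str, Person]) versus the tagged release's typing.Optional/typing.Dict. A minimal illustrative sketch (toy class, not taken from the diff) of why the newer spelling still imports on older interpreters:

    # with this future import, annotations are stored as strings and never
    # evaluated at runtime, so PEP 604/585 syntax parses even on Python 3.8/3.9
    from __future__ import annotations

    from dataclasses import dataclass

    @dataclass(unsafe_hash=True)
    class Message:
        conversation_id: str
        reply_to_id: str | None  # would need Optional[str] without the future import

    print(Message.__annotations__)  # values are plain strings like 'str | None'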
@@ -15,12 +15,11 @@ from my.core.time import zone_to_countrycode

 @lru_cache(1)
 def _calendar():
     from workalendar.registry import registry # type: ignore

     # todo switch to using time.tz.main once _get_tz stabilizes?
     from ..time.tz import via_location as LTZ
     # TODO would be nice to do it dynamically depending on the past timezones...
-    tz = LTZ.get_tz(datetime.now())
+    tz = LTZ._get_tz(datetime.now())
     assert tz is not None
     zone = tz.zone; assert zone is not None
     code = zone_to_countrycode(zone)
@@ -1,6 +1,7 @@
 import my.config as config

 from .core import __NOT_HPI_MODULE__

 from .core import warnings as W

 # still used in Promnesia, maybe in dashboard?
@@ -1,12 +1,13 @@
-import json
-from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from functools import cached_property
+import json
 from pathlib import Path
+from typing import Dict, Iterator, Sequence

+from my.core import get_files, Res, datetime_aware

 from my.config import codeforces as config # type: ignore[attr-defined]
-from my.core import Res, datetime_aware, get_files


 def inputs() -> Sequence[Path]:
@@ -38,7 +39,7 @@ class Competition:
 class Parser:
     def __init__(self, *, inputs: Sequence[Path]) -> None:
         self.inputs = inputs
-        self.contests: dict[ContestId, Contest] = {}
+        self.contests: Dict[ContestId, Contest] = {}

     def _parse_allcontests(self, p: Path) -> Iterator[Contest]:
         j = json.loads(p.read_text())
@@ -1,32 +1,29 @@
 """
 Git commits data for repositories on your filesystem
 """

-from __future__ import annotations

 REQUIRES = [
     'gitpython',
 ]

-import shutil
-from collections.abc import Iterator, Sequence
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Optional, cast

-from my.core import LazyLogger, PathIsh, make_config
+import shutil
+from pathlib import Path
+from datetime import datetime, timezone
+from dataclasses import dataclass, field
+from typing import List, Optional, Iterator, Set, Sequence, cast
+
+
+from my.core import PathIsh, LazyLogger, make_config
 from my.core.cachew import cache_dir, mcachew
 from my.core.warnings import high

-from my.config import commits as user_config # isort: skip


+from my.config import commits as user_config
 @dataclass
 class commits_cfg(user_config):
     roots: Sequence[PathIsh] = field(default_factory=list)
-    emails: Sequence[str] | None = None
-    names: Sequence[str] | None = None
+    emails: Optional[Sequence[str]] = None
+    names: Optional[Sequence[str]] = None


 # experiment to make it lazy?
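Note: commits_cfg above keeps a mutable default via dataclasses.field; a small self-contained sketch of that pattern (the class name is a hypothetical stand-in, not from the diff):

    from dataclasses import dataclass, field
    from typing import Optional, Sequence

    @dataclass
    class commits_cfg_demo:
        # a mutable default like [] must go through default_factory,
        # otherwise all instances would share one list object
        roots: Sequence[str] = field(default_factory=list)
        emails: Optional[Sequence[str]] = None
        names: Optional[Sequence[str]] = None

    cfg = commits_cfg_demo(roots=['~/code'])
    print(cfg.roots, cfg.emails)  # ['~/code'] None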
@@ -43,6 +40,7 @@ def config() -> commits_cfg:
 import git
 from git.repo.fun import is_git_dir


 log = LazyLogger(__name__, level='info')

@@ -95,7 +93,7 @@ def _git_root(git_dir: PathIsh) -> Path:
         return gd # must be bare


-def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Commit]:
+def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Commit]:
     # without path might not handle pull heads properly
     for c in gr.iter_commits(rev=rev):
         if not by_me(c):
@@ -122,7 +120,7 @@ def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Com

 def repo_commits(repo: PathIsh):
     gr = git.Repo(str(repo))
-    emitted: set[str] = set()
+    emitted: Set[str] = set()
     for r in gr.references:
         yield from _repo_commits_aux(gr=gr, rev=r.path, emitted=emitted)

@@ -138,19 +136,19 @@ def canonical_name(repo: Path) -> str:
     # else:
     #     rname = r.name
     #     if 'backups/github' in repo:
     #         pass # TODO


 def _fd_path() -> str:
     # todo move it to core
-    fd_path: str | None = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
+    fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
     if fd_path is None:
         high("my.coding.commits requires 'fd' to be installed, See https://github.com/sharkdp/fd#installation")
     assert fd_path is not None
     return fd_path


-def git_repos_in(roots: list[Path]) -> list[Path]:
+def git_repos_in(roots: List[Path]) -> List[Path]:
     from subprocess import check_output
     outputs = check_output([
         _fd_path(),
@@ -163,36 +161,37 @@ def git_repos_in(roots: list[Path]) -> list[Path]:
         *roots,
     ]).decode('utf8').splitlines()

-    candidates = {Path(o).resolve().absolute().parent for o in outputs}
+    candidates = set(Path(o).resolve().absolute().parent for o in outputs)

     # exclude stuff within .git dirs (can happen for submodules?)
     candidates = {c for c in candidates if '.git' not in c.parts[:-1]}

     candidates = {c for c in candidates if is_git_dir(c)}

-    repos = sorted(map(_git_root, candidates))
+    repos = list(sorted(map(_git_root, candidates)))
     return repos


-def repos() -> list[Path]:
+def repos() -> List[Path]:
     return git_repos_in(list(map(Path, config().roots)))


 # returns modification time for an index to use as hash function
 def _repo_depends_on(_repo: Path) -> int:
-    for pp in [
+    for pp in {
         ".git/FETCH_HEAD",
         ".git/HEAD",
         "FETCH_HEAD", # bare
         "HEAD", # bare
-    ]:
+    }:
         ff = _repo / pp
         if ff.exists():
             return int(ff.stat().st_mtime)
-    raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")
+    else:
+        raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")


-def _commits(_repos: list[Path]) -> Iterator[Commit]:
+def _commits(_repos: List[Path]) -> Iterator[Commit]:
     for r in _repos:
         yield from _cached_commits(r)

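Note: several rows above are pure style changes: set(generator) becomes a set comprehension, and list(sorted(...)) drops the redundant list() call since sorted already returns a list. A quick illustration with made-up paths:

    from pathlib import Path

    outputs = ['a/.git/HEAD', 'b/.git/HEAD']

    # the comprehension and set(generator) build the same set;
    # the comprehension just avoids an extra call
    candidates = {Path(o).resolve().parent for o in outputs}
    assert candidates == set(Path(o).resolve().parent for o in outputs)

    repos = sorted(candidates)  # sorted() already returns a list
    assert isinstance(repos, list)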
@@ -1,12 +1,9 @@
-from typing import TYPE_CHECKING
+import warnings

-from my.core import warnings
+warnings.warn('my.coding.github is deprecated! Please use my.github.all instead!')

-warnings.high('my.coding.github is deprecated! Please use my.github.all instead!')
 # todo why aren't DeprecationWarning shown by default??

-if not TYPE_CHECKING:
-    from ..github.all import events, get_events # noqa: F401
+from ..github.all import events, get_events

 # todo deprecate properly
 iter_events = events
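Note: the module above is a deprecation shim: warn, then re-export from the new location so old imports keep working. A generic, runnable sketch of the pattern (forwarding to math purely for illustration; the module name is hypothetical):

    # shim.py -- hypothetical deprecated module forwarding to a new home
    import warnings

    warnings.warn('shim is deprecated! Please use math instead!', DeprecationWarning, stacklevel=2)

    # re-export so existing `from shim import sqrt` keeps working
    from math import sqrt  # noqa: E402,F401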
@@ -1,6 +1,6 @@
 from .core.warnings import high

 high("DEPRECATED! Please use my.core.common instead.")

 from .core import __NOT_HPI_MODULE__

 from .core.common import *
36 my/config.py
@@ -9,18 +9,17 @@ This file is used for:
 - mypy: this file provides some type annotations
 - for loading the actual user config
 '''

-from __future__ import annotations

 #### NOTE: you won't need this line VVVV in your personal config
-from my.core import init # noqa: F401 # isort: skip
+from my.core import init
 ###


 from datetime import tzinfo
 from pathlib import Path
+from typing import List

-from my.core import PathIsh, Paths
+from my.core import Paths, PathIsh


 class hypothesis:
@@ -76,16 +75,14 @@ class google:
     takeout_path: Paths = ''


-from collections.abc import Sequence
-from datetime import date, datetime, timedelta
-from typing import Union
+from typing import Sequence, Union, Tuple
+from datetime import datetime, date, timedelta

 DateIsh = Union[datetime, date, str]
-LatLon = tuple[float, float]
+LatLon = Tuple[float, float]
 class location:
     # todo ugh, need to think about it... mypy wants the type here to be general, otherwise it can't deduce
     # and we can't import the types from the module itself, otherwise would be circular. common module?
-    home: LatLon | Sequence[tuple[DateIsh, LatLon]] = (1.0, -1.0)
+    home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
     home_accuracy = 30_000.0

     class via_ip:
@@ -106,8 +103,6 @@ class location:


 from typing import Literal


 class time:
     class tz:
         policy: Literal['keep', 'convert', 'throw']
@@ -126,9 +121,10 @@ class arbtt:
     logfiles: Paths


+from typing import Optional
 class commits:
-    emails: Sequence[str] | None
-    names: Sequence[str] | None
+    emails: Optional[Sequence[str]]
+    names: Optional[Sequence[str]]
     roots: Sequence[PathIsh]

@@ -154,8 +150,8 @@ class tinder:
 class instagram:
     class android:
         export_path: Paths
-        username: str | None
-        full_name: str | None
+        username: Optional[str]
+        full_name: Optional[str]

     class gdpr:
         export_path: Paths
@@ -173,7 +169,7 @@ class materialistic:
 class fbmessenger:
     class fbmessengerexport:
         export_db: PathIsh
-        facebook_id: str | None
+        facebook_id: Optional[str]
     class android:
         export_path: Paths

@@ -251,7 +247,7 @@ class runnerup:
 class emfit:
     export_path: Path
     timezone: tzinfo
-    excluded_sids: list[str]
+    excluded_sids: List[str]


 class foursquare:
@@ -274,7 +270,7 @@ class roamresearch:
 class whatsapp:
     class android:
         export_path: Paths
-        my_user_id: str | None
+        my_user_id: Optional[str]


 class harmonic:
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING
 from .cfg import make_config
 from .common import PathIsh, Paths, get_files
 from .compat import assert_never
-from .error import Res, notnone, unwrap
+from .error import Res, unwrap
 from .logging import (
     make_logger,
 )
@@ -29,25 +29,22 @@ if not TYPE_CHECKING:


 __all__ = [
-    '__NOT_HPI_MODULE__',
+    'get_files', 'PathIsh', 'Paths',
     'Json',
-    'LazyLogger', # legacy import
-    'Path',
-    'PathIsh',
-    'Paths',
-    'Res',
-    'Stats',
-    'assert_never', # TODO maybe deprecate from use in my.core? will be in stdlib soon
-    'dataclass',
-    'datetime_aware',
-    'datetime_naive',
-    'get_files',
-    'make_config',
     'make_logger',
-    'notnone',
-    'stat',
-    'unwrap',
+    'LazyLogger', # legacy import
     'warn_if_empty',
+    'stat', 'Stats',
+    'datetime_aware', 'datetime_naive',
+    'assert_never', # TODO maybe deprecate from use in my.core? will be in stdlib soon
+
+    'make_config',
+
+    '__NOT_HPI_MODULE__',
+
+    'Res', 'unwrap',
+
+    'dataclass', 'Path',
 ]

@@ -55,7 +52,7 @@ __all__ = [
 # you could put _init_hook.py next to your private my/config
 # that way you can configure logging/warnings/env variables on every HPI import
 try:
-    import my._init_hook # type: ignore[import-not-found] # noqa: F401
+    import my._init_hook # type: ignore[import-not-found]
 except:
     pass
 ##
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import functools
 import importlib
 import inspect
@@ -9,18 +7,17 @@ import shutil
 import sys
 import tempfile
 import traceback
-from collections.abc import Iterable, Sequence
 from contextlib import ExitStack
 from itertools import chain
 from pathlib import Path
 from subprocess import PIPE, CompletedProcess, Popen, check_call, run
-from typing import Any, Callable
+from typing import Any, Callable, Iterable, List, Optional, Sequence, Type

 import click


-@functools.lru_cache
-def mypy_cmd() -> Sequence[str] | None:
+@functools.lru_cache()
+def mypy_cmd() -> Optional[Sequence[str]]:
     try:
         # preferably, use mypy from current python env
         import mypy # noqa: F401 fine not to use it
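Note: one row above swaps @functools.lru_cache() for the bare decorator form, which Python 3.8+ accepts when no arguments are passed; both spellings behave identically. Toy demonstration, not from the diff:

    import functools

    @functools.lru_cache  # bare form, equivalent to @functools.lru_cache()
    def squared(x: int) -> int:
        print(f'computing {x}**2')
        return x * x

    squared(4)  # prints 'computing 4**2'
    squared(4)  # served from the cache, no print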
@@ -35,7 +32,7 @@ def mypy_cmd() -> Sequence[str] | None:
     return None


-def run_mypy(cfg_path: Path) -> CompletedProcess | None:
+def run_mypy(cfg_path: Path) -> Optional[CompletedProcess]:
     # todo dunno maybe use the same mypy config in repository?
     # I'd need to install mypy.ini then??
     env = {**os.environ}
@@ -46,7 +43,7 @@ def run_mypy(cfg_path: Path) -> CompletedProcess | None:
     cmd = mypy_cmd()
     if cmd is None:
         return None
-    mres = run([ # noqa: UP022,PLW1510
+    mres = run([
         *cmd,
         '--namespace-packages',
         '--color-output', # not sure if works??
@@ -66,27 +63,21 @@ def eprint(x: str) -> None:
     # err=True prints to stderr
     click.echo(x, err=True)


 def indent(x: str) -> str:
-    # todo use textwrap.indent?
     return ''.join(' ' + l for l in x.splitlines(keepends=True))


 OK = '✅'
 OFF = '🔲'


 def info(x: str) -> None:
     eprint(OK + ' ' + x)


 def error(x: str) -> None:
     eprint('❌ ' + x)


 def warning(x: str) -> None:
     eprint('❗ ' + x) # todo yellow?


 def tb(e: Exception) -> None:
     tb = ''.join(traceback.format_exception(Exception, e, e.__traceback__))
@@ -95,7 +86,6 @@ def tb(e: Exception) -> None:

 def config_create() -> None:
     from .preinit import get_mycfg_dir

     mycfg_dir = get_mycfg_dir()

     created = False
@@ -104,8 +94,7 @@ def config_create() -> None:
         my_config = mycfg_dir / 'my' / 'config' / '__init__.py'

         my_config.parent.mkdir(parents=True)
-        my_config.write_text(
-            '''
+        my_config.write_text('''
 ### HPI personal config
 ## see
 # https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules
@@ -128,8 +117,7 @@ class example:

 ### you can insert your own configuration below
 ### but feel free to delete the stuff above if you don't need ti
-'''.lstrip()
-        )
+'''.lstrip())
         info(f'created empty config: {my_config}')
         created = True
     else:
@@ -142,13 +130,12 @@ class example:

 # todo return the config as a result?
 def config_ok() -> bool:
-    errors: list[Exception] = []
+    errors: List[Exception] = []

     # at this point 'my' should already be imported, so doesn't hurt to extract paths from it
     import my

     try:
-        paths: list[str] = list(my.__path__)
+        paths: List[str] = list(my.__path__)
     except Exception as e:
         errors.append(e)
         error('failed to determine module import path')
@@ -158,23 +145,19 @@ def config_ok() -> bool:

     # first try doing as much as possible without actually importing my.config
     from .preinit import get_mycfg_dir

     cfg_path = get_mycfg_dir()
     # alternative is importing my.config and then getting cfg_path from its __file__/__path__
     # not sure which is better tbh

     ## check we're not using stub config
     import my.core

     try:
         core_pkg_path = str(Path(my.core.__path__[0]).parent)
         if str(cfg_path).startswith(core_pkg_path):
-            error(
-                f'''
+            error(f'''
 Seems that the stub config is used ({cfg_path}). This is likely not going to work.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules for more information
-'''.strip()
-            )
+'''.strip())
             errors.append(RuntimeError('bad config path'))
     except Exception as e:
         errors.append(e)
@@ -188,6 +171,8 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
     # use a temporary directory, useful because
     # - compileall ignores -B, so always craps with .pyc files (annoyng on RO filesystems)
     # - compileall isn't following symlinks, just silently ignores them
+        # note: ugh, annoying that copytree requires a non-existing dir before 3.8.
+        # once we have min version 3.8, can use dirs_exist_ok=True param
         tdir = Path(td) / 'cfg'
         # NOTE: compileall still returns code 0 if the path doesn't exist..
         # but in our case hopefully it's not an issue
@@ -196,7 +181,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
         try:
             # this will resolve symlinks when copying
             # should be under try/catch since might fail if some symlinks are missing
-            shutil.copytree(cfg_path, tdir, dirs_exist_ok=True)
+            shutil.copytree(cfg_path, tdir)
             check_call(cmd)
             info('syntax check: ' + ' '.join(cmd))
         except Exception as e:
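Note: the dirs_exist_ok=True flag used on the master side was added to shutil.copytree in Python 3.8; it allows copying into an existing directory instead of raising FileExistsError. Minimal demonstration with temporary paths (not from the diff):

    import shutil
    import tempfile
    from pathlib import Path

    src = Path(tempfile.mkdtemp()) / 'cfg'
    src.mkdir()
    (src / 'config.py').write_text('x = 1')

    dst = Path(tempfile.mkdtemp())  # destination already exists
    shutil.copytree(src, dst, dirs_exist_ok=True)  # would raise without the flag
    print((dst / 'config.py').read_text())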
@@ -206,7 +191,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module

     ## check types
     mypy_res = run_mypy(cfg_path)
     if mypy_res is not None: # has mypy
         rc = mypy_res.returncode
         if rc == 0:
             info('mypy check : success')
@@ -229,16 +214,16 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
     if len(errors) > 0:
         error(f'config check: {len(errors)} errors')
         return False
+    else:
         # note: shouldn't exit here, might run something else
         info('config check: success!')
         return True


 from .util import HPIModule, modules


-def _modules(*, all: bool = False) -> Iterable[HPIModule]:
+def _modules(*, all: bool=False) -> Iterable[HPIModule]:
     skipped = []
     for m in modules():
         if not all and m.skip_reason is not None:
@@ -249,7 +234,7 @@ def _modules(*, all: bool = False) -> Iterable[HPIModule]:
         warning(f'Skipped {len(skipped)} modules: {skipped}. Pass --all if you want to see them.')


-def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: list[str]) -> None:
+def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: List[str]) -> None:
     if len(for_modules) > 0:
         # if you're checking specific modules, show errors
         # hopefully makes sense?
@@ -273,7 +258,7 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: li
     # todo add a --all argument to disregard is_active check?
     for mr in mods:
         skip = mr.skip_reason
         m = mr.name
         if skip is not None:
             eprint(f'{OFF} {click.style("SKIP", fg="yellow")}: {m:<50} {skip}')
             continue
@@ -323,8 +308,8 @@ def list_modules(*, list_all: bool) -> None:
     tabulate_warnings()

     for mr in _modules(all=list_all):
         m = mr.name
         sr = mr.skip_reason
         if sr is None:
             pre = OK
             suf = ''
@@ -340,20 +325,17 @@ def tabulate_warnings() -> None:
     Helper to avoid visual noise in hpi modules/doctor
     '''
     import warnings

     orig = warnings.formatwarning

     def override(*args, **kwargs) -> str:
         res = orig(*args, **kwargs)
         return ''.join(' ' + x for x in res.splitlines(keepends=True))

     warnings.formatwarning = override
     # TODO loggers as well?


 def _requires(modules: Sequence[str]) -> Sequence[str]:
     from .discovery_pure import module_by_name

     mods = [module_by_name(module) for module in modules]
     res = []
     for mod in mods:
@@ -380,7 +362,7 @@ def module_requires(*, module: Sequence[str]) -> None:
         click.echo(x)


-def module_install(*, user: bool, module: Sequence[str], parallel: bool = False, break_system_packages: bool = False) -> None:
+def module_install(*, user: bool, module: Sequence[str], parallel: bool=False, break_system_packages: bool=False) -> None:
     if isinstance(module, str):
         # legacy behavior, used to take a since argument
         module = [module]
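Note: many signatures in this comparison gain a leading *, which makes every following parameter keyword-only, so callers must name their arguments and cannot silently swap booleans. Sketch with a hypothetical stand-in function:

    def module_install_demo(*, user: bool, parallel: bool = False) -> None:
        # everything after the bare * must be passed by keyword
        print(f'user={user} parallel={parallel}')

    module_install_demo(user=True)   # ok
    # module_install_demo(True)      # TypeError: takes 0 positional arguments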
@@ -391,9 +373,8 @@ def module_install(*, user: bool, module: Sequence[str], parallel: bool = False,
         warning('requirements list is empty, no need to install anything')
         return

-    use_uv = 'HPI_MODULE_INSTALL_USE_UV' in os.environ
     pre_cmd = [
-        sys.executable, '-m', *(['uv'] if use_uv else []), 'pip',
+        sys.executable, '-m', 'pip',
         'install',
         *(['--user'] if user else []), # todo maybe instead, forward all the remaining args to pip?
         *(['--break-system-packages'] if break_system_packages else []), # https://peps.python.org/pep-0668/
@@ -411,7 +392,7 @@ def module_install(*, user: bool, module: Sequence[str], parallel: bool = False,
         # I think it only helps for pypi artifacts (not git!),
         # and only if they weren't cached
         for r in requirements:
-            cmds.append([*pre_cmd, r])
+            cmds.append(pre_cmd + [r])
     else:
         if parallel:
             warning('parallel install is not supported on this platform, installing sequentially...')
@@ -457,7 +438,7 @@ def _ui_getchar_pick(choices: Sequence[str], prompt: str = 'Select from: ') -> i
     return result_map[ch]


-def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = True) -> Iterable[Callable[..., Any]]:
+def _locate_functions_or_prompt(qualified_names: List[str], prompt: bool = True) -> Iterable[Callable[..., Any]]:
     from .query import QueryException, locate_qualified_function
     from .stats import is_data_provider

@@ -475,9 +456,9 @@ def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = Tr
             # user to select a 'data provider' like function
             try:
                 mod = importlib.import_module(qualname)
-            except Exception as ie:
+            except Exception:
                 eprint(f"During fallback, importing '{qualname}' as module failed")
-                raise qr_err from ie
+                raise qr_err

             # find data providers in this module
             data_providers = [f for _, f in inspect.getmembers(mod, inspect.isfunction) if is_data_provider(f)]
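Note: the raise qr_err from ie form on the master side chains the import failure onto the query error, so tracebacks keep the original cause instead of losing it. Toy illustration (names are made up):

    def lookup(qualname: str) -> None:
        primary_err = ValueError(f'could not locate function {qualname!r}')
        try:
            __import__(qualname)
        except Exception as ie:
            raise primary_err from ie  # ie becomes primary_err.__cause__

    try:
        lookup('no.such.module')
    except ValueError as e:
        print(repr(e.__cause__))  # the underlying ModuleNotFoundError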
@@ -507,7 +488,6 @@ def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = Tr

 def _warn_exceptions(exc: Exception) -> None:
     from my.core import make_logger

     logger = make_logger('CLI', level='warning')

     logger.exception(f'hpi query: {exc}')
@@ -519,14 +499,14 @@ def query_hpi_functions(
     *,
     output: str = 'json',
     stream: bool = False,
-    qualified_names: list[str],
-    order_key: str | None,
-    order_by_value_type: type | None,
+    qualified_names: List[str],
+    order_key: Optional[str],
+    order_by_value_type: Optional[Type],
     after: Any,
     before: Any,
     within: Any,
     reverse: bool = False,
-    limit: int | None,
+    limit: Optional[int],
     drop_unsorted: bool,
     wrap_unsorted: bool,
     warn_exceptions: bool,
@@ -538,9 +518,6 @@ def query_hpi_functions(
     # chain list of functions from user, in the order they wrote them on the CLI
     input_src = chain(*(f() for f in _locate_functions_or_prompt(qualified_names)))

-    # NOTE: if passing just one function to this which returns a single namedtuple/dataclass,
-    # using both --order-key and --order-type will often be faster as it does not need to
-    # duplicate the iterator in memory, or try to find the --order-type type on each object before sorting
     res = select_range(
         input_src,
         order_key=order_key,
@@ -553,8 +530,7 @@ def query_hpi_functions(
         warn_exceptions=warn_exceptions,
         warn_func=_warn_exceptions,
         raise_exceptions=raise_exceptions,
-        drop_exceptions=drop_exceptions,
-    )
+        drop_exceptions=drop_exceptions)

     if output == 'json':
         from .serialize import dumps
@@ -588,7 +564,7 @@ def query_hpi_functions(

         # can ignore the mypy warning here, locations_to_gpx yields any errors
         # if you didnt pass it something that matches the LocationProtocol
         for exc in locations_to_gpx(res, sys.stdout): # type: ignore[arg-type]
             if warn_exceptions:
                 _warn_exceptions(exc)
             elif raise_exceptions:
@@ -601,11 +577,10 @@ def query_hpi_functions(
         # output == 'repl'
         eprint(f"\nInteract with the results by using the {click.style('res', fg='green')} variable\n")
         try:
-            import IPython # type: ignore[import,unused-ignore]
+            import IPython # type: ignore[import]
         except ModuleNotFoundError:
             eprint("'repl' typically uses ipython, install it with 'python3 -m pip install ipython'. falling back to stdlib...")
             import code

             code.interact(local=locals())
         else:
             IPython.embed()
@@ -613,7 +588,7 @@ def query_hpi_functions(

 @click.group()
 @click.option("--debug", is_flag=True, default=False, help="Show debug logs")
-def main(*, debug: bool) -> None:
+def main(debug: bool) -> None:
     '''
     Human Programming Interface

@@ -639,19 +614,20 @@ def main(*, debug: bool) -> None:
     # to run things at the end (would need to use a callback or pass context)
     # https://click.palletsprojects.com/en/7.x/commands/#nested-handling-and-contexts

-    tdir = Path(tempfile.gettempdir()) / 'hpi_temp_dir'
-    tdir.mkdir(exist_ok=True)
+    tdir: str = os.path.join(tempfile.gettempdir(), 'hpi_temp_dir')
+    if not os.path.exists(tdir):
+        os.makedirs(tdir)
     os.chdir(tdir)


 @functools.lru_cache(maxsize=1)
-def _all_mod_names() -> list[str]:
+def _all_mod_names() -> List[str]:
     """Should include all modules, in case user is trying to diagnose issues"""
     # sort this, so that the order doesn't change while tabbing through
     return sorted([m.name for m in modules()])


-def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> list[str]:
+def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> List[str]:
     return [m for m in _all_mod_names() if m.startswith(incomplete)]

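Note: the temp-dir setup above moves between os.path/os.makedirs and pathlib; with pathlib, mkdir(exist_ok=True) replaces the explicit existence check. Equivalent sketch:

    import os
    import tempfile
    from pathlib import Path

    tdir = Path(tempfile.gettempdir()) / 'hpi_temp_dir'
    tdir.mkdir(exist_ok=True)  # no separate os.path.exists() check needed
    os.chdir(tdir)
    print(Path.cwd())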
@@ -661,7 +637,7 @@ def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: st
 @click.option('-q', '--quick', is_flag=True, help='Only run partial checks (first 100 items)')
 @click.option('-S', '--skip-config-check', 'skip_conf', is_flag=True, help='Skip configuration check')
 @click.argument('MODULE', nargs=-1, required=False, shell_complete=_module_autocomplete)
-def doctor_cmd(*, verbose: bool, list_all: bool, quick: bool, skip_conf: bool, module: Sequence[str]) -> None:
+def doctor_cmd(verbose: bool, list_all: bool, quick: bool, skip_conf: bool, module: Sequence[str]) -> None:
     '''
     Run various checks

@@ -695,7 +671,7 @@ def config_create_cmd() -> None:

 @main.command(name='modules', short_help='list available modules')
 @click.option('--all', 'list_all', is_flag=True, help='List all modules, including disabled')
-def module_cmd(*, list_all: bool) -> None:
+def module_cmd(list_all: bool) -> None:
     '''List available modules'''
     list_modules(list_all=list_all)

@@ -708,7 +684,7 @@ def module_grp() -> None:

 @module_grp.command(name='requires', short_help='print module reqs')
 @click.argument('MODULES', shell_complete=_module_autocomplete, nargs=-1, required=True)
-def module_requires_cmd(*, modules: Sequence[str]) -> None:
+def module_requires_cmd(modules: Sequence[str]) -> None:
     '''
     Print MODULES requirements

@@ -725,7 +701,7 @@ def module_requires_cmd(*, modules: Sequence[str]) -> None:
               is_flag=True,
               help='Bypass PEP 668 and install dependencies into the system-wide python package directory.')
 @click.argument('MODULES', shell_complete=_module_autocomplete, nargs=-1, required=True)
-def module_install_cmd(*, user: bool, parallel: bool, break_system_packages: bool, modules: Sequence[str]) -> None:
+def module_install_cmd(user: bool, parallel: bool, break_system_packages: bool, modules: Sequence[str]) -> None:
     '''
     Install dependencies for modules using pip

@@ -806,18 +782,17 @@ def module_install_cmd(*, user: bool, parallel: bool, break_system_packages: boo
               help='ignore any errors returned as objects from the functions')
 @click.argument('FUNCTION_NAME', nargs=-1, required=True, shell_complete=_module_autocomplete)
 def query_cmd(
-    *,
     function_name: Sequence[str],
     output: str,
     stream: bool,
-    order_key: str | None,
-    order_type: str | None,
-    after: str | None,
-    before: str | None,
-    within: str | None,
-    recent: str | None,
+    order_key: Optional[str],
+    order_type: Optional[str],
+    after: Optional[str],
+    before: Optional[str],
+    within: Optional[str],
+    recent: Optional[str],
     reverse: bool,
-    limit: int | None,
+    limit: Optional[int],
     drop_unsorted: bool,
     wrap_unsorted: bool,
     warn_exceptions: bool,
@@ -853,7 +828,7 @@ def query_cmd(

     from datetime import date, datetime

-    chosen_order_type: type | None
+    chosen_order_type: Optional[Type]
     if order_type == "datetime":
         chosen_order_type = datetime
     elif order_type == "date":
@@ -889,8 +864,7 @@ def query_cmd(
             wrap_unsorted=wrap_unsorted,
             warn_exceptions=warn_exceptions,
             raise_exceptions=raise_exceptions,
-            drop_exceptions=drop_exceptions,
-        )
+            drop_exceptions=drop_exceptions)
     except QueryException as qe:
         eprint(str(qe))
         sys.exit(1)
@@ -905,7 +879,6 @@ def query_cmd(

 def test_requires() -> None:
     from click.testing import CliRunner

     result = CliRunner().invoke(main, ['module', 'requires', 'my.github.ghexport', 'my.browser.export'])
     assert result.exit_code == 0
     assert "github.com/karlicoss/ghexport" in result.output
@@ -10,18 +10,15 @@ how many cores we want to dedicate to the DAL.
 Enabled by the env variable, specifying how many cores to dedicate
 e.g. "HPI_CPU_POOL=4 hpi query ..."
 """

-from __future__ import annotations

 import os
 from concurrent.futures import ProcessPoolExecutor
-from typing import cast
+from typing import Optional, cast

 _NOT_SET = cast(ProcessPoolExecutor, object())
-_INSTANCE: ProcessPoolExecutor | None = _NOT_SET
+_INSTANCE: Optional[ProcessPoolExecutor] = _NOT_SET


-def get_cpu_pool() -> ProcessPoolExecutor | None:
+def get_cpu_pool() -> Optional[ProcessPoolExecutor]:
     global _INSTANCE
     if _INSTANCE is _NOT_SET:
         use_cpu_pool = os.environ.get('HPI_CPU_POOL')
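Note: _NOT_SET above is a sentinel distinct from None, letting the pool cache tell "not initialised yet" apart from "initialised to no pool". A condensed sketch of the idiom, assuming the same HPI_CPU_POOL convention as the module above:

    import os
    from concurrent.futures import ProcessPoolExecutor
    from typing import Optional, cast

    _NOT_SET = cast(ProcessPoolExecutor, object())  # sentinel: never computed
    _INSTANCE: Optional[ProcessPoolExecutor] = _NOT_SET

    def get_cpu_pool() -> Optional[ProcessPoolExecutor]:
        global _INSTANCE
        if _INSTANCE is _NOT_SET:  # compute once; None afterwards means disabled
            workers = os.environ.get('HPI_CPU_POOL')
            _INSTANCE = ProcessPoolExecutor(max_workers=int(workers)) if workers else None
        return _INSTANCE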
@@ -1,17 +1,16 @@
 """
 Various helpers for compression
 """

 # fmt: off
 from __future__ import annotations

 import io
 import pathlib
-from collections.abc import Iterator, Sequence
+import sys
 from datetime import datetime
 from functools import total_ordering
 from pathlib import Path
-from typing import IO, Union
+from typing import IO, Any, Iterator, Sequence, Union

 PathIsh = Union[Path, str]

@@ -28,7 +27,7 @@ class Ext:
 def is_compressed(p: Path) -> bool:
     # todo kinda lame way for now.. use mime ideally?
     # should cooperate with kompress.kopen?
-    return any(p.name.endswith(ext) for ext in [Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz])
+    return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz})


 def _zstd_open(path: Path, *args, **kwargs) -> IO:
@@ -88,7 +87,7 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
     elif name.endswith(Ext.lz4):
         import lz4.frame # type: ignore
         return lz4.frame.open(str(pp), mode, *args, **kwargs)
-    elif name.endswith(Ext.zstd) or name.endswith(Ext.zst): # noqa: PIE810
+    elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):
         kwargs['mode'] = mode
         return _zstd_open(pp, *args, **kwargs)
     elif name.endswith(Ext.targz):
@@ -121,7 +120,7 @@ class CPath(BasePath):
     Path only has _accessor and _closed slots, so can't directly set .open method
     _accessor.open has to return file descriptor, doesn't work for compressed stuff.
     """
-    def open(self, *args, **kwargs): # noqa: ARG002
+    def open(self, *args, **kwargs):
         kopen_kwargs = {}
         mode = kwargs.get('mode')
         if mode is not None:
@@ -142,16 +141,21 @@ open = kopen # TODO deprecate
 def kexists(path: PathIsh, subpath: str) -> bool:
     try:
         kopen(path, subpath)
+        return True
     except Exception:
         return False
-    else:
-        return True


 import zipfile

-# meh... zipfile.Path is not available on 3.7
-zipfile_Path = zipfile.Path
+if sys.version_info[:2] >= (3, 8):
+    # meh... zipfile.Path is not available on 3.7
+    zipfile_Path = zipfile.Path
+else:
+    if typing.TYPE_CHECKING:
+        zipfile_Path = Any
+    else:
+        zipfile_Path = object


 @total_ordering
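Note: the two kexists variants above are equivalent here: a try/except/else runs the else branch only when no exception was raised, so returning True there matches returning True at the end of the try body. Toy version:

    def exists_demo(path: str) -> bool:
        try:
            open(path).close()
        except Exception:
            return False
        else:
            # reached only if the try body completed without raising
            return True

    print(exists_demo('/etc/hosts'), exists_demo('/no/such/file'))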
@@ -211,7 +215,7 @@ class ZipPath(zipfile_Path):

     def iterdir(self) -> Iterator[ZipPath]:
         for s in self._as_dir().iterdir():
-            yield ZipPath(s.root, s.at)
+            yield ZipPath(s.root, s.at) # type: ignore[attr-defined]

     @property
     def stem(self) -> str:
@@ -240,7 +244,7 @@ class ZipPath(zipfile_Path):
         # see https://en.wikipedia.org/wiki/ZIP_(file_format)#Structure
         dt = datetime(*self.root.getinfo(self.at).date_time)
         ts = int(dt.timestamp())
-        params = dict( # noqa: C408
+        params = dict(
             st_mode=0,
             st_ino=0,
             st_dev=0,
@@ -1,18 +1,17 @@
-from __future__ import annotations
+from .internal import assert_subpackage; assert_subpackage(__name__)

-from .internal import assert_subpackage

-assert_subpackage(__name__)

 import logging
 import sys
-from collections.abc import Iterator
+import warnings
 from contextlib import contextmanager
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
+    Iterator,
+    Optional,
+    Type,
     TypeVar,
     Union,
     cast,
@@ -21,8 +20,6 @@ from typing import (

 import appdirs # type: ignore[import-untyped]

-from . import warnings
-
 PathIsh = Union[str, Path] # avoid circular import from .common

|
@ -61,12 +58,12 @@ def _appdirs_cache_dir() -> Path:
|
||||||
_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
|
_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
|
||||||
|
|
||||||
|
|
||||||
def cache_dir(suffix: PathIsh | None = None) -> Path:
|
def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
|
||||||
from . import core_config as CC
|
from . import core_config as CC
|
||||||
|
|
||||||
cdir_ = CC.config.get_cache_dir()
|
cdir_ = CC.config.get_cache_dir()
|
||||||
|
|
||||||
sp: Path | None = None
|
sp: Optional[Path] = None
|
||||||
if suffix is not None:
|
if suffix is not None:
|
||||||
sp = Path(suffix)
|
sp = Path(suffix)
|
||||||
# guess if you do need absolute, better path it directly instead of as suffix?
|
# guess if you do need absolute, better path it directly instead of as suffix?
|
||||||
|
@@ -119,7 +116,7 @@ def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs):
     try:
         import cachew
     except ModuleNotFoundError:
-        warnings.high('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
+        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
         return lambda orig_func: orig_func
     else:
         kwargs['cache_path'] = cache_path
@@ -136,7 +133,7 @@ if TYPE_CHECKING:
     CC = Callable[P, R] # need to give it a name, if inlined into bound=, mypy runs in a bug
     PathProvider = Union[PathIsh, Callable[P, PathIsh]]
     # NOTE: in cachew, HashFunction type returns str
-    # however in practice, cachew always calls str for its result
+    # however in practice, cachew alwasy calls str for its result
     # so perhaps better to switch it to Any in cachew as well
     HashFunction = Callable[P, Any]

@ -145,19 +142,21 @@ if TYPE_CHECKING:
|
||||||
# we need two versions due to @doublewrap
|
# we need two versions due to @doublewrap
|
||||||
# this is when we just annotate as @cachew without any args
|
# this is when we just annotate as @cachew without any args
|
||||||
@overload # type: ignore[no-overload-impl]
|
@overload # type: ignore[no-overload-impl]
|
||||||
def mcachew(fun: F) -> F: ...
|
def mcachew(fun: F) -> F:
|
||||||
|
...
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def mcachew(
|
def mcachew(
|
||||||
cache_path: PathProvider | None = ...,
|
cache_path: Optional[PathProvider] = ...,
|
||||||
*,
|
*,
|
||||||
force_file: bool = ...,
|
force_file: bool = ...,
|
||||||
cls: type | None = ...,
|
cls: Optional[Type] = ...,
|
||||||
depends_on: HashFunction = ...,
|
depends_on: HashFunction = ...,
|
||||||
logger: logging.Logger | None = ...,
|
logger: Optional[logging.Logger] = ...,
|
||||||
chunk_by: int = ...,
|
chunk_by: int = ...,
|
||||||
synthetic_key: str | None = ...,
|
synthetic_key: Optional[str] = ...,
|
||||||
) -> Callable[[F], F]: ...
|
) -> Callable[[F], F]:
|
||||||
|
...
|
||||||
|
|
||||||
else:
|
else:
|
||||||
mcachew = _mcachew_impl
|
mcachew = _mcachew_impl
|
||||||
|
|
|
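
The overloads above only describe the decorator's interface for type checkers; at runtime mcachew degrades to a no-op decorator when the cachew library isn't installed. A rough usage sketch (the decorated function and its hash key are hypothetical):

from my.core.cachew import mcachew

@mcachew(depends_on=lambda: 'v1')  # cached results invalidate when this value changes
def events():
    yield from range(1000)  # stand-in for an expensive parse
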
@@ -3,32 +3,28 @@ from __future__ import annotations
 import importlib
 import re
 import sys
-from collections.abc import Iterator
 from contextlib import ExitStack, contextmanager
-from typing import Any, Callable, TypeVar
+from typing import Any, Callable, Dict, Iterator, Optional, Type, TypeVar

-Attrs = dict[str, Any]
+Attrs = Dict[str, Any]

 C = TypeVar('C')


 # todo not sure about it, could be overthinking...
 # but short enough to change later
 # TODO document why it's necessary?
-def make_config(cls: type[C], migration: Callable[[Attrs], Attrs] = lambda x: x) -> C:
+def make_config(cls: Type[C], migration: Callable[[Attrs], Attrs]=lambda x: x) -> C:
     user_config = cls.__base__
     old_props = {
         # NOTE: deliberately use gettatr to 'force' class properties here
-        k: getattr(user_config, k)
-        for k in vars(user_config)
+        k: getattr(user_config, k) for k in vars(user_config)
     }
     new_props = migration(old_props)
     from dataclasses import fields

     params = {
         k: v
         for k, v in new_props.items()
         if k in {f.name for f in fields(cls)}  # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115
     }
     # todo maybe return type here?
     return cls(**params)
@@ -55,8 +51,6 @@ def _override_config(config: F) -> Iterator[F]:


 ModuleRegex = str


 @contextmanager
 def _reload_modules(modules: ModuleRegex) -> Iterator[None]:
     # need to use list here, otherwise reordering with set might mess things up
@@ -87,14 +81,13 @@ def _reload_modules(modules: ModuleRegex) -> Iterator[None]:


 @contextmanager
-def tmp_config(*, modules: ModuleRegex | None = None, config=None):
+def tmp_config(*, modules: Optional[ModuleRegex]=None, config=None):
     if modules is None:
         assert config is None
     if modules is not None:
         assert config is not None

     import my.config

     with ExitStack() as module_reload_stack, _override_config(my.config) as new_config:
         if config is not None:
             overrides = {k: v for k, v in vars(config).items() if not k.startswith('__')}
@@ -109,7 +102,6 @@ def tmp_config(*, modules: ModuleRegex | None = None, config=None):
 def test_tmp_config() -> None:
     class extra:
         data_path = '/path/to/data'

     with tmp_config() as c:
         assert c.google != 'whatever'
         assert not hasattr(c, 'extra')
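
The make_config helper diffed above builds the config dataclass from the user's class (cls.__base__), and migration can rewrite the carried-over attribute dict first. A minimal sketch, with a made-up attribute rename:

from dataclasses import dataclass

from my.core.cfg import make_config

class user_config:  # plays the role of a section in my.config
    old_token = 'secret'

@dataclass
class config_cls(user_config):
    token: str = 'unset'

cfg = make_config(config_cls, migration=lambda attrs: {**attrs, 'token': attrs.get('old_token', 'unset')})
assert cfg.token == 'secret'
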
@@ -1,18 +1,21 @@
-from __future__ import annotations
-
 import os
-from collections.abc import Iterable, Sequence
+import warnings
 from glob import glob as do_glob
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
     Callable,
     Generic,
+    Iterable,
+    List,
+    Sequence,
+    Tuple,
     TypeVar,
     Union,
 )

-from . import compat, warnings
+from . import compat
+from . import warnings as core_warnings

 # some helper functions
 # TODO start deprecating this? soon we'd be able to use Path | str syntax which is shorter and more explicit
@@ -22,22 +25,19 @@ Paths = Union[Sequence[PathIsh], PathIsh]


 DEFAULT_GLOB = '*'


 def get_files(
     pp: Paths,
-    glob: str = DEFAULT_GLOB,
-    *,
-    sort: bool = True,
-    guess_compression: bool = True,
-) -> tuple[Path, ...]:
+    glob: str=DEFAULT_GLOB,
+    sort: bool=True,
+    guess_compression: bool=True,
+) -> Tuple[Path, ...]:
     """
     Helper function to avoid boilerplate.

     Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
     """
     # TODO FIXME mm, some wrapper to assert iterator isn't empty?
-    sources: list[Path]
+    sources: List[Path]
     if isinstance(pp, Path):
         sources = [pp]
     elif isinstance(pp, str):
@@ -54,7 +54,7 @@ def get_files(
         # TODO ugh. very flaky... -3 because [<this function>, get_files(), <actual caller>]
         return traceback.extract_stack()[-3].filename

-    paths: list[Path] = []
+    paths: List[Path] = []
     for src in sources:
         if src.parts[0] == '~':
             src = src.expanduser()
@@ -62,9 +62,9 @@ def get_files(
         gs = str(src)
         if '*' in gs:
             if glob != DEFAULT_GLOB:
-                warnings.medium(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
-            paths.extend(map(Path, do_glob(gs)))  # noqa: PTH207
-        elif os.path.isdir(str(src)):  # noqa: PTH112
+                warnings.warn(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
+            paths.extend(map(Path, do_glob(gs)))
+        elif os.path.isdir(str(src)):
             # NOTE: we're using os.path here on purpose instead of src.is_dir
             # the reason is is_dir for archives might return True and then
             # this clause would try globbing insize the archives
@@ -80,11 +80,11 @@ def get_files(
             paths.append(src)

     if sort:
-        paths = sorted(paths)
+        paths = list(sorted(paths))

     if len(paths) == 0:
         # todo make it conditionally defensive based on some global settings
-        warnings.high(f'''
+        core_warnings.high(f'''
 {caller()}: no paths were matched against {pp}. This might result in missing data. Likely, the directory you passed is empty.
 '''.strip())
         # traceback is useful to figure out what config caused it?
@@ -133,8 +133,8 @@ def test_classproperty() -> None:
         return 'hello'

     res = C.prop
-    assert_type(res, str)
     assert res == 'hello'
+    assert_type(res, str)


 # hmm, this doesn't really work with mypy well..
@@ -157,7 +157,7 @@ def get_valid_filename(s: str) -> str:


 # TODO deprecate and suggest to use one from my.core directly? not sure
-from .utils.itertools import unique_everseen  # noqa: F401
+from .utils.itertools import unique_everseen

 ### legacy imports, keeping them here for backwards compatibility
 ## hiding behind TYPE_CHECKING so it works in runtime
@@ -234,14 +234,16 @@ if not TYPE_CHECKING:
         return types.asdict(*args, **kwargs)

     # todo wrap these in deprecated decorator as well?
-    # TODO hmm how to deprecate these in runtime?
-    # tricky cause they are actually classes/types
-    from typing import Literal  # noqa: F401
-
     from .cachew import mcachew  # noqa: F401

     # this is kinda internal, should just use my.core.logging.setup_logger if necessary
     from .logging import setup_logger

+    # TODO hmm how to deprecate these in runtime?
+    # tricky cause they are actually classes/types
+
+    from typing import Literal  # noqa: F401
+
     from .stats import Stats
     from .types import (
         Json,
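
Usage sketch for get_files as diffed above (paths are made up): a string containing '*' is treated as a glob, in which case any explicit glob= argument is ignored with a warning, while a directory is expanded using the glob argument:

from my.core.common import get_files

files = get_files('~/data/exports/*.json')            # glob embedded in the path itself
archives = get_files('~/data/takeout', glob='*.zip')  # directory + explicit glob
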
@@ -3,8 +3,6 @@ Contains backwards compatibility helpers for different python versions.
 If something is relevant to HPI itself, please put it in .hpi_compat instead
 '''

-from __future__ import annotations
-
 import sys
 from typing import TYPE_CHECKING

@@ -23,20 +21,20 @@ if not TYPE_CHECKING:
         # TODO warn here?
         source.backup(dest, **kwargs)

-# keeping for runtime backwards compatibility (added in 3.9)
-@deprecated('use .removeprefix method on string directly instead')
-def removeprefix(text: str, prefix: str) -> str:
-    return text.removeprefix(prefix)
-
-@deprecated('use .removesuffix method on string directly instead')
-def removesuffix(text: str, suffix: str) -> str:
-    return text.removesuffix(suffix)
-
-##
+# can remove after python3.9 (although need to keep the method itself for bwd compat)
+def removeprefix(text: str, prefix: str) -> str:
+    if text.startswith(prefix):
+        return text[len(prefix) :]
+    return text

 ## used to have compat function before 3.8 for these, keeping for runtime back compatibility
-from functools import cached_property
-from typing import Literal, Protocol, TypedDict
+if not TYPE_CHECKING:
+    from functools import cached_property
+    from typing import Literal, Protocol, TypedDict
+else:
+    from typing_extensions import Literal, Protocol, TypedDict
 ##

@@ -49,13 +47,13 @@ else:
 # bisect_left doesn't have a 'key' parameter (which we use)
 # till python3.10
 if sys.version_info[:2] <= (3, 9):
-    from typing import Any, Callable, List, Optional, TypeVar  # noqa: UP035
+    from typing import Any, Callable, List, Optional, TypeVar

     X = TypeVar('X')

     # copied from python src
     # fmt: off
-    def bisect_left(a: list[Any], x: Any, lo: int=0, hi: int | None=None, *, key: Callable[..., Any] | None=None) -> int:
+    def bisect_left(a: List[Any], x: Any, lo: int=0, hi: Optional[int]=None, *, key: Optional[Callable[..., Any]]=None) -> int:
         if lo < 0:
             raise ValueError('lo must be non-negative')
         if hi is None:
@@ -127,10 +125,8 @@ def test_fromisoformat() -> None:

 if sys.version_info[:2] >= (3, 10):
     from types import NoneType
-    from typing import TypeAlias
 else:
     NoneType = type(None)
-    from typing_extensions import TypeAlias


 if sys.version_info[:2] >= (3, 11):
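
Why the vendored bisect_left above exists: stdlib bisect only grew a key= parameter in python 3.10, and the key is applied to list elements while the probe value is compared as-is. A quick sketch assuming the compat shim is importable:

from my.core.compat import bisect_left

pairs = [(1, 'a'), (3, 'b'), (5, 'c')]  # sorted by the first element
assert bisect_left(pairs, 3, key=lambda p: p[0]) == 1
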
@@ -2,21 +2,18 @@
 Bindings for the 'core' HPI configuration
 '''

-from __future__ import annotations
-
 import re
-from collections.abc import Sequence
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Optional, Sequence

-from . import warnings
+from . import PathIsh, warnings

 try:
     from my.config import core as user_config  # type: ignore[attr-defined]
 except Exception as e:
     try:
         from my.config import common as user_config  # type: ignore[attr-defined]

         warnings.high("'common' config section is deprecated. Please rename it to 'core'.")
     except Exception as e2:
         # make it defensive, because it's pretty commonly used and would be annoying if it breaks hpi doctor etc.
@@ -27,7 +24,6 @@ except Exception as e:

 _HPI_CACHE_DIR_DEFAULT = ''


 @dataclass
 class Config(user_config):
     '''
@@ -38,7 +34,7 @@ class Config(user_config):
     cache_dir = '/your/custom/cache/path'
     '''

-    cache_dir: Path | str | None = _HPI_CACHE_DIR_DEFAULT
+    cache_dir: Optional[PathIsh] = _HPI_CACHE_DIR_DEFAULT
     '''
     Base directory for cachew.
     - if None , means cache is disabled
@@ -48,7 +44,7 @@ class Config(user_config):
     NOTE: you shouldn't use this attribute in HPI modules directly, use Config.get_cache_dir()/cachew.cache_dir() instead
     '''

-    tmp_dir: Path | str | None = None
+    tmp_dir: Optional[PathIsh] = None
     '''
     Path to a temporary directory.
     This can be used temporarily while extracting zipfiles etc...
@@ -56,36 +52,34 @@ class Config(user_config):
     - otherwise , use the specified directory as the base temporary directory
     '''

-    enabled_modules: Sequence[str] | None = None
+    enabled_modules : Optional[Sequence[str]] = None
     '''
     list of regexes/globs
     - None means 'rely on disabled_modules'
     '''

-    disabled_modules: Sequence[str] | None = None
+    disabled_modules: Optional[Sequence[str]] = None
     '''
     list of regexes/globs
     - None means 'rely on enabled_modules'
     '''

-    def get_cache_dir(self) -> Path | None:
+    def get_cache_dir(self) -> Optional[Path]:
         cdir = self.cache_dir
         if cdir is None:
             return None
         if cdir == _HPI_CACHE_DIR_DEFAULT:
             from .cachew import _appdirs_cache_dir

             return _appdirs_cache_dir()
         else:
             return Path(cdir).expanduser()

     def get_tmp_dir(self) -> Path:
-        tdir: Path | str | None = self.tmp_dir
+        tdir: Optional[PathIsh] = self.tmp_dir
         tpath: Path
         # use tempfile if unset
         if tdir is None:
             import tempfile

             tpath = Path(tempfile.gettempdir()) / 'HPI'
         else:
             tpath = Path(tdir)
@@ -93,10 +87,10 @@ class Config(user_config):
         tpath.mkdir(parents=True, exist_ok=True)
         return tpath

-    def _is_module_active(self, module: str) -> bool | None:
+    def _is_module_active(self, module: str) -> Optional[bool]:
         # None means the config doesn't specify anything
         # todo might be nice to return the 'reason' too? e.g. which option has matched
-        def matches(specs: Sequence[str]) -> str | None:
+        def matches(specs: Sequence[str]) -> Optional[str]:
             for spec in specs:
                 # not sure because . (packages separate) matches anything, but I guess unlikely to clash
                 if re.match(spec, module):
@@ -112,10 +106,10 @@ class Config(user_config):
                 return None
             else:
                 return False
         else:  # not None
             if off is None:
                 return True
             else:  # not None
                 # fallback onto the 'enable everything', then the user will notice
                 warnings.medium(f"[module]: conflicting regexes '{on}' and '{off}' are set in the config. Please only use one of them.")
                 return True
@@ -127,8 +121,8 @@ config = make_config(Config)


 ### tests start
-from collections.abc import Iterator
 from contextlib import contextmanager as ctx
+from typing import Iterator


 @ctx
@@ -169,5 +163,4 @@ def test_active_modules() -> None:
     assert cc._is_module_active("my.body.exercise") is True
     assert len(record_warnings) == 1


 ### tests end
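
For context, an illustrative (entirely made-up) 'core' block in the user's my.config that the Config dataclass above binds to:

class core:
    cache_dir = '~/.cache/my'         # None would disable cachew caching entirely
    enabled_modules = ['my.github.*']
    disabled_modules = None           # rely on enabled_modules
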
@@ -5,25 +5,23 @@ A helper module for defining denylists for sources programmatically
 For docs, see doc/DENYLIST.md
 """

-from __future__ import annotations
-
 import functools
 import json
 import sys
 from collections import defaultdict
-from collections.abc import Iterator, Mapping
 from pathlib import Path
-from typing import Any, TypeVar
+from typing import Any, Dict, Iterator, List, Mapping, Set, TypeVar

 import click
 from more_itertools import seekable

-from .serialize import dumps
-from .warnings import medium
+from my.core.common import PathIsh
+from my.core.serialize import dumps
+from my.core.warnings import medium

 T = TypeVar("T")

-DenyMap = Mapping[str, set[Any]]
+DenyMap = Mapping[str, Set[Any]]


 def _default_key_func(obj: T) -> str:
@@ -31,9 +29,9 @@ def _default_key_func(obj: T) -> str:


 class DenyList:
-    def __init__(self, denylist_file: Path | str) -> None:
+    def __init__(self, denylist_file: PathIsh):
         self.file = Path(denylist_file).expanduser().absolute()
-        self._deny_raw_list: list[dict[str, Any]] = []
+        self._deny_raw_list: List[Dict[str, Any]] = []
         self._deny_map: DenyMap = defaultdict(set)

         # deny cli, user can override these
@@ -47,7 +45,7 @@ class DenyList:
             return

         deny_map: DenyMap = defaultdict(set)
-        data: list[dict[str, Any]] = json.loads(self.file.read_text())
+        data: List[Dict[str, Any]]= json.loads(self.file.read_text())
         self._deny_raw_list = data

         for ignore in data:
@@ -98,7 +96,6 @@ class DenyList:
     def filter(
         self,
         itr: Iterator[T],
-        *,
         invert: bool = False,
     ) -> Iterator[T]:
         denyf = functools.partial(self._allow, deny_map=self.load())
@@ -106,7 +103,7 @@ class DenyList:
             return filter(lambda x: not denyf(x), itr)
         return filter(denyf, itr)

-    def deny(self, key: str, value: Any, *, write: bool = False) -> None:
+    def deny(self, key: str, value: Any, write: bool = False) -> None:
         '''
         add a key/value pair to the denylist
         '''
@@ -114,7 +111,7 @@ class DenyList:
         self._load()
         self._deny_raw({key: self._stringify_value(value)}, write=write)

-    def _deny_raw(self, data: dict[str, Any], *, write: bool = False) -> None:
+    def _deny_raw(self, data: Dict[str, Any], write: bool = False) -> None:
         self._deny_raw_list.append(data)
         if write:
             self.write()
@@ -133,7 +130,7 @@ class DenyList:
     def _deny_cli_remember(
         self,
         items: Iterator[T],
-        mem: dict[str, T],
+        mem: Dict[str, T],
     ) -> Iterator[str]:
         keyf = self._deny_cli_key_func or _default_key_func
         # i.e., convert each item to a string, and map str -> item
@@ -159,8 +156,10 @@ class DenyList:
         # reset the iterator
         sit.seek(0)
         # so we can map the selected string from fzf back to the original objects
-        memory_map: dict[str, T] = {}
-        picker = FzfPrompt(executable_path=self.fzf_path, default_options="--no-multi")
+        memory_map: Dict[str, T] = {}
+        picker = FzfPrompt(
+            executable_path=self.fzf_path, default_options="--no-multi"
+        )
         picked_l = picker.prompt(
             self._deny_cli_remember(itr, memory_map),
             "--read0",
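
Hedged usage sketch for DenyList (the file path and data shape are invented here; doc/DENYLIST.md has the canonical examples):

from typing import NamedTuple

from my.core.denylist import DenyList

class IP(NamedTuple):
    addr: str

deny = DenyList("~/.config/my/denylist.json")
deny.deny(key="addr", value="192.168.1.1", write=True)
clean = list(deny.filter(iter([IP("192.168.1.1"), IP("8.8.8.8")])))  # drops the denied IP
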
@@ -10,8 +10,6 @@ This potentially allows it to be:
 It should be free of external modules, importlib, exec, etc. etc.
 '''

-from __future__ import annotations
-
 REQUIRES = 'REQUIRES'
 NOT_HPI_MODULE_VAR = '__NOT_HPI_MODULE__'

@@ -21,9 +19,8 @@ import ast
 import logging
 import os
 import re
-from collections.abc import Iterable, Sequence
 from pathlib import Path
-from typing import Any, NamedTuple, Optional, cast
+from typing import Any, Iterable, List, NamedTuple, Optional, Sequence, cast

 '''
 None means that requirements weren't defined (different from empty requirements)
@@ -33,11 +30,11 @@ Requires = Optional[Sequence[str]]

 class HPIModule(NamedTuple):
     name: str
-    skip_reason: str | None
-    doc: str | None = None
-    file: Path | None = None
+    skip_reason: Optional[str]
+    doc: Optional[str] = None
+    file: Optional[Path] = None
     requires: Requires = None
-    legacy: str | None = None  # contains reason/deprecation warning
+    legacy: Optional[str] = None  # contains reason/deprecation warning


 def ignored(m: str) -> bool:
@@ -58,13 +55,13 @@ def has_stats(src: Path) -> bool:
 def _has_stats(code: str) -> bool:
     a: ast.Module = ast.parse(code)
     for x in a.body:
         try:  # maybe assign
             [tg] = cast(Any, x).targets
             if tg.id == 'stats':
                 return True
         except:
             pass
         try:  # maybe def?
             name = cast(Any, x).name
             if name == 'stats':
                 return True
@@ -147,7 +144,7 @@ def all_modules() -> Iterable[HPIModule]:
 def _iter_my_roots() -> Iterable[Path]:
     import my  # doesn't import any code, because of namespace package

-    paths: list[str] = list(my.__path__)
+    paths: List[str] = list(my.__path__)
     if len(paths) == 0:
         # should probably never happen?, if this code is running, it was imported
         # because something was added to __path__ to match this name
@@ -245,7 +242,7 @@ def test_pure() -> None:
     src = Path(__file__).read_text()
     # 'import my' is allowed, but
     # dont allow anything other HPI modules
-    assert re.findall('import ' + r'my\.\S+', src, re.MULTILINE) == []
+    assert re.findall('import ' + r'my\.\S+', src, re.M) == []
     assert 'from ' + 'my' not in src
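
The _has_stats check above scans module source with ast instead of importing it, which keeps discovery free of side effects. A minimal standalone sketch of the same idea, using explicit node types instead of the try/except probing:

import ast

def has_stats_src(code: str) -> bool:
    # look for a top-level 'stats' assignment or function definition
    for node in ast.parse(code).body:
        if isinstance(node, ast.Assign) and any(getattr(t, 'id', None) == 'stats' for t in node.targets):
            return True
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == 'stats':
            return True
    return False

assert has_stats_src("def stats(): ...")
assert not has_stats_src("x = 1")
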
@@ -3,16 +3,19 @@ Various error handling helpers
 See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
 """

-from __future__ import annotations
-
 import traceback
-from collections.abc import Iterable, Iterator
 from datetime import datetime
 from itertools import tee
 from typing import (
     Any,
     Callable,
+    Iterable,
+    Iterator,
+    List,
     Literal,
+    Optional,
+    Tuple,
+    Type,
     TypeVar,
     Union,
     cast,
@@ -30,7 +33,7 @@ Res = ResT[T, Exception]
 ErrorPolicy = Literal["yield", "raise", "drop"]


-def notnone(x: T | None) -> T:
+def notnone(x: Optional[T]) -> T:
     assert x is not None
     return x

@@ -38,7 +41,8 @@ def notnone(x: T | None) -> T:
 def unwrap(res: Res[T]) -> T:
     if isinstance(res, Exception):
         raise res
-    return res
+    else:
+        return res


 def drop_exceptions(itr: Iterator[Res[T]]) -> Iterator[T]:
@@ -57,15 +61,13 @@ def raise_exceptions(itr: Iterable[Res[T]]) -> Iterator[T]:
         yield o


-def warn_exceptions(itr: Iterable[Res[T]], warn_func: Callable[[Exception], None] | None = None) -> Iterator[T]:
+def warn_exceptions(itr: Iterable[Res[T]], warn_func: Optional[Callable[[Exception], None]] = None) -> Iterator[T]:
     # if not provided, use the 'warnings' module
     if warn_func is None:
         from my.core.warnings import medium

         def _warn_func(e: Exception) -> None:
             # TODO: print traceback? but user could always --raise-exceptions as well
             medium(str(e))

         warn_func = _warn_func

     for o in itr:
@@ -80,7 +82,7 @@ def echain(ex: E, cause: Exception) -> E:
     return ex


-def split_errors(l: Iterable[ResT[T, E]], ET: type[E]) -> tuple[Iterable[T], Iterable[E]]:
+def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
     # TODO would be nice to have ET=Exception default? but it causes some mypy complaints?
     vit, eit = tee(l)
     # TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
@@ -98,9 +100,7 @@ def split_errors(l: Iterable[ResT[T, E]], ET: type[E]) -> tuple[Iterable[T], Ite

 K = TypeVar('K')

-
-def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]]:
+def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]]:
     """
     Sort a sequence potentially interleaved with errors/entries on which the key can't be computed.
     The general idea is: the error sticks to the non-error entry that follows it
@@ -108,7 +108,7 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]
     group = []
     groups = []
     for i in items:
-        k: K | None
+        k: Optional[K]
         try:
             k = key(i)
         except Exception:  # error white computing key? dunno, might be nice to handle...
@@ -118,10 +118,10 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]
             groups.append((k, group))
             group = []

-    results: list[Res[T]] = []
-    for _v, grp in sorted(groups, key=lambda p: p[0]):  # type: ignore[return-value, arg-type]  # TODO SupportsLessThan??
+    results: List[Res[T]] = []
+    for v, grp in sorted(groups, key=lambda p: p[0]):  # type: ignore[return-value, arg-type]  # TODO SupportsLessThan??
         results.extend(grp)
     results.extend(group)  # handle last group (it will always be errors only)

     return results

@@ -153,7 +153,7 @@ def test_sort_res_by() -> None:
         Exc('last'),
     ]

-    results2 = sort_res_by([*ress, 0], lambda x: int(x))
+    results2 = sort_res_by(ress + [0], lambda x: int(x))
     assert results2 == [Exc('last'), 0] + results[:-1]

     assert sort_res_by(['caba', 'a', 'aba', 'daba'], key=lambda x: len(x)) == ['a', 'aba', 'caba', 'daba']
@@ -163,20 +163,20 @@ def test_sort_res_by() -> None:
 # helpers to associate timestamps with the errors (so something meaningful could be displayed on the plots, for example)
 # todo document it under 'patterns' somewhere...
 # todo proper typevar?
-def set_error_datetime(e: Exception, dt: datetime | None) -> None:
+def set_error_datetime(e: Exception, dt: Optional[datetime]) -> None:
     if dt is None:
         return
-    e.args = (*e.args, dt)
+    e.args = e.args + (dt,)
     # todo not sure if should return new exception?


-def attach_dt(e: Exception, *, dt: datetime | None) -> Exception:
+def attach_dt(e: Exception, *, dt: Optional[datetime]) -> Exception:
     set_error_datetime(e, dt)
     return e


 # todo it might be problematic because might mess with timezones (when it's converted to string, it's converted to a shift)
-def extract_error_datetime(e: Exception) -> datetime | None:
+def extract_error_datetime(e: Exception) -> Optional[datetime]:
     import re

     for x in reversed(e.args):
@@ -201,12 +201,7 @@ def error_to_json(e: Exception) -> Json:
 MODULE_SETUP_URL = 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#private-configuration-myconfig'


-def warn_my_config_import_error(
-    err: ImportError | AttributeError,
-    *,
-    help_url: str | None = None,
-    module_name: str | None = None,
-) -> bool:
+def warn_my_config_import_error(err: Union[ImportError, AttributeError], help_url: Optional[str] = None) -> bool:
     """
     If the user tried to import something from my.config but it failed,
     possibly due to missing the config block in my.config?
@@ -238,24 +233,10 @@ See {help_url}\
         config_obj = cast(object, getattr(err, 'obj'))  # the object that caused the attribute error
         # e.g. active_browser for my.browser
         nested_block_name = err.name
-        errmsg = f"""You're likely missing the nested config block for '{getattr(config_obj, '__name__', str(config_obj))}.{nested_block_name}'.
-See {help_url} or check the corresponding module.py file for an example\
-"""
         if config_obj.__module__ == 'my.config':
-            click.secho(errmsg, fg='yellow', err=True)
-            return True
-        if module_name is not None and nested_block_name == module_name.split('.')[-1]:
-            # this tries to cover cases like these
-            # user config:
-            # class location:
-            #     class via_ip:
-            #         accuracy = 10_000
-            # then when we import it, we do something like
-            # from my.config import location
-            # user_config = location.via_ip
-            # so if location is present, but via_ip is not, we get
-            # AttributeError: type object 'location' has no attribute 'via_ip'
-            click.secho(errmsg, fg='yellow', err=True)
+            click.secho(f"""You're likely missing the nested config block for '{getattr(config_obj, '__name__', str(config_obj))}.{nested_block_name}'.
+See {help_url} or check the corresponding module.py file for an example\
+""", fg='yellow', err=True)
             return True
     else:
         click.echo(f"Unexpected error... {err}", err=True)
@@ -266,7 +247,7 @@ def test_datetime_errors() -> None:
     import pytz  # noqa: I001

     dt_notz = datetime.now()
     dt_tz = datetime.now(tz=pytz.timezone('Europe/Amsterdam'))
     for dt in [dt_tz, dt_notz]:
         e1 = RuntimeError('whatever')
         assert extract_error_datetime(e1) is None
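
The Res pattern diffed above keeps exceptions flowing through iterators as ordinary values, raised or dropped only at the edges. A small sketch of consuming such a stream, including sort_res_by's rule that an error sticks to the entry that follows it:

from my.core.error import Res, sort_res_by

items: list[Res[str]] = ['caba', 'a', RuntimeError('boom'), 'aba']
ok = [x for x in items if not isinstance(x, Exception)]
by_len = sort_res_by(items, key=len)  # -> ['a', RuntimeError('boom'), 'aba', 'caba']
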
@@ -1,8 +1,6 @@
-from __future__ import annotations
-
 import sys
 import types
-from typing import Any
+from typing import Any, Dict, Optional


 # The idea behind this one is to support accessing "overlaid/shadowed" modules from namespace packages
@@ -22,7 +20,7 @@ def import_original_module(
     file: str,
     *,
     star: bool = False,
-    globals: dict[str, Any] | None = None,
+    globals: Optional[Dict[str, Any]] = None,
 ) -> types.ModuleType:
     module_to_restore = sys.modules[module_name]
@@ -1,29 +1,29 @@
-from __future__ import annotations
+from .internal import assert_subpackage; assert_subpackage(__name__)

-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
-
-import dataclasses
+import dataclasses as dcl
 import inspect
-from typing import Any, Generic, TypeVar
+from typing import Any, Type, TypeVar

 D = TypeVar('D')


-def _freeze_dataclass(Orig: type[D]):
-    ofields = [(f.name, f.type, f) for f in dataclasses.fields(Orig)]  # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115
+def _freeze_dataclass(Orig: Type[D]):
+    ofields = [(f.name, f.type, f) for f in dcl.fields(Orig)]  # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115

     # extract properties along with their types
     props = list(inspect.getmembers(Orig, lambda o: isinstance(o, property)))
     pfields = [(name, inspect.signature(getattr(prop, 'fget')).return_annotation) for name, prop in props]
     # FIXME not sure about name?
     # NOTE: sadly passing bases=[Orig] won't work, python won't let us override properties with fields
-    RRR = dataclasses.make_dataclass('RRR', fields=[*ofields, *pfields])
+    RRR = dcl.make_dataclass('RRR', fields=[*ofields, *pfields])
     # todo maybe even declare as slots?
     return props, RRR


+# todo need some decorator thingie?
+from typing import Generic
+
+
 class Freezer(Generic[D]):
     '''
     Some magic which converts dataclass properties into fields.
@@ -31,13 +31,13 @@ class Freezer(Generic[D]):
     For now only supports dataclasses.
     '''

-    def __init__(self, Orig: type[D]) -> None:
+    def __init__(self, Orig: Type[D]) -> None:
         self.Orig = Orig
         self.props, self.Frozen = _freeze_dataclass(Orig)

     def freeze(self, value: D) -> D:
         pvalues = {name: getattr(value, name) for name, _ in self.props}
-        return self.Frozen(**dataclasses.asdict(value), **pvalues)  # type: ignore[call-overload]  # see https://github.com/python/typing_extensions/issues/115
+        return self.Frozen(**dcl.asdict(value), **pvalues)  # type: ignore[call-overload]  # see https://github.com/python/typing_extensions/issues/115


 ### tests
@@ -45,7 +45,7 @@ class Freezer(Generic[D]):

 # this needs to be defined here to prevent a mypy bug
 # see https://github.com/python/mypy/issues/7281
-@dataclasses.dataclass
+@dcl.dataclass
 class _A:
     x: Any

@@ -60,10 +60,8 @@ class _A:


 def test_freezer() -> None:
-    val = _A(x={
-        'an_int': 123,
-        'an_any': [1, 2, 3],
-    })
+    val = _A(x=dict(an_int=123, an_any=[1, 2, 3]))
     af = Freezer(_A)
     fval = af.freeze(val)

@@ -71,7 +69,6 @@ def test_freezer() -> None:
     assert fd['typed'] == 123
     assert fd['untyped'] == [1, 2, 3]


 ###

 # TODO shit. what to do with exceptions?
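
Usage sketch for Freezer as diffed above: properties on a dataclass are materialized into plain fields of a generated class, e.g. so that serializers or cachew see them as data. The dataclass here is invented for illustration:

import dataclasses

from my.core.freezer import Freezer

@dataclasses.dataclass
class Point:
    x: int

    @property
    def double_x(self) -> int:
        return self.x * 2

frozen = Freezer(Point).freeze(Point(x=2))
assert frozen.double_x == 4  # now a real field, not a property
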
@@ -2,15 +2,11 @@
 Contains various backwards compatibility/deprecation helpers relevant to HPI itself.
 (as opposed to .compat module which implements compatibility between python versions)
 """

-from __future__ import annotations
-
 import inspect
 import os
 import re
-from collections.abc import Iterator, Sequence
 from types import ModuleType
-from typing import TypeVar
+from typing import Iterator, List, Optional, TypeVar

 from . import warnings

@@ -18,7 +14,7 @@ from . import warnings
 def handle_legacy_import(
     parent_module_name: str,
     legacy_submodule_name: str,
-    parent_module_path: list[str],
+    parent_module_path: List[str],
 ) -> bool:
     ###
     # this is to trick mypy into treating this as a proper namespace package
@@ -75,7 +71,7 @@ def pre_pip_dal_handler(
     name: str,
     e: ModuleNotFoundError,
     cfg,
-    requires: Sequence[str] = (),
+    requires=[],
 ) -> ModuleType:
     '''
     https://github.com/karlicoss/HPI/issues/79
@@ -120,141 +116,32 @@ V = TypeVar('V')
 # named to be kinda consistent with more_itertools, e.g. more_itertools.always_iterable
 class always_supports_sequence(Iterator[V]):
     """
-    Helper to make migration from Sequence/List to Iterable/Iterator type backwards compatible in runtime
+    Helper to make migration from Sequence/List to Iterable/Iterator type backwards compatible
     """

     def __init__(self, it: Iterator[V]) -> None:
-        self._it = it
-        self._list: list[V] | None = None
-        self._lit: Iterator[V] | None = None
+        self.it = it
+        self._list: Optional[List] = None

-    def __iter__(self) -> Iterator[V]:  # noqa: PYI034
-        if self._list is not None:
-            self._lit = iter(self._list)
-        return self
+    def __iter__(self) -> Iterator[V]:
+        return self.it.__iter__()

     def __next__(self) -> V:
-        if self._list is not None:
-            assert self._lit is not None
-            delegate = self._lit
-        else:
-            delegate = self._it
-        return next(delegate)
+        return self.it.__next__()

     def __getattr__(self, name):
-        return getattr(self._it, name)
+        return getattr(self.it, name)

     @property
-    def _aslist(self) -> list[V]:
+    def aslist(self) -> List[V]:
         if self._list is None:
-            qualname = getattr(self._it, '__qualname__', '<no qualname>')  # defensive just in case
+            qualname = getattr(self.it, '__qualname__', '<no qualname>')  # defensive just in case
             warnings.medium(f'Using {qualname} as list is deprecated. Migrate to iterative processing or call list() explicitly.')
-            self._list = list(self._it)
-
-            # this is necessary for list constructor to work correctly
-            # since it's __iter__ first, then tries to compute length and then starts iterating...
-            self._lit = iter(self._list)
+            self._list = list(self.it)
         return self._list

     def __len__(self) -> int:
-        return len(self._aslist)
+        return len(self.aslist)

     def __getitem__(self, i: int) -> V:
-        return self._aslist[i]
-
-
-def test_always_supports_sequence_list_constructor() -> None:
-    exhausted = 0
-
-    def it() -> Iterator[str]:
-        nonlocal exhausted
-        yield from ['a', 'b', 'c']
-        exhausted += 1
-
-    sit = always_supports_sequence(it())
-
-    # list constructor is a bit special... it's trying to compute length if it's available to optimize memory allocation
-    # so, what's happening in this case is
-    # - sit.__iter__ is called
-    # - sit.__len__ is called
-    # - sit.__next__ is called
-    res = list(sit)
-    assert res == ['a', 'b', 'c']
-    assert exhausted == 1
-
-    res = list(sit)
-    assert res == ['a', 'b', 'c']
-    assert exhausted == 1  # this will iterate over 'cached' list now, so original generator is only exhausted once
-
-
-def test_always_supports_sequence_indexing() -> None:
-    exhausted = 0
-
-    def it() -> Iterator[str]:
-        nonlocal exhausted
-        yield from ['a', 'b', 'c']
-        exhausted += 1
-
-    sit = always_supports_sequence(it())
-
-    assert len(sit) == 3
-    assert exhausted == 1
-
-    assert sit[2] == 'c'
-    assert sit[1] == 'b'
-    assert sit[0] == 'a'
-    assert exhausted == 1
-
-    # a few tests to make sure list-like operations are working..
-    assert list(sit) == ['a', 'b', 'c']
-    assert [x for x in sit] == ['a', 'b', 'c']  # noqa: C416
-    assert list(sit) == ['a', 'b', 'c']
-    assert [x for x in sit] == ['a', 'b', 'c']  # noqa: C416
-    assert exhausted == 1
-
-
-def test_always_supports_sequence_next() -> None:
-    exhausted = 0
-
-    def it() -> Iterator[str]:
-        nonlocal exhausted
-        yield from ['a', 'b', 'c']
-        exhausted += 1
-
-    sit = always_supports_sequence(it())
-
-    x = next(sit)
-    assert x == 'a'
-    assert exhausted == 0
-
-    x = next(sit)
-    assert x == 'b'
-    assert exhausted == 0
-
-
-def test_always_supports_sequence_iter() -> None:
-    exhausted = 0
-
-    def it() -> Iterator[str]:
-        nonlocal exhausted
-        yield from ['a', 'b', 'c']
-        exhausted += 1
-
-    sit = always_supports_sequence(it())
-
-    for x in sit:
-        assert x == 'a'
-        break
-
-    x = next(sit)
-    assert x == 'b'
-
-    assert exhausted == 0
-
-    x = next(sit)
-    assert x == 'c'
-    assert exhausted == 0
-
-    for _ in sit:
-        raise RuntimeError  # shouldn't trigger, just exhaust the iterator
-    assert exhausted == 1
+        return self.aslist[i]
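
What the expanded master-side implementation above enables (mirroring its own tests): an iterator that lazily caches itself into a list the first time sequence operations are used, with a deprecation warning nudging callers towards iterative processing:

from my.core.hpi_compat import always_supports_sequence

sit = always_supports_sequence(iter(['a', 'b', 'c']))
assert len(sit) == 3                 # warns, and caches the iterator into a list
assert sit[0] == 'a'                 # indexing is served from the cache
assert list(sit) == ['a', 'b', 'c']  # the cache can be safely re-iterated
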
@@ -2,14 +2,9 @@
 TODO doesn't really belong to 'core' morally, but can think of moving out later
 '''
 
-from __future__ import annotations
+from .internal import assert_subpackage; assert_subpackage(__name__)
 
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
-
-from collections.abc import Iterable
-from typing import Any
+from typing import Any, Dict, Iterable, Optional
 
 import click
 
@@ -26,7 +21,7 @@ class config:
     RESET_DEFAULT = False
 
 
-def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None:
+def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_col: str='dt') -> None:
    # todo infer dt column automatically, reuse in stat?
    # it doesn't like dots, ends up some syntax error?
    measurement = measurement.replace('.', '_')
@@ -35,7 +30,6 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None:
    db = config.db
 
    from influxdb import InfluxDBClient  # type: ignore
-
    client = InfluxDBClient()
    # todo maybe create if not exists?
    # client.create_database(db)
@@ -46,7 +40,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None:
        client.delete_series(database=db, measurement=measurement)
 
    # TODO need to take schema here...
-    cache: dict[str, bool] = {}
+    cache: Dict[str, bool] = {}
 
    def good(f, v) -> bool:
        c = cache.get(f)
@@ -65,9 +59,9 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None:
    def dit() -> Iterable[Json]:
        for i in it:
            d = asdict(i)
-            tags: Json | None = None
+            tags: Optional[Json] = None
            tags_ = d.get('tags')  # meh... handle in a more robust manner
            if tags_ is not None and isinstance(tags_, dict):  # FIXME meh.
                del d['tags']
                tags = tags_
 
@@ -78,19 +72,18 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None:
 
            fields = filter_dict(d)
 
-            yield {
-                'measurement': measurement,
+            yield dict(
+                measurement=measurement,
                # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
                # hmm, so tags are autoindexed and might be faster?
                # not sure what's the big difference though
                # "fields are data and tags are metadata"
-                'tags': tags,
-                'time': dt,
-                'fields': fields,
-            }
+                tags=tags,
+                time=dt,
+                fields=fields,
+            )
 
    from more_itertools import chunked
 
    # "The optimal batch size is 5000 lines of line protocol."
    # some chunking is def necessary, otherwise it fails
    inserted = 0
@@ -104,9 +97,9 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None:
    # todo "Specify timestamp precision when writing to InfluxDB."?
 
 
-def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> None:
+def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> None:
    if name is None:
        assert callable(it)  # generators have no name/module
        name = f'{it.__module__}:{it.__name__}'
    assert name is not None
 
@@ -116,7 +109,6 @@ def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> None:
    from itertools import tee
-
    from more_itertools import first, one
 
    it, x = tee(it)
    f = first(x, default=None)
    if f is None:
@@ -126,11 +118,9 @@ def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> None:
    # TODO can we reuse pandas code or something?
    #
    from .pandas import _as_columns
-
    schema = _as_columns(type(f))
 
    from datetime import datetime
-
    dtex = RuntimeError(f'expected single datetime field. schema: {schema}')
    dtf = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex)
@@ -145,9 +135,8 @@ def main() -> None:
 @main.command(name='populate', short_help='populate influxdb')
 @click.option('--reset', is_flag=True, help='Reset Influx measurements before inserting', show_default=True)
 @click.argument('FUNCTION_NAME', type=str, required=True)
-def populate(*, function_name: str, reset: bool) -> None:
+def populate(function_name: str, reset: bool) -> None:
    from .__main__ import _locate_functions_or_prompt
 
    [provider] = list(_locate_functions_or_prompt([function_name]))
    # todo could have a non-interactive version which populates from all data sources for the provider?
    magic_fill(provider, reset=reset)
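For context, a hypothetical call to the fill() helper above. The dataclass and its values are made up for illustration, and a running InfluxDB plus config.db are assumed to be set up elsewhere, so the actual call is left commented out.

from dataclasses import dataclass
from datetime import datetime, timezone


@dataclass
class Temperature:
    dt: datetime  # picked up via the default dt_col='dt'
    value: float


def measurements():
    yield Temperature(dt=datetime(2024, 1, 1, tzinfo=timezone.utc), value=21.5)
    yield Temperature(dt=datetime(2024, 1, 2, tzinfo=timezone.utc), value=19.0)


# fill(measurements(), measurement='my.temperature', reset=True)
# note: the dot in the measurement name would be rewritten to 'my_temperature', as above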
@@ -19,14 +19,13 @@ def setup_config() -> None:
    from pathlib import Path
 
    from .preinit import get_mycfg_dir
 
    mycfg_dir = get_mycfg_dir()
 
    if not mycfg_dir.exists():
        warnings.warn(f"""
 'my.config' package isn't found! (expected at '{mycfg_dir}'). This is likely to result in issues.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""".strip(), stacklevel=1)
+""".strip())
        return
 
    mpath = str(mycfg_dir)
@@ -44,12 +43,11 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
    except ImportError as ex:
        # just in case... who knows what crazy setup users have
        import logging
 
        logging.exception(ex)
        warnings.warn(f"""
 Importing 'my.config' failed! (error: {ex}). This is likely to result in issues.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""", stacklevel=1)
+""")
    else:
        # defensive just in case -- __file__ may not be present if there is some dynamic magic involved
        used_config_file = getattr(my.config, '__file__', None)
@@ -65,7 +63,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
 Expected my.config to be located at {mycfg_dir}, but instead its path is {used_config_path}.
 This will likely cause issues down the line -- double check {mycfg_dir} structure.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""", stacklevel=1
+""",
            )
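The only substantive change in this hunk is threading stacklevel through warnings.warn. A self-contained illustration of what the parameter controls:

import warnings


def notify_deprecated() -> None:
    # stacklevel=2 attributes the warning to the caller of notify_deprecated,
    # so the reported file:line points at the offending call site;
    # stacklevel=1 (the default) would point at this warn() call itself
    warnings.warn("notify_deprecated is deprecated", DeprecationWarning, stacklevel=2)


notify_deprecated()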
@@ -1,7 +1,4 @@
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
+from .internal import assert_subpackage; assert_subpackage(__name__)
 
 from . import warnings
 
 # do this later -- for now need to transition modules to avoid using kompress directly (e.g. ZipPath)
@@ -11,7 +8,10 @@ try:
    from kompress import *
 except ModuleNotFoundError as e:
    if e.name == 'kompress':
-        warnings.high('Please install kompress (pip3 install kompress). Falling onto vendorized kompress for now.')
+        warnings.high('Please install kompress (pip3 install kompress), it will be required in the future. Falling onto vendorized kompress for now.')
        from ._deprecated.kompress import *  # type: ignore[assignment]
    else:
        raise e
+
+# this is deprecated in kompress, keep here for backwards compatibility
+open = kopen  # noqa: F405
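The same optional-dependency pattern in isolation, outside of this module (a sketch; the vendored fallback module path is only meaningful within HPI):

try:
    from kompress import *  # noqa: F403  # prefer the standalone package if installed
except ModuleNotFoundError as e:
    if e.name == 'kompress':
        # fall back to the vendored copy shipped with the codebase
        from my.core._deprecated.kompress import *  # noqa: F403
    else:
        # some other import inside kompress failed -- don't mask it
        raise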
@@ -5,21 +5,17 @@ This can potentially allow both for safer defensive parsing, and let you know if
 TODO perhaps need to get some inspiration from linear logic to decide on a nice API...
 '''
 
-from __future__ import annotations
-
 from collections import OrderedDict
-from typing import Any
+from typing import Any, List
 
 
 def ignore(w, *keys):
    for k in keys:
        w[k].ignore()
 
 
 def zoom(w, *keys):
    return [w[k].zoom() for k in keys]
 
 
 # TODO need to support lists
 class Zoomable:
    def __init__(self, parent, *args, **kwargs) -> None:
@@ -44,7 +40,7 @@ class Zoomable:
        assert self.parent is not None
        self.parent._remove(self)
 
-    def zoom(self) -> Zoomable:
+    def zoom(self) -> 'Zoomable':
        self.consume()
        return self
 
@@ -67,7 +63,6 @@ class Wdict(Zoomable, OrderedDict):
 
    def this_consumed(self):
        return len(self) == 0
-
    # TODO specify mypy type for the index special method?
 
 
@@ -82,7 +77,6 @@ class Wlist(Zoomable, list):
    def this_consumed(self):
        return len(self) == 0
-
 
 class Wvalue(Zoomable):
    def __init__(self, parent, value: Any) -> None:
        super().__init__(parent)
@@ -93,20 +87,23 @@ class Wvalue(Zoomable):
        return []
 
    def this_consumed(self):
        return True  # TODO not sure..
 
    def __repr__(self):
        return 'WValue{' + repr(self.value) + '}'
 
 
-def _wrap(j, parent=None) -> tuple[Zoomable, list[Zoomable]]:
+from typing import Tuple
+
+
+def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
    res: Zoomable
-    cc: list[Zoomable]
+    cc: List[Zoomable]
    if isinstance(j, dict):
        res = Wdict(parent)
        cc = [res]
        for k, v in j.items():
            vv, c = _wrap(v, parent=res)
            res[k] = vv
            cc.extend(c)
        return res, cc
@@ -125,23 +122,22 @@ def _wrap(j, parent=None) -> tuple[Zoomable, list[Zoomable]]:
    raise RuntimeError(f'Unexpected type: {type(j)} {j}')
 
 
-from collections.abc import Iterator
 from contextlib import contextmanager
+from typing import Iterator
 
 
 class UnconsumedError(Exception):
    pass
 
 
 # TODO think about error policy later...
 @contextmanager
-def wrap(j, *, throw=True) -> Iterator[Zoomable]:
+def wrap(j, throw=True) -> Iterator[Zoomable]:
    w, children = _wrap(j)
 
    yield w
 
    for c in children:
        if not c.this_consumed():  # TODO hmm. how does it figure out if it's consumed???
            if throw:
                # TODO need to keep a full path or something...
                raise UnconsumedError(f'''
@@ -157,7 +153,6 @@ from typing import cast
 
 def test_unconsumed() -> None:
    import pytest
 
    with pytest.raises(UnconsumedError):
        with wrap({'a': 1234}) as w:
            w = cast(Wdict, w)
@@ -168,7 +163,6 @@ def test_unconsumed() -> None:
            w = cast(Wdict, w)
            d = w['c']['d'].zoom()
 
 
 def test_consumed() -> None:
    with wrap({'a': 1234}) as w:
        w = cast(Wdict, w)
@@ -179,7 +173,6 @@ def test_consumed() -> None:
        c = w['c'].zoom()
        d = c['d'].zoom()
 
 
 def test_types() -> None:
    # (string, number, object, array, boolean or null)
    with wrap({'string': 'string', 'number': 3.14, 'boolean': True, 'null': None, 'list': [1, 2, 3]}) as w:
@@ -188,10 +181,9 @@ def test_types() -> None:
        w['number'].consume()
        w['boolean'].zoom()
        w['null'].zoom()
        for x in list(w['list'].zoom()):  # TODO eh. how to avoid the extra list thing?
            x.consume()
 
 
 def test_consume_all() -> None:
    with wrap({'aaa': {'bbb': {'hi': 123}}}) as w:
        w = cast(Wdict, w)
@@ -201,9 +193,11 @@ def test_consume_all() -> None:
 
 def test_consume_few() -> None:
    import pytest
 
    pytest.skip('Will think about it later..')
-    with wrap({'important': 123, 'unimportant': 'whatever'}) as w:
+    with wrap({
+        'important': 123,
+        'unimportant': 'whatever'
+    }) as w:
        w = cast(Wdict, w)
        w['important'].zoom()
        w.consume_all()
@@ -212,7 +206,6 @@ def test_consume_few() -> None:
 
 def test_zoom() -> None:
    import pytest
 
    with wrap({'aaa': 'whatever'}) as w:
        w = cast(Wdict, w)
        with pytest.raises(KeyError):
@@ -236,7 +229,7 @@ def test_zoom() -> None:
 # - very flexible, easy to adjust behaviour
 # - cons:
 #   - can forget to assert about extra entities etc, so error prone
-#   - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
+#   - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes erro handling harder
 #   - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
 #   - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
 # - TODO perhaps combine warnings somehow or at least only emit once per module?
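A short usage sketch based on the tests above, assuming the konsume helpers (wrap, Wdict and friends) are in scope: every value must be consumed (zoomed or ignored) before the context manager exits, otherwise wrap() raises UnconsumedError.

with wrap({'status': 200, 'body': {'text': 'hi'}}) as w:
    assert w['status'].zoom().value == 200  # zoom() consumes the key and returns the wrapped value
    body = w['body'].zoom()
    body['text'].consume()  # nothing left unconsumed, so no UnconsumedError on exit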
@@ -15,7 +15,7 @@ def test() -> None:
 
    ## prepare exception for later
    try:
-        None.whatever  # type: ignore[attr-defined]  # noqa: B018
+        None.whatever  # type: ignore[attr-defined]
    except Exception as e:
        ex = e
    ##
@@ -146,7 +146,7 @@ def _setup_handlers_and_formatters(name: str) -> None:
        # try colorlog first, so user gets nice colored logs
        import colorlog
    except ModuleNotFoundError:
-        warnings.warn("You might want to 'pip install colorlog' for nice colored logs", stacklevel=1)
+        warnings.warn("You might want to 'pip install colorlog' for nice colored logs")
        formatter = logging.Formatter(FORMAT_NOCOLOR)
    else:
        # log_color/reset are specific to colorlog
@@ -233,7 +233,7 @@ def get_enlighten():
    try:
        import enlighten  # type: ignore[import-untyped]
    except ModuleNotFoundError:
-        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar", stacklevel=1)
+        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar")
 
        return Mock()
 
@@ -250,7 +250,7 @@ if __name__ == '__main__':
    test()
 
 
-## legacy/deprecated methods for backwards compatibility
+## legacy/deprecated methods for backwards compatilibity
 if not TYPE_CHECKING:
    from .compat import deprecated
 
@@ -2,14 +2,11 @@
 Utils for mime/filetype handling
 """
 
-from __future__ import annotations
-
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
+from .internal import assert_subpackage; assert_subpackage(__name__)
 
 import functools
-from pathlib import Path
+
+from .common import PathIsh
 
 
 @functools.lru_cache(1)
@@ -26,7 +23,7 @@ import mimetypes  # todo do I need init()?
 
 # todo wtf? fastermime thinks its mime is application/json even if the extension is xz??
 # whereas magic detects correctly: application/x-zstd and application/x-xz
-def fastermime(path: Path | str) -> str:
+def fastermime(path: PathIsh) -> str:
    paths = str(path)
    # mimetypes is faster, so try it first
    (mime, _) = mimetypes.guess_type(paths)
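The comment above is easy to reproduce with mimetypes alone; on recent CPython the .xz suffix is treated as an encoding, so the extension-based guess still reports the inner type (a small self-check, assuming standard encodings_map behaviour):

import mimetypes

mime, encoding = mimetypes.guess_type('data.json.xz')
# extension-based guessing strips the known compression suffix and reports the
# inner type, which is why content sniffing (python-magic) disagrees here
assert mime == 'application/json'
assert encoding == 'xz'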
@@ -1,42 +1,39 @@
 """
 Various helpers for reading org-mode data
 """
 
 from datetime import datetime
 
 
 def parse_org_datetime(s: str) -> datetime:
    s = s.strip('[]')
-    for fmt, _cls in [
+    for fmt, cl in [
        ("%Y-%m-%d %a %H:%M", datetime),
        ("%Y-%m-%d %H:%M"   , datetime),
        # todo not sure about these... fallback on 00:00?
        # ("%Y-%m-%d %a"    , date),
        # ("%Y-%m-%d"       , date),
    ]:
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
-    raise RuntimeError(f"Bad datetime string {s}")
+    else:
+        raise RuntimeError(f"Bad datetime string {s}")
 
 
 # TODO I guess want to borrow inspiration from bs4? element type <-> tag; and similar logic for find_one, find_all
 
-from collections.abc import Iterable
-from typing import Callable, TypeVar
+from typing import Callable, Iterable, TypeVar
 
 from orgparse import OrgNode
 
 V = TypeVar('V')
 
 
 def collect(n: OrgNode, cfun: Callable[[OrgNode], Iterable[V]]) -> Iterable[V]:
    yield from cfun(n)
    for c in n.children:
        yield from collect(c, cfun)
 
 
 from more_itertools import one
 from orgparse.extra import Table
 
@@ -50,7 +47,7 @@ class TypedTable(Table):
        tt = super().__new__(TypedTable)
        tt.__dict__ = orig.__dict__
        blocks = list(orig.blocks)
        header = blocks[0]  # first block is schema
        if len(header) == 2:
            # TODO later interpret first line as types
            header = header[1:]
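The parse_org_datetime change above drops a for/else in favour of a plain raise after the loop; the two are equivalent here because the loop body returns on the first successful parse. The same shape in isolation:

from datetime import datetime


def parse_two_formats(s: str) -> datetime:
    for fmt in ("%Y-%m-%d %H:%M", "%Y-%m-%d"):
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
    # reached only if every format failed -- same behaviour as the removed for/else
    raise RuntimeError(f"Bad datetime string {s}")


assert parse_two_formats("2024-08-24").year == 2024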
@@ -7,14 +7,17 @@ from __future__ import annotations
 # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
 # NOTE: this file is meant to be importable without Pandas installed
 import dataclasses
-from collections.abc import Iterable, Iterator
 from datetime import datetime, timezone
 from pprint import pformat
 from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
+    Dict,
+    Iterable,
+    Iterator,
    Literal,
+    Type,
    TypeVar,
 )
 
@@ -175,10 +178,10 @@ def _to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
 Schema = Any
 
 
-def _as_columns(s: Schema) -> dict[str, type]:
+def _as_columns(s: Schema) -> Dict[str, Type]:
    # todo would be nice to extract properties; add tests for this as well
    if dataclasses.is_dataclass(s):
-        return {f.name: f.type for f in dataclasses.fields(s)}  # type: ignore[misc]  # ugh, why mypy thinks f.type can return str??
+        return {f.name: f.type for f in dataclasses.fields(s)}
    # else must be NamedTuple??
    # todo assert my.core.common.is_namedtuple?
    return getattr(s, '_field_types')
@@ -219,7 +222,7 @@ def test_as_dataframe() -> None:
 
    from .compat import fromisoformat
 
-    it = ({'i': i, 's': f'str{i}'} for i in range(5))
+    it = (dict(i=i, s=f'str{i}') for i in range(5))
    with pytest.warns(UserWarning, match=r"No 'error' column") as record_warnings:  # noqa: F841
        df: DataFrameT = as_dataframe(it)
    # todo test other error col policies
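What the dataclasses branch of _as_columns computes, shown on a made-up dataclass:

import dataclasses
from datetime import datetime


@dataclasses.dataclass
class Point:
    dt: datetime
    value: float


# with plain (non-stringified) annotations, f.type is the actual type object
columns = {f.name: f.type for f in dataclasses.fields(Point)}
assert columns == {'dt': datetime, 'value': float}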
@@ -8,7 +8,6 @@ def get_mycfg_dir() -> Path:
    import os
-
    import appdirs  # type: ignore[import-untyped]
 
    # not sure if that's necessary, i.e. could rely on PYTHONPATH instead
    # on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path?
    mvar = os.environ.get('MY_CONFIG')
@@ -2,9 +2,7 @@
 Helpers to prevent depending on pytest in runtime
 """
 
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
+from .internal import assert_subpackage; assert_subpackage(__name__)
 
 import sys
 import typing
@@ -17,7 +15,7 @@ if typing.TYPE_CHECKING or under_pytest:
    parametrize = pytest.mark.parametrize
 else:
 
-    def parametrize(*_args, **_kwargs):
+    def parametrize(*args, **kwargs):
        def wrapper(f):
            return f
 
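The shim's behaviour outside pytest, in isolation: the decorator accepts arbitrary arguments and returns the function unchanged, so importing a test module at runtime doesn't require pytest to be installed.

def parametrize(*_args, **_kwargs):
    def wrapper(f):
        return f

    return wrapper


@parametrize('n', [1, 2, 3])
def test_square(n: int = 2) -> None:
    assert n * n == n ** 2


test_square()  # still directly callable; under pytest the real mark takes over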
107 my/core/query.py
@@ -5,20 +5,23 @@ The main entrypoint to this library is the 'select' function below; try:
    python3 -c "from my.core.query import select; help(select)"
 """
 
-from __future__ import annotations
-
 import dataclasses
 import importlib
 import inspect
 import itertools
-from collections.abc import Iterable, Iterator
 from datetime import datetime
 from typing import (
    Any,
    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
    NamedTuple,
    Optional,
+    Tuple,
    TypeVar,
+    Union,
 )
 
 import more_itertools
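The pattern behind this hunk (and most of the hunks below): with the __future__ import, annotations are never evaluated at runtime, so PEP 604 unions and PEP 585 builtin generics can replace typing.Optional/List/Dict even on Python versions where e.g. int | None is not a valid runtime expression. A minimal sketch:

from __future__ import annotations


def head(xs: list[int], default: int | None = None) -> int | None:
    return xs[0] if xs else default


assert head([1, 2]) == 1
assert head([]) is None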
@@ -48,7 +51,6 @@ class Unsortable(NamedTuple):
 
 class QueryException(ValueError):
    """Used to differentiate query-related errors, so the CLI interface is more expressive"""
-
    pass
 
 
@@ -61,7 +63,7 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterable[ET]]:
    """
    try:
        mod = importlib.import_module(module_name)
-        for fname, f in inspect.getmembers(mod, inspect.isfunction):
+        for (fname, f) in inspect.getmembers(mod, inspect.isfunction):
            if fname == function_name:
                return f
        # in case the function is defined dynamically,
@@ -70,7 +72,7 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterable[ET]]:
        if func is not None and callable(func):
            return func
    except Exception as e:
-        raise QueryException(str(e))  # noqa: B904
+        raise QueryException(str(e))
    raise QueryException(f"Could not find function '{function_name}' in '{module_name}'")
 
 
@@ -81,10 +83,10 @@ def locate_qualified_function(qualified_name: str) -> Callable[[], Iterable[ET]]:
    if "." not in qualified_name:
        raise QueryException("Could not find a '.' in the function name, e.g. my.reddit.rexport.comments")
    rdot_index = qualified_name.rindex(".")
-    return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1 :])
+    return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1:])
 
 
-def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc | None:
+def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optional[OrderFunc]:
    """
    Attempts to find an attribute which matches the 'where_function' on the object,
    using some getattr/dict checks. Returns a function which when called with
@@ -112,7 +114,7 @@ def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc
            if where(v):
                return lambda o: o.get(k, default)  # type: ignore[union-attr]
    elif dataclasses.is_dataclass(obj):
-        for field_name in obj.__annotations__.keys():
+        for (field_name, _annotation) in obj.__annotations__.items():
            if where(getattr(obj, field_name)):
                return lambda o: getattr(o, field_name, default)
    elif is_namedtuple(obj):
@@ -129,13 +131,12 @@ def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc
 
 
 def _generate_order_by_func(
    obj_res: Res[T],
-    *,
-    key: str | None = None,
-    where_function: Where | None = None,
-    default: U | None = None,
+    key: Optional[str] = None,
+    where_function: Optional[Where] = None,
+    default: Optional[U] = None,
    force_unsortable: bool = False,
-) -> OrderFunc | None:
+) -> Optional[OrderFunc]:
    """
    Accepts an object Res[T] (Instance of some class or Exception)
 
@@ -200,7 +201,7 @@ pass 'drop_exceptions' to ignore exceptions""")
 
    # user must provide either a key or a where predicate
    if where_function is not None:
-        func: OrderFunc | None = attribute_func(obj, where_function, default)
+        func: Optional[OrderFunc] = attribute_func(obj, where_function, default)
        if func is not None:
            return func
 
@@ -216,6 +217,8 @@ pass 'drop_exceptions' to ignore exceptions""")
    return None  # couldn't compute a OrderFunc for this class/instance
 
 
+
+
 # currently using the 'key set' as a proxy for 'this is the same type of thing'
 def _determine_order_by_value_key(obj_res: ET) -> Any:
    """
@@ -240,7 +243,7 @@ def _drop_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Iterator[ET]:
 
 # try getting the first value from the iterator
 # similar to my.core.common.warn_if_empty? this doesn't go through the whole iterator though
-def _peek_iter(itr: Iterator[ET]) -> tuple[ET | None, Iterator[ET]]:
+def _peek_iter(itr: Iterator[ET]) -> Tuple[Optional[ET], Iterator[ET]]:
    itr = more_itertools.peekable(itr)
    try:
        first_item = itr.peek()
@@ -251,9 +254,9 @@ def _peek_iter(itr: Iterator[ET]) -> tuple[ET | None, Iterator[ET]]:
 
 
 # similar to 'my.core.error.sort_res_by'?
-def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> tuple[Iterator[Unsortable], Iterator[ET]]:
-    unsortable: list[Unsortable] = []
-    sortable: list[ET] = []
+def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
+    unsortable: List[Unsortable] = []
+    sortable: List[ET] = []
    for o in itr:
        # if input to select was another select
        if isinstance(o, Unsortable):
@@ -271,11 +274,10 @@ def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> tuple[Iterator[Unsortable], Iterator[ET]]:
 # the second being items for which orderfunc returned a non-none value
 def _handle_unsorted(
    itr: Iterator[ET],
-    *,
    orderfunc: OrderFunc,
    drop_unsorted: bool,
    wrap_unsorted: bool
-) -> tuple[Iterator[Unsortable], Iterator[ET]]:
+) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
    # prefer drop_unsorted to wrap_unsorted, if both were present
    if drop_unsorted:
        return iter([]), _drop_unsorted(itr, orderfunc)
@@ -290,16 +292,16 @@ def _handle_unsorted(
 # different types. ***This consumes the iterator***, so
 # you should definitely itertools.tee it beforehand
 # as to not exhaust the values
-def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: U | None = None) -> OrderFunc:
+def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: Optional[U] = None) -> OrderFunc:
    # TODO: add a kwarg to force lookup for every item? would sort of be like core.common.guess_datetime then
-    order_by_lookup: dict[Any, OrderFunc] = {}
+    order_by_lookup: Dict[Any, OrderFunc] = {}
 
    # need to go through a copy of the whole iterator here to
    # pre-generate functions to support sorting mixed types
    for obj_res in itr:
        key: Any = _determine_order_by_value_key(obj_res)
        if key not in order_by_lookup:
-            keyfunc: OrderFunc | None = _generate_order_by_func(
+            keyfunc: Optional[OrderFunc] = _generate_order_by_func(
                obj_res,
                where_function=order_value,
                default=default,
@@ -320,12 +322,12 @@ def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: U
 def _handle_generate_order_by(
    itr,
    *,
-    order_by: OrderFunc | None = None,
-    order_key: str | None = None,
-    order_value: Where | None = None,
-    default: U | None = None,
-) -> tuple[OrderFunc | None, Iterator[ET]]:
-    order_by_chosen: OrderFunc | None = order_by  # if the user just supplied a function themselves
+    order_by: Optional[OrderFunc] = None,
+    order_key: Optional[str] = None,
+    order_value: Optional[Where] = None,
+    default: Optional[U] = None,
+) -> Tuple[Optional[OrderFunc], Iterator[ET]]:
+    order_by_chosen: Optional[OrderFunc] = order_by  # if the user just supplied a function themselves
    if order_by is not None:
        return order_by, itr
    if order_key is not None:
@@ -350,19 +352,19 @@ def _handle_generate_order_by(
 
 
 def select(
-    src: Iterable[ET] | Callable[[], Iterable[ET]],
+    src: Union[Iterable[ET], Callable[[], Iterable[ET]]],
    *,
-    where: Where | None = None,
-    order_by: OrderFunc | None = None,
-    order_key: str | None = None,
-    order_value: Where | None = None,
-    default: U | None = None,
+    where: Optional[Where] = None,
+    order_by: Optional[OrderFunc] = None,
+    order_key: Optional[str] = None,
+    order_value: Optional[Where] = None,
+    default: Optional[U] = None,
    reverse: bool = False,
-    limit: int | None = None,
+    limit: Optional[int] = None,
    drop_unsorted: bool = False,
    wrap_unsorted: bool = True,
    warn_exceptions: bool = False,
-    warn_func: Callable[[Exception], None] | None = None,
+    warn_func: Optional[Callable[[Exception], None]] = None,
    drop_exceptions: bool = False,
    raise_exceptions: bool = False,
 ) -> Iterator[ET]:
@@ -464,7 +466,7 @@ Will attempt to call iter() on the value""")
    try:
        itr: Iterator[ET] = iter(it)
    except TypeError as t:
-        raise QueryException("Could not convert input src to an Iterator: " + str(t))  # noqa: B904
+        raise QueryException("Could not convert input src to an Iterator: " + str(t))
 
    # if both drop_exceptions and raise_exceptions are provided for some reason,
    # should raise exceptions before dropping them
@@ -501,12 +503,7 @@ Will attempt to call iter() on the value""")
        # note: can't just attach sort unsortable values in the same iterable as the
        # other items because they don't have any lookups for order_key or functions
        # to handle items in the order_by_lookup dictionary
-        unsortable, itr = _handle_unsorted(
-            itr,
-            orderfunc=order_by_chosen,
-            drop_unsorted=drop_unsorted,
-            wrap_unsorted=wrap_unsorted,
-        )
+        unsortable, itr = _handle_unsorted(itr, order_by_chosen, drop_unsorted, wrap_unsorted)
 
        # run the sort, with the computed order by function
        itr = iter(sorted(itr, key=order_by_chosen, reverse=reverse))  # type: ignore[arg-type]
@@ -613,7 +610,7 @@ class _B(NamedTuple):
 
 # move these to tests/? They are re-used so much in the tests below,
 # not sure where the best place for these is
-def _mixed_iter() -> Iterator[_A | _B]:
+def _mixed_iter() -> Iterator[Union[_A, _B]]:
    yield _A(x=datetime(year=2009, month=5, day=10, hour=4, minute=10, second=1), y=5, z=10)
    yield _B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))
    yield _A(x=datetime(year=2005, month=5, day=10, hour=4, minute=10, second=1), y=10, z=2)
@@ -622,7 +619,7 @@ def _mixed_iter() -> Iterator[_A | _B]:
    yield _A(x=datetime(year=2005, month=4, day=10, hour=4, minute=10, second=1), y=2, z=-5)
 
 
-def _mixed_iter_errors() -> Iterator[Res[_A | _B]]:
+def _mixed_iter_errors() -> Iterator[Res[Union[_A, _B]]]:
    m = _mixed_iter()
    yield from itertools.islice(m, 0, 3)
    yield RuntimeError("Unhandled error!")
@@ -658,7 +655,7 @@ def test_wrap_unsortable() -> None:
 
    # by default, wrap unsortable
    res = list(select(_mixed_iter(), order_key="z"))
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 2})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 2})
 
 
 def test_disabled_wrap_unsorted() -> None:
@@ -677,7 +674,7 @@ def test_drop_unsorted() -> None:
    # test drop unsortable, should remove them before the 'sorted' call
    res = list(select(_mixed_iter(), order_key="z", wrap_unsorted=False, drop_unsorted=True))
    assert len(res) == 4
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4})
 
 
 def test_drop_exceptions() -> None:
@@ -708,9 +705,9 @@ def test_wrap_unsortable_with_error_and_warning() -> None:
    # by default should wrap unsortable (error)
    with pytest.warns(UserWarning, match=r"encountered exception"):
        res = list(select(_mixed_iter_errors(), order_value=lambda o: isinstance(o, datetime)))
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
    # compare the returned error wrapped in the Unsortable
-    returned_error = next(o for o in res if isinstance(o, Unsortable)).obj
+    returned_error = next((o for o in res if isinstance(o, Unsortable))).obj
    assert "Unhandled error!" == str(returned_error)
 
 
@@ -720,7 +717,7 @@ def test_order_key_unsortable() -> None:
 
    # both unsortable and items which don't match the order_by (order_key) in this case should be classified unsorted
    res = list(select(_mixed_iter_errors(), order_key="z"))
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 3})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 3})
 
 
 def test_order_default_param() -> None:
@@ -740,7 +737,7 @@ def test_no_recursive_unsortables() -> None:
    # select to select as input, wrapping unsortables the first time, second should drop them
    # reverse=True to send errors to the end, so the below order_key works
    res = list(select(_mixed_iter_errors(), order_key="z", reverse=True))
-    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 3})
+    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 3})
 
    # drop_unsorted
    dropped = list(select(res, order_key="z", drop_unsorted=True))
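The recurring test change above swaps map(lambda ...) for a generator expression; both produce identical Counters, the latter just avoids the lambda:

from collections import Counter

res = ['a', 'bb', 'ccc', 'dd']
assert Counter(len(t) for t in res) == Counter(map(lambda t: len(t), res))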
@@ -7,14 +7,11 @@ filtered iterator
 See the select_range function below
 """
 
-from __future__ import annotations
-
 import re
 import time
-from collections.abc import Iterator
 from datetime import date, datetime, timedelta
-from functools import cache
-from typing import Any, Callable, NamedTuple
+from functools import lru_cache
+from typing import Any, Callable, Iterator, NamedTuple, Optional, Type
 
 import more_itertools
 
@@ -28,9 +25,7 @@ from .query import (
    select,
 )
 
-timedelta_regex = re.compile(
-    r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$"
-)
+timedelta_regex = re.compile(r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$")
 
 
 # https://stackoverflow.com/a/51916936
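Every unit group in timedelta_regex is optional, so compact strings like '5d8h' parse into named groups:

import re

timedelta_regex = re.compile(
    r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$"
)

m = timedelta_regex.match('5d8h')
assert m is not None
assert m.group('days') == '5' and m.group('hours') == '8'
assert m.group('weeks') is None  # absent units simply don't match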
|
@ -93,7 +88,7 @@ def parse_datetime_float(date_str: str) -> float:
|
||||||
# dateparser is a bit more lenient than the above, lets you type
|
# dateparser is a bit more lenient than the above, lets you type
|
||||||
# all sorts of dates as inputs
|
# all sorts of dates as inputs
|
||||||
# https://github.com/scrapinghub/dateparser#how-to-use
|
# https://github.com/scrapinghub/dateparser#how-to-use
|
||||||
res: datetime | None = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
|
res: Optional[datetime] = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
|
||||||
if res is not None:
|
if res is not None:
|
||||||
return res.timestamp()
|
return res.timestamp()
|
||||||
|
|
||||||
|
@ -103,7 +98,7 @@ def parse_datetime_float(date_str: str) -> float:
|
||||||
# probably DateLike input? but a user could specify an order_key
|
# probably DateLike input? but a user could specify an order_key
|
||||||
# which is an epoch timestamp or a float value which they
|
# which is an epoch timestamp or a float value which they
|
||||||
# expect to be converted to a datetime to compare
|
# expect to be converted to a datetime to compare
|
||||||
@cache
|
@lru_cache(maxsize=None)
|
||||||
def _datelike_to_float(dl: Any) -> float:
|
def _datelike_to_float(dl: Any) -> float:
|
||||||
if isinstance(dl, datetime):
|
if isinstance(dl, datetime):
|
||||||
return dl.timestamp()
|
return dl.timestamp()
|
||||||
|
@ -114,7 +109,7 @@ def _datelike_to_float(dl: Any) -> float:
|
||||||
try:
|
try:
|
||||||
return parse_datetime_float(dl)
|
return parse_datetime_float(dl)
|
||||||
except QueryException as q:
|
except QueryException as q:
|
||||||
raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q)) # noqa: B904
|
raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q))
|
||||||
|
|
||||||
|
|
||||||
class RangeTuple(NamedTuple):
|
class RangeTuple(NamedTuple):
|
||||||
|
@ -135,12 +130,11 @@ class RangeTuple(NamedTuple):
|
||||||
of the timeframe -- 'before'
|
of the timeframe -- 'before'
|
||||||
- before and after - anything after 'after' and before 'before', acts as a time range
|
- before and after - anything after 'after' and before 'before', acts as a time range
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# technically doesn't need to be Optional[Any],
|
# technically doesn't need to be Optional[Any],
|
||||||
# just to make it more clear these can be None
|
# just to make it more clear these can be None
|
||||||
after: Any | None
|
after: Optional[Any]
|
||||||
before: Any | None
|
before: Optional[Any]
|
||||||
within: Any | None
|
within: Optional[Any]
|
||||||
|
|
||||||
|
|
||||||
Converter = Callable[[Any], Any]
|
Converter = Callable[[Any], Any]
|
||||||
|
@ -151,9 +145,9 @@ def _parse_range(
|
||||||
unparsed_range: RangeTuple,
|
unparsed_range: RangeTuple,
|
||||||
end_parser: Converter,
|
end_parser: Converter,
|
||||||
within_parser: Converter,
|
within_parser: Converter,
|
||||||
parsed_range: RangeTuple | None = None,
|
parsed_range: Optional[RangeTuple] = None,
|
||||||
error_message: str | None = None,
|
error_message: Optional[str] = None
|
||||||
) -> RangeTuple | None:
|
) -> Optional[RangeTuple]:
|
||||||
|
|
||||||
if parsed_range is not None:
|
if parsed_range is not None:
|
||||||
return parsed_range
|
return parsed_range
|
||||||
|
@ -182,11 +176,11 @@ def _create_range_filter(
|
||||||
end_parser: Converter,
|
end_parser: Converter,
|
||||||
within_parser: Converter,
|
within_parser: Converter,
|
||||||
attr_func: Where,
|
attr_func: Where,
|
||||||
parsed_range: RangeTuple | None = None,
|
parsed_range: Optional[RangeTuple] = None,
|
||||||
default_before: Any | None = None,
|
default_before: Optional[Any] = None,
|
||||||
value_coercion_func: Converter | None = None,
|
value_coercion_func: Optional[Converter] = None,
|
||||||
error_message: str | None = None,
|
error_message: Optional[str] = None,
|
||||||
) -> Where | None:
|
) -> Optional[Where]:
|
||||||
"""
|
"""
|
||||||
Handles:
|
Handles:
|
||||||
- parsing the user input into values that are comparable to items the iterable returns
|
- parsing the user input into values that are comparable to items the iterable returns
|
||||||
|
@ -278,17 +272,17 @@ def _create_range_filter(
|
||||||
def select_range(
|
def select_range(
|
||||||
itr: Iterator[ET],
|
itr: Iterator[ET],
|
||||||
*,
|
*,
|
||||||
where: Where | None = None,
|
where: Optional[Where] = None,
|
||||||
order_key: str | None = None,
|
order_key: Optional[str] = None,
|
||||||
order_value: Where | None = None,
|
order_value: Optional[Where] = None,
|
||||||
order_by_value_type: type | None = None,
|
order_by_value_type: Optional[Type] = None,
|
||||||
unparsed_range: RangeTuple | None = None,
|
unparsed_range: Optional[RangeTuple] = None,
|
||||||
reverse: bool = False,
|
reverse: bool = False,
|
||||||
limit: int | None = None,
|
limit: Optional[int] = None,
|
||||||
drop_unsorted: bool = False,
|
drop_unsorted: bool = False,
|
||||||
wrap_unsorted: bool = False,
|
wrap_unsorted: bool = False,
|
||||||
warn_exceptions: bool = False,
|
warn_exceptions: bool = False,
|
||||||
warn_func: Callable[[Exception], None] | None = None,
|
warn_func: Optional[Callable[[Exception], None]] = None,
|
||||||
drop_exceptions: bool = False,
|
drop_exceptions: bool = False,
|
||||||
raise_exceptions: bool = False,
|
raise_exceptions: bool = False,
|
||||||
) -> Iterator[ET]:
|
) -> Iterator[ET]:
|
||||||
|
@ -323,10 +317,9 @@ def select_range(
|
||||||
drop_exceptions=drop_exceptions,
|
drop_exceptions=drop_exceptions,
|
||||||
raise_exceptions=raise_exceptions,
|
raise_exceptions=raise_exceptions,
|
||||||
warn_exceptions=warn_exceptions,
|
warn_exceptions=warn_exceptions,
|
||||||
warn_func=warn_func,
|
warn_func=warn_func)
|
||||||
)
|
|
||||||
|
|
||||||
order_by_chosen: OrderFunc | None = None
|
order_by_chosen: Optional[OrderFunc] = None
|
||||||
|
|
||||||
# if the user didn't specify an attribute to order value, but specified a type
|
# if the user didn't specify an attribute to order value, but specified a type
|
||||||
# we should search for on each value in the iterator
|
# we should search for on each value in the iterator
|
||||||
|
@ -337,8 +330,6 @@ def select_range(
|
||||||
# if the user supplied a order_key, and/or we've generated an order_value, create
|
# if the user supplied a order_key, and/or we've generated an order_value, create
|
||||||
# the function that accesses that type on each value in the iterator
|
# the function that accesses that type on each value in the iterator
|
||||||
if order_key is not None or order_value is not None:
|
if order_key is not None or order_value is not None:
|
||||||
# _generate_order_value_func internally here creates a copy of the iterator, which has to
|
|
||||||
# be consumed in-case we're sorting by mixed types
|
|
||||||
order_by_chosen, itr = _handle_generate_order_by(itr, order_key=order_key, order_value=order_value)
|
order_by_chosen, itr = _handle_generate_order_by(itr, order_key=order_key, order_value=order_value)
|
||||||
# signifies that itr is empty -- can early return here
|
# signifies that itr is empty -- can early return here
|
||||||
if order_by_chosen is None:
|
if order_by_chosen is None:
|
||||||
|
@ -350,39 +341,37 @@ def select_range(
|
||||||
if order_by_chosen is None:
|
if order_by_chosen is None:
|
||||||
raise QueryException("""Can't order by range if we have no way to order_by!
|
raise QueryException("""Can't order by range if we have no way to order_by!
|
||||||
Specify a type or a key to order the value by""")
|
Specify a type or a key to order the value by""")
|
||||||
|
|
||||||
# force drop_unsorted=True so we can use _create_range_filter
|
|
||||||
# sort the iterable by the generated order_by_chosen function
|
|
||||||
itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
|
|
||||||
filter_func: Where | None
|
|
||||||
if order_by_value_type in [datetime, date]:
|
|
||||||
filter_func = _create_range_filter(
|
|
||||||
unparsed_range=unparsed_range,
|
|
||||||
end_parser=parse_datetime_float,
|
|
||||||
within_parser=parse_timedelta_float,
|
|
||||||
attr_func=order_by_chosen, # type: ignore[arg-type]
|
|
||||||
default_before=time.time(),
|
|
||||||
value_coercion_func=_datelike_to_float,
|
|
||||||
)
|
|
||||||
elif order_by_value_type in [int, float]:
|
|
||||||
# allow primitives to be converted using the default int(), float() callables
|
|
||||||
filter_func = _create_range_filter(
|
|
||||||
unparsed_range=unparsed_range,
|
|
||||||
end_parser=order_by_value_type,
|
|
||||||
within_parser=order_by_value_type,
|
|
||||||
attr_func=order_by_chosen, # type: ignore[arg-type]
|
|
||||||
default_before=None,
|
|
||||||
value_coercion_func=order_by_value_type,
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
# TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
|
# force drop_unsorted=True so we can use _create_range_filter
|
||||||
# would need to allow passing the end_parser, within parser, default before and value_coercion_func...
|
# sort the iterable by the generated order_by_chosen function
|
||||||
# (seems like a lot?)
|
itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
|
||||||
raise QueryException("Sorting by custom types is currently unsupported")
|
filter_func: Optional[Where]
|
||||||
|
if order_by_value_type in [datetime, date]:
|
||||||
|
filter_func = _create_range_filter(
|
||||||
|
unparsed_range=unparsed_range,
|
||||||
|
end_parser=parse_datetime_float,
|
||||||
|
within_parser=parse_timedelta_float,
|
||||||
|
attr_func=order_by_chosen, # type: ignore[arg-type]
|
||||||
|
default_before=time.time(),
|
||||||
|
value_coercion_func=_datelike_to_float)
|
||||||
|
elif order_by_value_type in [int, float]:
|
||||||
|
# allow primitives to be converted using the default int(), float() callables
|
||||||
|
filter_func = _create_range_filter(
|
||||||
|
unparsed_range=unparsed_range,
|
||||||
|
end_parser=order_by_value_type,
|
||||||
|
within_parser=order_by_value_type,
|
||||||
|
attr_func=order_by_chosen, # type: ignore[arg-type]
|
||||||
|
default_before=None,
|
||||||
|
value_coercion_func=order_by_value_type)
|
||||||
|
else:
|
||||||
|
# TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
|
||||||
|
# would need to allow passing the end_parser, within parser, default before and value_coercion_func...
|
||||||
|
# (seems like a lot?)
|
||||||
|
raise QueryException("Sorting by custom types is currently unsupported")
|
||||||
|
|
||||||
# use the created filter function
|
# use the created filter function
|
||||||
# we've already applied drop_exceptions and kwargs related to unsortable values above
|
# we've already applied drop_exceptions and kwargs related to unsortable values above
|
||||||
itr = select(itr, where=filter_func, limit=limit, reverse=reverse)
|
itr = select(itr, where=filter_func, limit=limit, reverse=reverse)
|
||||||
else:
|
else:
|
||||||
# wrap_unsorted may be used here if the user specified an order_key,
|
# wrap_unsorted may be used here if the user specified an order_key,
|
||||||
# or manually passed a order_value function
|
# or manually passed a order_value function
|
||||||
|
@ -400,7 +389,7 @@ Specify a type or a key to order the value by""")
|
||||||
return itr
|
return itr
|
||||||
|
|
||||||
|
|
||||||
# reuse items from query for testing
|
# re-use items from query for testing
|
||||||
from .query import _A, _B, _Float, _mixed_iter_errors
|
from .query import _A, _B, _Float, _mixed_iter_errors
|
||||||
|
|
||||||
|
|
||||||
|
@ -481,7 +470,7 @@ def test_range_predicate() -> None:
|
||||||
|
|
||||||
# filter from 0 to 5
|
# filter from 0 to 5
|
||||||
rn: RangeTuple = RangeTuple("0", "5", None)
|
rn: RangeTuple = RangeTuple("0", "5", None)
|
||||||
zero_to_five_filter: Where | None = int_filter_func(unparsed_range=rn)
|
zero_to_five_filter: Optional[Where] = int_filter_func(unparsed_range=rn)
|
||||||
assert zero_to_five_filter is not None
|
assert zero_to_five_filter is not None
|
||||||
# this is just a Where function, given some input it return True/False if the value is allowed
|
# this is just a Where function, given some input it return True/False if the value is allowed
|
||||||
assert zero_to_five_filter(3) is True
|
assert zero_to_five_filter(3) is True
|
||||||
|
@ -494,7 +483,6 @@ def test_range_predicate() -> None:
|
||||||
rn = RangeTuple(None, 3, "3.5")
|
rn = RangeTuple(None, 3, "3.5")
|
||||||
assert list(filter(int_filter_func(unparsed_range=rn, attr_func=identity), src())) == ["0", "1", "2"]
|
assert list(filter(int_filter_func(unparsed_range=rn, attr_func=identity), src())) == ["0", "1", "2"]
|
||||||
|
|
||||||
|
|
||||||
def test_parse_range() -> None:
|
def test_parse_range() -> None:
|
||||||
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
@ -538,8 +526,9 @@ def test_parse_timedelta_string() -> None:
|
||||||
|
|
||||||
|
|
||||||
def test_parse_datetime_float() -> None:
|
def test_parse_datetime_float() -> None:
|
||||||
|
|
||||||
pnow = parse_datetime_float("now")
|
pnow = parse_datetime_float("now")
|
||||||
sec_diff = abs(pnow - datetime.now().timestamp())
|
sec_diff = abs((pnow - datetime.now().timestamp()))
|
||||||
# should probably never fail? could mock time.time
|
# should probably never fail? could mock time.time
|
||||||
# but there seems to be issues with doing that use C-libraries (as time.time) does
|
# but there seems to be issues with doing that use C-libraries (as time.time) does
|
||||||
# https://docs.python.org/3/library/unittest.mock-examples.html#partial-mocking
|
# https://docs.python.org/3/library/unittest.mock-examples.html#partial-mocking
|
||||||
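Note: nearly all of the churn in this file is the same mechanical typing migration (master uses PEP 604 `X | None`, the tagged release keeps `typing.Optional[X]`). The two spellings mean the same thing in annotations; a minimal standalone sketch (not part of this diff) of why master can use the new syntax while still running on interpreters older than 3.10:

    # PEP 604 unions in annotation position work on older pythons once
    # annotations are lazily evaluated via the __future__ import;
    # typing.Optional is the equivalent spelling without it.
    from __future__ import annotations

    from typing import Optional


    def old_style(x: Optional[int]) -> Optional[str]:
        return None if x is None else str(x)


    def new_style(x: int | None) -> str | None:  # same meaning
        return None if x is None else str(x)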
@@ -1,11 +1,9 @@
-from __future__ import annotations
-
 import datetime
 from dataclasses import asdict, is_dataclass
 from decimal import Decimal
-from functools import cache
+from functools import lru_cache
 from pathlib import Path
-from typing import Any, Callable, NamedTuple
+from typing import Any, Callable, NamedTuple, Optional
 
 from .error import error_to_json
 from .pytest import parametrize
@@ -59,12 +57,12 @@ def _default_encode(obj: Any) -> Any:
 # could possibly run multiple times/raise warning if you provide different 'default'
 # functions or change the kwargs? The alternative is to maintain all of this at the module
 # level, which is just as annoying
-@cache
+@lru_cache(maxsize=None)
 def _dumps_factory(**kwargs) -> Callable[[Any], str]:
     use_default: DefaultEncoder = _default_encode
     # if the user passed an additional 'default' parameter,
     # try using that to serialize before before _default_encode
-    _additional_default: DefaultEncoder | None = kwargs.get("default")
+    _additional_default: Optional[DefaultEncoder] = kwargs.get("default")
     if _additional_default is not None and callable(_additional_default):
 
         def wrapped_default(obj: Any) -> Any:
@@ -80,9 +78,9 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
 
     kwargs["default"] = use_default
 
-    prefer_factory: str | None = kwargs.pop('_prefer_factory', None)
+    prefer_factory: Optional[str] = kwargs.pop('_prefer_factory', None)
 
-    def orjson_factory() -> Dumps | None:
+    def orjson_factory() -> Optional[Dumps]:
         try:
             import orjson
         except ModuleNotFoundError:
@@ -97,7 +95,7 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
 
         return _orjson_dumps
 
-    def simplejson_factory() -> Dumps | None:
+    def simplejson_factory() -> Optional[Dumps]:
         try:
             from simplejson import dumps as simplejson_dumps
         except ModuleNotFoundError:
@@ -117,7 +115,7 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
 
         return _simplejson_dumps
 
-    def stdlib_factory() -> Dumps | None:
+    def stdlib_factory() -> Optional[Dumps]:
         import json
 
         from .warnings import high
@@ -147,12 +145,13 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:
         res = factory()
         if res is not None:
             return res
-    raise RuntimeError("Should not happen!")
+    else:
+        raise RuntimeError("Should not happen!")
 
 
 def dumps(
     obj: Any,
-    default: DefaultEncoder | None = None,
+    default: Optional[DefaultEncoder] = None,
     **kwargs,
 ) -> str:
     """
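Note: the `@cache` / `@lru_cache(maxsize=None)` swap above is behaviour-preserving; `functools.cache` (added in Python 3.9) is documented as an unbounded `lru_cache`. A standalone sketch:

    from functools import cache, lru_cache


    @lru_cache(maxsize=None)  # unbounded cache, available on older pythons
    def double(x: int) -> int:
        return x * 2


    @cache  # python 3.9+: same thing, just shorter
    def triple(x: int) -> int:
        return x * 3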
@@ -3,12 +3,9 @@ Decorator to gracefully handle importing a data source, or warning
 and yielding nothing (or a default) when its not available
 """
 
-from __future__ import annotations
-
 import warnings
-from collections.abc import Iterable, Iterator
 from functools import wraps
-from typing import Any, Callable, TypeVar
+from typing import Any, Callable, Iterable, Iterator, Optional, TypeVar
 
 from .warnings import medium
 
@@ -29,8 +26,8 @@ _DEFAULT_ITR = ()
 def import_source(
     *,
     default: Iterable[T] = _DEFAULT_ITR,
-    module_name: str | None = None,
-    help_url: str | None = None,
+    module_name: Optional[str] = None,
+    help_url: Optional[str] = None,
 ) -> Callable[..., Callable[..., Iterator[T]]]:
     """
     doesn't really play well with types, but is used to catch
@@ -53,7 +50,6 @@ def import_source(
         except (ImportError, AttributeError) as err:
             from . import core_config as CC
             from .error import warn_my_config_import_error
 
             suppressed_in_conf = False
             if module_name is not None and CC.config._is_module_active(module_name) is False:
                 suppressed_in_conf = True
@@ -65,18 +61,16 @@ def import_source(
                 warnings.warn(f"""If you don't want to use this module, to hide this message, add '{module_name}' to your core config disabled_modules in your config, like:
 
 class core:
-    disabled_modules = [{module_name!r}]
-""", stacklevel=1)
+    disabled_modules = [{repr(module_name)}]
+""")
             # try to check if this is a config error or based on dependencies not being installed
             if isinstance(err, (ImportError, AttributeError)):
-                matched_config_err = warn_my_config_import_error(err, module_name=module_name, help_url=help_url)
+                matched_config_err = warn_my_config_import_error(err, help_url=help_url)
             # if we determined this wasn't a config error, and it was an attribute error
             # it could be *any* attribute error -- we should raise this since its otherwise a fatal error
             # from some code in the module failing
             if not matched_config_err and isinstance(err, AttributeError):
                 raise err
             yield from default
 
         return wrapper
 
     return decorator
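Note: for context, a hedged sketch of how a data source module typically applies this decorator; the `my.someservice` module name is hypothetical, not from this diff:

    from my.core.source import import_source


    @import_source(module_name='my.someservice', default=())
    def events():
        # if the underlying module (or its user config) fails to import,
        # the decorator warns and yields the default instead of raising
        from my.someservice import events as _events
        yield from _events()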
@@ -1,16 +1,12 @@
-from __future__ import annotations
+from .internal import assert_subpackage; assert_subpackage(__name__)
 
-from .internal import assert_subpackage  # noqa: I001
-
-assert_subpackage(__name__)
-
 import shutil
 import sqlite3
-from collections.abc import Iterator
 from contextlib import contextmanager
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Any, Callable, Literal, Union, overload
+from typing import Any, Callable, Iterator, Literal, Optional, Tuple, Union, overload
 
 from .common import PathIsh
 from .compat import assert_never
@@ -26,7 +22,6 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None:
         conn.execute('CREATE TABLE testtable (col)')
 
     import pytest
-
     with pytest.raises(sqlite3.OperationalError, match='readonly database'):
         with sqlite_connect_immutable(db) as conn:
             conn.execute('DROP TABLE testtable')
@@ -38,17 +33,15 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None:
 
 SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]
 
-
 def dict_factory(cursor, row):
     fields = [column[0] for column in cursor.description]
-    return dict(zip(fields, row))
+    return {key: value for key, value in zip(fields, row)}
 
-
 Factory = Union[SqliteRowFactory, Literal['row', 'dict']]
 
 
 @contextmanager
-def sqlite_connection(db: PathIsh, *, immutable: bool = False, row_factory: Factory | None = None) -> Iterator[sqlite3.Connection]:
+def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Optional[Factory]=None) -> Iterator[sqlite3.Connection]:
     dbp = f'file:{db}'
     # https://www.sqlite.org/draft/uri.html#uriimmutable
     if immutable:
@@ -104,76 +97,31 @@ def sqlite_copy_and_open(db: PathIsh) -> sqlite3.Connection:
 # and then the return type ends up as Iterator[Tuple[str, ...]], which isn't desirable :(
 # a bit annoying to have this copy-pasting, but hopefully not a big issue
 
-# fmt: off
 @overload
-def select(cols: tuple[str ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any ]]: ...
+def select(cols: Tuple[str ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[Tuple[Any ]]: ...
 @overload
-def select(cols: tuple[str, str ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any ]]: ...
+def select(cols: Tuple[str, str ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[Tuple[Any, Any ]]: ...
 @overload
-def select(cols: tuple[str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[Tuple[Any, Any, Any ]]: ...
 @overload
-def select(cols: tuple[str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[Tuple[Any, Any, Any, Any ]]: ...
 @overload
-def select(cols: tuple[str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[Tuple[Any, Any, Any, Any, Any ]]: ...
 @overload
-def select(cols: tuple[str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[Tuple[Any, Any, Any, Any, Any, Any ]]: ...
 @overload
-def select(cols: tuple[str, str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any, Any, Any, Any ]]: ...
+def select(cols: Tuple[str, str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any ]]: ...
 @overload
-def select(cols: tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
-# fmt: on
+def select(cols: Tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
 
 def select(cols, rest, *, db):
     # db arg is last cause that results in nicer code formatting..
     return db.execute('SELECT ' + ','.join(cols) + ' ' + rest)
 
 
-class SqliteTool:
-    def __init__(self, connection: sqlite3.Connection) -> None:
-        self.connection = connection
-
-    def _get_sqlite_master(self) -> dict[str, str]:
-        res = {}
-        for c in self.connection.execute('SELECT name, type FROM sqlite_master'):
-            [name, type_] = c
-            assert type_ in {'table', 'index', 'view', 'trigger'}, (name, type_)  # just in case
-            res[name] = type_
-        return res
-
-    def get_table_names(self) -> list[str]:
-        master = self._get_sqlite_master()
-        res = []
-        for name, type_ in master.items():
-            if type_ != 'table':
-                continue
-            res.append(name)
-        return res
-
-    def get_table_schema(self, name: str) -> dict[str, str]:
-        """
-        Returns map from column name to column type
-
-        NOTE: Sometimes this doesn't work if the db has some extensions (e.g. happens for facebook apps)
-        In this case you might still be able to use get_table_names
-        """
-        schema: dict[str, str] = {}
-        for row in self.connection.execute(f'PRAGMA table_info(`{name}`)'):
-            col = row[1]
-            type_ = row[2]
-            # hmm, somewhere between 3.34.1 and 3.37.2, sqlite started normalising type names to uppercase
-            # let's do this just in case since python < 3.10 are using the old version
-            # e.g. it could have returned 'blob' and that would confuse blob check (see _check_allowed_blobs)
-            type_ = type_.upper()
-            schema[col] = type_
-        return schema
-
-    def get_table_schemas(self) -> dict[str, dict[str, str]]:
-        return {name: self.get_table_schema(name) for name in self.get_table_names()}
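Note: a small usage sketch for the `sqlite_connection` context manager above. The database path is a placeholder, and the `'dict'` shorthand selecting `dict_factory` is an assumption based on the `Factory` alias:

    # hypothetical usage; '/tmp/mydata.db' is a placeholder path
    with sqlite_connection('/tmp/mydata.db', immutable=True, row_factory='dict') as conn:
        for row in conn.execute('SELECT name, type FROM sqlite_master'):
            # with the dict factory each row behaves like {'name': ..., 'type': ...}
            print(row['name'], row['type'])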
@@ -2,13 +2,10 @@
 Helpers for hpi doctor/stats functionality.
 '''
 
-from __future__ import annotations
-
-import collections.abc
+import collections
 import importlib
 import inspect
 import typing
-from collections.abc import Iterable, Iterator, Sequence
 from contextlib import contextmanager
 from datetime import datetime
 from pathlib import Path
@@ -16,17 +13,24 @@ from types import ModuleType
 from typing import (
     Any,
     Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
     Protocol,
+    Sequence,
+    Union,
     cast,
 )
 
 from .types import asdict
 
-Stats = dict[str, Any]
+Stats = Dict[str, Any]
 
 
 class StatsFun(Protocol):
-    def __call__(self, *, quick: bool = False) -> Stats: ...
+    def __call__(self, quick: bool = False) -> Stats: ...
 
 
 # global state that turns on/off quick stats
@@ -51,10 +55,10 @@ def quick_stats():
 
 
 def stat(
-    func: Callable[[], Iterable[Any]] | Iterable[Any],
+    func: Union[Callable[[], Iterable[Any]], Iterable[Any]],
     *,
     quick: bool = False,
-    name: str | None = None,
+    name: Optional[str] = None,
 ) -> Stats:
     """
     Extracts various statistics from a passed iterable/callable, e.g.:
@@ -149,8 +153,8 @@ def test_stat() -> None:
     #
 
 
-def get_stats(module_name: str, *, guess: bool = False) -> StatsFun | None:
-    stats: StatsFun | None = None
+def get_stats(module_name: str, *, guess: bool = False) -> Optional[StatsFun]:
+    stats: Optional[StatsFun] = None
     try:
         module = importlib.import_module(module_name)
     except Exception:
@@ -163,7 +167,7 @@ def get_stats(module_name: str, *, guess: bool = False) -> StatsFun | None:
 
 # TODO maybe could be enough to annotate OUTPUTS or something like that?
 # then stats could just use them as hints?
-def guess_stats(module: ModuleType) -> StatsFun | None:
+def guess_stats(module: ModuleType) -> Optional[StatsFun]:
     """
     If the module doesn't have explicitly defined 'stat' function,
     this is used to try to guess what could be included in stats automatically
@@ -172,7 +176,7 @@ def guess_stats(module: ModuleType) -> StatsFun | None:
     if len(providers) == 0:
         return None
 
-    def auto_stats(*, quick: bool = False) -> Stats:
+    def auto_stats(quick: bool = False) -> Stats:
         res = {}
         for k, v in providers.items():
             res.update(stat(v, quick=quick, name=k))
@@ -202,7 +206,7 @@ def test_guess_stats() -> None:
     }
 
 
-def _guess_data_providers(module: ModuleType) -> dict[str, Callable]:
+def _guess_data_providers(module: ModuleType) -> Dict[str, Callable]:
     mfunctions = inspect.getmembers(module, inspect.isfunction)
     return {k: v for k, v in mfunctions if is_data_provider(v)}
 
@@ -259,7 +263,7 @@ def test_is_data_provider() -> None:
     lam = lambda: [1, 2]
     assert not idp(lam)
 
-    def has_extra_args(count) -> list[int]:
+    def has_extra_args(count) -> List[int]:
         return list(range(count))
 
     assert not idp(has_extra_args)
@@ -336,10 +340,10 @@ def test_type_is_iterable() -> None:
     assert not fun(None)
     assert not fun(int)
     assert not fun(Any)
-    assert not fun(dict[int, int])
+    assert not fun(Dict[int, int])
 
-    assert fun(list[int])
-    assert fun(Sequence[dict[str, str]])
+    assert fun(List[int])
+    assert fun(Sequence[Dict[str, str]])
     assert fun(Iterable[Any])
 
 
@@ -351,7 +355,7 @@ def _stat_item(item):
     return _guess_datetime(item)
 
 
-def _stat_iterable(it: Iterable[Any], *, quick: bool = False) -> Stats:
+def _stat_iterable(it: Iterable[Any], quick: bool = False) -> Stats:
     from more_itertools import first, ilen, take
 
     # todo not sure if there is something in more_itertools to compute this?
@@ -410,9 +414,7 @@ def test_stat_iterable() -> None:
     dd = datetime.fromtimestamp(123, tz=timezone.utc)
     day = timedelta(days=3)
 
-    class X(NamedTuple):
-        x: int
-        d: datetime
+    X = NamedTuple('X', [('x', int), ('d', datetime)])
 
     def it():
         yield RuntimeError('oops!')
@@ -430,13 +432,13 @@ def test_stat_iterable() -> None:
 
 
 # experimental, not sure about it..
-def _guess_datetime(x: Any) -> datetime | None:
+def _guess_datetime(x: Any) -> Optional[datetime]:
     # todo hmm implement without exception..
     try:
         d = asdict(x)
     except:  # noqa: E722 bare except
         return None
-    for v in d.values():
+    for k, v in d.items():
         if isinstance(v, datetime):
             return v
     return None
@@ -450,12 +452,9 @@ def test_guess_datetime() -> None:
 
     dd = fromisoformat('2021-02-01T12:34:56Z')
 
-    class A(NamedTuple):
-        x: int
-
-    class B(NamedTuple):
-        x: int
-        created: datetime
+    # ugh.. https://github.com/python/mypy/issues/7281
+    A = NamedTuple('A', [('x', int)])
+    B = NamedTuple('B', [('x', int), ('created', datetime)])
 
     assert _guess_datetime(A(x=4)) is None
     assert _guess_datetime(B(x=4, created=dd)) == dd
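Note: `stat()` above accepts either an iterable or a zero-argument callable; a hedged sketch of what it is typically fed (assuming `stat` is in scope; the output shape is indicative only):

    from datetime import datetime, timezone
    from typing import Iterator, NamedTuple


    class Item(NamedTuple):
        x: int
        dt: datetime


    def items() -> Iterator[Item]:
        yield Item(1, datetime.now(tz=timezone.utc))
        yield Item(2, datetime.now(tz=timezone.utc))


    res = stat(items)  # e.g. {'items': {'count': 2, ...}}, keyed by the callable's name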
@@ -1,22 +1,18 @@
-from __future__ import annotations
-
 import atexit
 import os
 import shutil
-import sys
-import tarfile
 import tempfile
 import zipfile
-from collections.abc import Generator, Sequence
 from contextlib import contextmanager
 from pathlib import Path
+from typing import Generator, List, Sequence, Tuple, Union
 
 from .logging import make_logger
 
 logger = make_logger(__name__, level="info")
 
 
-def _structure_exists(base_dir: Path, paths: Sequence[str], *, partial: bool = False) -> bool:
+def _structure_exists(base_dir: Path, paths: Sequence[str], partial: bool = False) -> bool:
     """
     Helper function for match_structure to check if
     all subpaths exist at some base directory
@@ -38,18 +34,17 @@ def _structure_exists(base_dir: Path, paths: Sequence[str], *, partial: bool = F
 
 
 ZIP_EXT = {".zip"}
-TARGZ_EXT = {".tar.gz"}
 
 
 @contextmanager
 def match_structure(
     base: Path,
-    expected: str | Sequence[str],
+    expected: Union[str, Sequence[str]],
     *,
     partial: bool = False,
-) -> Generator[tuple[Path, ...], None, None]:
+) -> Generator[Tuple[Path, ...], None, None]:
     """
-    Given a 'base' directory or archive (zip/tar.gz), recursively search for one or more paths that match the
+    Given a 'base' directory or zipfile, recursively search for one or more paths that match the
     pattern described in 'expected'. That can be a single string, or a list
     of relative paths (as strings) you expect at the same directory.
 
@@ -57,12 +52,12 @@ def match_structure(
     expected be present, not all of them.
 
     This reduces the chances of the user misconfiguring gdpr exports, e.g.
-    if they archived the folders instead of the parent directory or vice-versa
+    if they zipped the folders instead of the parent directory or vice-versa
 
     When this finds a matching directory structure, it stops searching in that subdirectory
     and continues onto other possible subdirectories which could match
 
-    If base is an archive, this extracts it into a temporary directory
+    If base is a zipfile, this extracts the zipfile into a temporary directory
     (configured by core_config.config.get_tmp_dir), and then searches the extracted
     folder for matching structures
 
@@ -72,21 +67,21 @@ def match_structure(
 
     export_dir
     ├── exp_2020
    │   ├── channel_data
    │   │   ├── data1
    │   │   └── data2
    │   ├── index.json
    │   ├── messages
    │   │   └── messages.csv
    │   └── profile
    │       └── settings.json
     └── exp_2021
         ├── channel_data
         │   ├── data1
         │   └── data2
         ├── index.json
         ├── messages
         │   └── messages.csv
         └── profile
             └── settings.json
 
@@ -98,12 +93,12 @@ def match_structure(
     This doesn't require an exhaustive list of expected values, but its a good idea to supply
     a complete picture of the expected structure to avoid false-positives
 
-    This does not recursively decompress archives in the subdirectories,
-    it only unpacks into a temporary directory if 'base' is an archive
+    This does not recursively unzip zipfiles in the subdirectories,
+    it only unzips into a temporary directory if 'base' is a zipfile
 
     A common pattern for using this might be to use get_files to get a list
-    of archives or top-level gdpr export directories, and use match_structure
-    to search the resulting paths for an export structure you're expecting
+    of zipfiles or top-level gdpr export directories, and use match_structure
+    to search the resulting paths for a export structure you're expecting
     """
     from . import core_config as CC
 
@@ -113,37 +108,29 @@ def match_structure(
         expected = (expected,)
 
     is_zip: bool = base.suffix in ZIP_EXT
-    is_targz: bool = any(base.name.endswith(suffix) for suffix in TARGZ_EXT)
 
     searchdir: Path = base.absolute()
     try:
-        # if the file given by the user is an archive, create a temporary
-        # directory and extract it to that temporary directory
+        # if the file given by the user is a zipfile, create a temporary
+        # directory and extract the zipfile to that temporary directory
         #
         # this temporary directory is removed in the finally block
-        if is_zip or is_targz:
+        if is_zip:
             # sanity check before we start creating directories/rm-tree'ing things
-            assert base.exists(), f"archive at {base} doesn't exist"
+            assert base.exists(), f"zipfile at {base} doesn't exist"
 
             searchdir = Path(tempfile.mkdtemp(dir=tdir))
 
-            if is_zip:
-                # base might already be a ZipPath, and str(base) would end with /
-                zf = zipfile.ZipFile(str(base).rstrip('/'))
-                zf.extractall(path=str(searchdir))
-            elif is_targz:
-                with tarfile.open(str(base)) as tar:
-                    # filter is a security feature, will be required param in later python version
-                    mfilter = {'filter': 'data'} if sys.version_info[:2] >= (3, 12) else {}
-                    tar.extractall(path=str(searchdir), **mfilter)  # type: ignore[arg-type]
-            else:
-                raise RuntimeError("can't happen")
+            # base might already be a ZipPath, and str(base) would end with /
+            zf = zipfile.ZipFile(str(base).rstrip('/'))
+            zf.extractall(path=str(searchdir))
         else:
             if not searchdir.is_dir():
-                raise NotADirectoryError(f"Expected either a zip/tar.gz archive or a directory, received {searchdir}")
+                raise NotADirectoryError(f"Expected either a zipfile or a directory, received {searchdir}")
 
-        matches: list[Path] = []
-        possible_targets: list[Path] = [searchdir]
+        matches: List[Path] = []
+        possible_targets: List[Path] = [searchdir]
 
         while len(possible_targets) > 0:
             p = possible_targets.pop(0)
@@ -163,9 +150,9 @@ def match_structure(
 
     finally:
 
-        if is_zip or is_targz:
+        if is_zip:
             # make sure we're not mistakenly deleting data
-            assert str(searchdir).startswith(str(tdir)), f"Expected the temporary directory for extracting archive to start with the temporary directory prefix ({tdir}), found {searchdir}"
+            assert str(searchdir).startswith(str(tdir)), f"Expected the temporary directory for extracting zip to start with the temporary directory prefix ({tdir}), found {searchdir}"
 
             shutil.rmtree(str(searchdir))
 
@@ -174,7 +161,7 @@ def warn_leftover_files() -> None:
     from . import core_config as CC
 
     base_tmp: Path = CC.config.get_tmp_dir()
-    leftover: list[Path] = list(base_tmp.iterdir())
+    leftover: List[Path] = list(base_tmp.iterdir())
     if leftover:
         logger.debug(f"at exit warning: Found leftover files in temporary directory '{leftover}'. this may be because you have multiple hpi processes running -- if so this can be ignored")
 
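Note: combining the docstring's directory sketch with the signature, a hedged usage example for `match_structure` (assuming it is in scope; paths are illustrative):

    from pathlib import Path

    expected = (
        'channel_data/data1',
        'messages/messages.csv',
        'profile/settings.json',
    )

    # with the export_dir layout from the docstring above, this should yield
    # both exp_2020 and exp_2021; an archive in place of the directory is
    # unpacked into a temporary directory first
    with match_structure(Path('export_dir'), expected=expected) as results:
        for export_root in results:
            print(export_root)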
@@ -2,11 +2,11 @@
 Helper 'module' for test_guess_stats
 """
 
-from collections.abc import Iterable, Iterator, Sequence
 from contextlib import contextmanager
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
+from typing import Iterable, Iterator, Sequence
 
 
 @dataclass
@@ -1,8 +1,6 @@
-from __future__ import annotations
-
 import os
-from collections.abc import Iterator
 from contextlib import contextmanager
+from typing import Iterator, Optional
 
 import pytest
 
@@ -17,7 +15,7 @@ skip_if_uses_optional_deps = pytest.mark.skipif(
 
 # TODO maybe move to hpi core?
 @contextmanager
-def tmp_environ_set(key: str, value: str | None) -> Iterator[None]:
+def tmp_environ_set(key: str, value: Optional[str]) -> Iterator[None]:
     prev_value = os.environ.get(key)
     if value is None:
         os.environ.pop(key, None)
@@ -1,9 +1,8 @@
 import json
 import warnings
-from collections.abc import Iterator
 from datetime import datetime
 from pathlib import Path
-from typing import NamedTuple
+from typing import Iterator, NamedTuple
 
 from ..denylist import DenyList
 
@@ -92,7 +91,8 @@ def test_denylist(tmp_path: Path) -> None:
 
     assert "59.40.113.87" not in [i.addr for i in filtered]
 
-    data_json = json.loads(tf.read_text())
+    with open(tf, "r") as f:
+        data_json = json.loads(f.read())
 
     assert data_json == [
         {
@@ -14,9 +14,8 @@ def test_gdpr_structure_exists() -> None:
         assert results == (structure_data / "gdpr_subdirs" / "gdpr_export",)
 
 
-@pytest.mark.parametrize("archive", ["gdpr_export.zip", "gdpr_export.tar.gz"])
-def test_gdpr_unpack(archive: str) -> None:
-    with match_structure(structure_data / archive, expected=gdpr_expected) as results:
+def test_gdpr_unzip() -> None:
+    with match_structure(structure_data / "gdpr_export.zip", expected=gdpr_expected) as results:
         assert len(results) == 1
         extracted = results[0]
         index_file = extracted / "messages" / "index.csv"
@@ -33,6 +32,6 @@ def test_match_partial() -> None:
 
 
 def test_not_directory() -> None:
-    with pytest.raises(NotADirectoryError, match=r"Expected either a zip/tar.gz archive or a directory"):
+    with pytest.raises(NotADirectoryError, match=r"Expected either a zipfile or a directory"):
         with match_structure(structure_data / "messages/index.csv", expected=gdpr_expected):
             pass
Binary file not shown.

@@ -1,7 +1,7 @@
-from __future__ import annotations
-
 from .common import skip_if_uses_optional_deps as pytestmark
 
+from typing import List
+
 # TODO ugh, this is very messy.. need to sort out config overriding here
 
 
@@ -16,7 +16,7 @@ def test_cachew() -> None:
 
     # TODO ugh. need doublewrap or something to avoid having to pass parens
     @mcachew()
-    def cf() -> list[int]:
+    def cf() -> List[int]:
         nonlocal called
         called += 1
         return [1, 2, 3]
@@ -43,7+43,7 @@ def test_cachew_dir_none() -> None:
     called = 0
 
     @mcachew(cache_path=cache_dir() / 'ctest')
-    def cf() -> list[int]:
+    def cf() -> List[int]:
         nonlocal called
         called += 1
         return [called, called, called]
@@ -1,178 +0,0 @@
-"""
-Various tests that are checking behaviour of user config wrt to various things
-"""
-
-import os
-import sys
-from pathlib import Path
-
-import pytest
-import pytz
-
-import my.config
-from my.core import notnone
-from my.demo import items, make_config
-
-from .common import tmp_environ_set
-
-# TODO would be nice to randomize test order here to catch various config issues
-
-
-# run the same test multiple times to make sure there are not issues with import order etc
-@pytest.mark.parametrize('run_id', ['1', '2'])
-def test_override_config(tmp_path: Path, run_id: str) -> None:
-    class user_config:
-        username = f'user_{run_id}'
-        data_path = f'{tmp_path}/*.json'
-
-    my.config.demo = user_config  # type: ignore[misc, assignment]
-
-    [item1, item2] = items()
-    assert item1.username == f'user_{run_id}'
-    assert item2.username == f'user_{run_id}'
-
-
-@pytest.mark.skip(reason="won't work at the moment because of inheritance")
-def test_dynamic_config_simplenamespace(tmp_path: Path) -> None:
-    from types import SimpleNamespace
-
-    user_config = SimpleNamespace(
-        username='user3',
-        data_path=f'{tmp_path}/*.json',
-    )
-    my.config.demo = user_config  # type: ignore[misc, assignment]
-
-    cfg = make_config()
-
-    assert cfg.username == 'user3'
-
-
-def test_mixin_attribute_handling(tmp_path: Path) -> None:
-    """
-    Tests that arbitrary mixin attributes work with our config handling pattern
-    """
-
-    nytz = pytz.timezone('America/New_York')
-
-    class user_config:
-        # check that override is taken into the account
-        timezone = nytz
-
-        irrelevant = 'hello'
-
-        username = 'UUU'
-        data_path = f'{tmp_path}/*.json'
-
-    my.config.demo = user_config  # type: ignore[misc, assignment]
-
-    cfg = make_config()
-
-    assert cfg.username == 'UUU'
-
-    # mypy doesn't know about it, but the attribute is there
-    assert getattr(cfg, 'irrelevant') == 'hello'
-
-    # check that overridden default attribute is actually getting overridden
-    assert cfg.timezone == nytz
-
-    [item1, item2] = items()
-    assert item1.username == 'UUU'
-    assert notnone(item1.dt.tzinfo).zone == nytz.zone  # type: ignore[attr-defined]
-    assert item2.username == 'UUU'
-    assert notnone(item2.dt.tzinfo).zone == nytz.zone  # type: ignore[attr-defined]
-
-
-# use multiple identical tests to make sure there are no issues with cached imports etc
-@pytest.mark.parametrize('run_id', ['1', '2'])
-def test_dynamic_module_import(tmp_path: Path, run_id: str) -> None:
-    """
-    Test for dynamic hackery in config properties
-    e.g. importing some external modules
-    """
-
-    ext = tmp_path / 'external'
-    ext.mkdir()
-    (ext / '__init__.py').write_text(
-        '''
-def transform(x):
-    from .submodule import do_transform
-    return do_transform(x)
-
-'''
-    )
-    (ext / 'submodule.py').write_text(
-        f'''
-def do_transform(x):
-    return {{"total_{run_id}": sum(x.values())}}
-'''
-    )
-
-    class user_config:
-        username = 'someuser'
-        data_path = f'{tmp_path}/*.json'
-        external = f'{ext}'
-
-    my.config.demo = user_config  # type: ignore[misc, assignment]
-
-    [item1, item2] = items()
-    assert item1.raw == {f'total_{run_id}': 1 + 123}, item1
-    assert item2.raw == {f'total_{run_id}': 2 + 456}, item2
-
-    # need to reset these modules, otherwise they get cached
-    # kind of relevant to my.core.cfg.tmp_config
-    sys.modules.pop('external', None)
-    sys.modules.pop('external.submodule', None)
-
-
-@pytest.mark.parametrize('run_id', ['1', '2'])
-def test_my_config_env_variable(tmp_path: Path, run_id: str) -> None:
-    """
-    Tests handling of MY_CONFIG variable
-    """
-
-    # ugh. so by this point, my.config is already loaded (default stub), so we need to unload it
-    sys.modules.pop('my.config', None)
-    # but my.config itself relies on my.core.init hook, so unless it's reloaded too it wouldn't help
-    sys.modules.pop('my.core', None)
-    sys.modules.pop('my.core.init', None)
-    # it's a bit of a mouthful of course, but in most cases MY_CONFIG would be set once
-    # , and before hpi runs, so hopefully it's not a huge deal
-    cfg_dir = tmp_path / 'my'
-    cfg_file = cfg_dir / 'config.py'
-    cfg_dir.mkdir()
-
-    cfg_file.write_text(
-        f'''
-# print("IMPORTING CONFIG {run_id}")
-class demo:
-    username = 'xxx_{run_id}'
-    data_path = r'{tmp_path}{os.sep}*.json'  # need raw string for windows...
-'''
-    )
-
-    with tmp_environ_set('MY_CONFIG', str(tmp_path)):
-        [item1, item2] = items()
-        assert item1.username == f'xxx_{run_id}'
-        assert item2.username == f'xxx_{run_id}'
-
-        # sigh.. so this is cached in sys.path
-        # so it takes precedence later during next import, not giving the MY_CONFIG hook
-        # (imported from builtin my.config) to kick in
-        sys.path.remove(str(tmp_path))
-
-        # FIXME ideally this shouldn't be necessary?
-        # remove this after we fixup my.tests.reddit and my.tests.commits
-        # (they were failing ci when running all tests)
-        sys.modules.pop('my.config', None)
-
-
-@pytest.fixture(autouse=True)
-def prepare_data(tmp_path: Path):
-    (tmp_path / 'data.json').write_text(
-        '''
-[
-    {"key": 1, "value": 123},
-    {"key": 2, "value": 456}
-]
-'''
-    )
@@ -12,7 +12,7 @@ def _init_default_config() -> None:
 
 def test_tmp_config() -> None:
     ## ugh. ideally this would be on the top level (would be a better test)
-    ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up
+    ## but pytest imports eveything first, executes hooks, and some reset_modules() fictures mess stuff up
     ## later would be nice to be a bit more careful about them
     _init_default_config()
     from my.simple import items
@@ -1,7 +1,5 @@
-from __future__ import annotations
-
-from collections.abc import Sequence
-from functools import cache, lru_cache
+from functools import lru_cache
+from typing import Dict, Sequence
 
 import pytz
 
@@ -13,24 +11,22 @@ def user_forced() -> Sequence[str]:
     # https://stackoverflow.com/questions/36067621/python-all-possible-timezone-abbreviations-for-given-timezone-name-and-vise-ve
     try:
         from my.config import time as user_config
-
-        return user_config.tz.force_abbreviations  # type: ignore[attr-defined]  # noqa: TRY300
-        # note: noqa since we're catching case where config doesn't have attribute here as well
+        return user_config.tz.force_abbreviations  # type: ignore[attr-defined]
     except:
         # todo log/apply policy
         return []
 
 
 @lru_cache(1)
-def _abbr_to_timezone_map() -> dict[str, pytz.BaseTzInfo]:
+def _abbr_to_timezone_map() -> Dict[str, pytz.BaseTzInfo]:
     # also force UTC to always correspond to utc
     # this makes more sense than Zulu it ends up by default
-    timezones = [*pytz.all_timezones, 'UTC', *user_forced()]
+    timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
 
-    res: dict[str, pytz.BaseTzInfo] = {}
+    res: Dict[str, pytz.BaseTzInfo] = {}
     for tzname in timezones:
         tz = pytz.timezone(tzname)
         infos = getattr(tz, '_tzinfos', [])  # not sure if can rely on attr always present?
         for info in infos:
             abbr = info[-1]
             # todo could support this with a better error handling strategy?
@@ -46,7 +42,7 @@ def _abbr_to_timezone_map() -> dict[str, pytz.BaseTzInfo]:
     return res
 
 
-@cache
+@lru_cache(maxsize=None)
 def abbr_to_timezone(abbr: str) -> pytz.BaseTzInfo:
     return _abbr_to_timezone_map()[abbr]
 
|
|
|
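The mapping both versions build is easy to demo standalone. A minimal sketch of the same idea (pytz's private _tzinfos attribute is assumed present, which is also why the code above guards it with getattr):

    import pytz

    def abbr_map() -> dict:
        res = {}
        for tzname in [*pytz.all_timezones, 'UTC']:
            tz = pytz.timezone(tzname)
            for info in getattr(tz, '_tzinfos', []):  # keys are (utcoffset, dst, abbr) tuples
                res[info[-1]] = tz
        res['UTC'] = pytz.timezone('UTC')  # force UTC to map to utc rather than whatever came last
        return res

    assert str(abbr_map()['UTC']) == 'UTC'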
@@ -1,15 +1,14 @@
-from __future__ import annotations
-
-from .internal import assert_subpackage
-
-assert_subpackage(__name__)
+from .internal import assert_subpackage; assert_subpackage(__name__)
 
 from dataclasses import asdict as dataclasses_asdict
 from dataclasses import is_dataclass
 from datetime import datetime
-from typing import Any
+from typing import (
+    Any,
+    Dict,
+)
 
-Json = dict[str, Any]
+Json = Dict[str, Any]
 
 
 # for now just serves documentation purposes... but one day might make it statically verifiable where possible?
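The Json alias on both sides is the same thing spelled for different Python versions; it documents intent without enforcing anything at runtime. A quick illustration:

    import json
    from typing import Any

    Json = dict[str, Any]  # spelled Dict[str, Any] on the older side of this diff

    def parse(raw: str) -> Json:
        obj = json.loads(raw)
        assert isinstance(obj, dict)  # the annotation alone wouldn't catch a top-level list
        return obj

    assert parse('{"key": 1}') == {'key': 1}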
@@ -1,12 +1,10 @@
-from __future__ import annotations
-
 import os
 import pkgutil
 import sys
-from collections.abc import Iterable
 from itertools import chain
 from pathlib import Path
 from types import ModuleType
+from typing import Iterable, List, Optional
 
 from .discovery_pure import HPIModule, _is_not_module_src, has_stats, ignored
 
@@ -14,7 +12,8 @@ from .discovery_pure import HPIModule, _is_not_module_src, has_stats, ignored
 def modules() -> Iterable[HPIModule]:
     import my
 
-    yield from _iter_all_importables(my)
+    for m in _iter_all_importables(my):
+        yield m
 
 
 __NOT_HPI_MODULE__ = 'Import this to mark a python file as a helper, not an actual HPI module'
@@ -22,14 +21,13 @@ from .discovery_pure import NOT_HPI_MODULE_VAR
 
 assert NOT_HPI_MODULE_VAR in globals()  # check name consistency
 
-def is_not_hpi_module(module: str) -> str | None:
+def is_not_hpi_module(module: str) -> Optional[str]:
     '''
     None if a module, otherwise returns reason
     '''
-    import importlib.util
+    import importlib
 
-    path: str | None = None
+    path: Optional[str] = None
     try:
         # TODO annoying, this can cause import of the parent module?
         spec = importlib.util.find_spec(module)
@@ -38,7 +36,7 @@ def is_not_hpi_module(module: str) -> str | None:
     except Exception as e:
         # todo a bit misleading.. it actually shouldn't import in most cases, it's just the weird parent module import thing
         return "import error (possibly missing config entry)"  # todo add exc message?
     assert path is not None  # not sure if can happen?
     if _is_not_module_src(Path(path)):
         return f"marked explicitly (via {NOT_HPI_MODULE_VAR})"
 
@@ -60,10 +58,9 @@ def _iter_all_importables(pkg: ModuleType) -> Iterable[HPIModule]:
 
 
 def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModule]:
+    from .core_config import config
+
     """Yield all importables under a given path and package."""
 
-    from .core_config import config  # noqa: F401
-
     for dir_path, dirs, file_names in os.walk(pkg_pth):
         file_names.sort()
         # NOTE: sorting dirs in place is intended, it's the way you're supposed to do it with os.walk
@@ -78,7 +75,7 @@ def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModu
             continue
 
         rel_pt = pkg_dir_path.relative_to(pkg_pth)
-        pkg_pref = '.'.join((pkg_name, *rel_pt.parts))
+        pkg_pref = '.'.join((pkg_name, ) + rel_pt.parts)
 
         yield from _walk_packages(
             (str(pkg_dir_path), ), prefix=f'{pkg_pref}.',
@@ -86,7 +83,6 @@ def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModu
     # TODO might need to make it defensive and yield Exception (otherwise hpi doctor might fail for no good reason)
     # use onerror=?
-
 
     # ignored explicitly -> not HPI
     # if enabled in config -> HPI
     # if disabled in config -> HPI
@@ -95,17 +91,17 @@ def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModu
     # TODO when do we need to recurse?
 
 
-def _walk_packages(path: Iterable[str], prefix: str = '', onerror=None) -> Iterable[HPIModule]:
+def _walk_packages(path: Iterable[str], prefix: str='', onerror=None) -> Iterable[HPIModule]:
     """
     Modified version of https://github.com/python/cpython/blob/d50a0700265536a20bcce3fb108c954746d97625/Lib/pkgutil.py#L53,
-    to avoid importing modules that are skipped
+    to alvoid importing modules that are skipped
     """
     from .core_config import config
 
-    def seen(p, m={}):  # noqa: B006
+    def seen(p, m={}):
         if p in m:
             return True
-        m[p] = True  # noqa: RET503
+        m[p] = True
 
     for info in pkgutil.iter_modules(path, prefix):
         mname = info.name
@@ -158,9 +154,8 @@ def _walk_packages(path: Iterable[str], prefix: str = '', onerror=None) -> Itera
         path = [p for p in path if not seen(p)]
         yield from _walk_packages(path, mname + '.', onerror)
 
 
 # deprecate?
-def get_modules() -> list[HPIModule]:
+def get_modules() -> List[HPIModule]:
     return list(modules())
 
 
@@ -175,14 +170,14 @@ def test_module_detection() -> None:
     with reset() as cc:
         cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*']
         mods = {m.name: m for m in modules()}
-        assert mods['my.demo'].skip_reason == "has no 'stats()' function"
+        assert mods['my.demo'] .skip_reason == "has no 'stats()' function"
 
     with reset() as cc:
         cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*', 'my.lastfm']
         cc.enabled_modules = ['my.demo']
         mods = {m.name: m for m in modules()}
 
-        assert mods['my.demo'].skip_reason is None  # not skipped
+        assert mods['my.demo'] .skip_reason is None  # not skipped
         assert mods['my.lastfm'].skip_reason == "suppressed in the user config"
 
@@ -1,7 +1,6 @@
-from __future__ import annotations
+import sys
 
 from concurrent.futures import Executor, Future
-from typing import Any, Callable, TypeVar
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
 
 from ..compat import ParamSpec
 
@@ -16,25 +15,37 @@ class DummyExecutor(Executor):
     but also want to provide an option to run the code serially (e.g. for debugging)
     """
 
-    def __init__(self, max_workers: int | None = 1) -> None:
+    def __init__(self, max_workers: Optional[int] = 1) -> None:
         self._shutdown = False
         self._max_workers = max_workers
 
-    def submit(self, fn: Callable[_P, _T], /, *args: _P.args, **kwargs: _P.kwargs) -> Future[_T]:
-        if self._shutdown:
-            raise RuntimeError('cannot schedule new futures after shutdown')
-
-        f: Future[Any] = Future()
-        try:
-            result = fn(*args, **kwargs)
-        except KeyboardInterrupt:
-            raise
-        except BaseException as e:
-            f.set_exception(e)
-        else:
-            f.set_result(result)
-
-        return f
-
-    def shutdown(self, wait: bool = True, **kwargs) -> None:  # noqa: FBT001,FBT002,ARG002
+    if TYPE_CHECKING:
+        if sys.version_info[:2] <= (3, 8):
+            # 3.8 doesn't support ParamSpec as Callable arg :(
+            # and any attempt to type results in incompatible supertype.. so whatever
+            def submit(self, fn, *args, **kwargs): ...
+
+        else:
+
+            def submit(self, fn: Callable[_P, _T], /, *args: _P.args, **kwargs: _P.kwargs) -> Future[_T]: ...
+
+    else:
+
+        def submit(self, fn, *args, **kwargs):
+            if self._shutdown:
+                raise RuntimeError('cannot schedule new futures after shutdown')
+
+            f: Future[Any] = Future()
+            try:
+                result = fn(*args, **kwargs)
+            except KeyboardInterrupt:
+                raise
+            except BaseException as e:
+                f.set_exception(e)
+            else:
+                f.set_result(result)
+
+            return f
+
+    def shutdown(self, wait: bool = True, **kwargs) -> None:
         self._shutdown = True
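A usage sketch for DummyExecutor: swap it in for a real pool while debugging, keeping the same Executor-shaped call sites (the DEBUG flag and import path are assumptions for the example):

    from concurrent.futures import Executor, ThreadPoolExecutor

    from my.core.utils.concurrent import DummyExecutor  # assumed location of the class above

    DEBUG = True

    def make_pool() -> Executor:
        # DummyExecutor runs submitted functions inline; exceptions still
        # surface through Future.result(), just like with a real pool
        return DummyExecutor() if DEBUG else ThreadPoolExecutor(max_workers=4)

    with make_pool() as pool:
        assert pool.submit(lambda x: x * 2, 21).result() == 42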
@@ -1,27 +1,27 @@
-from __future__ import annotations
-
 import importlib
 import importlib.util
 import sys
 from pathlib import Path
 from types import ModuleType
+from typing import Optional
+
+from ..common import PathIsh
 
 
 # TODO only used in tests? not sure if useful at all.
-def import_file(p: Path | str, name: str | None = None) -> ModuleType:
+def import_file(p: PathIsh, name: Optional[str] = None) -> ModuleType:
     p = Path(p)
     if name is None:
         name = p.stem
     spec = importlib.util.spec_from_file_location(name, p)
     assert spec is not None, f"Fatal error; Could not create module spec from {name} {p}"
     foo = importlib.util.module_from_spec(spec)
-    loader = spec.loader
-    assert loader is not None
+    loader = spec.loader; assert loader is not None
     loader.exec_module(foo)
     return foo
 
 
-def import_from(path: Path | str, name: str) -> ModuleType:
+def import_from(path: PathIsh, name: str) -> ModuleType:
     path = str(path)
     sys.path.append(path)
     try:
@@ -30,7 +30,7 @@ def import_from(path: Path | str, name: str) -> ModuleType:
         sys.path.remove(path)
 
 
-def import_dir(path: Path | str, extra: str = '') -> ModuleType:
+def import_dir(path: PathIsh, extra: str = '') -> ModuleType:
     p = Path(path)
     if p.parts[0] == '~':
         p = p.expanduser()  # TODO eh. not sure about this..
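The spec/loader dance that import_file wraps, shown end to end with a throwaway module (file name and contents are made up):

    import importlib.util
    import tempfile
    from pathlib import Path

    with tempfile.TemporaryDirectory() as td:
        p = Path(td) / 'plugin.py'
        p.write_text('def greet(): return "hello"')
        spec = importlib.util.spec_from_file_location(p.stem, p)
        assert spec is not None and spec.loader is not None
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)  # actually runs the module's code
        assert mod.greet() == 'hello'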
@@ -4,13 +4,17 @@ Various helpers/transforms of iterators
 Ideally this should be as small as possible and we should rely on stdlib itertools or more_itertools
 """
 
-from __future__ import annotations
-
 import warnings
-from collections.abc import Hashable, Iterable, Iterator, Sized
+from collections.abc import Hashable
 from typing import (
     TYPE_CHECKING,
     Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Sized,
     TypeVar,
     Union,
     cast,
@@ -19,7 +23,6 @@ from typing import (
 import more_itertools
 from decorator import decorator
 
-from .. import warnings as core_warnings
 from ..compat import ParamSpec
 
 T = TypeVar('T')
@@ -34,7 +37,7 @@ def _identity(v: T) -> V:  # type: ignore[type-var]
 # ugh. nothing in more_itertools?
 # perhaps duplicates_everseen? but it doesn't yield non-unique elements?
 def ensure_unique(it: Iterable[T], *, key: Callable[[T], K]) -> Iterable[T]:
-    key2item: dict[K, T] = {}
+    key2item: Dict[K, T] = {}
     for i in it:
         k = key(i)
         pi = key2item.get(k, None)
@@ -58,7 +61,7 @@ def test_ensure_unique() -> None:
         list(it)
 
     # hacky way to force distinct objects?
-    list(ensure_unique(dups, key=lambda _: object()))
+    list(ensure_unique(dups, key=lambda i: object()))
 
 
 def make_dict(
@@ -67,10 +70,10 @@ def make_dict(
     key: Callable[[T], K],
     # TODO make value optional instead? but then will need a typing override for it?
     value: Callable[[T], V] = _identity,
-) -> dict[K, V]:
+) -> Dict[K, V]:
     with_keys = ((key(i), i) for i in it)
     uniques = ensure_unique(with_keys, key=lambda p: p[0])
-    res: dict[K, V] = {}
+    res: Dict[K, V] = {}
     for k, i in uniques:
         res[k] = i if value is None else value(i)
     return res
@@ -88,8 +91,8 @@ def test_make_dict() -> None:
     d = make_dict(it, key=lambda i: i % 2, value=lambda i: i)
 
     # check type inference
-    d2: dict[str, int] = make_dict(it, key=lambda i: str(i))
-    d3: dict[str, bool] = make_dict(it, key=lambda i: str(i), value=lambda i: i % 2 == 0)
+    d2: Dict[str, int] = make_dict(it, key=lambda i: str(i))
+    d3: Dict[str, bool] = make_dict(it, key=lambda i: str(i), value=lambda i: i % 2 == 0)
 
 
 LFP = ParamSpec('LFP')
@@ -97,7 +100,7 @@ LV = TypeVar('LV')
 
 
 @decorator
-def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.kwargs) -> list[LV]:
+def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.kwargs) -> List[LV]:
     """
     Wraps a function's return value in wrapper (e.g. list)
     Useful when an algorithm can be expressed more cleanly as a generator
@@ -110,7 +113,7 @@ def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.k
 # so seems easiest to just use specialized instantiations of decorator instead
 if TYPE_CHECKING:
 
-    def listify(func: Callable[LFP, Iterable[LV]]) -> Callable[LFP, list[LV]]: ...  # noqa: ARG001
+    def listify(func: Callable[LFP, Iterable[LV]]) -> Callable[LFP, List[LV]]: ...
 
 else:
     listify = _listify
@@ -125,7 +128,7 @@ def test_listify() -> None:
         yield 2
 
     res = it()
-    assert_type(res, list[int])
+    assert_type(res, List[int])
     assert res == [1, 2]
 
 
@@ -139,7 +142,8 @@ def _warn_if_empty(func, *args, **kwargs):
     if isinstance(iterable, Sized):
         sz = len(iterable)
         if sz == 0:
-            core_warnings.medium(f"Function {func} returned empty container, make sure your config paths are correct")
+            # todo use hpi warnings here?
+            warnings.warn(f"Function {func} returned empty container, make sure your config paths are correct")
         return iterable
     else:  # must be an iterator
 
@@ -149,7 +153,7 @@ def _warn_if_empty(func, *args, **kwargs):
             yield i
             empty = False
         if empty:
-            core_warnings.medium(f"Function {func} didn't emit any data, make sure your config paths are correct")
+            warnings.warn(f"Function {func} didn't emit any data, make sure your config paths are correct")
 
     return wit()
 
@@ -157,7 +161,7 @@ def _warn_if_empty(func, *args, **kwargs):
 if TYPE_CHECKING:
     FF = TypeVar('FF', bound=Callable[..., Iterable])
 
-    def warn_if_empty(func: FF) -> FF: ...  # noqa: ARG001
+    def warn_if_empty(f: FF) -> FF: ...
 
 else:
     warn_if_empty = _warn_if_empty
@@ -196,24 +200,24 @@ def test_warn_if_empty_list() -> None:
     ll = [1, 2, 3]
 
     @warn_if_empty
-    def nonempty() -> list[int]:
+    def nonempty() -> List[int]:
         return ll
 
     with warnings.catch_warnings(record=True) as w:
         res1 = nonempty()
     assert len(w) == 0
-    assert_type(res1, list[int])
+    assert_type(res1, List[int])
     assert isinstance(res1, list)
     assert res1 is ll  # object should be unchanged!
 
     @warn_if_empty
-    def empty() -> list[str]:
+    def empty() -> List[str]:
         return []
 
     with warnings.catch_warnings(record=True) as w:
         res2 = empty()
     assert len(w) == 1
-    assert_type(res2, list[str])
+    assert_type(res2, List[str])
     assert isinstance(res2, list)
     assert res2 == []
 
@@ -237,7 +241,7 @@ def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]:
     """
     NOTE: Despite Hashable bound, typing annotation doesn't guarantee runtime safety
     Consider hashable type X, and Y that inherits from X, but not hashable
-    Then l: list[X] = [Y(...)] is a valid expression, and type checks against Hashable,
+    Then l: List[X] = [Y(...)] is a valid expression, and type checks against Hashable,
     but isn't runtime hashable
     """
     # Sadly this doesn't work 100% correctly with dataclasses atm...
@@ -263,27 +267,30 @@ def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]:
 # TODO different policies -- error/warn/ignore?
 def test_check_if_hashable() -> None:
     from dataclasses import dataclass
+    from typing import Set, Tuple
 
     import pytest
 
     from ..compat import assert_type
 
-    x1: list[int] = [1, 2]
+    x1: List[int] = [1, 2]
     r1 = check_if_hashable(x1)
-    assert_type(r1, Iterable[int])
+    # tgype: ignore[comparison-overlap]  # object should be unchanged
     assert r1 is x1
+    assert_type(r1, Iterable[int])
 
-    x2: Iterator[int | str] = iter((123, 'aba'))
+    x2: Iterator[Union[int, str]] = iter((123, 'aba'))
     r2 = check_if_hashable(x2)
-    assert_type(r2, Iterable[Union[int, str]])
     assert list(r2) == [123, 'aba']
+    assert_type(r2, Iterable[Union[int, str]])
 
-    x3: tuple[object, ...] = (789, 'aba')
+    x3: Tuple[object, ...] = (789, 'aba')
     r3 = check_if_hashable(x3)
+    # ttype: ignore[comparison-overlap]  # object should be unchanged
+    assert r3 is x3
     assert_type(r3, Iterable[object])
-    assert r3 is x3  # object should be unchanged
 
-    x4: list[set[int]] = [{1, 2, 3}, {4, 5, 6}]
+    x4: List[Set[int]] = [{1, 2, 3}, {4, 5, 6}]
     with pytest.raises(Exception):
         # should be rejected by mypy since set isn't Hashable, but also throw at runtime
         r4 = check_if_hashable(x4)  # type: ignore[type-var]
@@ -301,7 +308,7 @@ def test_check_if_hashable() -> None:
     class X:
         a: int
 
-    x6: list[X] = [X(a=123)]
+    x6: List[X] = [X(a=123)]
     r6 = check_if_hashable(x6)
     assert x6 is r6
 
@@ -310,7 +317,7 @@ def test_check_if_hashable() -> None:
     class Y(X):
         b: str
 
-    x7: list[Y] = [Y(a=123, b='aba')]
+    x7: List[Y] = [Y(a=123, b='aba')]
     with pytest.raises(Exception):
         # ideally that would also be rejected by mypy, but currently there is a bug
         # which treats all dataclasses as hashable: https://github.com/python/mypy/issues/11463
@@ -321,12 +328,15 @@ _UET = TypeVar('_UET')
 _UEU = TypeVar('_UEU')
 
 
-# NOTE: for historic reasons, this function had to accept Callable that returns iterator
+# NOTE: for historic reasons, this function had to accept Callable that retuns iterator
 # instead of just iterator
 # TODO maybe deprecate Callable support? not sure
 def unique_everseen(
-    fun: Callable[[], Iterable[_UET]] | Iterable[_UET],
-    key: Callable[[_UET], _UEU] | None = None,
+    fun: Union[
+        Callable[[], Iterable[_UET]],
+        Iterable[_UET]
+    ],
+    key: Optional[Callable[[_UET], _UEU]] = None,
 ) -> Iterator[_UET]:
     import os
 
@@ -358,7 +368,7 @@ def test_unique_everseen() -> None:
     assert list(unique_everseen(fun_good)) == [123]
 
     with pytest.raises(Exception):
-        # since function returns a list rather than iterator, check happens immediately
+        # since function retuns a list rather than iterator, check happens immediately
         # , even without advancing the iterator
         unique_everseen(fun_bad)
 
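The key-based deduplication above ultimately leans on more_itertools; what it boils down to, minus the Callable-or-iterable calling convention and the extra checks:

    from more_itertools import unique_everseen

    items = [{'id': 1, 'v': 'a'}, {'id': 1, 'v': 'b'}, {'id': 2, 'v': 'c'}]
    # only the first item seen for each key survives
    deduped = list(unique_everseen(items, key=lambda i: i['id']))
    assert [i['v'] for i in deduped] == ['a', 'c']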
@@ -5,16 +5,14 @@ since who looks at the terminal output?
 E.g. would be nice to propagate the warnings in the UI (it's even a subclass of Exception!)
 '''
 
-from __future__ import annotations
-
 import sys
 import warnings
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional
 
 import click
 
 
-def _colorize(x: str, color: str | None = None) -> str:
+def _colorize(x: str, color: Optional[str]=None) -> str:
     if color is None:
         return x
 
@@ -26,10 +24,10 @@ def _colorize(x: str, color: str | None = None) -> str:
     return click.style(x, fg=color)
 
 
-def _warn(message: str, *args, color: str | None = None, **kwargs) -> None:
+def _warn(message: str, *args, color: Optional[str]=None, **kwargs) -> None:
     stacklevel = kwargs.get('stacklevel', 1)
     kwargs['stacklevel'] = stacklevel + 2  # +1 for this function, +1 for medium/high wrapper
-    warnings.warn(_colorize(message, color=color), *args, **kwargs)  # noqa: B028
+    warnings.warn(_colorize(message, color=color), *args, **kwargs)
 
 
 def low(message: str, *args, **kwargs) -> None:
@@ -57,4 +55,4 @@ if not TYPE_CHECKING:
     def warn(*args, **kwargs):
         import warnings
 
-        return warnings.warn(*args, **kwargs)  # noqa: B028
+        return warnings.warn(*args, **kwargs)
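The stacklevel arithmetic above exists so the warning is attributed to the user's code rather than to the wrapper. A stripped-down equivalent:

    import warnings

    def medium(message: str) -> None:
        # +1 for this function, +1 for its caller: report two frames up
        warnings.warn(message, stacklevel=3)

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        medium('make sure your config paths are correct')
        assert len(w) == 1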
56  my/demo.py
@@ -1,77 +1,69 @@
 '''
 Just a demo module for testing and documentation purposes
 '''
-from __future__ import annotations
 
-import json
-from collections.abc import Iterable, Sequence
+from .core import Paths, PathIsh
+from typing import Optional
+from datetime import tzinfo, timezone
+
+from my.config import demo as user_config
 from dataclasses import dataclass
-from datetime import datetime, timezone, tzinfo
-from pathlib import Path
-from typing import Protocol
-
-from my.core import Json, PathIsh, Paths, get_files
 
 
-class config(Protocol):
+@dataclass
+class demo(user_config):
     data_path: Paths
 
-    # this is to check required attribute handling
     username: str
 
-    # this is to check optional attribute handling
     timezone: tzinfo = timezone.utc
 
-    external: PathIsh | None = None
+    external: Optional[PathIsh] = None
 
     @property
     def external_module(self):
         rpath = self.external
         if rpath is not None:
-            from my.core.utils.imports import import_dir
+            from .core.utils.imports import import_dir
 
             return import_dir(rpath)
 
         import my.config.repos.external as m  # type: ignore
 
         return m
 
 
-def make_config() -> config:
-    from my.config import demo as user_config
-
-    class combined_config(user_config, config): ...
-
-    return combined_config()
+from .core import make_config
+config = make_config(demo)
+
+# TODO not sure about type checking?
+external = config.external_module
 
 
+from pathlib import Path
+from typing import Sequence, Iterable
+from datetime import datetime
+from .core import Json, get_files
+
 @dataclass
 class Item:
     '''
     Some completely arbitrary artificial stuff, just for testing
     '''
 
     username: str
     raw: Json
     dt: datetime
 
 
 def inputs() -> Sequence[Path]:
-    cfg = make_config()
-    return get_files(cfg.data_path)
+    return get_files(config.data_path)
 
 
+import json
 def items() -> Iterable[Item]:
-    cfg = make_config()
-
-    transform = (lambda i: i) if cfg.external is None else cfg.external_module.transform
-
     for f in inputs():
-        dt = datetime.fromtimestamp(f.stat().st_mtime, tz=cfg.timezone)
+        dt = datetime.fromtimestamp(f.stat().st_mtime, tz=config.timezone)
         j = json.loads(f.read_text())
         for raw in j:
             yield Item(
-                username=cfg.username,
-                raw=transform(raw),
+                username=config.username,
+                raw=external.identity(raw),
                 dt=dt,
             )
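On the master side, the user config only needs to satisfy the config Protocol; a hypothetical my.config entry matching the attributes above (path and username are made up):

    from datetime import timezone

    class demo:
        data_path = '/path/to/demo/*.json'  # required
        username = 'testuser'               # required
        timezone = timezone.utc             # optional; this is the default
        external = None                     # optional external transform module

With that in place, items() yields one Item per JSON record, timestamped with the file's mtime in the configured timezone.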
@@ -4,33 +4,30 @@
 Consumes data exported by https://github.com/karlicoss/emfitexport
 """
 
-from __future__ import annotations
-
 REQUIRES = [
     'git+https://github.com/karlicoss/emfitexport',
 ]
 
-import dataclasses
-import inspect
-from collections.abc import Iterable, Iterator
 from contextlib import contextmanager
+import dataclasses
 from datetime import datetime, time, timedelta
+import inspect
 from pathlib import Path
-from typing import Any
+from typing import Any, Dict, Iterable, Iterator, List, Optional
 
-import emfitexport.dal as dal
-
 from my.core import (
-    Res,
-    Stats,
     get_files,
     stat,
+    Res,
+    Stats,
 )
 from my.core.cachew import cache_dir, mcachew
-from my.core.error import extract_error_datetime, set_error_datetime
+from my.core.error import set_error_datetime, extract_error_datetime
 from my.core.pandas import DataFrameT
 
-from my.config import emfit as config  # isort: skip
+from my.config import emfit as config
 
+import emfitexport.dal as dal
+
 
 Emfit = dal.Emfit
@@ -88,7 +85,7 @@ def datas() -> Iterable[Res[Emfit]]:
 # TODO should be used for jawbone data as well?
 def pre_dataframe() -> Iterable[Res[Emfit]]:
     # TODO shit. I need some sort of interrupted sleep detection?
-    g: list[Emfit] = []
+    g: List[Emfit] = []
 
     def flush() -> Iterable[Res[Emfit]]:
         if len(g) == 0:
@@ -115,10 +112,10 @@ def pre_dataframe() -> Iterable[Res[Emfit]]:
 
 
 def dataframe() -> DataFrameT:
-    dicts: list[dict[str, Any]] = []
-    last: Emfit | None = None
+    dicts: List[Dict[str, Any]] = []
+    last: Optional[Emfit] = None
     for s in pre_dataframe():
-        d: dict[str, Any]
+        d: Dict[str, Any]
         if isinstance(s, Exception):
             edt = extract_error_datetime(s)
             d = {
@@ -158,9 +155,9 @@ def dataframe() -> DataFrameT:
         last = s  # meh
         dicts.append(d)
 
-    import pandas as pd
+    import pandas
 
-    return pd.DataFrame(dicts)
+    return pandas.DataFrame(dicts)
 
 
 def stats() -> Stats:
@@ -169,12 +166,11 @@ def stats() -> Stats:
 
 @contextmanager
 def fake_data(nights: int = 500) -> Iterator:
+    from my.core.cfg import tmp_config
     from tempfile import TemporaryDirectory
 
     import pytz
 
-    from my.core.cfg import tmp_config
-
     with TemporaryDirectory() as td:
         tdir = Path(td)
         gen = dal.FakeData()
@@ -191,9 +187,9 @@ def fake_data(nights: int = 500) -> Iterator:
 
 
 # TODO remove/deprecate it? I think used by timeline
-def get_datas() -> list[Emfit]:
+def get_datas() -> List[Emfit]:
     # todo ugh. run lint properly
-    return sorted(datas(), key=lambda e: e.start)  # type: ignore
+    return list(sorted(datas(), key=lambda e: e.start))  # type: ignore
 
 
 # TODO move away old entries if there is a diff??
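The dataframe() above uses the error-row pattern: exceptions from pre_dataframe() become rows with an error column instead of aborting the whole frame. In miniature (sample values are made up):

    import pandas as pd

    results = [{'hr': 55.0}, RuntimeError('bad night'), {'hr': 61.5}]
    dicts = [
        {'error': str(r)} if isinstance(r, Exception) else {'error': None, **r}
        for r in results
    ]
    df = pd.DataFrame(dicts)
    assert df['error'].notna().sum() == 1  # one failed night, two good ones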
@@ -7,14 +7,13 @@ REQUIRES = [
 ]
 # todo use ast in setup.py or doctor to extract the corresponding pip packages?
 
-from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Sequence, Iterable
 
-from my.config import endomondo as user_config
-
 from .core import Paths, get_files
 
+from my.config import endomondo as user_config
 
 @dataclass
 class endomondo(user_config):
@@ -32,22 +31,20 @@ def inputs() -> Sequence[Path]:
 
 # todo add a doctor check for pip endoexport module
 import endoexport.dal as dal
-from endoexport.dal import Point, Workout  # noqa: F401
+from endoexport.dal import Point, Workout
 
 
 from .core import Res
 
 
 # todo cachew?
 def workouts() -> Iterable[Res[Workout]]:
     _dal = dal.DAL(inputs())
     yield from _dal.workouts()
 
 
-from .core.pandas import DataFrameT, check_dataframe
+from .core.pandas import check_dataframe, DataFrameT
 
 
 @check_dataframe
-def dataframe(*, defensive: bool=True) -> DataFrameT:
+def dataframe(defensive: bool=True) -> DataFrameT:
     def it():
         for w in workouts():
             if isinstance(w, Exception):
@@ -78,9 +75,7 @@ def dataframe(*, defensive: bool=True) -> DataFrameT:
     return df
 
 
-from .core import Stats, stat
+from .core import stat, Stats
 
 
 def stats() -> Stats:
     return {
         # todo pretty print stats?
@@ -91,16 +86,13 @@ def stats() -> Stats:
 
 # TODO make sure it's possible to 'advise' functions and override stuff
 
-from collections.abc import Iterator
 from contextlib import contextmanager
+from typing import Iterator
 
 @contextmanager
 def fake_data(count: int=100) -> Iterator:
-    import json
-    from tempfile import TemporaryDirectory
-
     from my.core.cfg import tmp_config
+    from tempfile import TemporaryDirectory
+    import json
     with TemporaryDirectory() as td:
         tdir = Path(td)
         fd = dal.FakeData()
@@ -1,6 +1,6 @@
 from .core.warnings import high
 
 high("DEPRECATED! Please use my.core.error instead.")
 
 from .core import __NOT_HPI_MODULE__
 
 from .core.error import *
 
@@ -1,6 +1,5 @@
-from collections.abc import Iterator
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Iterator, List, Tuple
 
 from my.core.compat import NoneType, assert_never
 
@@ -10,7 +9,7 @@ from my.core.compat import NoneType, assert_never
 class Helper:
     manager: 'Manager'
     item: Any  # todo realistically, list or dict? could at least type as indexable or something
-    path: tuple[str, ...]
+    path: Tuple[str, ...]
 
     def pop_if_primitive(self, *keys: str) -> None:
         """
@@ -27,7 +26,7 @@ class Helper:
         assert actual == expected, (key, actual, expected)
 
     def zoom(self, key: str) -> 'Helper':
-        return self.manager.helper(item=self.item.pop(key), path=(*self.path, key))
+        return self.manager.helper(item=self.item.pop(key), path=self.path + (key,))
 
 
 def is_empty(x) -> bool:
@@ -36,14 +35,14 @@ def is_empty(x) -> bool:
     elif isinstance(x, list):
         return all(map(is_empty, x))
     else:
-        assert_never(x)  # noqa: RET503
+        assert_never(x)
 
 
 class Manager:
     def __init__(self) -> None:
-        self.helpers: list[Helper] = []
+        self.helpers: List[Helper] = []
 
-    def helper(self, item: Any, *, path: tuple[str, ...] = ()) -> Helper:
+    def helper(self, item: Any, *, path: Tuple[str, ...] = ()) -> Helper:
         res = Helper(manager=self, item=item, path=path)
         self.helpers.append(res)
         return res
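The Helper/Manager machinery above implements 'consume everything explicitly': each handled key is popped, so anything left over signals unprocessed data. The core idea with plain dicts:

    def consume(item: dict) -> str:
        name = item.pop('name')
        item.pop('id')
        # if the exporter adds a new field, this assert flags it immediately
        assert item == {}, f'unhandled keys: {list(item)}'
        return name

    assert consume({'id': 1, 'name': 'xxx'}) == 'xxx'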
@@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages
 See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
 """
 
-# prevent it from appearing in modules list/doctor
+# prevent it from apprearing in modules list/doctor
 from ..core import __NOT_HPI_MODULE__
 
 # kinda annoying to keep it, but it's so legacy 'hpi module install my.fbmessenger' works
@@ -20,7 +20,6 @@ REQUIRES = [
 
-
 from my.core.hpi_compat import handle_legacy_import
 
 is_legacy_import = handle_legacy_import(
     parent_module_name=__name__,
     legacy_submodule_name='export',
@@ -1,10 +1,10 @@
-from collections.abc import Iterator
+from typing import Iterator
 
-from my.core import Res, Stats
+from my.core import Res, stat, Stats
 from my.core.source import import_source
 
 from .common import Message, _merge_messages
 
 
 src_export = import_source(module_name='my.fbmessenger.export')
 src_android = import_source(module_name='my.fbmessenger.android')
 
@@ -4,20 +4,19 @@ Messenger data from Android app database (in =/data/data/com.facebook.orca/datab
 
 from __future__ import annotations
 
-import sqlite3
-from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Union
+import sqlite3
+from typing import Iterator, Sequence, Optional, Dict, Union, List
 
-from my.core import LazyLogger, Paths, Res, datetime_aware, get_files, make_config
+from my.core import get_files, Paths, datetime_aware, Res, LazyLogger, make_config
 from my.core.common import unique_everseen
 from my.core.compat import assert_never
 from my.core.error import echain
-from my.core.sqlite import sqlite_connection, SqliteTool
+from my.core.sqlite import sqlite_connection
 
-from my.config import fbmessenger as user_config  # isort: skip
+from my.config import fbmessenger as user_config
 
 
 logger = LazyLogger(__name__)
@@ -28,7 +27,7 @@ class Config(user_config.android):
     # paths[s]/glob to the exported sqlite databases
     export_path: Paths
 
-    facebook_id: str | None = None
+    facebook_id: Optional[str] = None
 
 
 # hmm. this is necessary for default value (= None) to work
@@ -43,13 +42,13 @@ def inputs() -> Sequence[Path]:
 @dataclass(unsafe_hash=True)
 class Sender:
     id: str
-    name: str | None
+    name: Optional[str]
 
 
 @dataclass(unsafe_hash=True)
 class Thread:
     id: str
-    name: str | None  # isn't set for groups or one to one messages
+    name: Optional[str]  # isn't set for groups or one to one messages
 
 
 # todo not sure about order of fields...
@@ -57,14 +56,14 @@ class Thread:
 class _BaseMessage:
     id: str
     dt: datetime_aware
-    text: str | None
+    text: Optional[str]
 
 
 @dataclass(unsafe_hash=True)
 class _Message(_BaseMessage):
     thread_id: str
     sender_id: str
-    reply_to_id: str | None
+    reply_to_id: Optional[str]
 
 
 # todo hmm, on the one hand would be kinda nice to inherit common.Message protocol here
@@ -73,7 +72,7 @@ class _Message(_BaseMessage):
 class Message(_BaseMessage):
     thread: Thread
     sender: Sender
-    reply_to: Message | None
+    reply_to: Optional[Message]
 
 
 Entity = Union[Sender, Thread, _Message]
@@ -86,8 +85,8 @@ def _entities() -> Iterator[Res[Entity]]:
     for idx, path in enumerate(paths):
         logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
         with sqlite_connection(path, immutable=True, row_factory='row') as db:
-            use_msys = "logging_events_v2" in SqliteTool(db).get_table_names()
             try:
+                use_msys = len(list(db.execute('SELECT * FROM sqlite_master WHERE name = "logging_events_v2"'))) > 0
                 if use_msys:
                     yield from _process_db_msys(db)
                 else:
@@ -111,7 +110,7 @@ def _normalise_thread_id(key) -> str:
 # NOTE: this is sort of copy pasted from other _process_db method
 # maybe later could unify them
 def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
-    senders: dict[str, Sender] = {}
+    senders: Dict[str, Sender] = {}
     for r in db.execute('SELECT CAST(id AS TEXT) AS id, name FROM contacts'):
         s = Sender(
             id=r['id'],  # looks like it's server id? same used on facebook site
@@ -128,7 +127,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 
     # TODO can we get it from db? could infer as the most common id perhaps?
     self_id = config.facebook_id
-    thread_users: dict[str, list[Sender]] = {}
+    thread_users: Dict[str, List[Sender]] = {}
     for r in db.execute('SELECT CAST(thread_key AS TEXT) AS thread_key, CAST(contact_id AS TEXT) AS contact_id FROM participants'):
         thread_key = r['thread_key']
         user_key = r['contact_id']
@@ -169,15 +168,6 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
             CAST(sender_id AS TEXT) AS sender_id,
             reply_source_id
         FROM messages
-        WHERE
-            /* Regular message_id conforms to mid.* regex.
-               However seems that when message is not sent yet it doesn't have this server id yet
-               (happened only once, but could be just luck of course!)
-               We exclude these messages to avoid duplication.
-               However positive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if message ids change or something
-               So instead this excludes only such unsent messages.
-            */
-            message_id != offline_threading_id
         ORDER BY timestamp_ms /* they aren't in order in the database, so need to sort */
         '''
     ):
@@ -194,7 +184,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 
 
 def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
-    senders: dict[str, Sender] = {}
+    senders: Dict[str, Sender] = {}
     for r in db.execute('''SELECT * FROM thread_users'''):
         # for messaging_actor_type == 'REDUCED_MESSAGING_ACTOR', name is None
        # but they are still referenced, so need to keep
@@ -208,7 +198,7 @@ def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
         yield s
 
     self_id = config.facebook_id
-    thread_users: dict[str, list[Sender]] = {}
+    thread_users: Dict[str, List[Sender]] = {}
     for r in db.execute('SELECT * from thread_participants'):
         thread_key = r['thread_key']
         user_key = r['user_key']
@@ -238,7 +228,7 @@ def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 
     for r in db.execute(
         '''
         SELECT *, json_extract(sender, "$.user_key") AS user_key FROM messages
         WHERE msg_type NOT IN (
             -1, /* these don't have any data at all, likely immediately deleted or something? */
             2 /* these are 'left group' system messages, also a bit annoying since they might reference nonexistent users */
@@ -268,9 +258,9 @@ def contacts() -> Iterator[Res[Sender]]:
 
 
 def messages() -> Iterator[Res[Message]]:
-    senders: dict[str, Sender] = {}
-    msgs: dict[str, Message] = {}
-    threads: dict[str, Thread] = {}
+    senders: Dict[str, Sender] = {}
+    msgs: Dict[str, Message] = {}
+    threads: Dict[str, Thread] = {}
     for x in unique_everseen(_entities):
         if isinstance(x, Exception):
             yield x
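Both sides of the use_msys change probe sqlite_master to detect which schema a database uses; a standalone equivalent with the stdlib (in-memory database for illustration):

    import sqlite3

    db = sqlite3.connect(':memory:')
    db.execute('CREATE TABLE logging_events_v2 (x)')
    tables = {r[0] for r in db.execute("SELECT name FROM sqlite_master WHERE type='table'")}
    assert 'logging_events_v2' in tables  # -> the msys schema handler gets used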
@@ -1,9 +1,6 @@
-from __future__ import annotations
+from my.core import __NOT_HPI_MODULE__

-from my.core import __NOT_HPI_MODULE__  # isort: skip
+from typing import Iterator, Optional, Protocol

-from collections.abc import Iterator
-from typing import Protocol
-
 from my.core import datetime_aware

@@ -13,7 +10,7 @@ class Thread(Protocol):
     def id(self) -> str: ...

     @property
-    def name(self) -> str | None: ...
+    def name(self) -> Optional[str]: ...


 class Sender(Protocol):
@@ -21,7 +18,7 @@ class Sender(Protocol):
     def id(self) -> str: ...

     @property
-    def name(self) -> str | None: ...
+    def name(self) -> Optional[str]: ...


 class Message(Protocol):
@@ -32,7 +29,7 @@ class Message(Protocol):
     def dt(self) -> datetime_aware: ...

     @property
-    def text(self) -> str | None: ...
+    def text(self) -> Optional[str]: ...

     @property
     def thread(self) -> Thread: ...
@@ -42,11 +39,8 @@ class Message(Protocol):

 from itertools import chain

 from more_itertools import unique_everseen

-from my.core import Res, warn_if_empty
+from my.core import warn_if_empty, Res


 @warn_if_empty
 def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
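These Protocol classes define the interface that both fbmessenger backends satisfy structurally, with no shared base class. A small sketch of how that works (AndroidSender is a made-up name for illustration):

from dataclasses import dataclass
from typing import Optional, Protocol


class Sender(Protocol):
    @property
    def id(self) -> str: ...
    @property
    def name(self) -> Optional[str]: ...


@dataclass
class AndroidSender:  # hypothetical concrete type; no inheritance from Sender needed
    id: str
    name: Optional[str]


def describe(s: Sender) -> str:
    # any object exposing matching id/name attributes typechecks as a Sender
    return s.name or s.id


print(describe(AndroidSender(id='123', name=None)))  # -> '123'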
@@ -7,15 +7,16 @@ REQUIRES = [
     'git+https://github.com/karlicoss/fbmessengerexport',
 ]

-from collections.abc import Iterator
 from contextlib import ExitStack, contextmanager
 from dataclasses import dataclass
+from typing import Iterator

+from my.core import PathIsh, Res, stat, Stats
+from my.core.warnings import high
+from my.config import fbmessenger as user_config

 import fbmessengerexport.dal as messenger

-from my.config import fbmessenger as user_config
-from my.core import PathIsh, Res, Stats, stat
-from my.core.warnings import high

 ###
 # support old style config
@@ -2,14 +2,16 @@
 Foursquare/Swarm checkins
 '''

-import json
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timezone, timedelta
 from itertools import chain
+from pathlib import Path
+import json

-from my.config import foursquare as config

 # TODO pytz for timezone???
 from my.core import get_files, make_logger
+from my.config import foursquare as config


 logger = make_logger(__name__)
@@ -3,7 +3,8 @@ Unified Github data (merged from GDPR export and periodic API updates)
 """

 from . import gdpr, ghexport
-from .common import Results, merge_events
+
+from .common import merge_events, Results


 def events() -> Results:
@@ -1,27 +1,24 @@
 """
 Github events and their metadata: comments/issues/pull requests
 """

-from __future__ import annotations
-
-from my.core import __NOT_HPI_MODULE__  # isort: skip
+from ..core import __NOT_HPI_MODULE__

-from collections.abc import Iterable
 from datetime import datetime, timezone
-from typing import NamedTuple, Optional
+from typing import Optional, NamedTuple, Iterable, Set, Tuple

-from my.core import make_logger, warn_if_empty
-from my.core.error import Res
+from ..core import warn_if_empty, LazyLogger
+from ..core.error import Res

-logger = make_logger(__name__)
+logger = LazyLogger(__name__)


 class Event(NamedTuple):
     dt: datetime
     summary: str
     eid: str
     link: Optional[str]
-    body: Optional[str] = None
+    body: Optional[str]=None
     is_bot: bool = False


@@ -30,7 +27,7 @@ Results = Iterable[Res[Event]]
 @warn_if_empty
 def merge_events(*sources: Results) -> Results:
     from itertools import chain
-    emitted: set[tuple[datetime, str]] = set()
+    emitted: Set[Tuple[datetime, str]] = set()
     for e in chain(*sources):
         if isinstance(e, Exception):
             yield e
@@ -55,7 +52,7 @@ def parse_dt(s: str) -> datetime:
 # experimental way of supporting event ids... not sure
 class EventIds:
     @staticmethod
-    def repo_created(*, dts: str, name: str, ref_type: str, ref: str | None) -> str:
+    def repo_created(*, dts: str, name: str, ref_type: str, ref: Optional[str]) -> str:
         return f'{dts}_repocreated_{name}_{ref_type}_{ref}'

     @staticmethod
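merge_events above dedups events from multiple sources using a set of (datetime, str) keys; only the set's type is visible in this hunk, so the sketch below assumes the key is (dt, eid):

from datetime import datetime, timezone
from itertools import chain
from typing import Iterator, NamedTuple, Set, Tuple


class Event(NamedTuple):
    dt: datetime
    eid: str


def merge_events(*sources: Iterator[Event]) -> Iterator[Event]:
    emitted: Set[Tuple[datetime, str]] = set()
    for e in chain(*sources):
        key = (e.dt, e.eid)  # assumption: the real dedup key isn't shown in the hunk
        if key in emitted:
            continue
        emitted.add(key)
        yield e


a = [Event(datetime(2020, 1, 1, tzinfo=timezone.utc), 'x')]
print(list(merge_events(iter(a), iter(a))))  # the duplicate collapses to one event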
@@ -1,43 +1,36 @@
 """
 Github data (uses [[https://github.com/settings/admin][official GDPR export]])
 """

-from __future__ import annotations
+from dataclasses import dataclass

 import json
-from abc import abstractmethod
-from collections.abc import Iterator, Sequence
 from pathlib import Path
-from typing import Any
+import tarfile
+from typing import Iterable, Any, Sequence, Dict, Optional

-from my.core import Paths, Res, Stats, get_files, make_logger, stat, warnings
-from my.core.error import echain
+from my.core import get_files, Res, PathIsh, stat, Stats, make_logger
+from my.core.cfg import make_config
+from my.core.error import notnone, echain

+from .common import Event, parse_dt, EventIds
+
+# TODO later, use a separate user config? (github_gdpr)
+from my.config import github as user_config
+
+
+@dataclass
+class github(user_config):
+    gdpr_dir: PathIsh  # path to unpacked GDPR archive
+
+
+config = make_config(github)

-from .common import Event, EventIds, parse_dt

 logger = make_logger(__name__)


-class config:
-    @property
-    @abstractmethod
-    def gdpr_dir(self) -> Paths:
-        raise NotImplementedError
-
-
-def make_config() -> config:
-    # TODO later, use a separate user config? (github_gdpr)
-    from my.config import github as user_config
-
-    class combined_config(user_config, config):
-        pass
-
-    return combined_config()
-
-
 def inputs() -> Sequence[Path]:
-    gdpr_dir = make_config().gdpr_dir
-    res = get_files(gdpr_dir)
+    gdir = config.gdpr_dir
+    res = get_files(gdir)
     schema_json = [f for f in res if f.name == 'schema.json']
     was_unpacked = len(schema_json) > 0
     if was_unpacked:
@@ -50,37 +43,22 @@ def inputs() -> Sequence[Path]:
     return res


-def events() -> Iterator[Res[Event]]:
+def events() -> Iterable[Res[Event]]:
     last = max(inputs())

     logger.info(f'extracting data from {last}')

-    root: Path | None = None
-
-    if last.is_dir():  # if it's already CPath, this will match it
-        root = last
+    # a bit naughty and ad-hoc, but we will generify reading from tar.gz. once we have more examples
+    # another one is zulip archive
+    if last.is_dir():
+        files = list(sorted(last.glob('*.json')))  # looks like all files are in the root
+        open_file = lambda f: f.open()
     else:
-        try:
-            from kompress import CPath
-
-            root = CPath(last)
-            assert len(list(root.iterdir())) > 0  # trigger to check if we have the kompress version with targz support
-        except Exception as e:
-            logger.exception(e)
-            warnings.high("Upgrade 'kompress' to latest version with native .tar.gz support. Falling back to unpacking to tmp dir.")
-
-    if root is None:
-        from my.core.structure import match_structure
-
-        with match_structure(last, expected=()) as res:  # expected=() matches it regardless any patterns
-            [root] = res
-            yield from _process_one(root)
-    else:
-        yield from _process_one(root)
-
-
-def _process_one(root: Path) -> Iterator[Res[Event]]:
-    files = sorted(root.glob('*.json'))  # looks like all files are in the root
+        # treat as .tar.gz
+        tfile = tarfile.open(last)
+        files = list(sorted(map(Path, tfile.getnames())))
+        files = [p for p in files if len(p.parts) == 1 and p.suffix == '.json']
+        open_file = lambda p: notnone(tfile.extractfile(f'./{p}'))  # NOTE odd, doesn't work without ./

     # fmt: off
     handler_map = {
@@ -122,7 +100,8 @@ def _process_one(root: Path) -> Iterator[Res[Event]]:
             # ignored
             continue

-        j = json.loads(f.read_text())
+        with open_file(f) as fo:
+            j = json.load(fo)
         for r in j:
             try:
                 yield handler(r)
@@ -137,7 +116,7 @@ def stats() -> Stats:


 # TODO typing.TypedDict could be handy here..
-def _parse_common(d: dict) -> dict:
+def _parse_common(d: Dict) -> Dict:
     url = d['url']
     body = d.get('body')
     return {
@@ -147,7 +126,7 @@ def _parse_common(d: dict) -> dict:
     }


-def _parse_repository(d: dict) -> Event:
+def _parse_repository(d: Dict) -> Event:
     pref = 'https://github.com/'
     url = d['url']
     dts = d['created_at']
@@ -163,13 +142,13 @@ def _parse_repository(d: dict) -> Event:


 # user may be None if the user was deleted
-def _is_bot(user: str | None) -> bool:
+def _is_bot(user: Optional[str]) -> bool:
     if user is None:
         return False
-    return "[bot]" in user
+    return "[bot]" in "user"


-def _parse_issue_comment(d: dict) -> Event:
+def _parse_issue_comment(d: Dict) -> Event:
     url = d['url']
     return Event(
         **_parse_common(d),
@@ -179,7 +158,7 @@ def _parse_issue_comment(d: dict) -> Event:
     )


-def _parse_issue(d: dict) -> Event:
+def _parse_issue(d: Dict) -> Event:
     url = d['url']
     title = d['title']
     return Event(
@@ -190,7 +169,7 @@ def _parse_issue(d: dict) -> Event:
     )


-def _parse_pull_request(d: dict) -> Event:
+def _parse_pull_request(d: Dict) -> Event:
     dts = d['created_at']
     url = d['url']
     title = d['title']
@@ -204,7 +183,7 @@ def _parse_pull_request(d: dict) -> Event:
     )


-def _parse_project(d: dict) -> Event:
+def _parse_project(d: Dict) -> Event:
     url = d['url']
     title = d['name']
     is_bot = "[bot]" in d["creator"]
@@ -219,7 +198,7 @@ def _parse_project(d: dict) -> Event:
     )


-def _parse_release(d: dict) -> Event:
+def _parse_release(d: Dict) -> Event:
     tag = d['tag_name']
     return Event(
         **_parse_common(d),
@@ -228,7 +207,7 @@ def _parse_release(d: dict) -> Event:
     )


-def _parse_commit_comment(d: dict) -> Event:
+def _parse_commit_comment(d: Dict) -> Event:
     url = d['url']
     return Event(
         **_parse_common(d),
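The right-hand (v0.5.20240824) side of the events() hunk reads a .tar.gz export directly with tarfile, keeping only top-level *.json members, and works around the './' prefix GNU tar stores in member names. A standalone sketch of that approach (the archive name is a placeholder):

import json
import tarfile
from pathlib import Path

# 'gdpr.tar.gz' stands in for an actual GDPR archive with json files at its root
with tarfile.open('gdpr.tar.gz') as tfile:
    files = sorted(Path(name) for name in tfile.getnames())
    # Path('./x.json').parts == ('x.json',), so this keeps only top-level json files
    files = [p for p in files if len(p.parts) == 1 and p.suffix == '.json']
    for p in files:
        fo = tfile.extractfile(f'./{p}')  # members are stored as './name', hence the prefix
        assert fo is not None  # extractfile returns None for non-regular members
        data = json.load(fo)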
@@ -1,17 +1,13 @@
 """
 Github data: events, comments, etc. (API data)
 """

-from __future__ import annotations
-
 REQUIRES = [
     'git+https://github.com/karlicoss/ghexport',
 ]

 from dataclasses import dataclass

-from my.config import github as user_config
 from my.core import Paths
+from my.config import github as user_config


 @dataclass
@@ -25,9 +21,7 @@ class github(user_config):

 ###

-from my.core.cfg import Attrs, make_config
+from my.core.cfg import make_config, Attrs


 def migration(attrs: Attrs) -> Attrs:
     export_dir = 'export_dir'
     if export_dir in attrs:  # legacy name
@@ -47,14 +41,15 @@ except ModuleNotFoundError as e:

 ############################

-from collections.abc import Sequence
 from functools import lru_cache
 from pathlib import Path
+from typing import Tuple, Dict, Sequence, Optional

-from my.core import LazyLogger, get_files
+from my.core import get_files, LazyLogger
 from my.core.cachew import mcachew

-from .common import Event, EventIds, Results, parse_dt
+from .common import Event, parse_dt, Results, EventIds


 logger = LazyLogger(__name__)

@@ -87,9 +82,7 @@ def _events() -> Results:
         yield e


-from my.core import Stats, stat
+from my.core import stat, Stats


 def stats() -> Stats:
     return {
         **stat(events),
@@ -106,7 +99,7 @@ def _log_if_unhandled(e) -> None:
 Link = str
 EventId = str
 Body = str
-def _get_summary(e) -> tuple[str, Link | None, EventId | None, Body | None]:
+def _get_summary(e) -> Tuple[str, Optional[Link], Optional[EventId], Optional[Body]]:
     # TODO would be nice to give access to raw event within timeline
     dts = e['created_at']
     eid = e['id']
@@ -202,7 +195,7 @@ def _get_summary(e) -> tuple[str, Link | None, EventId | None, Body | None]:
     return tp, None, None, None


-def _parse_event(d: dict) -> Event:
+def _parse_event(d: Dict) -> Event:
     summary, link, eid, body = _get_summary(d)
     if eid is None:
         eid = d['id']  # meh
@@ -7,18 +7,15 @@ REQUIRES = [


 from dataclasses import dataclass
+from my.core import datetime_aware, Paths
 from my.config import goodreads as user_config
-from my.core import Paths, datetime_aware


 @dataclass
 class goodreads(user_config):
     # paths[s]/glob to the exported JSON data
     export_path: Paths

-from my.core.cfg import Attrs, make_config
+from my.core.cfg import make_config, Attrs


 def _migration(attrs: Attrs) -> Attrs:
     export_dir = 'export_dir'
@@ -32,19 +29,18 @@ config = make_config(goodreads, migration=_migration)
 #############################3


-from collections.abc import Iterator, Sequence
-from pathlib import Path
-
 from my.core import get_files
+from typing import Sequence, Iterator
+from pathlib import Path

 def inputs() -> Sequence[Path]:
     return get_files(config.export_path)


 from datetime import datetime

 import pytz

 from goodrexport import dal
@@ -1,8 +1,8 @@
-from my.core import __NOT_HPI_MODULE__  # isort: skip
+from my.core import __NOT_HPI_MODULE__

 # NOTE: this tool was quite useful https://github.com/aj3423/aproto

-from google.protobuf import descriptor_pb2, descriptor_pool, message_factory
+from google.protobuf import descriptor_pool, descriptor_pb2, message_factory

 TYPE_STRING = descriptor_pb2.FieldDescriptorProto.TYPE_STRING
 TYPE_BYTES = descriptor_pb2.FieldDescriptorProto.TYPE_BYTES
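The TYPE_STRING/TYPE_BYTES constants hint at what this module does: it assembles protobuf message classes at runtime from hand-written descriptors in order to decode raw blobs (the aproto tool referenced above helps reverse engineer them). A self-contained sketch of the technique, assuming a protobuf version recent enough to have message_factory.GetMessageClass:

from google.protobuf import descriptor_pb2, descriptor_pool, message_factory

# hand-rolled schema equivalent to: message Place { optional string name = 1; }
fdp = descriptor_pb2.FileDescriptorProto(name='place.proto', package='sketch')
msg = fdp.message_type.add()
msg.name = 'Place'
field = msg.field.add()
field.name = 'name'
field.number = 1
field.type = descriptor_pb2.FieldDescriptorProto.TYPE_STRING
field.label = descriptor_pb2.FieldDescriptorProto.LABEL_OPTIONAL

pool = descriptor_pool.DescriptorPool()
pool.Add(fdp)
Place = message_factory.GetMessageClass(pool.FindMessageTypeByName('sketch.Place'))

# 0x0a == field 1, wire type 2 (length-delimited); then length 5 and the payload bytes
p = Place.FromString(b'\x0a\x05hello')
print(p.name)  # -> 'hello'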
@@ -7,20 +7,20 @@ REQUIRES = [
     "protobuf",  # for parsing blobs from the database
 ]

-from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, Iterator, Optional, Sequence
 from urllib.parse import quote

-from my.core import LazyLogger, Paths, Res, datetime_aware, get_files
+from my.core import datetime_aware, get_files, LazyLogger, Paths, Res
 from my.core.common import unique_everseen
 from my.core.sqlite import sqlite_connection

+import my.config
+
 from ._android_protobuf import parse_labeled, parse_list, parse_place

-import my.config  # isort: skip
-
 logger = LazyLogger(__name__)

@@ -59,8 +59,8 @@ class Place:
     updated_at: datetime_aware  # TODO double check it's utc?
     title: str
     location: Location
-    address: str | None
-    note: str | None
+    address: Optional[str]
+    note: Optional[str]

     @property
     def place_url(self) -> str:
@@ -2,22 +2,19 @@
 Google Takeout exports: browsing history, search/youtube/google play activity
 '''

-from __future__ import annotations
-
-from my.core import __NOT_HPI_MODULE__  # isort: skip
-
-import re
-from collections.abc import Iterable
-from datetime import datetime
 from enum import Enum
-from html.parser import HTMLParser
+import re
 from pathlib import Path
-from typing import Any, Callable
+from datetime import datetime
+from html.parser import HTMLParser
+from typing import List, Optional, Any, Callable, Iterable, Tuple
+from collections import OrderedDict
 from urllib.parse import unquote

 import pytz

-from my.core.time import abbr_to_timezone
+from ...core.time import abbr_to_timezone


 # NOTE: https://bugs.python.org/issue22377 %Z doesn't work properly
 _TIME_FORMATS = [
@@ -40,7 +37,7 @@ def parse_dt(s: str) -> datetime:
     s, tzabbr = s.rsplit(maxsplit=1)
     tz = abbr_to_timezone(tzabbr)

-    dt: datetime | None = None
+    dt: Optional[datetime] = None
     for fmt in _TIME_FORMATS:
         try:
             dt = datetime.strptime(s, fmt)
@@ -77,7 +74,7 @@ class State(Enum):

 Url = str
 Title = str
-Parsed = tuple[datetime, Url, Title]
+Parsed = Tuple[datetime, Url, Title]
 Callback = Callable[[datetime, Url, Title], None]


@@ -87,9 +84,9 @@ class TakeoutHTMLParser(HTMLParser):
         super().__init__()
         self.state: State = State.OUTSIDE

-        self.title_parts: list[str] = []
-        self.title: str | None = None
-        self.url: str | None = None
+        self.title_parts: List[str] = []
+        self.title: Optional[str] = None
+        self.url: Optional[str] = None

         self.callback = callback

@@ -97,8 +94,8 @@ class TakeoutHTMLParser(HTMLParser):
     def handle_starttag(self, tag, attrs):
         if self.state == State.INSIDE and tag == 'a':
             self.state = State.PARSING_LINK
-            [hr] = (v for k, v in attrs if k == 'href')
-            assert hr is not None
+            attrs = OrderedDict(attrs)
+            hr = attrs['href']

             # sometimes it starts with this prefix, it's apparently clicks from google search? or visits from chrome address line? who knows...
             # TODO handle http?
@@ -126,7 +123,7 @@ class TakeoutHTMLParser(HTMLParser):
     # JamiexxVEVO
     # Jun 21, 2018, 5:48:34 AM
     # Products:
     #  YouTube
     def handle_data(self, data):
         if self.state == State.OUTSIDE:
             if data[:-1].strip() in ("Watched", "Visited"):
@@ -152,7 +149,7 @@ class TakeoutHTMLParser(HTMLParser):


 def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
-    results: list[Parsed] = []
+    results: List[Parsed] = []
     def cb(dt: datetime, url: Url, title: Title) -> None:
         results.append((dt, url, title))
     parser = TakeoutHTMLParser(callback=cb)
@@ -160,3 +157,5 @@ def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
         data = fo.read()
     parser.feed(data)
     return results
+
+from ...core import __NOT_HPI_MODULE__
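The substantive change in handle_starttag above: master destructures the single href value out of attrs with a generator expression, where the old code built an OrderedDict first. A tiny standalone parser using the same pattern:

from html.parser import HTMLParser


class LinkParser(HTMLParser):
    # minimal stand-in for TakeoutHTMLParser's link handling
    def __init__(self, callback):
        super().__init__()
        self.callback = callback

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            # attrs is a list of (name, value) pairs; this asserts exactly one href
            [hr] = (v for k, v in attrs if k == 'href')
            self.callback(hr)


LinkParser(print).feed('<a href="https://example.com">watched</a>')  # -> https://example.com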
@@ -1,7 +1,7 @@
 """
-Parses Google Takeout using [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]]
+Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]

-See [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] for more information
+See [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] for more information
 about how to export and organize your takeouts

 If the DISABLE_TAKEOUT_CACHE environment variable is set, this won't cache individual
@@ -12,31 +12,27 @@ zip files of the exports, which are temporarily unpacked while creating
 the cachew cache
 """

-REQUIRES = ["git+https://github.com/purarue/google_takeout_parser"]
+REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]

-import os
-from collections.abc import Sequence
 from contextlib import ExitStack
 from dataclasses import dataclass
+import os
+from typing import List, Sequence, cast
 from pathlib import Path
-from typing import cast
+from my.core import make_config, stat, Stats, get_files, Paths, make_logger

-from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
-
-from my.core import Paths, Stats, get_files, make_config, make_logger, stat
 from my.core.cachew import mcachew
 from my.core.error import ErrorPolicy
 from my.core.structure import match_structure
-from my.core.time import user_forced

+from my.core.time import user_forced
+from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
 ABBR_TIMEZONES.extend(user_forced())

 import google_takeout_parser
-from google_takeout_parser.merge import CacheResults, GoogleEventSet
-from google_takeout_parser.models import BaseEvent
 from google_takeout_parser.path_dispatch import TakeoutParser
+from google_takeout_parser.merge import GoogleEventSet, CacheResults

-# see https://github.com/purarue/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
+# see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
 from my.config import google as user_config

@@ -59,7 +55,6 @@ logger = make_logger(__name__, level="warning")

 # patch the takeout parser logger to match the computed loglevel
 from google_takeout_parser.log import setup as setup_takeout_logger

 setup_takeout_logger(logger.level)

@@ -87,7 +82,7 @@ except ImportError:

 google_takeout_version = str(getattr(google_takeout_parser, '__version__', 'unknown'))

-def _cachew_depends_on() -> list[str]:
+def _cachew_depends_on() -> List[str]:
     exports = sorted([str(p) for p in inputs()])
     # add google takeout parser pip version to hash, so this re-creates on breaking changes
     exports.insert(0, f"google_takeout_version: {google_takeout_version}")
@@ -96,21 +91,10 @@ def _cachew_depends_on() -> list[str]:

 # ResultsType is a Union of all of the models in google_takeout_parser
 @mcachew(depends_on=_cachew_depends_on, logger=logger, force_file=True)
-def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:  # noqa: FBT001
+def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
     error_policy = config.error_policy
     count = 0
     emitted = GoogleEventSet()

-    try:
-        emitted_add = emitted.add_if_not_present
-    except AttributeError:
-        # compat for older versions of google_takeout_parser which didn't have this method
-        def emitted_add(other: BaseEvent) -> bool:
-            if other in emitted:
-                return False
-            emitted.add(other)
-            return True
-
     # reversed shouldn't really matter? but logic is to use newer
     # takeouts if they're named according to date, since JSON Activity
     # is nicer than HTML Activity
@@ -123,7 +107,7 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
             else:
                 results = exit_stack.enter_context(match_structure(path, expected=EXPECTED, partial=True))
             for m in results:
-                # e.g. /home/username/data/google_takeout/Takeout-1634932457.zip") -> 'Takeout-1634932457'
+                # e.g. /home/sean/data/google_takeout/Takeout-1634932457.zip") -> 'Takeout-1634932457'
                 # means that zipped takeouts have nice filenames from cachew
                 cw_id, _, _ = path.name.rpartition(".")
                 # each takeout result is cached as well, in individual databases per-type
@@ -139,9 +123,10 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults:
                 elif error_policy == 'drop':
                     pass
                 continue
-            if emitted_add(event):
-                yield event  # type: ignore[misc]
+            if event in emitted:
+                continue
+            emitted.add(event)
+            yield event  # type: ignore[misc]
     logger.debug(
         f"HPI Takeout merge: from a total of {count} events, removed {count - len(emitted)} duplicates"
     )
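The final hunk is behavioural: master calls emitted.add_if_not_present(event), with a fallback shim for older google_takeout_parser versions, presumably so the event's dedup key is only computed once instead of separately for the membership test and the add. A stand-in sketch of that API shape and the feature detection (EventSet is a made-up minimal substitute for GoogleEventSet):

class EventSet:
    # minimal stand-in for google_takeout_parser's GoogleEventSet
    def __init__(self):
        self._seen = set()

    def __contains__(self, event) -> bool:
        return event in self._seen

    def add(self, event) -> None:
        self._seen.add(event)

    def add_if_not_present(self, event) -> bool:
        # one key computation and lookup instead of separate `in` and `add` calls
        if event in self._seen:
            return False
        self._seen.add(event)
        return True


emitted = EventSet()
emitted_add = getattr(emitted, 'add_if_not_present', None)
if emitted_add is None:
    # compat shim mirroring the hunk above, for versions without the method
    def emitted_add(event):
        if event in emitted:
            return False
        emitted.add(event)
        return True

for event in ['a', 'a', 'b']:
    if emitted_add(event):
        print(event)  # prints 'a' then 'b'; the duplicate is dropped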
Some files were not shown because too many files have changed in this diff.