Compare commits
199 commits
v0.3.20230...master
Commits:
bb703c8c6a, 54df429f61, f1d23c5e96, d8c53bde34, 95a16b956f, a7f05c2cad, ad55c5c345, 7ab6f0d5cb, a2b397ec4a, 8496d131e7, d3f9a8e8b6, bc7c3ac253, a8f86e32b9, 6a6d157040, bf8af6c598, 8ed9e1947e, 75639a3d5e, 3166109f15, 02dabe9f2b, 239e6617fe, e036cc9e85, 2ca323da84, 6a18f47c37, 201ddd4d7c, 27178c0939, 71fdeca5e1, d58453410c, 1c5efc46aa, affa79ba3a, fc0e0be291, c5df3ce128, ac08af7aab, 9fd4227abf, bd1e5d2f11, 985c0f94e6, 72cc8ff3ac, d0df8e8f2d, b594377a59, 664c40e3e8, 118c2d4484, d244c7cc4e, c08ddbc781, b1fe23b8d0, b87d1c970a, a5643206a0, 270080bd56, 094519acaf, 7cae9d5bf3, 2ff2dcfc00, 1215181af5, 5a67f0bafe, d154825591, 9f017fb29b, 5ec357915b, 245ad22057, 7bfce72b7c, 7023088d13, 614c929f95, 2b0f92c883, 7f8a502310, 88f3c17c27, c45c51af22, 18529257e7, bcc4c15304, 06084a8787, 770dba5506, 66c08a6c80, c64d7f5b67, 973c4205df, a7439c7846, 1317914bff, 1e1e8d8494, 069264ce52, c69a0b43ba, 34593c032d, 074e24c309, fb8e9909a4, 3aebc573e8, b615ba10b1, 2c63fe25c0, 652ee9b875, 9e72672b4f, d5fccf1874, 0e6dd32afe, c9c0e19543, 35dd5d82a0, 8a8a1ebb0e, 103ea2096e, 751ed02f43, 477b7e8fd3, 0f3d09915c, 7236024c7a, 87a8a7781b, 93e475795d, 1b187b2c1b, 3ec362fce9, a0ce666024, 1c452b12d4, 51209c547e, a4a7bc41b9, 3d75abafe9, a8f8858cb1, adbc0e73a2, 84d835962d, 224ba521e3, a843407e40, 09e0f66892, bde43d6a7a, 37643c098f, 7b1cec9326, 657ce08ac8, 996169aa29, 70bb9ed0c5, 65c617ed94, ac5f71c68b, e547acfa59, 33f8d867e2, 19353e996d, 4ac3bbb101, 5630621ec1, 7631f1f2e4, 105928238f, 24da04f142, 71cb66df5f, d6786084ca, 79ce8e84ec, f28f68b14b, ea195e3d17, bd27bd4c24, f668208bce, 6821fbc2fe, edea2c2e75, d88a1b9933, 4f7c9b4a71, 70bf51a125, fb2b3e07de, 32aa87b3ec, 3a25c9042c, bef0423b4f, a0910e798d, 1f61e853c9, a5c04e789a, 0e94e0a9ea, 72ab2603d5, 414b88178f, f355a55e06, f9a1050ceb, 86ea605aec, c335c0c9d8, a60d69fb30, c5fe2e9412, 872053a3c3, 37bb33cdbc, 8c2d1c9463, c63e80ce94, 9ffce1b696, 29832a9f75, 28d2450a21, fe26efaea8, bb478f369d, 68289c1be3, 0512488241, fabcbab751, 8cd74a9fc4, f3507613f0, 8addd2d58a, 01480ec8eb, be81466871, 2a46341ce2, ff84d8fc88, c283e542e3, 642e3b14d5, 7ec894807f, fcaa7c1561, d6af4dec11, 88a3aa8d67, c25ab51664, 6f6be5c78e, dff31455f1, 661714f1d9, 6aa3d4225e, ab7135d42f, c12224af74, c91534b966, 5fe21240b4, f8cd31044e, fcfc423a75, 9594caa1cd, 04d976f937, a98bc6daca, fe88380499, c34656e8fb, a445d2cbfe, 7a32302d66, 82bc51d9fc, 40de162fab, 02c738594f, d464b1e607, 0c5b2b4a09
263 changed files with 9617 additions and 4867 deletions
@@ -21,7 +21,7 @@ import shutil

 is_ci = os.environ.get('CI') is not None

-def main():
+def main() -> None:
     import argparse
     p = argparse.ArgumentParser()
     p.add_argument('--test', action='store_true', help='use test pypi')

@@ -29,7 +29,7 @@ def main():
     extra = []
     if args.test:
-        extra.extend(['--repository-url', 'https://test.pypi.org/legacy/'])
+        extra.extend(['--repository', 'testpypi'])

     root = Path(__file__).absolute().parent.parent
     os.chdir(root)  # just in case

@@ -42,7 +42,7 @@ def main():
     if dist.exists():
         shutil.rmtree(dist)

-    check_call('python3 setup.py sdist bdist_wheel', shell=True)
+    check_call(['python3', '-m', 'build'])

     TP = 'TWINE_PASSWORD'
     password = os.environ.get(TP)
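These hunks modernize the packaging flow: builds go through PEP 517 (`python3 -m build`) instead of `setup.py sdist bdist_wheel`, and the test upload uses twine's named `testpypi` index rather than a hardcoded URL. A minimal sketch of the resulting flow (standalone illustration, not the actual script; assumes `build` and `twine` are installed and that twine reads credentials from the usual `TWINE_USERNAME`/`TWINE_PASSWORD` environment variables):

```python
# sketch of the modernized build-and-upload flow
from glob import glob
from subprocess import check_call

def publish(test: bool = False) -> None:
    extra = []
    if test:
        # 'testpypi' is one of twine's built-in repository names,
        # so the https://test.pypi.org/legacy/ URL doesn't need hardcoding
        extra.extend(['--repository', 'testpypi'])
    check_call(['python3', '-m', 'build'])  # builds sdist + wheel into dist/
    check_call(['python3', '-m', 'twine', 'upload', *extra, *glob('dist/*')])
```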
@@ -11,6 +11,8 @@ if ! command -v sudo; then
 }
 fi

+# --parallel-live to show outputs while it's running
+tox_cmd='run-parallel --parallel-live'
 if [ -n "${CI-}" ]; then
     # install OS specific stuff here
     case "$OSTYPE" in

@@ -20,7 +22,8 @@ if [ -n "${CI-}" ]; then
         ;;
     cygwin* | msys* | win*)
         # windows
-        :
+        # ugh. parallel stuff seems super flaky under windows, some random failures, "file used by other process" and crap like that
+        tox_cmd='run'
         ;;
     *)
         # must be linux?

@@ -37,5 +40,9 @@ if ! command -v python3 &> /dev/null; then
     PY_BIN="python"
 fi

-"$PY_BIN" -m pip install --user tox
-"$PY_BIN" -m tox
+# TODO hmm for some reason installing uv with pip and then running
+# "$PY_BIN" -m uv tool fails with missing setuptools error??
+# just uvx directly works, but it's not present in PATH...
+"$PY_BIN" -m pip install --user pipx
+"$PY_BIN" -m pipx run uv tool run --with=tox-uv tox $tox_cmd "$@"
.github/workflows/main.yml (vendored, 49 changed lines)
@@ -5,24 +5,36 @@ on:
   push:
     branches: '*'
     tags: 'v[0-9]+.*' # only trigger on 'release' tags for PyPi
-    # Note that people who fork it need to go to "Actions" tab on their fork and click "I understand my workflows, go ahead and enable them".
+    # Ideally I would put this in the pypi job... but github syntax doesn't allow for regexes there :shrug:
   pull_request: # needed to trigger on others' PRs
     # Note that people who fork it need to go to "Actions" tab on their fork and click "I understand my workflows, go ahead and enable them".
   workflow_dispatch: # needed to trigger workflows manually
     # todo cron?
+    inputs:
+      debug_enabled:
+        type: boolean
+        description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)'
+        required: false
+        default: false

 jobs:
   build:
     strategy:
+      fail-fast: false
       matrix:
         platform: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.7', '3.8', '3.9', '3.10']
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
         exclude: [
-          # windows runners are pretty scarce, so let's only run one of them..
-          {platform: windows-latest, python-version: '3.7' },
-          {platform: windows-latest, python-version: '3.9' },
+          # windows runners are pretty scarce, so let's only run lowest and highest python version
           {platform: windows-latest, python-version: '3.10'},
+          {platform: windows-latest, python-version: '3.11'},
+          {platform: windows-latest, python-version: '3.12'},
+
+          # same, macos is a bit too slow and ubuntu covers python quirks well
+          {platform: macos-latest , python-version: '3.10' },
+          {platform: macos-latest , python-version: '3.11' },
+          {platform: macos-latest , python-version: '3.12' },
         ]

     runs-on: ${{ matrix.platform }}

@@ -34,29 +46,31 @@ jobs:
     # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation
     - run: echo "$HOME/.local/bin" >> $GITHUB_PATH

-    - uses: actions/setup-python@v3
+    - uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}

-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
       with:
         submodules: recursive
         fetch-depth: 0 # nicer to have all git history when debugging/for tests

-    # uncomment for SSH debugging
-    # - uses: mxschmitt/action-tmate@v3
+    - uses: mxschmitt/action-tmate@v3
+      if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

     # explicit bash command is necessary for Windows CI runner, otherwise it thinks it's cmd...
-    - run: bash scripts/ci/run
+    - run: bash .ci/run

     - if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
+        include-hidden-files: true
         name: .coverage.mypy-misc_${{ matrix.platform }}_${{ matrix.python-version }}
         path: .coverage.mypy-misc/
     - if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
+        include-hidden-files: true
         name: .coverage.mypy-core_${{ matrix.platform }}_${{ matrix.python-version }}
         path: .coverage.mypy-core/

@@ -68,11 +82,11 @@ jobs:
     # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation
     - run: echo "$HOME/.local/bin" >> $GITHUB_PATH

-    - uses: actions/setup-python@v3
+    - uses: actions/setup-python@v5
       with:
-        python-version: '3.8'
+        python-version: '3.10'

-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
       with:
         submodules: recursive

@@ -81,8 +95,7 @@ jobs:
       if: github.event_name != 'pull_request' && github.event.ref == 'refs/heads/master'
       env:
         TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD_TEST }}
-      run: pip3 install --user wheel twine && scripts/release --test
-      # TODO run pip install just to test?
+      run: pip3 install --user --upgrade build twine && .ci/release --test

     - name: 'release to pypi'
       # always deploy tags to release pypi

@@ -90,4 +103,4 @@ jobs:
       if: github.event_name != 'pull_request' && startsWith(github.event.ref, 'refs/tags')
       env:
         TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
-      run: pip3 install --user wheel twine && scripts/release
+      run: pip3 install --user --upgrade build twine && .ci/release
.gitignore (vendored, 4 changed lines)
@@ -12,6 +12,7 @@
 auto-save-list
 tramp
 .\#*
+*.gpx

 # Org-mode
 .org-id-locations

@@ -154,6 +155,9 @@ celerybeat-schedule
 .dmypy.json
 dmypy.json

+# linters
+.ruff_cache/
+
 # Pyre type checker
 .pyre/
@@ -20,7 +20,7 @@ General/my.core changes:
 - e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test

 Modules:
-- some innitial work on filling **InfluxDB** with HPI data
+- some initial work on filling **InfluxDB** with HPI data

 - pinboard
   - 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly
@@ -531,7 +531,7 @@ If you like the shell or just want to quickly convert/grab some information from
 #+begin_src bash
 $ hpi query my.coding.commits.commits --stream # stream JSON objects as they're read
     --order-type datetime                      # find the 'datetime' attribute and order by that
-    --after '2020-01-01 00:00:00' --before '2020-12-31 23:59:59' # in 2020
+    --after '2020-01-01' --before '2021-01-01'                   # in 2020
     | jq '.committed_dt' -r                    # extract the datetime
     # mangle the output a bit to group by month and graph it
     | cut -d'-' -f-2 | sort | uniq -c | awk '{print $2,$1}' | sort -n | termgraph

@@ -552,6 +552,8 @@ If you like the shell or just want to quickly convert/grab some information from
 2020-12: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 383.00
 #+end_src

+See [[https://github.com/karlicoss/HPI/blob/master/doc/QUERY.md][query docs]]
+for more examples

 ** Querying Roam Research database
 :PROPERTIES:

@@ -721,10 +723,10 @@ If you want to write modules for personal use but don't want to merge them into

 Other HPI Repositories:

-- [[https://github.com/seanbreckenridge/HPI][seanbreckenridge/HPI]]
+- [[https://github.com/purarue/HPI][purarue/HPI]]
 - [[https://github.com/madelinecameron/hpi][madelinecameron/HPI]]

-If you want to create your own to create your own modules/override something here, you can use the [[https://github.com/seanbreckenridge/HPI-template][template]].
+If you want to create your own to create your own modules/override something here, you can use the [[https://github.com/purarue/HPI-template][template]].

 * Related links
 :PROPERTIES:
conftest.py (new file, 47 lines)

```python
# this is a hack to monkey patch pytest so it handles tests inside namespace packages without __init__.py properly
# without it, pytest can't discover the package root for some reason
# also see https://github.com/karlicoss/pytest_namespace_pkgs for more

import os
import pathlib
from typing import Optional

import _pytest.main
import _pytest.pathlib

# we consider all dirs in repo/ to be namespace packages
root_dir = pathlib.Path(__file__).absolute().parent.resolve()  # / 'src'
assert root_dir.exists(), root_dir

# TODO assert it contains package name?? maybe get it via setuptools..

namespace_pkg_dirs = [str(d) for d in root_dir.iterdir() if d.is_dir()]

# resolve_package_path is called from _pytest.pathlib.import_path
# takes a full abs path to the test file and needs to return the path to the 'root' package on the filesystem
resolve_pkg_path_orig = _pytest.pathlib.resolve_package_path
def resolve_package_path(path: pathlib.Path) -> Optional[pathlib.Path]:
    result = path  # search from the test file upwards
    for parent in result.parents:
        if str(parent) in namespace_pkg_dirs:
            return parent
    if os.name == 'nt':
        # ??? for some reason on windows it is trying to call this against conftest? but not on linux/osx
        if path.name == 'conftest.py':
            return resolve_pkg_path_orig(path)
    raise RuntimeError("Couldn't determine path for ", path)
_pytest.pathlib.resolve_package_path = resolve_package_path


# without patching, the orig function returns just a package name for some reason
# (I think it's used as a sort of fallback)
# so we need to point it at the absolute path properly
# not sure what are the consequences.. maybe it wouldn't be able to run against installed packages? not sure..
search_pypath_orig = _pytest.main.search_pypath
def search_pypath(module_name: str) -> str:
    mpath = root_dir / module_name.replace('.', os.sep)
    if not mpath.is_dir():
        mpath = mpath.with_suffix('.py')
    assert mpath.exists(), mpath  # just in case
    return str(mpath)
_pytest.main.search_pypath = search_pypath
```
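The net effect of the first patch: starting from the test file, walk up the directory tree until hitting a directory registered as a namespace package, and report that as the package root. A standalone illustration of that logic (hypothetical `/repo` layout, not the conftest itself):

```python
import pathlib
from typing import Optional

# pretend the checkout root contains the namespace package dir 'my'
namespace_pkg_dirs = ['/repo/my']

def resolve_package_path(path: pathlib.Path) -> Optional[pathlib.Path]:
    # walk upwards from the test file until a known namespace package dir is hit
    for parent in path.parents:
        if str(parent) in namespace_pkg_dirs:
            return parent
    return None

print(resolve_package_path(pathlib.Path('/repo/my/core/tests/test_common.py')))
# -> /repo/my
```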
demo.py (11 changed lines)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 from subprocess import check_call, DEVNULL
-from shutil import copy, copytree
+from shutil import copytree, ignore_patterns
 import os
 from os.path import abspath
 from sys import executable as python

@@ -9,12 +9,17 @@ from pathlib import Path
 my_repo = Path(__file__).absolute().parent


-def run():
+def run() -> None:
     # uses fixed paths; worth it for the sake of demonstration
     # assumes we're in /tmp/my_demo now

     # 1. clone git@github.com:karlicoss/my.git
-    copytree(my_repo, 'my_repo', symlinks=True)
+    copytree(
+        my_repo,
+        'my_repo',
+        symlinks=True,
+        ignore=ignore_patterns('.tox*'),  # tox dir might have broken symlinks while tests are running in parallel
+    )

     # 2. prepare repositories you'd be using. For this demo we only set up Hypothesis
     tox = 'TOX' in os.environ
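The new `ignore=` argument is what skips the flaky symlinks: `shutil.ignore_patterns` builds a callable that `copytree` invokes with each directory and the names inside it, and any returned names are excluded from the copy. A standalone illustration:

```python
from shutil import ignore_patterns

# ignore_patterns('.tox*') returns a function of (directory, names) -> names to skip
ignore = ignore_patterns('.tox*')
print(ignore('/tmp/my_repo', ['.tox', '.tox-py310', 'my', 'setup.py']))
# -> {'.tox', '.tox-py310'}
```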
@@ -76,7 +76,7 @@ This would typically be used in an overridden `all.py` file, or in a one-off script
 which you may want to filter out some items from a source, progressively adding more
 items to the denylist as you go.

-A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/seanbreckenridge/HPI)):
+A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/purarue/HPI)):

 ```python
 from typing import Iterator

@@ -119,9 +119,9 @@ python3 -c 'from my.ip import all; all.deny.deny_cli(all.ips())'
 To edit the `all.py`, you could either:

 - install it as editable (`python3 -m pip install --user -e ./HPI`), and then edit the file directly
-- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/seanbreckenridge/reorder_editable), and possibly the [`HPI-template`](https://github.com/seanbreckenridge/HPI-template) to create your own HPI namespace package to create your own `all.py` file.
+- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/purarue/reorder_editable), and possibly the [`HPI-template`](https://github.com/purarue/HPI-template) to create your own HPI namespace package to create your own `all.py` file.

-For a real example of this see, [seanbreckenridge/HPI-personal](https://github.com/seanbreckenridge/HPI-personal/blob/master/my/ip/all.py)
+For a real example of this see, [purarue/HPI-personal](https://github.com/purarue/HPI-personal/blob/master/my/ip/all.py)

 Sidenote: the reason why we want to specifically override
 the all.py and not just create a script that filters out the items you're
@@ -76,7 +76,7 @@ The config snippets below are meant to be modified accordingly and *pasted into

 You don't have to set up all modules at once, it's recommended to do it gradually, to get the feel of how HPI works.

-For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py][config]]
+For an extensive/complex example, you can check out ~@purarue~'s [[https://github.com/purarue/dotfiles/blob/master/.config/my/my/config/__init__.py][config]]

 # Nested Configurations before the doc generation using the block below
 ** [[file:../my/reddit][my.reddit]]

@@ -96,7 +96,7 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http

 class pushshift:
     '''
-    Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments
+    Uses [[https://github.com/purarue/pushshift_comment_export][pushshift]] to get access to old comments
     '''

     # path[s]/glob to the exported JSON data

@@ -106,7 +106,7 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http

 ** [[file:../my/browser/][my.browser]]

-Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
+Parses browser history using [[http://github.com/purarue/browserexport][browserexport]]

 #+begin_src python
 class browser:

@@ -132,7 +132,7 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http

 You might also be able to use [[file:../my/location/via_ip.py][my.location.via_ip]] which uses =my.ip.all= to
 provide geolocation data for an IPs (though no IPs are provided from any
-of the sources here). For an example of usage, see [[https://github.com/seanbreckenridge/HPI/tree/master/my/ip][here]]
+of the sources here). For an example of usage, see [[https://github.com/purarue/HPI/tree/master/my/ip][here]]

 #+begin_src python
 class location:

@@ -256,9 +256,9 @@ for cls, p in modules:

 ** [[file:../my/google/takeout/parser.py][my.google.takeout.parser]]

-Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]
+Parses Google Takeout using [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]]

-See [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts
+See [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts

 If the =DISABLE_TAKEOUT_CACHE= environment variable is set, this won't
 cache individual exports in =~/.cache/google_takeout_parser=
@@ -2,6 +2,19 @@ Some thoughts on modules, how to structure them, and adding your own/extending HPI

 This is slightly more advanced, and would be useful if you're trying to extend HPI by developing your own modules, or contributing back to HPI

+* TOC
+:PROPERTIES:
+:TOC: :include all :depth 1 :force (nothing) :ignore (this) :local (nothing)
+:END:
+:CONTENTS:
+- [[#allpy][all.py]]
+- [[#module-count][module count]]
+- [[#single-file-modules][single file modules]]
+- [[#adding-new-modules][Adding new modules]]
+- [[#an-extendable-module-structure][An Extendable module structure]]
+- [[#logging-guidelines][Logging guidelines]]
+:END:
+
 * all.py

 Some modules have lots of different sources for data. For example, ~my.location~ (location data) has lots of possible sources -- from ~my.google.takeout.parser~, using the ~gpslogger~ android app, or through geo locating ~my.ip~ addresses. For a module with multiple possible sources, its common to split it into files like:

@@ -54,7 +67,7 @@ If you want to disable a source, you have a few options.

 ... that suppresses the warning message and lets you use ~my.location.all~ without having to change any lines of code

-Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/seanbreckenridge/HPI#partially-in-usewith-overrides][seanbreckenridge]]s location and ip modules.
+Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/purarue/HPI#partially-in-usewith-overrides][purarue]]s location and ip modules.
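For reference, such an ~all.py~ usually combines sources via ~import_source~ from =my.core.source=, so a missing source degrades into a warning instead of a crash. A rough sketch (the module and function names here are hypothetical; see the actual =my/location/all.py= for the real thing):

#+begin_src python
from typing import Iterator

from my.core.source import import_source

@import_source(module_name='my.location.via_ip')
def _locations_via_ip() -> Iterator:
    # imported lazily, so the dependency is only required if this source is enabled
    from my.location import via_ip
    yield from via_ip.locations()

def locations() -> Iterator:
    yield from _locations_via_ip()
#+end_src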
 This is of course not required for personal or single file modules, its just the pattern that seems to have the least amount of friction for the user, while being extendable, and without using a bulky plugin system to let users add additional sources.

@@ -195,13 +208,13 @@ Where ~lastfm.py~ is your version of ~my.lastfm~, which you've copied from this

 Then, running ~python3 -m pip install -e .~ in that directory would install that as part of the namespace package, and assuming (see below for possible issues) this appears on ~sys.path~ before the upstream repository, your ~lastfm.py~ file overrides the upstream. Adding more files, like ~my.some_new_module~ into that directory immediately updates the global ~my~ package -- allowing you to quickly add new modules without having to re-install.

-If you install both directories as editable packages (which has the benefit of any changes you making in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/seanbreckenridge/reorder_editable][reorder_editable]] repository.
+If you install both directories as editable packages (which has the benefit of any changes you making in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/purarue/reorder_editable][reorder_editable]] repository.
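Since ~my~ is a namespace package, the effective precedence is easy to inspect: the directories in ~my.__path__~ are searched in order, so whichever checkout appears first wins. A quick sanity check (output paths illustrative):

#+begin_src python
import my
print(my.__path__)
# e.g. _NamespacePath(['/your/overrides/my', '/upstream/HPI/my'])
# modules like my.lastfm resolve to the first directory that contains them
#+end_src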
 There is no limit to how many directories you could install into a single namespace package, which could be a possible way for people to install additional HPI modules, without worrying about the module count here becoming too large to manage.

-There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/seanbreckenridge/HPI-template][template]] to get started.
+There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/purarue/HPI-template][template]] to get started.

-Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/seanbreckenridge/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example.
+Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/purarue/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example.

 You may use the other modules or [[https://github.com/karlicoss/hpi-personal-overlay][my overlay]] as reference, but python packaging is already a complicated issue, before adding complexities like namespace packages and editable installs on top of it... If you're having trouble extending HPI in this fashion, you can open an issue here, preferably with a link to your code/repository and/or ~setup.py~ you're trying to use.
@@ -233,3 +246,86 @@ The main goals are:

 It could be argued that namespace packages and editable installs are a bit complex for a new user to get the hang of, and this is true. But fortunately ~import_source~ means any user just using HPI only needs to follow the instructions when a warning is printed, or peruse the docs here a bit -- there's no need to clone or create your own override to just use the ~all.py~ file.

 There's no requirement to use this for individual modules, it just seems to be the best solution we've arrived at so far
+
+* Logging guidelines
+HPI doesn't enforce any specific logging mechanism, you're free to use whatever you prefer in your modules.
+
+However there are some general guidelines for developing modules that can make them more pleasant to use.
+
+- each module should have its unique logger; the easiest way to ensure that is simply to use the module's ~__name__~ attribute as the logger name
+
+  In addition, this ensures the logger hierarchy reflects the package hierarchy.
+  For instance, if you initialize the logger for =my.module= with specific settings, the logger for =my.module.helper= would inherit these settings. See more on that [[https://docs.python.org/3/library/logging.html?highlight=logging#logger-objects][in python docs]].
+
+  As a bonus, if you use the module ~__name__~, this logger will automatically be picked up and used by ~cachew~.
+
+- often modules are processing multiple files, extracting data from each one ([[https://beepb00p.xyz/exports.html#types][incremental/synthetic exports]])
+
+  It's nice to log each file name you're processing as =logger.info= so the user of the module gets a sense of progress.
+  If possible, add the index of the file you're processing and the total count.
+
+  #+begin_src python
+  def process_all_data():
+      paths = inputs()
+      total = len(paths)
+      width = len(str(total))
+      for idx, path in enumerate(paths):
+          # :>{width} to align the logs vertically
+          logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
+          yield from process_path(path)
+  #+end_src
+
+  If there is a lot of logging happening related to a specific path, instead of adding the path to each logging message manually, consider using [[https://docs.python.org/3/library/logging.html?highlight=loggeradapter#logging.LoggerAdapter][LoggerAdapter]].
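A minimal sketch of that, reusing ~inputs~/~process_path~ from the example above (plain stdlib, illustration only):

#+begin_src python
import logging

logger = logging.getLogger(__name__)

def process_all_data():
    for path in inputs():
        # the adapter attaches `path` to every record it emits, so a handler
        # whose format string contains %(path)s shows it automatically
        plogger = logging.LoggerAdapter(logger, {'path': path})
        plogger.info('extracting data')
        yield from process_path(path)
#+end_src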
+
+- log exceptions, but sparingly
+
+  Generally it's a good practice to call ~logging.exception~ from the ~except~ clause, so it's immediately visible where the errors are happening.
+
+  However, in HPI, instead of crashing on exceptions we often behave defensively and ~yield~ them instead (see [[https://beepb00p.xyz/mypy-error-handling.html][mypy assisted error handling]]).
+
+  In this case logging every time may become a bit spammy, so use exception logging sparingly in this case.
+  Typically it's best to rely on the downstream data consumer to handle the exceptions properly.
+
+- instead of =logging.getLogger=, it's best to use =my.core.make_logger=
+
+  #+begin_src python
+  from my.core import make_logger
+
+  logger = make_logger(__name__)
+
+  # or to set a custom level
+  logger = make_logger(__name__, level='warning')
+  #+end_src
+
+  This sets up some nicer defaults over the standard =logging= module:
+
+  - colored logs (via =colorlog= library)
+  - =INFO= as the initial logging level (instead of default =ERROR=)
+  - logging full exception trace even when logging outside of the exception handler
+
+    This is particularly useful for [[https://beepb00p.xyz/mypy-error-handling.html][mypy assisted error handling]].
+    By default, =logging= only logs the exception message (without the trace) in this case, which makes errors harder to debug.
+  - control logging level from the shell via ~LOGGING_LEVEL_*~ env variable
+
+    This can be useful to suppress logging output if it's too spammy, or to show more output for debugging.
+
+    E.g. ~LOGGING_LEVEL_my_instagram_gdpr=DEBUG hpi query my.instagram.gdpr.messages~
+
+- experimental: passing env variable ~LOGGING_COLLAPSE=<loglevel>~ will "collapse" logging with the same level
+
+  Instead of printing a new logging line each time, it will 'redraw' the last logged line with a new logging message.
+
+  This can be convenient if there are too many logs and you just need logging to get a sense of progress.
+
+- experimental: passing env variable ~ENLIGHTEN_ENABLE=yes~ will display TUI progress bars in some cases
+
+  See [[https://github.com/Rockhopper-Technologies/enlighten#readme][https://github.com/Rockhopper-Technologies/enlighten#readme]]
+
+  This can be convenient for showing the progress of parallel processing of different files from HPI:
+
+  #+BEGIN_EXAMPLE
+  ghexport.dal[111]      29%|████████████████████   | 29/100 [00:03<00:07, 10.03 files/s]
+  rexport.dal[comments]  17%|████████               | 115/682 [00:03<00:14, 39.15 files/s]
+  my.instagram.android    0%|▎                      | 3/2631 [00:02<34:50,  1.26 files/s]
+  #+END_EXAMPLE
doc/OVERLAYS.org (new file, 322 lines)

NOTE this kinda overlaps with [[file:MODULE_DESIGN.org][the module design doc]], should be unified in the future.

Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102

# This is describing TODO
# TODO goals
# - overrides
# - proper mypy support
# - TODO reusing parent modules?

# You can see them TODO in overlays dir

Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes.

- =main= package structure
  # TODO do links

  - =my/twitter/gdpr.py=
    Extracts Twitter data from GDPR archive.
  - =my/twitter/all.py=
    Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used.
    This will be overridden by =overlay=.
  - =my/twitter/common.py=
    Contains helper function to merge data, so they can be reused by overlay's =all.py=.
  - =my/reddit.py=
    Extracts Reddit data -- this won't be overridden by the overlay, we just keep it for demonstration purposes.

- =overlay= package structure

  - =my/twitter/talon.py=
    Extracts Twitter data from Talon android app.
  - =my/twitter/all.py=
    Override for =all.py= from =main= package -- it merges together data from =gdpr= and =talon= modules (see the sketch right after this list).
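To make the layout concrete, the overlay's =all.py= looks roughly like this (a sketch reconstructed from the runtime and mypy output shown later in this document):

#+begin_src python
# overlay/src/my/twitter/all.py (sketch)
from .common import merge

def tweets() -> list[str]:
    # gdpr comes from the main package, talon from the overlay
    from . import gdpr, talon
    return merge(gdpr, talon)
#+end_src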
# TODO mention resolution? reorder_editable

* Installing (editable install)

NOTE: this was tested with =python 3.10= and =pip 23.3.2=.

To install, we run:

: pip3 install --user -e overlay/
: pip3 install --user -e main/

# TODO mention non-editable installs (this bit will still work with non-editable install)

As a result, we get:

: pip3 list | grep hpi
: hpi-main     0.0.0   /project/main/src
: hpi-overlay  0.0.0   /project/overlay/src

: cat ~/.local/lib/python3.10/site-packages/easy-install.pth
: /project/overlay/src
: /project/main/src

(the order above is important, so =overlay= takes precedence over =main= TODO link)

Verify the setup:

: $ python3 -c 'import my; print(my.__path__)'
: _NamespacePath(['/project/overlay/src/my', '/project/main/src/my'])

This basically means that modules will be searched in both paths, with overlay taking precedence.

** Installing with =--use-pep517=

See here for discussion https://github.com/purarue/reorder_editable/issues/2, but TLDR it should work similarly.

* Testing runtime behaviour (editable install)

: $ python3 -c 'import my.reddit as R; print(R.upvotes())'
: [main] my.reddit hello
: ['reddit upvote1', 'reddit upvote2']

Just as expected here, =my.reddit= is imported from the =main= package, since it doesn't exist in =overlay=.

Let's check twitter now:

: $ python3 -c 'import my.twitter.all as T; print(T.tweets())'
: [overlay] my.twitter.all hello
: [main] my.twitter.common hello
: [main] my.twitter.gdpr hello
: [overlay] my.twitter.talon hello
: ['gdpr tweet 1', 'gdpr tweet 2', 'talon tweet 1', 'talon tweet 2']

As expected, =my.twitter.all= was imported from the =overlay=.
As you can see it's merged data from =gdpr= (from =main= package) and =talon= (from =overlay= package).

So far so good, let's see how it works with mypy.

* Mypy support (editable install)

To check that mypy works as expected I injected some statements in modules that have no impact on runtime,
but should trigger mypy, like this =trigger_mypy_error: str = 123=:

Let's run it:

: $ mypy --namespace-packages --strict -p my
: overlay/src/my/twitter/talon.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")
: [assignment]
:     trigger_mypy_error: str = 123
:                               ^
: Found 1 error in 1 file (checked 4 source files)

Hmm, this did find the statement in the =overlay=, but missed everything from =main= (e.g. =reddit.py= and =gdpr.py= should have also triggered the check).

First, let's check which sources mypy is processing:

: $ mypy --namespace-packages --strict -p my -v 2>&1 | grep BuildSource
: LOG:  Found source:  BuildSource(path='/project/overlay/src/my', module='my', has_text=False, base_dir=None)
: LOG:  Found source:  BuildSource(path='/project/overlay/src/my/twitter', module='my.twitter', has_text=False, base_dir=None)
: LOG:  Found source:  BuildSource(path='/project/overlay/src/my/twitter/all.py', module='my.twitter.all', has_text=False, base_dir=None)
: LOG:  Found source:  BuildSource(path='/project/overlay/src/my/twitter/talon.py', module='my.twitter.talon', has_text=False, base_dir=None)

So it seems like mypy is not processing anything from the =main= package at all?

At this point I cloned mypy, put a breakpoint, and found out this is the culprit: https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/mypy/modulefinder.py#L288

This basically returns the first path where it finds the =my= package, which happens to be the overlay in this case.
So everything else is ignored?

It even seems to have a test for a similar use case, which is quite sad.
https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/mypy/test/testmodulefinder.py#L64-L71

For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683

But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly?
Let's see what's going on with imports:

: $ mypy --namespace-packages --strict -p my --follow-imports=error
: overlay/src/my/twitter/talon.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")
: [assignment]
:     trigger_mypy_error: str = 123
:                               ^
: overlay/src/my/twitter/all.py:3: error: Import of "my.twitter.common" ignored [misc]
:     from .common import merge
:     ^
: overlay/src/my/twitter/all.py:6: error: Import of "my.twitter.gdpr" ignored [misc]
:     from . import gdpr
:     ^
: overlay/src/my/twitter/all.py:6: note: (Using --follow-imports=error, module not passed on command line)
: overlay/src/my/twitter/all.py: note: In function "tweets":
: overlay/src/my/twitter/all.py:8: error: Returning Any from function declared to return "List[str]" [no-any-return]
:     return merge(gdpr, talon)
:     ^
: Found 4 errors in 2 files (checked 4 source files)

Nope -- looks like it's completely unaware of =main=, and what's worse, by default (without tweaking =--follow-imports=), these errors would be suppressed.

What if we check =my.twitter= directly?

: $ mypy --namespace-packages --strict -p my.twitter --follow-imports=error
: overlay/src/my/twitter/talon.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")
: [assignment]
:     trigger_mypy_error: str = 123
:                               ^~~
: overlay/src/my/twitter: error: Ancestor package "my" ignored [misc]
: overlay/src/my/twitter: note: (Using --follow-imports=error, submodule passed on command line)
: overlay/src/my/twitter/all.py:3: error: Import of "my.twitter.common" ignored [misc]
:     from .common import merge
:     ^
: overlay/src/my/twitter/all.py:3: note: (Using --follow-imports=error, module not passed on command line)
: overlay/src/my/twitter/all.py:6: error: Import of "my.twitter.gdpr" ignored [misc]
:     from . import gdpr
:     ^
: overlay/src/my/twitter/all.py: note: In function "tweets":
: overlay/src/my/twitter/all.py:8: error: Returning Any from function declared to return "list[str]" [no-any-return]
:     return merge(gdpr, talon)
:     ^~~~~~~~~~~~~~~~~~~~~~~~~
: Found 5 errors in 3 files (checked 3 source files)

Now we're also getting =error: Ancestor package "my" ignored [misc]= .. not ideal.

* What if we don't install at all?

Instead of an editable install, let's try running mypy directly over source files.

First let's only check the =main= package:

: $ MYPYPATH=main/src mypy --namespace-packages --strict -p my
: main/src/my/twitter/gdpr.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")  [assignment]
:     trigger_mypy_error: str = 123
:                               ^~~
: main/src/my/reddit.py:11: error: Incompatible types in assignment (expression has type "int", variable has type "str")  [assignment]
:     trigger_mypy_error: str = 123
:                               ^~~
: Found 2 errors in 2 files (checked 6 source files)

As expected, it found both errors.

Now with overlay as well:

: $ MYPYPATH=overlay/src:main/src mypy --namespace-packages --strict -p my
: overlay/src/my/twitter/all.py:6: note: In module imported here:
: main/src/my/twitter/gdpr.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")  [assignment]
:     trigger_mypy_error: str = 123
:                               ^~~
: overlay/src/my/twitter/talon.py:9: error: Incompatible types in assignment (expression has type "int", variable has type "str")
: [assignment]
:     trigger_mypy_error: str = 123
:                               ^~~
: Found 2 errors in 2 files (checked 4 source files)

Interestingly enough, this is slightly better than the editable install (it detected the error in =gdpr.py= as well).
But still no =reddit.py= error.

TODO possibly worth submitting to mypy issue tracker as well...

Overall it seems that properly type checking an HPI setup as a whole is kinda problematic, especially if the modules actually override/extend base modules.
* Modifying (monkey patching) original module in the overlay

Let's say we want to modify/monkey patch the =my.twitter.gdpr= module from =main=, for example, convert "gdpr" to uppercase, i.e. =tweet.replace('gdpr', 'GDPR')=.
|
|
||||||
|
# TODO see overlay2/
|
||||||
|
|
||||||
|
I think our options are:
|
||||||
|
|
||||||
|
- symlink to the 'parent' packages, e.g. =main= in the case
|
||||||
|
|
||||||
|
Alternatively, somehow install =main= under a different name/alias (managed by pip).
|
||||||
|
|
||||||
|
This is discussed here: https://github.com/karlicoss/HPI/issues/102
|
||||||
|
|
||||||
|
The main upside is that it's relatively simple and (sort of works with mypy).
|
||||||
|
|
||||||
|
There are a few big downsides:
|
||||||
|
- creates a parallel package hierarchy (to the one maintained by pip), symlinks will need to be carefully managed manually
|
||||||
|
|
||||||
|
This may not be such a huge deal if you don't have too many overlays.
|
||||||
|
However this results in problems if you're trying to switch between two different HPI checkouts (e.g. stable and development). If you have symlinks into "stable" from the overlay then stable modules will sometimes be picked up when you're expecting "development" package.
|
||||||
|
|
||||||
|
- symlinks pointing outside of the source tree might cause pip install to go into infinite loop
|
||||||
|
|
||||||
|
- it modifies the package name
|
||||||
|
|
||||||
|
This may potentially result in some confusing behaviours.
|
||||||
|
|
||||||
|
One thing I noticed for example is that cachew caches might get duplicated.
|
||||||
|
|
||||||
|
- it might not work in all cases or might result in recursive imports
|
||||||
|
|
||||||
|
- do not shadow the original module
|
||||||
|
|
||||||
|
Basically instead of shadowing via namespace package mechanism and creating identically named module,
|
||||||
|
create some sort of hook that would patch the original =my.twitter.talon= module from =main=.
|
||||||
|
|
||||||
|
The downside is that it's a bit unclear where to do that, we need some sort of entry point?
|
||||||
|
|
||||||
|
- it could be some global dynamic hook defined in the overlay, and then executed from =my.core=
|
||||||
|
|
||||||
|
However, it's a bit intrusive, and unclear how to handle errors. E.g. what if we're monkey patching a module that we weren't intending to use, don't have dependencies installed and it's crashing?
|
||||||
|
|
||||||
|
Perhaps core could support something like =_hook= in each of HPI's modules?
|
||||||
|
Note that it can't be =my.twitter.all=, since we might want to override =.all= itself.
|
||||||
|
|
||||||
|
The downside is is this probably not going to work well with =tmp_config= and such -- we'll need to somehow execute the hook again on reloading the module?
|
||||||
|
|
||||||
|
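
    For illustration, here's a minimal sketch of what such a convention could look like (hypothetical -- neither the =_hook= name nor the =run_hook= helper exist in HPI):

    : # hypothetical sketch, e.g. somewhere in my.core
    : from types import ModuleType
    :
    : def run_hook(module: ModuleType) -> None:
    :     # each HPI module could optionally define _hook(module),
    :     # and core would call it right after the module is imported
    :     hook = getattr(module, '_hook', None)
    :     if hook is not None:
    :         hook(module)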

  - ideally we'd have something that integrates with =importlib= and is executed automatically when the module is imported?

    TODO explore these:

    - https://stackoverflow.com/questions/43571737/how-to-implement-an-import-hook-that-can-modify-the-source-code-on-the-fly-using
    - https://github.com/brettlangdon/importhook

      This one is pretty intrusive, and has some issues, e.g. https://github.com/brettlangdon/importhook/issues/4

      Let's try it:

      : $ PYTHONPATH=overlay3/src:main/src python3 -c 'import my.twitter._hook; import my.twitter.all as M; print(M.tweets())'
      : [main] my.twitter.all hello
      : [main] my.twitter.common hello
      : [main] my.twitter.gdpr hello
      : EXECUTING IMPORT HOOK!
      : ['GDPR tweet 1', 'GDPR tweet 2']

      Ok, it worked, and seems pretty neat.
      However, sadly it doesn't work with =tmp_config= (TODO add a proper demo?)

      Not sure if it's more of an issue with the =tmp_config= implementation (which is very hacky), or with =importhook= itself?

      In addition, there's still the question of where to put the hook itself, but in that case even a global one could be fine.

  - define the hook in =my/twitter/__init__.py=

    Basically, use =extend_path= to make it behave like a namespace package, but in addition, patch the original =my.twitter.talon=?

    : $ cat overlay2/src/my/twitter/__init__.py
    : print(f'[overlay2] {__name__} hello')
    :
    : from pkgutil import extend_path
    : __path__ = extend_path(__path__, __name__)
    :
    : def hack_gdpr_module() -> None:
    :     from . import gdpr
    :     tweets_orig = gdpr.tweets
    :     def tweets_patched():
    :         return [t.replace('gdpr', 'GDPR') for t in tweets_orig()]
    :     gdpr.tweets = tweets_patched
    :
    : hack_gdpr_module()

    This actually seems to work??

    : $ PYTHONPATH=overlay2/src:main/src python3 -c 'import my.twitter.all as M; print(M.tweets())'
    : [overlay2] my.twitter hello
    : [main] my.twitter.gdpr hello
    : [main] my.twitter.all hello
    : [main] my.twitter.common hello
    : ['GDPR tweet 1', 'GDPR tweet 2']

    However, this doesn't stack, i.e. if the 'parent' overlay had its own =__init__.py=, it won't get called.

  - shadow the original module and temporarily modify =__path__= before importing the same module from the parent overlay

    This approach is implemented in =my.core.experimental.import_original_module= (a rough sketch follows after this list).

    TODO demonstrate it properly, but I think it also works in a 'chain' of overlays.

    That option seems the most promising so far, albeit very hacky.
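
A rough sketch of the =import_original_module= idea (a hypothetical simplification -- the real implementation in =my.core.experimental= handles more details, so check it for the actual signature):

: # hypothetical simplification, not the actual implementation
: import importlib
: import os
: import sys
: from types import ModuleType
:
: def import_original_module(module_name: str, file: str) -> ModuleType:
:     # called from the shadowing module itself, so unregister it first
:     shadow = sys.modules.pop(module_name, None)
:     pkg = sys.modules[module_name.rsplit('.', 1)[0]]
:     saved_path = list(pkg.__path__)
:     try:
:         # temporarily hide the overlay's own directory from the package __path__,
:         # so this import resolves against the 'parent' overlay/main instead
:         pkg.__path__ = [p for p in saved_path if p != os.path.dirname(file)]
:         return importlib.import_module(module_name)
:     finally:
:         pkg.__path__ = saved_path
:         if shadow is not None:
:             sys.modules[module_name] = shadow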

Note that none of these options work well with mypy (since it's all dynamic hackery), even if you disregard the issues described in the previous sections.

# TODO .pkg files? somewhat interesting... https://github.com/python/cpython/blob/3.12/Lib/pkgutil.py#L395-L410

304	doc/QUERY.md	Normal file
@@ -0,0 +1,304 @@
`hpi query` is a command line tool for querying the output of any `hpi` function.

```
Usage: hpi query [OPTIONS] FUNCTION_NAME...

  This allows you to query the results from one or more functions in HPI

  By default this runs with '-o json', converting the results to JSON and
  printing them to STDOUT

  You can specify '-o pprint' to just print the objects using their repr, or
  '-o repl' to drop into an ipython shell with access to the results

  While filtering using --order-key datetime, the --after, --before and
  --within flags parse the input to their datetime and timedelta equivalents.
  datetimes can be epoch time, the string 'now', or a date formatted in the
  ISO format. timedelta (durations) are parsed from a similar format to the
  GNU 'sleep' command, e.g. 1w2d8h5m20s -> 1 week, 2 days, 8 hours, 5 minutes,
  20 seconds

  As an example, to query reddit comments I've made in the last month

  hpi query --order-type datetime --before now --within 4w my.reddit.all.comments
  or...
  hpi query --recent 4w my.reddit.all.comments

  Can also query within a range. To filter comments between 2016 and 2018:
  hpi query --order-type datetime --after '2016-01-01' --before '2019-01-01' my.reddit.all.comments

Options:
  -o, --output [json|pprint|repl|gpx]
                                  what to do with the result [default: json]
  -s, --stream                    stream objects from the data source instead
                                  of printing a list at the end
  -k, --order-key TEXT            order by an object attribute or dict key on
                                  the individual objects returned by the HPI
                                  function
  -t, --order-type [datetime|date|int|float]
                                  order by searching for some type on the
                                  iterable
  -a, --after TEXT                while ordering, filter items for the key or
                                  type larger than or equal to this
  -b, --before TEXT               while ordering, filter items for the key or
                                  type smaller than this
  -w, --within TEXT               a range 'after' or 'before' to filter items
                                  by. see above for further explanation
  -r, --recent TEXT               a shorthand for '--order-type datetime
                                  --reverse --before now --within'. e.g.
                                  --recent 5d
  --reverse / --no-reverse        reverse the results returned from the
                                  functions
  -l, --limit INTEGER             limit the number of items returned from the
                                  (functions)
  --drop-unsorted                 if the order of an item can't be determined
                                  while ordering, drop those items from the
                                  results
  --wrap-unsorted                 if the order of an item can't be determined
                                  while ordering, wrap them into an
                                  'Unsortable' object
  --warn-exceptions               if any errors are returned, print them as
                                  errors on STDERR
  --raise-exceptions              if any errors are returned (as objects, not
                                  raised) from the functions, raise them
  --drop-exceptions               ignore any errors returned as objects from
                                  the functions
  --help                          Show this message and exit.
```

This works with any function which returns an iterable, for example `my.coding.commits`, which searches for `git commit`s on your computer:

```bash
hpi query my.coding.commits
```

When run with a module, this does some analysis of the functions in that module and tries to find ones that look like data sources. If it can't figure out which, it prompts you like:

```
Which function should be used from 'my.coding.commits'?

	1. commits
	2. repos
```

You select the one you want by pressing `1` or `2` on your keyboard. Otherwise, you can provide a fully qualified path, like:

```
hpi query my.coding.commits.repos
```

The corresponding `repos` function this queries is defined in [`my/coding/commits.py`](../my/coding/commits.py)

### Ordering/Filtering/Streaming

By default, this just returns the items in the order they were returned by the function. Specifying a `--order-key` or `--order-type` lets you sort and filter -- for example, to get the 10 most recent commits. `--order-type datetime` will try to automatically figure out which attribute to use. If it chooses the wrong one (since `Commit`s have both a `committed_dt` and `authored_dt`), you can tell it which to use. For example, to scan my computer and find the most recent commit I made:

```
hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream
Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
       authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))),
       message='sources.smscalls: propagate errors if there are breaking '
               'schema changes',
       repo='/home/username/Repos/promnesia-fork',
       sha='22a434fca9a28df9b0915ccf16368df129d2c9ce',
       ref='refs/heads/smscalls-handle-result')
```

To instead limit to some range, you can use `--before` and `--within` to filter by a range. For example, to get all the commits I committed in the last day:

```
hpi query my.coding.commits.commits --order-type datetime --before now --within 1d
```

That prints a list of `Commit`s as JSON objects. You could also use `--output pprint` to pretty-print the objects, or `--output repl` to drop into a REPL.

To process the JSON, you can pipe it to [`jq`](https://github.com/stedolan/jq). I often use `jq length` to get the count of some output:

```
hpi query my.coding.commits.commits --order-type datetime --before now --within 1d | jq length
6
```

Because grabbing data `--before now` is such a common use case, the `--recent` flag is a shorthand for `--order-type datetime --reverse --before now --within`. The same as above, to get the commits from the last day:

```
hpi query my.coding.commits.commits --recent 1d | jq length
6
```

To select a range of commits, you can use `--after` and `--before`, passing ISO or epoch timestamps. Those can be full `datetimes` (`2021-01-01T00:05:30`) or just dates (`2021-01-01`). For example, to get all the commits I made on January 1st, 2021:

```
hpi query my.coding.commits.commits --order-type datetime --after 2021-01-01 --before 2021-01-02 | jq length
1
```

If you have [`dateparser`](https://github.com/scrapinghub/dateparser#how-to-use) installed, this supports dozens more natural language formats:

```
hpi query my.coding.commits.commits --order-type datetime --after 'last week' --before 'day before yesterday' | jq length
28
```

If you're having issues ordering because there are exceptions in your results, or not all data is sortable (it may have `None` for some attributes), you can use `--drop-unsorted` to drop those items from the results, or `--drop-exceptions` to remove the exceptions.

You can also stream the results, which is useful for functions that take a while to process or have a lot of data. For example, if you wanted to pick a sha hash from a particular repo, you could use `jq` to `select` and pick that attribute from the JSON:

```
hpi query my.coding.commits.commits --recent 30d --stream | jq 'select(.repo | contains("HPI"))' | jq '.sha' -r
4afa899c8b365b3c10e468f6279c02e316d3b650
40de162fab741df594b4d9651348ee46ee021e9b
e1cb229913482074dc5523e57ef0acf6e9ec2bb2
87c13defd131e39292b93dcea661d3191222dace
02c738594f2cae36ca4fab43cf9533fe6aa89396
0b3a2a6ef3a9e4992771aaea0252fb28217b814a
84817ce72d208038b66f634d4ceb6e3a4c7ec5e9
47992b8e046d27fc5141839179f06f925c159510
425615614bd508e28ccceb56f43c692240e429ab
eed8f949460d768fb1f1c4801e9abab58a5f9021
d26ad7d9ce6a4718f96346b994c3c1cd0d74380c
aec517e53c6ac022f2b4cc91261daab5651cebf0
44b75a88fdfc7af132f61905232877031ce32fcb
b0ff6f29dd2846e97f8aa85a2ca73736b03254a8
```

`jq`'s `select` function acts on a stream of JSON objects, not a list, so it filters the output of `hpi query` as the objects are generated (the goal here is to conserve memory, as items which aren't needed are filtered out). The alternative would be to print the entire JSON list at the end, like:

`hpi query my.coding.commits.commits --recent 30d | jq '.[] | select(.repo | contains("Repos/HPI"))' | jq '.sha' -r`, using `jq '.[]'` to convert the JSON list into a stream of JSON objects.

## Usage on non-HPI code

The command can accept any qualified function name, so this could for example be used to check the output of [`promnesia`](https://github.com/karlicoss/promnesia) sources:

```
hpi query promnesia.sources.smscalls | jq length
371
```

This can be used on any function that produces an `Iterator`/`Generator`-like output, as long as it can be called with no arguments.
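
For instance, a plain generator function like this (hypothetical) one would work, since `hpi query` only needs something it can call with no arguments and then iterate over:

```python
# hypothetical module, e.g. mypackage/events.py
from typing import Iterator


def events() -> Iterator[str]:
    # any zero-argument function returning an iterable works,
    # e.g.: hpi query mypackage.events.events
    yield 'event 1'
    yield 'event 2'
```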

## GPX

The `hpi query` command can also be used with the `--output gpx` flag to generate gpx files from a list of locations, like the ones defined in the `my.location` package. This could be used to extract some date range and create a `gpx` file, which can then be visualized by a GUI application.

This prints the contents of the `gpx` file to STDOUT, and prints warnings for any objects it could not convert to locations to STDERR, so pipe STDOUT to an output file, like `>out.gpx`:

```
hpi query my.location.all --after '2021-07-01T00:00:00' --before '2021-07-05T00:00:00' --order-type datetime --output gpx >out.gpx
```

If you want to ignore any errors, you can use `--drop-exceptions`.

To preview, you can use something like [`qgis`](https://qgis.org/en/site/) or, for something easier and more lightweight, [`gpxsee`](https://github.com/tumic0/GPXSee):

`gpxsee out.gpx`:

<img src="https://user-images.githubusercontent.com/7804791/232249184-7e203ee6-a3ec-4053-800c-751d2c28e690.png" width=500 alt="chicago trip" />

(Sidenote: these are [`@purarue`](https://github.com/purarue/)'s locations, on a trip to Chicago)

## Python reference

The `hpi query` command is a CLI wrapper around the code in [`query.py`](../my/core/query.py) and [`query_range.py`](../my/core/query_range.py). The `select` function is the core of this, and `select_range` adds support for dates, timedeltas, start-end ranges, and other CLI-specific concerns.

`my.core.query.select`:

```
A function to query, order, sort and filter items from one or more sources

This supports iterables and lists of mixed types (including handling errors),
by allowing you to provide custom predicates (functions) which can sort
by a function, an attribute, dict key, or by the attributes values.

Since this supports mixed types, there's always a possibility
of KeyErrors or AttributeErrors while trying to find some value to order by,
so this provides multiple mechanisms to deal with that

'where' lets you filter items before ordering, to remove possible errors
or filter the iterator by some condition

There are multiple ways to instruct select on how to order items. The most
flexible is to provide an 'order_by' function, which takes an item in the
iterator, does any custom checks you may want and then returns the value to sort by

'order_key' is best used on items which have a similar structure, or have
the same attribute name for every item in the iterator. If you have an
iterator of objects whose datetime is accessed by the 'timestamp' attribute,
supplying order_key='timestamp' would sort by that (dictionary or attribute) key

'order_value' is the most confusing, but often the most useful. Instead of
testing against the keys of an item, this allows you to write a predicate
(function) to test against its values (dictionary, NamedTuple, dataclass, object).
If you had an iterator of mixed types and wanted to sort by the datetime,
but the attribute to access the datetime is different on each type, you can
provide `order_value=lambda v: isinstance(v, datetime)`, and this will
try to find that value for each type in the iterator, to sort it by
the value which is received when the predicate is true

'order_value' is often used in the 'hpi query' interface, because of its brevity.
Just given the input function, this can typically sort it by timestamp with
no human intervention. It can sort of be thought of as an educated guess,
but it can always be improved by providing a more complete guess function

Note that 'order_value' is also the most computationally expensive, as it has
to copy the iterator in memory (using itertools.tee) to determine how to order it
in memory

The 'drop_exceptions', 'raise_exceptions', 'warn_exceptions' let you ignore or raise
when the src contains exceptions. The 'warn_func' lets you provide a custom function
to call when an exception is encountered instead of using the 'warnings' module

src:              an iterable of mixed types, or a function to be called,
                  as the input to this function

where:            a predicate which filters the results before sorting

order_by:         a function which when given an item in the src,
                  returns the value to sort by. Similar to the 'key' value
                  typically passed directly to 'sorted'

order_key:        a string which represents a dict key or attribute name
                  to use as the key to sort by

order_value:      predicate which determines which attribute on an ADT-like item to sort by,
                  when given its value. lambda o: isinstance(o, datetime) is commonly passed to sort
                  by datetime, without knowing the attributes or interface for the items in the src

default:          while ordering, if the order for an object cannot be determined,
                  use this as the default value

reverse:          reverse the order of the resulting iterable

limit:            limit the results to this many items

drop_unsorted:    before ordering, drop any items from the iterable for which an
                  order could not be determined. False by default

wrap_unsorted:    before ordering, wrap any items into an 'Unsortable' object. Place
                  them at the front of the list. True by default

drop_exceptions:  ignore any exceptions from the src

raise_exceptions: raise exceptions when received from the input src
```
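
As a rough usage sketch (the exact signature is best checked against [`query.py`](../my/core/query.py); `order_key`, `reverse` and `limit` below follow the docstring above):

```python
# rough sketch of using my.core.query.select directly
from datetime import datetime, timedelta, timezone

from my.core.query import select

now = datetime.now(tz=timezone.utc)
items = [
    {'timestamp': now - timedelta(days=2), 'msg': 'older'},
    {'timestamp': now - timedelta(days=1), 'msg': 'newer'},
]

# order by the 'timestamp' dict key, newest first, keeping a single item
res = list(select(items, order_key='timestamp', reverse=True, limit=1))
print(res[0]['msg'])  # 'newer'
```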

`my.core.query_range.select_range`:

```
A specialized select function which offers generating functions
to filter/query ranges from an iterable

order_key and order_value are used in the same way they are in select

If you specify order_by_value_type, it tries to search for an attribute
on each object/type which has that type, ordering the iterable by that value

unparsed_range is a tuple of length 3, specifying 'after', 'before', 'duration',
i.e. some start point to allow the computed value we're ordering by, some
end point and a duration (can use the RangeTuple NamedTuple to construct one)

(this is typically parsed/created in my.core.__main__, from CLI flags)

If you specify a range, drop_unsorted is forced to be True
```

Those can be imported and accept any sort of iterator; `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/purarue/HPI-personal/blob/master/scripts/listens), which just passes a generator (iterator) as the first argument to `query_range`.
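
A minimal sketch of that pattern (hypothetical `events` generator; the `RangeTuple` fields follow the docstring above, but verify the exact signature against [`query_range.py`](../my/core/query_range.py)):

```python
# hypothetical sketch: filtering a plain generator with select_range
from datetime import datetime, timezone
from typing import Iterator

from my.core.query_range import RangeTuple, select_range


def events() -> Iterator[datetime]:
    yield datetime(2023, 1, 1, tzinfo=timezone.utc)
    yield datetime(2024, 6, 1, tzinfo=timezone.utc)


# keep items from the last 90 days, ordering by the datetime value itself
recent = select_range(
    events(),
    order_value=lambda v: isinstance(v, datetime),
    unparsed_range=RangeTuple(None, 'now', '90d'),  # (after, before, duration)
)
print(list(recent))
```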

@@ -105,10 +105,11 @@ You can also install some optional packages
 They aren't necessary, but will improve your experience. At the moment these are:
 
-- [[https://github.com/karlicoss/cachew][cachew]]: automatic caching library, which can greatly speedup data access
-- [[https://github.com/metachris/logzero][logzero]]: a nice logging library, supporting colors
 - [[https://github.com/ijl/orjson][orjson]]: a library for serializing data to JSON, used in ~my.core.serialize~ and the ~hpi query~ interface
+- [[https://github.com/karlicoss/cachew][cachew]]: automatic caching library, which can greatly speedup data access
 - [[https://github.com/python/mypy][mypy]]: mypy is used for checking configs and troubleshooting
+- [[https://github.com/borntyping/python-colorlog][colorlog]]: colored formatter for ~logging~ module
+- [[https://github.com/Rockhopper-Technologies/enlighten]]: console progress bar library
 
 * Setting up modules
 This is an *optional step* as few modules work without extra setup.
@@ -191,7 +192,11 @@ HPI comes with a command line tool that can help you detect potential issues. Ru
 If you only have a few modules set up, lots of them will error for you, which is expected, so check the ones you expect to work.
 
-If you're having issues with ~cachew~ or want to show logs to troubleshoot what may be happening, you can pass the debug flag (e.g., ~hpi --debug doctor my.module_name~) or set the ~HPI_LOGS~ environment variable (e.g., ~HPI_LOGS=debug hpi query my.module_name~) to print all logs, including the ~cachew~ dependencies. ~HPI_LOGS~ could also be used to silence ~info~ logs, like ~HPI_LOGS=warning hpi ...~
+If you're having issues with ~cachew~ or want to show logs to troubleshoot what may be happening, you can pass the debug flag (e.g., ~hpi --debug doctor my.module_name~) or set the ~LOGGING_LEVEL_HPI~ environment variable (e.g., ~LOGGING_LEVEL_HPI=debug hpi query my.module_name~) to print all logs, including the ~cachew~ dependencies. ~LOGGING_LEVEL_HPI~ could also be used to silence ~info~ logs, like ~LOGGING_LEVEL_HPI=warning hpi ...~
+
+If you want to enable logs for a particular module, you can use the
+~LOGGING_LEVEL_~ prefix and then the module name with underscores, like
+~LOGGING_LEVEL_my_hypothesis=debug hpi query my.hypothesis~
 
 If you want ~HPI~ to autocomplete the module names for you, this comes with shell completion, see [[../misc/completion/][misc/completion]]
@@ -382,7 +387,7 @@ But there is an extra caveat: rexport is already coming with nice [[https://gith
 Several other HPI modules are following a similar pattern: hypothesis, instapaper, pinboard, kobo, etc.
 
-Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/seanbreckenridge/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments=
+Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/purarue/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments=
 
 ** Twitter

4	doc/overlays/install_packages.sh	Executable file
@@ -0,0 +1,4 @@
#!/bin/bash
set -eux
pip3 install --user "$@" -e main/
pip3 install --user "$@" -e overlay/

17	doc/overlays/main/setup.py	Normal file
@@ -0,0 +1,17 @@
from setuptools import setup, find_namespace_packages  # type: ignore


def main() -> None:
    pkgs = find_namespace_packages('src')
    pkg = min(pkgs)
    setup(
        name='hpi-main',
        zip_safe=False,
        packages=pkgs,
        package_dir={'': 'src'},
        package_data={pkg: ['py.typed']},
    )


if __name__ == '__main__':
    main()

11	doc/overlays/main/src/my/reddit.py	Normal file
@@ -0,0 +1,11 @@
print(f'[main] {__name__} hello')


def upvotes() -> list[str]:
    return [
        'reddit upvote1',
        'reddit upvote2',
    ]


trigger_mypy_error: str = 123

7	doc/overlays/main/src/my/twitter/all.py	Normal file
@@ -0,0 +1,7 @@
print(f'[main] {__name__} hello')

from .common import merge

def tweets() -> list[str]:
    from . import gdpr
    return merge(gdpr)

11	doc/overlays/main/src/my/twitter/common.py	Normal file
@@ -0,0 +1,11 @@
print(f'[main] {__name__} hello')

from typing import Protocol

class Source(Protocol):
    def tweets(self) -> list[str]:
        ...

def merge(*sources: Source) -> list[str]:
    from itertools import chain
    return list(chain.from_iterable(src.tweets() for src in sources))

9	doc/overlays/main/src/my/twitter/gdpr.py	Normal file
@@ -0,0 +1,9 @@
print(f'[main] {__name__} hello')

def tweets() -> list[str]:
    return [
        'gdpr tweet 1',
        'gdpr tweet 2',
    ]

trigger_mypy_error: str = 123

17	doc/overlays/overlay/setup.py	Normal file
@@ -0,0 +1,17 @@
from setuptools import setup, find_namespace_packages  # type: ignore


def main() -> None:
    pkgs = find_namespace_packages('src')
    pkg = min(pkgs)
    setup(
        name='hpi-overlay',
        zip_safe=False,
        packages=pkgs,
        package_dir={'': 'src'},
        package_data={pkg: ['py.typed']},
    )


if __name__ == '__main__':
    main()

8	doc/overlays/overlay/src/my/twitter/all.py	Normal file
@@ -0,0 +1,8 @@
print(f'[overlay] {__name__} hello')

from .common import merge

def tweets() -> list[str]:
    from . import gdpr
    from . import talon
    return merge(gdpr, talon)

9	doc/overlays/overlay/src/my/twitter/talon.py	Normal file
@@ -0,0 +1,9 @@
print(f'[overlay] {__name__} hello')

def tweets() -> list[str]:
    return [
        'talon tweet 1',
        'talon tweet 2',
    ]

trigger_mypy_error: str = 123

17	doc/overlays/overlay2/setup.py	Normal file
@@ -0,0 +1,17 @@
from setuptools import setup, find_namespace_packages  # type: ignore


def main() -> None:
    pkgs = find_namespace_packages('src')
    pkg = min(pkgs)
    setup(
        name='hpi-overlay2',
        zip_safe=False,
        packages=pkgs,
        package_dir={'': 'src'},
        package_data={pkg: ['py.typed']},
    )


if __name__ == '__main__':
    main()

13	doc/overlays/overlay2/src/my/twitter/__init__.py	Normal file
@@ -0,0 +1,13 @@
print(f'[overlay2] {__name__} hello')

from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)

def hack_gdpr_module() -> None:
    from . import gdpr
    tweets_orig = gdpr.tweets
    def tweets_patched():
        return [t.replace('gdpr', 'GDPR') for t in tweets_orig()]
    gdpr.tweets = tweets_patched

hack_gdpr_module()

17	doc/overlays/overlay3/setup.py	Normal file
@@ -0,0 +1,17 @@
from setuptools import setup, find_namespace_packages  # type: ignore


def main() -> None:
    pkgs = find_namespace_packages('src')
    pkg = min(pkgs)
    setup(
        name='hpi-overlay3',
        zip_safe=False,
        packages=pkgs,
        package_dir={'': 'src'},
        package_data={pkg: ['py.typed']},
    )


if __name__ == '__main__':
    main()

9	doc/overlays/overlay3/src/my/twitter/_hook.py	Normal file
@@ -0,0 +1,9 @@
import importhook

@importhook.on_import('my.twitter.gdpr')
def on_import(gdpr):
    print("EXECUTING IMPORT HOOK!")
    tweets_orig = gdpr.tweets
    def tweets_patched():
        return [t.replace('gdpr', 'GDPR') for t in tweets_orig()]
    gdpr.tweets = tweets_patched

@@ -32,6 +32,6 @@ ignore =
 #
 
 # as a reference:
-# https://github.com/seanbreckenridge/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
+# https://github.com/purarue/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
 # and this https://github.com/karlicoss/HPI/pull/151
 # find ./my | entr flake8 --ignore=E402,E501,E741,W503,E266,E302,E305,E203,E261,E252,E251,E221,W291,E225,E303,E702,E202,F841,E731,E306,E127 E722,E231 my | grep -v __NOT_HPI_MODULE__

@@ -1,9 +1,5 @@
 function _hpi_completion;
-    set -l response;
-    for value in (env _HPI_COMPLETE=fish_complete COMP_WORDS=(commandline -cp) COMP_CWORD=(commandline -t) hpi);
-        set response $response $value;
-    end;
+    set -l response (env _HPI_COMPLETE=fish_complete COMP_WORDS=(commandline -cp) COMP_CWORD=(commandline -t) hpi);
 
 for completion in $response;
     set -l metadata (string split "," $completion);

@@ -31,5 +31,11 @@ _hpi_completion() {
     fi
 }
 
-compdef _hpi_completion hpi;
+if [[ $zsh_eval_context[-1] == loadautofunc ]]; then
+    # autoload from fpath, call function directly
+    _hpi_completion "$@"
+else
+    # eval/source/. command, register function for later
+    compdef _hpi_completion hpi
+fi

34	my/arbtt.py
@@ -2,19 +2,22 @@
 [[https://github.com/nomeata/arbtt#arbtt-the-automatic-rule-based-time-tracker][Arbtt]] time tracking
 '''
 
+from __future__ import annotations
+
 REQUIRES = ['ijson', 'cffi']
 # NOTE likely also needs libyajl2 from apt or elsewhere?
 
+from collections.abc import Iterable, Sequence
+from dataclasses import dataclass
 from pathlib import Path
-from typing import Sequence, Iterable, List, Optional
 
 
 def inputs() -> Sequence[Path]:
     try:
         from my.config import arbtt as user_config
     except ImportError:
-        from .core.warnings import low
+        from my.core.warnings import low
         low("Couldn't find 'arbtt' config section, falling back to the default capture.log (usually in HOME dir). Add 'arbtt' section with logfiles = '' to suppress this warning.")
         return []
     else:
@@ -22,8 +25,9 @@ def inputs() -> Sequence[Path]:
         return get_files(user_config.logfiles)
 
 
-from .core import dataclass, Json, PathIsh, datetime_aware
-from .core.common import isoparse
+from my.core import Json, PathIsh, datetime_aware
+from my.core.compat import fromisoformat
 
 
 @dataclass
@@ -39,6 +43,7 @@ class Entry:
     @property
     def dt(self) -> datetime_aware:
         # contains utc already
+        # TODO after python>=3.11, could just use fromisoformat
         ds = self.json['date']
         elen = 27
         lds = len(ds)
@@ -46,13 +51,13 @@ class Entry:
             # ugh. sometimes contains less that 6 decimal points
             ds = ds[:-1] + '0' * (elen - lds) + 'Z'
         elif lds > elen:
-            # ahd sometimes more...
+            # and sometimes more...
            ds = ds[:elen - 1] + 'Z'
 
-        return isoparse(ds)
+        return fromisoformat(ds)
 
     @property
-    def active(self) -> Optional[str]:
+    def active(self) -> str | None:
         # NOTE: WIP, might change this in the future...
         ait = (w for w in self.json['windows'] if w['active'])
         a = next(ait, None)
@@ -71,17 +76,18 @@ class Entry:
 def entries() -> Iterable[Entry]:
     inps = list(inputs())
 
-    base: List[PathIsh] = ['arbtt-dump', '--format=json']
+    base: list[PathIsh] = ['arbtt-dump', '--format=json']
 
-    cmds: List[List[PathIsh]]
+    cmds: list[list[PathIsh]]
     if len(inps) == 0:
         cmds = [base]  # rely on default
     else:
         # otherwise, 'merge' them
-        cmds = [base + ['--logfile', f] for f in inps]
+        cmds = [[*base, '--logfile', f] for f in inps]
 
+    from subprocess import PIPE, Popen
+
 import ijson.backends.yajl2_cffi as ijson  # type: ignore
-    from subprocess import Popen, PIPE
 
     for cmd in cmds:
         with Popen(cmd, stdout=PIPE) as p:
             out = p.stdout; assert out is not None
@@ -90,8 +96,8 @@ def entries() -> Iterable[Entry]:
 
 
 def fill_influxdb() -> None:
-    from .core.influxdb import magic_fill
     from .core.freezer import Freezer
+    from .core.influxdb import magic_fill
     freezer = Freezer(Entry)
     fit = (freezer.freeze(e) for e in entries())
     # TODO crap, influxdb doesn't like None https://github.com/influxdata/influxdb/issues/7722
@@ -103,6 +109,8 @@ def fill_influxdb() -> None:
     magic_fill(fit, name=f'{entries.__module__}:{entries.__name__}')
 
 
-from .core import stat, Stats
+from .core import Stats, stat
+
+
 def stats() -> Stats:
     return stat(entries)

@@ -1,34 +1,70 @@
-#!/usr/bin/python3
 """
 [[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor
 """
 
+from __future__ import annotations
+
 # todo most of it belongs to DAL... but considering so few people use it I didn't bother for now
-from datetime import datetime, timedelta
-from pathlib import Path
 import re
 import sqlite3
-from typing import Iterable, Sequence, Set, Optional
+from abc import abstractmethod
+from collections.abc import Iterable, Sequence
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Protocol
 
-from my.core import get_files, LazyLogger, dataclass, Res
+import pytz
+
+from my.core import (
+    Paths,
+    Res,
+    Stats,
+    get_files,
+    make_logger,
+    stat,
+    unwrap,
+)
+from my.core.cachew import mcachew
+from my.core.pandas import DataFrameT, as_dataframe
 from my.core.sqlite import sqlite_connect_immutable
 
-from my.config import bluemaestro as config
+
+class config(Protocol):
+    @property
+    @abstractmethod
+    def export_path(self) -> Paths:
+        raise NotImplementedError
+
+    @property
+    def tz(self) -> pytz.BaseTzInfo:
+        # fixme: later, rely on the timezone provider
+        # NOTE: the timezone should be set with respect to the export date!!!
+        return pytz.timezone('Europe/London')
+        # TODO when I change tz, check the diff
 
 
-# todo control level via env variable?
-# i.e. HPI_LOGGING_MY_BLUEMAESTRO_LEVEL=debug
-logger = LazyLogger(__name__, level='debug')
+def make_config() -> config:
+    from my.config import bluemaestro as user_config
+
+    class combined_config(user_config, config): ...
+
+    return combined_config()
+
+
+logger = make_logger(__name__)
 
 
 def inputs() -> Sequence[Path]:
-    return get_files(config.export_path)
+    cfg = make_config()
+    return get_files(cfg.export_path)
 
 
 Celsius = float
 Percent = float
 mBar = float
 
 
 @dataclass
 class Measurement:
     dt: datetime  # todo aware/naive
@@ -38,41 +74,39 @@ class Measurement:
     dewpoint: Celsius
 
 
-# fixme: later, rely on the timezone provider
-# NOTE: the timezone should be set with respect to the export date!!!
-import pytz  # type: ignore
-tz = pytz.timezone('Europe/London')
-# TODO when I change tz, check the diff
-
-
 def is_bad_table(name: str) -> bool:
     # todo hmm would be nice to have a hook that can patch any module up to
     delegate = getattr(config, 'is_bad_table', None)
     return False if delegate is None else delegate(name)
 
 
-from my.core.cachew import cache_dir
-from my.core.common import mcachew
-@mcachew(depends_on=lambda: inputs(), cache_path=cache_dir('bluemaestro'))
+@mcachew(depends_on=inputs)
 def measurements() -> Iterable[Res[Measurement]]:
-    # todo ideally this would be via arguments... but needs to be lazy
-    dbs = inputs()
+    cfg = make_config()
+    tz = cfg.tz
 
-    last: Optional[datetime] = None
+    # todo ideally this would be via arguments... but needs to be lazy
+    paths = inputs()
+    total = len(paths)
+    width = len(str(total))
+
+    last: datetime | None = None
 
     # tables are immutable, so can save on processing..
-    processed_tables: Set[str] = set()
-    for f in dbs:
-        logger.debug('processing %s', f)
+    processed_tables: set[str] = set()
+    for idx, path in enumerate(paths):
+        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
         tot = 0
         new = 0
         # todo assert increasing timestamp?
-        with sqlite_connect_immutable(f) as db:
-            db_dt: Optional[datetime] = None
+        with sqlite_connect_immutable(path) as db:
+            db_dt: datetime | None = None
             try:
-                datas = db.execute(f'SELECT "{f.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index')
+                datas = db.execute(
+                    f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
+                )
                 oldfmt = True
-                db_dts = list(db.execute('SELECT last_download FROM info'))[0][0]
+                [(db_dts,)] = db.execute('SELECT last_download FROM info')
                 if db_dts == 'N/A':
                     # ??? happens for 20180923-20180928
                     continue
@@ -105,7 +139,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                 processed_tables |= set(log_tables)
 
                 # todo use later?
-                frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]
+                frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]  # noqa: RUF015
 
                 # todo could just filter out the older datapoints?? dunno.
@@ -121,7 +155,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                 oldfmt = False
                 db_dt = None
 
-            for i, (name, tsc, temp, hum, pres, dewp) in enumerate(datas):
+            for (name, tsc, temp, hum, pres, dewp) in datas:
                 if is_bad_table(name):
                     continue
@@ -145,7 +179,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                     upper = timedelta(days=10)  # kinda arbitrary
                     if not (db_dt - lower < dt < db_dt + timedelta(days=10)):
                         # todo could be more defenive??
-                        yield RuntimeError('timestamp too far out', f, name, db_dt, dt)
+                        yield RuntimeError('timestamp too far out', path, name, db_dt, dt)
                         continue
 
                 # err.. sometimes my values are just interleaved with these for no apparent reason???
@@ -153,7 +187,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                     yield RuntimeError('the weird sensor bug')
                     continue
 
-                assert -60 <= temp <= 60, (f, dt, temp)
+                assert -60 <= temp <= 60, (path, dt, temp)
                 ##
 
                 tot += 1
@@ -170,7 +204,7 @@ def measurements() -> Iterable[Res[Measurement]]:
                     dewpoint=dewp,
                 )
                 yield p
-        logger.debug('%s: new %d/%d', f, new, tot)
+        logger.debug(f'{path}: new {new}/{tot}')
     # logger.info('total items: %d', len(merged))
     # for k, v in merged.items():
     #     # TODO shit. quite a few of them have varying values... how is that freaking possible????
@@ -180,12 +214,11 @@ def measurements() -> Iterable[Res[Measurement]]:
     # for k, v in merged.items():
     #     yield Point(dt=k, temp=v)  # meh?
 
-from my.core import stat, Stats
 def stats() -> Stats:
     return stat(measurements)
 
 
-from my.core.pandas import DataFrameT, as_dataframe
 def dataframe() -> DataFrameT:
     """
     %matplotlib gtk
@@ -200,6 +233,7 @@ def dataframe() -> DataFrameT:
 
 def fill_influxdb() -> None:
     from my.core import influxdb
+
     influxdb.fill(measurements(), measurement=__name__)
@@ -207,7 +241,6 @@ def check() -> None:
     temps = list(measurements())
     latest = temps[:-2]
 
-    from my.core.error import unwrap
     prev = unwrap(latest[-2]).dt
     last = unwrap(latest[-1]).dt

@@ -2,41 +2,42 @@
 Blood tracking (manual org-mode entries)
 """
 
+from __future__ import annotations
+
+from collections.abc import Iterable
 from datetime import datetime
-from typing import Iterable, NamedTuple, Optional
+from typing import NamedTuple
 
-from ..core.error import Res
-from ..core.orgmode import parse_org_datetime, one_table
-
-
-import pandas as pd  # type: ignore
 import orgparse
+import pandas as pd
 
 from my.config import blood as config  # type: ignore[attr-defined]
 
+from ..core.error import Res
+from ..core.orgmode import one_table, parse_org_datetime
+
 
 class Entry(NamedTuple):
     dt: datetime
 
-    ketones      : Optional[float]=None
-    glucose      : Optional[float]=None
+    ketones      : float | None=None
+    glucose      : float | None=None
 
-    vitamin_d    : Optional[float]=None
-    vitamin_b12  : Optional[float]=None
+    vitamin_d    : float | None=None
+    vitamin_b12  : float | None=None
 
-    hdl          : Optional[float]=None
-    ldl          : Optional[float]=None
-    triglycerides: Optional[float]=None
+    hdl          : float | None=None
+    ldl          : float | None=None
+    triglycerides: float | None=None
 
-    source       : Optional[str]=None
-    extra        : Optional[str]=None
+    source       : str | None=None
+    extra        : str | None=None
 
 
 Result = Res[Entry]
 
 
-def try_float(s: str) -> Optional[float]:
+def try_float(s: str) -> float | None:
     l = s.split()
     if len(l) == 0:
         return None
@@ -105,6 +106,7 @@ def blood_tests_data() -> Iterable[Result]:
 
 def data() -> Iterable[Result]:
     from itertools import chain
+
     from ..core.error import sort_res_by
     datas = chain(glucose_ketones_data(), blood_tests_data())
     return sort_res_by(datas, key=lambda e: e.dt)

@@ -7,10 +7,10 @@ from ...core.pandas import DataFrameT, check_dataframe
 @check_dataframe
 def dataframe() -> DataFrameT:
     # this should be somehow more flexible...
+    import pandas as pd
+
     from ...endomondo import dataframe as EDF
     from ...runnerup import dataframe as RDF
 
-    import pandas as pd  # type: ignore
-
     return pd.concat([
         EDF(),
         RDF(),
@@ -3,7 +3,6 @@ Cardio data, filtered from various data sources
 '''
 from ...core.pandas import DataFrameT, check_dataframe
 
-
 CARDIO = {
     'Running',
     'Running, treadmill',

@@ -5,16 +5,18 @@ This is probably too specific to my needs, so later I will move it away to a per
 For now it's worth keeping it here as an example and perhaps utility functions might be useful for other HPI modules.
 '''
 
-from datetime import datetime, timedelta
-from typing import Optional
+from __future__ import annotations
 
-from ...core.pandas import DataFrameT, check_dataframe as cdf
-from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable
+from datetime import datetime, timedelta
+
+import pytz
 
 from my.config import exercise as config
 
+from ...core.orgmode import Table, TypedTable, collect, parse_org_datetime
+from ...core.pandas import DataFrameT
+from ...core.pandas import check_dataframe as cdf
 
-import pytz
 # FIXME how to attach it properly?
 tz = pytz.timezone('Europe/London')
@@ -78,7 +80,7 @@ def cross_trainer_manual_dataframe() -> DataFrameT:
     '''
     Only manual org-mode entries
     '''
-    import pandas as pd  # type: ignore[import]
+    import pandas as pd
     df = pd.DataFrame(cross_trainer_data())
     return df
@@ -91,7 +93,7 @@ def dataframe() -> DataFrameT:
     '''
     Attaches manually logged data (which Endomondo can't capture) and attaches it to Endomondo
     '''
-    import pandas as pd  # type: ignore[import]
+    import pandas as pd
 
     from ...endomondo import dataframe as EDF
     edf = EDF()
@@ -105,7 +107,7 @@ def dataframe() -> DataFrameT:
     rows = []
     idxs = []  # type: ignore[var-annotated]
     NO_ENDOMONDO = 'no endomondo matches'
-    for i, row in mdf.iterrows():
+    for _i, row in mdf.iterrows():
         rd = row.to_dict()
         mdate = row['date']
         if pd.isna(mdate):
@@ -114,7 +116,7 @@ def dataframe() -> DataFrameT:
             rows.append(rd)  # presumably has an error set
             continue
 
-        idx: Optional[int]
+        idx: int | None
         close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < _DELTA]
         if len(close) == 0:
             idx = None
@@ -163,7 +165,9 @@ def dataframe() -> DataFrameT:
     # TODO wtf?? where is speed coming from??
 
 
-from ...core import stat, Stats
+from ...core import Stats, stat
+
+
 def stats() -> Stats:
     return stat(cross_trainer_data)
@@ -1,5 +1,6 @@
-from ...core import stat, Stats
+from ...core import Stats, stat
-from ...core.pandas import DataFrameT, check_dataframe as cdf
+from ...core.pandas import DataFrameT
+from ...core.pandas import check_dataframe as cdf


 class Combine:

@@ -7,8 +8,8 @@ class Combine:
         self.modules = modules

     @cdf
-    def dataframe(self, with_temperature: bool=True) -> DataFrameT:
+    def dataframe(self, *, with_temperature: bool=True) -> DataFrameT:
-        import pandas as pd  # type: ignore
+        import pandas as pd
         # todo include 'source'?
         df = pd.concat([m.dataframe() for m in self.modules])

@@ -17,15 +18,21 @@ class Combine:
             bdf = BM.dataframe()
             temp = bdf['temp']

+            # sort index and drop nans, otherwise indexing with [start: end] gonna complain
+            temp = pd.Series(
+                temp.values,
+                index=pd.to_datetime(temp.index, utc=True)
+            ).sort_index()
+            temp = temp.loc[temp.index.dropna()]

             def calc_avg_temperature(row):
                 start = row['sleep_start']
                 end = row['sleep_end']
                 if pd.isna(start) or pd.isna(end):
                     return None

-                between = (start <= temp.index) & (temp.index <= end)
                 # on no temp data, returns nan, ok
-                return temp[between].mean()
+                return temp[start: end].mean()

             df['avg_temp'] = df.apply(calc_avg_temperature, axis=1)
             return df

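Note on the temperature change above: `temp[start: end]` is label-based slicing on a pandas DatetimeIndex, which includes both endpoints and requires the index to be sorted (and free of NaT), hence the new sort/dropna step. A minimal self-contained sketch of the pattern, with made-up sample data:

    import pandas as pd

    temp = pd.Series(
        [36.5, 36.7, 36.6],
        index=pd.to_datetime(['2020-01-02', '2020-01-01', '2020-01-03'], utc=True),
    )
    # label-based slicing complains on an unsorted index, so sort first
    temp = temp.sort_index()
    temp = temp.loc[temp.index.dropna()]

    start = pd.Timestamp('2020-01-01', tz='UTC')
    end = pd.Timestamp('2020-01-02', tz='UTC')
    print(temp[start:end].mean())  # averages the two points inside the window
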
@@ -1,7 +1,6 @@
-from ... import jawbone
+from ... import emfit, jawbone
-from ... import emfit

 from .common import Combine

 _combined = Combine([
     jawbone,
     emfit,

@@ -2,21 +2,29 @@
 Weight data (manually logged)
 '''

+from collections.abc import Iterator
+from dataclasses import dataclass
 from datetime import datetime
-from typing import NamedTuple, Iterator
+from typing import Any

-from ..core import LazyLogger
+from my import orgmode
-from ..core.error import Res, set_error_datetime, extract_error_datetime
+from my.core import make_logger
+from my.core.error import Res, extract_error_datetime, set_error_datetime

-from .. import orgmode
+config = Any

-from my.config import weight as config  # type: ignore[attr-defined]


-log = LazyLogger('my.body.weight')
+def make_config() -> config:
+    from my.config import weight as user_config  # type: ignore[attr-defined]

+    return user_config()


-class Entry(NamedTuple):
+log = make_logger(__name__)


+@dataclass
+class Entry:
     dt: datetime
     value: float
     # TODO comment??

@@ -26,6 +34,8 @@ Result = Res[Entry]


 def from_orgmode() -> Iterator[Result]:
+    cfg = make_config()

     orgs = orgmode.query()
     for o in orgmode.query().all():
         if 'weight' not in o.tags:

@@ -46,7 +56,7 @@ def from_orgmode() -> Iterator[Result]:
             yield e
             continue
         # FIXME use timezone provider
-        created = config.default_timezone.localize(created)
+        created = cfg.default_timezone.localize(created)
         assert created is not None  # ??? somehow mypy wasn't happy?
         yield Entry(
             dt=created,

@@ -56,7 +66,8 @@ def from_orgmode() -> Iterator[Result]:


 def make_dataframe(data: Iterator[Result]):
-    import pandas as pd  # type: ignore
+    import pandas as pd

     def it():
         for e in data:
             if isinstance(e, Exception):

@@ -70,8 +81,9 @@ def make_dataframe(data: Iterator[Result]):
                 'dt': e.dt,
                 'weight': e.value,
             }

     df = pd.DataFrame(it())
-    df.set_index('dt', inplace=True)
+    df = df.set_index('dt')
     # TODO not sure about UTC??
     df.index = pd.to_datetime(df.index, utc=True)
     return df

@@ -81,6 +93,7 @@ def dataframe():
     entries = from_orgmode()
     return make_dataframe(entries)


 # TODO move to a submodule? e.g. my.body.weight.orgmode?
 # so there could be more sources
 # not sure about my.body thing though

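The weight module above replaces a module-level `from my.config import weight as config` with a `make_config()` factory, so merely importing the module no longer requires `weight` to be configured. A sketch of the pattern as used in the diff (it depends on the user's own `my.config`, so it is illustrative rather than standalone):

    from typing import Any

    config = Any  # placeholder alias; the real class only exists in the user's my.config


    def make_config() -> config:
        # deferred import: only executed once data is actually requested
        from my.config import weight as user_config  # type: ignore[attr-defined]

        return user_config()


    def from_orgmode():
        cfg = make_config()  # late-bound, so config errors surface here, not at import time
        ...
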
@@ -1,7 +1,6 @@
-from ..core import warnings
+from my.core import warnings

 warnings.high('my.books.kobo is deprecated! Please use my.kobo instead!')

-from ..core.util import __NOT_HPI_MODULE__
+from my.core.util import __NOT_HPI_MODULE__
+from my.kobo import *
-from ..kobo import *  # type: ignore[no-redef]

@@ -1,12 +1,13 @@
 """
-Parses active browser history by backing it up with [[http://github.com/seanbreckenridge/sqlite_backup][sqlite_backup]]
+Parses active browser history by backing it up with [[http://github.com/purarue/sqlite_backup][sqlite_backup]]
 """

 REQUIRES = ["browserexport", "sqlite_backup"]

+from dataclasses import dataclass

 from my.config import browser as user_config
-from my.core import Paths, dataclass
+from my.core import Paths


 @dataclass

@@ -18,16 +19,19 @@ class config(user_config.active_browser):
     export_path: Paths


+from collections.abc import Iterator, Sequence
 from pathlib import Path
-from typing import Sequence, Iterator

-from my.core import get_files, Stats
+from browserexport.merge import Visit, read_visits
-from browserexport.merge import read_visits, Visit
 from sqlite_backup import sqlite_backup

+from my.core import Stats, get_files, make_logger

+logger = make_logger(__name__)

 from .common import _patch_browserexport_logs

-_patch_browserexport_logs()
+_patch_browserexport_logs(logger.level)


 def inputs() -> Sequence[Path]:

@@ -1,9 +1,9 @@
-from typing import Iterator
+from collections.abc import Iterator

+from browserexport.merge import Visit, merge_visits

 from my.core import Stats
 from my.core.source import import_source
-from browserexport.merge import merge_visits, Visit


 src_export = import_source(module_name="my.browser.export")
 src_active = import_source(module_name="my.browser.active_browser")

@@ -1,11 +1,8 @@
-import os
 from my.core.util import __NOT_HPI_MODULE__


-def _patch_browserexport_logs():
+def _patch_browserexport_logs(level: int):
-    # patch browserexport logs if HPI_LOGS is present
+    # grab the computed level (respects LOGGING_LEVEL_ prefixes) and set it on the browserexport logger
-    if "HPI_LOGS" in os.environ:
     from browserexport.log import setup as setup_browserexport_logger
-        from my.core.logging import mklevel

-        setup_browserexport_logger(mklevel(os.environ["HPI_LOGS"]))
+    setup_browserexport_logger(level)

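After this change the caller computes a log level once (via `make_logger`, which per the new comment respects `LOGGING_LEVEL_` prefixed environment variables) and passes it down, instead of the helper re-reading the ad-hoc `HPI_LOGS` variable. A stdlib-only sketch of the same plumbing; the names below are illustrative stand-ins, not the real browserexport API:

    import logging


    def make_logger(name: str) -> logging.Logger:
        # stand-in for my.core.make_logger
        logger = logging.getLogger(name)
        logger.setLevel(logging.INFO)
        return logger


    def _patch_dependency_logs(level: int) -> None:
        # propagate our computed level to a third-party library's logger
        logging.getLogger('browserexport').setLevel(level)


    logger = make_logger(__name__)
    _patch_dependency_logs(logger.level)
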
@@ -1,33 +1,37 @@
 """
-Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
+Parses browser history using [[http://github.com/purarue/browserexport][browserexport]]
 """

 REQUIRES = ["browserexport"]

-from my.config import browser as user_config
+from collections.abc import Iterator, Sequence
-from my.core import Paths, dataclass
+from dataclasses import dataclass
+from pathlib import Path

+from browserexport.merge import Visit, read_and_merge

+from my.core import (
+    Paths,
+    Stats,
+    get_files,
+    make_logger,
+    stat,
+)
+from my.core.cachew import mcachew

+from .common import _patch_browserexport_logs

+import my.config  # isort: skip


 @dataclass
-class config(user_config.export):
+class config(my.config.browser.export):
     # path[s]/glob to your backed up browser history sqlite files
     export_path: Paths


-from pathlib import Path
+logger = make_logger(__name__)
-from typing import Iterator, Sequence, List
+_patch_browserexport_logs(logger.level)

-from my.core import Stats, get_files, LazyLogger
-from my.core.common import mcachew

-from browserexport.merge import read_and_merge, Visit

-from .common import _patch_browserexport_logs


-logger = LazyLogger(__name__, level="warning")

-_patch_browserexport_logs()


 # all of my backed up databases

@@ -35,16 +39,10 @@ def inputs() -> Sequence[Path]:
     return get_files(config.export_path)


-def _cachew_depends_on() -> List[str]:
+@mcachew(depends_on=inputs, logger=logger)
-    return [str(f) for f in inputs()]


-@mcachew(depends_on=_cachew_depends_on, logger=logger)
 def history() -> Iterator[Visit]:
     yield from read_and_merge(inputs())


 def stats() -> Stats:
-    from my.core import stat

     return {**stat(history)}

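The `_cachew_depends_on` shim (stringifying input paths by hand) is gone: `depends_on=inputs` hands the input-listing function to the cache layer directly, which re-evaluates it and invalidates the cache when its result changes. Roughly, under the assumption that `mcachew` follows cachew's `depends_on=callable` contract:

    from collections.abc import Iterator
    from pathlib import Path


    def inputs() -> list[Path]:
        # the files the cached result should depend on
        return sorted(Path('/tmp/browser-backups').glob('*.sqlite'))


    # with the decorator applied this would be: @mcachew(depends_on=inputs, logger=logger)
    def history() -> Iterator[str]:
        for db in inputs():
            yield db.name
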
@@ -3,24 +3,24 @@ Bumble data from Android app database (in =/data/data/com.bumble.app/databases/C
 """
 from __future__ import annotations

+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Iterator, Sequence, Optional, Dict
+from pathlib import Path

 from more_itertools import unique_everseen

-from my.config import bumble as user_config
+from my.core import Paths, get_files

+from my.config import bumble as user_config  # isort: skip


-from ..core import Paths
 @dataclass
 class config(user_config.android):
     # paths[s]/glob to the exported sqlite databases
     export_path: Paths


-from ..core import get_files
-from pathlib import Path
 def inputs() -> Sequence[Path]:
     return get_files(config.export_path)


@@ -43,20 +43,23 @@ class _BaseMessage:
 @dataclass(unsafe_hash=True)
 class _Message(_BaseMessage):
     conversation_id: str
-    reply_to_id: Optional[str]
+    reply_to_id: str | None


 @dataclass(unsafe_hash=True)
 class Message(_BaseMessage):
     person: Person
-    reply_to: Optional[Message]
+    reply_to: Message | None


 import json
-from typing import Union
-from ..core import Res, assert_never
 import sqlite3
-from ..core.sqlite import sqlite_connect_immutable, select
+from typing import Union

+from my.core.compat import assert_never

+from ..core import Res
+from ..core.sqlite import select, sqlite_connect_immutable

 EntitiesRes = Res[Union[Person, _Message]]


@@ -89,7 +92,7 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[EntitiesRes]:
         db=db
     ):
         try:
-            key = {'TEXT': 'text', 'QUESTION_GAME': 'text', 'IMAGE': 'url', 'GIF': 'url'}[payload_type]
+            key = {'TEXT': 'text', 'QUESTION_GAME': 'text', 'IMAGE': 'url', 'GIF': 'url', 'AUDIO': 'url', 'VIDEO': 'url'}[payload_type]
             text = json.loads(payload)[key]
             yield _Message(
                 id=id,

@@ -106,10 +109,11 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[EntitiesRes]:

 def _key(r: EntitiesRes):
     if isinstance(r, _Message):
-        if '&srv_width=' in r.text:
+        if '/hidden?' in r.text:
             # ugh. seems that image URLs change all the time in the db?
             # can't access them without login anyway
             # so use a different key for such messages
+            # todo maybe normalize text instead? since it's gonna always trigger diffs down the line
             return (r.id, r.created)
     return r


@@ -118,8 +122,8 @@ _UNKNOWN_PERSON = "UNKNOWN_PERSON"


 def messages() -> Iterator[Res[Message]]:
-    id2person: Dict[str, Person] = {}
+    id2person: dict[str, Person] = {}
-    id2msg: Dict[str, Message] = {}
+    id2msg: dict[str, Message] = {}
     for x in unique_everseen(_entities(), key=_key):
         if isinstance(x, Exception):
             yield x

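The `_key` tweak above feeds `more_itertools.unique_everseen`, which deduplicates a stream by an arbitrary key; messages whose media URLs get re-signed between exports are keyed by `(id, created)` instead of full equality. A self-contained illustration of the idea (sample data invented):

    from more_itertools import unique_everseen

    msgs = [
        {'id': 1, 'text': 'https://example.com/img/hidden?sig=aaa'},
        {'id': 1, 'text': 'https://example.com/img/hidden?sig=bbb'},  # same message, URL re-signed
        {'id': 2, 'text': 'hello'},
    ]


    def _key(m):
        # unstable URL: fall back to the id so re-signed duplicates collapse
        if '/hidden?' in m['text']:
            return m['id']
        return m['text']


    print(list(unique_everseen(msgs, key=_key)))  # keeps one copy of message 1
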
@@ -9,16 +9,18 @@ from datetime import date, datetime, timedelta
 from functools import lru_cache
 from typing import Union

-from ..core.time import zone_to_countrycode
+from my.core import Stats
+from my.core.time import zone_to_countrycode


 @lru_cache(1)
 def _calendar():
     from workalendar.registry import registry  # type: ignore

     # todo switch to using time.tz.main once _get_tz stabilizes?
     from ..time.tz import via_location as LTZ
     # TODO would be nice to do it dynamically depending on the past timezones...
-    tz = LTZ._get_tz(datetime.now())
+    tz = LTZ.get_tz(datetime.now())
     assert tz is not None
     zone = tz.zone; assert zone is not None
     code = zone_to_countrycode(zone)

@@ -46,7 +48,6 @@ def is_workday(d: DateIsh) -> bool:
     return not is_holiday(d)


-from ..core.common import Stats
 def stats() -> Stats:
     # meh, but not sure what would be a better test?
     res = {}

@@ -1,7 +1,6 @@
 import my.config as config

 from .core import __NOT_HPI_MODULE__

 from .core import warnings as W

 # still used in Promnesia, maybe in dashboard?

my/codeforces.py (new file, 78 lines)
@@ -0,0 +1,78 @@
+import json
+from collections.abc import Iterator, Sequence
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from functools import cached_property
+from pathlib import Path
+
+from my.config import codeforces as config  # type: ignore[attr-defined]
+from my.core import Res, datetime_aware, get_files
+
+
+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)
+
+
+ContestId = int
+
+
+@dataclass
+class Contest:
+    contest_id: ContestId
+    when: datetime_aware
+    name: str
+
+
+@dataclass
+class Competition:
+    contest: Contest
+    old_rating: int
+    new_rating: int
+
+    @cached_property
+    def when(self) -> datetime_aware:
+        return self.contest.when
+
+
+# todo not sure if parser is the best name? hmm
+class Parser:
+    def __init__(self, *, inputs: Sequence[Path]) -> None:
+        self.inputs = inputs
+        self.contests: dict[ContestId, Contest] = {}
+
+    def _parse_allcontests(self, p: Path) -> Iterator[Contest]:
+        j = json.loads(p.read_text())
+        for c in j['result']:
+            yield Contest(
+                contest_id=c['id'],
+                when=datetime.fromtimestamp(c['startTimeSeconds'], tz=timezone.utc),
+                name=c['name'],
+            )
+
+    def _parse_competitions(self, p: Path) -> Iterator[Competition]:
+        j = json.loads(p.read_text())
+        for c in j['result']:
+            contest_id = c['contestId']
+            contest = self.contests[contest_id]
+            yield Competition(
+                contest=contest,
+                old_rating=c['oldRating'],
+                new_rating=c['newRating'],
+            )
+
+    def parse(self) -> Iterator[Res[Competition]]:
+        for path in inputs():
+            if 'allcontests' in path.name:
+                # these contain information about all CF contests along with useful metadata
+                for contest in self._parse_allcontests(path):
+                    # TODO some method to assert on mismatch if it exists? not sure
+                    self.contests[contest.contest_id] = contest
+            elif 'codeforces' in path.name:
+                # these contain only contests the user participated in
+                yield from self._parse_competitions(path)
+            else:
+                raise RuntimeError(f"shouldn't happen: {path.name}")
+
+
+def data() -> Iterator[Res[Competition]]:
+    return Parser(inputs=inputs()).parse()

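A quick usage sketch for the new module: `data()` yields `Res[Competition]`, i.e. either a parsed competition or an exception, so a consumer filters errors rather than crashing. Hypothetical call site, assuming `codeforces` is configured:

    from my.codeforces import data

    for r in data():
        if isinstance(r, Exception):
            print('parse error:', r)
            continue
        print(f'{r.when:%Y-%m-%d} {r.contest.name}: {r.old_rating} -> {r.new_rating}')
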
@@ -1,92 +0,0 @@
-#!/usr/bin/env python3
-from my.config import codeforces as config  # type: ignore[attr-defined]
-
-
-from datetime import datetime, timezone
-from typing import NamedTuple
-import json
-from typing import Dict, Iterator
-
-
-from ..core import get_files, Res, unwrap
-from ..core.compat import cached_property
-from ..core.konsume import ignore, wrap
-
-
-Cid = int
-
-class Contest(NamedTuple):
-    cid: Cid
-    when: datetime
-
-    @classmethod
-    def make(cls, j) -> 'Contest':
-        return cls(
-            cid=j['id'],
-            when=datetime.fromtimestamp(j['startTimeSeconds'], tz=timezone.utc),
-        )
-
-Cmap = Dict[Cid, Contest]
-
-
-def get_contests() -> Cmap:
-    last = max(get_files(config.export_path, 'allcontests*.json'))
-    j = json.loads(last.read_text())
-    d = {}
-    for c in j['result']:
-        cc = Contest.make(c)
-        d[cc.cid] = cc
-    return d
-
-
-class Competition(NamedTuple):
-    contest_id: Cid
-    contest: str
-    cmap: Cmap
-
-    @cached_property
-    def uid(self) -> Cid:
-        return self.contest_id
-
-    def __hash__(self):
-        return hash(self.contest_id)
-
-    @cached_property
-    def when(self) -> datetime:
-        return self.cmap[self.uid].when
-
-    @cached_property
-    def summary(self) -> str:
-        return f'participated in {self.contest}'  # TODO
-
-    @classmethod
-    def make(cls, cmap, json) -> Iterator[Res['Competition']]:
-        # TODO try here??
-        contest_id = json['contestId'].zoom().value
-        contest = json['contestName'].zoom().value
-        yield cls(
-            contest_id=contest_id,
-            contest=contest,
-            cmap=cmap,
-        )
-        # TODO ytry???
-        ignore(json, 'rank', 'oldRating', 'newRating')
-
-
-def iter_data() -> Iterator[Res[Competition]]:
-    cmap = get_contests()
-    last = max(get_files(config.export_path, 'codeforces*.json'))
-
-    with wrap(json.loads(last.read_text())) as j:
-        j['status'].ignore()
-        res = j['result'].zoom()
-
-        for c in list(res):  # TODO maybe we want 'iter' method??
-            ignore(c, 'handle', 'ratingUpdateTimeSeconds')
-            yield from Competition.make(cmap=cmap, json=c)
-            c.consume()
-            # TODO maybe if they are all empty, no need to consume??
-
-
-def get_data():
-    return list(sorted(iter_data(), key=Competition.when.fget))

@@ -1,30 +1,32 @@
 """
 Git commits data for repositories on your filesystem
 """

+from __future__ import annotations

 REQUIRES = [
     'gitpython',
 ]


 import shutil
-from pathlib import Path
+from collections.abc import Iterator, Sequence
-from datetime import datetime, timezone
 from dataclasses import dataclass, field
-from typing import List, Optional, Iterator, Set, Sequence, cast
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional, cast

+from my.core import LazyLogger, PathIsh, make_config
-from my.core import PathIsh, LazyLogger, make_config
+from my.core.cachew import cache_dir, mcachew
-from my.core.cachew import cache_dir
-from my.core.common import mcachew
 from my.core.warnings import high

+from my.config import commits as user_config  # isort: skip


-from my.config import commits as user_config
 @dataclass
 class commits_cfg(user_config):
     roots: Sequence[PathIsh] = field(default_factory=list)
-    emails: Optional[Sequence[str]] = None
+    emails: Sequence[str] | None = None
-    names: Optional[Sequence[str]] = None
+    names: Sequence[str] | None = None


 # experiment to make it lazy?

@@ -38,9 +40,8 @@ def config() -> commits_cfg:

 ##########################

-import git  # type: ignore
+import git
-from git.repo.fun import is_git_dir  # type: ignore
+from git.repo.fun import is_git_dir


 log = LazyLogger(__name__, level='info')


@@ -94,7 +95,7 @@ def _git_root(git_dir: PathIsh) -> Path:
         return gd  # must be bare


-def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Commit]:
+def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Commit]:
     # without path might not handle pull heads properly
     for c in gr.iter_commits(rev=rev):
         if not by_me(c):

@@ -121,7 +122,7 @@ def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Com

 def repo_commits(repo: PathIsh):
     gr = git.Repo(str(repo))
-    emitted: Set[str] = set()
+    emitted: set[str] = set()
     for r in gr.references:
         yield from _repo_commits_aux(gr=gr, rev=r.path, emitted=emitted)


@@ -142,56 +143,56 @@ def canonical_name(repo: Path) -> str:

 def _fd_path() -> str:
     # todo move it to core
-    fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
+    fd_path: str | None = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
     if fd_path is None:
         high("my.coding.commits requires 'fd' to be installed, See https://github.com/sharkdp/fd#installation")
     assert fd_path is not None
     return fd_path


-def git_repos_in(roots: List[Path]) -> List[Path]:
+def git_repos_in(roots: list[Path]) -> list[Path]:
     from subprocess import check_output
     outputs = check_output([
         _fd_path(),
         # '--follow', # right, not so sure about follow... make configurable?
         '--hidden',
+        '--no-ignore',  # otherwise doesn't go inside .git directory (from fd v9)
         '--full-path',
         '--type', 'f',
         '/HEAD',  # judging by is_git_dir, it should always be here..
         *roots,
     ]).decode('utf8').splitlines()

-    candidates = set(Path(o).resolve().absolute().parent for o in outputs)
+    candidates = {Path(o).resolve().absolute().parent for o in outputs}

     # exclude stuff within .git dirs (can happen for submodules?)
     candidates = {c for c in candidates if '.git' not in c.parts[:-1]}

     candidates = {c for c in candidates if is_git_dir(c)}

-    repos = list(sorted(map(_git_root, candidates)))
+    repos = sorted(map(_git_root, candidates))
     return repos


-def repos() -> List[Path]:
+def repos() -> list[Path]:
     return git_repos_in(list(map(Path, config().roots)))


 # returns modification time for an index to use as hash function
 def _repo_depends_on(_repo: Path) -> int:
-    for pp in {
+    for pp in [
         ".git/FETCH_HEAD",
         ".git/HEAD",
         "FETCH_HEAD",  # bare
         "HEAD",  # bare
-    }:
+    ]:
         ff = _repo / pp
         if ff.exists():
             return int(ff.stat().st_mtime)
-    else:
     raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")


-def _commits(_repos: List[Path]) -> Iterator[Commit]:
+def _commits(_repos: list[Path]) -> Iterator[Commit]:
     for r in _repos:
         yield from _cached_commits(r)

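Two things are worth noting in `_repo_depends_on` above. The mtime of HEAD/FETCH_HEAD serves as a cheap cache key: it gets bumped by fetches and checkouts, so it approximates "repository changed" without walking the object store. And the `{...}` set literal became a list: set iteration order is arbitrary, so with several candidate files present the returned mtime could differ between runs, while a list makes the preference deterministic. Sketch:

    from pathlib import Path

    CANDIDATES = [
        '.git/FETCH_HEAD',  # checked first, deterministically, now that this is a list
        '.git/HEAD',
        'FETCH_HEAD',  # bare repos
        'HEAD',
    ]


    def cache_key(repo: Path) -> int:
        for pp in CANDIDATES:
            ff = repo / pp
            if ff.exists():
                return int(ff.stat().st_mtime)
        raise RuntimeError(f'no HEAD-like file in {repo}')

The dropped `else:` on the loop was redundant rather than wrong: a `for`/`else` body runs whenever the loop finishes without `break`, and this loop only exits early via `return`, so the dedented `raise` behaves identically.
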
@@ -1,9 +1,12 @@
-import warnings
+from typing import TYPE_CHECKING

-warnings.warn('my.coding.github is deprecated! Please use my.github.all instead!')
+from my.core import warnings

+warnings.high('my.coding.github is deprecated! Please use my.github.all instead!')
 # todo why aren't DeprecationWarning shown by default??

-from ..github.all import events, get_events
+if not TYPE_CHECKING:
+    from ..github.all import events, get_events  # noqa: F401

 # todo deprecate properly
 iter_events = events

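The shim above swaps a stdlib `DeprecationWarning` (hidden by default, per the remaining todo) for HPI's own `warnings.high`, and hides the legacy re-export from type checkers: runtime imports keep working, while mypy flags new code that still uses the old path. The pattern in isolation, with a stdlib module standing in for the real target:

    from typing import TYPE_CHECKING

    if not TYPE_CHECKING:
        # resolves at runtime, so legacy `from this_module import sqrt` keeps working,
        # but type checkers don't see the re-export and push users to the new location
        from math import sqrt  # noqa: F401
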
@@ -1,84 +0,0 @@
-#!/usr/bin/env python3
-from my.config import topcoder as config  # type: ignore[attr-defined]
-
-
-from datetime import datetime
-from typing import NamedTuple
-import json
-from typing import Dict, Iterator
-
-
-from ..core import get_files, Res, unwrap, Json
-from ..core.compat import cached_property
-from ..core.error import Res, unwrap
-from ..core.konsume import zoom, wrap, ignore
-
-
-def _get_latest() -> Json:
-    pp = max(get_files(config.export_path))
-    return json.loads(pp.read_text())
-
-
-class Competition(NamedTuple):
-    contest_id: str
-    contest: str
-    percentile: float
-    dates: str
-
-    @cached_property
-    def uid(self) -> str:
-        return self.contest_id
-
-    def __hash__(self):
-        return hash(self.contest_id)
-
-    @cached_property
-    def when(self) -> datetime:
-        return datetime.strptime(self.dates, '%Y-%m-%dT%H:%M:%S.%fZ')
-
-    @cached_property
-    def summary(self) -> str:
-        return f'participated in {self.contest}: {self.percentile:.0f}'
-
-    @classmethod
-    def make(cls, json) -> Iterator[Res['Competition']]:
-        ignore(json, 'rating', 'placement')
-        cid = json['challengeId'].zoom().value
-        cname = json['challengeName'].zoom().value
-        percentile = json['percentile'].zoom().value
-        dates = json['date'].zoom().value
-        yield cls(
-            contest_id=cid,
-            contest=cname,
-            percentile=percentile,
-            dates=dates,
-        )
-
-
-def iter_data() -> Iterator[Res[Competition]]:
-    with wrap(_get_latest()) as j:
-        ignore(j, 'id', 'version')
-
-        res = j['result'].zoom()
-        ignore(res, 'success', 'status', 'metadata')
-
-        cont = res['content'].zoom()
-        ignore(cont, 'handle', 'handleLower', 'userId', 'createdAt', 'updatedAt', 'createdBy', 'updatedBy')
-
-        cont['DEVELOP'].ignore()  # TODO handle it??
-        ds = cont['DATA_SCIENCE'].zoom()
-
-        mar, srm = zoom(ds, 'MARATHON_MATCH', 'SRM')
-
-        mar = mar['history'].zoom()
-        srm = srm['history'].zoom()
-        # TODO right, I guess I could rely on pylint for unused variables??
-
-        for c in mar + srm:
-            yield from Competition.make(json=c)
-            c.consume()
-
-
-def get_data():
-    return list(sorted(iter_data(), key=Competition.when.fget))

@@ -1,6 +1,6 @@
 from .core.warnings import high

 high("DEPRECATED! Please use my.core.common instead.")

 from .core import __NOT_HPI_MODULE__

 from .core.common import *


my/config.py (53 lines changed)
@@ -9,17 +9,18 @@ This file is used for:
 - mypy: this file provides some type annotations
 - for loading the actual user config
 '''

+from __future__ import annotations

 #### NOTE: you won't need this line VVVV in your personal config
-from my.core import init
+from my.core import init  # noqa: F401  # isort: skip
 ###


 from datetime import tzinfo
 from pathlib import Path
-from typing import List

+from my.core import PathIsh, Paths
-from my.core import Paths, PathIsh


 class hypothesis:

@@ -68,17 +69,23 @@ class pinboard:
     export_dir: Paths = ''

 class google:
+    class maps:
+        class android:
+            export_path: Paths = ''

     takeout_path: Paths = ''


-from typing import Sequence, Union, Tuple
+from collections.abc import Sequence
-from datetime import datetime, date, timedelta
+from datetime import date, datetime, timedelta
+from typing import Union

 DateIsh = Union[datetime, date, str]
-LatLon = Tuple[float, float]
+LatLon = tuple[float, float]
 class location:
     # todo ugh, need to think about it... mypy wants the type here to be general, otherwise it can't deduce
     # and we can't import the types from the module itself, otherwise would be circular. common module?
-    home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
+    home: LatLon | Sequence[tuple[DateIsh, LatLon]] = (1.0, -1.0)
     home_accuracy = 30_000.0

     class via_ip:

@@ -98,7 +105,9 @@ class location:
         accuracy: float = 100


-from my.core.compat import Literal
+from typing import Literal


 class time:
     class tz:
         policy: Literal['keep', 'convert', 'throw']

@@ -117,10 +126,9 @@ class arbtt:
     logfiles: Paths


-from typing import Optional
 class commits:
-    emails: Optional[Sequence[str]]
+    emails: Sequence[str] | None
-    names: Optional[Sequence[str]]
+    names: Sequence[str] | None
     roots: Sequence[PathIsh]


@@ -146,8 +154,8 @@ class tinder:
 class instagram:
     class android:
         export_path: Paths
-        username: Optional[str]
+        username: str | None
-        full_name: Optional[str]
+        full_name: str | None

     class gdpr:
         export_path: Paths

@@ -165,7 +173,7 @@ class materialistic:
 class fbmessenger:
     class fbmessengerexport:
         export_db: PathIsh
-        facebook_id: Optional[str]
+        facebook_id: str | None
     class android:
         export_path: Paths


@@ -177,6 +185,8 @@ class twitter_archive:
 class twitter:
     class talon:
         export_path: Paths
+    class android:
+        export_path: Paths


 class twint:

@@ -241,7 +251,7 @@ class runnerup:
 class emfit:
     export_path: Path
     timezone: tzinfo
-    excluded_sids: List[str]
+    excluded_sids: list[str]


 class foursquare:

@@ -261,5 +271,16 @@ class roamresearch:
     username: str


+class whatsapp:
+    class android:
+        export_path: Paths
+        my_user_id: str | None


+class harmonic:
+    export_path: Paths


+class monzo:
+    class monzoexport:
+        export_path: Paths

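The config now uses PEP 604 unions (`str | None`) and builtin generics (`list[str]`, `tuple[float, float]`) in annotations. The `from __future__ import annotations` added at the top is what keeps this importable on older interpreters: annotations are then stored as strings and never evaluated. Quick demonstration:

    from __future__ import annotations  # annotations become strings, never evaluated

    from collections.abc import Sequence


    class commits:
        emails: Sequence[str] | None  # would raise at class creation on 3.9 without the future import
        names: Sequence[str] | None
        roots: Sequence[str]


    print(commits.__annotations__['emails'])  # 'Sequence[str] | None', a plain string
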
@@ -1,39 +1,61 @@
 # this file only keeps the most common & critical types/utility functions
-from .common import get_files, PathIsh, Paths
+from typing import TYPE_CHECKING
-from .common import Json
-from .common import LazyLogger
-from .common import warn_if_empty
-from .common import stat, Stats
-from .common import datetime_naive, datetime_aware
-from .common import assert_never

 from .cfg import make_config
+from .common import PathIsh, Paths, get_files
+from .compat import assert_never
+from .error import Res, notnone, unwrap
+from .logging import (
+    make_logger,
+)
+from .stats import Stats, stat
+from .types import (
+    Json,
+    datetime_aware,
+    datetime_naive,
+)
 from .util import __NOT_HPI_MODULE__
+from .utils.itertools import warn_if_empty

-from .error import Res, unwrap
+LazyLogger = make_logger  # TODO deprecate this in favor of make_logger


-# just for brevity in modules
+if not TYPE_CHECKING:
-# todo not sure about these.. maybe best to rely on regular imports.. perhaps compare?
+    # we used to keep these here for brevity, but feels like it only adds confusion,
+    # e.g. suggest that we perhaps somehow modify builtin behaviour or whatever
+    # so best to prefer explicit behaviour
     from dataclasses import dataclass
     from pathlib import Path


 __all__ = [
-    'get_files', 'PathIsh', 'Paths',
-    'Json',
-    'LazyLogger',
-    'warn_if_empty',
-    'stat', 'Stats',
-    'datetime_aware', 'datetime_naive',
-    'assert_never',

-    'make_config',

     '__NOT_HPI_MODULE__',
+    'Json',
-    'Res', 'unwrap',
+    'LazyLogger',  # legacy import
+    'Path',
-    'dataclass', 'Path',
+    'PathIsh',
+    'Paths',
+    'Res',
+    'Stats',
+    'assert_never',  # TODO maybe deprecate from use in my.core? will be in stdlib soon
+    'dataclass',
+    'datetime_aware',
+    'datetime_naive',
+    'get_files',
+    'make_config',
+    'make_logger',
+    'notnone',
+    'stat',
+    'unwrap',
+    'warn_if_empty',
 ]


+## experimental for now
+# you could put _init_hook.py next to your private my/config
+# that way you can configure logging/warnings/env variables on every HPI import
+try:
+    import my._init_hook  # type: ignore[import-not-found]  # noqa: F401
+except:
+    pass
+##

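The new `_init_hook` escape hatch at the end tries `import my._init_hook` and silently ignores failure, so users can drop a file next to their private config that runs on every HPI import. A hypothetical `my/_init_hook.py` (the file name comes from the comment above; the contents are just an example, including the `LOGGING_LEVEL_` convention mentioned earlier in this diff):

    # my/_init_hook.py -- executed on every `import my.*` via my.core
    import os
    import warnings

    # e.g. quieten a noisy dependency and default a logging level
    warnings.filterwarnings('ignore', module='dateparser')
    os.environ.setdefault('LOGGING_LEVEL_my_core', 'WARNING')
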
@ -1,23 +1,26 @@
|
||||||
from contextlib import ExitStack
|
from __future__ import annotations
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
import importlib
|
import importlib
|
||||||
import inspect
|
import inspect
|
||||||
from itertools import chain
|
|
||||||
import os
|
import os
|
||||||
import shlex
|
import shlex
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Optional, Sequence, Iterable, List, Type, Any, Callable
|
from collections.abc import Iterable, Sequence
|
||||||
|
from contextlib import ExitStack
|
||||||
|
from itertools import chain
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from subprocess import check_call, run, PIPE, CompletedProcess, Popen
|
from subprocess import PIPE, CompletedProcess, Popen, check_call, run
|
||||||
|
from typing import Any, Callable
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache()
|
@functools.lru_cache
|
||||||
def mypy_cmd() -> Optional[Sequence[str]]:
|
def mypy_cmd() -> Sequence[str] | None:
|
||||||
try:
|
try:
|
||||||
# preferably, use mypy from current python env
|
# preferably, use mypy from current python env
|
||||||
import mypy # noqa: F401 fine not to use it
|
import mypy # noqa: F401 fine not to use it
|
||||||
|
@ -32,7 +35,7 @@ def mypy_cmd() -> Optional[Sequence[str]]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def run_mypy(cfg_path: Path) -> Optional[CompletedProcess]:
|
def run_mypy(cfg_path: Path) -> CompletedProcess | None:
|
||||||
# todo dunno maybe use the same mypy config in repository?
|
# todo dunno maybe use the same mypy config in repository?
|
||||||
# I'd need to install mypy.ini then??
|
# I'd need to install mypy.ini then??
|
||||||
env = {**os.environ}
|
env = {**os.environ}
|
||||||
|
@ -43,7 +46,7 @@ def run_mypy(cfg_path: Path) -> Optional[CompletedProcess]:
|
||||||
cmd = mypy_cmd()
|
cmd = mypy_cmd()
|
||||||
if cmd is None:
|
if cmd is None:
|
||||||
return None
|
return None
|
||||||
mres = run([
|
mres = run([ # noqa: UP022,PLW1510
|
||||||
*cmd,
|
*cmd,
|
||||||
'--namespace-packages',
|
'--namespace-packages',
|
||||||
'--color-output', # not sure if works??
|
'--color-output', # not sure if works??
|
||||||
|
@ -63,22 +66,28 @@ def eprint(x: str) -> None:
|
||||||
# err=True prints to stderr
|
# err=True prints to stderr
|
||||||
click.echo(x, err=True)
|
click.echo(x, err=True)
|
||||||
|
|
||||||
|
|
||||||
def indent(x: str) -> str:
|
def indent(x: str) -> str:
|
||||||
|
# todo use textwrap.indent?
|
||||||
return ''.join(' ' + l for l in x.splitlines(keepends=True))
|
return ''.join(' ' + l for l in x.splitlines(keepends=True))
|
||||||
|
|
||||||
|
|
||||||
OK = '✅'
|
OK = '✅'
|
||||||
OFF = '🔲'
|
OFF = '🔲'
|
||||||
|
|
||||||
|
|
||||||
def info(x: str) -> None:
|
def info(x: str) -> None:
|
||||||
eprint(OK + ' ' + x)
|
eprint(OK + ' ' + x)
|
||||||
|
|
||||||
|
|
||||||
def error(x: str) -> None:
|
def error(x: str) -> None:
|
||||||
eprint('❌ ' + x)
|
eprint('❌ ' + x)
|
||||||
|
|
||||||
|
|
||||||
def warning(x: str) -> None:
|
def warning(x: str) -> None:
|
||||||
eprint('❗ ' + x) # todo yellow?
|
eprint('❗ ' + x) # todo yellow?
|
||||||
|
|
||||||
|
|
||||||
def tb(e: Exception) -> None:
|
def tb(e: Exception) -> None:
|
||||||
tb = ''.join(traceback.format_exception(Exception, e, e.__traceback__))
|
tb = ''.join(traceback.format_exception(Exception, e, e.__traceback__))
|
||||||
sys.stderr.write(indent(tb))
|
sys.stderr.write(indent(tb))
|
||||||
|
@ -86,6 +95,7 @@ def tb(e: Exception) -> None:
|
||||||
|
|
||||||
def config_create() -> None:
|
def config_create() -> None:
|
||||||
from .preinit import get_mycfg_dir
|
from .preinit import get_mycfg_dir
|
||||||
|
|
||||||
mycfg_dir = get_mycfg_dir()
|
mycfg_dir = get_mycfg_dir()
|
||||||
|
|
||||||
created = False
|
created = False
|
||||||
|
@ -94,7 +104,8 @@ def config_create() -> None:
|
||||||
my_config = mycfg_dir / 'my' / 'config' / '__init__.py'
|
my_config = mycfg_dir / 'my' / 'config' / '__init__.py'
|
||||||
|
|
||||||
my_config.parent.mkdir(parents=True)
|
my_config.parent.mkdir(parents=True)
|
||||||
my_config.write_text('''
|
my_config.write_text(
|
||||||
|
'''
|
||||||
### HPI personal config
|
### HPI personal config
|
||||||
## see
|
## see
|
||||||
# https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules
|
# https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules
|
||||||
|
@ -117,7 +128,8 @@ class example:
|
||||||
|
|
||||||
### you can insert your own configuration below
|
### you can insert your own configuration below
|
||||||
### but feel free to delete the stuff above if you don't need ti
|
### but feel free to delete the stuff above if you don't need ti
|
||||||
'''.lstrip())
|
'''.lstrip()
|
||||||
|
)
|
||||||
info(f'created empty config: {my_config}')
|
info(f'created empty config: {my_config}')
|
||||||
created = True
|
created = True
|
||||||
else:
|
else:
|
||||||
|
@ -130,12 +142,13 @@ class example:
|
||||||
|
|
||||||
# todo return the config as a result?
|
# todo return the config as a result?
|
||||||
def config_ok() -> bool:
|
def config_ok() -> bool:
|
||||||
errors: List[Exception] = []
|
errors: list[Exception] = []
|
||||||
|
|
||||||
# at this point 'my' should already be imported, so doesn't hurt to extract paths from it
|
# at this point 'my' should already be imported, so doesn't hurt to extract paths from it
|
||||||
import my
|
import my
|
||||||
|
|
||||||
try:
|
try:
|
||||||
paths: List[str] = list(my.__path__) # type: ignore[attr-defined]
|
paths: list[str] = list(my.__path__)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
errors.append(e)
|
errors.append(e)
|
||||||
error('failed to determine module import path')
|
error('failed to determine module import path')
|
||||||
|
@ -145,19 +158,23 @@ def config_ok() -> bool:
|
||||||
|
|
||||||
# first try doing as much as possible without actually importing my.config
|
# first try doing as much as possible without actually importing my.config
|
||||||
from .preinit import get_mycfg_dir
|
from .preinit import get_mycfg_dir
|
||||||
|
|
||||||
cfg_path = get_mycfg_dir()
|
cfg_path = get_mycfg_dir()
|
||||||
# alternative is importing my.config and then getting cfg_path from its __file__/__path__
|
# alternative is importing my.config and then getting cfg_path from its __file__/__path__
|
||||||
# not sure which is better tbh
|
# not sure which is better tbh
|
||||||
|
|
||||||
## check we're not using stub config
|
## check we're not using stub config
|
||||||
import my.core
|
import my.core
|
||||||
|
|
||||||
try:
|
try:
|
||||||
core_pkg_path = str(Path(my.core.__path__[0]).parent) # type: ignore[attr-defined]
|
core_pkg_path = str(Path(my.core.__path__[0]).parent)
|
||||||
if str(cfg_path).startswith(core_pkg_path):
|
if str(cfg_path).startswith(core_pkg_path):
|
||||||
error(f'''
|
error(
|
||||||
|
f'''
|
||||||
Seems that the stub config is used ({cfg_path}). This is likely not going to work.
|
Seems that the stub config is used ({cfg_path}). This is likely not going to work.
|
||||||
See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules for more information
|
See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules for more information
|
||||||
'''.strip())
|
'''.strip()
|
||||||
|
)
|
||||||
errors.append(RuntimeError('bad config path'))
|
errors.append(RuntimeError('bad config path'))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
errors.append(e)
|
errors.append(e)
|
||||||
|
@ -171,16 +188,15 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
|
||||||
# use a temporary directory, useful because
|
# use a temporary directory, useful because
|
||||||
# - compileall ignores -B, so always craps with .pyc files (annoyng on RO filesystems)
|
# - compileall ignores -B, so always craps with .pyc files (annoyng on RO filesystems)
|
||||||
# - compileall isn't following symlinks, just silently ignores them
|
# - compileall isn't following symlinks, just silently ignores them
|
||||||
# note: ugh, annoying that copytree requires a non-existing dir before 3.8.
|
|
||||||
# once we have min version 3.8, can use dirs_exist_ok=True param
|
|
||||||
tdir = Path(td) / 'cfg'
|
tdir = Path(td) / 'cfg'
|
||||||
# this will resolve symlinks when copying
|
|
||||||
shutil.copytree(cfg_path, tdir)
|
|
||||||
# NOTE: compileall still returns code 0 if the path doesn't exist..
|
# NOTE: compileall still returns code 0 if the path doesn't exist..
|
||||||
# but in our case hopefully it's not an issue
|
# but in our case hopefully it's not an issue
|
||||||
cmd = [sys.executable, '-m', 'compileall', '-q', str(tdir)]
|
cmd = [sys.executable, '-m', 'compileall', '-q', str(tdir)]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# this will resolve symlinks when copying
|
||||||
|
# should be under try/catch since might fail if some symlinks are missing
|
||||||
|
shutil.copytree(cfg_path, tdir, dirs_exist_ok=True)
|
||||||
check_call(cmd)
|
check_call(cmd)
|
||||||
info('syntax check: ' + ' '.join(cmd))
|
info('syntax check: ' + ' '.join(cmd))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -213,13 +229,15 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
|
||||||
if len(errors) > 0:
|
if len(errors) > 0:
|
||||||
error(f'config check: {len(errors)} errors')
|
error(f'config check: {len(errors)} errors')
|
||||||
return False
|
return False
|
||||||
else:
|
|
||||||
# note: shouldn't exit here, might run something else
|
# note: shouldn't exit here, might run something else
|
||||||
info('config check: success!')
|
info('config check: success!')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
from .util import HPIModule, modules
|
from .util import HPIModule, modules
|
||||||
|
|
||||||
|
|
||||||
def _modules(*, all: bool = False) -> Iterable[HPIModule]:
|
def _modules(*, all: bool = False) -> Iterable[HPIModule]:
|
||||||
skipped = []
|
skipped = []
|
||||||
for m in modules():
|
for m in modules():
|
||||||
|
@ -231,7 +249,7 @@ def _modules(*, all: bool=False) -> Iterable[HPIModule]:
|
||||||
warning(f'Skipped {len(skipped)} modules: {skipped}. Pass --all if you want to see them.')
|
warning(f'Skipped {len(skipped)} modules: {skipped}. Pass --all if you want to see them.')
|
||||||
|
|
||||||
|
|
||||||
def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: List[str]) -> None:
|
def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: list[str]) -> None:
|
||||||
if len(for_modules) > 0:
|
if len(for_modules) > 0:
|
||||||
# if you're checking specific modules, show errors
|
# if you're checking specific modules, show errors
|
||||||
# hopefully makes sense?
|
# hopefully makes sense?
|
||||||
|
@ -242,10 +260,9 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
|
|
||||||
from .common import quick_stats
|
|
||||||
from .util import get_stats, HPIModule
|
|
||||||
from .stats import guess_stats
|
|
||||||
from .error import warn_my_config_import_error
|
from .error import warn_my_config_import_error
|
||||||
|
from .stats import get_stats, quick_stats
|
||||||
|
from .util import HPIModule
|
||||||
|
|
||||||
mods: Iterable[HPIModule]
|
mods: Iterable[HPIModule]
|
||||||
if len(for_modules) == 0:
|
if len(for_modules) == 0:
|
||||||
|
@ -275,11 +292,8 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li
|
||||||
continue
|
continue
|
||||||
|
|
||||||
info(f'{click.style("OK", fg="green")} : {m:<50}')
|
info(f'{click.style("OK", fg="green")} : {m:<50}')
|
||||||
# first try explicitly defined stats function:
|
# TODO add hpi 'stats'? instead of doctor? not sure
|
||||||
stats = get_stats(m)
|
stats = get_stats(m, guess=True)
|
||||||
if stats is None:
|
|
||||||
# then try guessing.. not sure if should log somehow?
|
|
||||||
stats = guess_stats(m, quick=quick)
|
|
||||||
|
|
||||||
if stats is None:
|
if stats is None:
|
||||||
eprint(" - no 'stats' function, can't check the data")
|
eprint(" - no 'stats' function, can't check the data")
|
||||||
@@ -290,6 +304,7 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li

         try:
             kwargs = {}
+            # todo hmm why wouldn't they be callable??
             if callable(stats) and 'quick' in inspect.signature(stats).parameters:
                 kwargs['quick'] = quick
             with quick_context:
@@ -325,17 +340,20 @@ def tabulate_warnings() -> None:
     Helper to avoid visual noise in hpi modules/doctor
     '''
     import warnings

     orig = warnings.formatwarning

     def override(*args, **kwargs) -> str:
         res = orig(*args, **kwargs)
         return ''.join(' ' + x for x in res.splitlines(keepends=True))

     warnings.formatwarning = override
     # TODO loggers as well?


 def _requires(modules: Sequence[str]) -> Sequence[str]:
     from .discovery_pure import module_by_name

     mods = [module_by_name(module) for module in modules]
     res = []
     for mod in mods:
@@ -362,7 +380,7 @@ def module_requires(*, module: Sequence[str]) -> None:
         click.echo(x)


-def module_install(*, user: bool, module: Sequence[str], parallel: bool=False) -> None:
+def module_install(*, user: bool, module: Sequence[str], parallel: bool = False, break_system_packages: bool = False) -> None:
     if isinstance(module, str):
         # legacy behavior, used to take a since argument
         module = [module]
@@ -373,10 +391,12 @@ def module_install(*, user: bool, module: Sequence[str], parallel: bool=False) -
         warning('requirements list is empty, no need to install anything')
         return

+    use_uv = 'HPI_MODULE_INSTALL_USE_UV' in os.environ
     pre_cmd = [
-        sys.executable, '-m', 'pip',
+        sys.executable, '-m', *(['uv'] if use_uv else []), 'pip',
         'install',
         *(['--user'] if user else []),  # todo maybe instead, forward all the remaining args to pip?
+        *(['--break-system-packages'] if break_system_packages else []),  # https://peps.python.org/pep-0668/
     ]

     cmds = []
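(To see what this hunk changes concretely, here is a sketch of the command assembly in isolation -- the requirement name is illustrative:)

    import os
    import sys

    # hypothetical requirement, as printed by `hpi module requires my.browser.export`
    requirement = 'browserexport'

    use_uv = 'HPI_MODULE_INSTALL_USE_UV' in os.environ  # opt in via: HPI_MODULE_INSTALL_USE_UV=1
    pre_cmd = [sys.executable, '-m', *(['uv'] if use_uv else []), 'pip', 'install']
    print([*pre_cmd, requirement])
    # without the env var: [..., '-m', 'pip', 'install', 'browserexport']
    # with it:             [..., '-m', 'uv', 'pip', 'install', 'browserexport']

Note that the `-m uv` form requires uv to be installed in the same Python environment.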
@@ -391,7 +411,7 @@ def module_install(*, user: bool, module: Sequence[str], parallel: bool=False) -
         # I think it only helps for pypi artifacts (not git!),
         # and only if they weren't cached
         for r in requirements:
-            cmds.append(pre_cmd + [r])
+            cmds.append([*pre_cmd, r])
     else:
         if parallel:
             warning('parallel install is not supported on this platform, installing sequentially...')
@@ -437,8 +457,8 @@ def _ui_getchar_pick(choices: Sequence[str], prompt: str = 'Select from: ') -> i
     return result_map[ch]


-def _locate_functions_or_prompt(qualified_names: List[str], prompt: bool = True) -> Iterable[Callable[..., Any]]:
-    from .query import locate_qualified_function, QueryException
+def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = True) -> Iterable[Callable[..., Any]]:
+    from .query import QueryException, locate_qualified_function
     from .stats import is_data_provider

     # if not connected to a terminal, can't prompt
@@ -455,9 +475,9 @@ def _locate_functions_or_prompt(qualified_names: List[str], prompt: bool = True)
         # user to select a 'data provider' like function
         try:
             mod = importlib.import_module(qualname)
-        except Exception:
+        except Exception as ie:
             eprint(f"During fallback, importing '{qualname}' as module failed")
-            raise qr_err
+            raise qr_err from ie

         # find data providers in this module
         data_providers = [f for _, f in inspect.getmembers(mod, inspect.isfunction) if is_data_provider(f)]
@@ -485,30 +505,42 @@ def _locate_functions_or_prompt(qualified_names: List[str], prompt: bool = True)
         yield data_providers[chosen_index]


+def _warn_exceptions(exc: Exception) -> None:
+    from my.core import make_logger
+
+    logger = make_logger('CLI', level='warning')
+
+    logger.exception(f'hpi query: {exc}')


 # handle the 'hpi query' call
 # can raise a QueryException, caught in the click command
 def query_hpi_functions(
     *,
     output: str = 'json',
     stream: bool = False,
-    qualified_names: List[str],
-    order_key: Optional[str],
-    order_by_value_type: Optional[Type],
+    qualified_names: list[str],
+    order_key: str | None,
+    order_by_value_type: type | None,
     after: Any,
     before: Any,
     within: Any,
     reverse: bool = False,
-    limit: Optional[int],
+    limit: int | None,
     drop_unsorted: bool,
     wrap_unsorted: bool,
+    warn_exceptions: bool,
     raise_exceptions: bool,
     drop_exceptions: bool,
 ) -> None:
-    from .query_range import select_range, RangeTuple
+    from .query_range import RangeTuple, select_range

     # chain list of functions from user, in the order they wrote them on the CLI
     input_src = chain(*(f() for f in _locate_functions_or_prompt(qualified_names)))

+    # NOTE: if passing just one function to this which returns a single namedtuple/dataclass,
+    # using both --order-key and --order-type will often be faster as it does not need to
+    # duplicate the iterator in memory, or try to find the --order-type type on each object before sorting
     res = select_range(
         input_src,
         order_key=order_key,
@@ -518,8 +550,11 @@ def query_hpi_functions(
         limit=limit,
         drop_unsorted=drop_unsorted,
         wrap_unsorted=wrap_unsorted,
+        warn_exceptions=warn_exceptions,
+        warn_func=_warn_exceptions,
         raise_exceptions=raise_exceptions,
-        drop_exceptions=drop_exceptions)
+        drop_exceptions=drop_exceptions,
+    )

     if output == 'json':
         from .serialize import dumps
@@ -542,15 +577,35 @@ def query_hpi_functions(
                 pprint(item)
         else:
             pprint(list(res))
+    elif output == 'gpx':
+        from my.location.common import locations_to_gpx
+
+        # if user didn't specify to ignore exceptions, warn if locations_to_gpx
+        # cannot process the output of the command. This can be silenced by
+        # passing --drop-exceptions
+        if not raise_exceptions and not drop_exceptions:
+            warn_exceptions = True
+
+        # can ignore the mypy warning here, locations_to_gpx yields any errors
+        # if you didnt pass it something that matches the LocationProtocol
+        for exc in locations_to_gpx(res, sys.stdout):  # type: ignore[arg-type]
+            if warn_exceptions:
+                _warn_exceptions(exc)
+            elif raise_exceptions:
+                raise exc
+            elif drop_exceptions:
+                pass
+        sys.stdout.flush()
     else:
         res = list(res)  # type: ignore[assignment]
         # output == 'repl'
         eprint(f"\nInteract with the results by using the {click.style('res', fg='green')} variable\n")
         try:
-            import IPython  # type: ignore[import]
+            import IPython  # type: ignore[import,unused-ignore]
         except ModuleNotFoundError:
             eprint("'repl' typically uses ipython, install it with 'python3 -m pip install ipython'. falling back to stdlib...")
             import code

             code.interact(local=locals())
         else:
             IPython.embed()
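(In practice this new branch lets location-like results be piped straight to GPX tooling; a plausible invocation, where the function name is illustrative and depends on which location modules you have configured:)

    hpi query --output gpx --drop-exceptions my.location.all.locations > locations.gpx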
@@ -558,16 +613,16 @@

 @click.group()
 @click.option("--debug", is_flag=True, default=False, help="Show debug logs")
-def main(debug: bool) -> None:
+def main(*, debug: bool) -> None:
     '''
     Human Programming Interface

     Tool for HPI
     Work in progress, will be used for config management, troubleshooting & introspection
     '''
-    # should overwrite anything else in HPI_LOGS
+    # should overwrite anything else in LOGGING_LEVEL_HPI
     if debug:
-        os.environ["HPI_LOGS"] = "debug"
+        os.environ['LOGGING_LEVEL_HPI'] = 'debug'

     # for potential future reference, if shared state needs to be added to groups
     # https://click.palletsprojects.com/en/7.x/commands/#group-invocation-without-command
@@ -584,20 +639,19 @@ def main(debug: bool) -> None:
     # to run things at the end (would need to use a callback or pass context)
     # https://click.palletsprojects.com/en/7.x/commands/#nested-handling-and-contexts

-    tdir: str = os.path.join(tempfile.gettempdir(), 'hpi_temp_dir')
-    if not os.path.exists(tdir):
-        os.makedirs(tdir)
+    tdir = Path(tempfile.gettempdir()) / 'hpi_temp_dir'
+    tdir.mkdir(exist_ok=True)
     os.chdir(tdir)


 @functools.lru_cache(maxsize=1)
-def _all_mod_names() -> List[str]:
+def _all_mod_names() -> list[str]:
     """Should include all modules, in case user is trying to diagnose issues"""
     # sort this, so that the order doesn't change while tabbing through
     return sorted([m.name for m in modules()])


-def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> List[str]:
+def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> list[str]:
     return [m for m in _all_mod_names() if m.startswith(incomplete)]


@@ -607,7 +661,7 @@ def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: st
 @click.option('-q', '--quick', is_flag=True, help='Only run partial checks (first 100 items)')
 @click.option('-S', '--skip-config-check', 'skip_conf', is_flag=True, help='Skip configuration check')
 @click.argument('MODULE', nargs=-1, required=False, shell_complete=_module_autocomplete)
-def doctor_cmd(verbose: bool, list_all: bool, quick: bool, skip_conf: bool, module: Sequence[str]) -> None:
+def doctor_cmd(*, verbose: bool, list_all: bool, quick: bool, skip_conf: bool, module: Sequence[str]) -> None:
     '''
     Run various checks
@@ -641,7 +695,7 @@ def config_create_cmd() -> None:

 @main.command(name='modules', short_help='list available modules')
 @click.option('--all', 'list_all', is_flag=True, help='List all modules, including disabled')
-def module_cmd(list_all: bool) -> None:
+def module_cmd(*, list_all: bool) -> None:
     '''List available modules'''
     list_modules(list_all=list_all)

@@ -654,7 +708,7 @@ def module_grp() -> None:

 @module_grp.command(name='requires', short_help='print module reqs')
 @click.argument('MODULES', shell_complete=_module_autocomplete, nargs=-1, required=True)
-def module_requires_cmd(modules: Sequence[str]) -> None:
+def module_requires_cmd(*, modules: Sequence[str]) -> None:
     '''
     Print MODULES requirements

@@ -666,22 +720,26 @@ def module_requires_cmd(modules: Sequence[str]) -> None:
 @module_grp.command(name='install', short_help='install module deps')
 @click.option('--user', is_flag=True, help='same as pip --user')
 @click.option('--parallel', is_flag=True, help='EXPERIMENTAL. Install dependencies in parallel.')
+@click.option('-B',
+              '--break-system-packages',
+              is_flag=True,
+              help='Bypass PEP 668 and install dependencies into the system-wide python package directory.')
 @click.argument('MODULES', shell_complete=_module_autocomplete, nargs=-1, required=True)
-def module_install_cmd(user: bool, parallel: bool, modules: Sequence[str]) -> None:
+def module_install_cmd(*, user: bool, parallel: bool, break_system_packages: bool, modules: Sequence[str]) -> None:
     '''
     Install dependencies for modules using pip

     MODULES is one or more specific module names (e.g. my.reddit.rexport)
     '''
     # todo could add functions to check specific module etc..
-    module_install(user=user, module=modules, parallel=parallel)
+    module_install(user=user, module=modules, parallel=parallel, break_system_packages=break_system_packages)
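(On a PEP 668 'externally managed' Python, e.g. recent Debian/Ubuntu system interpreters, the new flag would be passed like so -- the module name is the same illustrative one used in the docstring above:)

    hpi module install -B my.reddit.rexport
    # equivalently:
    hpi module install --break-system-packages my.reddit.rexport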


 @main.command(name='query', short_help='query the results of a HPI function')
 @click.option('-o',
               '--output',
               default='json',
-              type=click.Choice(['json', 'pprint', 'repl']),
+              type=click.Choice(['json', 'pprint', 'repl', 'gpx']),
               help='what to do with the result [default: json]')
 @click.option('-s',
               '--stream',
@@ -734,6 +792,10 @@ def module_install_cmd(user: bool, parallel: bool, modules: Sequence[str]) -> No
               default=False,
               is_flag=True,
               help="if the order of an item can't be determined while ordering, wrap them into an 'Unsortable' object")
+@click.option('--warn-exceptions',
+              default=False,
+              is_flag=True,
+              help="if any errors are returned, print them as errors on STDERR")
 @click.option('--raise-exceptions',
               default=False,
               is_flag=True,
@@ -744,19 +806,21 @@ def module_install_cmd(user: bool, parallel: bool, modules: Sequence[str]) -> No
               help='ignore any errors returned as objects from the functions')
 @click.argument('FUNCTION_NAME', nargs=-1, required=True, shell_complete=_module_autocomplete)
 def query_cmd(
+    *,
     function_name: Sequence[str],
     output: str,
     stream: bool,
-    order_key: Optional[str],
-    order_type: Optional[str],
-    after: Optional[str],
-    before: Optional[str],
-    within: Optional[str],
-    recent: Optional[str],
+    order_key: str | None,
+    order_type: str | None,
+    after: str | None,
+    before: str | None,
+    within: str | None,
+    recent: str | None,
     reverse: bool,
-    limit: Optional[int],
+    limit: int | None,
     drop_unsorted: bool,
     wrap_unsorted: bool,
+    warn_exceptions: bool,
     raise_exceptions: bool,
     drop_exceptions: bool,
 ) -> None:
@@ -784,12 +848,12 @@ def query_cmd(

     \b
     Can also query within a range. To filter comments between 2016 and 2018:
-        hpi query --order-type datetime --after '2016-01-01 00:00:00' --before '2019-01-01 00:00:00' my.reddit.all.comments
+        hpi query --order-type datetime --after '2016-01-01' --before '2019-01-01' my.reddit.all.comments
     '''

-    from datetime import datetime, date
+    from datetime import date, datetime

-    chosen_order_type: Optional[Type]
+    chosen_order_type: type | None
     if order_type == "datetime":
         chosen_order_type = datetime
     elif order_type == "date":
@@ -823,8 +887,10 @@ def query_cmd(
             limit=limit,
             drop_unsorted=drop_unsorted,
             wrap_unsorted=wrap_unsorted,
+            warn_exceptions=warn_exceptions,
             raise_exceptions=raise_exceptions,
-            drop_exceptions=drop_exceptions)
+            drop_exceptions=drop_exceptions,
+        )
     except QueryException as qe:
         eprint(str(qe))
         sys.exit(1)
@@ -839,6 +905,7 @@ def query_cmd(

 def test_requires() -> None:
     from click.testing import CliRunner

     result = CliRunner().invoke(main, ['module', 'requires', 'my.github.ghexport', 'my.browser.export'])
     assert result.exit_code == 0
     assert "github.com/karlicoss/ghexport" in result.output
my/core/_cpu_pool.py (new file, 35 lines)
@@ -0,0 +1,35 @@
+"""
+EXPERIMENTAL! use with caution
+Manages 'global' ProcessPoolExecutor which is 'managed' by HPI itself, and
+can be passed down to DALs to speed up data processing.
+
+The reason to have it managed by HPI is because we don't want DALs instantiate pools
+themselves -- they can't cooperate and it would be hard/infeasible to control
+how many cores we want to dedicate to the DAL.
+
+Enabled by the env variable, specifying how many cores to dedicate
+e.g. "HPI_CPU_POOL=4 hpi query ..."
+"""
+
+from __future__ import annotations
+
+import os
+from concurrent.futures import ProcessPoolExecutor
+from typing import cast
+
+_NOT_SET = cast(ProcessPoolExecutor, object())
+_INSTANCE: ProcessPoolExecutor | None = _NOT_SET
+
+
+def get_cpu_pool() -> ProcessPoolExecutor | None:
+    global _INSTANCE
+    if _INSTANCE is _NOT_SET:
+        use_cpu_pool = os.environ.get('HPI_CPU_POOL')
+        if use_cpu_pool is None or int(use_cpu_pool) == 0:
+            _INSTANCE = None
+        else:
+            # NOTE: this won't be cleaned up properly, but I guess it's fine?
+            # since this it's basically a singleton for the whole process
+            # , and will be destroyed when python exists
+            _INSTANCE = ProcessPoolExecutor(max_workers=int(use_cpu_pool))
+    return _INSTANCE
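(A minimal sketch of how a DAL might consume this pool; the worker function and data are illustrative, and the env var would normally be set by the user rather than in code:)

    import os

    os.environ.setdefault('HPI_CPU_POOL', '4')  # normally: HPI_CPU_POOL=4 hpi query ...

    from my.core._cpu_pool import get_cpu_pool


    def _process(x: int) -> int:  # hypothetical per-item work; must be picklable
        return x * x


    if __name__ == '__main__':  # guard needed for process pools on spawn-based platforms
        pool = get_cpu_pool()
        data = range(10)
        if pool is None:
            results = [_process(x) for x in data]  # pool disabled -- fall back to sequential
        else:
            results = list(pool.map(_process, data))  # fan work out across the workers
        print(results)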
my/core/_deprecated/dataset.py (new file, 12 lines)
@@ -0,0 +1,12 @@
+from ..common import PathIsh
+from ..sqlite import sqlite_connect_immutable
+
+
+def connect_readonly(db: PathIsh):
+    import dataset  # type: ignore
+
+    # see https://github.com/pudo/dataset/issues/136#issuecomment-128693122
+    # todo not sure if mode=ro has any benefit, but it doesn't work on read-only filesystems
+    # maybe it should autodetect readonly filesystems and apply this? not sure
+    creator = lambda: sqlite_connect_immutable(db)
+    return dataset.connect('sqlite:///', engine_kwargs={'creator': creator})
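(Hypothetical usage of this deprecated helper -- both the path and the table name are made up:)

    from my.core._deprecated.dataset import connect_readonly

    db = connect_readonly('/path/to/export.sqlite')
    for row in db['events'].all():  # 'events' is an illustrative table name
        print(row)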
my/core/_deprecated/kompress.py (new file, 261 lines)
@@ -0,0 +1,261 @@
+"""
+Various helpers for compression
+"""
+
+# fmt: off
+from __future__ import annotations
+
+import io
+import pathlib
+from collections.abc import Iterator, Sequence
+from datetime import datetime
+from functools import total_ordering
+from pathlib import Path
+from typing import IO, Union
+
+PathIsh = Union[Path, str]
+
+
+class Ext:
+    xz    = '.xz'
+    zip   = '.zip'
+    lz4   = '.lz4'
+    zstd  = '.zstd'
+    zst   = '.zst'
+    targz = '.tar.gz'
+
+
+def is_compressed(p: Path) -> bool:
+    # todo kinda lame way for now.. use mime ideally?
+    # should cooperate with kompress.kopen?
+    return any(p.name.endswith(ext) for ext in [Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz])
+
+
+def _zstd_open(path: Path, *args, **kwargs) -> IO:
+    import zstandard as zstd  # type: ignore
+    fh = path.open('rb')
+    dctx = zstd.ZstdDecompressor()
+    reader = dctx.stream_reader(fh)
+
+    mode = kwargs.get('mode', 'rt')
+    if mode == 'rb':
+        return reader
+    else:
+        # must be text mode
+        kwargs.pop('mode')  # TextIOWrapper doesn't like it
+        return io.TextIOWrapper(reader, **kwargs)  # meh
+
+
+# TODO use the 'dependent type' trick for return type?
+def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
+    # just in case, but I think this shouldn't be necessary anymore
+    # since when we call .read_text, encoding is passed already
+    if mode in {'r', 'rt'}:
+        encoding = kwargs.get('encoding', 'utf8')
+    else:
+        encoding = None
+    kwargs['encoding'] = encoding
+
+    pp = Path(path)
+    name = pp.name
+    if name.endswith(Ext.xz):
+        import lzma
+
+        # ugh. for lzma, 'r' means 'rb'
+        # https://github.com/python/cpython/blob/d01cf5072be5511595b6d0c35ace6c1b07716f8d/Lib/lzma.py#L97
+        # whereas for regular open, 'r' means 'rt'
+        # https://docs.python.org/3/library/functions.html#open
+        if mode == 'r':
+            mode = 'rt'
+        kwargs['mode'] = mode
+        return lzma.open(pp, *args, **kwargs)
+    elif name.endswith(Ext.zip):
+        # eh. this behaviour is a bit dodgy...
+        from zipfile import ZipFile
+        zfile = ZipFile(pp)
+
+        [subpath] = args  # meh?
+
+        ## oh god... https://stackoverflow.com/a/5639960/706389
+        ifile = zfile.open(subpath, mode='r')
+        ifile.readable = lambda: True   # type: ignore
+        ifile.writable = lambda: False  # type: ignore
+        ifile.seekable = lambda: False  # type: ignore
+        ifile.read1    = ifile.read     # type: ignore
+        # TODO pass all kwargs here??
+        # todo 'expected "BinaryIO"'??
+        return io.TextIOWrapper(ifile, encoding=encoding)
+    elif name.endswith(Ext.lz4):
+        import lz4.frame  # type: ignore
+        return lz4.frame.open(str(pp), mode, *args, **kwargs)
+    elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):  # noqa: PIE810
+        kwargs['mode'] = mode
+        return _zstd_open(pp, *args, **kwargs)
+    elif name.endswith(Ext.targz):
+        import tarfile
+        # FIXME pass mode?
+        tf = tarfile.open(pp)
+        # TODO pass encoding?
+        x = tf.extractfile(*args); assert x is not None
+        return x
+    else:
+        return pp.open(mode, *args, **kwargs)
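(A quick sketch of what kopen enables -- the paths are illustrative:)

    from my.core._deprecated.kompress import kopen

    # transparently decompresses based on the file extension
    with kopen('/path/to/export.json.xz') as fo:
        data = fo.read()

    # for zip archives, the member to read is passed as an extra positional argument
    with kopen('/path/to/takeout.zip', 'Takeout/archive_browser.html') as fo:
        html = fo.read()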
+
+
+import os
+import typing
+
+if typing.TYPE_CHECKING:
+    # otherwise mypy can't figure out that BasePath is a type alias..
+    BasePath = pathlib.Path
+else:
+    BasePath = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath
+
+
+class CPath(BasePath):
+    """
+    Hacky way to support compressed files.
+    If you can think of a better way to do this, please let me know! https://github.com/karlicoss/HPI/issues/20
+
+    Ugh. So, can't override Path because of some _flavour thing.
+    Path only has _accessor and _closed slots, so can't directly set .open method
+    _accessor.open has to return file descriptor, doesn't work for compressed stuff.
+    """
+    def open(self, *args, **kwargs):  # noqa: ARG002
+        kopen_kwargs = {}
+        mode = kwargs.get('mode')
+        if mode is not None:
+            kopen_kwargs['mode'] = mode
+        encoding = kwargs.get('encoding')
+        if encoding is not None:
+            kopen_kwargs['encoding'] = encoding
+        # TODO assert read only?
+        return kopen(str(self), **kopen_kwargs)
+
+
+open = kopen  # TODO deprecate
+
+
+# meh
+# TODO ideally switch to ZipPath or smth similar?
+# nothing else supports subpath properly anyway
+def kexists(path: PathIsh, subpath: str) -> bool:
+    try:
+        kopen(path, subpath)
+    except Exception:
+        return False
+    else:
+        return True
+
+
+import zipfile
+
+# meh... zipfile.Path is not available on 3.7
+zipfile_Path = zipfile.Path
+
+
+@total_ordering
+class ZipPath(zipfile_Path):
+    # NOTE: is_dir/is_file might not behave as expected, the base class checks it only based on the slash in path
+
+    # seems that root/at are not exposed in the docs, so might be an implementation detail
+    root: zipfile.ZipFile  # type: ignore[assignment]
+    at: str
+
+    @property
+    def filepath(self) -> Path:
+        res = self.root.filename
+        assert res is not None  # make mypy happy
+        return Path(res)
+
+    @property
+    def subpath(self) -> Path:
+        return Path(self.at)
+
+    def absolute(self) -> ZipPath:
+        return ZipPath(self.filepath.absolute(), self.at)
+
+    def expanduser(self) -> ZipPath:
+        return ZipPath(self.filepath.expanduser(), self.at)
+
+    def exists(self) -> bool:
+        if self.at == '':
+            # special case, the base class returns False in this case for some reason
+            return self.filepath.exists()
+        return super().exists() or self._as_dir().exists()
+
+    def _as_dir(self) -> zipfile_Path:
+        # note: seems that zip always uses forward slash, regardless OS?
+        return zipfile_Path(self.root, self.at + '/')
+
+    def rglob(self, glob: str) -> Iterator[ZipPath]:
+        # note: not 100% sure about the correctness, but seem fine?
+        # Path.match() matches from the right, so need to
+        rpaths = [p for p in self.root.namelist() if p.startswith(self.at)]
+        rpaths = [p for p in rpaths if Path(p).match(glob)]
+        return (ZipPath(self.root, p) for p in rpaths)
+
+    def relative_to(self, other: ZipPath) -> Path:  # type: ignore[override, unused-ignore]
+        assert self.filepath == other.filepath, (self.filepath, other.filepath)
+        return self.subpath.relative_to(other.subpath)
+
+    @property
+    def parts(self) -> Sequence[str]:
+        # messy, but might be ok..
+        return self.filepath.parts + self.subpath.parts
+
+    def __truediv__(self, key) -> ZipPath:
+        # need to implement it so the return type is not zipfile.Path
+        tmp = zipfile_Path(self.root) / self.at / key
+        return ZipPath(self.root, tmp.at)
+
+    def iterdir(self) -> Iterator[ZipPath]:
+        for s in self._as_dir().iterdir():
+            yield ZipPath(s.root, s.at)
+
+    @property
+    def stem(self) -> str:
+        return self.subpath.stem
+
+    @property  # type: ignore[misc]
+    def __class__(self):
+        return Path
+
+    def __eq__(self, other) -> bool:
+        # hmm, super class doesn't seem to treat as equals unless they are the same object
+        if not isinstance(other, ZipPath):
+            return False
+        return (self.filepath, self.subpath) == (other.filepath, other.subpath)
+
+    def __lt__(self, other) -> bool:
+        if not isinstance(other, ZipPath):
+            return False
+        return (self.filepath, self.subpath) < (other.filepath, other.subpath)
+
+    def __hash__(self) -> int:
+        return hash((self.filepath, self.subpath))
+
+    def stat(self) -> os.stat_result:
+        # NOTE: zip datetimes have no notion of time zone, usually they just keep local time?
+        # see https://en.wikipedia.org/wiki/ZIP_(file_format)#Structure
+        dt = datetime(*self.root.getinfo(self.at).date_time)
+        ts = int(dt.timestamp())
+        params = dict(  # noqa: C408
+            st_mode=0,
+            st_ino=0,
+            st_dev=0,
+            st_nlink=1,
+            st_uid=1000,
+            st_gid=1000,
+            st_size=0,  # todo compute it properly?
+            st_atime=ts,
+            st_mtime=ts,
+            st_ctime=ts,
+        )
+        return os.stat_result(tuple(params.values()))
+
+    @property
+    def suffix(self) -> str:
+        return Path(self.parts[-1]).suffix
+
+# fmt: on
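(A sketch of what ZipPath buys over plain zipfile.Path -- the archive path is illustrative:)

    from my.core._deprecated.kompress import ZipPath

    zp = ZipPath('/path/to/takeout.zip')  # hypothetical archive
    for p in zp.rglob('*.json'):          # rglob works inside the archive
        print(p.parts)                    # filesystem parts + in-archive parts
        print(p.stat().st_mtime)          # mtime recovered from the zip entry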
my/core/cachew.py
@@ -1,8 +1,30 @@
-from .common import assert_subpackage; assert_subpackage(__name__)
+from __future__ import annotations
+
+from .internal import assert_subpackage
+
+assert_subpackage(__name__)
+
+import logging
+import sys
+from collections.abc import Iterator
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Optional
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    TypeVar,
+    Union,
+    cast,
+    overload,
+)
+
+import appdirs  # type: ignore[import-untyped]
+
+from . import warnings
+
+PathIsh = Union[str, Path]  # avoid circular import from .common


 def disable_cachew() -> None:
     try:
@@ -12,10 +34,10 @@ def disable_cachew() -> None:
         return

     from cachew import settings

     settings.ENABLE = False


-from typing import Iterator
 @contextmanager
 def disabled_cachew() -> Iterator[None]:
     try:
@@ -25,23 +47,26 @@ def disabled_cachew() -> Iterator[None]:
         yield
         return
     from cachew.extra import disabled_cachew

     with disabled_cachew():
         yield


 def _appdirs_cache_dir() -> Path:
-    import appdirs  # type: ignore
     cd = Path(appdirs.user_cache_dir('my'))
     cd.mkdir(exist_ok=True, parents=True)
     return cd


-from . import PathIsh
-def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
+_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
+
+
+def cache_dir(suffix: PathIsh | None = None) -> Path:
     from . import core_config as CC

     cdir_ = CC.config.get_cache_dir()

-    sp: Optional[Path] = None
+    sp: Path | None = None
     if suffix is not None:
         sp = Path(suffix)
         # guess if you do need absolute, better path it directly instead of as suffix?
@@ -55,9 +80,84 @@ def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
     # this logic is tested via test_cachew_dir_none

     if cdir_ is None:
-        from .common import _CACHE_DIR_NONE_HACK
         cdir = _CACHE_DIR_NONE_HACK
     else:
         cdir = cdir_

     return cdir if sp is None else cdir / sp
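(For orientation, how cache_dir composes, assuming an HPI core config is available -- the suffix is illustrative:)

    from my.core.cachew import cache_dir

    # base cache directory from core config; when caching is disabled this is the
    # /tmp 'none hack' sentinel path instead
    base = cache_dir()

    # per-module subdirectory, e.g. for a hypothetical browser module
    browser_cache = cache_dir(suffix='browser')
    assert browser_cache == base / 'browser'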
+
+
+"""See core.cachew.cache_dir for the explanation"""
+
+
+_cache_path_dflt = cast(str, object())
+
+
+# TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
+# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
+# but it's a bit problematic.. doublewrap works by defecting if the first arg is callable
+# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
+def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs):
+    """
+    Stands for 'Maybe cachew'.
+    Defensive wrapper around @cachew to make it an optional dependency.
+    """
+    if cache_path is _cache_path_dflt:
+        # wasn't specified... so we need to use cache_dir
+        cache_path = cache_dir()
+
+    if isinstance(cache_path, (str, Path)):
+        try:
+            # check that it starts with 'hack' path
+            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
+        except:  # noqa: E722 bare except
+            pass  # no action needed, doesn't start with 'hack' string
+        else:
+            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
+            # if it does, means that user wanted to disable cache
+            cache_path = None
+    try:
+        import cachew
+    except ModuleNotFoundError:
+        warnings.high('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
+        return lambda orig_func: orig_func
+    else:
+        kwargs['cache_path'] = cache_path
+        return cachew.cachew(**kwargs)
+
+
+if TYPE_CHECKING:
+    R = TypeVar('R')
+    if sys.version_info[:2] >= (3, 10):
+        from typing import ParamSpec
+    else:
+        from typing_extensions import ParamSpec
+    P = ParamSpec('P')
+    CC = Callable[P, R]  # need to give it a name, if inlined into bound=, mypy runs in a bug
+    PathProvider = Union[PathIsh, Callable[P, PathIsh]]
+    # NOTE: in cachew, HashFunction type returns str
+    # however in practice, cachew always calls str for its result
+    # so perhaps better to switch it to Any in cachew as well
+    HashFunction = Callable[P, Any]
+
+    F = TypeVar('F', bound=Callable)
+
+    # we need two versions due to @doublewrap
+    # this is when we just annotate as @cachew without any args
+    @overload  # type: ignore[no-overload-impl]
+    def mcachew(fun: F) -> F: ...
+
+    @overload
+    def mcachew(
+        cache_path: PathProvider | None = ...,
+        *,
+        force_file: bool = ...,
+        cls: type | None = ...,
+        depends_on: HashFunction = ...,
+        logger: logging.Logger | None = ...,
+        chunk_by: int = ...,
+        synthetic_key: str | None = ...,
+    ) -> Callable[[F], F]: ...
+
+else:
+    mcachew = _mcachew_impl
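(A hedged sketch of the decorator in use -- the item type and data source are made up. The called form is used here, since per the comments above the bare @mcachew form has the @doublewrap caveat:)

    from collections.abc import Iterator
    from dataclasses import dataclass
    from datetime import datetime

    from my.core.cachew import mcachew


    @dataclass
    class Event:  # hypothetical item type; cachew infers the cache schema from it
        dt: datetime
        body: str


    @mcachew()  # caches under cache_dir(); degrades to a no-op if cachew isn't installed
    def events() -> Iterator[Event]:
        yield Event(dt=datetime(2020, 1, 1), body='expensive to compute')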
my/core/cfg.py
@@ -1,32 +1,42 @@
-from typing import TypeVar, Type, Callable, Dict, Any
+from __future__ import annotations

-Attrs = Dict[str, Any]
+import importlib
+import re
+import sys
+from collections.abc import Iterator
+from contextlib import ExitStack, contextmanager
+from typing import Any, Callable, TypeVar
+
+Attrs = dict[str, Any]

 C = TypeVar('C')

 # todo not sure about it, could be overthinking...
 # but short enough to change later
 # TODO document why it's necessary?
-def make_config(cls: Type[C], migration: Callable[[Attrs], Attrs]=lambda x: x) -> C:
+def make_config(cls: type[C], migration: Callable[[Attrs], Attrs] = lambda x: x) -> C:
     user_config = cls.__base__
     old_props = {
         # NOTE: deliberately use gettatr to 'force' class properties here
-        k: getattr(user_config, k) for k in vars(user_config)
+        k: getattr(user_config, k)
+        for k in vars(user_config)
     }
     new_props = migration(old_props)
     from dataclasses import fields

     params = {
         k: v
         for k, v in new_props.items()
         if k in {f.name for f in fields(cls)}  # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115
     }
     # todo maybe return type here?
-    return cls(**params)  # type: ignore[call-arg]
+    return cls(**params)
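(Roughly how make_config is used by HPI modules -- the config section and attribute are illustrative:)

    from dataclasses import dataclass

    from my.config import reddit as user_config  # hypothetical user config section

    from my.core.cfg import make_config


    @dataclass
    class reddit_config(user_config):
        export_path: str = '/data/reddit'  # default, overridden by the user's value if set


    config = make_config(reddit_config)  # copies matching user attrs, drops unknown ones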

 F = TypeVar('F')
-from contextlib import contextmanager
-from typing import Iterator
 @contextmanager
 def _override_config(config: F) -> Iterator[F]:
     '''
@@ -44,26 +54,30 @@ def _override_config(config: F) -> Iterator[F]:
         delattr(config, k)


-import importlib
-import sys
-from typing import Optional, Set
 ModuleRegex = str


 @contextmanager
 def _reload_modules(modules: ModuleRegex) -> Iterator[None]:
-    def loaded_modules() -> Set[str]:
-        return {name for name in sys.modules if re.fullmatch(modules, name)}
+    # need to use list here, otherwise reordering with set might mess things up
+    def loaded_modules() -> list[str]:
+        return [name for name in sys.modules if re.fullmatch(modules, name)]

     modules_before = loaded_modules()

-    for m in modules_before:
+    # uhh... seems that reversed might make more sense -- not 100% sure why, but this works for tests/reddit.py
+    for m in reversed(modules_before):
+        # ugh... seems that reload works whereas pop doesn't work in some cases (e.g. on tests/reddit.py)
+        # sys.modules.pop(m, None)
         importlib.reload(sys.modules[m])

     try:
         yield
     finally:
         modules_after = loaded_modules()
+        modules_before_set = set(modules_before)
         for m in modules_after:
-            if m in modules_before:
+            if m in modules_before_set:
                 # was previously loaded, so need to reload to pick up old config
                 importlib.reload(sys.modules[m])
             else:
@@ -72,16 +86,15 @@ def _reload_modules(modules: ModuleRegex) -> Iterator[None]:
         sys.modules.pop(m, None)


-from contextlib import ExitStack
-import re
 @contextmanager
-def tmp_config(*, modules: Optional[ModuleRegex]=None, config=None):
+def tmp_config(*, modules: ModuleRegex | None = None, config=None):
     if modules is None:
         assert config is None
     if modules is not None:
         assert config is not None

     import my.config

     with ExitStack() as module_reload_stack, _override_config(my.config) as new_config:
         if config is not None:
             overrides = {k: v for k, v in vars(config).items() if not k.startswith('__')}
@@ -96,6 +109,7 @@ def tmp_config(*, modules: Optional[ModuleRegex]=None, config=None):
 def test_tmp_config() -> None:
     class extra:
         data_path = '/path/to/data'

     with tmp_config() as c:
         assert c.google != 'whatever'
         assert not hasattr(c, 'extra')
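(The test above only exercises the config-less form; with module reloading it would look roughly like this -- the module regex and the config stub are illustrative:)

    from my.core.cfg import tmp_config


    class stub_config:  # hypothetical override placed onto my.config
        class reddit:
            export_path = '/tmp/reddit-test-export'


    with tmp_config(modules='my.reddit.*', config=stub_config):
        # matching my.reddit* modules are reloaded against the stub, and restored afterwards
        import my.reddit.all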
my/core/common.py
@@ -1,180 +1,43 @@
+from __future__ import annotations
+
+import os
+from collections.abc import Iterable, Sequence
 from glob import glob as do_glob
 from pathlib import Path
-from datetime import datetime
-import functools
-from contextlib import contextmanager
-import types
-from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple, TYPE_CHECKING, NoReturn
-import warnings
-from . import warnings as core_warnings
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Generic,
+    TypeVar,
+    Union,
+)
+
+from . import compat, warnings

 # some helper functions
+# TODO start deprecating this? soon we'd be able to use Path | str syntax which is shorter and more explicit
 PathIsh = Union[Path, str]

-# TODO only used in tests? not sure if useful at all.
-def import_file(p: PathIsh, name: Optional[str] = None) -> types.ModuleType:
-    p = Path(p)
-    if name is None:
-        name = p.stem
-    import importlib.util
-    spec = importlib.util.spec_from_file_location(name, p)
-    assert spec is not None, f"Fatal error; Could not create module spec from {name} {p}"
-    foo = importlib.util.module_from_spec(spec)
-    loader = spec.loader; assert loader is not None
-    loader.exec_module(foo)  # type: ignore[attr-defined]
-    return foo
-
-
-def import_from(path: PathIsh, name: str) -> types.ModuleType:
-    path = str(path)
-    import sys
-    try:
-        sys.path.append(path)
-        import importlib
-        return importlib.import_module(name)
-    finally:
-        sys.path.remove(path)
-
-
-def import_dir(path: PathIsh, extra: str='') -> types.ModuleType:
-    p = Path(path)
-    if p.parts[0] == '~':
-        p = p.expanduser()  # TODO eh. not sure about this..
-    return import_from(p.parent, p.name + extra)
-
-
-T = TypeVar('T')
-K = TypeVar('K')
-V = TypeVar('V')
-
-# TODO deprecate? more_itertools.one should be used
-def the(l: Iterable[T]) -> T:
-    it = iter(l)
-    try:
-        first = next(it)
-    except StopIteration:
-        raise RuntimeError('Empty iterator?')
-    assert all(e == first for e in it)
-    return first
-
-
-# TODO more_itertools.bucket?
-def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
-    res: Dict[K, List[T]] = {}
-    for i in l:
-        kk = key(i)
-        lst = res.get(kk, [])
-        lst.append(i)
-        res[kk] = lst
-    return res
-
-
-def _identity(v: T) -> V:  # type: ignore[type-var]
-    return cast(V, v)
-
-
-# ugh. nothing in more_itertools?
-def ensure_unique(
-    it: Iterable[T],
-    *,
-    key: Callable[[T], K],
-    value: Callable[[T], V]=_identity,
-    key2value: Optional[Dict[K, V]]=None
-) -> Iterable[T]:
-    if key2value is None:
-        key2value = {}
-    for i in it:
-        k = key(i)
-        v = value(i)
-        pv = key2value.get(k, None)  # type: ignore
-        if pv is not None:
-            raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}")
-        key2value[k] = v
-        yield i
-
-
-def test_ensure_unique() -> None:
-    import pytest  # type: ignore
-    assert list(ensure_unique([1, 2, 3], key=lambda i: i)) == [1, 2, 3]
-
-    dups = [1, 2, 1, 4]
-    # this works because it's lazy
-    it = ensure_unique(dups, key=lambda i: i)
-
-    # but forcing throws
-    with pytest.raises(RuntimeError, match='Duplicate key'):
-        list(it)
-
-    # hacky way to force distinct objects?
-    list(ensure_unique(dups, key=lambda i: object()))
-
-
-def make_dict(
-    it: Iterable[T],
-    *,
-    key: Callable[[T], K],
-    value: Callable[[T], V]=_identity
-) -> Dict[K, V]:
-    res: Dict[K, V] = {}
-    uniques = ensure_unique(it, key=key, value=value, key2value=res)
-    for _ in uniques:
-        pass  # force the iterator
-    return res
-
-
-def test_make_dict() -> None:
-    it = range(5)
-    d = make_dict(it, key=lambda i: i, value=lambda i: i % 2)
-    assert d == {0: 0, 1: 1, 2: 0, 3: 1, 4: 0}
-
-    # check type inference
-    d2: Dict[str, int ] = make_dict(it, key=lambda i: str(i))
-    d3: Dict[str, bool] = make_dict(it, key=lambda i: str(i), value=lambda i: i % 2 == 0)
-
-
-# https://stackoverflow.com/a/12377059/706389
-def listify(fn=None, wrapper=list):
-    """
-    Wraps a function's return value in wrapper (e.g. list)
-    Useful when an algorithm can be expressed more cleanly as a generator
-    """
-    def listify_return(fn):
-        @functools.wraps(fn)
-        def listify_helper(*args, **kw):
-            return wrapper(fn(*args, **kw))
-        return listify_helper
-    if fn is None:
-        return listify_return
-    return listify_return(fn)
-
-
-# todo use in bluemaestro
-# def dictify(fn=None, key=None, value=None):
-#     def md(it):
-#         return make_dict(it, key=key, value=value)
-#     return listify(fn=fn, wrapper=md)
-
-
-from .logging import setup_logger, LazyLogger

 Paths = Union[Sequence[PathIsh], PathIsh]

 DEFAULT_GLOB = '*'


 def get_files(
     pp: Paths,
     glob: str = DEFAULT_GLOB,
+    *,
     sort: bool = True,
     guess_compression: bool = True,
-) -> Tuple[Path, ...]:
+) -> tuple[Path, ...]:
     """
     Helper function to avoid boilerplate.

     Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
     """
     # TODO FIXME mm, some wrapper to assert iterator isn't empty?
     sources: list[Path]
     if isinstance(pp, Path):
         sources = [pp]
     elif isinstance(pp, str):
@@ -183,14 +46,15 @@ def get_files(
             return ()  # early return to prevent warnings etc
         sources = [Path(pp)]
     else:
-        sources = [Path(p) for p in pp]
+        sources = [p if isinstance(p, Path) else Path(p) for p in pp]

     def caller() -> str:
         import traceback

         # TODO ugh. very flaky... -3 because [<this function>, get_files(), <actual caller>]
         return traceback.extract_stack()[-3].filename

-    paths: List[Path] = []
+    paths: list[Path] = []
     for src in sources:
         if src.parts[0] == '~':
             src = src.expanduser()
@@ -198,140 +62,81 @@ def get_files(
         gs = str(src)
         if '*' in gs:
             if glob != DEFAULT_GLOB:
-                warnings.warn(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
-            paths.extend(map(Path, do_glob(gs)))
-        elif src.is_dir():
+                warnings.medium(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
+            paths.extend(map(Path, do_glob(gs)))  # noqa: PTH207
+        elif os.path.isdir(str(src)):  # noqa: PTH112
+            # NOTE: we're using os.path here on purpose instead of src.is_dir
+            # the reason is is_dir for archives might return True and then
+            # this clause would try globbing insize the archives
+            # this is generally undesirable (since modules handle archives themselves)
+
             # todo not sure if should be recursive?
             # note: glob='**/*.ext' works without any changes.. so perhaps it's ok as it is
             gp: Iterable[Path] = src.glob(glob)
             paths.extend(gp)
         else:
-            if not src.is_file():
-                # todo not sure, might be race condition?
-                raise RuntimeError(f"Expected '{src}' to exist")
+            assert src.exists(), src
             # todo assert matches glob??
             paths.append(src)

     if sort:
-        paths = list(sorted(paths))
+        paths = sorted(paths)

     if len(paths) == 0:
         # todo make it conditionally defensive based on some global settings
-        core_warnings.high(f'''
+        warnings.high(f'''
{caller()}: no paths were matched against {pp}. This might result in missing data. Likely, the directory you passed is empty.
'''.strip())
         # traceback is useful to figure out what config caused it?
         import traceback

         traceback.print_stack()

     if guess_compression:
-        from .kompress import CPath, is_compressed
-        paths = [CPath(p) if is_compressed(p) else p for p in paths]
+        from .kompress import CPath, ZipPath, is_compressed
+
+        # NOTE: wrap is just for backwards compat with vendorized kompress
+        # with kompress library, only is_compressed check and Cpath should be enough
+        def wrap(p: Path) -> Path:
+            if isinstance(p, ZipPath):
+                return p
+            if p.suffix == '.zip':
+                return ZipPath(p)  # type: ignore[return-value]
+            if is_compressed(p):
+                return CPath(p)
+            return p
+
+        paths = [wrap(p) for p in paths]
     return tuple(paths)
||||||
|
|
||||||
|
|
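Taken together, the get_files hunks settle the caller-facing behaviour: directories are globbed (via os.path.isdir, so archives aren't mistaken for directories), explicit wildcards are globbed directly, plain files are asserted to exist, and compressed results are wrapped. A minimal usage sketch, assuming the new signature; the paths and globs below are illustrative, not taken from the diff:

from my.core.common import get_files

# a directory is expanded with the (default) glob argument
files = get_files('~/data/takeout', glob='*.zip')

# an explicit wildcard in the path takes over; passing glob= too now warns via warnings.medium
archives = get_files('~/data/backups/*.json.xz')

# results come back as a sorted tuple; .zip files become ZipPath and other
# compressed files become CPath, so downstream modules can open them transparently
for f in files:
    print(f)
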
-# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
-if TYPE_CHECKING:
-    from typing import Callable, TypeVar
-    from typing_extensions import Protocol
-    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
-    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
-    # ok, that's actually a super nice 'pattern'
-    F = TypeVar('F')
-
-    class McachewType(Protocol):
-        def __call__(
-            self,
-            cache_path: Any=None,
-            *,
-            hashf: Any=None,  # todo deprecate
-            depends_on: Any=None,
-            force_file: bool=False,
-            chunk_by: int=0,
-            logger: Any=None,
-        ) -> Callable[[F], F]:
-            ...
-
-    mcachew: McachewType
-
-
-_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
-"""See core.cachew.cache_dir for the explanation"""
-
-
-_cache_path_dflt = cast(str, object())
-# TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
-# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
-# but it's a bit problematic.. doublewrap works by defecting if the first arg is callable
-# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
-def mcachew(cache_path=_cache_path_dflt, **kwargs):  # type: ignore[no-redef]
-    """
-    Stands for 'Maybe cachew'.
-    Defensive wrapper around @cachew to make it an optional dependency.
-    """
-    if cache_path is _cache_path_dflt:
-        # wasn't specified... so we need to use cache_dir
-        from .cachew import cache_dir
-        cache_path = cache_dir()
-
-    if isinstance(cache_path, (str, Path)):
-        try:
-            # check that it starts with 'hack' path
-            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
-        except:  # noqa: E722 bare except
-            pass  # no action needed, doesn't start with 'hack' string
-        else:
-            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
-            # if it does, means that user wanted to disable cache
-            cache_path = None
-    try:
-        import cachew
-    except ModuleNotFoundError:
-        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
-        return lambda orig_func: orig_func
-    else:
-        kwargs['cache_path'] = cache_path
-        return cachew.cachew(**kwargs)
-
-
-@functools.lru_cache(1)
-def _magic():
-    import magic  # type: ignore
-    return magic.Magic(mime=True)
-
-
-# TODO could reuse in pdf module?
-import mimetypes  # todo do I need init()?
-# todo wtf? fastermime thinks it's mime is application/json even if the extension is xz??
-# whereas magic detects correctly: application/x-zstd and application/x-xz
-def fastermime(path: PathIsh) -> str:
-    paths = str(path)
-    # mimetypes is faster
-    (mime, _) = mimetypes.guess_type(paths)
-    if mime is not None:
-        return mime
-    # magic is slower but returns more stuff
-    # TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
-    return _magic().from_file(paths)
-
-
-Json = Dict[str, Any]
-
-
-from typing import TypeVar, Callable, Generic
-
-_C = TypeVar('_C')
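The mcachew block removed above (the new side re-exports it later via `from .cachew import mcachew`) is essentially the 'optional dependency' decorator pattern: cache with cachew when it's installed, otherwise degrade to a no-op. A distilled sketch of that pattern, not the verbatim implementation:

def optional_cache(**kwargs):
    """Decorator factory: cache with cachew if available, otherwise do nothing."""
    try:
        import cachew
    except ModuleNotFoundError:
        # cachew isn't installed: return a pass-through decorator
        return lambda orig_func: orig_func
    # cachew.cachew(**kwargs) itself returns the caching decorator
    return cachew.cachew(**kwargs)
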
 _R = TypeVar('_R')


 # https://stackoverflow.com/a/5192374/706389
+# NOTE: it was added to stdlib in 3.9 and then deprecated in 3.11
+# seems that the suggested solution is to use custom decorator?
 class classproperty(Generic[_R]):
-    def __init__(self, f: Callable[[_C], _R]) -> None:
+    def __init__(self, f: Callable[..., _R]) -> None:
         self.f = f

-    def __get__(self, obj: None, cls: _C) -> _R:
+    def __get__(self, obj, cls) -> _R:
         return self.f(cls)


+def test_classproperty() -> None:
+    from .compat import assert_type
+
+    class C:
+        @classproperty
+        def prop(cls) -> str:
+            return 'hello'
+
+    res = C.prop
+    assert_type(res, str)
+    assert res == 'hello'
+
+
 # hmm, this doesn't really work with mypy well..
 # https://github.com/python/mypy/issues/6244
 # class staticproperty(Generic[_R]):
@@ -341,310 +146,117 @@ class classproperty(Generic[_R]):
 #     def __get__(self) -> _R:
 #         return self.f()

-# TODO deprecate in favor of datetime_aware
-tzdatetime = datetime
-
-
-# TODO doctests?
-def isoparse(s: str) -> tzdatetime:
-    """
-    Parses timestamps formatted like 2020-05-01T10:32:02.925961Z
-    """
-    # TODO could use dateutil? but it's quite slow as far as I remember..
-    # TODO support non-utc.. somehow?
-    assert s.endswith('Z'), s
-    s = s[:-1] + '+00:00'
-    return datetime.fromisoformat(s)
-
-
-# legacy import -- we should use compat directly instead
-from .compat import Literal
-
-
 import re


 # https://stackoverflow.com/a/295466/706389
 def get_valid_filename(s: str) -> str:
     s = str(s).strip().replace(' ', '_')
     return re.sub(r'(?u)[^-\w.]', '', s)

-from typing import Generic, Sized, Callable
+# TODO deprecate and suggest to use one from my.core directly? not sure
+from .utils.itertools import unique_everseen  # noqa: F401

+### legacy imports, keeping them here for backwards compatibility
+## hiding behind TYPE_CHECKING so it works in runtime
+## in principle, warnings.deprecated decorator should cooperate with mypy, but doesn't look like it works atm?
+## perhaps it doesn't work when it's used from typing_extensions

-# X = TypeVar('X')
-def _warn_iterator(it, f: Any=None):
-    emitted = False
-    for i in it:
-        yield i
-        emitted = True
-    if not emitted:
-        warnings.warn(f"Function {f} didn't emit any data, make sure your config paths are correct")
+if not TYPE_CHECKING:
+    from .compat import deprecated

+    @deprecated('use my.core.compat.assert_never instead')
+    def assert_never(*args, **kwargs):
+        return compat.assert_never(*args, **kwargs)

-# TODO ugh, so I want to express something like:
-# X = TypeVar('X')
-# C = TypeVar('C', bound=Iterable[X])
-# _warn_iterable(it: C) -> C
-# but apparently I can't??? ugh.
-# https://github.com/python/typing/issues/548
-# I guess for now overloads are fine...
+    @deprecated('use my.core.compat.fromisoformat instead')
+    def isoparse(*args, **kwargs):
+        return compat.fromisoformat(*args, **kwargs)

-from typing import overload
-X = TypeVar('X')
-@overload
-def _warn_iterable(it: List[X], f: Any=None) -> List[X]: ...
-@overload
-def _warn_iterable(it: Iterable[X], f: Any=None) -> Iterable[X]: ...
-def _warn_iterable(it, f=None):
-    if isinstance(it, Sized):
-        sz = len(it)
-        if sz == 0:
-            warnings.warn(f"Function {f} returned empty container, make sure your config paths are correct")
-        return it
-    else:
-        return _warn_iterator(it, f=f)
+    @deprecated('use more_itertools.one instead')
+    def the(*args, **kwargs):
+        import more_itertools

+        return more_itertools.one(*args, **kwargs)

-# ok, this seems to work...
-# https://github.com/python/mypy/issues/1927#issue-167100413
-FL = TypeVar('FL', bound=Callable[..., List])
-FI = TypeVar('FI', bound=Callable[..., Iterable])
+    @deprecated('use functools.cached_property instead')
+    def cproperty(*args, **kwargs):
+        import functools

-@overload
-def warn_if_empty(f: FL) -> FL: ...
-@overload
-def warn_if_empty(f: FI) -> FI: ...
+        return functools.cached_property(*args, **kwargs)

-def warn_if_empty(f):
-    from functools import wraps
-
-    @wraps(f)
-    def wrapped(*args, **kwargs):
-        res = f(*args, **kwargs)
-        return _warn_iterable(res, f=f)
-    return wrapped  # type: ignore
+    @deprecated('use more_itertools.bucket instead')
+    def group_by_key(l, key):
+        res = {}
+        for i in l:
+            kk = key(i)
+            lst = res.get(kk, [])
+            lst.append(i)
+            res[kk] = lst


-# global state that turns on/off quick stats
-# can use the 'quick_stats' contextmanager
-# to enable/disable this in cli so that module 'stats'
-# functions don't have to implement custom 'quick' logic
-QUICK_STATS = False
-
-
-# in case user wants to use the stats functions/quick option
-# elsewhere -- can use this decorator instead of editing
-# the global state directly
-@contextmanager
-def quick_stats():
-    global QUICK_STATS
-    prev = QUICK_STATS
-    try:
-        QUICK_STATS = True
-        yield
-    finally:
-        QUICK_STATS = prev
-
-
-C = TypeVar('C')
-Stats = Dict[str, Any]
-StatsFun = Callable[[], Stats]
-# todo not sure about return type...
-def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False) -> Stats:
-    if callable(func):
-        fr = func()
-        fname = func.__name__
-    else:
-        # meh. means it's just a list.. not sure how to generate a name then
-        fr = func
-        fname = f'unnamed_{id(fr)}'
-    tname = type(fr).__name__
-    if tname == 'DataFrame':
-        # dynamic, because pandas is an optional dependency..
-        df = cast(Any, fr)  # todo ugh, not sure how to annotate properly
-        res = dict(
-            dtypes=df.dtypes.to_dict(),
-            rows=len(df),
-        )
-    else:
-        res = _stat_iterable(fr, quick=quick)
-    return {
-        fname: res,
-    }
-
-
-def _stat_iterable(it: Iterable[C], quick: bool=False) -> Any:
-    from more_itertools import ilen, take, first
-
-    # todo not sure if there is something in more_itertools to compute this?
-    total = 0
-    errors = 0
-    last = None
-
-    def funcit():
-        nonlocal errors, last, total
-        for x in it:
-            total += 1
-            if isinstance(x, Exception):
-                errors += 1
-            else:
-                last = x
-            yield x
-
-    eit = funcit()
-    count: Any
-    if quick or QUICK_STATS:
-        initial = take(100, eit)
-        count = len(initial)
-        if first(eit, None) is not None:  # todo can actually be none...
-            # haven't exhausted
-            count = f'{count}+'
-    else:
-        count = ilen(eit)
-
-    res = {
-        'count': count,
-    }
-
-    if total == 0:
-        # not sure but I guess a good balance? wouldn't want to throw early here?
-        res['warning'] = 'THE ITERABLE RETURNED NO DATA'
-
-    if errors > 0:
-        res['errors'] = errors
-
-    if last is not None:
-        dt = guess_datetime(last)
-        if dt is not None:
-            res['last'] = dt
-    return res
+        return res

+    @deprecated('use my.core.utils.itertools.make_dict instead')
+    def make_dict(*args, **kwargs):
+        from .utils import itertools as UI

-def test_stat_iterable() -> None:
-    from datetime import datetime, timedelta
-    from typing import NamedTuple
+        return UI.make_dict(*args, **kwargs)

-    dd = datetime.utcfromtimestamp(123)
-    day = timedelta(days=3)
+    @deprecated('use my.core.utils.itertools.listify instead')
+    def listify(*args, **kwargs):
+        from .utils import itertools as UI

-    X = NamedTuple('X', [('x', int), ('d', datetime)])
+        return UI.listify(*args, **kwargs)

-    def it():
-        yield RuntimeError('oops!')
-        for i in range(2):
-            yield X(x=i, d=dd + day * i)
-        yield RuntimeError('bad!')
-        for i in range(3):
-            yield X(x=i * 10, d=dd + day * (i * 10))
-        yield X(x=123, d=dd + day * 50)
+    @deprecated('use my.core.warn_if_empty instead')
+    def warn_if_empty(*args, **kwargs):
+        from .utils import itertools as UI

-    res = _stat_iterable(it())
-    assert res['count'] == 1 + 2 + 1 + 3 + 1
-    assert res['errors'] == 1 + 1
-    assert res['last'] == dd + day * 50
+        return UI.listify(*args, **kwargs)

+    @deprecated('use my.core.stat instead')
+    def stat(*args, **kwargs):
+        from . import stats

-# experimental, not sure about it..
-def guess_datetime(x: Any) -> Optional[datetime]:
-    # todo hmm implement withoutexception..
-    try:
-        d = asdict(x)
-    except:  # noqa: E722 bare except
-        return None
-    for k, v in d.items():
-        if isinstance(v, datetime):
-            return v
-    return None
+        return stats.stat(*args, **kwargs)

-def test_guess_datetime() -> None:
-    from datetime import datetime
-    from dataclasses import dataclass
-    from typing import NamedTuple
+    @deprecated('use my.core.make_logger instead')
+    def LazyLogger(*args, **kwargs):
+        from . import logging

-    dd = isoparse('2021-02-01T12:34:56Z')
+        return logging.LazyLogger(*args, **kwargs)

-    # ugh.. https://github.com/python/mypy/issues/7281
-    A = NamedTuple('A', [('x', int)])
-    B = NamedTuple('B', [('x', int), ('created', datetime)])
+    @deprecated('use my.core.types.asdict instead')
+    def asdict(*args, **kwargs):
+        from . import types

-    assert guess_datetime(A(x=4)) is None
-    assert guess_datetime(B(x=4, created=dd)) == dd
+        return types.asdict(*args, **kwargs)

-    @dataclass
-    class C:
-        a: datetime
-        x: int
-    assert guess_datetime(C(a=dd, x=435)) == dd
-    # TODO not sure what to return when multiple datetime fields?
-    # TODO test @property?
+    # todo wrap these in deprecated decorator as well?
+    # TODO hmm how to deprecate these in runtime?
+    # tricky cause they are actually classes/types
+    from typing import Literal  # noqa: F401

+    from .cachew import mcachew  # noqa: F401

-def is_namedtuple(thing: Any) -> bool:
-    # basic check to see if this is namedtuple-like
-    _asdict = getattr(thing, '_asdict', None)
-    return (_asdict is not None) and callable(_asdict)
+    # this is kinda internal, should just use my.core.logging.setup_logger if necessary
+    from .logging import setup_logger
+    from .stats import Stats
+    from .types import (
+        Json,
+        datetime_aware,
+        datetime_naive,
+    )

-def asdict(thing: Any) -> Json:
-    # todo primitive?
-    # todo exception?
-    if isinstance(thing, dict):
-        return thing
-    import dataclasses as D
-    if D.is_dataclass(thing):
-        return D.asdict(thing)
-    if is_namedtuple(thing):
-        return thing._asdict()
-    raise TypeError(f'Could not convert object {thing} to dict')
+    tzdatetime = datetime_aware

-
-# for now just serves documentation purposes... but one day might make it statically verifiable where possible?
-# TODO e.g. maybe use opaque mypy alias?
-datetime_naive = datetime
-datetime_aware = datetime
-
-
-def assert_subpackage(name: str) -> None:
-    # can lead to some unexpected issues if you 'import cachew' which being in my/core directory.. so let's protect against it
-    # NOTE: if we use overlay, name can be smth like my.origg.my.core.cachew ...
-    assert name == '__main__' or 'my.core' in name, f'Expected module __name__ ({name}) to be __main__ or start with my.core'
-
-
-# https://stackoverflow.com/a/10436851/706389
-from concurrent.futures import Future, Executor
-class DummyExecutor(Executor):
-    def __init__(self, max_workers: Optional[int]=1) -> None:
-        self._shutdown = False
-        self._max_workers = max_workers
-
-    # TODO: once support for 3.7 is dropped,
-    # can make 'fn' a positional only parameter,
-    # which fixes the mypy error this throws without the type: ignore
-    def submit(self, fn, *args, **kwargs) -> Future:  # type: ignore[override]
-        if self._shutdown:
-            raise RuntimeError('cannot schedule new futures after shutdown')
-
-        f: Future[Any] = Future()
-        try:
-            result = fn(*args, **kwargs)
-        except KeyboardInterrupt:
-            raise
-        except BaseException as e:
-            f.set_exception(e)
-        else:
-            f.set_result(result)
-
-        return f
-
-    def shutdown(self, wait: bool=True) -> None:  # type: ignore[override]
-        self._shutdown = True
-
-
-# see https://hakibenita.com/python-mypy-exhaustive-checking#exhaustiveness-checking
-def assert_never(value: NoReturn) -> NoReturn:
-    assert False, f'Unhandled value: {value} ({type(value).__name__})'
-
-
-# legacy deprecated import
-from .compat import cached_property as cproperty
+else:
+    from .compat import Never
+
+    # make these invalid during type check while working in runtime
+    Stats = Never
+    tzdatetime = Never
+    Json = Never
+    datetime_naive = Never
+    datetime_aware = Never
+###

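The tail of this file now follows one pattern: legacy names stay importable at runtime as thin @deprecated shims, while under TYPE_CHECKING the same names are bound to Never so type checkers reject new uses. A distilled sketch of the pattern (not the verbatim code above):

from typing import TYPE_CHECKING

if not TYPE_CHECKING:
    # at runtime the old name keeps working, but emits a deprecation warning
    from .compat import deprecated

    @deprecated('use more_itertools.one instead')
    def the(*args, **kwargs):
        import more_itertools

        return more_itertools.one(*args, **kwargs)
else:
    # at type-check time the legacy aliases become unusable on purpose
    from .compat import Never

    Json = Never
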
@@ -1,139 +1,61 @@
 '''
-Some backwards compatibility stuff/deprecation helpers
+Contains backwards compatibility helpers for different python versions.
+If something is relevant to HPI itself, please put it in .hpi_compat instead
 '''

+from __future__ import annotations
+
 import sys
-from types import ModuleType
-
-from . import warnings
-from .common import LazyLogger
-
-
-logger = LazyLogger('my.core.compat')
-
-
-def pre_pip_dal_handler(
-    name: str,
-    e: ModuleNotFoundError,
-    cfg,
-    requires=[],
-) -> ModuleType:
-    '''
-    https://github.com/karlicoss/HPI/issues/79
-    '''
-    if e.name != name:
-        # the module itself was imported, so the problem is with some dependencies
-        raise e
-    try:
-        dal = _get_dal(cfg, name)
-        warnings.high(f'''
-Specifying modules' dependencies in the config or in my/config/repos is deprecated!
-Please install {' '.join(requires)} as PIP packages (see the corresponding README instructions).
-'''.strip(), stacklevel=2)
-    except ModuleNotFoundError:
-        dal = None
-
-    if dal is None:
-        # probably means there was nothing in the old config in the first place
-        # so we should raise the original exception
-        raise e
-    return dal
-
-
-def _get_dal(cfg, module_name: str):
-    mpath = getattr(cfg, module_name, None)
-    if mpath is not None:
-        from .common import import_dir
-        return import_dir(mpath, '.dal')
-    else:
-        from importlib import import_module
-        return import_module(f'my.config.repos.{module_name}.dal')
-
-
-import os
-windows = os.name == 'nt'
-
-
-import sqlite3
-def sqlite_backup(*, source: sqlite3.Connection, dest: sqlite3.Connection, **kwargs) -> None:
-    if sys.version_info[:2] >= (3, 7):
-        source.backup(dest, **kwargs)
-    else:
-        # https://stackoverflow.com/a/10856450/706389
-        import io
-        tempfile = io.StringIO()
-        for line in source.iterdump():
-            tempfile.write('%s\n' % line)
-        tempfile.seek(0)
-
-        dest.cursor().executescript(tempfile.read())
-        dest.commit()
-
-
-# can remove after python3.9
-def removeprefix(text: str, prefix: str) -> str:
-    if text.startswith(prefix):
-        return text[len(prefix):]
-    return text
-
-
-# can remove after python3.8
-if sys.version_info[:2] >= (3, 8):
-    from functools import cached_property
-else:
-    from typing import TypeVar, Callable
-    Cl = TypeVar('Cl')
-    R = TypeVar('R')
-    def cached_property(f: Callable[[Cl], R]) -> R:
-        import functools
-        return property(functools.lru_cache(maxsize=1)(f))  # type: ignore
-    del Cl
-    del R


 from typing import TYPE_CHECKING

-if sys.version_info[:2] >= (3, 8):
-    from typing import Literal
+if sys.version_info[:2] >= (3, 13):
+    from warnings import deprecated
 else:
-    if TYPE_CHECKING:
-        from typing_extensions import Literal
-    else:
-        # erm.. I guess as long as it's not crashing, whatever...
-        class _Literal:
-            def __getitem__(self, args):
-                pass
-        Literal = _Literal()
-
-
-if sys.version_info[:2] >= (3, 8):
-    from typing import Protocol
-else:
-    if TYPE_CHECKING:
-        from typing_extensions import Protocol  # type: ignore[misc]
-    else:
-        # todo could also use NamedTuple?
-        Protocol = object
+    from typing_extensions import deprecated

+# keeping just for backwards compatibility, used to have compat implementation for 3.6
+if not TYPE_CHECKING:
+    import sqlite3
+
+    @deprecated('use .backup method on sqlite3.Connection directly instead')
+    def sqlite_backup(*, source: sqlite3.Connection, dest: sqlite3.Connection, **kwargs) -> None:
+        # TODO warn here?
+        source.backup(dest, **kwargs)
+
+    # keeping for runtime backwards compatibility (added in 3.9)
+    @deprecated('use .removeprefix method on string directly instead')
+    def removeprefix(text: str, prefix: str) -> str:
+        return text.removeprefix(prefix)
+
+    @deprecated('use .removesuffix method on string directly instead')
+    def removesuffix(text: str, suffix: str) -> str:
+        return text.removesuffix(suffix)
+
+    ##
+
+    ## used to have compat function before 3.8 for these, keeping for runtime back compatibility
+    from functools import cached_property
+    from typing import Literal, Protocol, TypedDict
+    ##

-if sys.version_info[:2] >= (3, 8):
-    from typing import TypedDict
+
+if sys.version_info[:2] >= (3, 10):
+    from typing import ParamSpec
 else:
-    if TYPE_CHECKING:
-        from typing_extensions import TypedDict  # type: ignore[misc]
-    else:
-        from typing import Dict
-        TypedDict = Dict
+    from typing_extensions import ParamSpec


 # bisect_left doesn't have a 'key' parameter (which we use)
 # till python3.10
 if sys.version_info[:2] <= (3, 9):
-    from typing import List, TypeVar, Any, Optional, Callable
+    from typing import Any, Callable, List, Optional, TypeVar  # noqa: UP035

     X = TypeVar('X')

     # copied from python src
-    def bisect_left(a: List[Any], x: Any, lo: int=0, hi: Optional[int]=None, *, key: Optional[Callable[..., Any]]=None) -> int:
+    # fmt: off
+    def bisect_left(a: list[Any], x: Any, lo: int=0, hi: int | None=None, *, key: Callable[..., Any] | None=None) -> int:
         if lo < 0:
             raise ValueError('lo must be non-negative')
         if hi is None:
@@ -155,5 +77,63 @@ if sys.version_info[:2] <= (3, 9):
             else:
                 hi = mid
         return lo
+    # fmt: on

 else:
-    from bisect import bisect_left  # type: ignore[misc]
+    from bisect import bisect_left


+from datetime import datetime
+
+if sys.version_info[:2] >= (3, 11):
+    fromisoformat = datetime.fromisoformat
+else:
+    # fromisoformat didn't support Z as "utc" before 3.11
+    # https://docs.python.org/3/library/datetime.html#datetime.datetime.fromisoformat
+
+    def fromisoformat(date_string: str) -> datetime:
+        if date_string.endswith('Z'):
+            date_string = date_string[:-1] + '+00:00'
+        return datetime.fromisoformat(date_string)
+
+
+def test_fromisoformat() -> None:
+    from datetime import timezone
+
+    # fmt: off
+    # feedbin has this format
+    assert fromisoformat('2020-05-01T10:32:02.925961Z') == datetime(
+        2020, 5, 1, 10, 32, 2, 925961, timezone.utc,
+    )
+
+    # polar has this format
+    assert fromisoformat('2018-11-28T22:04:01.304Z') == datetime(
+        2018, 11, 28, 22, 4, 1, 304000, timezone.utc,
+    )
+
+    # stackexchange, runnerup has this format
+    assert fromisoformat('2020-11-30T00:53:12Z') == datetime(
+        2020, 11, 30, 0, 53, 12, 0, timezone.utc,
+    )
+    # fmt: on
+
+    # arbtt has this format (sometimes less/more than 6 digits in milliseconds)
+    # TODO doesn't work atm, not sure if really should be supported...
+    # maybe should have flags for weird formats?
+    # assert isoparse('2017-07-18T18:59:38.21731Z') == datetime(
+    #     2017, 7, 18, 18, 59, 38, 217310, timezone.utc,
+    # )
+
+
+if sys.version_info[:2] >= (3, 10):
+    from types import NoneType
+    from typing import TypeAlias
+else:
+    NoneType = type(None)
+    from typing_extensions import TypeAlias
+
+
+if sys.version_info[:2] >= (3, 11):
+    from typing import Never, assert_never, assert_type
+else:
+    from typing_extensions import Never, assert_never, assert_type

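With the shim above, callers can parse 'Z'-suffixed timestamps uniformly across python versions; on 3.11+ it is just datetime.fromisoformat. A quick usage sketch, mirroring the formats exercised by test_fromisoformat:

from my.core.compat import fromisoformat

dt = fromisoformat('2020-05-01T10:32:02.925961Z')
print(dt.tzinfo)  # timezone.utc
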
@@ -1,27 +1,33 @@
 '''
 Bindings for the 'core' HPI configuration
 '''
-import re
-from typing import Sequence, Optional

-from . import warnings, PathIsh, Path
+from __future__ import annotations
+
+import re
+from collections.abc import Sequence
+from dataclasses import dataclass
+from pathlib import Path
+
+from . import warnings

 try:
     from my.config import core as user_config  # type: ignore[attr-defined]
 except Exception as e:
     try:
-        from my.config import common as user_config  # type: ignore[attr-defined, assignment, misc]
+        from my.config import common as user_config  # type: ignore[attr-defined]

         warnings.high("'common' config section is deprecated. Please rename it to 'core'.")
     except Exception as e2:
         # make it defensive, because it's pretty commonly used and would be annoying if it breaks hpi doctor etc.
         # this way it'll at least use the defaults
         # todo actually not sure if needs a warning? Perhaps it's okay without it, because the defaults are reasonable enough
-        user_config = object  # type: ignore[assignment, misc]
+        user_config = object


 _HPI_CACHE_DIR_DEFAULT = ''

-from dataclasses import dataclass
 @dataclass
 class Config(user_config):
     '''
@@ -32,7 +38,7 @@ class Config(user_config):
     cache_dir = '/your/custom/cache/path'
     '''

-    cache_dir: Optional[PathIsh] = _HPI_CACHE_DIR_DEFAULT
+    cache_dir: Path | str | None = _HPI_CACHE_DIR_DEFAULT
     '''
     Base directory for cachew.
     - if None , means cache is disabled
@@ -42,7 +48,7 @@ class Config(user_config):
     NOTE: you shouldn't use this attribute in HPI modules directly, use Config.get_cache_dir()/cachew.cache_dir() instead
     '''

-    tmp_dir: Optional[PathIsh] = None
+    tmp_dir: Path | str | None = None
     '''
     Path to a temporary directory.
     This can be used temporarily while extracting zipfiles etc...
@@ -50,34 +56,36 @@ class Config(user_config):
     - otherwise , use the specified directory as the base temporary directory
     '''

-    enabled_modules : Optional[Sequence[str]] = None
+    enabled_modules: Sequence[str] | None = None
     '''
     list of regexes/globs
     - None means 'rely on disabled_modules'
     '''

-    disabled_modules: Optional[Sequence[str]] = None
+    disabled_modules: Sequence[str] | None = None
     '''
     list of regexes/globs
     - None means 'rely on enabled_modules'
     '''

-    def get_cache_dir(self) -> Optional[Path]:
+    def get_cache_dir(self) -> Path | None:
         cdir = self.cache_dir
         if cdir is None:
             return None
         if cdir == _HPI_CACHE_DIR_DEFAULT:
             from .cachew import _appdirs_cache_dir

             return _appdirs_cache_dir()
         else:
             return Path(cdir).expanduser()

     def get_tmp_dir(self) -> Path:
-        tdir: Optional[PathIsh] = self.tmp_dir
+        tdir: Path | str | None = self.tmp_dir
         tpath: Path
         # use tempfile if unset
         if tdir is None:
             import tempfile

             tpath = Path(tempfile.gettempdir()) / 'HPI'
         else:
             tpath = Path(tdir)
@@ -85,10 +93,10 @@ class Config(user_config):
         tpath.mkdir(parents=True, exist_ok=True)
         return tpath

-    def _is_module_active(self, module: str) -> Optional[bool]:
+    def _is_module_active(self, module: str) -> bool | None:
         # None means the config doesn't specify anything
         # todo might be nice to return the 'reason' too? e.g. which option has matched
-        def matches(specs: Sequence[str]) -> Optional[str]:
+        def matches(specs: Sequence[str]) -> str | None:
             for spec in specs:
                 # not sure because . (packages separate) matches anything, but I guess unlikely to clash
                 if re.match(spec, module):
@@ -114,12 +122,15 @@ class Config(user_config):


 from .cfg import make_config

 config = make_config(Config)


 ### tests start
-from typing import Iterator
+from collections.abc import Iterator
 from contextlib import contextmanager as ctx


 @ctx
 def _reset_config() -> Iterator[Config]:
     # todo maybe have this decorator for the whole of my.config?
@@ -146,7 +157,7 @@ def test_active_modules() -> None:
         cc.disabled_modules = ['my.body.*']
         assert cc._is_module_active('my.whatever') is True
         assert cc._is_module_active('my.core') is None
-        assert not cc._is_module_active('my.body.exercise') is True
+        assert cc._is_module_active('my.body.exercise') is False

     with reset() as cc:
         # if both are set, enable all
@@ -158,4 +169,5 @@ def test_active_modules() -> None:
         assert cc._is_module_active("my.body.exercise") is True
         assert len(record_warnings) == 1


 ### tests end

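For reference, the options above map onto a user's my.config like this (values are illustrative): '' (the default) for cache_dir resolves to the appdirs cache directory and None disables caching, while the module lists are regexes/globs:

class core:
    cache_dir = '~/.cache/my'
    tmp_dir = None                      # None falls back to <system tmp>/HPI
    enabled_modules = ['my.github.*']
    disabled_modules = ['my.body.*']
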
@@ -1,32 +1,5 @@
-from __future__ import annotations
-from .common import assert_subpackage; assert_subpackage(__name__)
-
-from .common import PathIsh
-from .compat import Protocol
-from .sqlite import sqlite_connect_immutable
+from . import warnings

-## sadly dataset doesn't have any type definitions
-from typing import Iterable, Iterator, Dict, Optional, Any
-from contextlib import AbstractContextManager
-
-
-# NOTE: may not be true in general, but will be in the vast majority of cases
-row_type_T = Dict[str, Any]
-
-
-class TableT(Iterable, Protocol):
-    def find(self, *, order_by: Optional[str]=None) -> Iterator[row_type_T]: ...
-
-
-class DatabaseT(AbstractContextManager['DatabaseT'], Protocol):
-    def __getitem__(self, table: str) -> TableT: ...
-##
+warnings.high(f"{__name__} is deprecated, please use dataset directly if you need or switch to my.core.sqlite")

-# TODO wonder if also need to open without WAL.. test this on read-only directory/db file
-def connect_readonly(db: PathIsh) -> DatabaseT:
-    import dataset  # type: ignore
-    # see https://github.com/pudo/dataset/issues/136#issuecomment-128693122
-    # todo not sure if mode=ro has any benefit, but it doesn't work on read-only filesystems
-    # maybe it should autodetect readonly filesystems and apply this? not sure
-    creator = lambda: sqlite_connect_immutable(db)
-    return dataset.connect('sqlite:///', engine_kwargs={'creator': creator})
+from ._deprecated.dataset import *

@@ -5,23 +5,25 @@ A helper module for defining denylists for sources programmatically
 For docs, see doc/DENYLIST.md
 """

-import sys
-import json
+from __future__ import annotations
+
 import functools
+import json
+import sys
 from collections import defaultdict
-from typing import TypeVar, Set, Any, Mapping, Iterator, Dict, List
+from collections.abc import Iterator, Mapping
 from pathlib import Path
+from typing import Any, TypeVar

 import click
 from more_itertools import seekable
-from my.core.serialize import dumps
-from my.core.common import PathIsh
-from my.core.warnings import medium

+from .serialize import dumps
+from .warnings import medium

 T = TypeVar("T")

-DenyMap = Mapping[str, Set[Any]]
+DenyMap = Mapping[str, set[Any]]


 def _default_key_func(obj: T) -> str:
@@ -29,9 +31,9 @@ def _default_key_func(obj: T) -> str:


 class DenyList:
-    def __init__(self, denylist_file: PathIsh):
+    def __init__(self, denylist_file: Path | str) -> None:
         self.file = Path(denylist_file).expanduser().absolute()
-        self._deny_raw_list: List[Dict[str, Any]] = []
+        self._deny_raw_list: list[dict[str, Any]] = []
         self._deny_map: DenyMap = defaultdict(set)

         # deny cli, user can override these
@@ -45,7 +47,7 @@ class DenyList:
             return

         deny_map: DenyMap = defaultdict(set)
-        data: List[Dict[str, Any]]= json.loads(self.file.read_text())
+        data: list[dict[str, Any]] = json.loads(self.file.read_text())
         self._deny_raw_list = data

         for ignore in data:
@@ -96,6 +98,7 @@ class DenyList:
     def filter(
         self,
         itr: Iterator[T],
+        *,
         invert: bool = False,
     ) -> Iterator[T]:
         denyf = functools.partial(self._allow, deny_map=self.load())
@@ -103,7 +106,7 @@ class DenyList:
             return filter(lambda x: not denyf(x), itr)
         return filter(denyf, itr)

-    def deny(self, key: str, value: Any, write: bool = False) -> None:
+    def deny(self, key: str, value: Any, *, write: bool = False) -> None:
         '''
         add a key/value pair to the denylist
         '''
@@ -111,7 +114,7 @@ class DenyList:
         self._load()
         self._deny_raw({key: self._stringify_value(value)}, write=write)

-    def _deny_raw(self, data: Dict[str, Any], write: bool = False) -> None:
+    def _deny_raw(self, data: dict[str, Any], *, write: bool = False) -> None:
         self._deny_raw_list.append(data)
         if write:
             self.write()
@@ -130,7 +133,7 @@ class DenyList:
     def _deny_cli_remember(
         self,
         items: Iterator[T],
-        mem: Dict[str, T],
+        mem: dict[str, T],
     ) -> Iterator[str]:
         keyf = self._deny_cli_key_func or _default_key_func
         # i.e., convert each item to a string, and map str -> item
@@ -156,10 +159,8 @@ class DenyList:
         # reset the iterator
         sit.seek(0)
         # so we can map the selected string from fzf back to the original objects
-        memory_map: Dict[str, T] = {}
-        picker = FzfPrompt(
-            executable_path=self.fzf_path, default_options="--no-multi"
-        )
+        memory_map: dict[str, T] = {}
+        picker = FzfPrompt(executable_path=self.fzf_path, default_options="--no-multi")
         picked_l = picker.prompt(
             self._deny_cli_remember(itr, memory_map),
             "--read0",

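A hedged usage sketch of the DenyList API as it stands after this diff; the file path and data are made up, and note that write= and invert= are keyword-only now:

from my.core.denylist import DenyList

deny = DenyList('~/.config/my/denylist.json')
deny.deny('username', 'spammer', write=True)

rows = iter([{'username': 'spammer'}, {'username': 'friend'}])
kept = list(deny.filter(rows))  # entries matching the denylist are dropped
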
@@ -10,17 +10,20 @@ This potentially allows it to be:
 It should be free of external modules, importlib, exec, etc. etc.
 '''

+from __future__ import annotations
+
 REQUIRES = 'REQUIRES'
 NOT_HPI_MODULE_VAR = '__NOT_HPI_MODULE__'

 ###

 import ast
-import os
-from typing import Optional, Sequence, List, NamedTuple, Iterable, cast, Any
-from pathlib import Path
-import re
 import logging
+import os
+import re
+from collections.abc import Iterable, Sequence
+from pathlib import Path
+from typing import Any, NamedTuple, Optional, cast

 '''
 None means that requirements weren't defined (different from empty requirements)
@@ -30,11 +33,11 @@ Requires = Optional[Sequence[str]]

 class HPIModule(NamedTuple):
     name: str
-    skip_reason: Optional[str]
-    doc: Optional[str] = None
-    file: Optional[Path] = None
+    skip_reason: str | None
+    doc: str | None = None
+    file: Path | None = None
     requires: Requires = None
-    legacy: Optional[str] = None  # contains reason/deprecation warning
+    legacy: str | None = None  # contains reason/deprecation warning


 def ignored(m: str) -> bool:
@@ -144,7 +147,7 @@ def all_modules() -> Iterable[HPIModule]:
 def _iter_my_roots() -> Iterable[Path]:
     import my  # doesn't import any code, because of namespace package

-    paths: List[str] = list(my.__path__)  # type: ignore[attr-defined]
+    paths: list[str] = list(my.__path__)
     if len(paths) == 0:
         # should probably never happen?, if this code is running, it was imported
         # because something was added to __path__ to match this name
@@ -242,7 +245,7 @@ def test_pure() -> None:
     src = Path(__file__).read_text()
     # 'import my' is allowed, but
     # dont allow anything other HPI modules
-    assert re.findall('import ' + r'my\.\S+', src, re.M) == []
+    assert re.findall('import ' + r'my\.\S+', src, re.MULTILINE) == []
     assert 'from ' + 'my' not in src

my/core/error.py (128 changed lines)

@@ -3,11 +3,22 @@ Various error handling helpers
 See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
 """

+from __future__ import annotations
+
+import traceback
+from collections.abc import Iterable, Iterator
+from datetime import datetime
 from itertools import tee
-from typing import Union, TypeVar, Iterable, List, Tuple, Type, Optional, Callable, Any, cast
-
-from .compat import Literal
+from typing import (
+    Any,
+    Callable,
+    Literal,
+    TypeVar,
+    Union,
+    cast,
+)
+
+from .types import Json

 T = TypeVar('T')
 E = TypeVar('E', bound=Exception)  # TODO make covariant?
@@ -18,7 +29,8 @@ Res = ResT[T, Exception]

 ErrorPolicy = Literal["yield", "raise", "drop"]

-def notnone(x: Optional[T]) -> T:
+
+def notnone(x: T | None) -> T:
     assert x is not None
     return x

@@ -26,16 +38,49 @@ def notnone(x: Optional[T]) -> T:
 def unwrap(res: Res[T]) -> T:
     if isinstance(res, Exception):
         raise res
-    else:
-        return res
+    return res
+
+
+def drop_exceptions(itr: Iterator[Res[T]]) -> Iterator[T]:
+    """Return non-errors from the iterable"""
+    for o in itr:
+        if isinstance(o, Exception):
+            continue
+        yield o
+
+
+def raise_exceptions(itr: Iterable[Res[T]]) -> Iterator[T]:
+    """Raise errors from the iterable, stops the select function"""
+    for o in itr:
+        if isinstance(o, Exception):
+            raise o
+        yield o
+
+
+def warn_exceptions(itr: Iterable[Res[T]], warn_func: Callable[[Exception], None] | None = None) -> Iterator[T]:
+    # if not provided, use the 'warnings' module
+    if warn_func is None:
+        from my.core.warnings import medium
+
+        def _warn_func(e: Exception) -> None:
+            # TODO: print traceback? but user could always --raise-exceptions as well
+            medium(str(e))
+
+        warn_func = _warn_func
+
+    for o in itr:
+        if isinstance(o, Exception):
+            warn_func(o)
+            continue
+        yield o


 def echain(ex: E, cause: Exception) -> E:
     ex.__cause__ = cause
     return ex


-def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
+def split_errors(l: Iterable[ResT[T, E]], ET: type[E]) -> tuple[Iterable[T], Iterable[E]]:
     # TODO would be nice to have ET=Exception default? but it causes some mypy complaints?
     vit, eit = tee(l)
     # TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
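The three helpers added above give callers a uniform way to consume Res[T] streams. A usage sketch (the data is made up):

from my.core.error import drop_exceptions, warn_exceptions

def results():
    yield 1
    yield RuntimeError('flaky input')
    yield 2

assert list(drop_exceptions(iter(results()))) == [1, 2]

# warn_exceptions reports via my.core.warnings.medium by default,
# or through a caller-supplied warn_func:
vals = list(warn_exceptions(results(), warn_func=lambda e: print('skipped:', e)))
assert vals == [1, 2]
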
@@ -53,7 +98,9 @@ def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Ite


 K = TypeVar('K')
-def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]]:
+
+
+def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]]:
     """
     Sort a sequence potentially interleaved with errors/entries on which the key can't be computed.
     The general idea is: the error sticks to the non-error entry that follows it
@@ -61,7 +108,7 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]
     group = []
     groups = []
     for i in items:
-        k: Optional[K]
+        k: K | None
         try:
             k = key(i)
         except Exception:  # error white computing key? dunno, might be nice to handle...
@@ -71,8 +118,8 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]
             groups.append((k, group))
             group = []

-    results: List[Res[T]] = []
-    for v, grp in sorted(groups, key=lambda p: p[0]):  # type: ignore[return-value, arg-type]  # TODO SupportsLessThan??
+    results: list[Res[T]] = []
+    for _v, grp in sorted(groups, key=lambda p: p[0]):  # type: ignore[return-value, arg-type]  # TODO SupportsLessThan??
         results.extend(grp)
     results.extend(group)  # handle last group (it will always be errors only)

@@ -94,7 +141,7 @@ def test_sort_res_by() -> None:
         1,
         Exc('last'),
     ]
-    results = sort_res_by(ress, lambda x: int(x))  # type: ignore
+    results = sort_res_by(ress, lambda x: int(x))
     assert results == [
         1,
         'bad',
@@ -106,32 +153,32 @@ def test_sort_res_by() -> None:
         Exc('last'),
     ]

-    results2 = sort_res_by(ress + [0], lambda x: int(x))  # type: ignore
+    results2 = sort_res_by([*ress, 0], lambda x: int(x))
     assert results2 == [Exc('last'), 0] + results[:-1]

     assert sort_res_by(['caba', 'a', 'aba', 'daba'], key=lambda x: len(x)) == ['a', 'aba', 'caba', 'daba']
-    assert sort_res_by([], key=lambda x: x) == []  # type: ignore
+    assert sort_res_by([], key=lambda x: x) == []


 # helpers to associate timestamps with the errors (so something meaningful could be displayed on the plots, for example)
 # todo document it under 'patterns' somewhere...

 # todo proper typevar?
-from datetime import datetime
-def set_error_datetime(e: Exception, dt: Optional[datetime]) -> None:
+def set_error_datetime(e: Exception, dt: datetime | None) -> None:
     if dt is None:
         return
-    e.args = e.args + (dt,)
+    e.args = (*e.args, dt)
     # todo not sure if should return new exception?

-def attach_dt(e: Exception, *, dt: Optional[datetime]) -> Exception:
+
+def attach_dt(e: Exception, *, dt: datetime | None) -> Exception:
     set_error_datetime(e, dt)
     return e


 # todo it might be problematic because might mess with timezones (when it's converted to string, it's converted to a shift)
-def extract_error_datetime(e: Exception) -> Optional[datetime]:
+def extract_error_datetime(e: Exception) -> datetime | None:
     import re
-    from datetime import datetime

     for x in reversed(e.args):
         if isinstance(x, datetime):
             return x
@@ -146,8 +193,6 @@ def extract_error_datetime(e: Exception) -> Optional[datetime]:
     return None


-import traceback
-from .common import Json
 def error_to_json(e: Exception) -> Json:
     estr = ''.join(traceback.format_exception(Exception, e, e.__traceback__))
     return {'error': estr}
@@ -155,7 +200,13 @@ def error_to_json(e: Exception) -> Json:

 MODULE_SETUP_URL = 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#private-configuration-myconfig'

-def warn_my_config_import_error(err: Union[ImportError, AttributeError], help_url: Optional[str] = None) -> bool:
+
+def warn_my_config_import_error(
+    err: ImportError | AttributeError,
+    *,
+    help_url: str | None = None,
+    module_name: str | None = None,
+) -> bool:
     """
     If the user tried to import something from my.config but it failed,
     possibly due to missing the config block in my.config?
@@ -163,10 +214,12 @@ def warn_my_config_import_error(err: Union[ImportError, AttributeError], help_ur
     Returns True if it matched a possible config error
     """
     import re

     import click

     if help_url is None:
         help_url = MODULE_SETUP_URL
-    if type(err) == ImportError:
+    if type(err) is ImportError:
         if err.name != 'my.config':
             return False
         # parse name that user attempted to import
@@ -178,17 +231,31 @@ You may be missing the '{section_name}' section from your config.
 See {help_url}\
 """, fg='yellow', err=True)
         return True
-    elif type(err) == AttributeError:
+    elif type(err) is AttributeError:
         # test if user had a nested config block missing
         # https://github.com/karlicoss/HPI/issues/223
         if hasattr(err, 'obj') and hasattr(err, "name"):
             config_obj = cast(object, getattr(err, 'obj'))  # the object that caused the attribute error
             # e.g. active_browser for my.browser
-            nested_block_name = err.name  # type: ignore[attr-defined]
-            if config_obj.__module__ == 'my.config':
-                click.secho(f"""You're likely missing the nested config block for '{getattr(config_obj, '__name__', str(config_obj))}.{nested_block_name}'.
+            nested_block_name = err.name
+            errmsg = f"""You're likely missing the nested config block for '{getattr(config_obj, '__name__', str(config_obj))}.{nested_block_name}'.
 See {help_url} or check the corresponding module.py file for an example\
-""", fg='yellow', err=True)
+"""
+            if config_obj.__module__ == 'my.config':
+                click.secho(errmsg, fg='yellow', err=True)
+                return True
+            if module_name is not None and nested_block_name == module_name.split('.')[-1]:
+                # this tries to cover cases like these
+                # user config:
+                # class location:
+                #     class via_ip:
+                #         accuracy = 10_000
+                # then when we import it, we do something like
+                # from my.config import location
+                # user_config = location.via_ip
|
||||||
|
# so if location is present, but via_ip is not, we get
|
||||||
|
# AttributeError: type object 'location' has no attribute 'via_ip'
|
||||||
|
click.secho(errmsg, fg='yellow', err=True)
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
click.echo(f"Unexpected error... {err}", err=True)
|
click.echo(f"Unexpected error... {err}", err=True)
|
||||||
|
@ -196,7 +263,8 @@ See {help_url} or check the corresponding module.py file for an example\
|
||||||
|
|
||||||
|
|
||||||
def test_datetime_errors() -> None:
|
def test_datetime_errors() -> None:
|
||||||
import pytz
|
import pytz # noqa: I001
|
||||||
|
|
||||||
dt_notz = datetime.now()
|
dt_notz = datetime.now()
|
||||||
dt_tz = datetime.now(tz=pytz.timezone('Europe/Amsterdam'))
|
dt_tz = datetime.now(tz=pytz.timezone('Europe/Amsterdam'))
|
||||||
for dt in [dt_tz, dt_notz]:
|
for dt in [dt_tz, dt_notz]:
|
||||||
|
|
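For orientation, a quick sketch of how the datetime helpers above compose (hypothetical caller code, assuming they are importable from my.core.error):

# hypothetical usage of the helpers changed above
from datetime import datetime

from my.core.error import attach_dt, extract_error_datetime

err = attach_dt(RuntimeError('parse failed'), dt=datetime(2020, 1, 1))
# the timestamp rides along in err.args and can be recovered later, e.g. for plotting
assert extract_error_datetime(err) == datetime(2020, 1, 1)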
my/core/experimental.py (new file, 66 lines)
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+import sys
+import types
+from typing import Any
+
+
+# The idea behind this one is to support accessing "overlaid/shadowed" modules from namespace packages
+# See usage examples here:
+# - https://github.com/karlicoss/hpi-personal-overlay/blob/master/src/my/util/hpi_heartbeat.py
+# - https://github.com/karlicoss/hpi-personal-overlay/blob/master/src/my/twitter/all.py
+# Suppose you want to use my.twitter.talon, which isn't in the default all.py
+# You could just copy all.py to your personal overlay, but that would mean duplicating
+# all the code and possible upstream changes.
+# Alternatively, you could import the "original" my.twitter.all module from "overlay" my.twitter.all
+# _ORIG = import_original_module(__name__, __file__)
+# this would magically take care of package import path etc,
+# and should import the "original" my.twitter.all as _ORIG
+# After that you can call its methods, extend etc.
+def import_original_module(
+    module_name: str,
+    file: str,
+    *,
+    star: bool = False,
+    globals: dict[str, Any] | None = None,
+) -> types.ModuleType:
+    module_to_restore = sys.modules[module_name]
+
+    # NOTE: we really wanna hack the actual package of the module
+    # rather than just top level my,
+    # since that would be a bit less disruptive
+    module_pkg = module_to_restore.__package__
+    assert module_pkg is not None
+    parent = sys.modules[module_pkg]
+
+    my_path = parent.__path__._path  # type: ignore[attr-defined]
+    my_path_orig = list(my_path)
+
+    def fixup_path() -> None:
+        for i, p in enumerate(my_path_orig):
+            starts = file.startswith(p)
+            if i == 0:
+                # not sure about this.. but I guess it'll always be 0th element?
+                assert starts, (my_path_orig, file)
+            if starts:
+                my_path.remove(p)
+        # should remove exactly one item
+        assert len(my_path) + 1 == len(my_path_orig), (my_path_orig, file)
+
+    try:
+        fixup_path()
+        try:
+            del sys.modules[module_name]
+            # NOTE: we're using __import__ instead of importlib.import_module
+            # since it's closer to the actual normal import (e.g. imports subpackages etc properly)
+            # fromlist=[None] forces it to return rightmost child
+            # (otherwise would just return 'my' package)
+            res = __import__(module_name, fromlist=[None])  # type: ignore[list-item]
+            if star:
+                assert globals is not None
+                globals.update({k: v for k, v in vars(res).items() if not k.startswith('_')})
+            return res
+        finally:
+            sys.modules[module_name] = module_to_restore
+    finally:
+        my_path[:] = my_path_orig
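The module comments above describe the intended overlay usage; a minimal sketch of such an overlay module (file path and provider name illustrative, not part of this diff):

# my/twitter/all.py in a personal overlay (illustrative)
from my.core.experimental import import_original_module

# re-imports the upstream my.twitter.all, temporarily hiding this overlay from the package path
_ORIG = import_original_module(__name__, __file__)

def tweets():
    # wrap/extend the original provider however you like
    yield from _ORIG.tweets()  # assumes upstream defines tweets()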
@@ -1,27 +1,29 @@
-from .common import assert_subpackage; assert_subpackage(__name__)
+from __future__ import annotations

-import dataclasses as dcl
+from .internal import assert_subpackage
+
+assert_subpackage(__name__)
+
+import dataclasses
 import inspect
-from typing import TypeVar, Type, Any
+from typing import Any, Generic, TypeVar

 D = TypeVar('D')


-def _freeze_dataclass(Orig: Type[D]):
-    ofields = [(f.name, f.type, f) for f in dcl.fields(Orig)]  # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115
+def _freeze_dataclass(Orig: type[D]):
+    ofields = [(f.name, f.type, f) for f in dataclasses.fields(Orig)]  # type: ignore[arg-type]  # see https://github.com/python/typing_extensions/issues/115

     # extract properties along with their types
     props = list(inspect.getmembers(Orig, lambda o: isinstance(o, property)))
     pfields = [(name, inspect.signature(getattr(prop, 'fget')).return_annotation) for name, prop in props]
     # FIXME not sure about name?
     # NOTE: sadly passing bases=[Orig] won't work, python won't let us override properties with fields
-    RRR = dcl.make_dataclass('RRR', fields=[*ofields, *pfields])
+    RRR = dataclasses.make_dataclass('RRR', fields=[*ofields, *pfields])
     # todo maybe even declare as slots?
     return props, RRR


-# todo need some decorator thingie?
-from typing import Generic
 class Freezer(Generic[D]):
     '''
     Some magic which converts dataclass properties into fields.
@@ -29,13 +31,13 @@ class Freezer(Generic[D]):
     For now only supports dataclasses.
     '''

-    def __init__(self, Orig: Type[D]) -> None:
+    def __init__(self, Orig: type[D]) -> None:
         self.Orig = Orig
         self.props, self.Frozen = _freeze_dataclass(Orig)

     def freeze(self, value: D) -> D:
         pvalues = {name: getattr(value, name) for name, _ in self.props}
-        return self.Frozen(**dcl.asdict(value), **pvalues)  # type: ignore[call-overload]  # see https://github.com/python/typing_extensions/issues/115
+        return self.Frozen(**dataclasses.asdict(value), **pvalues)  # type: ignore[call-overload]  # see https://github.com/python/typing_extensions/issues/115


 ### tests
@@ -43,7 +45,7 @@ class Freezer(Generic[D]):

 # this needs to be defined here to prevent a mypy bug
 # see https://github.com/python/mypy/issues/7281
-@dcl.dataclass
+@dataclasses.dataclass
 class _A:
     x: Any

@@ -58,8 +60,10 @@ class _A:


 def test_freezer() -> None:
-    val = _A(x=dict(an_int=123, an_any=[1, 2, 3]))
+    val = _A(x={
+        'an_int': 123,
+        'an_any': [1, 2, 3],
+    })
     af = Freezer(_A)
     fval = af.freeze(val)

@@ -67,6 +71,7 @@ def test_freezer() -> None:
     assert fd['typed'] == 123
     assert fd['untyped'] == [1, 2, 3]


 ###

 # TODO shit. what to do with exceptions?
my/core/hpi_compat.py
Normal file
260
my/core/hpi_compat.py
Normal file
|
@ -0,0 +1,260 @@
|
+"""
+Contains various backwards compatibility/deprecation helpers relevant to HPI itself.
+(as opposed to .compat module which implements compatibility between python versions)
+"""
+
+from __future__ import annotations
+
+import inspect
+import os
+import re
+from collections.abc import Iterator, Sequence
+from types import ModuleType
+from typing import TypeVar
+
+from . import warnings
+
+
+def handle_legacy_import(
+    parent_module_name: str,
+    legacy_submodule_name: str,
+    parent_module_path: list[str],
+) -> bool:
+    ###
+    # this is to trick mypy into treating this as a proper namespace package
+    # should only be used for backwards compatibility on packages that are converted into namespace & all.py pattern
+    # - https://www.python.org/dev/peps/pep-0382/#namespace-packages-today
+    # - https://github.com/karlicoss/hpi_namespace_experiment
+    # - discussion here https://memex.zulipchat.com/#narrow/stream/279601-hpi/topic/extending.20HPI/near/269946944
+    from pkgutil import extend_path
+
+    parent_module_path[:] = extend_path(parent_module_path, parent_module_name)
+    # 'this' source tree ends up first in the pythonpath when we extend_path()
+    # so we need to move 'this' source tree towards the end to make sure we prioritize overlays
+    parent_module_path[:] = parent_module_path[1:] + parent_module_path[:1]
+    ###
+
+    # allow stuff like 'import my.module.submodule' and such
+    imported_as_parent = False
+
+    # allow stuff like 'from my.module import submodule'
+    importing_submodule = False
+
+    # some hacky traceback to inspect the current stack
+    # to see if the user is using the old style of importing
+    for f in inspect.stack():
+        # seems that when a submodule is imported, at some point it'll call some internal import machinery
+        # with 'parent' set to the parent module
+        # if parent module is imported first (i.e. in case of deprecated usage), it won't be the case
+        args = inspect.getargvalues(f.frame)
+        if args.locals.get('parent') == parent_module_name:
+            imported_as_parent = True
+
+        # this we can only detect from the code I guess
+        line = '\n'.join(f.code_context or [])
+        if re.match(rf'from\s+{parent_module_name}\s+import\s+{legacy_submodule_name}', line):
+            importing_submodule = True
+
+    # click sets '_HPI_COMPLETE' env var when it's doing autocompletion
+    # otherwise, the warning will be printed every time you try to tab complete
+    autocompleting_module_cli = "_HPI_COMPLETE" in os.environ
+
+    is_legacy_import = not (imported_as_parent or importing_submodule)
+    if is_legacy_import and not autocompleting_module_cli:
+        warnings.high(
+            f'''\
+importing {parent_module_name} is DEPRECATED! \
+Instead, import from {parent_module_name}.{legacy_submodule_name} or {parent_module_name}.all \
+See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
+'''
+        )
+    return is_legacy_import
+
+
+def pre_pip_dal_handler(
+    name: str,
+    e: ModuleNotFoundError,
+    cfg,
+    requires: Sequence[str] = (),
+) -> ModuleType:
+    '''
+    https://github.com/karlicoss/HPI/issues/79
+    '''
+    if e.name != name:
+        # the module itself was imported, so the problem is with some dependencies
+        raise e
+    try:
+        dal = _get_dal(cfg, name)
+        warnings.high(
+            f'''
+Specifying modules' dependencies in the config or in my/config/repos is deprecated!
+Please install {' '.join(requires)} as PIP packages (see the corresponding README instructions).
+'''.strip(),
+            stacklevel=2,
+        )
+    except ModuleNotFoundError:
+        dal = None
+
+    if dal is None:
+        # probably means there was nothing in the old config in the first place
+        # so we should raise the original exception
+        raise e
+    return dal
+
+
+def _get_dal(cfg, module_name: str):
+    mpath = getattr(cfg, module_name, None)
+    if mpath is not None:
+        from .utils.imports import import_dir
+
+        return import_dir(mpath, '.dal')
+    else:
+        from importlib import import_module
+
+        return import_module(f'my.config.repos.{module_name}.dal')
+
+
+V = TypeVar('V')
+
+
+# named to be kinda consistent with more_itertools, e.g. more_itertools.always_iterable
+class always_supports_sequence(Iterator[V]):
+    """
+    Helper to make migration from Sequence/List to Iterable/Iterator type backwards compatible in runtime
+    """
+
+    def __init__(self, it: Iterator[V]) -> None:
+        self._it = it
+        self._list: list[V] | None = None
+        self._lit: Iterator[V] | None = None
+
+    def __iter__(self) -> Iterator[V]:  # noqa: PYI034
+        if self._list is not None:
+            self._lit = iter(self._list)
+        return self
+
+    def __next__(self) -> V:
+        if self._list is not None:
+            assert self._lit is not None
+            delegate = self._lit
+        else:
+            delegate = self._it
+        return next(delegate)
+
+    def __getattr__(self, name):
+        return getattr(self._it, name)
+
+    @property
+    def _aslist(self) -> list[V]:
+        if self._list is None:
+            qualname = getattr(self._it, '__qualname__', '<no qualname>')  # defensive just in case
+            warnings.medium(f'Using {qualname} as list is deprecated. Migrate to iterative processing or call list() explicitly.')
+            self._list = list(self._it)
+
+            # this is necessary for list constructor to work correctly
+            # since it's __iter__ first, then tries to compute length and then starts iterating...
+            self._lit = iter(self._list)
+        return self._list
+
+    def __len__(self) -> int:
+        return len(self._aslist)
+
+    def __getitem__(self, i: int) -> V:
+        return self._aslist[i]
+
+
+def test_always_supports_sequence_list_constructor() -> None:
+    exhausted = 0
+
+    def it() -> Iterator[str]:
+        nonlocal exhausted
+        yield from ['a', 'b', 'c']
+        exhausted += 1
+
+    sit = always_supports_sequence(it())
+
+    # list constructor is a bit special... it's trying to compute length if it's available to optimize memory allocation
+    # so, what's happening in this case is
+    # - sit.__iter__ is called
+    # - sit.__len__ is called
+    # - sit.__next__ is called
+    res = list(sit)
+    assert res == ['a', 'b', 'c']
+    assert exhausted == 1
+
+    res = list(sit)
+    assert res == ['a', 'b', 'c']
+    assert exhausted == 1  # this will iterate over 'cached' list now, so original generator is only exhausted once
+
+
+def test_always_supports_sequence_indexing() -> None:
+    exhausted = 0
+
+    def it() -> Iterator[str]:
+        nonlocal exhausted
+        yield from ['a', 'b', 'c']
+        exhausted += 1
+
+    sit = always_supports_sequence(it())
+
+    assert len(sit) == 3
+    assert exhausted == 1
+
+    assert sit[2] == 'c'
+    assert sit[1] == 'b'
+    assert sit[0] == 'a'
+    assert exhausted == 1
+
+    # a few tests to make sure list-like operations are working..
+    assert list(sit) == ['a', 'b', 'c']
+    assert [x for x in sit] == ['a', 'b', 'c']  # noqa: C416
+    assert list(sit) == ['a', 'b', 'c']
+    assert [x for x in sit] == ['a', 'b', 'c']  # noqa: C416
+    assert exhausted == 1
+
+
+def test_always_supports_sequence_next() -> None:
+    exhausted = 0
+
+    def it() -> Iterator[str]:
+        nonlocal exhausted
+        yield from ['a', 'b', 'c']
+        exhausted += 1
+
+    sit = always_supports_sequence(it())
+
+    x = next(sit)
+    assert x == 'a'
+    assert exhausted == 0
+
+    x = next(sit)
+    assert x == 'b'
+    assert exhausted == 0
+
+
+def test_always_supports_sequence_iter() -> None:
+    exhausted = 0
+
+    def it() -> Iterator[str]:
+        nonlocal exhausted
+        yield from ['a', 'b', 'c']
+        exhausted += 1
+
+    sit = always_supports_sequence(it())
+
+    for x in sit:
+        assert x == 'a'
+        break
+
+    x = next(sit)
+    assert x == 'b'
+
+    assert exhausted == 0
+
+    x = next(sit)
+    assert x == 'c'
+    assert exhausted == 0
+
+    for _ in sit:
+        raise RuntimeError  # shouldn't trigger, just exhaust the iterator
+    assert exhausted == 1
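For context, handle_legacy_import is meant to be called from the __init__.py of a module that was converted to the namespace/all.py pattern, roughly like this (module names illustrative):

# my/reddit/__init__.py -- illustrative shim
from my.core.hpi_compat import handle_legacy_import

is_legacy_import = handle_legacy_import(
    parent_module_name=__name__,
    legacy_submodule_name='rexport',
    parent_module_path=__path__,
)
if is_legacy_import:
    # keep old-style 'import my.reddit' working, with a deprecation warning
    from .rexport import *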
@@ -1,14 +1,22 @@
 '''
 TODO doesn't really belong to 'core' morally, but can think of moving out later
 '''
-from .common import assert_subpackage; assert_subpackage(__name__)

-from typing import Iterable, Any, Optional, Dict
+from __future__ import annotations

-from .common import LazyLogger, asdict, Json
+from .internal import assert_subpackage

-logger = LazyLogger(__name__)
+assert_subpackage(__name__)
+
+from collections.abc import Iterable
+from typing import Any
+
+import click
+
+from .logging import make_logger
+from .types import Json, asdict
+
+logger = make_logger(__name__)


 class config:
@@ -27,6 +35,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c
     db = config.db

     from influxdb import InfluxDBClient  # type: ignore
+
     client = InfluxDBClient()
     # todo maybe create if not exists?
     # client.create_database(db)
@@ -37,7 +46,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c
     client.delete_series(database=db, measurement=measurement)

     # TODO need to take schema here...
-    cache: Dict[str, bool] = {}
+    cache: dict[str, bool] = {}

     def good(f, v) -> bool:
         c = cache.get(f)
@@ -56,7 +65,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c
     def dit() -> Iterable[Json]:
         for i in it:
             d = asdict(i)
-            tags: Optional[Json] = None
+            tags: Json | None = None
             tags_ = d.get('tags')  # meh... handle in a more robust manner
             if tags_ is not None and isinstance(tags_, dict):  # FIXME meh.
                 del d['tags']
@@ -69,18 +78,19 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c

             fields = filter_dict(d)

-            yield dict(
-                measurement=measurement,
+            yield {
+                'measurement': measurement,
                 # TODO maybe good idea to tag with database file/name? to inspect inconsistencies etc..
                 # hmm, so tags are autoindexed and might be faster?
                 # not sure what's the big difference though
                 # "fields are data and tags are metadata"
-                tags=tags,
-                time=dt,
-                fields=fields,
-            )
+                'tags': tags,
+                'time': dt,
+                'fields': fields,
+            }

     from more_itertools import chunked

     # "The optimal batch size is 5000 lines of line protocol."
     # some chunking is def necessary, otherwise it fails
     inserted = 0
@@ -94,7 +104,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c
     # todo "Specify timestamp precision when writing to InfluxDB."?


-def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> None:
+def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> None:
     if name is None:
         assert callable(it)  # generators have no name/module
         name = f'{it.__module__}:{it.__name__}'
@@ -104,7 +114,9 @@ def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> No
         it = it()

     from itertools import tee
+
     from more_itertools import first, one

     it, x = tee(it)
     f = first(x, default=None)
     if f is None:
@@ -114,17 +126,17 @@ def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> No
     # TODO can we reuse pandas code or something?
     #
     from .pandas import _as_columns

     schema = _as_columns(type(f))

     from datetime import datetime

     dtex = RuntimeError(f'expected single datetime field. schema: {schema}')
     dtf = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex)

     fill(it, measurement=name, reset=reset, dt_col=dtf)


-import click
-
 @click.group()
 def main() -> None:
     pass
@@ -133,8 +145,9 @@ def main() -> None:
 @main.command(name='populate', short_help='populate influxdb')
 @click.option('--reset', is_flag=True, help='Reset Influx measurements before inserting', show_default=True)
 @click.argument('FUNCTION_NAME', type=str, required=True)
-def populate(function_name: str, reset: bool) -> None:
+def populate(*, function_name: str, reset: bool) -> None:
     from .__main__ import _locate_functions_or_prompt

     [provider] = list(_locate_functions_or_prompt([function_name]))
     # todo could have a non-interactive version which populates from all data sources for the provider?
     magic_fill(provider, reset=reset)
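Rough usage sketch for this module (provider name hypothetical): magic_fill infers the measurement name and the datetime column, so pushing a data source into influxdb is a one-liner:

from my.core import influxdb

import my.somemodule  # hypothetical HPI module
# somemodule.entries() should yield objects with exactly one datetime field
influxdb.magic_fill(my.somemodule.entries)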
@@ -1,5 +1,6 @@
 '''
 A hook to insert user's config directory into Python's search path.
+Note that this file is imported only if we don't have custom user config (under my.config namespace) in PYTHONPATH

 Ideally that would be in __init__.py (so it's executed without having to import explicitly)
 But, with namespace packages, we can't have __init__.py in the parent subpackage
@@ -15,15 +16,17 @@ Please let me know if you are aware of a better way of dealing with this!
 def setup_config() -> None:
     import sys
     import warnings
+    from pathlib import Path

     from .preinit import get_mycfg_dir

     mycfg_dir = get_mycfg_dir()

     if not mycfg_dir.exists():
         warnings.warn(f"""
 'my.config' package isn't found! (expected at '{mycfg_dir}'). This is likely to result in issues.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""".strip())
+""".strip(), stacklevel=1)
         return

     mpath = str(mycfg_dir)
@@ -41,11 +44,29 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-mo
     except ImportError as ex:
         # just in case... who knows what crazy setup users have
         import logging

         logging.exception(ex)
         warnings.warn(f"""
 Importing 'my.config' failed! (error: {ex}). This is likely to result in issues.
 See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
-""")
+""", stacklevel=1)
+    else:
+        # defensive just in case -- __file__ may not be present if there is some dynamic magic involved
+        used_config_file = getattr(my.config, '__file__', None)
+        if used_config_file is not None:
+            used_config_path = Path(used_config_file)
+            try:
+                # will crash if it's imported from other dir?
+                used_config_path.relative_to(mycfg_dir)
+            except ValueError:
+                # TODO maybe implement a strict mode where these warnings will be errors?
+                warnings.warn(
+                    f"""
+Expected my.config to be located at {mycfg_dir}, but instead its path is {used_config_path}.
+This will likely cause issues down the line -- double check {mycfg_dir} structure.
+See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
+""", stacklevel=1
+                )


 setup_config()
my/core/internal.py (new file, 9 lines)
@@ -0,0 +1,9 @@
+"""
+Utils specific to hpi core, shouldn't really be used by HPI modules
+"""
+
+
+def assert_subpackage(name: str) -> None:
+    # can lead to some unexpected issues if you 'import cachew' which being in my/core directory.. so let's protect against it
+    # NOTE: if we use overlay, name can be smth like my.origg.my.core.cachew ...
+    assert name == '__main__' or 'my.core' in name, f'Expected module __name__ ({name}) to be __main__ or start with my.core'
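As the other files in this changeset show, the intended call pattern is two lines at the very top of each my.core submodule:

# e.g. at the top of my/core/influxdb.py (as updated in this changeset)
from .internal import assert_subpackage

assert_subpackage(__name__)  # fails fast if the module is imported outside my.core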
@@ -1,247 +1,17 @@
-"""
-Various helpers for compression
-"""
-from __future__ import annotations
-
-from datetime import datetime
-import pathlib
-from pathlib import Path
-import sys
-from typing import Union, IO, Sequence, Any, Iterator
-import io
-
-PathIsh = Union[Path, str]
-
-
-class Ext:
-    xz = '.xz'
-    zip = '.zip'
-    lz4 = '.lz4'
-    zstd = '.zstd'
-    zst = '.zst'
-    targz = '.tar.gz'
-
-
-def is_compressed(p: Path) -> bool:
-    # todo kinda lame way for now.. use mime ideally?
-    # should cooperate with kompress.kopen?
-    return any(p.name.endswith(ext) for ext in {Ext.xz, Ext.zip, Ext.lz4, Ext.zstd, Ext.zst, Ext.targz})
-
-
-def _zstd_open(path: Path, *args, **kwargs) -> IO:
-    import zstandard as zstd  # type: ignore
-    fh = path.open('rb')
-    dctx = zstd.ZstdDecompressor()
-    reader = dctx.stream_reader(fh)
-
-    mode = kwargs.get('mode', 'rt')
-    if mode == 'rb':
-        return reader
-    else:
-        # must be text mode
-        kwargs.pop('mode')  # TextIOWrapper doesn't like it
-        return io.TextIOWrapper(reader, **kwargs)  # meh
-
-
-# TODO use the 'dependent type' trick for return type?
-def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO:
-    # just in case, but I think this shouldn't be necessary anymore
-    # since when we call .read_text, encoding is passed already
-    if mode in {'r', 'rt'}:
-        encoding = kwargs.get('encoding', 'utf8')
-    else:
-        encoding = None
-    kwargs['encoding'] = encoding
-
-    pp = Path(path)
-    name = pp.name
-    if name.endswith(Ext.xz):
-        import lzma
-
-        # ugh. for lzma, 'r' means 'rb'
-        # https://github.com/python/cpython/blob/d01cf5072be5511595b6d0c35ace6c1b07716f8d/Lib/lzma.py#L97
-        # whereas for regular open, 'r' means 'rt'
-        # https://docs.python.org/3/library/functions.html#open
-        if mode == 'r':
-            mode = 'rt'
-        kwargs['mode'] = mode
-        return lzma.open(pp, *args, **kwargs)
-    elif name.endswith(Ext.zip):
-        # eh. this behaviour is a bit dodgy...
-        from zipfile import ZipFile
-        zfile = ZipFile(pp)
-
-        [subpath] = args  # meh?
-
-        ## oh god... https://stackoverflow.com/a/5639960/706389
-        ifile = zfile.open(subpath, mode='r')
-        ifile.readable = lambda: True   # type: ignore
-        ifile.writable = lambda: False  # type: ignore
-        ifile.seekable = lambda: False  # type: ignore
-        ifile.read1 = ifile.read        # type: ignore
-        # TODO pass all kwargs here??
-        # todo 'expected "BinaryIO"'??
-        return io.TextIOWrapper(ifile, encoding=encoding)  # type: ignore[arg-type]
-    elif name.endswith(Ext.lz4):
-        import lz4.frame  # type: ignore
-        return lz4.frame.open(str(pp), mode, *args, **kwargs)
-    elif name.endswith(Ext.zstd) or name.endswith(Ext.zst):
-        kwargs['mode'] = mode
-        return _zstd_open(pp, *args, **kwargs)
-    elif name.endswith(Ext.targz):
-        import tarfile
-        # FIXME pass mode?
-        tf = tarfile.open(pp)
-        # TODO pass encoding?
-        x = tf.extractfile(*args); assert x is not None
-        return x  # type: ignore[return-value]
-    else:
-        return pp.open(mode, *args, **kwargs)
-
-
-import typing
-import os
-
-if typing.TYPE_CHECKING:
-    # otherwise mypy can't figure out that BasePath is a type alias..
-    BasePath = pathlib.Path
-else:
-    BasePath = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath
-
-
-class CPath(BasePath):
-    """
-    Hacky way to support compressed files.
-    If you can think of a better way to do this, please let me know! https://github.com/karlicoss/HPI/issues/20
-
-    Ugh. So, can't override Path because of some _flavour thing.
-    Path only has _accessor and _closed slots, so can't directly set .open method
-    _accessor.open has to return file descriptor, doesn't work for compressed stuff.
-    """
-    def open(self, *args, **kwargs):
-        kopen_kwargs = {}
-        mode = kwargs.get('mode')
-        if mode is not None:
-            kopen_kwargs['mode'] = mode
-        encoding = kwargs.get('encoding')
-        if encoding is not None:
-            kopen_kwargs['encoding'] = encoding
-        # TODO assert read only?
-        return kopen(str(self), **kopen_kwargs)
-
-
-open = kopen  # TODO deprecate
-
-
-# meh
-# TODO ideally switch to ZipPath or smth similar?
-# nothing else supports subpath properly anyway
-def kexists(path: PathIsh, subpath: str) -> bool:
-    try:
-        kopen(path, subpath)
-        return True
-    except Exception:
-        return False
-
-
-import zipfile
-if sys.version_info[:2] >= (3, 8):
-    # meh... zipfile.Path is not available on 3.7
-    zipfile_Path = zipfile.Path
-else:
-    if typing.TYPE_CHECKING:
-        zipfile_Path = Any
-    else:
-        zipfile_Path = object
-
-
-class ZipPath(zipfile_Path):
-    # NOTE: is_dir/is_file might not behave as expected, the base class checks it only based on the slash in path
-
-    # seems that root/at are not exposed in the docs, so might be an implementation detail
-    root: zipfile.ZipFile
-    at: str
-
-    @property
-    def filepath(self) -> Path:
-        res = self.root.filename
-        assert res is not None  # make mypy happy
-        return Path(res)
-
-    @property
-    def subpath(self) -> Path:
-        return Path(self.at)
-
-    def absolute(self) -> ZipPath:
-        return ZipPath(self.filepath.absolute(), self.at)
-
-    def exists(self) -> bool:
-        if self.at == '':
-            # special case, the base class returns False in this case for some reason
-            return self.filepath.exists()
-        return super().exists() or self._as_dir().exists()
-
-    def _as_dir(self) -> zipfile_Path:
-        # note: seems that zip always uses forward slash, regardless OS?
-        return zipfile_Path(self.root, self.at + '/')
-
-    def rglob(self, glob: str) -> Sequence[ZipPath]:
-        # note: not 100% sure about the correctness, but seem fine?
-        # Path.match() matches from the right, so need to
-        rpaths = [p for p in self.root.namelist() if p.startswith(self.at)]
-        rpaths = [p for p in rpaths if Path(p).match(glob)]
-        return [ZipPath(self.root, p) for p in rpaths]
-
-    def relative_to(self, other: ZipPath) -> Path:
-        assert self.filepath == other.filepath, (self.filepath, other.filepath)
-        return self.subpath.relative_to(other.subpath)
-
-    @property
-    def parts(self) -> Sequence[str]:
-        # messy, but might be ok..
-        return self.filepath.parts + self.subpath.parts
-
-    def __truediv__(self, key) -> ZipPath:
-        # need to implement it so the return type is not zipfile.Path
-        tmp = zipfile_Path(self.root) / self.at / key
-        return ZipPath(self.root, tmp.at)  # type: ignore[attr-defined]
-
-    def iterdir(self) -> Iterator[ZipPath]:
-        for s in self._as_dir().iterdir():
-            yield ZipPath(s.root, s.at)  # type: ignore[attr-defined]
-
-    @property
-    def stem(self) -> str:
-        return self.subpath.stem
-
-    @property  # type: ignore[misc]
-    def __class__(self):
-        return Path
-
-    def __eq__(self, other) -> bool:
-        # hmm, super class doesn't seem to treat as equals unless they are the same object
-        if not isinstance(other, ZipPath):
-            return False
-        return (self.filepath, self.subpath) == (other.filepath, other.subpath)
-
-    def __hash__(self) -> int:
-        return hash((self.filepath, self.subpath))
-
-    def stat(self) -> os.stat_result:
-        # NOTE: zip datetimes have no notion of time zone, usually they just keep local time?
-        # see https://en.wikipedia.org/wiki/ZIP_(file_format)#Structure
-        dt = datetime(*self.root.getinfo(self.at).date_time)
-        ts = int(dt.timestamp())
-        params = dict(
-            st_mode=0,
-            st_ino=0,
-            st_dev=0,
-            st_nlink=1,
-            st_uid=1000,
-            st_gid=1000,
-            st_size=0,  # todo compute it properly?
-            st_atime=ts,
-            st_mtime=ts,
-            st_ctime=ts,
-        )
-        return os.stat_result(tuple(params.values()))
+from .internal import assert_subpackage
+
+assert_subpackage(__name__)
+
+from . import warnings
+
+# do this later -- for now need to transition modules to avoid using kompress directly (e.g. ZipPath)
+# warnings.high('my.core.kompress is deprecated, please use "kompress" library directly. See https://github.com/karlicoss/kompress')
+
+try:
+    from kompress import *
+except ModuleNotFoundError as e:
+    if e.name == 'kompress':
+        warnings.high('Please install kompress (pip3 install kompress). Falling onto vendorized kompress for now.')
+        from ._deprecated.kompress import *  # type: ignore[assignment]
+    else:
+        raise e
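The upshot is that existing call sites shouldn't need changing: my.core.kompress now re-exports the standalone kompress library (falling back to the vendorized copy). Assuming CPath is among the re-exported names, as in the vendorized version, code like this keeps working:

from my.core.kompress import CPath

path = CPath('/path/to/data.json.xz')  # hypothetical path
text = path.open().read()  # transparently decompresses, as before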
@@ -5,21 +5,25 @@ This can potentially allow both for safer defensive parsing, and let you know if
 TODO perhaps need to get some inspiration from linear logic to decide on a nice API...
 '''

+from __future__ import annotations
+
 from collections import OrderedDict
-from typing import Any, List
+from typing import Any


 def ignore(w, *keys):
     for k in keys:
         w[k].ignore()


 def zoom(w, *keys):
     return [w[k].zoom() for k in keys]


 # TODO need to support lists
 class Zoomable:
     def __init__(self, parent, *args, **kwargs) -> None:
-        super().__init__(*args, **kwargs)  # type: ignore
+        super().__init__(*args, **kwargs)
         self.parent = parent

     # TODO not sure, maybe do it via del??
@@ -40,7 +44,7 @@ class Zoomable:
         assert self.parent is not None
         self.parent._remove(self)

-    def zoom(self) -> 'Zoomable':
+    def zoom(self) -> Zoomable:
         self.consume()
         return self

@@ -63,6 +67,7 @@ class Wdict(Zoomable, OrderedDict):

     def this_consumed(self):
         return len(self) == 0
+
     # TODO specify mypy type for the index special method?


@@ -77,6 +82,7 @@ class Wlist(Zoomable, list):
     def this_consumed(self):
         return len(self) == 0

+
 class Wvalue(Zoomable):
     def __init__(self, parent, value: Any) -> None:
         super().__init__(parent)
@@ -93,10 +99,9 @@ class Wvalue(Zoomable):
         return 'WValue{' + repr(self.value) + '}'


-from typing import Tuple
-def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
+def _wrap(j, parent=None) -> tuple[Zoomable, list[Zoomable]]:
     res: Zoomable
-    cc: List[Zoomable]
+    cc: list[Zoomable]
     if isinstance(j, dict):
         res = Wdict(parent)
         cc = [res]
@@ -120,15 +125,17 @@ def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
     raise RuntimeError(f'Unexpected type: {type(j)} {j}')


+from collections.abc import Iterator
 from contextlib import contextmanager
-from typing import Iterator


 class UnconsumedError(Exception):
     pass


 # TODO think about error policy later...
 @contextmanager
-def wrap(j, throw=True) -> Iterator[Zoomable]:
+def wrap(j, *, throw=True) -> Iterator[Zoomable]:
     w, children = _wrap(j)

     yield w
@@ -146,8 +153,11 @@ Expected {c} to be fully consumed by the parser.


 from typing import cast


 def test_unconsumed() -> None:
-    import pytest  # type: ignore
+    import pytest

     with pytest.raises(UnconsumedError):
         with wrap({'a': 1234}) as w:
             w = cast(Wdict, w)
@@ -158,6 +168,7 @@ def test_unconsumed() -> None:
             w = cast(Wdict, w)
             d = w['c']['d'].zoom()


 def test_consumed() -> None:
     with wrap({'a': 1234}) as w:
         w = cast(Wdict, w)
@@ -168,6 +179,7 @@ def test_consumed() -> None:
         c = w['c'].zoom()
         d = c['d'].zoom()


 def test_types() -> None:
     # (string, number, object, array, boolean or nul
     with wrap({'string': 'string', 'number': 3.14, 'boolean': True, 'null': None, 'list': [1, 2, 3]}) as w:
@@ -179,6 +191,7 @@ def test_types() -> None:
         for x in list(w['list'].zoom()):  # TODO eh. how to avoid the extra list thing?
             x.consume()


 def test_consume_all() -> None:
     with wrap({'aaa': {'bbb': {'hi': 123}}}) as w:
         w = cast(Wdict, w)
@@ -188,11 +201,9 @@ def test_consume_all() -> None:

 def test_consume_few() -> None:
     import pytest

     pytest.skip('Will think about it later..')
-    with wrap({
-        'important': 123,
-        'unimportant': 'whatever'
-    }) as w:
+    with wrap({'important': 123, 'unimportant': 'whatever'}) as w:
         w = cast(Wdict, w)
         w['important'].zoom()
         w.consume_all()
@@ -200,7 +211,8 @@ def test_consume_few() -> None:


 def test_zoom() -> None:
-    import pytest  # type: ignore
+    import pytest

     with wrap({'aaa': 'whatever'}) as w:
         w = cast(Wdict, w)
         with pytest.raises(KeyError):
@@ -209,3 +221,34 @@ def test_zoom() -> None:


 # TODO type check this...
+
+# TODO feels like the whole thing kind of unnecessarily complex
+# - cons:
+#   - in most cases this is not even needed? who cares if we miss a few attributes?
+# - pro: on the other hand it could be interesting to know about new attributes in data,
+#   and without this kind of processing we wouldn't even know
+# alternatives
+# - manually process data
+#   e.g. use asserts, dict.pop and dict.values() methods to unpack things
+#   - pros:
+#     - very simple, since uses built in syntax
+#     - very performant, as fast as it gets
+#     - very flexible, easy to adjust behaviour
+#   - cons:
+#     - can forget to assert about extra entities etc, so error prone
+#     - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder
+#     - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though)
+#     - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements
+#     - TODO perhaps combine warnings somehow or at least only emit once per module?
+#     - hmm actually tbh if we carefully go through everything and don't make copies, then only requires one assert at the very end?
+#   - TODO this is kinda useful? https://discuss.python.org/t/syntax-for-dictionnary-unpacking-to-variables/18718
+#     operator.itemgetter?
+#   - TODO can use match operator in python for this? quite nice actually! and allows for dynamic behaviour
+#     only from 3.10 tho, and gonna be tricky to do dynamic defensive behaviour with this
+#   - TODO in a sense, blenser already would hint if some meaningful fields aren't being processed? only if they are changing though
+# - define a "schema" for data, then just recursively match data against the schema?
+#   possibly pydantic already does something like that? not sure about performance though
+#   pros:
+#   - much simpler to extend and understand what's going on
+#   cons:
+#   - more rigid, so it becomes tricky to do dynamic stuff (e.g. if schema actually changes)
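A small sketch tying the tests above together -- the point of wrap() is that leaving any field unzoomed raises UnconsumedError on exit (payload hypothetical):

from typing import cast

from my.core.konsume import Wdict, wrap

j = {'status': 200, 'body': {'value': 42}}
with wrap(j) as w:
    w = cast(Wdict, w)
    assert w['status'].zoom().value == 200
    body = w['body'].zoom()  # intermediate dicts need zooming too
    assert body['value'].zoom().value == 42
# exiting without consuming e.g. 'status' would have raised UnconsumedError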
@@ -1,60 +0,0 @@
-# I think 'compat' should be for python-specific compat stuff, whereas this for HPI specific backwards compatibility
-import os
-import inspect
-import re
-from typing import List
-
-from my.core import warnings as W
-
-
-def handle_legacy_import(
-    parent_module_name: str,
-    legacy_submodule_name: str,
-    parent_module_path: List[str],
-) -> bool:
-    ###
-    # this is to trick mypy into treating this as a proper namespace package
-    # should only be used for backwards compatibility on packages that are convernted into namespace & all.py pattern
-    # - https://www.python.org/dev/peps/pep-0382/#namespace-packages-today
-    # - https://github.com/karlicoss/hpi_namespace_experiment
-    # - discussion here https://memex.zulipchat.com/#narrow/stream/279601-hpi/topic/extending.20HPI/near/269946944
-    from pkgutil import extend_path
-    parent_module_path[:] = extend_path(parent_module_path, parent_module_name)
-    # 'this' source tree ends up first in the pythonpath when we extend_path()
-    # so we need to move 'this' source tree towards the end to make sure we prioritize overlays
-    parent_module_path[:] = parent_module_path[1:] + parent_module_path[:1]
-    ###
-
-    # allow stuff like 'import my.module.submodule' and such
-    imported_as_parent = False
-
-    # allow stuff like 'from my.module import submodule'
-    importing_submodule = False
-
-    # some hacky traceback to inspect the current stack
-    # to see if the user is using the old style of importing
-    for f in inspect.stack():
-        # seems that when a submodule is imported, at some point it'll call some internal import machinery
-        # with 'parent' set to the parent module
-        # if parent module is imported first (i.e. in case of deprecated usage), it won't be the case
-        args = inspect.getargvalues(f.frame)
-        if args.locals.get('parent') == parent_module_name:
-            imported_as_parent = True
-
-        # this we can only detect from the code I guess
-        line = '\n'.join(f.code_context or [])
-        if re.match(rf'from\s+{parent_module_name}\s+import\s+{legacy_submodule_name}', line):
-            importing_submodule = True
-
-    # click sets '_HPI_COMPLETE' env var when it's doing autocompletion
-    # otherwise, the warning will be printed every time you try to tab complete
-    autocompleting_module_cli = "_HPI_COMPLETE" in os.environ
-
-    is_legacy_import = not (imported_as_parent or importing_submodule)
-    if is_legacy_import and not autocompleting_module_cli:
-        W.high(f'''\
-importing {parent_module_name} is DEPRECATED! \
-Instead, import from {parent_module_name}.{legacy_submodule_name} or {parent_module_name}.all \
-See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info.
-''')
-    return is_legacy_import
my/core/logging.py
@ -1,47 +1,61 @@
-#!/usr/bin/env python3
-'''
-Default logger is a bit meh, see 'test'/run this file for a demo
-'''
+from __future__ import annotations
+
+import logging
+import os
+import sys
+import warnings
+from functools import lru_cache
+from typing import TYPE_CHECKING, Union
 
 
 def test() -> None:
-    import logging
-    import sys
     from typing import Callable
 
     M: Callable[[str], None] = lambda s: print(s, file=sys.stderr)
 
-    M(" Logging module's defaults are not great...'")
-    l = logging.getLogger('test_logger')
+    ## prepare exception for later
+    try:
+        None.whatever  # type: ignore[attr-defined]  # noqa: B018
+    except Exception as e:
+        ex = e
+    ##
+
+    M(" Logging module's defaults are not great:")
+    l = logging.getLogger('default_logger')
     l.error("For example, this should be logged as error. But it's not even formatted properly, doesn't have logger name or level")
 
-    M(" The reason is that you need to remember to call basicConfig() first")
+    M("\n The reason is that you need to remember to call basicConfig() first. Let's do it now:")
     logging.basicConfig()
     l.error("OK, this is better. But the default format kinda sucks, I prefer having timestamps and the file/line number")
 
-    M("")
-    M(" With LazyLogger you get a reasonable logging format, colours and other neat things")
+    M("\n Also exception logging is kinda lame, doesn't print traceback by default unless you remember to pass exc_info:")
+    l.exception(ex)  # type: ignore[possibly-undefined]
 
-    ll = LazyLogger('test')  # No need for basicConfig!
+    M("\n\n With make_logger you get a reasonable logging format, colours (via colorlog library) and other neat things:")
+
+    ll = make_logger('test')  # No need for basicConfig!
     ll.info("default level is INFO")
-    ll.debug(".. so this shouldn't be displayed")
+    ll.debug("... so this shouldn't be displayed")
     ll.warning("warnings are easy to spot!")
-    ll.exception(RuntimeError("exceptions as well"))
+
+    M("\n Exceptions print traceback by default now:")
+    ll.exception(ex)
+
+    M("\n You can (and should) use it via regular logging.getLogger after that, e.g. let's set logging level to DEBUG now")
+    logging.getLogger('test').setLevel(logging.DEBUG)
+    ll.debug("... now debug messages are also displayed")
 
 
-import logging
-from typing import Union, Optional, cast
-import os
-import warnings
+DEFAULT_LEVEL = 'INFO'
+FORMAT = '{start}[%(levelname)-7s %(asctime)s %(name)s %(filename)s:%(lineno)-4d]{end} %(message)s'
+FORMAT_NOCOLOR = FORMAT.format(start='', end='')
 
 Level = int
-LevelIsh = Optional[Union[Level, str]]
+LevelIsh = Union[Level, str, None]
 
 
 def mklevel(level: LevelIsh) -> Level:
-    # todo put in some global file, like envvars.py
-    glevel = os.environ.get('HPI_LOGS', None)
-    if glevel is not None:
-        level = glevel
     if level is None:
         return logging.NOTSET
     if isinstance(level, int):
@ -49,100 +63,145 @@ def mklevel(level: LevelIsh) -> Level:
     return getattr(logging, level.upper())
 
 
-FORMAT = '{start}[%(levelname)-7s %(asctime)s %(name)s %(filename)s:%(lineno)d]{end} %(message)s'
-FORMAT_COLOR = FORMAT.format(start='%(color)s', end='%(end_color)s')
-FORMAT_NOCOLOR = FORMAT.format(start='', end='')
-DATEFMT = '%Y-%m-%d %H:%M:%S'
-
-COLLAPSE_DEBUG_LOGS = os.environ.get('COLLAPSE_DEBUG_LOGS', False)
-
-_init_done = 'lazylogger_init_done'
-
+def get_collapse_level() -> Level | None:
+    # TODO not sure if should be specific to logger name?
+    cl = os.environ.get('LOGGING_COLLAPSE', None)
+    if cl is not None:
+        return mklevel(cl)
+    # legacy name, maybe deprecate?
+    cl = os.environ.get('COLLAPSE_DEBUG_LOGS', None)
+    if cl is not None:
+        return logging.DEBUG
+    return None
+
+
+def get_env_level(name: str) -> Level | None:
+    PREFIX = 'LOGGING_LEVEL_'  # e.g. LOGGING_LEVEL_my_hypothesis=debug
+    # shell doesn't allow using dots in var names without escaping, so also support underscore syntax
+    lvl = os.environ.get(PREFIX + name, None) or os.environ.get(PREFIX + name.replace('.', '_'), None)
+    if lvl is not None:
+        return mklevel(lvl)
+    # if LOGGING_LEVEL_HPI is set, use that. This should override anything the module may set as its default
+    # this is also set when the user passes the --debug flag in the CLI
+    #
+    # check after LOGGING_LEVEL_ prefix since that is more specific
+    if 'LOGGING_LEVEL_HPI' in os.environ:
+        return mklevel(os.environ['LOGGING_LEVEL_HPI'])
+    # legacy name, for backwards compatibility
+    if 'HPI_LOGS' in os.environ:
+        from my.core.warnings import medium
+        medium('The HPI_LOGS environment variable is deprecated, use LOGGING_LEVEL_HPI instead')
+        return mklevel(os.environ['HPI_LOGS'])
+    return None
 
-def setup_logger(logger: logging.Logger, level: LevelIsh) -> None:
-    lvl = mklevel(level)
-    try:
-        import logzero  # type: ignore[import]
-        formatter = logzero.LogFormatter(
-            fmt=FORMAT_COLOR,
-            datefmt=DATEFMT,
-        )
-        use_logzero = True
-    except ModuleNotFoundError:
-        warnings.warn("You might want to install 'logzero' for nice colored logs!")
-        formatter = logging.Formatter(fmt=FORMAT_NOCOLOR, datefmt=DATEFMT)
-        use_logzero = False
-
-    logger.addFilter(AddExceptionTraceback())
-    if use_logzero and not COLLAPSE_DEBUG_LOGS:  # all set, nothing to do
-        # 'simple' setup
-        logzero.setup_logger(logger.name, level=lvl, formatter=formatter)
-        return
-
-    h = CollapseDebugHandler() if COLLAPSE_DEBUG_LOGS else logging.StreamHandler()
-    logger.setLevel(lvl)
-    h.setLevel(lvl)
-    h.setFormatter(formatter)
-    logger.addHandler(h)
-    logger.propagate = False  # ugh. otherwise it duplicates log messages? not sure about it..
+
+def setup_logger(logger: str | logging.Logger, *, level: LevelIsh = None) -> None:
+    """
+    Wrapper to simplify logging setup.
+    """
+    if isinstance(logger, str):
+        logger = logging.getLogger(logger)
+
+    if level is None:
+        level = DEFAULT_LEVEL
+
+    # env level always takes precedence
+    env_level = get_env_level(logger.name)
+    if env_level is not None:
+        lvl = env_level
+    else:
+        lvl = mklevel(level)
+
+    if logger.level == logging.NOTSET:
+        # if it's already set, the user requested a different logging level, let's respect that
+        logger.setLevel(lvl)
+
+    _setup_handlers_and_formatters(name=logger.name)
 
 
-class LazyLogger(logging.Logger):
-    def __new__(cls, name: str, level: LevelIsh = 'INFO') -> 'LazyLogger':
-        logger = logging.getLogger(name)
-
-        # this is called prior to all _log calls so makes sense to do it here?
-        def isEnabledFor_lazyinit(*args, logger=logger, orig=logger.isEnabledFor, **kwargs) -> bool:
-            if not getattr(logger, _init_done, False):  # init once, if necessary
-                setup_logger(logger, level=level)
-                setattr(logger, _init_done, True)
-                logger.isEnabledFor = orig  # restore the callback
-            return orig(*args, **kwargs)
-
-        # oh god.. otherwise might go into an inf loop
-        if not hasattr(logger, _init_done):
-            setattr(logger, _init_done, False)  # will setup on the first call
-            logger.isEnabledFor = isEnabledFor_lazyinit  # type: ignore[assignment]
-        return cast(LazyLogger, logger)
+# cached since this should only be done once per logger instance
+@lru_cache(None)
+def _setup_handlers_and_formatters(name: str) -> None:
+    logger = logging.getLogger(name)
+
+    logger.addFilter(AddExceptionTraceback())
+
+    collapse_level = get_collapse_level()
+    if collapse_level is None or not sys.stderr.isatty():
+        handler = logging.StreamHandler()
+    else:
+        handler = CollapseLogsHandler(maxlevel=collapse_level)
+
+    # default level for handler is NOTSET, which will make it process all messages
+    # we rely on the logger to actually accept/reject log msgs
+    logger.addHandler(handler)
+
+    # this attribute is set to True by default, which causes log entries to be passed to root logger (e.g. if you call basicConfig beforehand)
+    # even if log entry is handled by this logger ... not sure what's the point of this behaviour??
+    logger.propagate = False
+
+    try:
+        # try colorlog first, so user gets nice colored logs
+        import colorlog
+    except ModuleNotFoundError:
+        warnings.warn("You might want to 'pip install colorlog' for nice colored logs", stacklevel=1)
+        formatter = logging.Formatter(FORMAT_NOCOLOR)
+    else:
+        # log_color/reset are specific to colorlog
+        FORMAT_COLOR = FORMAT.format(start='%(log_color)s', end='%(reset)s')
+        # colorlog should detect tty in principle, but doesn't handle everything for some reason
+        # see https://github.com/borntyping/python-colorlog/issues/71
+        if handler.stream.isatty():
+            formatter = colorlog.ColoredFormatter(FORMAT_COLOR)
+        else:
+            formatter = logging.Formatter(FORMAT_NOCOLOR)
+
+    handler.setFormatter(formatter)
 
 
-# by default, logging.exception isn't logging traceback
-# which is a bit annoying since we have to
+# by default, logging.exception isn't logging traceback unless called inside of the exception handler
+# which is a bit annoying since we have to pass exc_info explicitly
 # also see https://stackoverflow.com/questions/75121925/why-doesnt-python-logging-exception-method-log-traceback-by-default
-# tod also amend by post about defensive error handling?
+# todo also amend by post about defensive error handling?
 class AddExceptionTraceback(logging.Filter):
-    def filter(self, record):
-        s = super().filter(record)
-        if s is False:
-            return False
+    def filter(self, record: logging.LogRecord) -> bool:
         if record.levelname == 'ERROR':
             exc = record.msg
             if isinstance(exc, BaseException):
                 if record.exc_info is None or record.exc_info == (None, None, None):
                     exc_info = (type(exc), exc, exc.__traceback__)
                     record.exc_info = exc_info
-        return s
+        return True
 
 
 # todo also save full log in a file?
-class CollapseDebugHandler(logging.StreamHandler):
+class CollapseLogsHandler(logging.StreamHandler):
     '''
     Collapses subsequent debug log lines and redraws on the same line.
     Hopefully this gives both a sense of progress and doesn't clutter the terminal as much?
     '''
-    last = False
+
+    last: bool = False
+
+    maxlevel: Level = logging.DEBUG  # everything with less or equal level will be collapsed
+
+    def __init__(self, *args, maxlevel: Level, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.maxlevel = maxlevel
 
     def emit(self, record: logging.LogRecord) -> None:
         try:
             msg = self.format(record)
-            cur = record.levelno == logging.DEBUG and '\n' not in msg
+            cur = record.levelno <= self.maxlevel and '\n' not in msg
             if cur:
                 if self.last:
                     self.stream.write('\033[K' + '\r')  # clear line + return carriage
             else:
                 if self.last:
-                    self.stream.write('\n')  # clean up after the last debug line
+                    self.stream.write('\n')  # clean up after the last line
             self.last = cur
-            import os
             columns, _ = os.get_terminal_size(0)
             # ugh. the columns thing is meh. dunno I guess ultimately need curses for that
             # TODO also would be cool to have a terminal post-processor? kinda like tail but aware of logging keywords (INFO/DEBUG/etc)
@ -152,5 +211,56 @@ class CollapseDebugHandler(logging.StreamHandler):
             self.handleError(record)
 
 
+def make_logger(name: str, *, level: LevelIsh = None) -> logging.Logger:
+    logger = logging.getLogger(name)
+    setup_logger(logger, level=level)
+    return logger
+
+
+# ughh. hacky way to have a single enlighten instance per interpreter, so it can be shared between modules
+# not sure about this. I guess this should definitely be behind some flag
+# OK, when stdout is not a tty, enlighten doesn't log anything, good
+def get_enlighten():
+    # TODO could add env variable to disable enlighten for a module?
+    from unittest.mock import (
+        Mock,  # Mock to return stub so clients don't have to think about it
+    )
+
+    # for now hidden behind the flag since it's a little experimental
+    if os.environ.get('ENLIGHTEN_ENABLE', None) is None:
+        return Mock()
+
+    try:
+        import enlighten  # type: ignore[import-untyped]
+    except ModuleNotFoundError:
+        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar", stacklevel=1)
+        return Mock()
+
+    # dirty, but otherwise a bit unclear how to share enlighten manager between packages that call each other
+    instance = getattr(enlighten, 'INSTANCE', None)
+    if instance is not None:
+        return instance
+    instance = enlighten.get_manager()
+    setattr(enlighten, 'INSTANCE', instance)
+    return instance
+
+
 if __name__ == '__main__':
     test()
+
+
+## legacy/deprecated methods for backwards compatibility
+if not TYPE_CHECKING:
+    from .compat import deprecated
+
+    @deprecated('use make_logger instead')
+    def LazyLogger(*args, **kwargs):
+        return make_logger(*args, **kwargs)
+
+    @deprecated('use make_logger instead')
+    def logger(*args, **kwargs):
+        return make_logger(*args, **kwargs)
+
+
+##
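To make the new API concrete, a minimal usage sketch; it uses only make_logger and the environment variables introduced in the diff above:

from my.core.logging import make_logger

logger = make_logger(__name__)  # no basicConfig() needed; defaults to INFO
logger.info('visible by default')
logger.debug('hidden unless the level is lowered')

# per-logger level override via environment, e.g. for a logger named 'my.hypothesis':
#   LOGGING_LEVEL_my_hypothesis=debug   (underscores may stand in for dots)
# global override, also set by the CLI --debug flag:
#   LOGGING_LEVEL_HPI=debug
# collapse noisy low-level lines on a tty:
#   LOGGING_COLLAPSE=debug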
my/core/mime.py (new file, 37 lines)
@ -0,0 +1,37 @@
+"""
+Utils for mime/filetype handling
+"""
+
+from __future__ import annotations
+
+from .internal import assert_subpackage
+
+assert_subpackage(__name__)
+
+import functools
+from pathlib import Path
+
+
+@functools.lru_cache(1)
+def _magic():
+    import magic  # type: ignore
+
+    # TODO also has uncompress=True? could be useful
+    return magic.Magic(mime=True)
+
+
+# TODO could reuse in pdf module?
+import mimetypes  # todo do I need init()?
+
+
+# todo wtf? fastermime thinks its mime is application/json even if the extension is xz??
+# whereas magic detects correctly: application/x-zstd and application/x-xz
+def fastermime(path: Path | str) -> str:
+    paths = str(path)
+    # mimetypes is faster, so try it first
+    (mime, _) = mimetypes.guess_type(paths)
+    if mime is not None:
+        return mime
+    # magic is slower but handles more types
+    # TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
+    return _magic().from_file(paths)
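A usage sketch for fastermime; the file paths are made up, and the second call assumes python-magic is installed for the content-sniffing fallback:

from my.core.mime import fastermime

print(fastermime('export/data.json'))    # fast path: resolved from the extension via mimetypes
print(fastermime('/backups/db.sqlite'))  # fallback: content sniffing via libmagic (python-magic)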
my/core/orgmode.py
@ -1,10 +1,13 @@
 """
 Various helpers for reading org-mode data
 """
+
 from datetime import datetime
 
 
 def parse_org_datetime(s: str) -> datetime:
     s = s.strip('[]')
-    for fmt, cl in [
+    for fmt, _cls in [
         ("%Y-%m-%d %a %H:%M", datetime),
         ("%Y-%m-%d %H:%M"   , datetime),
         # todo not sure about these... fallback on 00:00?
@ -15,23 +18,29 @@ def parse_org_datetime(s: str) -> datetime:
             return datetime.strptime(s, fmt)
         except ValueError:
             continue
-    else:
-        raise RuntimeError(f"Bad datetime string {s}")
+    raise RuntimeError(f"Bad datetime string {s}")
 
 
 # TODO I guess want to borrow inspiration from bs4? element type <-> tag; and similar logic for find_one, find_all
 
+from collections.abc import Iterable
+from typing import Callable, TypeVar
+
 from orgparse import OrgNode
-from typing import Iterable, TypeVar, Callable
+
 V = TypeVar('V')
 
 
 def collect(n: OrgNode, cfun: Callable[[OrgNode], Iterable[V]]) -> Iterable[V]:
     yield from cfun(n)
     for c in n.children:
         yield from collect(c, cfun)
 
 
 from more_itertools import one
 from orgparse.extra import Table
 
 
 def one_table(o: OrgNode) -> Table:
     return one(collect(o, lambda n: (x for x in n.body_rich if isinstance(x, Table))))
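A usage sketch for the helpers above; the org snippet is made up:

from orgparse import loads

from my.core.orgmode import one_table, parse_org_datetime

node = loads('''
* expenses
| date       | amount |
|------------+--------|
| 2021-01-01 | 100    |
''')
tbl = one_table(node)  # raises unless exactly one table is found anywhere under the node
dt = parse_org_datetime('[2021-01-01 Fri 10:00]')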
my/core/pandas.py
@ -1,32 +1,54 @@
 '''
 Various pandas helpers and convenience functions
 '''
+
+from __future__ import annotations
+
 # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential
 # NOTE: this file is meant to be importable without Pandas installed
-from datetime import datetime
+import dataclasses
+from collections.abc import Iterable, Iterator
+from datetime import datetime, timezone
 from pprint import pformat
-from typing import Optional, TYPE_CHECKING, Any, Iterable, Type, Dict
-from . import warnings, Res
-from .common import LazyLogger, Json, asdict
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Literal,
+    TypeVar,
+)
 
-logger = LazyLogger(__name__)
+from decorator import decorator
+
+from . import warnings
+from .error import Res, error_to_json, extract_error_datetime
+from .logging import make_logger
+from .types import Json, asdict
+
+logger = make_logger(__name__)
 
 
 if TYPE_CHECKING:
-    # this is kinda pointless at the moment, but handy to annotate DF returning methods now
-    # later will be unignored when they implement type annotations
-    import pandas as pd  # type: ignore
-    # DataFrameT = pd.DataFrame
-    # TODO ugh. pretty annoying, having any is not very useful since it would allow arbitrary coercions..
-    # ideally want to use a type that's like Any but doesn't allow arbitrary coercions??
-    DataFrameT = Any
+    import pandas as pd
+
+    DataFrameT = pd.DataFrame
+    SeriesT = pd.Series
+    from pandas._typing import S1  # meh
+
+    FuncT = TypeVar('FuncT', bound=Callable[..., DataFrameT])
+    # huh interesting -- with from __future__ import annotations don't even need else clause here?
+    # but still if other modules import these we do need some fake runtime types here..
 else:
-    # in runtime, make it defensive so it works without pandas
+    from typing import Optional
+
     DataFrameT = Any
+    SeriesT = Optional  # just some type with one argument
+    S1 = Any
 
 
-def check_dateish(s) -> Iterable[str]:
-    import pandas as pd  # type: ignore  # noqa: F811 not actually a redefinition
+def _check_dateish(s: SeriesT[S1]) -> Iterable[str]:
+    import pandas as pd  # noqa: F811 not actually a redefinition
+
     ctype = s.dtype
     if str(ctype).startswith('datetime64'):
         return
@ -36,7 +58,7 @@ def check_dateish(s) -> Iterable[str]:
     all_timestamps = s.apply(lambda x: isinstance(x, (pd.Timestamp, datetime))).all()
     if not all_timestamps:
         return  # not sure why it would happen, but ok
-    tzs = s.map(lambda x: x.tzinfo).drop_duplicates()
+    tzs = s.map(lambda x: x.tzinfo).drop_duplicates()  # type: ignore[union-attr, var-annotated, arg-type, return-value, unused-ignore]
     examples = s[tzs.index]
     # todo not so sure this warning is that useful... except for stuff without tz
     yield f'''
@ -45,13 +67,50 @@ def check_dateish(s) -> Iterable[str]:
 '''.strip()
 
 
-from .compat import Literal
+def test_check_dateish() -> None:
+    import pandas as pd
+
+    from .compat import fromisoformat
+
+    # empty series shouldn't warn
+    assert list(_check_dateish(pd.Series([]))) == []
+
+    # if no datetimes, shouldn't return any warnings
+    assert list(_check_dateish(pd.Series([1, 2, 3]))) == []
+
+    # all values are datetimes, shouldn't warn
+    # fmt: off
+    assert list(_check_dateish(pd.Series([
+        fromisoformat('2024-08-19T01:02:03'),
+        fromisoformat('2024-08-19T03:04:05'),
+    ]))) == []
+    # fmt: on
+
+    # mixture of timezones -- should warn
+    # fmt: off
+    assert len(list(_check_dateish(pd.Series([
+        fromisoformat('2024-08-19T01:02:03'),
+        fromisoformat('2024-08-19T03:04:05Z'),
+    ])))) == 1
+    # fmt: on
+
+    # TODO hmm. maybe this should actually warn?
+    # fmt: off
+    assert len(list(_check_dateish(pd.Series([
+        'whatever',
+        fromisoformat('2024-08-19T01:02:03'),
+    ])))) == 0
+    # fmt: on
 
 
+# fmt: off
 ErrorColPolicy = Literal[
     'add_if_missing',  # add error column if it's missing
     'warn'          ,  # warn, but do not modify
     'ignore'        ,  # no warnings
 ]
+# fmt: on
 
 
 def check_error_column(df: DataFrameT, *, policy: ErrorColPolicy) -> Iterable[str]:
     if 'error' in df:
@ -71,19 +130,15 @@ No 'error' column detected. You probably forgot to handle errors defensively, wh
     yield wmsg
 
 
-from typing import Any, Callable, TypeVar
-FuncT = TypeVar('FuncT', bound=Callable[..., DataFrameT])
-
-# TODO ugh. typing this is a mess... should I use mypy_extensions.VarArg/KwArgs?? or what??
-from decorator import decorator
+# TODO ugh. typing this is a mess... perhaps should use .compat.ParamSpec?
 @decorator
 def check_dataframe(f: FuncT, error_col_policy: ErrorColPolicy = 'add_if_missing', *args, **kwargs) -> DataFrameT:
-    df = f(*args, **kwargs)
+    df: DataFrameT = f(*args, **kwargs)
     tag = '{f.__module__}:{f.__name__}'
     # makes sense to keep super defensive
     try:
-        for col, data in df.reset_index().iteritems():
-            for w in check_dateish(data):
+        for col, data in df.reset_index().items():
+            for w in _check_dateish(data):
                 warnings.low(f"{tag}, column '{col}': {w}")
     except Exception as e:
         logger.exception(e)
@ -94,11 +149,11 @@ def check_dataframe(f: FuncT, error_col_policy: ErrorColPolicy='add_if_missing',
         logger.exception(e)
     return df
 
 
 # todo doctor: could have a suggestion to wrap dataframes with it?? discover by return type?
 
 
-def error_to_row(e: Exception, *, dt_col: str='dt', tz=None) -> Json:
-    from .error import error_to_json, extract_error_datetime
+def error_to_row(e: Exception, *, dt_col: str = 'dt', tz: timezone | None = None) -> Json:
     edt = extract_error_datetime(e)
     if edt is not None and edt.tzinfo is None and tz is not None:
         edt = edt.replace(tzinfo=tz)
@ -107,8 +162,7 @@ def error_to_row(e: Exception, *, dt_col: str='dt', tz=None) -> Json:
     return err_dict
 
 
-# todo not sure about naming
-def to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
+def _to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
     for r in it:
         if isinstance(r, Exception):
             yield error_to_row(r)
@ -120,11 +174,11 @@ def to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]:
 # no type for dataclass?
 Schema = Any
 
-def _as_columns(s: Schema) -> Dict[str, Type]:
+
+def _as_columns(s: Schema) -> dict[str, type]:
     # todo would be nice to extract properties; add tests for this as well
-    import dataclasses as D
-    if D.is_dataclass(s):
-        return {f.name: f.type for f in D.fields(s)}
+    if dataclasses.is_dataclass(s):
+        return {f.name: f.type for f in dataclasses.fields(s)}  # type: ignore[misc]  # ugh, why mypy thinks f.type can return str??
     # else must be NamedTuple??
     # todo assert my.core.common.is_namedtuple?
     return getattr(s, '_field_types')
@ -132,7 +186,7 @@ def _as_columns(s: Schema) -> Dict[str, Type]:
 
 # todo add proper types
 @check_dataframe
-def as_dataframe(it: Iterable[Res[Any]], schema: Optional[Schema]=None) -> DataFrameT:
+def as_dataframe(it: Iterable[Res[Any]], schema: Schema | None = None) -> DataFrameT:
     # todo warn if schema isn't specified?
     # ok nice supports dataframe/NT natively
     # https://github.com/pandas-dev/pandas/pull/27999
@ -141,26 +195,88 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Optional[Schema]=None) -> DataF
     # same for NamedTuple -- seems that it takes whatever schema the first NT has
     # so we need to convert each individually... sigh
     import pandas as pd  # noqa: F811 not actually a redefinition
+
     columns = None if schema is None else list(_as_columns(schema).keys())
-    return pd.DataFrame(to_jsons(it), columns=columns)
+    return pd.DataFrame(_to_jsons(it), columns=columns)
+
+
+# ugh. in principle this could be inside the test
+# might be due to use of from __future__ import annotations
+# can quickly reproduce by running pytest tests/tz.py tests/core/test_pandas.py
+# possibly will be resolved after fix in pytest?
+# see https://github.com/pytest-dev/pytest/issues/7856
+@dataclasses.dataclass
+class _X:
+    # FIXME try moving inside?
+    x: int
 
 
 def test_as_dataframe() -> None:
+    import numpy as np
+    import pandas as pd
     import pytest
-    it = (dict(i=i, s=f'str{i}') for i in range(10))
+    from pandas.testing import assert_frame_equal
+
+    from .compat import fromisoformat
+
+    it = ({'i': i, 's': f'str{i}'} for i in range(5))
     with pytest.warns(UserWarning, match=r"No 'error' column") as record_warnings:  # noqa: F841
-        df = as_dataframe(it)
+        df: DataFrameT = as_dataframe(it)
     # todo test other error col policies
-    assert list(df.columns) == ['i', 's', 'error']
 
-    assert len(as_dataframe([])) == 0
+    # fmt: off
+    assert_frame_equal(
+        df,
+        pd.DataFrame({
+            'i'    : [0     , 1     , 2     , 3     , 4     ],
+            's'    : ['str0', 'str1', 'str2', 'str3', 'str4'],
+            # NOTE: error column is always added
+            'error': [None  , None  , None  , None  , None  ],
+        }),
+    )
+    # fmt: on
+    assert_frame_equal(as_dataframe([]), pd.DataFrame(columns=['error']))
 
-    from dataclasses import dataclass
+    df2: DataFrameT = as_dataframe([], schema=_X)
+    assert_frame_equal(
+        df2,
+        # FIXME hmm. x column type should be an int?? and error should be string (or object??)
+        pd.DataFrame(columns=['x', 'error']),
+    )
 
-    @dataclass
-    class X:
-        x: int
+    @dataclasses.dataclass
+    class S:
+        value: str
 
-    # makes sense to specify the schema so the downstream program doesn't fail in case of empty iterable
-    df = as_dataframe([], schema=X)
-    assert list(df.columns) == ['x', 'error']
+    def it2() -> Iterator[Res[S]]:
+        yield S(value='test')
+        yield RuntimeError('i failed')
+
+    df = as_dataframe(it2())
+    # fmt: off
+    assert_frame_equal(
+        df,
+        pd.DataFrame(data={
+            'value': ['test', np.nan                    ],
+            'error': [np.nan, 'RuntimeError: i failed\n'],
+            'dt'   : [np.nan, np.nan                    ],
+        }).astype(dtype={'dt': 'float'}),  # FIXME should be datetime64 as below
+    )
    # fmt: on

    def it3() -> Iterator[Res[S]]:
        yield S(value='aba')
        yield RuntimeError('whoops')
        yield S(value='cde')
        yield RuntimeError('exception with datetime', fromisoformat('2024-08-19T22:47:01Z'))

    df = as_dataframe(it3())

    # fmt: off
    assert_frame_equal(df, pd.DataFrame(data={
        'value': ['aba' , np.nan                  , 'cde' , np.nan                     ],
        'error': [np.nan, 'RuntimeError: whoops\n', np.nan, "RuntimeError: ('exception with datetime', datetime.datetime(2024, 8, 19, 22, 47, 1, tzinfo=datetime.timezone.utc))\n"],
        # note: dt column is added even if errors don't have an associated datetime
        'dt'   : [np.nan, np.nan                  , np.nan, '2024-08-19 22:47:01+00:00'],
    }).astype(dtype={'dt': 'datetime64[ns, UTC]'}))
    # fmt: on
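A usage sketch of as_dataframe on a mixed stream of values and errors, mirroring the tests above; the entries function is made up:

from my.core.pandas import as_dataframe

def entries():
    yield {'id': 1}
    yield RuntimeError('source was corrupt')  # kept as a row rather than crashing the pipeline
    yield {'id': 2}

df = as_dataframe(entries())
ok = df[df['error'].isna()]  # downstream code can filter failures via the always-present 'error' column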
@ -1,8 +1,14 @@
 from pathlib import Path
 
 
+# todo preinit isn't really a good name? it's only in a separate file because
+# - it's imported from my.core.init (so we want to keep this file as small/reliable as possible, hence not common or something)
+# - we still need this function in __main__, so has to be separate from my/core/init.py
 def get_mycfg_dir() -> Path:
-    import appdirs  # type: ignore[import]
     import os
+
+    import appdirs  # type: ignore[import-untyped]
+
     # not sure if that's necessary, i.e. could rely on PYTHONPATH instead
     # on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path?
     mvar = os.environ.get('MY_CONFIG')
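A sketch of the MY_CONFIG override described in the comment above (the helper lives in my/core/preinit.py); the path is hypothetical, and the rest of the function is cut off in this hunk, so the expected output is an assumption:

import os

os.environ['MY_CONFIG'] = '/tmp/hpi_config'  # hypothetical config location

from my.core.preinit import get_mycfg_dir

print(get_mycfg_dir())  # expected: /tmp/hpi_config (assumption: MY_CONFIG wins over the appdirs default)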
my/core/pytest.py (new file, 24 lines)
@ -0,0 +1,24 @@
+"""
+Helpers to prevent depending on pytest in runtime
+"""
+
+from .internal import assert_subpackage
+
+assert_subpackage(__name__)
+
+import sys
+import typing
+
+under_pytest = 'pytest' in sys.modules
+
+if typing.TYPE_CHECKING or under_pytest:
+    import pytest
+
+    parametrize = pytest.mark.parametrize
+else:
+
+    def parametrize(*_args, **_kwargs):
+        def wrapper(f):
+            return f
+
+        return wrapper
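A usage sketch of the parametrize shim:

from my.core.pytest import parametrize

@parametrize('n,expected', [(2, 4), (3, 9)])
def test_square(n: int, expected: int) -> None:
    assert n * n == expected

# under pytest this is the real pytest.mark.parametrize;
# imported outside pytest the decorator is a no-op, so runtime code never needs pytest installed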
my/core/query.py
@ -5,20 +5,29 @@ The main entrypoint to this library is the 'select' function below; try:
     python3 -c "from my.core.query import select; help(select)"
 """
 
+from __future__ import annotations
+
 import dataclasses
 import importlib
 import inspect
 import itertools
+from collections.abc import Iterable, Iterator
 from datetime import datetime
-from typing import TypeVar, Tuple, Optional, Union, Callable, Iterable, Iterator, Dict, Any, NamedTuple, List
+from typing import (
+    Any,
+    Callable,
+    NamedTuple,
+    Optional,
+    TypeVar,
+)
 
 import more_itertools
 
-from .common import is_namedtuple
+from . import error as err
 from .error import Res, unwrap
+from .types import is_namedtuple
 from .warnings import low
 
 
 T = TypeVar("T")
 ET = Res[T]
@ -39,6 +48,7 @@ class Unsortable(NamedTuple):
 
 class QueryException(ValueError):
     """Used to differentiate query-related errors, so the CLI interface is more expressive"""
+
     pass
 
@ -51,16 +61,16 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterab
     """
     try:
         mod = importlib.import_module(module_name)
-        for (fname, func) in inspect.getmembers(mod, inspect.isfunction):
+        for fname, f in inspect.getmembers(mod, inspect.isfunction):
             if fname == function_name:
-                return func
+                return f
         # in case the function is defined dynamically,
         # like with a globals().setdefault(...) or a module-level __getattr__ function
         func = getattr(mod, function_name, None)
         if func is not None and callable(func):
             return func
     except Exception as e:
-        raise QueryException(str(e))
+        raise QueryException(str(e))  # noqa: B904
     raise QueryException(f"Could not find function '{function_name}' in '{module_name}'")
@ -74,7 +84,7 @@ def locate_qualified_function(qualified_name: str) -> Callable[[], Iterable[ET]]
     return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1 :])
 
 
-def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optional[OrderFunc]:
+def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc | None:
     """
     Attempts to find an attribute which matches the 'where_function' on the object,
     using some getattr/dict checks. Returns a function which when called with
@ -102,7 +112,7 @@ def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optiona
             if where(v):
                 return lambda o: o.get(k, default)  # type: ignore[union-attr]
     elif dataclasses.is_dataclass(obj):
-        for (field_name, _annotation) in obj.__annotations__.items():
+        for field_name in obj.__annotations__.keys():
             if where(getattr(obj, field_name)):
                 return lambda o: getattr(o, field_name, default)
     elif is_namedtuple(obj):
@ -120,11 +130,12 @@ def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optiona
 
 def _generate_order_by_func(
     obj_res: Res[T],
-    key: Optional[str] = None,
-    where_function: Optional[Where] = None,
-    default: Optional[U] = None,
+    *,
+    key: str | None = None,
+    where_function: Where | None = None,
+    default: U | None = None,
     force_unsortable: bool = False,
-) -> Optional[OrderFunc]:
+) -> OrderFunc | None:
     """
     Accepts an object Res[T] (Instance of some class or Exception)
@ -177,7 +188,7 @@ pass 'drop_exceptions' to ignore exceptions""")
             return lambda o: o.get(key, default)  # type: ignore[union-attr]
         else:
             if hasattr(obj, key):
-                return lambda o: getattr(o, key, default)  # type: ignore[arg-type]
+                return lambda o: getattr(o, key, default)
 
     # Note: if the attribute you're ordering by is an Optional type,
     # and on some objects it'll return None, the getattr(o, field_name, default) won't
@ -189,7 +200,7 @@ pass 'drop_exceptions' to ignore exceptions""")
 
     # user must provide either a key or a where predicate
     if where_function is not None:
-        func: Optional[OrderFunc] = attribute_func(obj, where_function, default)
+        func: OrderFunc | None = attribute_func(obj, where_function, default)
         if func is not None:
             return func
@ -205,29 +216,13 @@ pass 'drop_exceptions' to ignore exceptions""")
     return None  # couldn't compute a OrderFunc for this class/instance
 
 
-def _drop_exceptions(itr: Iterator[ET]) -> Iterator[T]:
-    """Return non-errors from the iterable"""
-    for o in itr:
-        if isinstance(o, Exception):
-            continue
-        yield o
-
-
-def _raise_exceptions(itr: Iterable[ET]) -> Iterator[T]:
-    """Raise errors from the iterable, stops the select function"""
-    for o in itr:
-        if isinstance(o, Exception):
-            raise o
-        yield o
-
-
 # currently using the 'key set' as a proxy for 'this is the same type of thing'
 def _determine_order_by_value_key(obj_res: ET) -> Any:
     """
     Returns either the class, or a tuple of the dictionary keys
     """
     key = obj_res.__class__
-    if key == dict:
+    if key is dict:
         # assuming same keys signify same way to determine ordering
         return tuple(obj_res.keys())  # type: ignore[union-attr]
     return key
@ -245,7 +240,7 @@ def _drop_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Iterator[ET]:
 
 # try getting the first value from the iterator
 # similar to my.core.common.warn_if_empty? this doesn't go through the whole iterator though
-def _peek_iter(itr: Iterator[ET]) -> Tuple[Optional[ET], Iterator[ET]]:
+def _peek_iter(itr: Iterator[ET]) -> tuple[ET | None, Iterator[ET]]:
     itr = more_itertools.peekable(itr)
     try:
         first_item = itr.peek()
@ -256,9 +251,9 @@ def _peek_iter(itr: Iterator[ET]) -> Tuple[Optional[ET], Iterator[ET]]:
 
 # similar to 'my.core.error.sort_res_by'?
-def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
-    unsortable: List[Unsortable] = []
-    sortable: List[ET] = []
+def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> tuple[Iterator[Unsortable], Iterator[ET]]:
+    unsortable: list[Unsortable] = []
+    sortable: list[ET] = []
     for o in itr:
         # if input to select was another select
         if isinstance(o, Unsortable):
@ -276,10 +271,11 @@ def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Tuple[Iterator[Un
 # the second being items for which orderfunc returned a non-none value
 def _handle_unsorted(
     itr: Iterator[ET],
+    *,
     orderfunc: OrderFunc,
     drop_unsorted: bool,
     wrap_unsorted: bool
-) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
+) -> tuple[Iterator[Unsortable], Iterator[ET]]:
     # prefer drop_unsorted to wrap_unsorted, if both were present
     if drop_unsorted:
         return iter([]), _drop_unsorted(itr, orderfunc)
@ -294,16 +290,16 @@ def _handle_unsorted(
 # different types. ***This consumes the iterator***, so
 # you should definitely itertools.tee it beforehand
 # as to not exhaust the values
-def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: Optional[U] = None) -> OrderFunc:
+def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: U | None = None) -> OrderFunc:
     # TODO: add a kwarg to force lookup for every item? would sort of be like core.common.guess_datetime then
-    order_by_lookup: Dict[Any, OrderFunc] = {}
+    order_by_lookup: dict[Any, OrderFunc] = {}
 
     # need to go through a copy of the whole iterator here to
     # pre-generate functions to support sorting mixed types
     for obj_res in itr:
         key: Any = _determine_order_by_value_key(obj_res)
         if key not in order_by_lookup:
-            keyfunc: Optional[OrderFunc] = _generate_order_by_func(
+            keyfunc: OrderFunc | None = _generate_order_by_func(
                 obj_res,
                 where_function=order_value,
                 default=default,
@ -324,12 +320,12 @@ def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: O
 def _handle_generate_order_by(
     itr,
     *,
-    order_by: Optional[OrderFunc] = None,
-    order_key: Optional[str] = None,
-    order_value: Optional[Where] = None,
-    default: Optional[U] = None,
-) -> Tuple[Optional[OrderFunc], Iterator[ET]]:
-    order_by_chosen: Optional[OrderFunc] = order_by  # if the user just supplied a function themselves
+    order_by: OrderFunc | None = None,
+    order_key: str | None = None,
+    order_value: Where | None = None,
+    default: U | None = None,
+) -> tuple[OrderFunc | None, Iterator[ET]]:
+    order_by_chosen: OrderFunc | None = order_by  # if the user just supplied a function themselves
     if order_by is not None:
         return order_by, itr
     if order_key is not None:
@ -354,17 +350,19 @@ def _handle_generate_order_by(
 
 def select(
-    src: Union[Iterable[ET], Callable[[], Iterable[ET]]],
+    src: Iterable[ET] | Callable[[], Iterable[ET]],
     *,
-    where: Optional[Where] = None,
-    order_by: Optional[OrderFunc] = None,
-    order_key: Optional[str] = None,
-    order_value: Optional[Where] = None,
-    default: Optional[U] = None,
+    where: Where | None = None,
+    order_by: OrderFunc | None = None,
+    order_key: str | None = None,
+    order_value: Where | None = None,
+    default: U | None = None,
     reverse: bool = False,
-    limit: Optional[int] = None,
+    limit: int | None = None,
     drop_unsorted: bool = False,
     wrap_unsorted: bool = True,
+    warn_exceptions: bool = False,
+    warn_func: Callable[[Exception], None] | None = None,
     drop_exceptions: bool = False,
     raise_exceptions: bool = False,
 ) -> Iterator[ET]:
@ -408,7 +406,9 @@ def select(
     to copy the iterator in memory (using itertools.tee) to determine how to order it
     in memory
 
-    The 'drop_exceptions' and 'raise_exceptions' let you ignore or raise when the src contains exceptions
+    The 'drop_exceptions', 'raise_exceptions', 'warn_exceptions' let you ignore or raise
+    when the src contains exceptions. The 'warn_func' lets you provide a custom function
+    to call when an exception is encountered instead of using the 'warnings' module
 
     src: an iterable of mixed types, or a function to be called,
         as the input to this function
@ -464,15 +464,18 @@ Will attempt to call iter() on the value""")
     try:
         itr: Iterator[ET] = iter(it)
     except TypeError as t:
-        raise QueryException("Could not convert input src to an Iterator: " + str(t))
+        raise QueryException("Could not convert input src to an Iterator: " + str(t))  # noqa: B904
 
     # if both raise_exceptions and drop_exceptions are provided for some reason,
     # should raise exceptions before dropping them
     if raise_exceptions:
-        itr = _raise_exceptions(itr)
+        itr = err.raise_exceptions(itr)
 
     if drop_exceptions:
-        itr = _drop_exceptions(itr)
+        itr = err.drop_exceptions(itr)
+
+    if warn_exceptions:
+        itr = err.warn_exceptions(itr, warn_func=warn_func)
 
     if where is not None:
         itr = filter(where, itr)
@ -498,10 +501,15 @@ Will attempt to call iter() on the value""")
         # note: can't just attach sort unsortable values in the same iterable as the
         # other items because they don't have any lookups for order_key or functions
         # to handle items in the order_by_lookup dictionary
-        unsortable, itr = _handle_unsorted(itr, order_by_chosen, drop_unsorted, wrap_unsorted)
+        unsortable, itr = _handle_unsorted(
+            itr,
+            orderfunc=order_by_chosen,
+            drop_unsorted=drop_unsorted,
+            wrap_unsorted=wrap_unsorted,
+        )
 
         # run the sort, with the computed order by function
-        itr = iter(sorted(itr, key=order_by_chosen, reverse=reverse))  # type: ignore[arg-type, type-var]
+        itr = iter(sorted(itr, key=order_by_chosen, reverse=reverse))  # type: ignore[arg-type]
 
         # re-attach unsortable values to the front/back of the list
         if reverse:
@ -589,7 +597,7 @@ def test_couldnt_determine_order() -> None:
     res = list(select(iter([object()]), order_value=lambda o: isinstance(o, datetime)))
     assert len(res) == 1
     assert isinstance(res[0], Unsortable)
-    assert type(res[0].obj) == object
+    assert type(res[0].obj) is object
 
 
 # same value type, different keys, with clashing keys
@ -605,7 +613,7 @@ class _B(NamedTuple):
 
 # move these to tests/? They are re-used so much in the tests below,
 # not sure where the best place for these is
-def _mixed_iter() -> Iterator[Union[_A, _B]]:
+def _mixed_iter() -> Iterator[_A | _B]:
     yield _A(x=datetime(year=2009, month=5, day=10, hour=4, minute=10, second=1), y=5, z=10)
     yield _B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))
     yield _A(x=datetime(year=2005, month=5, day=10, hour=4, minute=10, second=1), y=10, z=2)
@ -614,7 +622,7 @@ def _mixed_iter() -> Iterator[Union[_A, _B]]:
     yield _A(x=datetime(year=2005, month=4, day=10, hour=4, minute=10, second=1), y=2, z=-5)
 
 
-def _mixed_iter_errors() -> Iterator[Res[Union[_A, _B]]]:
+def _mixed_iter_errors() -> Iterator[Res[_A | _B]]:
     m = _mixed_iter()
     yield from itertools.islice(m, 0, 3)
     yield RuntimeError("Unhandled error!")
@ -650,7 +658,7 @@ def test_wrap_unsortable() -> None:
 
     # by default, wrap unsortable
     res = list(select(_mixed_iter(), order_key="z"))
-    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 2})
+    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 2})
 
 
 def test_disabled_wrap_unsorted() -> None:
@ -669,7 +677,7 @@ def test_drop_unsorted() -> None:
     # test drop unsortable, should remove them before the 'sorted' call
     res = list(select(_mixed_iter(), order_key="z", wrap_unsorted=False, drop_unsorted=True))
     assert len(res) == 4
-    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4})
+    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4})
 
 
 def test_drop_exceptions() -> None:
@ -693,15 +701,16 @@ def test_raise_exceptions() -> None:
 
 def test_wrap_unsortable_with_error_and_warning() -> None:
-    import pytest
     from collections import Counter
 
+    import pytest
+
     # by default should wrap unsortable (error)
     with pytest.warns(UserWarning, match=r"encountered exception"):
         res = list(select(_mixed_iter_errors(), order_value=lambda o: isinstance(o, datetime)))
-    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
+    assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
     # compare the returned error wrapped in the Unsortable
-    returned_error = next((o for o in res if isinstance(o, Unsortable))).obj
+    returned_error = next(o for o in res if isinstance(o, Unsortable)).obj
     assert "Unhandled error!" == str(returned_error)
@ -711,7 +720,7 @@ def test_order_key_unsortable() -> None:
 
     # both unsortable and items which don't match the order_by (order_key) in this case should be classified unsorted
|
||||||
res = list(select(_mixed_iter_errors(), order_key="z"))
|
res = list(select(_mixed_iter_errors(), order_key="z"))
|
||||||
assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 3})
|
assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 3})
|
||||||
|
|
||||||
|
|
||||||
def test_order_default_param() -> None:
|
def test_order_default_param() -> None:
|
||||||
|
@ -731,7 +740,7 @@ def test_no_recursive_unsortables() -> None:
|
||||||
# select to select as input, wrapping unsortables the first time, second should drop them
|
# select to select as input, wrapping unsortables the first time, second should drop them
|
||||||
# reverse=True to send errors to the end, so the below order_key works
|
# reverse=True to send errors to the end, so the below order_key works
|
||||||
res = list(select(_mixed_iter_errors(), order_key="z", reverse=True))
|
res = list(select(_mixed_iter_errors(), order_key="z", reverse=True))
|
||||||
assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "Unsortable": 3})
|
assert Counter(type(t).__name__ for t in res) == Counter({"_A": 4, "Unsortable": 3})
|
||||||
|
|
||||||
# drop_unsorted
|
# drop_unsorted
|
||||||
dropped = list(select(res, order_key="z", drop_unsorted=True))
|
dropped = list(select(res, order_key="z", drop_unsorted=True))
|
||||||
|
|
|
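The hunks above swap the module-private exception helpers for the shared ones in my.core.error and add a warn_exceptions stage. A minimal standalone sketch of how such handlers compose over an iterator of results follows; the helper names mirror the diff, but these bodies are illustrative, not the library's implementation:

    import warnings
    from collections.abc import Iterator
    from typing import Callable, Optional, Union

    Res = Union[int, Exception]  # stand-in for the library's Res type

    def drop_exceptions(itr: Iterator[Res]) -> Iterator[Res]:
        # silently skip exception values
        return (o for o in itr if not isinstance(o, Exception))

    def warn_exceptions(itr: Iterator[Res], warn_func: Optional[Callable[[Exception], None]] = None) -> Iterator[Res]:
        # warn on each exception value, but keep it in the stream
        wf = warn_func or (lambda e: warnings.warn(f"encountered exception {e}"))
        for o in itr:
            if isinstance(o, Exception):
                wf(o)
            yield o

    items: "list[Res]" = [1, ValueError("boom"), 2]
    assert list(drop_exceptions(iter(items))) == [1, 2]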
my/core/query_range.py
@@ -7,27 +7,30 @@ filtered iterator
 See the select_range function below
 """

+from __future__ import annotations
+
 import re
 import time
-from functools import lru_cache
-from datetime import datetime, timedelta, date
-from typing import Callable, Iterator, NamedTuple, Optional, Any, Type
+from collections.abc import Iterator
+from datetime import date, datetime, timedelta
+from functools import cache
+from typing import Any, Callable, NamedTuple

 import more_itertools

+from .compat import fromisoformat
 from .query import (
-    QueryException,
-    select,
+    ET,
     OrderFunc,
+    QueryException,
     Where,
     _handle_generate_order_by,
-    ET,
+    select,
 )

-from .common import isoparse
-timedelta_regex = re.compile(r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$")
+timedelta_regex = re.compile(
+    r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$"
+)


 # https://stackoverflow.com/a/51916936
@@ -40,7 +43,7 @@ def parse_timedelta_string(timedelta_str: str) -> timedelta:
     if parts is None:
         raise ValueError(f"Could not parse time duration from {timedelta_str}.\nValid examples: '8h', '1w2d8h5m20s', '2m4s'")
     time_params = {name: float(param) for name, param in parts.groupdict().items() if param}
-    return timedelta(**time_params)  # type: ignore[arg-type]
+    return timedelta(**time_params)


 def parse_timedelta_float(timedelta_str: str) -> float:
@@ -73,19 +76,34 @@ def parse_datetime_float(date_str: str) -> float:
         return ds_float
     try:
         # isoformat - default format when you call str() on datetime
+        # this also parses dates like '2020-01-01'
         return datetime.fromisoformat(ds).timestamp()
     except ValueError:
         pass
     try:
-        return isoparse(ds).timestamp()
+        return fromisoformat(ds).timestamp()
     except (AssertionError, ValueError):
         pass
+
+    try:
+        import dateparser
+    except ImportError:
+        pass
+    else:
+        # dateparser is a bit more lenient than the above, lets you type
+        # all sorts of dates as inputs
+        # https://github.com/scrapinghub/dateparser#how-to-use
+        res: datetime | None = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
+        if res is not None:
+            return res.timestamp()
+
     raise QueryException(f"Was not able to parse {ds} into a datetime")


 # probably DateLike input? but a user could specify an order_key
 # which is an epoch timestamp or a float value which they
 # expect to be converted to a datetime to compare
-@lru_cache(maxsize=None)
+@cache
 def _datelike_to_float(dl: Any) -> float:
     if isinstance(dl, datetime):
         return dl.timestamp()
@@ -96,7 +114,7 @@ def _datelike_to_float(dl: Any) -> float:
     try:
         return parse_datetime_float(dl)
     except QueryException as q:
-        raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q))
+        raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q))  # noqa: B904


 class RangeTuple(NamedTuple):
@@ -117,11 +135,12 @@ class RangeTuple(NamedTuple):
     of the timeframe -- 'before'
     - before and after - anything after 'after' and before 'before', acts as a time range
     """

     # technically doesn't need to be Optional[Any],
     # just to make it more clear these can be None
-    after: Optional[Any]
-    before: Optional[Any]
-    within: Optional[Any]
+    after: Any | None
+    before: Any | None
+    within: Any | None


 Converter = Callable[[Any], Any]
@@ -132,14 +151,15 @@ def _parse_range(
     unparsed_range: RangeTuple,
     end_parser: Converter,
     within_parser: Converter,
-    parsed_range: Optional[RangeTuple] = None,
-    error_message: Optional[str] = None
-) -> Optional[RangeTuple]:
+    parsed_range: RangeTuple | None = None,
+    error_message: str | None = None,
+) -> RangeTuple | None:

     if parsed_range is not None:
         return parsed_range

     err_msg = error_message or RangeTuple.__doc__
+    assert err_msg is not None  # make mypy happy
     after, before, within = None, None, None

     none_count = more_itertools.ilen(filter(lambda o: o is None, list(unparsed_range)))
@@ -162,11 +182,11 @@ def _create_range_filter(
     end_parser: Converter,
     within_parser: Converter,
     attr_func: Where,
-    parsed_range: Optional[RangeTuple] = None,
-    default_before: Optional[Any] = None,
-    value_coercion_func: Optional[Converter] = None,
-    error_message: Optional[str] = None,
-) -> Optional[Where]:
+    parsed_range: RangeTuple | None = None,
+    default_before: Any | None = None,
+    value_coercion_func: Converter | None = None,
+    error_message: str | None = None,
+) -> Where | None:
     """
     Handles:
     - parsing the user input into values that are comparable to items the iterable returns
@@ -258,15 +278,17 @@ def _create_range_filter(
 def select_range(
     itr: Iterator[ET],
     *,
-    where: Optional[Where] = None,
-    order_key: Optional[str] = None,
-    order_value: Optional[Where] = None,
-    order_by_value_type: Optional[Type] = None,
-    unparsed_range: Optional[RangeTuple] = None,
+    where: Where | None = None,
+    order_key: str | None = None,
+    order_value: Where | None = None,
+    order_by_value_type: type | None = None,
+    unparsed_range: RangeTuple | None = None,
     reverse: bool = False,
-    limit: Optional[int] = None,
+    limit: int | None = None,
     drop_unsorted: bool = False,
     wrap_unsorted: bool = False,
+    warn_exceptions: bool = False,
+    warn_func: Callable[[Exception], None] | None = None,
     drop_exceptions: bool = False,
     raise_exceptions: bool = False,
 ) -> Iterator[ET]:
@@ -293,21 +315,30 @@ def select_range(
         unparsed_range = None

     # some operations to do before ordering/filtering
-    if drop_exceptions or raise_exceptions or where is not None:
+    if drop_exceptions or raise_exceptions or where is not None or warn_exceptions:
         # doesn't wrap unsortable items, because we pass no order related kwargs
-        itr = select(itr, where=where, drop_exceptions=drop_exceptions, raise_exceptions=raise_exceptions)
+        itr = select(
+            itr,
+            where=where,
+            drop_exceptions=drop_exceptions,
+            raise_exceptions=raise_exceptions,
+            warn_exceptions=warn_exceptions,
+            warn_func=warn_func,
+        )

-    order_by_chosen: Optional[OrderFunc] = None
+    order_by_chosen: OrderFunc | None = None

     # if the user didn't specify an attribute to order value, but specified a type
     # we should search for on each value in the iterator
     if order_value is None and order_by_value_type is not None:
         # search for that type on the iterator object
-        order_value = lambda o: isinstance(o, order_by_value_type)  # type: ignore
+        order_value = lambda o: isinstance(o, order_by_value_type)

     # if the user supplied a order_key, and/or we've generated an order_value, create
     # the function that accesses that type on each value in the iterator
     if order_key is not None or order_value is not None:
+        # _generate_order_value_func internally here creates a copy of the iterator, which has to
+        # be consumed in-case we're sorting by mixed types
         order_by_chosen, itr = _handle_generate_order_by(itr, order_key=order_key, order_value=order_value)
         # signifies that itr is empty -- can early return here
         if order_by_chosen is None:
@@ -319,11 +350,11 @@ def select_range(
         if order_by_chosen is None:
             raise QueryException("""Can't order by range if we have no way to order_by!
 Specify a type or a key to order the value by""")
-        else:
-            # force drop_unsorted=True so we can use _create_range_filter
-            # sort the iterable by the generated order_by_chosen function
-            itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
-            filter_func: Optional[Where]
+        # force drop_unsorted=True so we can use _create_range_filter
+        # sort the iterable by the generated order_by_chosen function
+        itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
+        filter_func: Where | None
         if order_by_value_type in [datetime, date]:
             filter_func = _create_range_filter(
                 unparsed_range=unparsed_range,
@@ -331,7 +362,8 @@ Specify a type or a key to order the value by""")
                 within_parser=parse_timedelta_float,
                 attr_func=order_by_chosen,  # type: ignore[arg-type]
                 default_before=time.time(),
-                value_coercion_func=_datelike_to_float)
+                value_coercion_func=_datelike_to_float,
+            )
         elif order_by_value_type in [int, float]:
             # allow primitives to be converted using the default int(), float() callables
             filter_func = _create_range_filter(
@@ -340,7 +372,8 @@ Specify a type or a key to order the value by""")
                 within_parser=order_by_value_type,
                 attr_func=order_by_chosen,  # type: ignore[arg-type]
                 default_before=None,
-                value_coercion_func=order_by_value_type)
+                value_coercion_func=order_by_value_type,
+            )
         else:
             # TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
             # would need to allow passing the end_parser, within parser, default before and value_coercion_func...
@@ -367,7 +400,7 @@ Specify a type or a key to order the value by""")
     return itr


-# re-use items from query for testing
+# reuse items from query for testing
 from .query import _A, _B, _Float, _mixed_iter_errors


@@ -447,8 +480,8 @@ def test_range_predicate() -> None:
     )

     # filter from 0 to 5
-    rn: Optional[RangeTuple] = RangeTuple("0", "5", None)
-    zero_to_five_filter: Optional[Where] = int_filter_func(unparsed_range=rn)
+    rn: RangeTuple = RangeTuple("0", "5", None)
+    zero_to_five_filter: Where | None = int_filter_func(unparsed_range=rn)
     assert zero_to_five_filter is not None
     # this is just a Where function, given some input it return True/False if the value is allowed
     assert zero_to_five_filter(3) is True
@@ -461,6 +494,7 @@ def test_range_predicate() -> None:
     rn = RangeTuple(None, 3, "3.5")
     assert list(filter(int_filter_func(unparsed_range=rn, attr_func=identity), src())) == ["0", "1", "2"]


 def test_parse_range() -> None:

     from functools import partial
@@ -504,9 +538,8 @@ def test_parse_timedelta_string() -> None:


 def test_parse_datetime_float() -> None:
-
     pnow = parse_datetime_float("now")
-    sec_diff = abs((pnow - datetime.now().timestamp()))
+    sec_diff = abs(pnow - datetime.now().timestamp())
     # should probably never fail? could mock time.time
     # but there seems to be issues with doing that use C-libraries (as time.time) does
     # https://docs.python.org/3/library/unittest.mock-examples.html#partial-mocking
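The timedelta_regex change above is purely cosmetic (the one-line pattern split across lines), so parse behaviour is unchanged. A worked example of the decomposition it performs, using the same pattern and dict comprehension as parse_timedelta_string in the hunk above:

    import re
    from datetime import timedelta

    timedelta_regex = re.compile(
        r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$"
    )

    # '1w2d8h5m20s' decomposes into named groups, which feed timedelta(**time_params)
    parts = timedelta_regex.match("1w2d8h5m20s")
    assert parts is not None
    time_params = {name: float(param) for name, param in parts.groupdict().items() if param}
    assert timedelta(**time_params) == timedelta(weeks=1, days=2, hours=8, minutes=5, seconds=20)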
my/core/serialize.py
@@ -1,12 +1,15 @@
-import datetime
-import dataclasses
-from pathlib import Path
-from decimal import Decimal
-from typing import Any, Optional, Callable, NamedTuple
-from functools import lru_cache
+from __future__ import annotations
+
+import datetime
+from dataclasses import asdict, is_dataclass
+from decimal import Decimal
+from functools import cache
+from pathlib import Path
+from typing import Any, Callable, NamedTuple

-from .common import is_namedtuple
 from .error import error_to_json
+from .pytest import parametrize
+from .types import is_namedtuple

 # note: it would be nice to combine the 'asdict' and _default_encode to some function
 # that takes a complex python object and returns JSON-compatible fields, while still
@@ -16,6 +19,8 @@ from .error import error_to_json

 DefaultEncoder = Callable[[Any], Any]

+Dumps = Callable[[Any], str]
+

 def _default_encode(obj: Any) -> Any:
     """
@@ -33,8 +38,9 @@ def _default_encode(obj: Any) -> Any:
     # convert paths to their string representation
     if isinstance(obj, Path):
         return str(obj)
-    if dataclasses.is_dataclass(obj):
-        return dataclasses.asdict(obj)
+    if is_dataclass(obj):
+        assert not isinstance(obj, type)  # to help mypy
+        return asdict(obj)
     if isinstance(obj, Exception):
         return error_to_json(obj)
     # if something was stored as 'decimal', you likely
@@ -53,19 +59,18 @@ def _default_encode(obj: Any) -> Any:
 # could possibly run multiple times/raise warning if you provide different 'default'
 # functions or change the kwargs? The alternative is to maintain all of this at the module
 # level, which is just as annoying
-@lru_cache(maxsize=None)
+@cache
 def _dumps_factory(**kwargs) -> Callable[[Any], str]:
     use_default: DefaultEncoder = _default_encode
     # if the user passed an additional 'default' parameter,
     # try using that to serialize before before _default_encode
-    _additional_default: Optional[DefaultEncoder] = kwargs.get("default")
+    _additional_default: DefaultEncoder | None = kwargs.get("default")
     if _additional_default is not None and callable(_additional_default):

         def wrapped_default(obj: Any) -> Any:
+            assert _additional_default is not None
             try:
-                # hmm... shouldn't mypy know that _additional_default is not None here?
-                # assert _additional_default is not None
-                return _additional_default(obj)  # type: ignore[misc]
+                return _additional_default(obj)
             except TypeError:
                 # expected TypeError, signifies couldn't be encoded by custom
                 # serializer function. Try _default_encode from here
@@ -75,22 +80,29 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:

     kwargs["default"] = use_default

+    prefer_factory: str | None = kwargs.pop('_prefer_factory', None)
+
+    def orjson_factory() -> Dumps | None:
         try:
             import orjson
+        except ModuleNotFoundError:
+            return None

         # todo: add orjson.OPT_NON_STR_KEYS? would require some bitwise ops
         # most keys are typically attributes from a NT/Dataclass,
         # so most seem to work: https://github.com/ijl/orjson#opt_non_str_keys
-        def _orjson_dumps(obj: Any) -> str:
+        def _orjson_dumps(obj: Any) -> str:  # TODO rename?
             # orjson returns json as bytes, encode to string
             return orjson.dumps(obj, **kwargs).decode('utf-8')

         return _orjson_dumps
-    except ModuleNotFoundError:
-        pass

+    def simplejson_factory() -> Dumps | None:
         try:
             from simplejson import dumps as simplejson_dumps
+        except ModuleNotFoundError:
+            return None

         # if orjson couldn't be imported, try simplejson
         # This is included for compatibility reasons because orjson
         # is rust-based and compiling on rarer architectures may not work
@@ -105,23 +117,42 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:

         return _simplejson_dumps

-    except ModuleNotFoundError:
-        pass
-
+    def stdlib_factory() -> Dumps | None:
         import json

         from .warnings import high

-        high("You might want to install 'orjson' to support serialization for lots more types! If that does not work for you, you can install 'simplejson' instead")
+        high(
+            "You might want to install 'orjson' to support serialization for lots more types! If that does not work for you, you can install 'simplejson' instead"
+        )

         def _stdlib_dumps(obj: Any) -> str:
             return json.dumps(obj, **kwargs)

         return _stdlib_dumps

+    factories = {
+        'orjson': orjson_factory,
+        'simplejson': simplejson_factory,
+        'stdlib': stdlib_factory,
+    }
+
+    if prefer_factory is not None:
+        factory = factories[prefer_factory]
+        res = factory()
+        assert res is not None, prefer_factory
+        return res
+
+    for factory in factories.values():
+        res = factory()
+        if res is not None:
+            return res
+    raise RuntimeError("Should not happen!")


 def dumps(
     obj: Any,
-    default: Optional[DefaultEncoder] = None,
+    default: DefaultEncoder | None = None,
     **kwargs,
 ) -> str:
     """
@@ -154,8 +185,17 @@ def dumps(
     return _dumps_factory(default=default, **kwargs)(obj)


-def test_serialize_fallback() -> None:
-    import json as jsn  # dont cause possible conflicts with module code
+@parametrize('factory', ['orjson', 'simplejson', 'stdlib'])
+def test_dumps(factory: str) -> None:
+    import pytest
+
+    orig_dumps = globals()['dumps']  # hack to prevent error from using local variable before declaring
+
+    def dumps(*args, **kwargs) -> str:
+        kwargs['_prefer_factory'] = factory
+        return orig_dumps(*args, **kwargs)
+
+    import json as json_builtin  # dont cause possible conflicts with module code

     # can't use a namedtuple here, since the default json.dump serializer
     # serializes namedtuples as tuples, which become arrays
@@ -166,36 +206,12 @@ def test_serialize_fallback() -> None:
     # the lru_cache'd warning may have already been sent,
     # so checking may be nondeterministic?
     import warnings

     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
-        res = jsn.loads(dumps(X))
+        res = json_builtin.loads(dumps(X))
     assert res == [5, 5.0]


-# this needs to be defined here to prevent a mypy bug
-# see https://github.com/python/mypy/issues/7281
-class _A(NamedTuple):
-    x: int
-    y: float
-
-
-def test_nt_serialize() -> None:
-    import json as jsn  # dont cause possible conflicts with module code
-    import orjson  # import to make sure this is installed
-
-    res: str = dumps(_A(x=1, y=2.0))
-    assert res == '{"x":1,"y":2.0}'
-
-    # test orjson option kwarg
-    data = {datetime.date(year=1970, month=1, day=1): 5}
-    res = jsn.loads(dumps(data, option=orjson.OPT_NON_STR_KEYS))
-    assert res == {'1970-01-01': 5}
-
-
-def test_default_serializer() -> None:
-    import pytest
-    import json as jsn  # dont cause possible conflicts with module code
-
     class Unserializable:
         def __init__(self, x: int):
             self.x = x
@@ -209,7 +225,7 @@ def test_default_serializer() -> None:
         def _serialize(self) -> Any:
             return {"x": self.x, "y": self.y}

-    res = jsn.loads(dumps(WithUnderscoreSerialize(6)))
+    res = json_builtin.loads(dumps(WithUnderscoreSerialize(6)))
     assert res == {"x": 6, "y": 6.0}

     # test passing additional 'default' func
@@ -221,5 +237,25 @@ def test_default_serializer() -> None:
     # this serializes both Unserializable, which is a custom type otherwise
     # not handled, and timedelta, which is handled by the '_default_encode'
     # in the 'wrapped_default' function
-    res2 = jsn.loads(dumps(Unserializable(10), default=_serialize_with_default))
+    res2 = json_builtin.loads(dumps(Unserializable(10), default=_serialize_with_default))
     assert res2 == {"x": 10, "y": 10.0}

+    if factory == 'orjson':
+        import orjson
+
+        # test orjson option kwarg
+        data = {datetime.date(year=1970, month=1, day=1): 5}
+        res2 = json_builtin.loads(dumps(data, option=orjson.OPT_NON_STR_KEYS))
+        assert res2 == {'1970-01-01': 5}
+
+
+@parametrize('factory', ['orjson', 'simplejson'])
+def test_dumps_namedtuple(factory: str) -> None:
+    import json as json_builtin  # dont cause possible conflicts with module code
+
+    class _A(NamedTuple):
+        x: int
+        y: float
+
+    res: str = dumps(_A(x=1, y=2.0), _prefer_factory=factory)
+    assert json_builtin.loads(res) == {'x': 1, 'y': 2.0}
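The refactor above replaces nested try/except fallbacks with a table of orjson/simplejson/stdlib factories plus an internal _prefer_factory kwarg (used by the new tests). A reduced sketch of just the selection logic, with deliberately simplified factory bodies:

    import json
    from typing import Any, Callable, Optional

    Dumps = Callable[[Any], str]

    def orjson_factory() -> Optional[Dumps]:
        try:
            import orjson
        except ModuleNotFoundError:
            return None
        return lambda obj: orjson.dumps(obj).decode('utf-8')

    def stdlib_factory() -> Optional[Dumps]:
        return lambda obj: json.dumps(obj)

    factories = {'orjson': orjson_factory, 'stdlib': stdlib_factory}

    def pick(prefer: Optional[str] = None) -> Dumps:
        if prefer is not None:
            res = factories[prefer]()
            assert res is not None, prefer  # preferred backend must be installed
            return res
        # otherwise fall through the table in order, taking the first available
        for factory in factories.values():
            res = factory()
            if res is not None:
                return res
        raise RuntimeError("Should not happen!")

    assert pick('stdlib')({'x': 1}) == '{"x": 1}'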
my/core/import_source.py
@@ -3,9 +3,12 @@ Decorator to gracefully handle importing a data source, or warning
 and yielding nothing (or a default) when its not available
 """

-from functools import wraps
-from typing import Any, Iterator, TypeVar, Callable, Optional, Iterable
+from __future__ import annotations
+
 import warnings
+from collections.abc import Iterable, Iterator
+from functools import wraps
+from typing import Any, Callable, TypeVar

 from .warnings import medium

@@ -26,8 +29,8 @@ _DEFAULT_ITR = ()
 def import_source(
     *,
     default: Iterable[T] = _DEFAULT_ITR,
-    module_name: Optional[str] = None,
-    help_url: Optional[str] = None,
+    module_name: str | None = None,
+    help_url: str | None = None,
 ) -> Callable[..., Callable[..., Iterator[T]]]:
     """
     doesn't really play well with types, but is used to catch
@@ -50,6 +53,7 @@ def import_source(
             except (ImportError, AttributeError) as err:
                 from . import core_config as CC
                 from .error import warn_my_config_import_error
+
                 suppressed_in_conf = False
                 if module_name is not None and CC.config._is_module_active(module_name) is False:
                     suppressed_in_conf = True
@@ -61,16 +65,18 @@ def import_source(
                     warnings.warn(f"""If you don't want to use this module, to hide this message, add '{module_name}' to your core config disabled_modules in your config, like:

 class core:
-    disabled_modules = [{repr(module_name)}]
-""")
+    disabled_modules = [{module_name!r}]
+""", stacklevel=1)
                     # try to check if this is a config error or based on dependencies not being installed
                     if isinstance(err, (ImportError, AttributeError)):
-                        matched_config_err = warn_my_config_import_error(err, help_url=help_url)
+                        matched_config_err = warn_my_config_import_error(err, module_name=module_name, help_url=help_url)
                         # if we determined this wasn't a config error, and it was an attribute error
                         # it could be *any* attribute error -- we should raise this since its otherwise a fatal error
                         # from some code in the module failing
                         if not matched_config_err and isinstance(err, AttributeError):
                             raise err
                 yield from default

         return wrapper

     return decorator
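For context, a hypothetical usage of the decorator above; the wrapped data-source module name here is invented for illustration. If the inner import fails, the wrapper emits the warnings shown in the hunk and falls back to `default` instead of raising:

    from collections.abc import Iterator

    from my.core.import_source import import_source

    @import_source(module_name='my.some_source', default=())
    def events() -> Iterator[str]:
        # this import may raise ImportError/AttributeError if the source
        # isn't installed/configured -- the decorator catches it
        from my.some_source import events as _events
        yield from _events()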
my/core/sqlite.py
@@ -1,16 +1,19 @@
-from .common import assert_subpackage; assert_subpackage(__name__)
+from __future__ import annotations

-from contextlib import contextmanager
-from pathlib import Path
+from .internal import assert_subpackage  # noqa: I001
+
+assert_subpackage(__name__)
+
 import shutil
 import sqlite3
+from collections.abc import Iterator
+from contextlib import contextmanager
+from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Tuple, Any, Iterator, Callable, Optional, Union
+from typing import Any, Callable, Literal, Union, overload

-from .common import PathIsh, assert_never
-from .compat import Literal
+from .common import PathIsh
+from .compat import assert_never


 def sqlite_connect_immutable(db: PathIsh) -> sqlite3.Connection:
@@ -22,7 +25,8 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None:
     with sqlite3.connect(db) as conn:
         conn.execute('CREATE TABLE testtable (col)')

-    import pytest  # type: ignore
+    import pytest
+
     with pytest.raises(sqlite3.OperationalError, match='readonly database'):
         with sqlite_connect_immutable(db) as conn:
             conn.execute('DROP TABLE testtable')
@@ -34,15 +38,17 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None:

 SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]


 def dict_factory(cursor, row):
     fields = [column[0] for column in cursor.description]
-    return {key: value for key, value in zip(fields, row)}
+    return dict(zip(fields, row))


 Factory = Union[SqliteRowFactory, Literal['row', 'dict']]


 @contextmanager
-def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Optional[Factory]=None) -> Iterator[sqlite3.Connection]:
+def sqlite_connection(db: PathIsh, *, immutable: bool = False, row_factory: Factory | None = None) -> Iterator[sqlite3.Connection]:
     dbp = f'file:{db}'
     # https://www.sqlite.org/draft/uri.html#uriimmutable
     if immutable:
@@ -86,8 +92,7 @@ def sqlite_copy_and_open(db: PathIsh) -> sqlite3.Connection:
         for p in tocopy:
             shutil.copy(p, tdir / p.name)
         with sqlite3.connect(str(tdir / dp.name)) as conn:
-            from .compat import sqlite_backup
-            sqlite_backup(source=conn, dest=dest)
+            conn.backup(target=dest)
         conn.close()
     return dest

@@ -99,32 +104,76 @@ def sqlite_copy_and_open(db: PathIsh) -> sqlite3.Connection:
 # and then the return type ends up as Iterator[Tuple[str, ...]], which isn't desirable :(
 # a bit annoying to have this copy-pasting, but hopefully not a big issue

-from typing import overload
+# fmt: off
 @overload
-def select(cols: Tuple[str                                   ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[Tuple[Any                                   ]]: ...
+def select(cols: tuple[str                                   ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[tuple[Any                                   ]]: ...
 @overload
-def select(cols: Tuple[str, str                              ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[Tuple[Any, Any                              ]]: ...
+def select(cols: tuple[str, str                              ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[tuple[Any, Any                              ]]: ...
 @overload
-def select(cols: Tuple[str, str, str                         ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[Tuple[Any, Any, Any                         ]]: ...
+def select(cols: tuple[str, str, str                         ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[tuple[Any, Any, Any                         ]]: ...
 @overload
-def select(cols: Tuple[str, str, str, str                    ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[Tuple[Any, Any, Any, Any                    ]]: ...
+def select(cols: tuple[str, str, str, str                    ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[tuple[Any, Any, Any, Any                    ]]: ...
 @overload
-def select(cols: Tuple[str, str, str, str, str               ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[Tuple[Any, Any, Any, Any, Any               ]]: ...
+def select(cols: tuple[str, str, str, str, str               ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[tuple[Any, Any, Any, Any, Any               ]]: ...
 @overload
-def select(cols: Tuple[str, str, str, str, str, str          ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[Tuple[Any, Any, Any, Any, Any, Any          ]]: ...
+def select(cols: tuple[str, str, str, str, str, str          ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[tuple[Any, Any, Any, Any, Any, Any          ]]: ...
 @overload
-def select(cols: Tuple[str, str, str, str, str, str, str     ], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any     ]]: ...
+def select(cols: tuple[str, str, str, str, str, str, str     ], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[tuple[Any, Any, Any, Any, Any, Any, Any     ]]: ...
 @overload
-def select(cols: Tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \
-        Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
+def select(cols: tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \
+        Iterator[tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
+# fmt: on

 def select(cols, rest, *, db):
     # db arg is last cause that results in nicer code formatting..
     return db.execute('SELECT ' + ','.join(cols) + ' ' + rest)
+
+
+class SqliteTool:
+    def __init__(self, connection: sqlite3.Connection) -> None:
+        self.connection = connection
+
+    def _get_sqlite_master(self) -> dict[str, str]:
+        res = {}
+        for c in self.connection.execute('SELECT name, type FROM sqlite_master'):
+            [name, type_] = c
+            assert type_ in {'table', 'index', 'view', 'trigger'}, (name, type_)  # just in case
+            res[name] = type_
+        return res
+
+    def get_table_names(self) -> list[str]:
+        master = self._get_sqlite_master()
+        res = []
+        for name, type_ in master.items():
+            if type_ != 'table':
+                continue
+            res.append(name)
+        return res
+
+    def get_table_schema(self, name: str) -> dict[str, str]:
+        """
+        Returns map from column name to column type
+
+        NOTE: Sometimes this doesn't work if the db has some extensions (e.g. happens for facebook apps)
+        In this case you might still be able to use get_table_names
+        """
+        schema: dict[str, str] = {}
+        for row in self.connection.execute(f'PRAGMA table_info(`{name}`)'):
+            col = row[1]
+            type_ = row[2]
+            # hmm, somewhere between 3.34.1 and 3.37.2, sqlite started normalising type names to uppercase
+            # let's do this just in case since python < 3.10 are using the old version
+            # e.g. it could have returned 'blob' and that would confuse blob check (see _check_allowed_blobs)
+            type_ = type_.upper()
+            schema[col] = type_
+        return schema
+
+    def get_table_schemas(self) -> dict[str, dict[str, str]]:
+        return {name: self.get_table_schema(name) for name in self.get_table_names()}
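The new SqliteTool helper can be exercised against any connection; a quick sketch against an in-memory database (assuming my.core.sqlite is importable, e.g. with HPI installed):

    import sqlite3

    from my.core.sqlite import SqliteTool

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE testtable (col TEXT)')

    tool = SqliteTool(conn)
    assert tool.get_table_names() == ['testtable']
    # column types come back uppercased, per the normalisation noted in the code
    assert tool.get_table_schemas() == {'testtable': {'col': 'TEXT'}}
    conn.close()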
385
my/core/stats.py
385
my/core/stats.py
|
@ -1,41 +1,219 @@
|
||||||
'''
|
'''
|
||||||
Helpers for hpi doctor/stats functionality.
|
Helpers for hpi doctor/stats functionality.
|
||||||
'''
|
'''
|
||||||
import collections
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import collections.abc
|
||||||
import importlib
|
import importlib
|
||||||
import inspect
|
import inspect
|
||||||
import sys
|
|
||||||
import typing
|
import typing
|
||||||
from typing import Optional, Callable, Any, Iterator, Sequence, Dict, List
|
from collections.abc import Iterable, Iterator, Sequence
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from types import ModuleType
|
||||||
|
from typing import (
|
||||||
|
Any,
|
||||||
|
Callable,
|
||||||
|
Protocol,
|
||||||
|
cast,
|
||||||
|
)
|
||||||
|
|
||||||
from .common import StatsFun, Stats, stat
|
from .types import asdict
|
||||||
|
|
||||||
|
Stats = dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
class StatsFun(Protocol):
|
||||||
|
def __call__(self, *, quick: bool = False) -> Stats: ...
|
||||||
|
|
||||||
|
|
||||||
|
# global state that turns on/off quick stats
|
||||||
|
# can use the 'quick_stats' contextmanager
|
||||||
|
# to enable/disable this in cli so that module 'stats'
|
||||||
|
# functions don't have to implement custom 'quick' logic
|
||||||
|
QUICK_STATS = False
|
||||||
|
|
||||||
|
|
||||||
|
# in case user wants to use the stats functions/quick option
|
||||||
|
# elsewhere -- can use this decorator instead of editing
|
||||||
|
# the global state directly
|
||||||
|
@contextmanager
|
||||||
|
def quick_stats():
|
||||||
|
global QUICK_STATS
|
||||||
|
prev = QUICK_STATS
|
||||||
|
try:
|
||||||
|
QUICK_STATS = True
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
QUICK_STATS = prev
|
||||||
|
|
||||||
|
|
||||||
|
def stat(
|
||||||
|
func: Callable[[], Iterable[Any]] | Iterable[Any],
|
||||||
|
*,
|
||||||
|
quick: bool = False,
|
||||||
|
name: str | None = None,
|
||||||
|
) -> Stats:
|
||||||
|
"""
|
||||||
|
Extracts various statistics from a passed iterable/callable, e.g.:
|
||||||
|
- number of items
|
||||||
|
- first/last item
|
||||||
|
- timestamps associated with first/last item
|
||||||
|
|
||||||
|
If quick is set, then only first 100 items of the iterable will be processed
|
||||||
|
"""
|
||||||
|
if callable(func):
|
||||||
|
fr = func()
|
||||||
|
if hasattr(fr, '__enter__') and hasattr(fr, '__exit__'):
|
||||||
|
# context managers has Iterable type, but they aren't data providers
|
||||||
|
# sadly doesn't look like there is a way to tell from typing annotations
|
||||||
|
# Ideally we'd detect this in is_data_provider...
|
||||||
|
# but there is no way of knowing without actually calling it first :(
|
||||||
|
return {}
|
||||||
|
fname = func.__name__
|
||||||
|
else:
|
||||||
|
# meh. means it's just a list.. not sure how to generate a name then
|
||||||
|
fr = func
|
||||||
|
fname = f'unnamed_{id(fr)}'
|
||||||
|
type_name = type(fr).__name__
|
||||||
|
extras = {}
|
||||||
|
if type_name == 'DataFrame':
|
||||||
|
# dynamic, because pandas is an optional dependency..
|
||||||
|
df = cast(Any, fr) # todo ugh, not sure how to annotate properly
|
||||||
|
df = df.reset_index()
|
||||||
|
|
||||||
|
fr = df.to_dict(orient='records')
|
||||||
|
|
||||||
|
dtypes = df.dtypes.to_dict()
|
||||||
|
extras['dtypes'] = dtypes
|
||||||
|
|
||||||
|
res = _stat_iterable(fr, quick=quick)
|
||||||
|
res.update(extras)
|
||||||
|
|
||||||
|
stat_name = name if name is not None else fname
|
||||||
|
return {
|
||||||
|
stat_name: res,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_stat() -> None:
|
||||||
|
# the bulk of testing is in test_stat_iterable
|
||||||
|
|
||||||
|
# works with 'anonymous' lists
|
||||||
|
res = stat([1, 2, 3])
|
||||||
|
[(name, v)] = res.items()
|
||||||
|
# note: name will be a little funny since anonymous list doesn't have one
|
||||||
|
assert v == {'count': 3}
|
||||||
|
#
|
||||||
|
|
||||||
|
# works with functions:
|
||||||
|
def fun():
|
||||||
|
return [4, 5, 6]
|
||||||
|
|
||||||
|
assert stat(fun) == {'fun': {'count': 3}}
|
||||||
|
#
|
||||||
|
|
||||||
|
# context managers are technically iterable
|
||||||
|
# , but usually we wouldn't want to compute stats for them
|
||||||
|
# this is mainly intended for guess_stats,
|
||||||
|
# since it can't tell whether the function is a ctx manager without calling it
|
||||||
|
@contextmanager
|
||||||
|
def cm():
|
||||||
|
yield 1
|
||||||
|
yield 3
|
||||||
|
|
||||||
|
assert stat(cm) == {} # type: ignore[arg-type]
|
||||||
|
#
|
||||||
|
|
||||||
|
# works with pandas dataframes
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def df() -> pd.DataFrame:
|
||||||
|
dates = pd.date_range(start='2024-02-10 08:00', end='2024-02-11 16:00', freq='5h')
|
||||||
|
return pd.DataFrame([f'value{i}' for i, _ in enumerate(dates)], index=dates, columns=['value'])
|
||||||
|
|
||||||
|
assert stat(df) == {
|
||||||
|
'df': {
|
||||||
|
'count': 7,
|
||||||
|
'dtypes': {
|
||||||
|
'index': np.dtype('<M8[ns]'),
|
||||||
|
'value': np.dtype('O'),
|
||||||
|
},
|
||||||
|
'first': pd.Timestamp('2024-02-10 08:00'),
|
||||||
|
'last': pd.Timestamp('2024-02-11 14:00'),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
def get_stats(module_name: str, *, guess: bool = False) -> StatsFun | None:
|
||||||
|
stats: StatsFun | None = None
|
||||||
|
try:
|
||||||
|
module = importlib.import_module(module_name)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
stats = getattr(module, 'stats', None)
|
||||||
|
if stats is None:
|
||||||
|
stats = guess_stats(module)
|
||||||
|
return stats
|
||||||
|
|
||||||
|
|
||||||
# TODO maybe could be enough to annotate OUTPUTS or something like that?
|
# TODO maybe could be enough to annotate OUTPUTS or something like that?
|
||||||
# then stats could just use them as hints?
|
# then stats could just use them as hints?
|
||||||
def guess_stats(module_name: str, quick: bool=False) -> Optional[StatsFun]:
|
def guess_stats(module: ModuleType) -> StatsFun | None:
|
||||||
providers = guess_data_providers(module_name)
|
"""
|
||||||
|
If the module doesn't have explicitly defined 'stat' function,
|
||||||
|
this is used to try to guess what could be included in stats automatically
|
||||||
|
"""
|
||||||
|
providers = _guess_data_providers(module)
|
||||||
if len(providers) == 0:
|
if len(providers) == 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def auto_stats() -> Stats:
|
def auto_stats(*, quick: bool = False) -> Stats:
|
||||||
return {k: stat(v, quick=quick) for k, v in providers.items()}
|
res = {}
|
||||||
|
for k, v in providers.items():
|
||||||
|
res.update(stat(v, quick=quick, name=k))
|
||||||
|
return res
|
||||||
|
|
||||||
return auto_stats
|
return auto_stats
|
||||||
|
|
||||||
|
|
||||||
def guess_data_providers(module_name: str) -> Dict[str, Callable]:
|
def test_guess_stats() -> None:
|
||||||
module = importlib.import_module(module_name)
|
import my.core.tests.auto_stats as M
|
||||||
|
|
||||||
|
auto_stats = guess_stats(M)
|
||||||
|
assert auto_stats is not None
|
||||||
|
res = auto_stats(quick=False)
|
||||||
|
|
||||||
|
assert res == {
|
||||||
|
'inputs': {
|
||||||
|
'count': 3,
|
||||||
|
'first': 'file1.json',
|
||||||
|
'last': 'file3.json',
|
||||||
|
},
|
||||||
|
'iter_data': {
|
||||||
|
'count': 9,
|
||||||
|
'first': datetime(2020, 1, 1, 1, 1, 1),
|
||||||
|
'last': datetime(2020, 1, 3, 1, 1, 1),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _guess_data_providers(module: ModuleType) -> dict[str, Callable]:
|
||||||
mfunctions = inspect.getmembers(module, inspect.isfunction)
|
mfunctions = inspect.getmembers(module, inspect.isfunction)
|
||||||
return {k: v for k, v in mfunctions if is_data_provider(v)}
|
return {k: v for k, v in mfunctions if is_data_provider(v)}
|
||||||
|
|
||||||
|
|
||||||
# todo how to exclude deprecated stuff?
|
# todo how to exclude deprecated data providers?
|
||||||
def is_data_provider(fun: Any) -> bool:
|
def is_data_provider(fun: Any) -> bool:
|
||||||
"""
|
"""
|
||||||
|
Criteria for being a "data provider":
|
||||||
1. returns iterable or something like that
|
1. returns iterable or something like that
|
||||||
2. takes no arguments? (otherwise not callable by stats anyway?)
|
2. takes no arguments? (otherwise not callable by stats anyway?)
|
||||||
3. doesn't start with an underscore (those are probably helper functions?)
|
3. doesn't start with an underscore (those are probably helper functions?)
|
||||||
4. functions isn't the 'inputs' function (or ends with '_inputs')
|
|
||||||
"""
|
"""
|
||||||
# todo maybe for 2 allow default arguments? not sure
|
# todo maybe for 2 allow default arguments? not sure
|
||||||
# one example which could benefit is my.pdfs
|
# one example which could benefit is my.pdfs
|
||||||
|
@@ -48,19 +226,23 @@ def is_data_provider(fun: Any) -> bool:
         return False

     # has at least one argument without default values
-    if len(list(sig_required_params(sig))) > 0:
+    if len(list(_sig_required_params(sig))) > 0:
         return False

     if hasattr(fun, '__name__'):
         # probably a helper function?
         if fun.__name__.startswith('_'):
             return False
-        # ignore def inputs; something like comment_inputs or backup_inputs should also be ignored
-        if fun.__name__ == 'inputs' or fun.__name__.endswith('_inputs'):
-            return False

-    return_type = sig.return_annotation
-    return type_is_iterable(return_type)
+    # inspect.signature might return str instead of a proper type object
+    # if from __future__ import annotations is used
+    # so best to rely on get_type_hints (which evals the annotations)
+    type_hints = typing.get_type_hints(fun)
+    return_type = type_hints.get('return')
+    if return_type is None:
+        return False
+
+    return _type_is_iterable(return_type)
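The comment block above is the crux of this hunk; a quick stand-alone demonstration of the behaviour it describes (runnable as-is):

```python
from __future__ import annotations

import inspect
import typing
from collections.abc import Iterator


def provider() -> Iterator[int]:
    yield 1


# with postponed evaluation of annotations, the signature carries a raw string...
print(repr(inspect.signature(provider).return_annotation))  # 'Iterator[int]'
# ...while get_type_hints evaluates it back into a real type object
print(typing.get_type_hints(provider)['return'])  # collections.abc.Iterator[int]
```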
 def test_is_data_provider() -> None:
@@ -71,34 +253,42 @@ def test_is_data_provider() -> None:

     def no_return_type():
         return [1, 2, 3]

     assert not idp(no_return_type)

     lam = lambda: [1, 2]
     assert not idp(lam)

-    def has_extra_args(count) -> List[int]:
+    def has_extra_args(count) -> list[int]:
         return list(range(count))

     assert not idp(has_extra_args)

     def has_return_type() -> Sequence[str]:
         return ['a', 'b', 'c']

     assert idp(has_return_type)

     def _helper_func() -> Iterator[Any]:
         yield 1

     assert not idp(_helper_func)

     def inputs() -> Iterator[Any]:
         yield 1

-    assert not idp(inputs)
+    assert idp(inputs)

     def producer_inputs() -> Iterator[Any]:
         yield 1

-    assert not idp(producer_inputs)
+    assert idp(producer_inputs)
-# return any parameters the user is required to provide - those which don't have default values
-def sig_required_params(sig: inspect.Signature) -> Iterator[inspect.Parameter]:
+def _sig_required_params(sig: inspect.Signature) -> Iterator[inspect.Parameter]:
+    """
+    Returns parameters the user is required to provide - e.g. ones that don't have default values
+    """
     for param in sig.parameters.values():
         if param.default == inspect.Parameter.empty:
             yield param

@@ -108,24 +298,24 @@ def test_sig_required_params() -> None:

     def x() -> int:
         return 5
-    assert len(list(sig_required_params(inspect.signature(x)))) == 0
+    assert len(list(_sig_required_params(inspect.signature(x)))) == 0

     def y(arg: int) -> int:
         return arg
-    assert len(list(sig_required_params(inspect.signature(y)))) == 1
+    assert len(list(_sig_required_params(inspect.signature(y)))) == 1

     # from stats perspective, this should be treated as a data provider as well
     # could be that the default value to the data provider is the 'default'
     # path to use for inputs/a function to provide input data
     def z(arg: int = 5) -> int:
         return arg
-    assert len(list(sig_required_params(inspect.signature(z)))) == 0
+    assert len(list(_sig_required_params(inspect.signature(z)))) == 0
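What `_sig_required_params` keys on, shown directly: a parameter is "required" exactly when its default is the `inspect.Parameter.empty` sentinel (the function `f` below is just an illustration):

```python
import inspect


def f(a, b=1, *, c=2):
    return a + b + c


required = [
    p.name
    for p in inspect.signature(f).parameters.values()
    if p.default is inspect.Parameter.empty
]
print(required)  # ['a'] -- b and c have defaults, so only a is required
```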
-def type_is_iterable(type_spec) -> bool:
-    if sys.version_info[1] < 8:
-        # there is no get_origin before 3.8, and retrofitting gonna be a lot of pain
-        return any(x in str(type_spec) for x in ['List', 'Sequence', 'Iterable', 'Iterator'])
+def _type_is_iterable(type_spec) -> bool:
     origin = typing.get_origin(type_spec)
     if origin is None:
         return False

@@ -142,14 +332,139 @@ def type_is_iterable(type_spec) -> bool:

 # todo docstring test?
 def test_type_is_iterable() -> None:
-    from typing import List, Sequence, Iterable, Dict, Any
-
-    fun = type_is_iterable
+    fun = _type_is_iterable

     assert not fun(None)
     assert not fun(int)
     assert not fun(Any)
-    assert not fun(Dict[int, int])
+    assert not fun(dict[int, int])

-    assert fun(List[int])
-    assert fun(Sequence[Dict[str, str]])
+    assert fun(list[int])
+    assert fun(Sequence[dict[str, str]])
     assert fun(Iterable[Any])
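For reference, the `typing.get_origin` behaviour `_type_is_iterable` builds on (and the reason the pre-3.8 string-matching fallback could be dropped): parameterised generics expose their runtime origin, bare types return `None`.

```python
import typing
from collections.abc import Iterator, Sequence

print(typing.get_origin(list[int]))      # <class 'list'>
print(typing.get_origin(Sequence[str]))  # <class 'collections.abc.Sequence'>
print(typing.get_origin(Iterator[int]))  # <class 'collections.abc.Iterator'>
print(typing.get_origin(int))            # None -- bare types have no origin
```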
+def _stat_item(item):
+    if item is None:
+        return None
+    if isinstance(item, Path):
+        return str(item)
+    return _guess_datetime(item)
+
+
+def _stat_iterable(it: Iterable[Any], *, quick: bool = False) -> Stats:
+    from more_itertools import first, ilen, take
+
+    # todo not sure if there is something in more_itertools to compute this?
+    total = 0
+    errors = 0
+    first_item = None
+    last_item = None
+
+    def funcit():
+        nonlocal errors, first_item, last_item, total
+        for x in it:
+            total += 1
+            if isinstance(x, Exception):
+                errors += 1
+            else:
+                last_item = x
+                if first_item is None:
+                    first_item = x
+            yield x
+
+    eit = funcit()
+    count: Any
+    if quick or QUICK_STATS:
+        initial = take(100, eit)
+        count = len(initial)
+        if first(eit, None) is not None:  # todo can actually be none...
+            # haven't exhausted
+            count = f'{count}+'
+    else:
+        count = ilen(eit)
+
+    res = {
+        'count': count,
+    }
+
+    if total == 0:
+        # not sure but I guess a good balance? wouldn't want to throw early here?
+        res['warning'] = 'THE ITERABLE RETURNED NO DATA'
+
+    if errors > 0:
+        res['errors'] = errors
+
+    if (stat_first := _stat_item(first_item)) is not None:
+        res['first'] = stat_first
+
+    if (stat_last := _stat_item(last_item)) is not None:
+        res['last'] = stat_last
+
+    return res
+
+
+def test_stat_iterable() -> None:
+    from datetime import datetime, timedelta, timezone
+    from typing import NamedTuple
+
+    dd = datetime.fromtimestamp(123, tz=timezone.utc)
+    day = timedelta(days=3)
+
+    class X(NamedTuple):
+        x: int
+        d: datetime
+
+    def it():
+        yield RuntimeError('oops!')
+        for i in range(2):
+            yield X(x=i, d=dd + day * i)
+        yield RuntimeError('bad!')
+        for i in range(3):
+            yield X(x=i * 10, d=dd + day * (i * 10))
+        yield X(x=123, d=dd + day * 50)
+
+    res = _stat_iterable(it())
+    assert res['count'] == 1 + 2 + 1 + 3 + 1
+    assert res['errors'] == 1 + 1
+    assert res['last'] == dd + day * 50
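The quick-mode counting trick in `_stat_iterable`, isolated into a hedged sketch with the same `more_itertools` helpers but simplified bookkeeping: take a bounded sample, then peek one element further to decide between an exact count and an open-ended `'N+'`. (As the original's todo notes, `first(it, None)` cannot distinguish exhaustion from a literal `None` item.)

```python
from more_itertools import first, ilen, take


def count_items(it, *, quick: bool = False):
    if quick:
        sample = take(100, it)
        n = len(sample)
        # anything left after the sample means the count is only a lower bound
        return f'{n}+' if first(it, None) is not None else n
    return ilen(it)


print(count_items(iter(range(50)), quick=True))   # 50
print(count_items(iter(range(500)), quick=True))  # '100+'
print(count_items(iter(range(500))))              # 500
```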
+
+
+# experimental, not sure about it..
+def _guess_datetime(x: Any) -> datetime | None:
+    # todo hmm implement without exception..
+    try:
+        d = asdict(x)
+    except:  # noqa: E722 bare except
+        return None
+    for v in d.values():
+        if isinstance(v, datetime):
+            return v
+    return None
+
+
+def test_guess_datetime() -> None:
+    from dataclasses import dataclass
+    from typing import NamedTuple
+
+    from .compat import fromisoformat
+
+    dd = fromisoformat('2021-02-01T12:34:56Z')
+
+    class A(NamedTuple):
+        x: int
+
+    class B(NamedTuple):
+        x: int
+        created: datetime
+
+    assert _guess_datetime(A(x=4)) is None
+    assert _guess_datetime(B(x=4, created=dd)) == dd
+
+    @dataclass
+    class C:
+        a: datetime
+        x: int
+
+    assert _guess_datetime(C(a=dd, x=435)) == dd
+    # TODO not sure what to return when multiple datetime fields?
+    # TODO test @property?
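A hedged reduction of the `_guess_datetime` idea: convert a record to a field dict and return its first datetime-valued field. Note the real code goes through an `asdict` compat helper that (judging by the tests) also understands NamedTuples; plain `dataclasses.asdict` below only covers dataclass instances.

```python
from dataclasses import asdict, dataclass, is_dataclass
from datetime import datetime
from typing import Any, Optional


def guess_datetime_sketch(x: Any) -> Optional[datetime]:
    if not is_dataclass(x):
        return None
    # return the first datetime-valued field, if any
    for v in asdict(x).values():
        if isinstance(v, datetime):
            return v
    return None


@dataclass
class Event:  # illustrative record type, not from the repo
    name: str
    at: datetime


print(guess_datetime_sketch(Event('backup', datetime(2021, 2, 1))))  # 2021-02-01 00:00:00
```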
my/core/structure.py:

@@ -1,20 +1,22 @@
+from __future__ import annotations
+
+import atexit
 import os
 import shutil
+import sys
+import tarfile
 import tempfile
 import zipfile
-import atexit
-
-from typing import Sequence, Generator, List, Union, Tuple
+from collections.abc import Generator, Sequence
 from contextlib import contextmanager
 from pathlib import Path

-from .common import LazyLogger
+from .logging import make_logger

-logger = LazyLogger(__name__, level="info")
+logger = make_logger(__name__, level="info")


-def _structure_exists(base_dir: Path, paths: Sequence[str], partial: bool = False) -> bool:
+def _structure_exists(base_dir: Path, paths: Sequence[str], *, partial: bool = False) -> bool:
     """
     Helper function for match_structure to check if
     all subpaths exist at some base directory

@@ -36,17 +38,18 @@ def _structure_exists(base_dir: Path, paths: Sequence[str], partial: bool = Fals

 ZIP_EXT = {".zip"}
+TARGZ_EXT = {".tar.gz"}


 @contextmanager
 def match_structure(
     base: Path,
-    expected: Union[str, Sequence[str]],
+    expected: str | Sequence[str],
     *,
     partial: bool = False,
-) -> Generator[Tuple[Path, ...], None, None]:
+) -> Generator[tuple[Path, ...], None, None]:
     """
-    Given a 'base' directory or zipfile, recursively search for one or more paths that match the
+    Given a 'base' directory or archive (zip/tar.gz), recursively search for one or more paths that match the
     pattern described in 'expected'. That can be a single string, or a list
     of relative paths (as strings) you expect at the same directory.
@@ -54,12 +57,12 @@ def match_structure(
     expected be present, not all of them.

     This reduces the chances of the user misconfiguring gdpr exports, e.g.
-    if they zipped the folders instead of the parent directory or vice-versa
+    if they archived the folders instead of the parent directory or vice-versa

     When this finds a matching directory structure, it stops searching in that subdirectory
     and continues onto other possible subdirectories which could match

-    If base is a zipfile, this extracts the zipfile into a temporary directory
+    If base is an archive, this extracts it into a temporary directory
     (configured by core_config.config.get_tmp_dir), and then searches the extracted
     folder for matching structures

@@ -69,21 +72,21 @@ def match_structure(

     export_dir
     ├── exp_2020
     │   ├── channel_data
     │   │   ├── data1
     │   │   └── data2
     │   ├── index.json
     │   ├── messages
     │   │   └── messages.csv
     │   └── profile
     │       └── settings.json
     └── exp_2021
         ├── channel_data
         │   ├── data1
         │   └── data2
         ├── index.json
         ├── messages
         │   └── messages.csv
         └── profile
             └── settings.json

@@ -95,12 +98,12 @@ def match_structure(
     This doesn't require an exhaustive list of expected values, but its a good idea to supply
     a complete picture of the expected structure to avoid false-positives

-    This does not recursively unzip zipfiles in the subdirectories,
-    it only unzips into a temporary directory if 'base' is a zipfile
+    This does not recursively decompress archives in the subdirectories,
+    it only unpacks into a temporary directory if 'base' is an archive

     A common pattern for using this might be to use get_files to get a list
-    of zipfiles or top-level gdpr export directories, and use match_structure
-    to search the resulting paths for a export structure you're expecting
+    of archives or top-level gdpr export directories, and use match_structure
+    to search the resulting paths for an export structure you're expecting
     """
     from . import core_config as CC
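The search described by the docstring (and the `possible_targets.pop(0)` visible in the next hunk) is a breadth-first walk over directories. A compact stand-alone rendering of that idea, under the simplifying assumption that "matches" just means every expected relative path exists; the body of the real loop isn't fully shown in this diff:

```python
from pathlib import Path
from typing import Sequence


def find_matches(searchdir: Path, expected: Sequence[str]) -> list[Path]:
    matches: list[Path] = []
    queue: list[Path] = [searchdir]
    while len(queue) > 0:
        p = queue.pop(0)
        if all((p / rel).exists() for rel in expected):
            # matched: per the docstring, stop descending into this subtree
            matches.append(p)
        else:
            queue.extend(child for child in p.iterdir() if child.is_dir())
    return matches
```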
@@ -110,28 +113,37 @@ def match_structure(
         expected = (expected,)

     is_zip: bool = base.suffix in ZIP_EXT
+    is_targz: bool = any(base.name.endswith(suffix) for suffix in TARGZ_EXT)

     searchdir: Path = base.absolute()
     try:
-        # if the file given by the user is a zipfile, create a temporary
-        # directory and extract the zipfile to that temporary directory
+        # if the file given by the user is an archive, create a temporary
+        # directory and extract it to that temporary directory
         #
         # this temporary directory is removed in the finally block
-        if is_zip:
+        if is_zip or is_targz:
             # sanity check before we start creating directories/rm-tree'ing things
-            assert base.exists(), f"zipfile at {base} doesn't exist"
+            assert base.exists(), f"archive at {base} doesn't exist"

             searchdir = Path(tempfile.mkdtemp(dir=tdir))

-            zf = zipfile.ZipFile(base)
-            zf.extractall(path=str(searchdir))
+            if is_zip:
+                # base might already be a ZipPath, and str(base) would end with /
+                zf = zipfile.ZipFile(str(base).rstrip('/'))
+                zf.extractall(path=str(searchdir))
+            elif is_targz:
+                with tarfile.open(str(base)) as tar:
+                    # filter is a security feature, will be required param in later python version
+                    mfilter = {'filter': 'data'} if sys.version_info[:2] >= (3, 12) else {}
+                    tar.extractall(path=str(searchdir), **mfilter)  # type: ignore[arg-type]
+            else:
+                raise RuntimeError("can't happen")
         else:
             if not searchdir.is_dir():
-                raise NotADirectoryError(f"Expected either a zipfile or a directory, received {searchdir}")
+                raise NotADirectoryError(f"Expected either a zip/tar.gz archive or a directory, received {searchdir}")

-        matches: List[Path] = []
-        possible_targets: List[Path] = [searchdir]
+        matches: list[Path] = []
+        possible_targets: list[Path] = [searchdir]

         while len(possible_targets) > 0:
             p = possible_targets.pop(0)

@@ -151,9 +163,9 @@ def match_structure(

     finally:
-        if is_zip:
+        if is_zip or is_targz:
             # make sure we're not mistakenly deleting data
-            assert str(searchdir).startswith(str(tdir)), f"Expected the temporary directory for extracting zip to start with the temporary directory prefix ({tdir}), found {searchdir}"
+            assert str(searchdir).startswith(str(tdir)), f"Expected the temporary directory for extracting archive to start with the temporary directory prefix ({tdir}), found {searchdir}"

             shutil.rmtree(str(searchdir))

@@ -162,7 +174,7 @@ def warn_leftover_files() -> None:
     from . import core_config as CC

     base_tmp: Path = CC.config.get_tmp_dir()
-    leftover: List[Path] = list(base_tmp.iterdir())
+    leftover: list[Path] = list(base_tmp.iterdir())
     if leftover:
         logger.debug(f"at exit warning: Found leftover files in temporary directory '{leftover}'. this may be because you have multiple hpi processes running -- if so this can be ignored")
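The version-gated `filter` kwarg above is worth calling out: `tarfile` grew extraction filters in Python 3.12 to reject unsafe archive members (absolute paths, links escaping the destination, device files), and later versions warn when no filter is given. Isolated into a small hedged helper (`safe_extract` is an illustrative name, not repo code):

```python
import sys
import tarfile


def safe_extract(archive: str, dest: str) -> None:
    with tarfile.open(archive) as tar:
        # 'data' rejects absolute paths, parent-directory escapes, devices, etc.
        kwargs = {'filter': 'data'} if sys.version_info[:2] >= (3, 12) else {}
        tar.extractall(path=dest, **kwargs)
```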
my/core/tests/__init__.py (new file, 3 lines):

@@ -0,0 +1,3 @@
+# hmm, sadly pytest --import-mode importlib --pyargs my.core.tests doesn't work properly without __init__.py
+# although it works if you run either my.core or my.core.tests.sqlite (for example) directly
+# so if it gets in the way could get rid of this later?
my/core/tests/auto_stats.py (new file, 37 lines):

@@ -0,0 +1,37 @@
+"""
+Helper 'module' for test_guess_stats
+"""
+
+from collections.abc import Iterable, Iterator, Sequence
+from contextlib import contextmanager
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from pathlib import Path
+
+
+@dataclass
+class Item:
+    id: str
+    dt: datetime
+    source: Path
+
+
+def inputs() -> Sequence[Path]:
+    return [
+        Path('file1.json'),
+        Path('file2.json'),
+        Path('file3.json'),
+    ]
+
+
+def iter_data() -> Iterable[Item]:
+    dt = datetime.fromisoformat('2020-01-01 01:01:01')
+    for path in inputs():
+        for i in range(3):
+            yield Item(id=str(i), dt=dt + timedelta(days=i), source=path)
+
+
+@contextmanager
+def some_contextmanager() -> Iterator[str]:
+    # this shouldn't end up in guess_stats because context manager is not a data provider
+    yield 'hello'
my/core/tests/common.py (new file, 32 lines):

@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+import os
+from collections.abc import Iterator
+from contextlib import contextmanager
+
+import pytest
+
+V = 'HPI_TESTS_USES_OPTIONAL_DEPS'
+
+# TODO use it for serialize tests that are using simplejson/orjson?
+skip_if_uses_optional_deps = pytest.mark.skipif(
+    V not in os.environ,
+    reason=f'test only works when optional dependencies are installed. Set env variable {V}=true to override.',
+)
+
+
+# TODO maybe move to hpi core?
+@contextmanager
+def tmp_environ_set(key: str, value: str | None) -> Iterator[None]:
+    prev_value = os.environ.get(key)
+    if value is None:
+        os.environ.pop(key, None)
+    else:
+        os.environ[key] = value
+    try:
+        yield
+    finally:
+        if prev_value is None:
+            os.environ.pop(key, None)
+        else:
+            os.environ[key] = prev_value
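Usage sketch for `tmp_environ_set`, temporarily overriding (or unsetting) an environment variable and restoring it afterwards; the variable name and values here are arbitrary:

```python
import os

from my.core.tests.common import tmp_environ_set

os.environ['MY_VAR'] = 'original'

with tmp_environ_set('MY_VAR', 'patched'):
    assert os.environ['MY_VAR'] == 'patched'
assert os.environ['MY_VAR'] == 'original'

with tmp_environ_set('MY_VAR', None):  # None removes the variable inside the block
    assert 'MY_VAR' not in os.environ
assert os.environ['MY_VAR'] == 'original'
```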
my/core/tests/denylist.py:

@@ -1,11 +1,11 @@
-import warnings
-
 import json
-from pathlib import Path
+import warnings
+from collections.abc import Iterator
 from datetime import datetime
-from typing import NamedTuple, Iterator
+from pathlib import Path
+from typing import NamedTuple

-from my.core.denylist import DenyList
+from ..denylist import DenyList


 class IP(NamedTuple):

@@ -30,7 +30,6 @@ def data() -> Iterator[IP]:
 def test_denylist(tmp_path: Path) -> None:
     tf = (tmp_path / "denylist.json").absolute()
     with warnings.catch_warnings(record=True):
-
         # create empty denylist (though file does not have to exist for denylist to work)
         tf.write_text("[]")

@@ -93,8 +92,7 @@ def test_denylist(tmp_path: Path) -> None:

     assert "59.40.113.87" not in [i.addr for i in filtered]

-    with open(tf, "r") as f:
-        data_json = json.loads(f.read())
+    data_json = json.loads(tf.read_text())

     assert data_json == [
         {
my/core/tests/sqlite.py:

@@ -1,10 +1,10 @@
-from pathlib import Path
 import shutil
 import sqlite3
+from concurrent.futures import ProcessPoolExecutor
+from pathlib import Path
 from tempfile import TemporaryDirectory

-from my.core.sqlite import sqlite_connect_immutable, sqlite_copy_and_open
+from ..sqlite import sqlite_connect_immutable, sqlite_copy_and_open


 def test_sqlite_read_with_wal(tmp_path: Path) -> None:

@@ -27,13 +27,14 @@ def test_sqlite_read_with_wal(tmp_path: Path) -> None:
     assert len(wals) == 1

     ## now run the tests in separate process to ensure there is no potential for reusing sqlite connections or something
-    from concurrent.futures import ProcessPoolExecutor as Pool
-    with Pool(1) as pool:
+    with ProcessPoolExecutor(1) as pool:
         # merely using it for ctx manager..
+        # fmt: off
         pool.submit(_test_do_copy         , db).result()
         pool.submit(_test_do_immutable    , db).result()
         pool.submit(_test_do_copy_and_open, db).result()
         pool.submit(_test_open_asis       , db).result()
+        # fmt: on


 def _test_do_copy(db: Path) -> None:
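The pattern in that hunk, reduced to its essentials: submit each check to a single-worker process pool so no in-process sqlite state can leak between tests, and call `.result()` so any exception raised in the child re-raises in the parent. A minimal runnable version (the `check` function is illustrative):

```python
from concurrent.futures import ProcessPoolExecutor


def check(x: int) -> int:
    return x * 2


if __name__ == '__main__':  # guard needed when child processes are spawned
    with ProcessPoolExecutor(1) as pool:
        assert pool.submit(check, 21).result() == 42
```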
my/core/tests/structure.py:

@@ -1,9 +1,8 @@
-import pytest
-
 from pathlib import Path

-from my.core.structure import match_structure
+import pytest
+
+from ..structure import match_structure

 structure_data: Path = Path(__file__).parent / "structure_data"

@@ -15,11 +14,9 @@ def test_gdpr_structure_exists() -> None:
     assert results == (structure_data / "gdpr_subdirs" / "gdpr_export",)


-def test_gdpr_unzip() -> None:
-    with match_structure(
-        structure_data / "gdpr_export.zip", expected=gdpr_expected
-    ) as results:
+@pytest.mark.parametrize("archive", ["gdpr_export.zip", "gdpr_export.tar.gz"])
+def test_gdpr_unpack(archive: str) -> None:
+    with match_structure(structure_data / archive, expected=gdpr_expected) as results:
         assert len(results) == 1
         extracted = results[0]
         index_file = extracted / "messages" / "index.csv"

@@ -31,15 +28,11 @@ def test_gdpr_unzip() -> None:

 def test_match_partial() -> None:
     # a partial match should match both the 'broken' and 'gdpr_export' directories
-    with match_structure(
-        structure_data / "gdpr_subdirs", expected=gdpr_expected, partial=True
-    ) as results:
+    with match_structure(structure_data / "gdpr_subdirs", expected=gdpr_expected, partial=True) as results:
         assert len(results) == 2


 def test_not_directory() -> None:
-    with pytest.raises(NotADirectoryError, match=r"Expected either a zipfile or a directory"):
-        with match_structure(
-            structure_data / "messages/index.csv", expected=gdpr_expected
-        ):
+    with pytest.raises(NotADirectoryError, match=r"Expected either a zip/tar.gz archive or a directory"):
+        with match_structure(structure_data / "messages/index.csv", expected=gdpr_expected):
             pass
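Finally, a hedged end-to-end usage sketch of the extended API exercised by the parametrized test above; the archive path and expected paths here are hypothetical:

```python
from pathlib import Path

from my.core.structure import match_structure

expected = ("messages/index.csv", "profile/settings.json")

# works uniformly for a directory, a .zip, or (now) a .tar.gz archive
with match_structure(Path("gdpr_export.tar.gz"), expected=expected) as matches:
    for m in matches:
        print("found export at", m)
# any temporary extraction directory is cleaned up on context exit
```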