my.location: let takeout provider be in a separate my.location.google; add CI test & enable mypy
This commit is contained in:
parent
90ada92110
commit
ba9acc3445
7 changed files with 82 additions and 35 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -1,3 +1,6 @@
|
||||||
[submodule "testdata/hpi-testdata"]
|
[submodule "testdata/hpi-testdata"]
|
||||||
path = testdata/hpi-testdata
|
path = testdata/hpi-testdata
|
||||||
url = https://github.com/karlicoss/hpi-testdata
|
url = https://github.com/karlicoss/hpi-testdata
|
||||||
|
[submodule "testdata/track"]
|
||||||
|
path = testdata/track
|
||||||
|
url = https://github.com/tajtiattila/track
|
||||||
|
|
|
@ -29,3 +29,6 @@ class exercise:
|
||||||
|
|
||||||
class bluemaestro:
|
class bluemaestro:
|
||||||
export_path: Paths = ''
|
export_path: Paths = ''
|
||||||
|
|
||||||
|
class google:
|
||||||
|
takeout_path: Paths = ''
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
takeout.py
|
|
|
@ -4,14 +4,13 @@ Location data from Google Takeout
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
from typing import Any, Collection, Deque, Iterable, Iterator, List, NamedTuple, Optional, Sequence, IO, Tuple
|
from typing import Any, Collection, Deque, Iterable, Iterator, List, NamedTuple, Optional, Sequence, IO, Tuple
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import pytz
|
|
||||||
|
|
||||||
# pip3 install geopy
|
# pip3 install geopy
|
||||||
import geopy # type: ignore
|
import geopy # type: ignore
|
||||||
import geopy.distance # type: ignore
|
import geopy.distance # type: ignore
|
||||||
|
@ -22,6 +21,11 @@ from ..google.takeout.paths import get_last_takeout
|
||||||
from ..kython import kompress
|
from ..kython import kompress
|
||||||
|
|
||||||
|
|
||||||
|
# otherwise uses ijson
|
||||||
|
# todo move to config??
|
||||||
|
USE_GREP = False
|
||||||
|
|
||||||
|
|
||||||
logger = LazyLogger(__name__)
|
logger = LazyLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -57,18 +61,20 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# todo ugh. fragile, not sure, maybe should do some assert in advance?
|
||||||
def _iter_via_grep(fo) -> Iterator[TsLatLon]:
|
def _iter_via_grep(fo) -> Iterator[TsLatLon]:
|
||||||
# grep version takes 5 seconds for 1M items (without processing)
|
# grep version takes 5 seconds for 1M items (without processing)
|
||||||
x = [None, None, None]
|
x = [-1, -1, -1]
|
||||||
for i, line in enumerate(fo):
|
for i, line in enumerate(fo):
|
||||||
if i > 0 and i % 3 == 0:
|
if i > 0 and i % 3 == 0:
|
||||||
yield tuple(x)
|
yield tuple(x) # type: ignore[misc]
|
||||||
n = re.search(b': "?(-?\\d+)"?,?$', line) # meh. somewhat fragile...
|
n = re.search(b': "?(-?\\d+)"?,?$', line) # meh. somewhat fragile...
|
||||||
|
assert n is not None
|
||||||
j = i % 3
|
j = i % 3
|
||||||
x[j] = int(n.group(1).decode('ascii'))
|
x[j] = int(n.group(1).decode('ascii'))
|
||||||
# make sure it's read what we expected
|
# make sure it's read what we expected
|
||||||
assert (i + 1) % 3 == 0
|
assert (i + 1) % 3 == 0
|
||||||
yield tuple(x)
|
yield tuple(x) # type: ignore[misc]
|
||||||
|
|
||||||
|
|
||||||
# todo could also use pool? not sure if that would really be faster...
|
# todo could also use pool? not sure if that would really be faster...
|
||||||
|
@ -102,7 +108,7 @@ def _iter_locations_fo(fit) -> Iterator[Location]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
for tsMs, latE7, lonE7 in fit:
|
for tsMs, latE7, lonE7 in fit:
|
||||||
dt = datetime.fromtimestamp(tsMs / 1000, tz=pytz.utc)
|
dt = datetime.fromtimestamp(tsMs / 1000, tz=timezone.utc)
|
||||||
total += 1
|
total += 1
|
||||||
if total % 10000 == 0:
|
if total % 10000 == 0:
|
||||||
logger.info('processing item %d %s', total, dt)
|
logger.info('processing item %d %s', total, dt)
|
||||||
|
@ -150,31 +156,33 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
|
||||||
# todo CPath? although not sure if it can be iterative?
|
# todo CPath? although not sure if it can be iterative?
|
||||||
ctx = kompress.open(path, _LOCATION_JSON)
|
ctx = kompress.open(path, _LOCATION_JSON)
|
||||||
|
|
||||||
# with ctx as fo:
|
if USE_GREP:
|
||||||
# fit = _iter_via_ijson(fo)
|
unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"'
|
||||||
# fit = islice(fit, start, stop)
|
extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'"
|
||||||
# yield from _iter_locations_fo(fit)
|
with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p:
|
||||||
|
out = p.stdout; assert out is not None
|
||||||
unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"'
|
fit = _iter_via_grep(out)
|
||||||
extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'"
|
fit = islice(fit, start, stop)
|
||||||
from subprocess import Popen, PIPE
|
yield from _iter_locations_fo(fit)
|
||||||
with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p:
|
else:
|
||||||
out = p.stdout; assert out is not None
|
with ctx as fo:
|
||||||
fit = _iter_via_grep(out)
|
# todo need to open as bytes
|
||||||
fit = islice(fit, start, stop)
|
fit = _iter_via_ijson(fo)
|
||||||
yield from _iter_locations_fo(fit)
|
fit = islice(fit, start, stop)
|
||||||
|
yield from _iter_locations_fo(fit)
|
||||||
# todo wonder if old takeouts could contribute as well??
|
# todo wonder if old takeouts could contribute as well??
|
||||||
|
|
||||||
|
|
||||||
def iter_locations(**kwargs) -> Iterator[Location]:
|
def locations(**kwargs) -> Iterator[Location]:
|
||||||
# TODO need to include older data
|
# TODO need to include older data
|
||||||
last_takeout = get_last_takeout(path=_LOCATION_JSON)
|
last_takeout = get_last_takeout(path=_LOCATION_JSON)
|
||||||
|
|
||||||
return _iter_locations(path=last_takeout, **kwargs)
|
return _iter_locations(path=last_takeout, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# todo deprecate?
|
||||||
def get_locations(*args, **kwargs) -> Sequence[Location]:
|
def get_locations(*args, **kwargs) -> Sequence[Location]:
|
||||||
return list(iter_locations(*args, **kwargs))
|
return list(locations(*args, **kwargs))
|
||||||
|
|
||||||
|
|
||||||
class LocInterval(NamedTuple):
|
class LocInterval(NamedTuple):
|
||||||
|
@ -222,7 +230,7 @@ class Window:
|
||||||
# todo cachew as well?
|
# todo cachew as well?
|
||||||
# TODO maybe if tag is none, we just don't care?
|
# TODO maybe if tag is none, we just don't care?
|
||||||
def get_groups(*args, **kwargs) -> List[LocInterval]:
|
def get_groups(*args, **kwargs) -> List[LocInterval]:
|
||||||
all_locations = iter(iter_locations(*args, **kwargs))
|
all_locations = iter(locations(*args, **kwargs))
|
||||||
locsi = Window(all_locations)
|
locsi = Window(all_locations)
|
||||||
i = 0
|
i = 0
|
||||||
groups: List[LocInterval] = []
|
groups: List[LocInterval] = []
|
||||||
|
@ -269,13 +277,3 @@ def get_groups(*args, **kwargs) -> List[LocInterval]:
|
||||||
pass
|
pass
|
||||||
dump_group()
|
dump_group()
|
||||||
return groups
|
return groups
|
||||||
|
|
||||||
|
|
||||||
# TODO not sure if necessary anymore...
|
|
||||||
def update_cache():
|
|
||||||
# TODO perhaps set hash to null instead, that's a bit less intrusive
|
|
||||||
cp = cache_path()
|
|
||||||
if cp.exists():
|
|
||||||
cp.unlink()
|
|
||||||
for _ in iter_locations():
|
|
||||||
pass
|
|
1
testdata/track
vendored
Submodule
1
testdata/track
vendored
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 2e8a334ca9cb4b04265b060025fb35bcfc53d6c1
|
37
tests/location.py
Normal file
37
tests/location.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from more_itertools import one
|
||||||
|
|
||||||
|
import pytest # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
def test() -> None:
|
||||||
|
from my.location.google import locations
|
||||||
|
locs = list(locations())
|
||||||
|
assert len(locs) == 3810
|
||||||
|
|
||||||
|
last = locs[-1]
|
||||||
|
assert last.dt.strftime('%Y%m%d %H:%M:%S') == '20170802 13:01:56' # should be utc
|
||||||
|
# todo approx
|
||||||
|
assert last.lat == 46.5515350
|
||||||
|
assert last.lon == 16.4742742
|
||||||
|
# todo check altitude
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def prepare(tmp_path: Path):
|
||||||
|
testdata = Path(__file__).absolute().parent.parent / 'testdata'
|
||||||
|
assert testdata.exists(), testdata
|
||||||
|
|
||||||
|
track = one(testdata.rglob('italy-slovenia-2017-07-29.json'))
|
||||||
|
|
||||||
|
# todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
|
||||||
|
import zipfile
|
||||||
|
with zipfile.ZipFile(tmp_path / 'takeout.zip', 'w') as zf:
|
||||||
|
zf.writestr('Takeout/Location History/Location History.json', track.read_bytes())
|
||||||
|
|
||||||
|
from my.cfg import config
|
||||||
|
class user_config:
|
||||||
|
takeout_path = tmp_path
|
||||||
|
config.google = user_config # type: ignore
|
||||||
|
yield
|
8
tox.ini
8
tox.ini
|
@ -13,6 +13,10 @@ commands =
|
||||||
# todo these are probably not necessary anymore?
|
# todo these are probably not necessary anymore?
|
||||||
python3 -c 'from my.config import stub as config; print(config.key)'
|
python3 -c 'from my.config import stub as config; print(config.key)'
|
||||||
python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least
|
python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least
|
||||||
|
|
||||||
|
# my.location.google deps
|
||||||
|
pip install geopy ijson
|
||||||
|
|
||||||
python3 -m pytest \
|
python3 -m pytest \
|
||||||
tests/core.py \
|
tests/core.py \
|
||||||
tests/misc.py \
|
tests/misc.py \
|
||||||
|
@ -20,7 +24,8 @@ commands =
|
||||||
tests/config.py::test_set_repo \
|
tests/config.py::test_set_repo \
|
||||||
tests/config.py::test_environment_variable \
|
tests/config.py::test_environment_variable \
|
||||||
tests/demo.py \
|
tests/demo.py \
|
||||||
tests/bluemaestro.py
|
tests/bluemaestro.py \
|
||||||
|
tests/location.py
|
||||||
# TODO add; once I figure out porg dependency?? tests/config.py
|
# TODO add; once I figure out porg dependency?? tests/config.py
|
||||||
# TODO run demo.py? just make sure with_my is a bit cleverer?
|
# TODO run demo.py? just make sure with_my is a bit cleverer?
|
||||||
# TODO e.g. under CI, rely on installing
|
# TODO e.g. under CI, rely on installing
|
||||||
|
@ -57,6 +62,7 @@ commands =
|
||||||
-p my.body.exercise.cardio \
|
-p my.body.exercise.cardio \
|
||||||
-p my.body.exercise.cross_trainer \
|
-p my.body.exercise.cross_trainer \
|
||||||
-p my.bluemaestro \
|
-p my.bluemaestro \
|
||||||
|
-p my.location.google \
|
||||||
--txt-report .mypy-coverage \
|
--txt-report .mypy-coverage \
|
||||||
--html-report .mypy-coverage \
|
--html-report .mypy-coverage \
|
||||||
{posargs}
|
{posargs}
|
||||||
|
|
Loading…
Add table
Reference in a new issue