my.location: let takeout provider be in a separate my.location.google; add CI test & enable mypy

This commit is contained in:
Dima Gerasimov 2020-10-05 21:14:14 +01:00 committed by karlicoss
parent 90ada92110
commit ba9acc3445
7 changed files with 82 additions and 35 deletions

3
.gitmodules vendored
View file

@ -1,3 +1,6 @@
[submodule "testdata/hpi-testdata"] [submodule "testdata/hpi-testdata"]
path = testdata/hpi-testdata path = testdata/hpi-testdata
url = https://github.com/karlicoss/hpi-testdata url = https://github.com/karlicoss/hpi-testdata
[submodule "testdata/track"]
path = testdata/track
url = https://github.com/tajtiattila/track

View file

@ -29,3 +29,6 @@ class exercise:
class bluemaestro: class bluemaestro:
export_path: Paths = '' export_path: Paths = ''
class google:
takeout_path: Paths = ''

View file

@ -1 +0,0 @@
takeout.py

View file

@ -4,14 +4,13 @@ Location data from Google Takeout
import json import json
from collections import deque from collections import deque
from datetime import datetime from datetime import datetime, timezone
from itertools import islice from itertools import islice
from pathlib import Path from pathlib import Path
from subprocess import Popen, PIPE
from typing import Any, Collection, Deque, Iterable, Iterator, List, NamedTuple, Optional, Sequence, IO, Tuple from typing import Any, Collection, Deque, Iterable, Iterator, List, NamedTuple, Optional, Sequence, IO, Tuple
import re import re
import pytz
# pip3 install geopy # pip3 install geopy
import geopy # type: ignore import geopy # type: ignore
import geopy.distance # type: ignore import geopy.distance # type: ignore
@ -22,6 +21,11 @@ from ..google.takeout.paths import get_last_takeout
from ..kython import kompress from ..kython import kompress
# otherwise uses ijson
# todo move to config??
USE_GREP = False
logger = LazyLogger(__name__) logger = LazyLogger(__name__)
@ -57,18 +61,20 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
) )
# todo ugh. fragile, not sure, maybe should do some assert in advance?
def _iter_via_grep(fo) -> Iterator[TsLatLon]: def _iter_via_grep(fo) -> Iterator[TsLatLon]:
# grep version takes 5 seconds for 1M items (without processing) # grep version takes 5 seconds for 1M items (without processing)
x = [None, None, None] x = [-1, -1, -1]
for i, line in enumerate(fo): for i, line in enumerate(fo):
if i > 0 and i % 3 == 0: if i > 0 and i % 3 == 0:
yield tuple(x) yield tuple(x) # type: ignore[misc]
n = re.search(b': "?(-?\\d+)"?,?$', line) # meh. somewhat fragile... n = re.search(b': "?(-?\\d+)"?,?$', line) # meh. somewhat fragile...
assert n is not None
j = i % 3 j = i % 3
x[j] = int(n.group(1).decode('ascii')) x[j] = int(n.group(1).decode('ascii'))
# make sure it's read what we expected # make sure it's read what we expected
assert (i + 1) % 3 == 0 assert (i + 1) % 3 == 0
yield tuple(x) yield tuple(x) # type: ignore[misc]
# todo could also use pool? not sure if that would really be faster... # todo could also use pool? not sure if that would really be faster...
@ -102,7 +108,7 @@ def _iter_locations_fo(fit) -> Iterator[Location]:
return None return None
for tsMs, latE7, lonE7 in fit: for tsMs, latE7, lonE7 in fit:
dt = datetime.fromtimestamp(tsMs / 1000, tz=pytz.utc) dt = datetime.fromtimestamp(tsMs / 1000, tz=timezone.utc)
total += 1 total += 1
if total % 10000 == 0: if total % 10000 == 0:
logger.info('processing item %d %s', total, dt) logger.info('processing item %d %s', total, dt)
@ -150,31 +156,33 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
# todo CPath? although not sure if it can be iterative? # todo CPath? although not sure if it can be iterative?
ctx = kompress.open(path, _LOCATION_JSON) ctx = kompress.open(path, _LOCATION_JSON)
# with ctx as fo: if USE_GREP:
# fit = _iter_via_ijson(fo)
# fit = islice(fit, start, stop)
# yield from _iter_locations_fo(fit)
unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"' unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"'
extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'" extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'"
from subprocess import Popen, PIPE
with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p: with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p:
out = p.stdout; assert out is not None out = p.stdout; assert out is not None
fit = _iter_via_grep(out) fit = _iter_via_grep(out)
fit = islice(fit, start, stop) fit = islice(fit, start, stop)
yield from _iter_locations_fo(fit) yield from _iter_locations_fo(fit)
else:
with ctx as fo:
# todo need to open as bytes
fit = _iter_via_ijson(fo)
fit = islice(fit, start, stop)
yield from _iter_locations_fo(fit)
# todo wonder if old takeouts could contribute as well?? # todo wonder if old takeouts could contribute as well??
def iter_locations(**kwargs) -> Iterator[Location]: def locations(**kwargs) -> Iterator[Location]:
# TODO need to include older data # TODO need to include older data
last_takeout = get_last_takeout(path=_LOCATION_JSON) last_takeout = get_last_takeout(path=_LOCATION_JSON)
return _iter_locations(path=last_takeout, **kwargs) return _iter_locations(path=last_takeout, **kwargs)
# todo deprecate?
def get_locations(*args, **kwargs) -> Sequence[Location]: def get_locations(*args, **kwargs) -> Sequence[Location]:
return list(iter_locations(*args, **kwargs)) return list(locations(*args, **kwargs))
class LocInterval(NamedTuple): class LocInterval(NamedTuple):
@ -222,7 +230,7 @@ class Window:
# todo cachew as well? # todo cachew as well?
# TODO maybe if tag is none, we just don't care? # TODO maybe if tag is none, we just don't care?
def get_groups(*args, **kwargs) -> List[LocInterval]: def get_groups(*args, **kwargs) -> List[LocInterval]:
all_locations = iter(iter_locations(*args, **kwargs)) all_locations = iter(locations(*args, **kwargs))
locsi = Window(all_locations) locsi = Window(all_locations)
i = 0 i = 0
groups: List[LocInterval] = [] groups: List[LocInterval] = []
@ -269,13 +277,3 @@ def get_groups(*args, **kwargs) -> List[LocInterval]:
pass pass
dump_group() dump_group()
return groups return groups
# TODO not sure if necessary anymore...
def update_cache():
# TODO perhaps set hash to null instead, that's a bit less intrusive
cp = cache_path()
if cp.exists():
cp.unlink()
for _ in iter_locations():
pass

1
testdata/track vendored Submodule

@ -0,0 +1 @@
Subproject commit 2e8a334ca9cb4b04265b060025fb35bcfc53d6c1

37
tests/location.py Normal file
View file

@ -0,0 +1,37 @@
from pathlib import Path
from more_itertools import one
import pytest # type: ignore
def test() -> None:
    """Check that the Google takeout location provider parses the sample track.

    Loads every location through ``my.location.google.locations`` and verifies
    the total count plus the timestamp and coordinates of the last point.
    """
    from my.location.google import locations

    locs = list(locations())
    assert len(locs) == 3810

    last = locs[-1]
    assert last.dt.strftime('%Y%m%d %H:%M:%S') == '20170802 13:01:56'  # should be utc

    # Coordinates are floats: compare with an explicit absolute tolerance rather
    # than exact equality. 1e-7 matches the seven decimal digits of the expected
    # literals; passing only `abs` disables pytest's default relative tolerance.
    assert last.lat == pytest.approx(46.5515350, abs=1e-7)
    assert last.lon == pytest.approx(16.4742742, abs=1e-7)
    # todo check altitude
@pytest.fixture(autouse=True)
def prepare(tmp_path: Path):
    """Point the ``google`` config section at a temporary takeout archive.

    Finds the sample track JSON under the repository's ``testdata`` directory,
    wraps it into ``takeout.zip`` inside ``tmp_path`` and assigns a config
    class whose ``takeout_path`` is ``tmp_path`` to ``config.google``.
    """
    import zipfile

    testdata_root = Path(__file__).absolute().parent.parent / 'testdata'
    assert testdata_root.exists(), testdata_root

    # exactly one matching file is expected anywhere under testdata
    track_json = one(testdata_root.rglob('italy-slovenia-2017-07-29.json'))

    # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
    archive = tmp_path / 'takeout.zip'
    with zipfile.ZipFile(archive, 'w') as zf:
        zf.writestr(
            'Takeout/Location History/Location History.json',
            track_json.read_bytes(),
        )

    from my.cfg import config

    class user_config:
        takeout_path = tmp_path

    config.google = user_config  # type: ignore
    yield

View file

@ -13,6 +13,10 @@ commands =
# todo these are probably not necessary anymore? # todo these are probably not necessary anymore?
python3 -c 'from my.config import stub as config; print(config.key)' python3 -c 'from my.config import stub as config; print(config.key)'
python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least
# my.location.google deps
pip install geopy ijson
python3 -m pytest \ python3 -m pytest \
tests/core.py \ tests/core.py \
tests/misc.py \ tests/misc.py \
@ -20,7 +24,8 @@ commands =
tests/config.py::test_set_repo \ tests/config.py::test_set_repo \
tests/config.py::test_environment_variable \ tests/config.py::test_environment_variable \
tests/demo.py \ tests/demo.py \
tests/bluemaestro.py tests/bluemaestro.py \
tests/location.py
# TODO add; once I figure out porg dependency?? tests/config.py # TODO add; once I figure out porg dependency?? tests/config.py
# TODO run demo.py? just make sure with_my is a bit cleverer? # TODO run demo.py? just make sure with_my is a bit cleverer?
# TODO e.g. under CI, rely on installing # TODO e.g. under CI, rely on installing
@ -57,6 +62,7 @@ commands =
-p my.body.exercise.cardio \ -p my.body.exercise.cardio \
-p my.body.exercise.cross_trainer \ -p my.body.exercise.cross_trainer \
-p my.bluemaestro \ -p my.bluemaestro \
-p my.location.google \
--txt-report .mypy-coverage \ --txt-report .mypy-coverage \
--html-report .mypy-coverage \ --html-report .mypy-coverage \
{posargs} {posargs}