my.location: let takeout provider be in a separate my.location.google; add CI test & enable mypy

This commit is contained in:
Dima Gerasimov 2020-10-05 21:14:14 +01:00 committed by karlicoss
parent 90ada92110
commit ba9acc3445
7 changed files with 82 additions and 35 deletions

3
.gitmodules vendored
View file

@ -1,3 +1,6 @@
[submodule "testdata/hpi-testdata"]
path = testdata/hpi-testdata
url = https://github.com/karlicoss/hpi-testdata
[submodule "testdata/track"]
path = testdata/track
url = https://github.com/tajtiattila/track

View file

@ -29,3 +29,6 @@ class exercise:
class bluemaestro:
export_path: Paths = ''
class google:
takeout_path: Paths = ''

View file

@ -1 +0,0 @@
takeout.py

View file

@ -4,14 +4,13 @@ Location data from Google Takeout
import json
from collections import deque
from datetime import datetime
from datetime import datetime, timezone
from itertools import islice
from pathlib import Path
from subprocess import Popen, PIPE
from typing import Any, Collection, Deque, Iterable, Iterator, List, NamedTuple, Optional, Sequence, IO, Tuple
import re
import pytz
# pip3 install geopy
import geopy # type: ignore
import geopy.distance # type: ignore
@ -22,6 +21,11 @@ from ..google.takeout.paths import get_last_takeout
from ..kython import kompress
# otherwise uses ijson
# todo move to config??
USE_GREP = False
logger = LazyLogger(__name__)
@ -57,18 +61,20 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
)
# todo ugh. fragile, not sure, maybe should do some assert in advance?
def _iter_via_grep(fo) -> Iterator[TsLatLon]:
# grep version takes 5 seconds for 1M items (without processing)
x = [None, None, None]
x = [-1, -1, -1]
for i, line in enumerate(fo):
if i > 0 and i % 3 == 0:
yield tuple(x)
yield tuple(x) # type: ignore[misc]
n = re.search(b': "?(-?\\d+)"?,?$', line) # meh. somewhat fragile...
assert n is not None
j = i % 3
x[j] = int(n.group(1).decode('ascii'))
# make sure it's read what we expected
assert (i + 1) % 3 == 0
yield tuple(x)
yield tuple(x) # type: ignore[misc]
# todo could also use pool? not sure if that would really be faster...
@ -102,7 +108,7 @@ def _iter_locations_fo(fit) -> Iterator[Location]:
return None
for tsMs, latE7, lonE7 in fit:
dt = datetime.fromtimestamp(tsMs / 1000, tz=pytz.utc)
dt = datetime.fromtimestamp(tsMs / 1000, tz=timezone.utc)
total += 1
if total % 10000 == 0:
logger.info('processing item %d %s', total, dt)
@ -150,31 +156,33 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
# todo CPath? although not sure if it can be iterative?
ctx = kompress.open(path, _LOCATION_JSON)
# with ctx as fo:
# fit = _iter_via_ijson(fo)
# fit = islice(fit, start, stop)
# yield from _iter_locations_fo(fit)
unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"'
extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'"
from subprocess import Popen, PIPE
with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p:
out = p.stdout; assert out is not None
fit = _iter_via_grep(out)
fit = islice(fit, start, stop)
yield from _iter_locations_fo(fit)
if USE_GREP:
unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"'
extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'"
with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p:
out = p.stdout; assert out is not None
fit = _iter_via_grep(out)
fit = islice(fit, start, stop)
yield from _iter_locations_fo(fit)
else:
with ctx as fo:
# todo need to open as bytes
fit = _iter_via_ijson(fo)
fit = islice(fit, start, stop)
yield from _iter_locations_fo(fit)
# todo wonder if old takeouts could contribute as well??
def iter_locations(**kwargs) -> Iterator[Location]:
def locations(**kwargs) -> Iterator[Location]:
# TODO need to include older data
last_takeout = get_last_takeout(path=_LOCATION_JSON)
return _iter_locations(path=last_takeout, **kwargs)
# todo deprecate?
def get_locations(*args, **kwargs) -> Sequence[Location]:
return list(iter_locations(*args, **kwargs))
return list(locations(*args, **kwargs))
class LocInterval(NamedTuple):
@ -222,7 +230,7 @@ class Window:
# todo cachew as well?
# TODO maybe if tag is none, we just don't care?
def get_groups(*args, **kwargs) -> List[LocInterval]:
all_locations = iter(iter_locations(*args, **kwargs))
all_locations = iter(locations(*args, **kwargs))
locsi = Window(all_locations)
i = 0
groups: List[LocInterval] = []
@ -269,13 +277,3 @@ def get_groups(*args, **kwargs) -> List[LocInterval]:
pass
dump_group()
return groups
# TODO not sure if necessary anymore...
def update_cache():
# TODO perhaps set hash to null instead, that's a bit less intrusive
cp = cache_path()
if cp.exists():
cp.unlink()
for _ in iter_locations():
pass

1
testdata/track vendored Submodule

@ -0,0 +1 @@
Subproject commit 2e8a334ca9cb4b04265b060025fb35bcfc53d6c1

37
tests/location.py Normal file
View file

@ -0,0 +1,37 @@
from pathlib import Path
from more_itertools import one
import pytest # type: ignore
def test() -> None:
    """
    Smoke test for the Google Takeout location provider.

    Loads every item yielded by ``my.location.google.locations`` and checks
    the total count, the timestamp of the last item and its coordinates.
    The input data is expected to be set up by the autouse fixture in this module.
    """
    from my.location.google import locations
    locs = list(locations())
    assert len(locs) == 3810

    last = locs[-1]
    assert last.dt.strftime('%Y%m%d %H:%M:%S') == '20170802 13:01:56' # should be utc

    # Compare coordinates with an absolute tolerance rather than exact float
    # equality: an exact match would break on any harmless change in how the
    # provider converts the raw values to floats.
    assert last.lat == pytest.approx(46.5515350, abs=1e-7)
    assert last.lon == pytest.approx(16.4742742, abs=1e-7)
    # todo check altitude
@pytest.fixture(autouse=True)
def prepare(tmp_path: Path):
    """
    Build a throwaway takeout archive and point the config at it.

    Finds the sample track under the repository's ``testdata`` directory,
    stores it in ``takeout.zip`` inside ``tmp_path`` under the location
    history member name, then sets ``config.google`` so that its
    ``takeout_path`` is ``tmp_path``.
    """
    import zipfile

    data_root = Path(__file__).absolute().parent.parent / 'testdata'
    assert data_root.exists(), data_root

    # exactly one matching file is expected; `one` raises otherwise
    track_json = one(data_root.rglob('italy-slovenia-2017-07-29.json'))

    # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
    member_name = 'Takeout/Location History/Location History.json'
    archive_path = tmp_path / 'takeout.zip'
    with zipfile.ZipFile(archive_path, 'w') as archive:
        archive.writestr(member_name, track_json.read_bytes())

    from my.cfg import config

    class user_config:
        takeout_path = tmp_path

    config.google = user_config # type: ignore
    yield

View file

@ -13,6 +13,10 @@ commands =
# todo these are probably not necessary anymore?
python3 -c 'from my.config import stub as config; print(config.key)'
python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least
# my.location.google deps
pip install geopy ijson
python3 -m pytest \
tests/core.py \
tests/misc.py \
@ -20,7 +24,8 @@ commands =
tests/config.py::test_set_repo \
tests/config.py::test_environment_variable \
tests/demo.py \
tests/bluemaestro.py
tests/bluemaestro.py \
tests/location.py
# TODO add; once I figure out porg dependency?? tests/config.py
# TODO run demo.py? just make sure with_my is a bit cleverer?
# TODO e.g. under CI, rely on installing
@ -57,6 +62,7 @@ commands =
-p my.body.exercise.cardio \
-p my.body.exercise.cross_trainer \
-p my.bluemaestro \
-p my.location.google \
--txt-report .mypy-coverage \
--html-report .mypy-coverage \
{posargs}