diff --git a/.gitmodules b/.gitmodules index 67d3592..5eb73b2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "testdata/hpi-testdata"] path = testdata/hpi-testdata url = https://github.com/karlicoss/hpi-testdata +[submodule "testdata/track"] + path = testdata/track + url = https://github.com/tajtiattila/track diff --git a/doc/example_config/my/config/__init__.py b/doc/example_config/my/config/__init__.py index 8728016..4f7393a 100644 --- a/doc/example_config/my/config/__init__.py +++ b/doc/example_config/my/config/__init__.py @@ -29,3 +29,6 @@ class exercise: class bluemaestro: export_path: Paths = '' + +class google: + takeout_path: Paths = '' diff --git a/my/location/__init__.py b/my/location/__init__.py deleted file mode 120000 index bfea2b5..0000000 --- a/my/location/__init__.py +++ /dev/null @@ -1 +0,0 @@ -takeout.py \ No newline at end of file diff --git a/my/location/takeout.py b/my/location/google.py similarity index 85% rename from my/location/takeout.py rename to my/location/google.py index a32242a..cc8d3f3 100644 --- a/my/location/takeout.py +++ b/my/location/google.py @@ -4,14 +4,13 @@ Location data from Google Takeout import json from collections import deque -from datetime import datetime +from datetime import datetime, timezone from itertools import islice from pathlib import Path +from subprocess import Popen, PIPE from typing import Any, Collection, Deque, Iterable, Iterator, List, NamedTuple, Optional, Sequence, IO, Tuple import re -import pytz - # pip3 install geopy import geopy # type: ignore import geopy.distance # type: ignore @@ -22,6 +21,11 @@ from ..google.takeout.paths import get_last_takeout from ..kython import kompress + # otherwise uses ijson + # todo move to config?? +USE_GREP = False + + logger = LazyLogger(__name__) @@ -57,18 +61,20 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]: ) +# todo ugh. fragile, not sure, maybe should do some assert in advance? def _iter_via_grep(fo) -> Iterator[TsLatLon]: # grep version takes 5 seconds for 1M items (without processing) - x = [None, None, None] + x = [-1, -1, -1] for i, line in enumerate(fo): if i > 0 and i % 3 == 0: - yield tuple(x) + yield tuple(x) # type: ignore[misc] n = re.search(b': "?(-?\\d+)"?,?$', line) # meh. somewhat fragile... + assert n is not None j = i % 3 x[j] = int(n.group(1).decode('ascii')) # make sure it's read what we expected assert (i + 1) % 3 == 0 - yield tuple(x) + yield tuple(x) # type: ignore[misc] # todo could also use pool? not sure if that would really be faster... @@ -102,7 +108,7 @@ def _iter_locations_fo(fit) -> Iterator[Location]: return None for tsMs, latE7, lonE7 in fit: - dt = datetime.fromtimestamp(tsMs / 1000, tz=pytz.utc) + dt = datetime.fromtimestamp(tsMs / 1000, tz=timezone.utc) total += 1 if total % 10000 == 0: logger.info('processing item %d %s', total, dt) @@ -150,31 +156,33 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]: # todo CPath? although not sure if it can be iterative? ctx = kompress.open(path, _LOCATION_JSON) - # with ctx as fo: - # fit = _iter_via_ijson(fo) - # fit = islice(fit, start, stop) - # yield from _iter_locations_fo(fit) - - unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"' - extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'" - from subprocess import Popen, PIPE - with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p: - out = p.stdout; assert out is not None - fit = _iter_via_grep(out) - fit = islice(fit, start, stop) - yield from _iter_locations_fo(fit) + if USE_GREP: + unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"' + extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'" + with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p: + out = p.stdout; assert out is not None + fit = _iter_via_grep(out) + fit = islice(fit, start, stop) + yield from _iter_locations_fo(fit) + else: + with ctx as fo: + # todo need to open as bytes + fit = _iter_via_ijson(fo) + fit = islice(fit, start, stop) + yield from _iter_locations_fo(fit) # todo wonder if old takeouts could contribute as well?? -def iter_locations(**kwargs) -> Iterator[Location]: +def locations(**kwargs) -> Iterator[Location]: # TODO need to include older data last_takeout = get_last_takeout(path=_LOCATION_JSON) return _iter_locations(path=last_takeout, **kwargs) +# todo deprecate? def get_locations(*args, **kwargs) -> Sequence[Location]: - return list(iter_locations(*args, **kwargs)) + return list(locations(*args, **kwargs)) class LocInterval(NamedTuple): @@ -222,7 +230,7 @@ class Window: # todo cachew as well? # TODO maybe if tag is none, we just don't care? def get_groups(*args, **kwargs) -> List[LocInterval]: - all_locations = iter(iter_locations(*args, **kwargs)) + all_locations = iter(locations(*args, **kwargs)) locsi = Window(all_locations) i = 0 groups: List[LocInterval] = [] @@ -269,13 +277,3 @@ def get_groups(*args, **kwargs) -> List[LocInterval]: pass dump_group() return groups - - -# TODO not sure if necessary anymore... -def update_cache(): - # TODO perhaps set hash to null instead, that's a bit less intrusive - cp = cache_path() - if cp.exists(): - cp.unlink() - for _ in iter_locations(): - pass diff --git a/testdata/track b/testdata/track new file mode 160000 index 0000000..2e8a334 --- /dev/null +++ b/testdata/track @@ -0,0 +1 @@ +Subproject commit 2e8a334ca9cb4b04265b060025fb35bcfc53d6c1 diff --git a/tests/location.py b/tests/location.py new file mode 100644 index 0000000..7671df8 --- /dev/null +++ b/tests/location.py @@ -0,0 +1,37 @@ +from pathlib import Path + +from more_itertools import one + +import pytest # type: ignore + + +def test() -> None: + from my.location.google import locations + locs = list(locations()) + assert len(locs) == 3810 + + last = locs[-1] + assert last.dt.strftime('%Y%m%d %H:%M:%S') == '20170802 13:01:56' # should be utc + # todo approx + assert last.lat == 46.5515350 + assert last.lon == 16.4742742 + # todo check altitude + + +@pytest.fixture(autouse=True) +def prepare(tmp_path: Path): + testdata = Path(__file__).absolute().parent.parent / 'testdata' + assert testdata.exists(), testdata + + track = one(testdata.rglob('italy-slovenia-2017-07-29.json')) + + # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs + import zipfile + with zipfile.ZipFile(tmp_path / 'takeout.zip', 'w') as zf: + zf.writestr('Takeout/Location History/Location History.json', track.read_bytes()) + + from my.cfg import config + class user_config: + takeout_path = tmp_path + config.google = user_config # type: ignore + yield diff --git a/tox.ini b/tox.ini index 7a3dddb..3f26068 100644 --- a/tox.ini +++ b/tox.ini @@ -13,6 +13,10 @@ commands = # todo these are probably not necessary anymore? python3 -c 'from my.config import stub as config; print(config.key)' python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least + + # my.location.google deps + pip install geopy ijson + python3 -m pytest \ tests/core.py \ tests/misc.py \ @@ -20,7 +24,8 @@ commands = tests/config.py::test_set_repo \ tests/config.py::test_environment_variable \ tests/demo.py \ - tests/bluemaestro.py + tests/bluemaestro.py \ + tests/location.py # TODO add; once I figure out porg depdencency?? tests/config.py # TODO run demo.py? just make sure with_my is a bit cleverer? # TODO e.g. under CI, rely on installing @@ -57,6 +62,7 @@ commands = -p my.body.exercise.cardio \ -p my.body.exercise.cross_trainer \ -p my.bluemaestro \ + -p my.location.google \ --txt-report .mypy-coverage \ --html-report .mypy-coverage \ {posargs}