my.location.google: cleanup old stuff related to tagging, definitely doesn't belong to this module

This commit is contained in:
Dima Gerasimov 2020-10-05 21:34:27 +01:00 committed by karlicoss
parent ba9acc3445
commit dc2518b348

View file

@ -1,21 +1,22 @@
""" """
Location data from Google Takeout Location data from Google Takeout
""" """
REQUIRES = [
'geopy', # checking that coordinates are valid
]
import json import json
from collections import deque
from datetime import datetime, timezone from datetime import datetime, timezone
from itertools import islice from itertools import islice
from pathlib import Path from pathlib import Path
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
from typing import Any, Collection, Deque, Iterable, Iterator, List, NamedTuple, Optional, Sequence, IO, Tuple from typing import Any, Collection, Iterator, NamedTuple, Optional, Sequence, IO, Tuple
import re import re
# pip3 install geopy # pip3 install geopy
import geopy # type: ignore import geopy # type: ignore
import geopy.distance # type: ignore
from ..core.common import get_files, LazyLogger, mcachew from ..core.common import LazyLogger, mcachew
from ..core.cachew import cache_dir from ..core.cachew import cache_dir
from ..google.takeout.paths import get_last_takeout from ..google.takeout.paths import get_last_takeout
from ..kython import kompress from ..kython import kompress
@ -29,15 +30,11 @@ USE_GREP = False
logger = LazyLogger(__name__) logger = LazyLogger(__name__)
Tag = Optional[str]
# todo maybe don't tag by default?
class Location(NamedTuple): class Location(NamedTuple):
dt: datetime dt: datetime
lat: float lat: float
lon: float lon: float
alt: Optional[float] alt: Optional[float]
tag: Tag
TsLatLon = Tuple[int, int, int] TsLatLon = Tuple[int, int, int]
@ -86,27 +83,6 @@ def _iter_locations_fo(fit) -> Iterator[Location]:
total = 0 total = 0
errors = 0 errors = 0
try:
from my.config.locations import LOCATIONS as known_locations
except ModuleNotFoundError as e:
name = 'my.config.locations'
if e.name != name:
raise e
logger.warning("'%s' isn't found. setting known_locations to empty list", name)
known_locations = []
# TODO tagging should be takeout-agnostic
def tagger(dt: datetime, point: geopy.Point) -> Tag:
    '''
    Return the tag of the first known location that contains ``point``.

    Every entry of ``known_locations`` is unpacked as ``(lat, lon, dist, tag)``;
    an entry matches when the geopy distance between ``(lat, lon)`` and
    ``point`` (read through its ``.m`` attribute) is strictly below ``dist``.
    Entries are checked in order and the first match wins.

    ``dt`` is accepted but not used by the current implementation.
    Returns None when no entry matches.
    '''
    # TODO use something more efficient?
    for known_lat, known_lon, radius, label in known_locations:
        away = geopy.distance.distance((known_lat, known_lon), point).m
        if away < radius:
            return label
    return None
for tsMs, latE7, lonE7 in fit: for tsMs, latE7, lonE7 in fit:
dt = datetime.fromtimestamp(tsMs / 1000, tz=timezone.utc) dt = datetime.fromtimestamp(tsMs / 1000, tz=timezone.utc)
total += 1 total += 1
@ -122,6 +98,7 @@ def _iter_locations_fo(fit) -> Iterator[Location]:
logger.exception(e) logger.exception(e)
errors += 1 errors += 1
if float(errors) / total > 0.01: if float(errors) / total > 0.01:
# todo make defensive?
raise RuntimeError('too many errors! aborting') raise RuntimeError('too many errors! aborting')
else: else:
continue continue
@ -129,15 +106,11 @@ def _iter_locations_fo(fit) -> Iterator[Location]:
# todo support later # todo support later
# alt = j.get("altitude", None) # alt = j.get("altitude", None)
alt = None alt = None
# todo enable tags later
# tag = tagger(dt, point) # TODO take accuracy into account??
tag = None
yield Location( yield Location(
dt=dt, dt=dt,
lat=lat, lat=lat,
lon=lon, lon=lon,
alt=alt, alt=alt,
tag=tag
) )
@ -145,7 +118,8 @@ _LOCATION_JSON = 'Takeout/Location History/Location History.json'
# todo if start != 0, disable cache? again this is where nicer caching would come handy # todo if start != 0, disable cache? again this is where nicer caching would come handy
# TODO hope they are sorted... (could assert for it) # TODO hope they are sorted... (could assert for it)
@mcachew(cache_dir() / 'google_location.cache', logger=logger) # todo configure cache automatically?
@mcachew(cache_dir(), logger=logger)
def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]: def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
ctx: IO[str] ctx: IO[str]
if path.suffix == '.json': if path.suffix == '.json':
@ -180,100 +154,13 @@ def locations(**kwargs) -> Iterator[Location]:
return _iter_locations(path=last_takeout, **kwargs) return _iter_locations(path=last_takeout, **kwargs)
from ..core.common import stat, Stats
def stats() -> Stats:
    """Return whatever the core ``stat`` helper reports for the ``locations`` provider."""
    return stat(locations)
# todo add dataframe
# todo deprecate? # todo deprecate?
def get_locations(*args, **kwargs) -> Sequence[Location]: def get_locations(*args, **kwargs) -> Sequence[Location]:
return list(locations(*args, **kwargs)) return list(locations(*args, **kwargs))
class LocInterval(NamedTuple):
    """A pair of locations delimiting one group, as built by ``get_groups``."""
    # first location collected for the group
    from_: Location
    # last location collected for the group (same object as ``from_`` for a single-item group)
    to: Location
# TODO use more_itertools
# TODO kython? nicer interface?
class Window:
    """
    Lazily indexable sliding window over an iterator.

    Items are pulled from the wrapped iterator on demand and buffered in a
    deque. All indices are absolute positions in the original iterator:

    - ``start`` is the absolute index of the first item still buffered;
    - ``end`` is the number of items pulled from the iterator so far.

    Items before ``start`` have been discarded and can no longer be read.
    """

    def __init__(self, it):
        # ``it`` must be an iterator (``next`` is called on it directly)
        self.it = it
        self.storage: Deque[Any] = deque()
        self.start = 0
        self.end = 0

    def load_to(self, to):
        """Pull items until index ``to`` is buffered or the iterator runs out."""
        while to >= self.end:
            try:
                item = next(self.it)
            except StopIteration:
                break
            self.storage.append(item)
            self.end += 1

    def exists(self, i):
        """Return True if the iterator yields an item at absolute index ``i``."""
        self.load_to(i)
        return i < self.end

    def consume_to(self, i):
        """
        Discard buffered items so that the window starts at index ``i``.

        The target is clamped to ``[start, end]``: asking to consume past the
        end of an exhausted iterator just empties the buffer (instead of
        popping from an empty deque), and asking to move backwards is a no-op
        (discarded items cannot be restored, so ``start`` must never decrease).
        """
        self.load_to(i)
        target = max(self.start, min(i, self.end))
        for _ in range(target - self.start):
            self.storage.popleft()
        self.start = target

    def __getitem__(self, i):
        """
        Return the item at absolute index ``i``, loading it if necessary.

        Raises IndexError if ``i`` was already consumed (or is negative), or
        if the iterator ends before reaching ``i``.
        """
        self.load_to(i)
        offset = i - self.start
        if offset < 0:
            # an explicit error rather than an assert: under ``-O`` a negative
            # offset would silently index the deque from its right end
            raise IndexError(f'index {i} is before the window start {self.start}')
        return self.storage[offset]
# todo cachew as well?
# TODO maybe if tag is none, we just don't care?
def get_groups(*args, **kwargs) -> List[LocInterval]:
    """
    Collapse the stream of locations into intervals of items sharing a tag.

    All arguments are forwarded to ``locations``. The stream is read through a
    ``Window`` so that a few items can be looked at ahead of the current
    position. Each finished group is reported as a ``LocInterval`` holding its
    first and last location only.
    """
    all_locations = iter(locations(*args, **kwargs))
    locsi = Window(all_locations)
    i = 0
    groups: List[LocInterval] = []
    # current group; never holds more than two items (first and latest)
    curg: List[Location] = []

    def add_to_group(x):
        # keep the first item, and overwrite the second slot with the latest one
        nonlocal curg
        if len(curg) < 2:
            curg.append(x)
        else:
            curg[-1] = x

    def dump_group():
        # emit the current group (if any) as an interval and start a fresh one
        nonlocal curg
        if len(curg) > 0:
            # print("new group")
            groups.append(LocInterval(from_=curg[0], to=curg[-1]))
            curg = []

    while locsi.exists(i):
        if i % 10000 == 0:
            logger.debug('grouping item %d', i)

        # drop everything before i from the window buffer
        locsi.consume_to(i)

        last = None if len(curg) == 0 else curg[-1]
        cur = locsi[i]
        j = i
        match = False
        # look at up to 10 items starting from i for one that continues the current group
        while not match and locsi.exists(j) and j < i + 10: # TODO FIXME time distance here... e.g. half an hour?
            cur = locsi[j]
            if last is None or cur.tag == last.tag:
                # ok
                # items between i and j (with a different tag) are skipped over
                add_to_group(cur)
                i = j + 1
                match = True
            else:
                j += 1
        # if we made here without advancing
        if not match:
            # NOTE(review): the item at i itself is not added to any group here --
            # the group is closed and the next group starts from i + 1. Confirm this is intended.
            dump_group()
            i += 1
        else:
            pass
    # flush whatever is left once the stream is exhausted
    dump_group()
    return groups