location: add all.py, using takeout/gpslogger/ip

This commit is contained in:
Sean Breckenridge 2022-04-25 18:21:52 -07:00
parent 66a00c6ada
commit ca10d524a4
12 changed files with 357 additions and 27 deletions

View file

@ -7,27 +7,34 @@ REQUIRES = [
]
from my.config import time
from my.core import dataclass
# Settings for the location-based timezone lookup in this module.
# Subclasses time.tz.via_location from my.config, adding the defaults below.
@dataclass
class config(time.tz.via_location):
# less precise, but faster; passed to _timezone_finder(fast=...) to pick the finder implementation
fast: bool = True
# if the accuracy for the location is more than 5km, don't use
# (5_000 == 5km per the note above, so presumably the unit is metres -- TODO confirm)
# NOTE(review): no use of require_accuracy is visible in this part of the file; confirm where it is applied
require_accuracy: float = 5_000
from collections import Counter
from datetime import date, datetime
from functools import lru_cache
from itertools import groupby
from typing import Iterator, NamedTuple, Optional
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List
from more_itertools import seekable
import pytz
from ...core.common import LazyLogger, mcachew, tzdatetime
from ...core.cachew import cache_dir
from ...location.google import locations
from my.core.common import LazyLogger, mcachew, tzdatetime
logger = LazyLogger(__name__, level='warning')
logger = LazyLogger(__name__, level='debug')
# todo should move to config? not sure
_FASTER: bool = True
@lru_cache(2)
def _timezone_finder(fast: bool):
def _timezone_finder(fast: bool) -> Any:
if fast:
# less precise, but faster
from timezonefinder import TimezoneFinderL as Finder # type: ignore
@ -46,20 +53,40 @@ class DayWithZone(NamedTuple):
zone: Zone
def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
finder = _timezone_finder(fast=_FASTER) # rely on the default
from my.location.common import LatLon
# for backwards compatibility
def _locations() -> Iterator[Tuple[LatLon, datetime]]:
    """
    Yield ``((lat, lon), dt)`` pairs, one per known location.

    The ``my.location.all`` provider is tried first. If importing it or
    iterating over it raises, the exception is logged, ``my.core.warnings.high``
    is called with a setup hint, and locations are read from the legacy
    ``my.location.google`` module instead.
    """
    try:
        import my.location.all

        for loc in my.location.all.locations():
            yield ((loc.lat, loc.lon), loc.dt)
    except Exception as e:
        # NOTE: the try block wraps the iteration itself, not just the import, so a
        # failure part-way through means the items already yielded are followed by
        # everything the fallback provider yields
        from my.core.warnings import high

        logger.exception("Could not setup via_location using my.location.all provider, falling back to legacy google implementation", exc_info=e)
        high("Setup my.google.takeout.parser, then my.location.all for better google takeout/location data")

        import my.location.google

        # separate loop variable: the two providers' item types may differ, so
        # reusing 'loc' would conflate them for type checkers
        for gloc in my.location.google.locations():
            yield ((gloc.lat, gloc.lon), gloc.dt)
def _iter_local_dates() -> Iterator[DayWithZone]:
finder = _timezone_finder(fast=config.fast) # rely on the default
pdt = None
warnings = []
# todo allow to skip if not too many errors in a row?
for l in locations(start=start, stop=stop):
for (lat, lon), dt in _locations():
# TODO right. its _very_ slow...
zone = finder.timezone_at(lng=l.lon, lat=l.lat)
zone = finder.timezone_at(lat=lat, lng=lon)
if zone is None:
warnings.append(f"Couldn't figure out tz for {l}")
warnings.append(f"Couldn't figure out tz for {lat}, {lon}")
continue
tz = pytz.timezone(zone)
# TODO this is probably a bit expensive... test & benchmark
ldt = l.dt.astimezone(tz)
ldt = dt.astimezone(tz)
ndate = ldt.date()
if pdt is not None and ndate < pdt.date():
# TODO for now just drop and collect the stats
@ -71,12 +98,13 @@ def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
yield DayWithZone(day=ndate, zone=z)
def most_common(l):
res, count = Counter(l).most_common(1)[0] # type: ignore[var-annotated]
def most_common(lst: List[DayWithZone]) -> DayWithZone:
    """
    Return the element of ``lst`` that occurs most often.

    Elements with equal counts are ranked in the order first encountered
    (Counter.most_common semantics). An empty ``lst`` raises IndexError.
    """
    ranked = Counter(lst).most_common(1)
    # ranked is a one-item list of (element, count); only the element is needed
    winner = ranked[0][0]
    return winner
@mcachew(cache_path=cache_dir())
# refresh _iter_tzs once per day -- don't think a better depends_on is possible dynamically
@mcachew(logger=logger, depends_on=lambda: str(date.today()))
def _iter_tzs() -> Iterator[DayWithZone]:
for d, gr in groupby(_iter_local_dates(), key=lambda p: p.day):
logger.info('processed %s', d)
@ -106,6 +134,7 @@ def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
break
return None if zone is None else pytz.timezone(zone)
# ok to cache, there are only a few home locations?
@lru_cache(maxsize=None)
def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
@ -119,8 +148,10 @@ def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
return pytz.timezone(zone)
# TODO expose? to main as well?
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
'''
Given a datetime, returns the timezone for that date.
'''
res = _get_day_tz(d=dt.date())
if res is not None:
return res
@ -129,6 +160,9 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
loc = home.get_location(dt)
return _get_home_tz(loc=loc)
# expose as 'public' function
get_tz = _get_tz
def localize(dt: datetime) -> tzdatetime:
tz = _get_tz(dt)