location fallback (#263)
see https://github.com/karlicoss/HPI/issues/262 * move home to fallback/via_home.py * move via_ip to fallback * add fallback model * add stub via_ip file * add fallback_locations for via_ip * use protocol for locations * estimate_from helper, via_home estimator, all.py * via_home: add accuracy, cache history * add datasources to gpslogger/google_takeout * tz/via_location.py: update import to fallback * denylist docs/installation instructions * tz.via_location: let user customize cachew refresh time * add via_ip.estimate_location using binary search * use estimate_location in via_home.get_location * tests: add gpslogger to location config stub * tests: install tz related libs in test env * tz: add regression test for broken windows dates * vendorize bisect_left from python src doesnt have a 'key' parameter till python3.10
This commit is contained in:
parent
6dc5e7575f
commit
98b086f746
25 changed files with 1166 additions and 190 deletions
53
my/location/fallback/all.py
Normal file
53
my/location/fallback/all.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
# TODO: add config here which passes kwargs to estimate_from (under_accuracy)
|
||||
# overwritable by passing the kwarg name here to the top-level estimate_location
|
||||
|
||||
from typing import Iterator, Optional
|
||||
|
||||
from my.core.source import import_source
|
||||
from my.location.fallback.common import (
|
||||
estimate_from,
|
||||
FallbackLocation,
|
||||
DateExact,
|
||||
LocationEstimator,
|
||||
)
|
||||
|
||||
|
||||
def fallback_locations() -> Iterator[FallbackLocation]:
    """Yield fallback locations from every enabled source.

    A source is enabled/disabled by commenting/uncommenting its loop below.
    """
    for loc in _ip_fallback_locations():
        yield loc
|
||||
|
||||
|
||||
def fallback_estimators() -> Iterator[LocationEstimator]:
    """Yield the enabled location estimators, in priority order.

    The order matters: it decides which estimator wins when location
    accuracies are equal or unavailable.
    """
    # remove/comment out an entry here to disable that estimator
    enabled = (
        _ip_estimate,
        _home_estimate,
    )
    yield from enabled
|
||||
|
||||
|
||||
def estimate_location(dt: DateExact, first_match: bool=False, under_accuracy: Optional[int] = None) -> FallbackLocation:
    """Estimate the location at ``dt`` using all enabled fallback estimators.

    ``first_match`` and ``under_accuracy`` are forwarded unchanged to
    ``estimate_from``.

    Raises ValueError when no estimator produced a location.
    """
    estimators = list(fallback_estimators())
    loc = estimate_from(dt, estimators=estimators, first_match=first_match, under_accuracy=under_accuracy)
    if loc is not None:
        return loc
    # should never happen if the user has home configured
    raise ValueError("Could not estimate location")
|
||||
|
||||
|
||||
@import_source(module_name="my.location.fallback.via_home")
def _home_estimate(dt: DateExact) -> Iterator[FallbackLocation]:
    """Yield the estimates produced by ``via_home.estimate_location`` for ``dt``."""
    # imported inside the function so the import happens under import_source
    # NOTE(review): presumably import_source handles the module being unavailable -- confirm
    from my.location.fallback.via_home import estimate_location as home_estimator

    for loc in home_estimator(dt):
        yield loc
|
||||
|
||||
|
||||
@import_source(module_name="my.location.fallback.via_ip")
def _ip_estimate(dt: DateExact) -> Iterator[FallbackLocation]:
    """Yield the estimates produced by ``via_ip.estimate_location`` for ``dt``."""
    # imported inside the function so the import happens under import_source
    # NOTE(review): presumably import_source handles the module being unavailable -- confirm
    from my.location.fallback.via_ip import estimate_location as ip_estimator

    for loc in ip_estimator(dt):
        yield loc
|
||||
|
||||
|
||||
@import_source(module_name="my.location.fallback.via_ip")
def _ip_fallback_locations() -> Iterator[FallbackLocation]:
    """Yield every fallback location provided by ``via_ip.fallback_locations``."""
    # imported inside the function so the import happens under import_source
    # NOTE(review): presumably import_source handles the module being unavailable -- confirm
    from my.location.fallback.via_ip import fallback_locations as ip_locations

    for loc in ip_locations():
        yield loc
|
120
my/location/fallback/common.py
Normal file
120
my/location/fallback/common.py
Normal file
|
@ -0,0 +1,120 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Callable, Sequence, Iterator, List, Union
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from ..common import LocationProtocol, Location
|
||||
# a point in time as accepted by the estimators: either a datetime object
# or a number that _datetime_timestamp converts with float()
DateExact = Union[datetime, float, int] # float/int as epoch timestamps
|
||||
|
||||
# alias used for FallbackLocation.duration; from_end_date fills it with
# timedelta.total_seconds(), i.e. a float number of seconds
Second = float
|
||||
|
||||
@dataclass
class FallbackLocation(LocationProtocol):
    '''
    A location estimate produced by a fallback provider.

    dt is the start of the estimate; duration, when set, is how long
    (in seconds) the estimate is considered to hold from dt.
    '''
    lat: float
    lon: float
    dt: datetime
    duration: Optional[Second] = None
    accuracy: Optional[float] = None
    elevation: Optional[float] = None
    datasource: Optional[str] = None # which module provided this, useful for debugging

    def to_location(self, end: bool = False) -> Location:
        '''
        Convert this estimate to a plain Location.

        by default the start date is used for the location
        If end is True, the start date + duration is used
        (when no duration is set, the start date is used regardless)
        '''
        dt: datetime = self.dt
        if end and self.duration is not None:
            # duration is a number of seconds (see from_end_date); timedelta's
            # first positional argument is *days*, so name the unit explicitly
            dt += timedelta(seconds=self.duration)
        return Location(
            lat=self.lat,
            lon=self.lon,
            dt=dt,
            accuracy=self.accuracy,
            elevation=self.elevation,
            datasource=self.datasource,
        )

    @classmethod
    def from_end_date(
        cls,
        *,
        lat: float,
        lon: float,
        dt: datetime,
        end_dt: datetime,
        accuracy: Optional[float] = None,
        elevation: Optional[float] = None,
        datasource: Optional[str] = None,
    ) -> FallbackLocation:
        '''
        Create FallbackLocation from a start date and an end date

        The duration is computed as the number of seconds between dt and end_dt.
        Raises ValueError if end_dt is earlier than dt.
        '''
        if end_dt < dt:
            raise ValueError("end_date must be after dt")
        duration = (end_dt - dt).total_seconds()
        return cls(
            lat=lat,
            lon=lon,
            dt=dt,
            duration=duration,
            accuracy=accuracy,
            elevation=elevation,
            datasource=datasource,
        )
|
||||
|
||||
|
||||
# a location estimator can return multiple fallbacks, in case there are
|
||||
# differing accuracies/to allow for possible matches to be computed
|
||||
# iteratively
|
||||
LocationEstimator = Callable[[DateExact], Iterator[FallbackLocation]]
|
||||
# an ordered collection of estimators, as taken by estimate_from; they are tried in sequence order
LocationEstimators = Sequence[LocationEstimator]
|
||||
|
||||
# helper function, instead of dealing with datetimes while comparing, just use epoch timestamps
|
||||
def _datetime_timestamp(dt: DateExact) -> float:
    """Convert ``dt`` to an epoch timestamp.

    Numbers are passed through ``float()``. Datetimes are converted with
    ``datetime.timestamp()``; if that conversion fails, the datetime is
    re-interpreted as UTC and converted again.
    """
    if not isinstance(dt, datetime):
        return float(dt)
    try:
        return dt.timestamp()
    except (ValueError, OverflowError, OSError):
        # https://github.com/python/cpython/issues/75395
        # naive datetimes are converted via local time, which can fail for
        # dates far in the past/future (ValueError, or OverflowError/OSError
        # depending on the platform); fall back to treating the value as UTC
        return dt.replace(tzinfo=timezone.utc).timestamp()
|
||||
|
||||
def _iter_estimate_from(
    dt: DateExact,
    estimators: LocationEstimators,
) -> Iterator[FallbackLocation]:
    """Lazily chain the results of every estimator for ``dt``.

    Estimators are called one at a time, in the order given, and each one
    is only invoked once the previous one has been exhausted.
    """
    for estimator in estimators:
        for candidate in estimator(dt):
            yield candidate
|
||||
|
||||
|
||||
def estimate_from(
    dt: DateExact,
    estimators: LocationEstimators,
    *,
    first_match: bool = False,
    under_accuracy: Optional[int] = None,
) -> Optional[FallbackLocation]:
    '''
    Pick a single location for dt out of everything the estimators return.

    first_match: if True, return the first acceptable location found
    under_accuracy: if set, locations whose accuracy is greater than this
      value are skipped; locations without an accuracy are always kept

    Returns None when no acceptable location was produced.
    '''
    def acceptable(loc: FallbackLocation) -> bool:
        if under_accuracy is None or loc.accuracy is None:
            return True
        return not (loc.accuracy > under_accuracy)

    candidates = (loc for loc in _iter_estimate_from(dt, estimators) if acceptable(loc))

    if first_match:
        # stop consuming the estimators as soon as something acceptable shows up
        return next(candidates, None)

    found: List[FallbackLocation] = list(candidates)
    if not found:
        return None

    # accuracy is only comparable when every candidate has one; otherwise
    # fall back to the order in which the estimators were passed in
    if any(loc.accuracy is None for loc in found):
        return found[0]

    # smallest accuracy value wins (the first one, on ties)
    return min(found, key=lambda loc: loc.accuracy)  # type: ignore[return-value, arg-type]
|
104
my/location/fallback/via_home.py
Normal file
104
my/location/fallback/via_home.py
Normal file
|
@ -0,0 +1,104 @@
|
|||
'''
|
||||
Simple location provider, serving as a fallback when more detailed data isn't available
|
||||
'''
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, time, timezone
|
||||
from functools import lru_cache
|
||||
from typing import Sequence, Tuple, Union, cast, List, Iterator
|
||||
|
||||
from my.config import location as user_config
|
||||
|
||||
from my.location.common import LatLon, DateIsh
|
||||
from my.location.fallback.common import FallbackLocation, DateExact
|
||||
|
||||
@dataclass
class Config(user_config):
    home: Union[
        LatLon,         # either single, 'current' location
        Sequence[Tuple[ # or, a sequence of location history
            DateIsh,    # date when you moved to
            LatLon,     # the location
        ]]
    ]

    # default ~30km accuracy
    # this is called 'home_accuracy' since it lives on the base location.config object,
    # to differentiate it from accuracy for other providers
    home_accuracy: float = 30_000

    # TODO could make current Optional and somehow determine from system settings?
    @property
    def _history(self) -> Sequence[Tuple[datetime, LatLon]]:
        """Normalise ``home`` into a list of (datetime, LatLon) pairs, sorted by date.

        Dates given as strings are parsed with ``datetime.fromisoformat``,
        plain dates are combined with midnight, and naive datetimes are
        tagged as UTC so that all entries can be compared.
        """
        raw = self.home
        # todo ugh, can't test for isinstance LatLon, it's a tuple itself
        entries: Sequence[Tuple[DateIsh, LatLon]]
        if not isinstance(raw[0], tuple):
            # must be a pair of coordinates. also doesn't really matter which date to pick?
            entries = [(datetime.min, cast(LatLon, raw))]
        else:
            # already a sequence
            entries = cast(Sequence[Tuple[DateIsh, LatLon]], raw)

        # todo cache?
        history = []
        for when, latlon in entries:
            moved: datetime
            if isinstance(when, str):
                moved = datetime.fromisoformat(when)
            elif isinstance(when, datetime):
                moved = when
            else:
                moved = datetime.combine(when, time.min)
            # todo not sure about doing it here, but makes it easier to compare..
            if moved.tzinfo is None:
                moved = moved.replace(tzinfo=timezone.utc)
            history.append((moved, latlon))
        history.sort(key=lambda entry: entry[0])
        return history
|
||||
|
||||
|
||||
from ...core.cfg import make_config
|
||||
# module-level config object; the functions below read the home history
# (config._history) and config.home_accuracy from it
config = make_config(Config)
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def get_location(dt: datetime) -> LatLon:
    '''
    Return the (lat, lon) of the home location estimated for dt

    Results are memoised per dt. estimate_location is expected to
    produce exactly one estimate; this is asserted.
    '''
    estimates = list(estimate_location(dt))
    assert len(estimates) == 1
    home = estimates[0]
    return home.lat, home.lon
|
||||
|
||||
|
||||
# TODO: in python3.9, use functools.cached_property instead?
|
||||
@lru_cache(maxsize=None)
def homes_cached() -> List[Tuple[datetime, LatLon]]:
    """Return the configured home history as a list.

    The history is computed from ``config._history`` on the first call and
    the same list is returned on every later call.
    """
    history = config._history
    return [*history]
|
||||
|
||||
|
||||
def estimate_location(dt: DateExact) -> Iterator[FallbackLocation]:
    """Yield exactly one home-based estimate for ``dt``.

    The most recent home whose move-in date is not after ``dt`` is used;
    if ``dt`` predates every entry, the earliest home is used instead.
    """
    from my.location.fallback.common import _datetime_timestamp

    ts: float = _datetime_timestamp(dt)
    newest_first = list(reversed(homes_cached()))
    for moved_at, (lat, lon) in newest_first:
        if ts >= moved_at.timestamp():
            # lat/lon now hold the matching home
            break
    else:
        # I guess the most reasonable is to fallback on the first location
        lat, lon = newest_first[-1][1]

    yield FallbackLocation(
        lat=lat,
        lon=lon,
        accuracy=config.home_accuracy,
        dt=datetime.fromtimestamp(ts, timezone.utc),
        datasource='via_home',
    )
|
99
my/location/fallback/via_ip.py
Normal file
99
my/location/fallback/via_ip.py
Normal file
|
@ -0,0 +1,99 @@
|
|||
"""
|
||||
Converts IP addresses provided by my.location.ip to estimated locations
|
||||
"""
|
||||
|
||||
# NOTE(review): presumably the list of extra packages this module depends on,
# consumed by the project's install tooling -- confirm
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from my.core import dataclass, Stats, make_config
|
||||
from my.config import location
|
||||
from my.core.warnings import medium
|
||||
|
||||
|
||||
@dataclass
class ip_config(location.via_ip):
    """Settings for IP-based estimates, layered on the user's ``location.via_ip`` config."""

    # no real science to this, just a guess of ~15km accuracy for IP addresses
    accuracy: float = 15_000.0

    # default to being accurate for a day
    for_duration: timedelta = timedelta(days=1)
|
||||
|
||||
|
||||
# TODO: move config to location.fallback.via_location instead and add migration
|
||||
# module-level config object; the functions below read config.accuracy
# and config.for_duration from it
config = make_config(ip_config)
|
||||
|
||||
|
||||
from functools import lru_cache
|
||||
from typing import Iterator, List
|
||||
|
||||
from my.core.common import LazyLogger
|
||||
from my.core.compat import bisect_left
|
||||
from my.ip.all import ips
|
||||
from my.location.common import Location
|
||||
from my.location.fallback.common import FallbackLocation, DateExact, _datetime_timestamp
|
||||
|
||||
# module logger, created with level "warning"
logger = LazyLogger(__name__, level="warning")
|
||||
|
||||
|
||||
def fallback_locations() -> Iterator[FallbackLocation]:
    """Yield one FallbackLocation per entry returned by ``ips()``.

    Every location gets the configured accuracy and a duration of
    ``config.for_duration`` (in seconds).
    """
    valid_for = config.for_duration.total_seconds()
    for entry in ips():
        latitude, longitude = entry.latlon
        yield FallbackLocation(
            lat=latitude,
            lon=longitude,
            dt=entry.dt,
            accuracy=config.accuracy,
            duration=valid_for,
            elevation=None,
            datasource="via_ip",
        )
|
||||
|
||||
|
||||
# for compatibility with my.location.via_ip, this shouldnt be used by other modules
|
||||
def locations() -> Iterator[Location]:
    """Deprecated: yield the fallback locations converted to plain ``Location`` objects.

    Emits a deprecation warning when iteration starts.
    """
    medium("locations is deprecated, should use fallback_locations or estimate_location")
    for fallback in fallback_locations():
        yield FallbackLocation.to_location(fallback)
|
||||
|
||||
|
||||
@lru_cache(1)
def _sorted_fallback_locations() -> List[FallbackLocation]:
    """Return all fallback locations that have a duration, sorted by start time.

    The result is cached, so the locations are only loaded and sorted once.
    """
    with_duration = [loc for loc in fallback_locations() if loc.duration is not None]
    logger.debug(f"Fallback locations: {len(with_duration)}, sorting...:")
    return sorted(with_duration, key=lambda loc: loc.dt.timestamp())
|
||||
|
||||
|
||||
def estimate_location(dt: DateExact) -> Iterator[FallbackLocation]:
    """Yield every IP-based location whose [start, start + duration] window contains ``dt``."""
    candidates = _sorted_fallback_locations()
    target = _datetime_timestamp(dt)

    # a location can only contain dt if it started at most config.for_duration
    # before dt, so binary-search for the first location starting at/after that point
    earliest_start = target - config.for_duration.total_seconds()
    first = bisect_left(candidates, earliest_start, key=lambda l: l.dt.timestamp())  # type: ignore[operator,call-arg,type-var]

    # walk forward in start-time order; if everything starts before the
    # search point, first == len(candidates) and the loop body never runs
    for pos in range(first, len(candidates)):
        loc = candidates[pos]
        began = loc.dt.timestamp()
        if began > target:
            # sorted by start time: no later location can contain dt
            break
        # loc.duration is filtered for in _sorted_fallback_locations
        ended = began + loc.duration  # type: ignore[operator]
        if target <= ended:
            yield loc
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the ``stat`` summary computed for ``locations``."""
    from my.core import stat

    summary = stat(locations)
    return {**summary}
|
Loading…
Add table
Add a link
Reference in a new issue