time.tz.via_location: more consistent behaviour wrt caching
Previously it was possible for cachew to never properly initialize the cache if you only queried some dates in the past, because we never made it to the end of _iter_tzs. Also some minor cleanup.
This commit is contained in:
parent
70bb9ed0c5
commit
996169aa29
1 changed files with 79 additions and 81 deletions
|
@ -6,6 +6,24 @@ REQUIRES = [
|
||||||
'timezonefinder',
|
'timezonefinder',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
from collections import Counter
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import date, datetime
|
||||||
|
from functools import lru_cache
|
||||||
|
import heapq
|
||||||
|
from itertools import groupby
|
||||||
|
import os
|
||||||
|
from typing import Iterator, Optional, Tuple, Any, List, Iterable, Set, Dict
|
||||||
|
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
from my.core import make_logger, stat, Stats, datetime_aware
|
||||||
|
from my.core.common import mcachew
|
||||||
|
from my.core.source import import_source
|
||||||
|
from my.core.warnings import high
|
||||||
|
|
||||||
|
from my.location.common import LatLon
|
||||||
|
|
||||||
|
|
||||||
## user might not have tz config section, so makes sense to be more defensive about it
|
## user might not have tz config section, so makes sense to be more defensive about it
|
||||||
# todo might be useful to extract a helper for this
|
# todo might be useful to extract a helper for this
|
||||||
|
@ -27,8 +45,6 @@ if 'user_config' not in globals():
|
||||||
##
|
##
|
||||||
|
|
||||||
|
|
||||||
from my.core import dataclass
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class config(user_config):
|
class config(user_config):
|
||||||
# less precise, but faster
|
# less precise, but faster
|
||||||
|
@ -46,55 +62,33 @@ class config(user_config):
|
||||||
_iter_tz_refresh_time: int = 6
|
_iter_tz_refresh_time: int = 6
|
||||||
|
|
||||||
|
|
||||||
from collections import Counter
|
logger = make_logger(__name__)
|
||||||
from datetime import date, datetime
|
|
||||||
from functools import lru_cache
|
|
||||||
from itertools import groupby
|
|
||||||
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable, Set
|
|
||||||
|
|
||||||
import heapq
|
|
||||||
import pytz
|
|
||||||
from more_itertools import seekable
|
|
||||||
|
|
||||||
from my.core.common import LazyLogger, mcachew, tzdatetime
|
@lru_cache(None)
|
||||||
from my.core.source import import_source
|
|
||||||
|
|
||||||
logger = LazyLogger(__name__, level='warning')
|
|
||||||
|
|
||||||
@lru_cache(2)
|
|
||||||
def _timezone_finder(fast: bool) -> Any:
|
def _timezone_finder(fast: bool) -> Any:
|
||||||
if fast:
|
if fast:
|
||||||
# less precise, but faster
|
# less precise, but faster
|
||||||
from timezonefinder import TimezoneFinderL as Finder
|
from timezonefinder import TimezoneFinderL as Finder
|
||||||
else:
|
else:
|
||||||
from timezonefinder import TimezoneFinder as Finder # type: ignore
|
from timezonefinder import TimezoneFinder as Finder # type: ignore
|
||||||
return Finder(in_memory=True)
|
return Finder(in_memory=True)
|
||||||
|
|
||||||
|
|
||||||
# todo move to common?
|
|
||||||
Zone = str
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE: for now only daily resolution is supported... later will implement something more efficient
|
|
||||||
class DayWithZone(NamedTuple):
|
|
||||||
day: date
|
|
||||||
zone: Zone
|
|
||||||
|
|
||||||
|
|
||||||
from my.location.common import LatLon
|
|
||||||
|
|
||||||
# for backwards compatibility
|
# for backwards compatibility
|
||||||
def _locations() -> Iterator[Tuple[LatLon, datetime]]:
|
def _locations() -> Iterator[Tuple[LatLon, datetime_aware]]:
|
||||||
try:
|
try:
|
||||||
import my.location.all
|
import my.location.all
|
||||||
|
|
||||||
for loc in my.location.all.locations():
|
for loc in my.location.all.locations():
|
||||||
if loc.accuracy is not None and loc.accuracy > config.require_accuracy:
|
if loc.accuracy is not None and loc.accuracy > config.require_accuracy:
|
||||||
continue
|
continue
|
||||||
yield ((loc.lat, loc.lon), loc.dt)
|
yield ((loc.lat, loc.lon), loc.dt)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
from my.core.warnings import high
|
logger.exception(
|
||||||
logger.exception("Could not setup via_location using my.location.all provider, falling back to legacy google implementation", exc_info=e)
|
"Could not setup via_location using my.location.all provider, falling back to legacy google implementation", exc_info=e
|
||||||
|
)
|
||||||
high("Setup my.google.takeout.parser, then my.location.all for better google takeout/location data")
|
high("Setup my.google.takeout.parser, then my.location.all for better google takeout/location data")
|
||||||
|
|
||||||
import my.location.google
|
import my.location.google
|
||||||
|
@ -102,10 +96,22 @@ def _locations() -> Iterator[Tuple[LatLon, datetime]]:
|
||||||
for gloc in my.location.google.locations():
|
for gloc in my.location.google.locations():
|
||||||
yield ((gloc.lat, gloc.lon), gloc.dt)
|
yield ((gloc.lat, gloc.lon), gloc.dt)
|
||||||
|
|
||||||
|
|
||||||
# TODO: could use heapmerge or sort the underlying iterators somehow?
|
# TODO: could use heapmerge or sort the underlying iterators somehow?
|
||||||
# see https://github.com/karlicoss/HPI/pull/237#discussion_r858372934
|
# see https://github.com/karlicoss/HPI/pull/237#discussion_r858372934
|
||||||
def _sorted_locations() -> List[Tuple[LatLon, datetime]]:
|
def _sorted_locations() -> List[Tuple[LatLon, datetime_aware]]:
|
||||||
return list(sorted(_locations(), key=lambda x: x[1]))
|
return sorted(_locations(), key=lambda x: x[1])
|
||||||
|
|
||||||
|
|
||||||
|
# todo move to common?
|
||||||
|
Zone = str
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE: for now only daily resolution is supported... later will implement something more efficient
|
||||||
|
@dataclass(unsafe_hash=True)
|
||||||
|
class DayWithZone:
|
||||||
|
day: date
|
||||||
|
zone: Zone
|
||||||
|
|
||||||
|
|
||||||
def _find_tz_for_locs(finder: Any, locs: Iterable[Tuple[LatLon, datetime]]) -> Iterator[DayWithZone]:
|
def _find_tz_for_locs(finder: Any, locs: Iterable[Tuple[LatLon, datetime]]) -> Iterator[DayWithZone]:
|
||||||
|
@ -120,20 +126,22 @@ def _find_tz_for_locs(finder: Any, locs: Iterable[Tuple[LatLon, datetime]]) -> I
|
||||||
# TODO this is probably a bit expensive... test & benchmark
|
# TODO this is probably a bit expensive... test & benchmark
|
||||||
ldt = dt.astimezone(tz)
|
ldt = dt.astimezone(tz)
|
||||||
ndate = ldt.date()
|
ndate = ldt.date()
|
||||||
#if pdt is not None and ndate < pdt.date():
|
# if pdt is not None and ndate < pdt.date():
|
||||||
# # TODO for now just drop and collect the stats
|
# # TODO for now just drop and collect the stats
|
||||||
# # I guess we'd have minor drops while air travel...
|
# # I guess we'd have minor drops while air travel...
|
||||||
# warnings.append("local time goes backwards {ldt} ({tz}) < {pdt}")
|
# warnings.append("local time goes backwards {ldt} ({tz}) < {pdt}")
|
||||||
# continue
|
# continue
|
||||||
#pdt = ldt
|
# pdt = ldt
|
||||||
z = tz.zone; assert z is not None
|
z = tz.zone
|
||||||
|
assert z is not None
|
||||||
yield DayWithZone(day=ndate, zone=z)
|
yield DayWithZone(day=ndate, zone=z)
|
||||||
|
|
||||||
|
|
||||||
# Note: this takes a while, as the upstream since _locations isn't sorted, so this
|
# Note: this takes a while, as the upstream since _locations isn't sorted, so this
|
||||||
# has to do an iterative sort of the entire my.locations.all list
|
# has to do an iterative sort of the entire my.locations.all list
|
||||||
def _iter_local_dates() -> Iterator[DayWithZone]:
|
def _iter_local_dates() -> Iterator[DayWithZone]:
|
||||||
finder = _timezone_finder(fast=config.fast) # rely on the default
|
finder = _timezone_finder(fast=config.fast) # rely on the default
|
||||||
#pdt = None
|
# pdt = None
|
||||||
# TODO: warnings doesn't actually warn?
|
# TODO: warnings doesn't actually warn?
|
||||||
# warnings = []
|
# warnings = []
|
||||||
|
|
||||||
|
@ -157,7 +165,7 @@ def _iter_local_dates_fallback() -> Iterator[DayWithZone]:
|
||||||
yield from _find_tz_for_locs(_timezone_finder(fast=config.fast), _fallback_locations())
|
yield from _find_tz_for_locs(_timezone_finder(fast=config.fast), _fallback_locations())
|
||||||
|
|
||||||
|
|
||||||
def most_common(lst: List[DayWithZone]) -> DayWithZone:
|
def most_common(lst: Iterator[DayWithZone]) -> DayWithZone:
|
||||||
res, _ = Counter(lst).most_common(1)[0]
|
res, _ = Counter(lst).most_common(1)[0]
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
@ -181,59 +189,49 @@ def _iter_tz_depends_on() -> str:
|
||||||
|
|
||||||
|
|
||||||
# refresh _iter_tzs every few hours -- don't think a better depends_on is possible dynamically
|
# refresh _iter_tzs every few hours -- don't think a better depends_on is possible dynamically
|
||||||
@mcachew(logger=logger, depends_on=_iter_tz_depends_on)
|
@mcachew(depends_on=_iter_tz_depends_on)
|
||||||
def _iter_tzs() -> Iterator[DayWithZone]:
|
def _iter_tzs() -> Iterator[DayWithZone]:
|
||||||
# since we have no control over what order the locations are returned,
|
# since we have no control over what order the locations are returned,
|
||||||
# we need to sort them first before we can do a groupby
|
# we need to sort them first before we can do a groupby
|
||||||
local_dates: List[DayWithZone] = list(_iter_local_dates())
|
by_day = lambda p: p.day
|
||||||
local_dates.sort(key=lambda p: p.day)
|
|
||||||
|
local_dates: List[DayWithZone] = sorted(_iter_local_dates(), key=by_day)
|
||||||
logger.debug(f"no. of items using exact locations: {len(local_dates)}")
|
logger.debug(f"no. of items using exact locations: {len(local_dates)}")
|
||||||
|
|
||||||
local_dates_fallback: List[DayWithZone] = list(_iter_local_dates_fallback())
|
local_dates_fallback: List[DayWithZone] = sorted(_iter_local_dates_fallback(), key=by_day)
|
||||||
local_dates_fallback.sort(key=lambda p: p.day)
|
|
||||||
|
|
||||||
# find days that are in fallback but not in local_dates (i.e., missing days)
|
# find days that are in fallback but not in local_dates (i.e., missing days)
|
||||||
local_dates_set: Set[date] = set(d.day for d in local_dates)
|
local_dates_set: Set[date] = {d.day for d in local_dates}
|
||||||
use_fallback_days: List[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set]
|
use_fallback_days: List[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set]
|
||||||
logger.debug(f"no. of items being used from fallback locations: {len(use_fallback_days)}")
|
logger.debug(f"no. of items being used from fallback locations: {len(use_fallback_days)}")
|
||||||
|
|
||||||
# combine local_dates and missing days from fallback into a sorted list
|
# combine local_dates and missing days from fallback into a sorted list
|
||||||
all_dates = heapq.merge(local_dates, use_fallback_days, key=lambda p: p.day)
|
all_dates = heapq.merge(local_dates, use_fallback_days, key=by_day)
|
||||||
|
# todo could probably use heapify here instead of heapq.merge?
|
||||||
|
|
||||||
for d, gr in groupby(all_dates, key=lambda p: p.day):
|
for d, gr in groupby(all_dates, key=by_day):
|
||||||
logger.info(f"processed {d}{', using fallback' if d in local_dates_set else ''}")
|
logger.debug(f"processed {d}{', using fallback' if d in local_dates_set else ''}")
|
||||||
zone = most_common(list(gr)).zone
|
zone = most_common(gr).zone
|
||||||
yield DayWithZone(day=d, zone=zone)
|
yield DayWithZone(day=d, zone=zone)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(1)
|
@lru_cache(1)
|
||||||
def loc_tz_getter() -> Iterator[DayWithZone]:
|
def _day2zone() -> Dict[date, pytz.BaseTzInfo]:
|
||||||
# seekable makes it cache the emitted values
|
# NOTE: kinda unfortunate that this will have to process all days before returning result for just one
|
||||||
return seekable(_iter_tzs())
|
# however otherwise cachew cache might never be initialized properly
|
||||||
|
# so we'll always end up recomputing everything during subsequent runs
|
||||||
|
return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()}
|
||||||
|
|
||||||
|
|
||||||
# todo expose zone names too?
|
|
||||||
@lru_cache(maxsize=None)
|
|
||||||
def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
|
def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
|
||||||
sit = loc_tz_getter()
|
return _day2zone().get(d)
|
||||||
# todo hmm. seeking is not super efficient... might need to use some smarter dict-based cache
|
|
||||||
# hopefully, this method itself caches stuff for the users, so won't be too bad
|
|
||||||
sit.seek(0) # type: ignore
|
|
||||||
|
|
||||||
zone: Optional[str] = None
|
|
||||||
for x, tz in sit:
|
|
||||||
if x == d:
|
|
||||||
zone = tz
|
|
||||||
if x >= d:
|
|
||||||
break
|
|
||||||
return None if zone is None else pytz.timezone(zone)
|
|
||||||
|
|
||||||
|
|
||||||
# ok to cache, there are only a few home locations?
|
# ok to cache, there are only a few home locations?
|
||||||
@lru_cache(maxsize=None)
|
@lru_cache(None)
|
||||||
def _get_home_tz(loc: LatLon) -> Optional[pytz.BaseTzInfo]:
|
def _get_home_tz(loc: LatLon) -> Optional[pytz.BaseTzInfo]:
|
||||||
(lat, lng) = loc
|
(lat, lng) = loc
|
||||||
finder = _timezone_finder(fast=False) # ok to use slow here for better precision
|
finder = _timezone_finder(fast=False) # ok to use slow here for better precision
|
||||||
zone = finder.timezone_at(lat=lat, lng=lng)
|
zone = finder.timezone_at(lat=lat, lng=lng)
|
||||||
if zone is None:
|
if zone is None:
|
||||||
# TODO shouldn't really happen, warn?
|
# TODO shouldn't really happen, warn?
|
||||||
|
@ -242,7 +240,7 @@ def _get_home_tz(loc: LatLon) -> Optional[pytz.BaseTzInfo]:
|
||||||
return pytz.timezone(zone)
|
return pytz.timezone(zone)
|
||||||
|
|
||||||
|
|
||||||
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
def get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
||||||
'''
|
'''
|
||||||
Given a datetime, returns the timezone for that date.
|
Given a datetime, returns the timezone for that date.
|
||||||
'''
|
'''
|
||||||
|
@ -258,16 +256,14 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
||||||
# that datetime is between, else fallback on your first home location, so it acts
|
# that datetime is between, else fallback on your first home location, so it acts
|
||||||
# as a last resort
|
# as a last resort
|
||||||
from my.location.fallback import via_home as home
|
from my.location.fallback import via_home as home
|
||||||
|
|
||||||
loc = list(home.estimate_location(dt))
|
loc = list(home.estimate_location(dt))
|
||||||
assert len(loc) == 1, f"should only have one home location, received {loc}"
|
assert len(loc) == 1, f"should only have one home location, received {loc}"
|
||||||
return _get_home_tz(loc=(loc[0].lat, loc[0].lon))
|
return _get_home_tz(loc=(loc[0].lat, loc[0].lon))
|
||||||
|
|
||||||
# expose as 'public' function
|
|
||||||
get_tz = _get_tz
|
|
||||||
|
|
||||||
|
def localize(dt: datetime) -> datetime_aware:
|
||||||
def localize(dt: datetime) -> tzdatetime:
|
tz = get_tz(dt)
|
||||||
tz = _get_tz(dt)
|
|
||||||
if tz is None:
|
if tz is None:
|
||||||
# TODO -- this shouldn't really happen.. think about it carefully later
|
# TODO -- this shouldn't really happen.. think about it carefully later
|
||||||
return dt
|
return dt
|
||||||
|
@ -275,20 +271,17 @@ def localize(dt: datetime) -> tzdatetime:
|
||||||
return tz.localize(dt)
|
return tz.localize(dt)
|
||||||
|
|
||||||
|
|
||||||
from ...core import stat, Stats
|
def stats(quick: bool = False) -> Stats:
|
||||||
def stats(quick: bool=False) -> Stats:
|
|
||||||
if quick:
|
if quick:
|
||||||
prev, config.sort_locations = config.sort_locations, False
|
prev, config.sort_locations = config.sort_locations, False
|
||||||
res = {
|
res = {'first': next(_iter_local_dates())}
|
||||||
'first': next(_iter_local_dates())
|
|
||||||
}
|
|
||||||
config.sort_locations = prev
|
config.sort_locations = prev
|
||||||
return res
|
return res
|
||||||
# TODO not sure what would be a good stat() for this module...
|
# TODO not sure what would be a good stat() for this module...
|
||||||
# might be nice to print some actual timezones?
|
# might be nice to print some actual timezones?
|
||||||
# there aren't really any great iterables to expose
|
# there aren't really any great iterables to expose
|
||||||
import os
|
|
||||||
VIA_LOCATION_START_YEAR = int(os.environ.get("VIA_LOCATION_START_YEAR", 1990))
|
VIA_LOCATION_START_YEAR = int(os.environ.get("VIA_LOCATION_START_YEAR", 1990))
|
||||||
|
|
||||||
def localized_years():
|
def localized_years():
|
||||||
last = datetime.now().year + 2
|
last = datetime.now().year + 2
|
||||||
# note: deliberately take + 2 years, so the iterator exhausts. otherwise stuff might never get cached
|
# note: deliberately take + 2 years, so the iterator exhausts. otherwise stuff might never get cached
|
||||||
|
@ -296,4 +289,9 @@ def stats(quick: bool=False) -> Stats:
|
||||||
for Y in range(VIA_LOCATION_START_YEAR, last):
|
for Y in range(VIA_LOCATION_START_YEAR, last):
|
||||||
dt = datetime.fromisoformat(f'{Y}-01-01 01:01:01')
|
dt = datetime.fromisoformat(f'{Y}-01-01 01:01:01')
|
||||||
yield localize(dt)
|
yield localize(dt)
|
||||||
|
|
||||||
return stat(localized_years)
|
return stat(localized_years)
|
||||||
|
|
||||||
|
|
||||||
|
# deprecated -- still used in some other modules so need to keep
|
||||||
|
_get_tz = get_tz
|
||||||
|
|
Loading…
Add table
Reference in a new issue