tz.via_location: add fallback to _iter_tzs

update home to use estimate_location
This commit is contained in:
Sean Breckenridge 2023-02-22 04:52:37 -08:00
parent 0a48393589
commit c9c2415771

View file

@ -50,12 +50,14 @@ from collections import Counter
from datetime import date, datetime from datetime import date, datetime
from functools import lru_cache from functools import lru_cache
from itertools import groupby from itertools import groupby
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable, Set
from more_itertools import seekable import heapq
import pytz import pytz
from more_itertools import seekable
from my.core.common import LazyLogger, mcachew, tzdatetime from my.core.common import LazyLogger, mcachew, tzdatetime
from my.core.source import import_source
logger = LazyLogger(__name__, level='warning') logger = LazyLogger(__name__, level='warning')
@ -106,23 +108,13 @@ def _sorted_locations() -> List[Tuple[LatLon, datetime]]:
return list(sorted(_locations(), key=lambda x: x[1])) return list(sorted(_locations(), key=lambda x: x[1]))
# Note: this takes a while, as the upstream since _locations isn't sorted, so this def _find_tz_for_locs(finder: Any, locs: Iterable[Tuple[LatLon, datetime]]) -> Iterator[DayWithZone]:
# has to do an iterative sort of the entire my.locations.all list
def _iter_local_dates() -> Iterator[DayWithZone]:
finder = _timezone_finder(fast=config.fast) # rely on the default
#pdt = None
# TODO: warnings doesnt actually warn?
warnings = []
locs: Iterable[Tuple[LatLon, datetime]]
locs = _sorted_locations() if config.sort_locations else _locations()
# todo allow to skip if not noo many errors in row?
for (lat, lon), dt in locs: for (lat, lon), dt in locs:
# TODO right. its _very_ slow... # TODO right. its _very_ slow...
zone = finder.timezone_at(lat=lat, lng=lon) zone = finder.timezone_at(lat=lat, lng=lon)
# todo allow to skip if not noo many errors in row?
if zone is None: if zone is None:
warnings.append(f"Couldn't figure out tz for {lat}, {lon}") # warnings.append(f"Couldn't figure out tz for {lat}, {lon}")
continue continue
tz = pytz.timezone(zone) tz = pytz.timezone(zone)
# TODO this is probably a bit expensive... test & benchmark # TODO this is probably a bit expensive... test & benchmark
@ -137,6 +129,33 @@ def _iter_local_dates() -> Iterator[DayWithZone]:
z = tz.zone; assert z is not None z = tz.zone; assert z is not None
yield DayWithZone(day=ndate, zone=z) yield DayWithZone(day=ndate, zone=z)
# Note: this takes a while -- the upstream _locations isn't sorted, so when
# config.sort_locations is set the entire my.locations.all list has to be
# sorted before it can be processed
def _iter_local_dates() -> Iterator[DayWithZone]:
    """
    Yield whatever _find_tz_for_locs produces for the configured locations.

    The locations come from _sorted_locations() when config.sort_locations
    is truthy, otherwise from _locations() in whatever order it returns them.
    """
    tz_finder = _timezone_finder(fast=config.fast)  # rely on the default
    # TODO: warnings doesnt actually warn?
    points: Iterable[Tuple[LatLon, datetime]]
    if config.sort_locations:
        points = _sorted_locations()
    else:
        points = _locations()
    yield from _find_tz_for_locs(tz_finder, points)
# my.location.fallback.estimate_location could be used here instead,
# but since the result of this is saved behind cachew, iterating
# through all of the fallback locations is faster
@import_source(module_name="my.location.fallback.all")
def _iter_local_dates_fallback() -> Iterator[DayWithZone]:
    """
    Yield whatever _find_tz_for_locs produces for the fallback locations,
    feeding them in ascending order of their 'dt' attribute.
    """
    from my.location.fallback.all import fallback_locations

    def _as_latlon_dt_pairs() -> Iterator[Tuple[LatLon, datetime]]:
        # kept as a nested generator so the sort only runs once iteration starts
        ordered = sorted(fallback_locations(), key=lambda fl: fl.dt)
        for fl in ordered:
            yield ((fl.lat, fl.lon), fl.dt)

    tz_finder = _timezone_finder(fast=config.fast)
    yield from _find_tz_for_locs(tz_finder, _as_latlon_dt_pairs())
def most_common(lst: List[DayWithZone]) -> DayWithZone: def most_common(lst: List[DayWithZone]) -> DayWithZone:
res, _ = Counter(lst).most_common(1)[0] # type: ignore[var-annotated] res, _ = Counter(lst).most_common(1)[0] # type: ignore[var-annotated]
@ -161,15 +180,28 @@ def _iter_tz_depends_on() -> str:
return "{}_{}".format(day, hr_truncated) return "{}_{}".format(day, hr_truncated)
# refresh _iter_tzs every few hours -- don't think a better depends_on is possible dynamically
@mcachew(logger=logger, depends_on=_iter_tz_depends_on)
def _iter_tzs() -> Iterator[DayWithZone]:
    """
    Yield one DayWithZone per day, using the most common zone seen that day.

    Days covered by _iter_local_dates() use only those (exact) items. Days
    that appear only in _iter_local_dates_fallback() are filled in from the
    fallback items. Results are yielded in ascending day order.
    """
    # since we have no control over what order the locations are returned,
    # we need to sort them first before we can do a groupby
    local_dates: List[DayWithZone] = list(_iter_local_dates())
    local_dates.sort(key=lambda p: p.day)
    logger.debug(f"no. of items using exact locations: {len(local_dates)}")

    local_dates_fallback: List[DayWithZone] = list(_iter_local_dates_fallback())
    local_dates_fallback.sort(key=lambda p: p.day)

    # find days that are in fallback but not in local_dates (i.e., missing days)
    local_dates_set: Set[date] = {d.day for d in local_dates}
    use_fallback_days: List[DayWithZone] = [
        d for d in local_dates_fallback if d.day not in local_dates_set
    ]
    logger.debug(f"no. of items being used from fallback locations: {len(use_fallback_days)}")

    # combine local_dates and missing days from fallback into a sorted list;
    # both inputs are already sorted by day, which heapq.merge requires
    all_dates = heapq.merge(local_dates, use_fallback_days, key=lambda p: p.day)

    for d, gr in groupby(all_dates, key=lambda p: p.day):
        # a day is served from the fallback exactly when it has no exact
        # locations, i.e. when it is absent from local_dates_set
        logger.info(f"processed {d}{', using fallback' if d not in local_dates_set else ''}")
        zone = most_common(list(gr)).zone
        yield DayWithZone(day=d, zone=zone)
@ -219,8 +251,9 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
return res return res
# fallback to home tz # fallback to home tz
from my.location.fallback import via_home as home from my.location.fallback import via_home as home
loc = home.get_location(dt) loc = list(home.estimate_location(dt))
return _get_home_tz(loc=loc) assert len(loc) == 1, f"should only have one home location, received {loc}"
return _get_home_tz(loc=(loc[0].lat, loc[0].lon))
# expose as 'public' function # expose as 'public' function
get_tz = _get_tz get_tz = _get_tz