diff --git a/my/time/tz/via_location.py b/my/time/tz/via_location.py index 89b2551..343d30a 100644 --- a/my/time/tz/via_location.py +++ b/my/time/tz/via_location.py @@ -50,12 +50,14 @@ from collections import Counter from datetime import date, datetime from functools import lru_cache from itertools import groupby -from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable +from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable, Set -from more_itertools import seekable +import heapq import pytz +from more_itertools import seekable from my.core.common import LazyLogger, mcachew, tzdatetime +from my.core.source import import_source logger = LazyLogger(__name__, level='warning') @@ -106,23 +108,13 @@ def _sorted_locations() -> List[Tuple[LatLon, datetime]]: return list(sorted(_locations(), key=lambda x: x[1])) -# Note: this takes a while, as the upstream since _locations isn't sorted, so this -# has to do an iterative sort of the entire my.locations.all list -def _iter_local_dates() -> Iterator[DayWithZone]: - finder = _timezone_finder(fast=config.fast) # rely on the default - #pdt = None - # TODO: warnings doesnt actually warn? - warnings = [] - - locs: Iterable[Tuple[LatLon, datetime]] - locs = _sorted_locations() if config.sort_locations else _locations() - - # todo allow to skip if not noo many errors in row? +def _find_tz_for_locs(finder: Any, locs: Iterable[Tuple[LatLon, datetime]]) -> Iterator[DayWithZone]: for (lat, lon), dt in locs: # TODO right. its _very_ slow... zone = finder.timezone_at(lat=lat, lng=lon) + # todo allow to skip if not noo many errors in row? if zone is None: - warnings.append(f"Couldn't figure out tz for {lat}, {lon}") + # warnings.append(f"Couldn't figure out tz for {lat}, {lon}") continue tz = pytz.timezone(zone) # TODO this is probably a bit expensive... test & benchmark @@ -137,6 +129,33 @@ def _iter_local_dates() -> Iterator[DayWithZone]: z = tz.zone; assert z is not None yield DayWithZone(day=ndate, zone=z) +# Note: this takes a while, as the upstream since _locations isn't sorted, so this +# has to do an iterative sort of the entire my.locations.all list +def _iter_local_dates() -> Iterator[DayWithZone]: + finder = _timezone_finder(fast=config.fast) # rely on the default + #pdt = None + # TODO: warnings doesnt actually warn? + # warnings = [] + + locs: Iterable[Tuple[LatLon, datetime]] + locs = _sorted_locations() if config.sort_locations else _locations() + + yield from _find_tz_for_locs(finder, locs) + + +# my.location.fallback.estimate_location could be used here +# but iterating through all the locations is faster since this +# is saved behind cachew +@import_source(module_name="my.location.fallback.all") +def _iter_local_dates_fallback() -> Iterator[DayWithZone]: + from my.location.fallback.all import fallback_locations as flocs + + def _fallback_locations() -> Iterator[Tuple[LatLon, datetime]]: + for loc in sorted(flocs(), key=lambda x: x.dt): + yield ((loc.lat, loc.lon), loc.dt) + + yield from _find_tz_for_locs(_timezone_finder(fast=config.fast), _fallback_locations()) + def most_common(lst: List[DayWithZone]) -> DayWithZone: res, _ = Counter(lst).most_common(1)[0] # type: ignore[var-annotated] @@ -161,15 +180,28 @@ def _iter_tz_depends_on() -> str: return "{}_{}".format(day, hr_truncated) -# refresh _iter_tzs every 6 hours -- don't think a better depends_on is possible dynamically +# refresh _iter_tzs every few hours -- don't think a better depends_on is possible dynamically @mcachew(logger=logger, depends_on=_iter_tz_depends_on) def _iter_tzs() -> Iterator[DayWithZone]: # since we have no control over what order the locations are returned, # we need to sort them first before we can do a groupby local_dates: List[DayWithZone] = list(_iter_local_dates()) local_dates.sort(key=lambda p: p.day) - for d, gr in groupby(local_dates, key=lambda p: p.day): - logger.info('processed %s', d) + logger.debug(f"no. of items using exact locations: {len(local_dates)}") + + local_dates_fallback: List[DayWithZone] = list(_iter_local_dates_fallback()) + local_dates_fallback.sort(key=lambda p: p.day) + + # find days that are in fallback but not in local_dates (i.e., missing days) + local_dates_set: Set[date] = set(d.day for d in local_dates) + use_fallback_days: List[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set] + logger.debug(f"no. of items being used from fallback locations: {len(use_fallback_days)}") + + # combine local_dates and missing days from fallback into a sorted list + all_dates = heapq.merge(local_dates, use_fallback_days, key=lambda p: p.day) + + for d, gr in groupby(all_dates, key=lambda p: p.day): + logger.info(f"processed {d}{', using fallback' if d in local_dates_set else ''}") zone = most_common(list(gr)).zone yield DayWithZone(day=d, zone=zone) @@ -219,8 +251,9 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]: return res # fallback to home tz from my.location.fallback import via_home as home - loc = home.get_location(dt) - return _get_home_tz(loc=loc) + loc = list(home.estimate_location(dt)) + assert len(loc) == 1, f"should only have one home location, received {loc}" + return _get_home_tz(loc=(loc[0].lat, loc[0].lon)) # expose as 'public' function get_tz = _get_tz