diff --git a/my/core/time.py b/my/core/time.py
index 7f856b0..fec90ba 100644
--- a/my/core/time.py
+++ b/my/core/time.py
@@ -1,20 +1,50 @@
 from functools import lru_cache
 from datetime import datetime, tzinfo
+from typing import Sequence
 
 import pytz # type: ignore
 
-# https://gist.github.com/edwardabraham/8680198
-tz_lookup = {
-    pytz.timezone(x).localize(datetime.now()).tzname(): pytz.timezone(x)
-    for x in pytz.all_timezones
-}
-tz_lookup['UTC'] = pytz.utc # ugh. otherwise it'z Zulu...
+
+def user_forced() -> Sequence[str]:
+    # timezone names from my.config (time.tz.force_abbreviations) that should win, since conversion from abbreviations is always ambiguous
+    # https://stackoverflow.com/questions/36067621/python-all-possible-timezone-abbreviations-for-given-timezone-name-and-vise-ve
+    try:
+        from my.config import time as user_config
+        return user_config.tz.force_abbreviations # type: ignore[attr-defined]
+    except Exception:
+        # best effort: any failure to load the setting means no forced timezones (Exception rather than a bare except, so Ctrl-C/SystemExit still propagate). todo log/apply policy
+        return []
 
 
-# TODO dammit, lru_cache interferes with mypy?
+@lru_cache(1)
+def _abbr_to_timezone_map():
+    # later names overwrite earlier ones in the loop below, so 'UTC' and the user forced names go last and take priority
+    # forcing UTC to always correspond to utc makes more sense than Zulu, which it ends up as by default
+    timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
+
+    res = {}
+    for tzname in timezones:
+        tz = pytz.timezone(tzname)
+        infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
+        for info in infos:
+            abbr = info[-1]
+            # todo could support this with a better error handling strategy?
+            # otz = res.get(abbr, tz)
+            # if otz != tz:
+            #     raise RuntimeError(abbr, tz, otz)
+            res[abbr] = tz
+        # ugh. also necessary, e.g. for Zulu?? why is it not in _tzinfos?
+        # note: somehow this is not the same as the tzname!
+        tzn = getattr(tz, '_tzname', None)
+        if tzn is not None:
+            res[tzn] = tz
+    return res
+
+
+# todo dammit, lru_cache interferes with mypy?
 @lru_cache(None)
 def abbr_to_timezone(abbr: str) -> tzinfo:
-    return tz_lookup[abbr]
+    return _abbr_to_timezone_map()[abbr]
 
 
 def zone_to_countrycode(zone: str) -> str:
@@ -30,3 +60,6 @@ def _zones_to_countrycode():
         for timezone in timezones:
             res[timezone] = countrycode
     return res
+
+
+# todo stuff here could be a bit more defensive? e.g. dependent on policy
diff --git a/my/google/takeout/html.py b/my/google/takeout/html.py
index a50fb3f..fd702e8 100644
--- a/my/google/takeout/html.py
+++ b/my/google/takeout/html.py
@@ -29,22 +29,29 @@ def parse_dt(s: str) -> datetime:
     if end == ' PM' or end == ' AM':
         # old takeouts didn't have timezone
         # hopefully it was utc? Legacy, so no that much of an issue anymore..
+        # todo although maybe worth adding timezone from location provider?
         tz = pytz.utc
     else:
         s, tzabbr = s.rsplit(maxsplit=1)
         tz = abbr_to_timezone(tzabbr)
 
     dt = datetime.strptime(s, fmt)
-    dt = tz.localize(dt)
-    return dt
+    return tz.localize(dt)
 
 
-def test_parse_dt():
+def test_parse_dt() -> None:
     parse_dt('Jun 23, 2015, 2:43:45 PM')
     parse_dt('Jan 25, 2019, 8:23:48 AM GMT')
     parse_dt('Jan 22, 2020, 8:34:00 PM UTC')
     parse_dt('Sep 10, 2019, 8:51:45 PM MSK')
 
+    # these testcases are interesting: in pytz, abbr resolution might depend on the _current_ date!
+    # so these used to fail during winter
+    # you can see all the different values used in the _tzinfos field
+    parse_dt('Jun 01, 2018, 11:00:00 PM BST')
+    parse_dt('Jun 01, 2018, 11:00:00 PM PDT')
+    parse_dt('Feb 01, 2018, 11:00:00 PM PST')
+
 
 class State(Enum):
     OUTSIDE = 0
diff --git a/tests/takeout.py b/tests/takeout.py
index bc746c2..f45a51d 100644
--- a/tests/takeout.py
+++ b/tests/takeout.py
@@ -3,7 +3,7 @@ from datetime import datetime
 from itertools import islice
 import pytz
 
-import my.location.takeout as LT
+import my.location.google as LT
 from my.google.takeout.html import read_html
 from my.google.takeout.paths import get_last_takeout
 
@@ -69,3 +69,5 @@ def parse_takeout_xmllint(data: str):
     out = res.stdout.decode('utf8')
     # out = data
     return out.split('