core/time: more flexible support for resolving TZ abbreviation -> TZ ambiguities

addresses https://github.com/karlicoss/HPI/issues/103

for now via experimental time.tz.force_abbreviations config variable
not sure if this whole thing is ever going to be resolved properly
This commit is contained in:
Dima Gerasimov 2021-03-07 21:50:39 +00:00 committed by karlicoss
parent 5ef638694e
commit 1fd2a9f643
3 changed files with 54 additions and 12 deletions

View file

@ -1,20 +1,50 @@
from functools import lru_cache from functools import lru_cache
from datetime import datetime, tzinfo from datetime import datetime, tzinfo
from typing import Sequence
import pytz # type: ignore import pytz # type: ignore
# https://gist.github.com/edwardabraham/8680198
tz_lookup = { def user_forced() -> Sequence[str]:
pytz.timezone(x).localize(datetime.now()).tzname(): pytz.timezone(x) # conversion from abbreviations is always ambiguous
for x in pytz.all_timezones # https://stackoverflow.com/questions/36067621/python-all-possible-timezone-abbreviations-for-given-timezone-name-and-vise-ve
} try:
tz_lookup['UTC'] = pytz.utc # ugh. otherwise it'z Zulu... from my.config import time as user_config
return user_config.tz.force_abbreviations # type: ignore[attr-defined]
except:
# todo log/apply policy
return []
# TODO dammit, lru_cache interferes with mypy? @lru_cache(1)
def _abbr_to_timezone_map():
    """
    Build the lookup table from a timezone abbreviation (e.g. 'BST') to a pytz timezone.

    Abbreviations are ambiguous, so later zones overwrite earlier ones in the table:
    'UTC' and then the zones returned by user_forced() are processed last and
    therefore win over the stock pytz zones.
    """
    # also force UTC to always correspond to utc
    # this makes more sense than Zulu it ends up by default
    zone_names = pytz.all_timezones + ['UTC'] + list(user_forced())

    mapping = {}
    for zone_name in zone_names:
        zone = pytz.timezone(zone_name)

        # the last element of every _tzinfos entry is used as the abbreviation
        # not sure if can rely on attr always present? hence the getattr fallback
        # todo could raise on conflicting abbreviations instead of overwriting,
        # given a better error handling strategy
        for entry in getattr(zone, '_tzinfos', []):
            mapping[entry[-1]] = zone

        # ugh. also necessary, e.g. for Zulu?? why is it not in _tzinfos?
        # note: somehow this is not the same as the tzname!
        own_name = getattr(zone, '_tzname', None)
        if own_name is None:
            continue
        mapping[own_name] = zone
    return mapping
# todo dammit, lru_cache interferes with mypy?
@lru_cache(None) @lru_cache(None)
def abbr_to_timezone(abbr: str) -> tzinfo: def abbr_to_timezone(abbr: str) -> tzinfo:
return tz_lookup[abbr] return _abbr_to_timezone_map()[abbr]
def zone_to_countrycode(zone: str) -> str: def zone_to_countrycode(zone: str) -> str:
@ -30,3 +60,6 @@ def _zones_to_countrycode():
for timezone in timezones: for timezone in timezones:
res[timezone] = countrycode res[timezone] = countrycode
return res return res
# todo stuff here could be a bit more defensive? e.g. dependent on policy

View file

@ -29,22 +29,29 @@ def parse_dt(s: str) -> datetime:
if end == ' PM' or end == ' AM': if end == ' PM' or end == ' AM':
# old takeouts didn't have timezone # old takeouts didn't have timezone
# hopefully it was utc? Legacy, so not that much of an issue anymore.. # hopefully it was utc? Legacy, so not that much of an issue anymore..
# todo although maybe worth adding timezone from location provider?
tz = pytz.utc tz = pytz.utc
else: else:
s, tzabbr = s.rsplit(maxsplit=1) s, tzabbr = s.rsplit(maxsplit=1)
tz = abbr_to_timezone(tzabbr) tz = abbr_to_timezone(tzabbr)
dt = datetime.strptime(s, fmt) dt = datetime.strptime(s, fmt)
dt = tz.localize(dt) return tz.localize(dt)
return dt
def test_parse_dt(): def test_parse_dt() -> None:
parse_dt('Jun 23, 2015, 2:43:45 PM') parse_dt('Jun 23, 2015, 2:43:45 PM')
parse_dt('Jan 25, 2019, 8:23:48 AM GMT') parse_dt('Jan 25, 2019, 8:23:48 AM GMT')
parse_dt('Jan 22, 2020, 8:34:00 PM UTC') parse_dt('Jan 22, 2020, 8:34:00 PM UTC')
parse_dt('Sep 10, 2019, 8:51:45 PM MSK') parse_dt('Sep 10, 2019, 8:51:45 PM MSK')
# these testcases are interesting: in pytz, abbr resolution might depend on the _current_ date!
# so these used to fail during winter
# you can see all the different values used in the _tzinfos field
parse_dt('Jun 01, 2018, 11:00:00 PM BST')
parse_dt('Jun 01, 2018, 11:00:00 PM PDT')
parse_dt('Feb 01, 2018, 11:00:00 PM PST')
class State(Enum): class State(Enum):
OUTSIDE = 0 OUTSIDE = 0

View file

@ -3,7 +3,7 @@ from datetime import datetime
from itertools import islice from itertools import islice
import pytz import pytz
import my.location.takeout as LT import my.location.google as LT
from my.google.takeout.html import read_html from my.google.takeout.html import read_html
from my.google.takeout.paths import get_last_takeout from my.google.takeout.paths import get_last_takeout
@ -69,3 +69,5 @@ def parse_takeout_xmllint(data: str):
out = res.stdout.decode('utf8') out = res.stdout.decode('utf8')
# out = data # out = data
return out.split('<div class="content-cell') return out.split('<div class="content-cell')
from my.google.takeout.html import test_parse_dt