core/time: more flexible support for resolving TZ abbreviation -> TZ ambiguities
Addresses https://github.com/karlicoss/HPI/issues/103 for now via the experimental time.tz.force_abbreviations config variable. Not sure whether this whole thing can ever be resolved properly.
This commit is contained in:
parent
5ef638694e
commit
1fd2a9f643
3 changed files with 54 additions and 12 deletions
|
@ -1,20 +1,50 @@
|
|||
from functools import lru_cache
|
||||
from datetime import datetime, tzinfo
|
||||
from typing import Sequence
|
||||
|
||||
import pytz # type: ignore
|
||||
|
||||
# https://gist.github.com/edwardabraham/8680198
|
||||
tz_lookup = {
|
||||
pytz.timezone(x).localize(datetime.now()).tzname(): pytz.timezone(x)
|
||||
for x in pytz.all_timezones
|
||||
}
|
||||
tz_lookup['UTC'] = pytz.utc # ugh. otherwise it'z Zulu...
|
||||
|
||||
def user_forced() -> Sequence[str]:
    """Return the user's ``time.tz.force_abbreviations`` setting from ``my.config``.

    The lookup is best-effort: if the config module cannot be imported or
    the attribute chain is missing, an empty list is returned instead.
    """
    # conversion from abbreviations is always ambiguous
    # https://stackoverflow.com/questions/36067621/python-all-possible-timezone-abbreviations-for-given-timezone-name-and-vise-ve
    try:
        from my.config import time as user_config
        return user_config.tz.force_abbreviations  # type: ignore[attr-defined]
    except Exception:
        # Deliberately broad (user config is arbitrary code), but no longer a
        # bare ``except:`` so KeyboardInterrupt/SystemExit still propagate.
        # todo log/apply policy
        return []
|
||||
|
||||
|
||||
# TODO dammit, lru_cache interferes with mypy?
|
||||
@lru_cache(1)
def _abbr_to_timezone_map():
    """Build the abbreviation -> pytz timezone mapping (computed once, then cached).

    Zones are processed in order and later ones overwrite earlier ones for
    the same abbreviation, so the trailing 'UTC' entry and then the
    user-forced zones take precedence over the stock pytz list.
    """
    # also force UTC to always correspond to utc
    # this makes more sense than Zulu it ends up by default
    candidates = pytz.all_timezones + ['UTC'] + list(user_forced())

    mapping = {}
    for name in candidates:
        zone = pytz.timezone(name)
        # not sure if can rely on attr always present?
        for entry in getattr(zone, '_tzinfos', []):
            # the last element of each entry is used as the abbreviation
            # todo could support this with a better error handling strategy?
            # (e.g. raise when an abbreviation is already bound to a different zone)
            mapping[entry[-1]] = zone
        # ugh. also necessary, e.g. for Zulu?? why is it not in _tzinfos?
        # note: somehow this is not the same as the tzname!
        own_abbr = getattr(zone, '_tzname', None)
        if own_abbr is not None:
            mapping[own_abbr] = zone
    return mapping
|
||||
|
||||
|
||||
# todo dammit, lru_cache interferes with mypy?
|
||||
@lru_cache(None)
def abbr_to_timezone(abbr: str) -> tzinfo:
    """Resolve a timezone abbreviation (e.g. 'BST') to a tzinfo object.

    The lookup goes through the map built by ``_abbr_to_timezone_map``.
    A stale lookup in the legacy ``tz_lookup`` table used to precede this
    return and made it unreachable; that table is keyed by whatever
    abbreviation each zone reports for the current date.

    Raises:
        KeyError: if the abbreviation is not present in the map.
    """
    return _abbr_to_timezone_map()[abbr]
|
||||
|
||||
|
||||
def zone_to_countrycode(zone: str) -> str:
|
||||
|
@ -30,3 +60,6 @@ def _zones_to_countrycode():
|
|||
for timezone in timezones:
|
||||
res[timezone] = countrycode
|
||||
return res
|
||||
|
||||
|
||||
# todo stuff here could be a bit more defensive? e.g. dependent on policy
|
||||
|
|
|
@ -29,22 +29,29 @@ def parse_dt(s: str) -> datetime:
|
|||
if end == ' PM' or end == ' AM':
|
||||
# old takeouts didn't have timezone
|
||||
# hopefully it was utc? Legacy, so no that much of an issue anymore..
|
||||
# todo although maybe worth adding timezone from location provider?
|
||||
tz = pytz.utc
|
||||
else:
|
||||
s, tzabbr = s.rsplit(maxsplit=1)
|
||||
tz = abbr_to_timezone(tzabbr)
|
||||
|
||||
dt = datetime.strptime(s, fmt)
|
||||
dt = tz.localize(dt)
|
||||
return dt
|
||||
return tz.localize(dt)
|
||||
|
||||
|
||||
def test_parse_dt() -> None:
    """Smoke-test ``parse_dt`` on timestamps with and without a timezone suffix.

    Each call only needs to complete without raising; no return values are checked.
    """
    parse_dt('Jun 23, 2015, 2:43:45 PM')
    parse_dt('Jan 25, 2019, 8:23:48 AM GMT')
    parse_dt('Jan 22, 2020, 8:34:00 PM UTC')
    parse_dt('Sep 10, 2019, 8:51:45 PM MSK')

    # these test cases are interesting: in pytz, abbr resolution might depend on the _current_ date!
    # so these used to fail during winter
    # you can see all the different values used in the _tzinfos field
    parse_dt('Jun 01, 2018, 11:00:00 PM BST')
    parse_dt('Jun 01, 2018, 11:00:00 PM PDT')
    parse_dt('Feb 01, 2018, 11:00:00 PM PST')
|
||||
|
||||
|
||||
class State(Enum):
|
||||
OUTSIDE = 0
|
||||
|
|
|
@ -3,7 +3,7 @@ from datetime import datetime
|
|||
from itertools import islice
|
||||
import pytz
|
||||
|
||||
import my.location.takeout as LT
|
||||
import my.location.google as LT
|
||||
from my.google.takeout.html import read_html
|
||||
from my.google.takeout.paths import get_last_takeout
|
||||
|
||||
|
@ -69,3 +69,5 @@ def parse_takeout_xmllint(data: str):
|
|||
out = res.stdout.decode('utf8')
|
||||
# out = data
|
||||
return out.split('<div class="content-cell')
|
||||
|
||||
from my.google.takeout.html import test_parse_dt
|
||||
|
|
Loading…
Add table
Reference in a new issue