core/time: more flexible support for resolving TZ abbreviation -> TZ ambiguities
Addresses https://github.com/karlicoss/HPI/issues/103 for now via the experimental time.tz.force_abbreviations config variable. Not sure whether this whole thing can ever be resolved properly.
This commit is contained in:
parent
5ef638694e
commit
1fd2a9f643
3 changed files with 54 additions and 12 deletions
|
@ -1,20 +1,50 @@
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from datetime import datetime, tzinfo
|
from datetime import datetime, tzinfo
|
||||||
|
from typing import Sequence
|
||||||
|
|
||||||
import pytz # type: ignore
|
import pytz # type: ignore
|
||||||
|
|
||||||
# https://gist.github.com/edwardabraham/8680198
|
|
||||||
tz_lookup = {
|
def user_forced() -> Sequence[str]:
|
||||||
pytz.timezone(x).localize(datetime.now()).tzname(): pytz.timezone(x)
|
# conversion from abbreviations is always ambiguous
|
||||||
for x in pytz.all_timezones
|
# https://stackoverflow.com/questions/36067621/python-all-possible-timezone-abbreviations-for-given-timezone-name-and-vise-ve
|
||||||
}
|
try:
|
||||||
tz_lookup['UTC'] = pytz.utc # ugh. otherwise it'z Zulu...
|
from my.config import time as user_config
|
||||||
|
return user_config.tz.force_abbreviations # type: ignore[attr-defined]
|
||||||
|
except:
|
||||||
|
# todo log/apply policy
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
# TODO dammit, lru_cache interferes with mypy?
|
@lru_cache(1)
|
||||||
|
def _abbr_to_timezone_map():
|
||||||
|
# also force UTC to always correspond to utc
|
||||||
|
# this makes more sense than Zulu, which is what it ends up as by default
|
||||||
|
timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
|
||||||
|
|
||||||
|
res = {}
|
||||||
|
for tzname in timezones:
|
||||||
|
tz = pytz.timezone(tzname)
|
||||||
|
infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
|
||||||
|
for info in infos:
|
||||||
|
abbr = info[-1]
|
||||||
|
# todo could support this with a better error handling strategy?
|
||||||
|
# otz = res.get(abbr, tz)
|
||||||
|
# if otz != tz:
|
||||||
|
# raise RuntimeError(abbr, tz, otz)
|
||||||
|
res[abbr] = tz
|
||||||
|
# ugh. also necessary, e.g. for Zulu?? why is it not in _tzinfos?
|
||||||
|
# note: somehow this is not the same as the tzname!
|
||||||
|
tzn = getattr(tz, '_tzname', None)
|
||||||
|
if tzn is not None:
|
||||||
|
res[tzn] = tz
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
# todo dammit, lru_cache interferes with mypy?
|
||||||
@lru_cache(None)
|
@lru_cache(None)
|
||||||
def abbr_to_timezone(abbr: str) -> tzinfo:
|
def abbr_to_timezone(abbr: str) -> tzinfo:
|
||||||
return tz_lookup[abbr]
|
return _abbr_to_timezone_map()[abbr]
|
||||||
|
|
||||||
|
|
||||||
def zone_to_countrycode(zone: str) -> str:
|
def zone_to_countrycode(zone: str) -> str:
|
||||||
|
@ -30,3 +60,6 @@ def _zones_to_countrycode():
|
||||||
for timezone in timezones:
|
for timezone in timezones:
|
||||||
res[timezone] = countrycode
|
res[timezone] = countrycode
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
# todo stuff here could be a bit more defensive? e.g. dependent on policy
|
||||||
|
|
|
@ -29,22 +29,29 @@ def parse_dt(s: str) -> datetime:
|
||||||
if end == ' PM' or end == ' AM':
|
if end == ' PM' or end == ' AM':
|
||||||
# old takeouts didn't have timezone
|
# old takeouts didn't have timezone
|
||||||
# hopefully it was utc? Legacy, so not that much of an issue anymore..
|
# hopefully it was utc? Legacy, so not that much of an issue anymore..
|
||||||
|
# todo although maybe worth adding timezone from location provider?
|
||||||
tz = pytz.utc
|
tz = pytz.utc
|
||||||
else:
|
else:
|
||||||
s, tzabbr = s.rsplit(maxsplit=1)
|
s, tzabbr = s.rsplit(maxsplit=1)
|
||||||
tz = abbr_to_timezone(tzabbr)
|
tz = abbr_to_timezone(tzabbr)
|
||||||
|
|
||||||
dt = datetime.strptime(s, fmt)
|
dt = datetime.strptime(s, fmt)
|
||||||
dt = tz.localize(dt)
|
return tz.localize(dt)
|
||||||
return dt
|
|
||||||
|
|
||||||
|
|
||||||
def test_parse_dt():
|
def test_parse_dt() -> None:
|
||||||
parse_dt('Jun 23, 2015, 2:43:45 PM')
|
parse_dt('Jun 23, 2015, 2:43:45 PM')
|
||||||
parse_dt('Jan 25, 2019, 8:23:48 AM GMT')
|
parse_dt('Jan 25, 2019, 8:23:48 AM GMT')
|
||||||
parse_dt('Jan 22, 2020, 8:34:00 PM UTC')
|
parse_dt('Jan 22, 2020, 8:34:00 PM UTC')
|
||||||
parse_dt('Sep 10, 2019, 8:51:45 PM MSK')
|
parse_dt('Sep 10, 2019, 8:51:45 PM MSK')
|
||||||
|
|
||||||
|
# these testcases are interesting: in pytz, abbr resolution might depend on the _current_ date!
|
||||||
|
# so these used to fail during winter
|
||||||
|
# you can see all the different values used in the _tzinfos field
|
||||||
|
parse_dt('Jun 01, 2018, 11:00:00 PM BST')
|
||||||
|
parse_dt('Jun 01, 2018, 11:00:00 PM PDT')
|
||||||
|
parse_dt('Feb 01, 2018, 11:00:00 PM PST')
|
||||||
|
|
||||||
|
|
||||||
class State(Enum):
|
class State(Enum):
|
||||||
OUTSIDE = 0
|
OUTSIDE = 0
|
||||||
|
|
|
@ -3,7 +3,7 @@ from datetime import datetime
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
import my.location.takeout as LT
|
import my.location.google as LT
|
||||||
from my.google.takeout.html import read_html
|
from my.google.takeout.html import read_html
|
||||||
from my.google.takeout.paths import get_last_takeout
|
from my.google.takeout.paths import get_last_takeout
|
||||||
|
|
||||||
|
@ -69,3 +69,5 @@ def parse_takeout_xmllint(data: str):
|
||||||
out = res.stdout.decode('utf8')
|
out = res.stdout.decode('utf8')
|
||||||
# out = data
|
# out = data
|
||||||
return out.split('<div class="content-cell')
|
return out.split('<div class="content-cell')
|
||||||
|
|
||||||
|
from my.google.takeout.html import test_parse_dt
|
||||||
|
|
Loading…
Add table
Reference in a new issue