diff --git a/my/google/takeout/html.py b/my/google/takeout/html.py
index fd702e8..9aa741c 100644
--- a/my/google/takeout/html.py
+++ b/my/google/takeout/html.py
@@ -14,16 +14,16 @@ import pytz
 
 from ...core.time import abbr_to_timezone
 
-# Mar 8, 2018, 5:14:40 PM
-_TIME_FORMAT = "%b %d, %Y, %I:%M:%S %p"
+
+# NOTE: https://bugs.python.org/issue22377 %Z doesn't work properly
+_TIME_FORMATS = [
+    "%b %d, %Y, %I:%M:%S %p", # Mar 8, 2018, 5:14:40 PM
+    "%d %b %Y, %H:%M:%S", # 6 Oct 2020, 14:32:28
+]
 
 
 # ugh. something is seriously wrong with datetime, it wouldn't parse timezone aware UTC timestamp :(
 def parse_dt(s: str) -> datetime:
-    fmt = _TIME_FORMAT
-    #
-    # ugh. https://bugs.python.org/issue22377 %Z doesn't work properly
-
     end = s[-3:]
     tz: Any # meh
     if end == ' PM' or end == ' AM':
@@ -35,7 +35,15 @@ def parse_dt(s: str) -> datetime:
         s, tzabbr = s.rsplit(maxsplit=1)
         tz = abbr_to_timezone(tzabbr)
 
-    dt = datetime.strptime(s, fmt)
+    dt: Optional[datetime] = None
+    for fmt in _TIME_FORMATS:
+        try:
+            dt = datetime.strptime(s, fmt)
+            break
+        except ValueError:
+            continue
+    if dt is None:
+        raise RuntimeError("None of formats {} matched {!r}".format(_TIME_FORMATS, s))
     return tz.localize(dt)
 
 
@@ -52,6 +60,8 @@ def test_parse_dt() -> None:
     parse_dt('Jun 01, 2018, 11:00:00 PM PDT')
     parse_dt('Feb 01, 2018, 11:00:00 PM PST')
 
+    parse_dt('6 Oct 2020, 14:32:28 PDT')
+
 
 class State(Enum):
     OUTSIDE = 0