twitter.twint: localize timestamps correctly

Same issue as discussed here: https://memex.zulipchat.com/#narrow/stream/279610-data/topic/google.20takeout.20timestamps

Also see the corresponding changes for google_takeout_parser:

- https://github.com/seanbreckenridge/google_takeout_parser/pull/28/files
- https://github.com/seanbreckenridge/google_takeout_parser/pull/30/files
This commit is contained in:
Dima Gerasimov 2022-05-30 17:24:09 +01:00 committed by karlicoss
parent de7972be05
commit d65e1b5245
2 changed files with 23 additions and 12 deletions

View file

@ -1,8 +1,9 @@
from functools import lru_cache
from datetime import tzinfo
from typing import Sequence
from typing import Sequence, Dict
import pytz # type: ignore
import pytz
from .common import datetime_aware, datetime_naive
def user_forced() -> Sequence[str]:
@ -17,12 +18,12 @@ def user_forced() -> Sequence[str]:
@lru_cache(1)
def _abbr_to_timezone_map():
def _abbr_to_timezone_map() -> Dict[str, pytz.BaseTzInfo]:
# also force UTC to always correspond to utc
# this makes more sense than Zulu, which is what it ends up as by default
timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
res = {}
res: Dict[str, pytz.BaseTzInfo] = {}
for tzname in timezones:
tz = pytz.timezone(tzname)
infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
@ -41,12 +42,23 @@ def _abbr_to_timezone_map():
return res
# todo dammit, lru_cache interferes with mypy?
@lru_cache(maxsize=None)
def abbr_to_timezone(abbr: str) -> pytz.BaseTzInfo:
    """Return the pytz timezone registered for the abbreviation ``abbr``.

    The lookup goes through the (cached) abbreviation map built by
    ``_abbr_to_timezone_map()``; results are additionally memoized per
    abbreviation by ``lru_cache``.

    Raises:
        KeyError: if ``abbr`` is not a key of the abbreviation map.
    """
    return _abbr_to_timezone_map()[abbr]
def localize_with_abbr(dt: datetime_naive, *, abbr: str) -> datetime_aware:
    """Turn the naive ``dt`` into an aware datetime using the timezone abbreviation ``abbr``.

    ``abbr`` equal to 'utc' (compared case-insensitively) is handled directly
    with ``pytz.utc``. Any other abbreviation is resolved with
    ``abbr_to_timezone``, so an abbreviation missing from that map surfaces
    as whatever error the lookup raises.
    """
    if abbr.lower() != 'utc':
        zone = abbr_to_timezone(abbr)
        # localizing first lets pytz pick the tzinfo with the correct UTC offset for dt
        localized = zone.localize(dt)
        resolved = localized.tzinfo
        assert resolved is not None  # make mypy happy
        attached = dt.replace(tzinfo=resolved)
        return zone.normalize(attached)

    # best to shortcut here to avoid complications
    return pytz.utc.localize(dt)
def zone_to_countrycode(zone: str) -> str:
    """Return the entry stored for ``zone`` in the ``_zones_to_countrycode()`` mapping.

    NOTE(review): presumably ``zone`` is a timezone name and the result a
    country code, going by the names -- confirm against the mapping's builder.
    A ``zone`` missing from the mapping is not handled here; the lookup
    error propagates.
    """
    # todo make optional?
    mapping = _zones_to_countrycode()
    return mapping[zone]