twitter.twint: localize timestamps correctly
Same issue as discussed here: https://memex.zulipchat.com/#narrow/stream/279610-data/topic/google.20takeout.20timestamps

Also see the corresponding changes for google_takeout_parser:
- https://github.com/seanbreckenridge/google_takeout_parser/pull/28/files
- https://github.com/seanbreckenridge/google_takeout_parser/pull/30/files
This commit is contained in:
parent
e3f376d64f
commit
3c6f45ce83
2 changed files with 23 additions and 12 deletions
|
@ -1,8 +1,9 @@
|
|||
from functools import lru_cache
|
||||
from datetime import tzinfo
|
||||
from typing import Sequence
|
||||
from typing import Sequence, Dict
|
||||
|
||||
import pytz # type: ignore
|
||||
import pytz
|
||||
|
||||
from .common import datetime_aware, datetime_naive
|
||||
|
||||
|
||||
def user_forced() -> Sequence[str]:
|
||||
|
@ -17,12 +18,12 @@ def user_forced() -> Sequence[str]:
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def _abbr_to_timezone_map():
|
||||
def _abbr_to_timezone_map() -> Dict[str, pytz.BaseTzInfo]:
|
||||
# also force UTC to always correspond to utc
|
||||
# this makes more sense than Zulu it ends up by default
|
||||
timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
|
||||
|
||||
res = {}
|
||||
res: Dict[str, pytz.BaseTzInfo] = {}
|
||||
for tzname in timezones:
|
||||
tz = pytz.timezone(tzname)
|
||||
infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
|
||||
|
@ -41,12 +42,23 @@ def _abbr_to_timezone_map():
|
|||
return res
|
||||
|
||||
|
||||
# todo dammit, lru_cache interferes with mypy?
|
||||
@lru_cache(None)
|
||||
def abbr_to_timezone(abbr: str) -> tzinfo:
|
||||
@lru_cache(maxsize=None)
|
||||
def abbr_to_timezone(abbr: str) -> pytz.BaseTzInfo:
|
||||
return _abbr_to_timezone_map()[abbr]
|
||||
|
||||
|
||||
def localize_with_abbr(dt: datetime_naive, *, abbr: str) -> datetime_aware:
|
||||
if abbr.lower() == 'utc':
|
||||
# best to shortcut here to avoid complications
|
||||
return pytz.utc.localize(dt)
|
||||
|
||||
tz = abbr_to_timezone(abbr)
|
||||
# this will compute the correct UTC offset
|
||||
tzinfo = tz.localize(dt).tzinfo
|
||||
assert tzinfo is not None # make mypy happy
|
||||
return tz.normalize(dt.replace(tzinfo=tzinfo))
|
||||
|
||||
|
||||
def zone_to_countrycode(zone: str) -> str:
|
||||
# todo make optional?
|
||||
return _zones_to_countrycode()[zone]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue