twitter.twint: localize timestamps correctly
Same issue as discussed here: https://memex.zulipchat.com/#narrow/stream/279610-data/topic/google.20takeout.20timestamps
Also see the corresponding changes for google_takeout_parser:
- https://github.com/seanbreckenridge/google_takeout_parser/pull/28/files
- https://github.com/seanbreckenridge/google_takeout_parser/pull/30/files
This commit is contained in:
parent
de7972be05
commit
d65e1b5245
2 changed files with 23 additions and 12 deletions
|
@ -1,8 +1,9 @@
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from datetime import tzinfo
|
from typing import Sequence, Dict
|
||||||
from typing import Sequence
|
|
||||||
|
|
||||||
import pytz # type: ignore
|
import pytz
|
||||||
|
|
||||||
|
from .common import datetime_aware, datetime_naive
|
||||||
|
|
||||||
|
|
||||||
def user_forced() -> Sequence[str]:
|
def user_forced() -> Sequence[str]:
|
||||||
|
@ -17,12 +18,12 @@ def user_forced() -> Sequence[str]:
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(1)
|
@lru_cache(1)
|
||||||
def _abbr_to_timezone_map():
|
def _abbr_to_timezone_map() -> Dict[str, pytz.BaseTzInfo]:
|
||||||
# also force UTC to always correspond to utc
|
# also force UTC to always correspond to utc
|
||||||
# this makes more sense than Zulu it ends up by default
|
# this makes more sense than Zulu it ends up by default
|
||||||
timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
|
timezones = pytz.all_timezones + ['UTC'] + list(user_forced())
|
||||||
|
|
||||||
res = {}
|
res: Dict[str, pytz.BaseTzInfo] = {}
|
||||||
for tzname in timezones:
|
for tzname in timezones:
|
||||||
tz = pytz.timezone(tzname)
|
tz = pytz.timezone(tzname)
|
||||||
infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
|
infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present?
|
||||||
|
@ -41,12 +42,23 @@ def _abbr_to_timezone_map():
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
# todo dammit, lru_cache interferes with mypy?
|
@lru_cache(maxsize=None)
|
||||||
@lru_cache(None)
|
def abbr_to_timezone(abbr: str) -> pytz.BaseTzInfo:
|
||||||
def abbr_to_timezone(abbr: str) -> tzinfo:
|
|
||||||
return _abbr_to_timezone_map()[abbr]
|
return _abbr_to_timezone_map()[abbr]
|
||||||
|
|
||||||
|
|
||||||
|
def localize_with_abbr(dt: datetime_naive, *, abbr: str) -> datetime_aware:
    """Attach the timezone named by ``abbr`` to the naive datetime ``dt``.

    ``abbr`` is compared case-insensitively against ``'utc'``; when it
    matches, ``dt`` is localized directly with ``pytz.utc``.

    Otherwise the zone is looked up with ``abbr_to_timezone(abbr)``. ``dt``
    is first localized in that zone only to discover which tzinfo (and so
    which UTC offset) applies, then that tzinfo is attached to ``dt`` and
    the result is passed through the zone's ``normalize``.

    Any error raised by ``abbr_to_timezone`` for an abbreviation it does not
    know is propagated unchanged.
    """
    is_utc = abbr.lower() == 'utc'
    if is_utc:
        # UTC needs no abbreviation lookup, so take the direct route
        return pytz.utc.localize(dt)

    zone = abbr_to_timezone(abbr)
    # localizing is what picks the tzinfo carrying the correct UTC offset
    localized = zone.localize(dt)
    resolved = localized.tzinfo
    assert resolved is not None  # only here to narrow the type for mypy
    aware = dt.replace(tzinfo=resolved)
    return zone.normalize(aware)
|
||||||
|
|
||||||
|
|
||||||
def zone_to_countrycode(zone: str) -> str:
|
def zone_to_countrycode(zone: str) -> str:
|
||||||
# todo make optional?
|
# todo make optional?
|
||||||
return _zones_to_countrycode()[zone]
|
return _zones_to_countrycode()[zone]
|
||||||
|
|
|
@ -26,7 +26,7 @@ from typing import NamedTuple, Iterator, List
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from ..core.common import get_files, LazyLogger, Json, datetime_aware
|
from ..core.common import get_files, LazyLogger, Json, datetime_aware
|
||||||
from ..core.time import abbr_to_timezone
|
from ..core.time import localize_with_abbr
|
||||||
|
|
||||||
log = LazyLogger(__name__)
|
log = LazyLogger(__name__)
|
||||||
|
|
||||||
|
@ -49,9 +49,8 @@ class Tweet(NamedTuple):
|
||||||
def created_at(self) -> datetime_aware:
|
def created_at(self) -> datetime_aware:
|
||||||
seconds = self.row['created_at'] / 1000
|
seconds = self.row['created_at'] / 1000
|
||||||
tz_abbr = self.row['timezone']
|
tz_abbr = self.row['timezone']
|
||||||
tz = abbr_to_timezone(tz_abbr)
|
naive = datetime.fromtimestamp(seconds)
|
||||||
dt = datetime.fromtimestamp(seconds, tz=tz)
|
return localize_with_abbr(naive, abbr=tz_abbr)
|
||||||
return dt
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def screen_name(self) -> str:
|
def screen_name(self) -> str:
|
||||||
|
|
Loading…
Add table
Reference in a new issue