Initial my.time.tz provider, infer from location with daily resolution
This commit is contained in:
parent
dc2518b348
commit
1f2e595be9
6 changed files with 202 additions and 6 deletions
|
@ -7,6 +7,9 @@ from ...core.common import Paths, get_files
|
||||||
from ...core.util import __NOT_HPI_MODULE__
|
from ...core.util import __NOT_HPI_MODULE__
|
||||||
|
|
||||||
from my.config import google as user_config
|
from my.config import google as user_config
|
||||||
|
|
||||||
|
from more_itertools import last
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class google(user_config):
|
class google(user_config):
|
||||||
takeout_path: Paths # path/paths/glob for the takeout zips
|
takeout_path: Paths # path/paths/glob for the takeout zips
|
||||||
|
@ -35,9 +38,7 @@ def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
|
||||||
|
|
||||||
|
|
||||||
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
||||||
# TODO more_itertools?
|
return last(get_takeouts(path=path))
|
||||||
matching = list(get_takeouts(path=path))
|
|
||||||
return matching[-1]
|
|
||||||
|
|
||||||
|
|
||||||
# TODO might be a good idea to merge across multiple takeouts...
|
# TODO might be a good idea to merge across multiple takeouts...
|
||||||
|
|
|
@ -18,7 +18,6 @@ import geopy # type: ignore
|
||||||
|
|
||||||
from ..core.common import LazyLogger, mcachew
|
from ..core.common import LazyLogger, mcachew
|
||||||
from ..core.cachew import cache_dir
|
from ..core.cachew import cache_dir
|
||||||
from ..google.takeout.paths import get_last_takeout
|
|
||||||
from ..kython import kompress
|
from ..kython import kompress
|
||||||
|
|
||||||
|
|
||||||
|
@ -148,7 +147,9 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
|
||||||
|
|
||||||
|
|
||||||
def locations(**kwargs) -> Iterator[Location]:
|
def locations(**kwargs) -> Iterator[Location]:
|
||||||
# TODO need to include older data
|
# NOTE: if this import isn't lazy, tests/tz.py breaks because it can't override config
|
||||||
|
# very weird, as if this function captures the values of globals somehow?? investigate later.
|
||||||
|
from ..google.takeout.paths import get_last_takeout
|
||||||
last_takeout = get_last_takeout(path=_LOCATION_JSON)
|
last_takeout = get_last_takeout(path=_LOCATION_JSON)
|
||||||
|
|
||||||
return _iter_locations(path=last_takeout, **kwargs)
|
return _iter_locations(path=last_takeout, **kwargs)
|
||||||
|
|
9
my/time/tz/main.py
Normal file
9
my/time/tz/main.py
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
'''
|
||||||
|
Timezone data provider
|
||||||
|
'''
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
def localize(dt: datetime) -> datetime:
    '''
    Localize ``dt`` by delegating to the location-based provider.

    For now, it's the caller's responsibility to check that the result actually
    got localized.
    '''
    # imported at call time rather than at module import time
    from . import via_location as provider
    return provider.localize(dt)
|
118
my/time/tz/via_location.py
Normal file
118
my/time/tz/via_location.py
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
'''
|
||||||
|
Timezone data provider, useful for localizing UTC-only/timezone unaware dates.
|
||||||
|
'''
|
||||||
|
REQUIRES = [
|
||||||
|
# for determining timezone by coordinate
|
||||||
|
'timezonefinder',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
from collections import Counter
|
||||||
|
from datetime import date, datetime
|
||||||
|
from functools import lru_cache
|
||||||
|
from itertools import groupby, islice
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Iterator, List, NamedTuple, Optional, Tuple
|
||||||
|
|
||||||
|
from more_itertools import seekable
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
from ...core.common import LazyLogger
|
||||||
|
from ...location.google import locations
|
||||||
|
|
||||||
|
|
||||||
|
logger = LazyLogger(__name__, level='debug')
|
||||||
|
|
||||||
|
|
||||||
|
# todo should move to config? not sure
|
||||||
|
_FASTER: bool = False
|
||||||
|
@lru_cache(maxsize=None)
def _timezone_finder(fast: Optional[bool] = None):
    '''
    Build (and memoize) the ``timezonefinder`` lookup object.

    fast: True selects ``TimezoneFinderL``, False selects ``TimezoneFinder``;
          None (the default) falls back to the module-level ``_FASTER`` flag.

    The result is memoized per distinct call signature, so a no-argument call keeps
    returning the finder chosen by the value ``_FASTER`` had on the first such call.
    The set of possible signatures is tiny, hence the unbounded cache.
    '''
    if fast is None:
        fast = _FASTER
    # timezonefinder is listed in REQUIRES; importing it here keeps this module
    # importable when the package isn't installed
    if fast:
        from timezonefinder import TimezoneFinderL as Finder  # type: ignore
    else:
        from timezonefinder import TimezoneFinder as Finder  # type: ignore
    return Finder(in_memory=True)
|
||||||
|
|
||||||
|
|
||||||
|
Zone = str
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE: for now only daily resolution is supported... later will implement something more efficient
|
||||||
|
class DayWithZone(NamedTuple):
    '''
    A calendar day paired with a timezone name.
    '''
    # calendar date the entry refers to
    day: date
    # timezone name (Zone is an alias of str); later handed to pytz.timezone
    zone: Zone
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
    '''
    Yield a DayWithZone for every location point whose timezone could be determined.

    start, stop: passed through unchanged to locations().

    A point is dropped (and a message recorded) when no timezone is found for its
    coordinates, or when its local date is earlier than the local date of the
    previously accepted point. The recorded messages are summarised in a single
    log line once all locations have been consumed.
    '''
    # called without arguments: the finder itself decides based on _FASTER
    finder = _timezone_finder()
    pdt = None  # local datetime of the last accepted point
    warnings = []
    # todo allow to skip if not too many errors in a row?
    for l in locations(start=start, stop=stop):
        # TODO right. it's _very_ slow...
        zone = finder.timezone_at(lng=l.lon, lat=l.lat)
        if zone is None:
            warnings.append(f"Couldn't figure out tz for {l}")
            continue
        tz = pytz.timezone(zone)
        ldt = l.dt.astimezone(tz)
        ndate = ldt.date()
        if pdt is not None and ndate < pdt.date():
            # TODO for now just drop and collect the stats
            # I guess we'd have minor drops while air travel...
            warnings.append(f"local time goes backwards {ldt} ({tz}) < {pdt}")
            continue
        pdt = ldt
        yield DayWithZone(day=ndate, zone=tz.zone)
    if warnings:
        # only reached when the consumer exhausts the generator
        logger.warning(
            'dropped %d location(s) while inferring timezones; first: %s',
            len(warnings),
            warnings[0],
        )
|
||||||
|
|
||||||
|
|
||||||
|
def most_common(l):
    '''
    Return the element that occurs most often in ``l``.

    Raises IndexError when ``l`` is empty.
    '''
    ranking = Counter(l).most_common(1)  # type: ignore[var-annotated]
    top_item = ranking[0][0]
    return top_item
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_tzs() -> Iterator[DayWithZone]:
    '''
    Collapse the per-location stream into one entry per day, picking the zone that
    occurs most often among consecutive entries sharing that day.
    '''
    def day_of(entry):
        return entry.day

    for day, entries in groupby(_iter_local_dates(), key=day_of):
        logger.info('processed %s', day)
        # every entry in the group has the same day, so the most common entry
        # is the one carrying the most common zone
        winner = most_common(list(entries))
        yield DayWithZone(day=day, zone=winner.zone)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(1)
def loc_tz_getter() -> Iterator[DayWithZone]:
    '''
    Return the per-day timezone stream wrapped in ``seekable``.

    seekable caches the values it has emitted; lru_cache(1) makes repeated calls
    return the same wrapper object.
    '''
    per_day = _iter_tzs()
    return seekable(per_day)
|
||||||
|
|
||||||
|
|
||||||
|
# todo expose zone names too?
|
||||||
|
@lru_cache(maxsize=None)
def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
    '''
    Look up the timezone recorded for the day ``d``.

    Returns None when the scan finds no entry for ``d``. Results are memoized per day.
    '''
    days = loc_tz_getter()
    # todo hmm. seeking is not super efficient... might need to use some smarter dict-based cache
    # hopefully, this method itself caches stuff for the users, so won't be too bad
    days.seek(0)  # type: ignore

    found: Optional[str] = None
    for day, name in days:
        if day < d:
            continue
        # first entry at or past d: take it if it is d itself, then stop scanning
        if day == d:
            found = name
        break

    if found is None:
        return None
    return pytz.timezone(found)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
    '''
    Timezone for the calendar day of ``dt`` (only daily resolution is supported), or None.
    '''
    day = dt.date()
    return _get_day_tz(d=day)
|
||||||
|
|
||||||
|
|
||||||
|
def localize(dt: datetime) -> datetime:
    '''
    Attach the timezone inferred for ``dt``'s day to the naive datetime ``dt``.

    ``dt`` is returned unchanged when no timezone is known for that day.
    '''
    # todo not sure. warn instead?
    assert dt.tzinfo is None, dt
    zone = _get_tz(dt)
    return dt if zone is None else zone.localize(dt)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: cache stuff
|
62
tests/tz.py
Normal file
62
tests/tz.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pytest # type: ignore
|
||||||
|
|
||||||
|
import my.time.tz.main as TZ
|
||||||
|
import my.time.tz.via_location as LTZ
|
||||||
|
|
||||||
|
|
||||||
|
def test_iter_tzs() -> None:
    '''The test track should produce more than three per-day entries.'''
    per_day = list(LTZ._iter_tzs())
    count = len(per_day)
    assert count > 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_future() -> None:
    '''Localizing a datetime 100 days in the future shouldn't crash, at least.'''
    hundred_days = timedelta(days=100)
    fut = datetime.now() + hundred_days
    localized = TZ.localize(fut)
    assert localized is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_tz() -> None:
    '''Check the inferred timezone for a day missing from the test data and two present ones.'''
    # not present in the test data
    missing = LTZ._get_tz(D('20200101 10:00:00'))
    assert missing is None

    expected = [
        ('20170801 11:00:00', 'Europe/Vienna'),
        ('20170730 10:00:00', 'Europe/Rome'),
    ]
    for when, zone_name in expected:
        tz = LTZ._get_tz(D(when))
        assert tz is not None
        assert tz.zone == zone_name
|
||||||
|
|
||||||
|
|
||||||
|
def D(dstr: str) -> datetime:
    '''Parse a ``YYYYmmdd HH:MM:SS`` string into a naive datetime.'''
    fmt = '%Y%m%d %H:%M:%S'
    return datetime.strptime(dstr, fmt)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO copy pasted from location.py, need to extract some common provider
|
||||||
|
# TODO copy pasted from location.py, need to extract some common provider
@pytest.fixture(autouse=True)
def prepare(tmp_path: Path):
    '''
    Autouse fixture: switch the provider to the faster finder, pack the test track
    into a takeout zip under ``tmp_path`` and point the google config at it.
    '''
    LTZ._FASTER = True

    from more_itertools import one
    import zipfile

    testdata = Path(__file__).absolute().parent.parent / 'testdata'
    assert testdata.exists(), testdata

    track = one(testdata.rglob('italy-slovenia-2017-07-29.json'))

    # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
    archive = tmp_path / 'takeout.zip'
    with zipfile.ZipFile(archive, 'w') as zf:
        zf.writestr('Takeout/Location History/Location History.json', track.read_bytes())

    # FIXME ugh. early import/inheritance of user_config in my.google.takeout.paths messes things up..
    from my.cfg import config

    class user_config:
        takeout_path = tmp_path

    config.google = user_config  # type: ignore

    yield
|
7
tox.ini
7
tox.ini
|
@ -17,6 +17,9 @@ commands =
|
||||||
# my.location.google deps
|
# my.location.google deps
|
||||||
pip install geopy ijson
|
pip install geopy ijson
|
||||||
|
|
||||||
|
# my.time.tz.via_location dep
|
||||||
|
pip install timezonefinder
|
||||||
|
|
||||||
python3 -m pytest \
|
python3 -m pytest \
|
||||||
tests/core.py \
|
tests/core.py \
|
||||||
tests/misc.py \
|
tests/misc.py \
|
||||||
|
@ -25,7 +28,8 @@ commands =
|
||||||
tests/config.py::test_environment_variable \
|
tests/config.py::test_environment_variable \
|
||||||
tests/demo.py \
|
tests/demo.py \
|
||||||
tests/bluemaestro.py \
|
tests/bluemaestro.py \
|
||||||
tests/location.py
|
tests/location.py \
|
||||||
|
tests/tz.py
|
||||||
# TODO add; once I figure out porg depdencency?? tests/config.py
|
# TODO add; once I figure out porg depdencency?? tests/config.py
|
||||||
# TODO run demo.py? just make sure with_my is a bit cleverer?
|
# TODO run demo.py? just make sure with_my is a bit cleverer?
|
||||||
# TODO e.g. under CI, rely on installing
|
# TODO e.g. under CI, rely on installing
|
||||||
|
@ -63,6 +67,7 @@ commands =
|
||||||
-p my.body.exercise.cross_trainer \
|
-p my.body.exercise.cross_trainer \
|
||||||
-p my.bluemaestro \
|
-p my.bluemaestro \
|
||||||
-p my.location.google \
|
-p my.location.google \
|
||||||
|
-p my.time.tz.via_location \
|
||||||
--txt-report .mypy-coverage \
|
--txt-report .mypy-coverage \
|
||||||
--html-report .mypy-coverage \
|
--html-report .mypy-coverage \
|
||||||
{posargs}
|
{posargs}
|
||||||
|
|
Loading…
Add table
Reference in a new issue