location: add all.py, using takeout/gpslogger/ip (#237)
* location: add all.py, using takeout/gpslogger/ip, update docs
This commit is contained in:
parent
66a00c6ada
commit
2cb836181b
15 changed files with 488 additions and 46 deletions
|
@ -16,9 +16,12 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][
|
|||
- [[#toc][TOC]]
|
||||
- [[#intro][Intro]]
|
||||
- [[#configs][Configs]]
|
||||
- [[#mygoogletakeoutpaths][my.google.takeout.paths]]
|
||||
- [[#mygoogletakeoutparser][my.google.takeout.parser]]
|
||||
- [[#myhypothesis][my.hypothesis]]
|
||||
- [[#myreddit][my.reddit]]
|
||||
- [[#mybrowser][my.browser]]
|
||||
- [[#mylocation][my.location]]
|
||||
- [[#mytimetzvia_location][my.time.tz.via_location]]
|
||||
- [[#mypocket][my.pocket]]
|
||||
- [[#mytwittertwint][my.twitter.twint]]
|
||||
- [[#mytwitterarchive][my.twitter.archive]]
|
||||
|
@ -90,12 +93,12 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http
|
|||
export_path: Paths
|
||||
|
||||
#+end_src
|
||||
|
||||
** [[file:../my/browser/][my.browser]]
|
||||
|
||||
Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
|
||||
|
||||
#+begin_src python
|
||||
@dataclass
|
||||
class browser:
|
||||
class export:
|
||||
# path[s]/glob to your backed up browser history sqlite files
|
||||
|
@ -108,6 +111,80 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http
|
|||
# active_databases = Firefox.locate_database()
|
||||
export_path: Paths
|
||||
#+end_src
|
||||
** [[file:../my/location][my.location]]
|
||||
|
||||
Merged location history from lots of sources.
|
||||
|
||||
The main sources here are
|
||||
[[https://github.com/mendhak/gpslogger][gpslogger]] .gpx (XML) files, and
|
||||
google takeout (using =my.google.takeout.parser=), with a fallback on
|
||||
manually defined home locations.
|
||||
|
||||
You might also be able to use [[file:../my/location/via_ip.py][my.location.via_ip]] which uses =my.ip.all= to
|
||||
provide geolocation data for IPs (though no IPs are provided from any
|
||||
of the sources here). For an example of usage, see [[https://github.com/seanbreckenridge/HPI/tree/master/my/ip][here]]
|
||||
|
||||
#+begin_src python
|
||||
class location:
|
||||
home = (
|
||||
# supports ISO strings
|
||||
('2005-12-04' , (42.697842, 23.325973)), # Bulgaria, Sofia
|
||||
# supports date/datetime objects
|
||||
(date(year=1980, month=2, day=15) , (40.7128 , -74.0060 )), # NY
|
||||
(datetime.fromtimestamp(1600000000, tz=timezone.utc), (55.7558 , 37.6173 )), # Moscow, Russia
|
||||
)
|
||||
# note: order doesn't matter, will be sorted in the data provider
|
||||
|
||||
class gpslogger:
|
||||
# path[s]/glob to the exported gpx files
|
||||
export_path: Paths
|
||||
|
||||
# default accuracy for gpslogger
|
||||
accuracy: float = 50.0
|
||||
|
||||
class via_ip:
|
||||
# guess ~15km accuracy for IP addresses
|
||||
accuracy: float = 15_000
|
||||
#+end_src
|
||||
** [[file:../my/time/tz/via_location.py][my.time.tz.via_location]]
|
||||
|
||||
Uses the =my.location= module to determine the timezone for a location.
|
||||
|
||||
This can be used to 'localize' timezones. Most modules here return
|
||||
datetimes in UTC, to prevent confusion whether or not it's a local
|
||||
timezone, one from UTC, or one in your timezone.
|
||||
|
||||
Depending on the specific data provider and your level of paranoia you might expect different behaviour.. E.g.:
|
||||
- if your objects already have tz info, you might not need to call localize() at all
|
||||
- it's safer when either all of your objects are tz aware or all are tz unaware, not a mixture
|
||||
- you might trust your original timezone, or it might just be UTC, and you want to use something more reasonable
|
||||
|
||||
#+begin_src python
|
||||
TzPolicy = Literal[
|
||||
'keep' , # if datetime is tz aware, just preserve it
|
||||
'convert', # if datetime is tz aware, convert to provider's tz
|
||||
'throw' , # if datetime is tz aware, throw exception
|
||||
]
|
||||
#+end_src
|
||||
|
||||
This is still a work in progress, plan is to integrate it with =hpi query=
|
||||
so that you can easily convert/localize timezones for some module/data
|
||||
|
||||
#+begin_src python
|
||||
class time:
|
||||
class tz:
|
||||
policy = 'keep'
|
||||
|
||||
class via_location:
|
||||
# less precise, but faster
|
||||
fast: bool = True
|
||||
|
||||
# if the accuracy for the location is more than 5km (this
|
||||
# isn't an accurate location, so shouldn't use it to determine
|
||||
# timezone), don't use
|
||||
require_accuracy: float = 5_000
|
||||
#+end_src
|
||||
|
||||
|
||||
# TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh.
|
||||
|
||||
|
@ -163,7 +240,6 @@ for cls, p in modules:
|
|||
|
||||
#+RESULTS:
|
||||
|
||||
|
||||
** [[file:../my/google/takeout/parser.py][my.google.takeout.parser]]
|
||||
|
||||
Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]
|
||||
|
|
11
my/config.py
11
my/config.py
|
@ -72,10 +72,19 @@ class location:
|
|||
# and we can't import the types from the module itself, otherwise would be circular. common module?
|
||||
home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
|
||||
|
||||
class via_ip:
|
||||
accuracy: float
|
||||
|
||||
class gpslogger:
|
||||
export_path: Paths = ''
|
||||
accuracy: float
|
||||
|
||||
|
||||
class time:
|
||||
class tz:
|
||||
pass
|
||||
class via_location:
|
||||
fast: bool
|
||||
require_accuracy: float
|
||||
|
||||
|
||||
class orgmode:
|
||||
|
|
29
my/ip/all.py
Normal file
29
my/ip/all.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
"""
|
||||
An example all.py stub module that provides ip data
|
||||
|
||||
To use this, you'd add IP providers that yield IPs to the 'ips' function
|
||||
|
||||
For an example of how this could be used, see https://github.com/seanbreckenridge/HPI/tree/master/my/ip
|
||||
"""
|
||||
|
||||
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from my.core.common import Stats, warn_if_empty
|
||||
|
||||
from .common import IP
|
||||
|
||||
|
||||
@warn_if_empty
def ips() -> Iterator[IP]:
    """
    Yield IP records from all configured IP providers.

    This is an example stub and yields nothing as-is; add your own
    providers here (e.g. ``yield from some_provider.ips()``).
    """
    no_providers = ()
    yield from no_providers
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the summary produced by ``my.core.stat`` for the ``ips`` iterator."""
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(ips))
    return summary
|
39
my/ip/common.py
Normal file
39
my/ip/common.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]]
|
||||
"""
|
||||
|
||||
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||
|
||||
from my.core import __NOT_HPI_MODULE__
|
||||
|
||||
import ipaddress
|
||||
from typing import NamedTuple, Iterator
|
||||
from datetime import datetime
|
||||
|
||||
import ipgeocache
|
||||
|
||||
from my.core import Json
|
||||
|
||||
|
||||
class IP(NamedTuple):
    """An IP address together with the datetime attached to it by the provider."""

    dt: datetime
    addr: str  # an IP address

    # TODO: could cache? not sure if it's worth it
    def ipgeocache(self) -> Json:
        """Look up this address with the ``ipgeocache`` library and return its JSON result."""
        # 'ipgeocache' here resolves to the imported module, not to this method
        geo_info = ipgeocache.get(self.addr)
        return geo_info

    @property
    def tzname(self) -> str:
        """The ``"timezone"`` entry of the geolocation data for this address."""
        geo_info = self.ipgeocache()
        tz: str = geo_info["timezone"]
        return tz
|
||||
|
||||
|
||||
def drop_private(ips: Iterator[IP]) -> Iterator[IP]:
    """
    Helper function that can be used to filter out private IPs

    Each ``addr`` is parsed with ``ipaddress.ip_address``; records whose
    address reports ``is_private`` are skipped, the rest are yielded in
    their original order.
    """
    for record in ips:
        parsed = ipaddress.ip_address(record.addr)
        if not parsed.is_private:
            yield record
|
48
my/location/all.py
Normal file
48
my/location/all.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
"""
|
||||
Merges location data from multiple sources
|
||||
"""
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from my.core import Stats, LazyLogger
|
||||
from my.core.source import import_source
|
||||
|
||||
from my.location.via_ip import locations
|
||||
|
||||
from .common import Location
|
||||
|
||||
|
||||
logger = LazyLogger(__name__, level="warning")
|
||||
|
||||
|
||||
def locations() -> Iterator[Location]:
    """
    Yield locations from every source in turn: google takeout first,
    then gpslogger, then IP-based estimates.
    """
    # can add/comment out sources here to disable them, or use core.disabled_modules
    sources = (
        _takeout_locations,
        _gpslogger_locations,
        _ip_locations,
    )
    for source in sources:
        yield from source()
|
||||
|
||||
|
||||
@import_source(module_name="my.location.google_takeout")
def _takeout_locations() -> Iterator[Location]:
    """
    Yield locations from my.location.google_takeout.

    The module is imported lazily, inside the function, so importing this
    file doesn't require the takeout source to be set up.
    NOTE(review): import_source presumably handles the case where the
    module can't be imported -- confirm in my.core.source.
    """
    from . import google_takeout as takeout

    takeout_locations = takeout.locations()
    yield from takeout_locations
|
||||
|
||||
|
||||
@import_source(module_name="my.location.gpslogger")
def _gpslogger_locations() -> Iterator[Location]:
    """
    Yield locations from my.location.gpslogger.

    The module is imported lazily, inside the function, so importing this
    file doesn't require the gpslogger source to be set up.
    NOTE(review): import_source presumably handles the case where the
    module can't be imported -- confirm in my.core.source.
    """
    from . import gpslogger as gps

    gps_locations = gps.locations()
    yield from gps_locations
|
||||
|
||||
|
||||
@import_source(module_name="my.location.via_ip")
def _ip_locations() -> Iterator[Location]:
    """
    Yield locations from my.location.via_ip.

    The module is imported lazily, inside the function, so importing this
    file doesn't require the IP source to be set up.
    NOTE(review): import_source presumably handles the case where the
    module can't be imported -- confirm in my.core.source.
    """
    from . import via_ip as ip_source

    ip_locations = ip_source.locations()
    yield from ip_locations
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the summary produced by ``my.core.stat`` for the merged ``locations`` iterator."""
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(locations))
    return summary
|
17
my/location/common.py
Normal file
17
my/location/common.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
from datetime import date, datetime
|
||||
from typing import Union, Tuple, NamedTuple, Optional
|
||||
|
||||
from my.core import __NOT_HPI_MODULE__
|
||||
|
||||
DateIsh = Union[datetime, date, str]
|
||||
|
||||
LatLon = Tuple[float, float]
|
||||
|
||||
|
||||
# TODO: add timezone to this? can use timezonefinder in tz provider instead though
|
||||
class Location(NamedTuple):
    """A single point of location history, shared by all location providers."""

    lat: float  # latitude
    lon: float  # longitude
    dt: datetime  # datetime attached to this point by the provider
    # how coarse the location is; the providers in this package use metres
    # (50.0 for gpslogger, 15_000 for IP lookups). May be None
    accuracy: Optional[float]
    # None when the provider has no elevation data
    elevation: Optional[float]
|
|
@ -1,6 +1,9 @@
|
|||
"""
|
||||
Location data from Google Takeout
|
||||
|
||||
DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead
|
||||
"""
|
||||
|
||||
REQUIRES = [
|
||||
'geopy', # checking that coordinates are valid
|
||||
'ijson',
|
||||
|
@ -20,6 +23,10 @@ from ..core.common import LazyLogger, mcachew
|
|||
from ..core.cachew import cache_dir
|
||||
from ..core import kompress
|
||||
|
||||
from my.core.warnings import high
|
||||
|
||||
high("Please set up my.google.takeout.parser module for better takeout support")
|
||||
|
||||
|
||||
# otherwise uses ijson
|
||||
# todo move to config??
|
||||
|
|
33
my/location/google_takeout.py
Normal file
33
my/location/google_takeout.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
"""
|
||||
Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google
|
||||
"""
|
||||
|
||||
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from my.google.takeout.parser import events, _cachew_depends_on
|
||||
from google_takeout_parser.models import Location as GoogleLocation
|
||||
|
||||
from my.core.common import mcachew, LazyLogger, Stats
|
||||
from .common import Location
|
||||
|
||||
logger = LazyLogger(__name__)
|
||||
|
||||
|
||||
@mcachew(
    depends_on=_cachew_depends_on,
    logger=logger,
)
def locations() -> Iterator[Location]:
    """
    Yield a Location for every google_takeout_parser Location event.

    All other takeout event types are ignored. Takeout carries no
    elevation here, so it is always None.
    """
    for event in events():
        if not isinstance(event, GoogleLocation):
            continue
        yield Location(
            lat=event.lat,
            lon=event.lng,
            dt=event.dt,
            accuracy=event.accuracy,
            elevation=None,
        )
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the summary produced by ``my.core.stat`` for the takeout ``locations`` iterator."""
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(locations))
    return summary
|
74
my/location/gpslogger.py
Normal file
74
my/location/gpslogger.py
Normal file
|
@ -0,0 +1,74 @@
|
|||
"""
|
||||
Parse [[https://github.com/mendhak/gpslogger][gpslogger]] .gpx (xml) files
|
||||
"""
|
||||
|
||||
REQUIRES = ["gpxpy"]
|
||||
|
||||
from my.config import location
|
||||
from my.core import Paths, dataclass
|
||||
|
||||
|
||||
@dataclass
class config(location.gpslogger):
    """gpslogger settings, layered on top of the user's ``location.gpslogger`` config."""

    # where the synced gpx (XML) files live: a path, several paths, or a glob
    export_path: Paths

    # accuracy assigned to every gpslogger point unless the user overrides it
    accuracy: float = 50.0
|
||||
|
||||
|
||||
from itertools import chain
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Sequence, List
|
||||
|
||||
import gpxpy # type: ignore[import]
|
||||
from more_itertools import unique_everseen
|
||||
|
||||
from my.core import Stats, LazyLogger
|
||||
from my.core.common import get_files, mcachew
|
||||
from .common import Location
|
||||
|
||||
|
||||
logger = LazyLogger(__name__, level="warning")
|
||||
|
||||
|
||||
def inputs() -> Sequence[Path]:
    """Return the ``*.gpx`` files found under ``config.export_path``."""
    gpx_files = get_files(config.export_path, glob="*.gpx")
    return gpx_files
|
||||
|
||||
|
||||
def _cachew_depends_on() -> List[float]:
    """
    Cache key for ``locations``: the modification time of every input file.

    Adding, removing or touching any .gpx file changes the returned list.
    """
    mtimes: List[float] = []
    for gpx_file in inputs():
        mtimes.append(gpx_file.stat().st_mtime)
    return mtimes
|
||||
|
||||
|
||||
# TODO: could use a better cachew key/this has to recompute every file whenever the newest one changes
|
||||
@mcachew(depends_on=_cachew_depends_on, logger=logger)
def locations() -> Iterator[Location]:
    """
    Yield the locations from all input .gpx files, one file after another.

    Points are de-duplicated by timestamp: only the first location seen
    for a given ``dt`` is kept.
    """
    # from_iterable walks the files lazily, instead of star-unpacking one
    # generator per file into chain() up front
    per_file = map(_extract_locations, inputs())
    yield from unique_everseen(
        chain.from_iterable(per_file), key=lambda loc: loc.dt
    )
|
||||
|
||||
|
||||
def _extract_locations(path: Path) -> Iterator[Location]:
    """
    Yield a Location for every timestamped track point in one .gpx file.

    Points without a timestamp are skipped. Accuracy is not read from the
    file: every point is given ``config.accuracy``.
    """
    # NOTE(review): gpxpy.parse is expected to return a fully parsed object,
    # so the file is closed before the (lazy) iteration below starts
    with path.open("r") as gf:
        gpx_obj = gpxpy.parse(gf)

    for track in gpx_obj.tracks:
        for segment in track.segments:
            for point in segment.points:
                if point.time is None:
                    continue
                # hmm - for gpslogger, seems that timezone is always SimpleTZ('Z'), which
                # specifies UTC -- see https://github.com/tkrajina/gpxpy/blob/cb243b22841bd2ce9e603fe3a96672fc75edecf2/gpxpy/gpxfield.py#L38
                yield Location(
                    lat=point.latitude,
                    lon=point.longitude,
                    accuracy=config.accuracy,
                    elevation=point.elevation,
                    dt=_as_utc(point.time),
                )


def _as_utc(dt: datetime) -> datetime:
    """Return ``dt`` as a timezone-aware datetime in UTC."""
    if dt.utcoffset() is None:
        # naive timestamp: nothing to convert from, just label it as UTC
        return dt.replace(tzinfo=timezone.utc)
    # aware timestamp: convert, so that a non-zero offset isn't silently
    # thrown away (for a 'Z'/UTC timestamp this gives the same instant)
    return dt.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the summary produced by ``my.core.stat`` for the gpslogger ``locations`` iterator."""
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(locations))
    return summary
|
|
@ -2,17 +2,13 @@
|
|||
Simple location provider, serving as a fallback when more detailed data isn't available
|
||||
'''
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, date, time, timezone
|
||||
from datetime import datetime, time, timezone
|
||||
from functools import lru_cache
|
||||
from typing import Sequence, Tuple, Union, cast
|
||||
|
||||
from my.config import location as user_config
|
||||
|
||||
|
||||
DateIsh = Union[datetime, date, str]
|
||||
|
||||
# todo hopefully reasonable? might be nice to add name or something too
|
||||
LatLon = Tuple[float, float]
|
||||
from my.location.common import LatLon, DateIsh
|
||||
|
||||
@dataclass
|
||||
class Config(user_config):
|
||||
|
|
39
my/location/via_ip.py
Normal file
39
my/location/via_ip.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
Converts IP addresses provided by my.ip.all to estimated locations
|
||||
"""
|
||||
|
||||
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||
|
||||
from my.core import dataclass, Stats
|
||||
from my.config import location
|
||||
|
||||
|
||||
@dataclass
class config(location.via_ip):
    """IP-geolocation settings, layered on top of the user's ``location.via_ip`` config."""

    # accuracy assigned to every IP-derived location; ~15km is only a rough
    # guess, there is no real science behind the number
    accuracy: float = 15_000.0
|
||||
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from .common import Location
|
||||
from my.ip.all import ips
|
||||
|
||||
|
||||
def locations() -> Iterator[Location]:
    """
    Yield an estimated Location for every IP returned by ``my.ip.all.ips``.

    The coordinates come from the ``"loc"`` entry of the ipgeocache data,
    a ``"<lat>,<lon>"`` string. Accuracy is always ``config.accuracy`` and
    elevation is always None.
    """
    for ip in ips():
        geo_info = ip.ipgeocache()
        coords: str = geo_info["loc"]
        lat_text, _, lon_text = coords.partition(",")
        latitude = float(lat_text)
        longitude = float(lon_text)
        yield Location(
            lat=latitude,
            lon=longitude,
            dt=ip.dt,
            accuracy=config.accuracy,
            elevation=None,
        )
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return the summary produced by ``my.core.stat`` for the IP-based ``locations`` iterator."""
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(locations))
    return summary
|
|
@ -10,24 +10,27 @@ Depending on the specific data provider and your level of paranoia you might exp
|
|||
- it's safer when either all of your objects are tz aware or all are tz unware, not a mixture
|
||||
- you might trust your original timezone, or it might just be UTC, and you want to use something more reasonable
|
||||
'''
|
||||
Policy = Literal[
|
||||
TzPolicy = Literal[
|
||||
'keep' , # if datetime is tz aware, just preserve it
|
||||
'convert', # if datetime is tz aware, convert to provider's tz
|
||||
'throw' , # if datetime is tz aware, throw exception
|
||||
# todo 'warn'? not sure if very useful
|
||||
]
|
||||
|
||||
def default_policy() -> Policy:
|
||||
# backwards compatibility
|
||||
Policy = TzPolicy
|
||||
|
||||
def default_policy() -> TzPolicy:
|
||||
try:
|
||||
from my.config import time as user_config
|
||||
return cast(Policy, user_config.tz.policy)
|
||||
return cast(TzPolicy, user_config.tz.policy)
|
||||
except Exception as e:
|
||||
# todo meh.. need to think how to do this more carefully
|
||||
# rationale: do not mess with user's data unless they want
|
||||
return 'keep'
|
||||
|
||||
|
||||
def localize_with_policy(lfun: Callable[[datetime], tzdatetime], dt: datetime, policy: Policy=default_policy()) -> tzdatetime:
|
||||
def localize_with_policy(lfun: Callable[[datetime], tzdatetime], dt: datetime, policy: TzPolicy=default_policy()) -> tzdatetime:
|
||||
tz = dt.tzinfo
|
||||
if tz is None:
|
||||
return lfun(dt)
|
||||
|
|
|
@ -7,27 +7,34 @@ REQUIRES = [
|
|||
]
|
||||
|
||||
|
||||
from my.config import time
|
||||
from my.core import dataclass
|
||||
|
||||
|
||||
@dataclass
class config(time.tz.via_location):
    """Timezone-from-location settings, layered on top of the user's ``time.tz.via_location`` config."""

    # use the faster, but less precise, timezone finder
    fast: bool = True

    # locations whose accuracy value is above this (5km) are too coarse
    # and are not used to determine the timezone
    require_accuracy: float = 5_000
|
||||
|
||||
|
||||
from collections import Counter
|
||||
from datetime import date, datetime
|
||||
from functools import lru_cache
|
||||
from itertools import groupby
|
||||
from typing import Iterator, NamedTuple, Optional
|
||||
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List
|
||||
|
||||
from more_itertools import seekable
|
||||
import pytz
|
||||
|
||||
from ...core.common import LazyLogger, mcachew, tzdatetime
|
||||
from ...core.cachew import cache_dir
|
||||
from ...location.google import locations
|
||||
from my.core.common import LazyLogger, mcachew, tzdatetime
|
||||
|
||||
logger = LazyLogger(__name__, level='warning')
|
||||
|
||||
logger = LazyLogger(__name__, level='debug')
|
||||
|
||||
|
||||
# todo should move to config? not sure
|
||||
_FASTER: bool = True
|
||||
@lru_cache(2)
|
||||
def _timezone_finder(fast: bool):
|
||||
def _timezone_finder(fast: bool) -> Any:
|
||||
if fast:
|
||||
# less precise, but faster
|
||||
from timezonefinder import TimezoneFinderL as Finder # type: ignore
|
||||
|
@ -46,39 +53,89 @@ class DayWithZone(NamedTuple):
|
|||
zone: Zone
|
||||
|
||||
|
||||
def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
|
||||
finder = _timezone_finder(fast=_FASTER) # rely on the default
|
||||
pdt = None
|
||||
from my.location.common import LatLon
|
||||
|
||||
# for backwards compatibility
|
||||
def _locations() -> Iterator[Tuple[LatLon, datetime]]:
    """
    Yield ``((lat, lon), dt)`` pairs used to work out timezones.

    Prefers my.location.all, skipping locations whose accuracy is known and
    larger than ``config.require_accuracy``. If anything raises while doing
    so, the error is logged, a warning is emitted, and the legacy
    my.location.google provider is used instead (with no accuracy filter).
    """
    try:
        import my.location.all

        for loc in my.location.all.locations():
            accuracy = loc.accuracy
            too_coarse = accuracy is not None and accuracy > config.require_accuracy
            if not too_coarse:
                yield ((loc.lat, loc.lon), loc.dt)

    except Exception as err:
        from my.core.warnings import high

        logger.exception("Could not setup via_location using my.location.all provider, falling back to legacy google implemetation", exc_info=err)
        high("Setup my.google.takeout.parser, then my.location.all for better google takeout/location data")

        import my.location.google

        for legacy_loc in my.location.google.locations():
            yield ((legacy_loc.lat, legacy_loc.lon), legacy_loc.dt)
|
||||
|
||||
# TODO: could use heapmerge or sort the underlying iterators somehow?
|
||||
# see https://github.com/karlicoss/HPI/pull/237#discussion_r858372934
|
||||
def _sorted_locations() -> List[Tuple[LatLon, datetime]]:
    """
    Return every ``((lat, lon), dt)`` pair from ``_locations``, ordered by datetime.

    This consumes the whole ``_locations`` iterator and holds it in memory.
    """
    # sorted() already returns a new list, so there's no need to copy it again
    return sorted(_locations(), key=lambda pair: pair[1])
|
||||
|
||||
|
||||
# Note: this takes a while: since the upstream _locations isn't sorted, this
|
||||
# has to do an iterative sort of the entire my.location.all list
|
||||
def _iter_local_dates() -> Iterator[DayWithZone]:
|
||||
finder = _timezone_finder(fast=config.fast) # rely on the default
|
||||
#pdt = None
|
||||
# TODO: warnings doesnt actually warn?
|
||||
warnings = []
|
||||
# todo allow to skip if not too many errors in a row?
|
||||
for l in locations(start=start, stop=stop):
|
||||
for (lat, lon), dt in _sorted_locations():
|
||||
# TODO right. its _very_ slow...
|
||||
zone = finder.timezone_at(lng=l.lon, lat=l.lat)
|
||||
zone = finder.timezone_at(lat=lat, lng=lon)
|
||||
if zone is None:
|
||||
warnings.append(f"Couldn't figure out tz for {l}")
|
||||
warnings.append(f"Couldn't figure out tz for {lat}, {lon}")
|
||||
continue
|
||||
tz = pytz.timezone(zone)
|
||||
# TODO this is probably a bit expensive... test & benchmark
|
||||
ldt = l.dt.astimezone(tz)
|
||||
ldt = dt.astimezone(tz)
|
||||
ndate = ldt.date()
|
||||
if pdt is not None and ndate < pdt.date():
|
||||
# TODO for now just drop and collect the stats
|
||||
# I guess we'd have minor drops while air travel...
|
||||
warnings.append("local time goes backwards {ldt} ({tz}) < {pdt}")
|
||||
continue
|
||||
pdt = ldt
|
||||
#if pdt is not None and ndate < pdt.date():
|
||||
# # TODO for now just drop and collect the stats
|
||||
# # I guess we'd have minor drops while air travel...
|
||||
# warnings.append("local time goes backwards {ldt} ({tz}) < {pdt}")
|
||||
# continue
|
||||
#pdt = ldt
|
||||
z = tz.zone; assert z is not None
|
||||
yield DayWithZone(day=ndate, zone=z)
|
||||
|
||||
|
||||
def most_common(l):
|
||||
res, count = Counter(l).most_common(1)[0] # type: ignore[var-annotated]
|
||||
def most_common(lst: List[DayWithZone]) -> DayWithZone:
|
||||
res, _ = Counter(lst).most_common(1)[0] # type: ignore[var-annotated]
|
||||
return res
|
||||
|
||||
|
||||
@mcachew(cache_path=cache_dir())
|
||||
def _iter_tz_depends_on() -> str:
    """
    Since you might get new data which specifies a new timezone sometime
    in the day, this causes _iter_tzs to refresh every 6 hours.

    Returns the current local date, followed by the current hour rounded
    down to a multiple of 6 (not zero-padded), like:
    2022-04-26_0
    2022-04-26_6
    2022-04-26_12
    2022-04-26_18
    """
    # read the clock once, so the date and the hour can't disagree if this
    # happens to run right at midnight
    now = datetime.now()
    day = now.date().isoformat()
    hr_truncated = now.hour // 6 * 6
    return "{}_{}".format(day, hr_truncated)
|
||||
|
||||
|
||||
# refresh _iter_tzs every 6 hours -- don't think a better depends_on is possible dynamically
|
||||
@mcachew(logger=logger, depends_on=_iter_tz_depends_on)
|
||||
def _iter_tzs() -> Iterator[DayWithZone]:
|
||||
for d, gr in groupby(_iter_local_dates(), key=lambda p: p.day):
|
||||
# since we have no control over what order the locations are returned,
|
||||
# we need to sort them first before we can do a groupby
|
||||
local_dates: List[DayWithZone] = list(_iter_local_dates())
|
||||
local_dates.sort(key=lambda p: p.day)
|
||||
for d, gr in groupby(local_dates, key=lambda p: p.day):
|
||||
logger.info('processed %s', d)
|
||||
zone = most_common(list(gr)).zone
|
||||
yield DayWithZone(day=d, zone=zone)
|
||||
|
@ -106,6 +163,7 @@ def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
|
|||
break
|
||||
return None if zone is None else pytz.timezone(zone)
|
||||
|
||||
|
||||
# ok to cache, there are only a few home locations?
|
||||
@lru_cache(maxsize=None)
|
||||
def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
|
||||
|
@ -119,8 +177,10 @@ def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
|
|||
return pytz.timezone(zone)
|
||||
|
||||
|
||||
# TODO expose? to main as well?
|
||||
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
||||
'''
|
||||
Given a datetime, returns the timezone for that date.
|
||||
'''
|
||||
res = _get_day_tz(d=dt.date())
|
||||
if res is not None:
|
||||
return res
|
||||
|
@ -129,6 +189,9 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
|||
loc = home.get_location(dt)
|
||||
return _get_home_tz(loc=loc)
|
||||
|
||||
# expose as 'public' function
|
||||
get_tz = _get_tz
|
||||
|
||||
|
||||
def localize(dt: datetime) -> tzdatetime:
|
||||
tz = _get_tz(dt)
|
||||
|
@ -144,11 +207,13 @@ def stats() -> Stats:
|
|||
# TODO not sure what would be a good stat() for this module...
|
||||
# might be nice to print some actual timezones?
|
||||
# there aren't really any great iterables to expose
|
||||
import os
|
||||
VIA_LOCATION_START_YEAR = int(os.environ.get("VIA_LOCATION_START_YEAR", 1990))
|
||||
def localized_years():
|
||||
last = datetime.now().year + 2
|
||||
# note: deliberately take + 2 years, so the iterator exhausts. otherwise stuff might never get cached
|
||||
# need to think about it...
|
||||
for Y in range(1990, last):
|
||||
for Y in range(VIA_LOCATION_START_YEAR, last):
|
||||
dt = datetime.fromisoformat(f'{Y}-01-01 01:01:01')
|
||||
yield localize(dt)
|
||||
return stat(localized_years)
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from datetime import datetime, timedelta, date, timezone
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
import pytest # type: ignore
|
||||
import pytz # type: ignore
|
||||
|
@ -80,7 +79,7 @@ def prepare(tmp_path: Path):
|
|||
from .common import reset_modules
|
||||
reset_modules()
|
||||
|
||||
LTZ._FASTER = True
|
||||
LTZ.config.fast = True
|
||||
|
||||
from .location import _prepare_google_config
|
||||
google = _prepare_google_config(tmp_path)
|
||||
|
@ -98,7 +97,8 @@ def prepare(tmp_path: Path):
|
|||
|
||||
class time:
|
||||
class tz:
|
||||
pass # just rely on the default..
|
||||
class via_location:
|
||||
pass # just rely on the defaults...
|
||||
|
||||
import my.core.cfg as C
|
||||
with C.tmp_config() as config:
|
||||
|
|
7
tox.ini
7
tox.ini
|
@ -100,6 +100,9 @@ commands =
|
|||
hpi module install my.goodreads
|
||||
hpi module install my.pdfs
|
||||
hpi module install my.smscalls
|
||||
hpi module install my.location.gpslogger
|
||||
hpi module install my.location.via_ip
|
||||
hpi module install my.google.takeout.parser
|
||||
|
||||
# todo fuck. -p my.github isn't checking the subpackages?? wtf...
|
||||
# guess it wants .pyi file??
|
||||
|
@ -118,6 +121,10 @@ commands =
|
|||
-p my.body.exercise.cross_trainer \
|
||||
-p my.bluemaestro \
|
||||
-p my.location.google \
|
||||
-p my.location.google_takeout \
|
||||
-p my.location.via_ip \
|
||||
-p my.location.gpslogger \
|
||||
-p my.ip.common \
|
||||
-p my.time.tz.via_location \
|
||||
-p my.calendar.holidays \
|
||||
-p my.arbtt \
|
||||
|
|
Loading…
Add table
Reference in a new issue