location: add all.py, using takeout/gpslogger/ip (#237)
* location: add all.py, using takeout/gpslogger/ip, update docs
This commit is contained in:
parent
66a00c6ada
commit
2cb836181b
15 changed files with 488 additions and 46 deletions
|
@ -16,9 +16,12 @@ If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting][
|
||||||
- [[#toc][TOC]]
|
- [[#toc][TOC]]
|
||||||
- [[#intro][Intro]]
|
- [[#intro][Intro]]
|
||||||
- [[#configs][Configs]]
|
- [[#configs][Configs]]
|
||||||
- [[#mygoogletakeoutpaths][my.google.takeout.paths]]
|
- [[#mygoogletakeoutparser][my.google.takeout.parser]]
|
||||||
- [[#myhypothesis][my.hypothesis]]
|
- [[#myhypothesis][my.hypothesis]]
|
||||||
- [[#myreddit][my.reddit]]
|
- [[#myreddit][my.reddit]]
|
||||||
|
- [[#mybrowser][my.browser]]
|
||||||
|
- [[#mylocation][my.location]]
|
||||||
|
- [[#mytimetzvia_location][my.time.tz.via_location]]
|
||||||
- [[#mypocket][my.pocket]]
|
- [[#mypocket][my.pocket]]
|
||||||
- [[#mytwittertwint][my.twitter.twint]]
|
- [[#mytwittertwint][my.twitter.twint]]
|
||||||
- [[#mytwitterarchive][my.twitter.archive]]
|
- [[#mytwitterarchive][my.twitter.archive]]
|
||||||
|
@ -90,12 +93,12 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** [[file:../my/browser/][my.browser]]
|
** [[file:../my/browser/][my.browser]]
|
||||||
|
|
||||||
Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
|
Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]]
|
||||||
|
|
||||||
#+begin_src python
|
#+begin_src python
|
||||||
@dataclass
|
|
||||||
class browser:
|
class browser:
|
||||||
class export:
|
class export:
|
||||||
# path[s]/glob to your backed up browser history sqlite files
|
# path[s]/glob to your backed up browser history sqlite files
|
||||||
|
@ -108,6 +111,80 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http
|
||||||
# active_databases = Firefox.locate_database()
|
# active_databases = Firefox.locate_database()
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
#+end_src
|
#+end_src
|
||||||
|
** [[file:../my/location][my.location]]
|
||||||
|
|
||||||
|
Merged location history from lots of sources.
|
||||||
|
|
||||||
|
The main sources here are
|
||||||
|
[[https://github.com/mendhak/gpslogger][gpslogger]] .gpx (XML) files, and
|
||||||
|
google takeout (using =my.google.takeout.parser=), with a fallback on
|
||||||
|
manually defined home locations.
|
||||||
|
|
||||||
|
You might also be able to use [[file:../my/location/via_ip.py][my.location.via_ip]] which uses =my.ip.all= to
|
||||||
|
provide geolocation data for IPs (though no IPs are provided from any
|
||||||
|
of the sources here). For an example of usage, see [[https://github.com/seanbreckenridge/HPI/tree/master/my/ip][here]]
|
||||||
|
|
||||||
|
#+begin_src python
|
||||||
|
class location:
|
||||||
|
home = (
|
||||||
|
# supports ISO strings
|
||||||
|
('2005-12-04' , (42.697842, 23.325973)), # Bulgaria, Sofia
|
||||||
|
# supports date/datetime objects
|
||||||
|
(date(year=1980, month=2, day=15) , (40.7128 , -74.0060 )), # NY
|
||||||
|
(datetime.fromtimestamp(1600000000, tz=timezone.utc), (55.7558 , 37.6173 )), # Moscow, Russia
|
||||||
|
)
|
||||||
|
# note: order doesn't matter, will be sorted in the data provider
|
||||||
|
|
||||||
|
class gpslogger:
|
||||||
|
# path[s]/glob to the exported gpx files
|
||||||
|
export_path: Paths
|
||||||
|
|
||||||
|
# default accuracy for gpslogger
|
||||||
|
accuracy: float = 50.0
|
||||||
|
|
||||||
|
class via_ip:
|
||||||
|
# guess ~15km accuracy for IP addresses
|
||||||
|
accuracy: float = 15_000
|
||||||
|
#+end_src
|
||||||
|
** [[file:../my/time/tz/via_location.py][my.time.tz.via_location]]
|
||||||
|
|
||||||
|
Uses the =my.location= module to determine the timezone for a location.
|
||||||
|
|
||||||
|
This can be used to 'localize' timezones. Most modules here return
|
||||||
|
datetimes in UTC, to prevent confusion whether or not it's a local
|
||||||
|
timezone, one from UTC, or one in your timezone.
|
||||||
|
|
||||||
|
Depending on the specific data provider and your level of paranoia you might expect different behaviour.. E.g.:
|
||||||
|
- if your objects already have tz info, you might not need to call localize() at all
|
||||||
|
- it's safer when either all of your objects are tz aware or all are tz unaware, not a mixture
|
||||||
|
- you might trust your original timezone, or it might just be UTC, and you want to use something more reasonable
|
||||||
|
|
||||||
|
#+begin_src python
|
||||||
|
TzPolicy = Literal[
|
||||||
|
'keep' , # if datetime is tz aware, just preserve it
|
||||||
|
'convert', # if datetime is tz aware, convert to provider's tz
|
||||||
|
'throw' , # if datetime is tz aware, throw exception
|
||||||
|
]
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
This is still a work in progress, plan is to integrate it with =hpi query=
|
||||||
|
so that you can easily convert/localize timezones for some module/data
|
||||||
|
|
||||||
|
#+begin_src python
|
||||||
|
class time:
|
||||||
|
class tz:
|
||||||
|
policy = 'keep'
|
||||||
|
|
||||||
|
class via_location:
|
||||||
|
# less precise, but faster
|
||||||
|
fast: bool = True
|
||||||
|
|
||||||
|
# if the accuracy for the location is more than 5km (this
|
||||||
|
# isn't an accurate location, so shouldn't use it to determine
|
||||||
|
# timezone), don't use
|
||||||
|
require_accuracy: float = 5_000
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
# TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh.
|
# TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh.
|
||||||
|
|
||||||
|
@ -163,7 +240,6 @@ for cls, p in modules:
|
||||||
|
|
||||||
#+RESULTS:
|
#+RESULTS:
|
||||||
|
|
||||||
|
|
||||||
** [[file:../my/google/takeout/parser.py][my.google.takeout.parser]]
|
** [[file:../my/google/takeout/parser.py][my.google.takeout.parser]]
|
||||||
|
|
||||||
Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]
|
Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]]
|
||||||
|
|
11
my/config.py
11
my/config.py
|
@ -72,10 +72,19 @@ class location:
|
||||||
# and we can't import the types from the module itself, otherwise would be circular. common module?
|
# and we can't import the types from the module itself, otherwise would be circular. common module?
|
||||||
home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
|
home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
|
||||||
|
|
||||||
|
class via_ip:
|
||||||
|
accuracy: float
|
||||||
|
|
||||||
|
class gpslogger:
|
||||||
|
export_path: Paths = ''
|
||||||
|
accuracy: float
|
||||||
|
|
||||||
|
|
||||||
class time:
|
class time:
|
||||||
class tz:
|
class tz:
|
||||||
pass
|
class via_location:
|
||||||
|
fast: bool
|
||||||
|
require_accuracy: float
|
||||||
|
|
||||||
|
|
||||||
class orgmode:
|
class orgmode:
|
||||||
|
|
29
my/ip/all.py
Normal file
29
my/ip/all.py
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
"""
|
||||||
|
An example all.py stub module that provides ip data
|
||||||
|
|
||||||
|
To use this, you'd add IP providers that yield IPs to the 'ips' function
|
||||||
|
|
||||||
|
For an example of how this could be used, see https://github.com/seanbreckenridge/HPI/tree/master/my/ip
|
||||||
|
"""
|
||||||
|
|
||||||
|
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from my.core.common import Stats, warn_if_empty
|
||||||
|
|
||||||
|
from .common import IP
|
||||||
|
|
||||||
|
|
||||||
|
@warn_if_empty
|
||||||
|
def ips() -> Iterator[IP]:
|
||||||
|
yield from ()
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
|
||||||
|
from my.core import stat
|
||||||
|
|
||||||
|
return {
|
||||||
|
**stat(ips),
|
||||||
|
}
|
39
my/ip/common.py
Normal file
39
my/ip/common.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
"""
|
||||||
|
Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]]
|
||||||
|
"""
|
||||||
|
|
||||||
|
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||||
|
|
||||||
|
from my.core import __NOT_HPI_MODULE__
|
||||||
|
|
||||||
|
import ipaddress
|
||||||
|
from typing import NamedTuple, Iterator
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import ipgeocache
|
||||||
|
|
||||||
|
from my.core import Json
|
||||||
|
|
||||||
|
|
||||||
|
class IP(NamedTuple):
|
||||||
|
dt: datetime
|
||||||
|
addr: str # an IP address
|
||||||
|
|
||||||
|
# TODO: could cache? not sure if it's worth it
|
||||||
|
def ipgeocache(self) -> Json:
|
||||||
|
return ipgeocache.get(self.addr)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def tzname(self) -> str:
|
||||||
|
tz: str = self.ipgeocache()["timezone"]
|
||||||
|
return tz
|
||||||
|
|
||||||
|
|
||||||
|
def drop_private(ips: Iterator[IP]) -> Iterator[IP]:
|
||||||
|
"""
|
||||||
|
Helper function that can be used to filter out private IPs
|
||||||
|
"""
|
||||||
|
for ip in ips:
|
||||||
|
if ipaddress.ip_address(ip.addr).is_private:
|
||||||
|
continue
|
||||||
|
yield ip
|
48
my/location/all.py
Normal file
48
my/location/all.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
"""
|
||||||
|
Merges location data from multiple sources
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from my.core import Stats, LazyLogger
|
||||||
|
from my.core.source import import_source
|
||||||
|
|
||||||
|
from my.location.via_ip import locations
|
||||||
|
|
||||||
|
from .common import Location
|
||||||
|
|
||||||
|
|
||||||
|
logger = LazyLogger(__name__, level="warning")
|
||||||
|
|
||||||
|
|
||||||
|
def locations() -> Iterator[Location]:
|
||||||
|
# can add/comment out sources here to disable them, or use core.disabled_modules
|
||||||
|
yield from _takeout_locations()
|
||||||
|
yield from _gpslogger_locations()
|
||||||
|
yield from _ip_locations()
|
||||||
|
|
||||||
|
|
||||||
|
@import_source(module_name="my.location.google_takeout")
|
||||||
|
def _takeout_locations() -> Iterator[Location]:
|
||||||
|
from . import google_takeout
|
||||||
|
yield from google_takeout.locations()
|
||||||
|
|
||||||
|
|
||||||
|
@import_source(module_name="my.location.gpslogger")
|
||||||
|
def _gpslogger_locations() -> Iterator[Location]:
|
||||||
|
from . import gpslogger
|
||||||
|
yield from gpslogger.locations()
|
||||||
|
|
||||||
|
|
||||||
|
@import_source(module_name="my.location.via_ip")
|
||||||
|
def _ip_locations() -> Iterator[Location]:
|
||||||
|
from . import via_ip
|
||||||
|
yield from via_ip.locations()
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
|
||||||
|
from my.core import stat
|
||||||
|
|
||||||
|
return {
|
||||||
|
**stat(locations),
|
||||||
|
}
|
17
my/location/common.py
Normal file
17
my/location/common.py
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
from datetime import date, datetime
|
||||||
|
from typing import Union, Tuple, NamedTuple, Optional
|
||||||
|
|
||||||
|
from my.core import __NOT_HPI_MODULE__
|
||||||
|
|
||||||
|
DateIsh = Union[datetime, date, str]
|
||||||
|
|
||||||
|
LatLon = Tuple[float, float]
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: add timezone to this? can use timezonefinder in tz provider instead though
|
||||||
|
class Location(NamedTuple):
|
||||||
|
lat: float
|
||||||
|
lon: float
|
||||||
|
dt: datetime
|
||||||
|
accuracy: Optional[float]
|
||||||
|
elevation: Optional[float]
|
|
@ -1,6 +1,9 @@
|
||||||
"""
|
"""
|
||||||
Location data from Google Takeout
|
Location data from Google Takeout
|
||||||
|
|
||||||
|
DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead
|
||||||
"""
|
"""
|
||||||
|
|
||||||
REQUIRES = [
|
REQUIRES = [
|
||||||
'geopy', # checking that coordinates are valid
|
'geopy', # checking that coordinates are valid
|
||||||
'ijson',
|
'ijson',
|
||||||
|
@ -20,6 +23,10 @@ from ..core.common import LazyLogger, mcachew
|
||||||
from ..core.cachew import cache_dir
|
from ..core.cachew import cache_dir
|
||||||
from ..core import kompress
|
from ..core import kompress
|
||||||
|
|
||||||
|
from my.core.warnings import high
|
||||||
|
|
||||||
|
high("Please set up my.google.takeout.parser module for better takeout support")
|
||||||
|
|
||||||
|
|
||||||
# otherwise uses ijson
|
# otherwise uses ijson
|
||||||
# todo move to config??
|
# todo move to config??
|
||||||
|
|
33
my/location/google_takeout.py
Normal file
33
my/location/google_takeout.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
"""
|
||||||
|
Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google
|
||||||
|
"""
|
||||||
|
|
||||||
|
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
|
||||||
|
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from my.google.takeout.parser import events, _cachew_depends_on
|
||||||
|
from google_takeout_parser.models import Location as GoogleLocation
|
||||||
|
|
||||||
|
from my.core.common import mcachew, LazyLogger, Stats
|
||||||
|
from .common import Location
|
||||||
|
|
||||||
|
logger = LazyLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@mcachew(
|
||||||
|
depends_on=_cachew_depends_on,
|
||||||
|
logger=logger,
|
||||||
|
)
|
||||||
|
def locations() -> Iterator[Location]:
|
||||||
|
for g in events():
|
||||||
|
if isinstance(g, GoogleLocation):
|
||||||
|
yield Location(
|
||||||
|
lon=g.lng, lat=g.lat, dt=g.dt, accuracy=g.accuracy, elevation=None
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
|
||||||
|
from my.core import stat
|
||||||
|
|
||||||
|
return {**stat(locations)}
|
74
my/location/gpslogger.py
Normal file
74
my/location/gpslogger.py
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
"""
|
||||||
|
Parse [[https://github.com/mendhak/gpslogger][gpslogger]] .gpx (xml) files
|
||||||
|
"""
|
||||||
|
|
||||||
|
REQUIRES = ["gpxpy"]
|
||||||
|
|
||||||
|
from my.config import location
|
||||||
|
from my.core import Paths, dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class config(location.gpslogger):
|
||||||
|
# path[s]/glob to the synced gpx (XML) files
|
||||||
|
export_path: Paths
|
||||||
|
|
||||||
|
# default accuracy for gpslogger
|
||||||
|
accuracy: float = 50.0
|
||||||
|
|
||||||
|
|
||||||
|
from itertools import chain
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterator, Sequence, List
|
||||||
|
|
||||||
|
import gpxpy # type: ignore[import]
|
||||||
|
from more_itertools import unique_everseen
|
||||||
|
|
||||||
|
from my.core import Stats, LazyLogger
|
||||||
|
from my.core.common import get_files, mcachew
|
||||||
|
from .common import Location
|
||||||
|
|
||||||
|
|
||||||
|
logger = LazyLogger(__name__, level="warning")
|
||||||
|
|
||||||
|
|
||||||
|
def inputs() -> Sequence[Path]:
|
||||||
|
return get_files(config.export_path, glob="*.gpx")
|
||||||
|
|
||||||
|
|
||||||
|
def _cachew_depends_on() -> List[float]:
|
||||||
|
return [p.stat().st_mtime for p in inputs()]
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: could use a better cachew key/this has to recompute every file whenever the newest one changes
@mcachew(depends_on=_cachew_depends_on, logger=logger)
def locations() -> Iterator[Location]:
    """
    Yield locations from every gpx file returned by inputs().

    Points are de-duplicated on their datetime: the first Location seen for a
    given ``dt`` is kept, later ones with the same ``dt`` are dropped.
    """
    # chain.from_iterable consumes the per-file generators lazily instead of
    # unpacking the whole map() into positional arguments up front
    per_file = map(_extract_locations, inputs())
    yield from unique_everseen(
        chain.from_iterable(per_file), key=lambda loc: loc.dt
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_locations(path: Path) -> Iterator[Location]:
    """
    Yield a Location for every timestamped track point in a single .gpx file.

    Points without a timestamp are skipped. Accuracy is not read from the
    file; every point is given ``config.accuracy``.
    """
    # gpxpy.parse reads the whole document, so the handle can be closed before
    # iterating -- this avoids keeping the file open while the generator is suspended
    with path.open("r") as gf:
        gpx_obj = gpxpy.parse(gf)

    for track in gpx_obj.tracks:
        for segment in track.segments:
            for point in segment.points:
                when = point.time
                if when is None:
                    continue
                # hmm - for gpslogger, seems that timezone is always SimpleTZ('Z'), which
                # specifies UTC -- see https://github.com/tkrajina/gpxpy/blob/cb243b22841bd2ce9e603fe3a96672fc75edecf2/gpxpy/gpxfield.py#L38
                if when.tzinfo is None:
                    # naive timestamp: label it as UTC
                    when_utc = when.replace(tzinfo=timezone.utc)
                else:
                    # aware timestamp: convert, so that a non-UTC offset keeps
                    # pointing at the same instant instead of being relabelled
                    when_utc = when.astimezone(timezone.utc)
                yield Location(
                    lat=point.latitude,
                    lon=point.longitude,
                    accuracy=config.accuracy,
                    elevation=point.elevation,
                    dt=when_utc,
                )
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
|
||||||
|
from my.core import stat
|
||||||
|
|
||||||
|
return {**stat(locations)}
|
|
@ -2,17 +2,13 @@
|
||||||
Simple location provider, serving as a fallback when more detailed data isn't available
|
Simple location provider, serving as a fallback when more detailed data isn't available
|
||||||
'''
|
'''
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime, date, time, timezone
|
from datetime import datetime, time, timezone
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from typing import Sequence, Tuple, Union, cast
|
from typing import Sequence, Tuple, Union, cast
|
||||||
|
|
||||||
from my.config import location as user_config
|
from my.config import location as user_config
|
||||||
|
|
||||||
|
from my.location.common import LatLon, DateIsh
|
||||||
DateIsh = Union[datetime, date, str]
|
|
||||||
|
|
||||||
# todo hopefully reasonable? might be nice to add name or something too
|
|
||||||
LatLon = Tuple[float, float]
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Config(user_config):
|
class Config(user_config):
|
||||||
|
|
39
my/location/via_ip.py
Normal file
39
my/location/via_ip.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
"""
|
||||||
|
Converts IP addresses provided by my.ip.all to estimated locations
|
||||||
|
"""
|
||||||
|
|
||||||
|
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||||
|
|
||||||
|
from my.core import dataclass, Stats
|
||||||
|
from my.config import location
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class config(location.via_ip):
|
||||||
|
# no real science to this, just a guess of ~15km accuracy for IP addresses
|
||||||
|
accuracy: float = 15_000.0
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from .common import Location
|
||||||
|
from my.ip.all import ips
|
||||||
|
|
||||||
|
|
||||||
|
def locations() -> Iterator[Location]:
|
||||||
|
for ip in ips():
|
||||||
|
loc: str = ip.ipgeocache()["loc"]
|
||||||
|
lat, _, lon = loc.partition(",")
|
||||||
|
yield Location(
|
||||||
|
lat=float(lat),
|
||||||
|
lon=float(lon),
|
||||||
|
dt=ip.dt,
|
||||||
|
accuracy=config.accuracy,
|
||||||
|
elevation=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
|
||||||
|
from my.core import stat
|
||||||
|
|
||||||
|
return {**stat(locations)}
|
|
@ -10,24 +10,27 @@ Depending on the specific data provider and your level of paranoia you might exp
|
||||||
- it's safer when either all of your objects are tz aware or all are tz unware, not a mixture
|
- it's safer when either all of your objects are tz aware or all are tz unware, not a mixture
|
||||||
- you might trust your original timezone, or it might just be UTC, and you want to use something more reasonable
|
- you might trust your original timezone, or it might just be UTC, and you want to use something more reasonable
|
||||||
'''
|
'''
|
||||||
Policy = Literal[
|
TzPolicy = Literal[
|
||||||
'keep' , # if datetime is tz aware, just preserve it
|
'keep' , # if datetime is tz aware, just preserve it
|
||||||
'convert', # if datetime is tz aware, convert to provider's tz
|
'convert', # if datetime is tz aware, convert to provider's tz
|
||||||
'throw' , # if datetime is tz aware, throw exception
|
'throw' , # if datetime is tz aware, throw exception
|
||||||
# todo 'warn'? not sure if very useful
|
# todo 'warn'? not sure if very useful
|
||||||
]
|
]
|
||||||
|
|
||||||
def default_policy() -> Policy:
|
# backwards compatibility
|
||||||
|
Policy = TzPolicy
|
||||||
|
|
||||||
|
def default_policy() -> TzPolicy:
|
||||||
try:
|
try:
|
||||||
from my.config import time as user_config
|
from my.config import time as user_config
|
||||||
return cast(Policy, user_config.tz.policy)
|
return cast(TzPolicy, user_config.tz.policy)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# todo meh.. need to think how to do this more carefully
|
# todo meh.. need to think how to do this more carefully
|
||||||
# rationale: do not mess with user's data unless they want
|
# rationale: do not mess with user's data unless they want
|
||||||
return 'keep'
|
return 'keep'
|
||||||
|
|
||||||
|
|
||||||
def localize_with_policy(lfun: Callable[[datetime], tzdatetime], dt: datetime, policy: Policy=default_policy()) -> tzdatetime:
|
def localize_with_policy(lfun: Callable[[datetime], tzdatetime], dt: datetime, policy: TzPolicy=default_policy()) -> tzdatetime:
|
||||||
tz = dt.tzinfo
|
tz = dt.tzinfo
|
||||||
if tz is None:
|
if tz is None:
|
||||||
return lfun(dt)
|
return lfun(dt)
|
||||||
|
|
|
@ -7,27 +7,34 @@ REQUIRES = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
from my.config import time
|
||||||
|
from my.core import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class config(time.tz.via_location):
|
||||||
|
# less precise, but faster
|
||||||
|
fast: bool = True
|
||||||
|
|
||||||
|
# if the accuracy for the location is more than 5km, don't use
|
||||||
|
require_accuracy: float = 5_000
|
||||||
|
|
||||||
|
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from itertools import groupby
|
from itertools import groupby
|
||||||
from typing import Iterator, NamedTuple, Optional
|
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List
|
||||||
|
|
||||||
from more_itertools import seekable
|
from more_itertools import seekable
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
from ...core.common import LazyLogger, mcachew, tzdatetime
|
from my.core.common import LazyLogger, mcachew, tzdatetime
|
||||||
from ...core.cachew import cache_dir
|
|
||||||
from ...location.google import locations
|
|
||||||
|
|
||||||
|
logger = LazyLogger(__name__, level='warning')
|
||||||
|
|
||||||
logger = LazyLogger(__name__, level='debug')
|
|
||||||
|
|
||||||
|
|
||||||
# todo should move to config? not sure
|
|
||||||
_FASTER: bool = True
|
|
||||||
@lru_cache(2)
|
@lru_cache(2)
|
||||||
def _timezone_finder(fast: bool):
|
def _timezone_finder(fast: bool) -> Any:
|
||||||
if fast:
|
if fast:
|
||||||
# less precise, but faster
|
# less precise, but faster
|
||||||
from timezonefinder import TimezoneFinderL as Finder # type: ignore
|
from timezonefinder import TimezoneFinderL as Finder # type: ignore
|
||||||
|
@ -46,39 +53,89 @@ class DayWithZone(NamedTuple):
|
||||||
zone: Zone
|
zone: Zone
|
||||||
|
|
||||||
|
|
||||||
def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
|
from my.location.common import LatLon
|
||||||
finder = _timezone_finder(fast=_FASTER) # rely on the default
|
|
||||||
pdt = None
|
# for backwards compatibility
|
||||||
|
def _locations() -> Iterator[Tuple[LatLon, datetime]]:
    """
    Yield ``((lat, lon), dt)`` pairs to derive timezones from.

    The primary source is my.location.all; entries whose accuracy value is
    known and larger than ``config.require_accuracy`` are skipped. If that
    provider raises, the error is logged, a warning is emitted and the legacy
    my.location.google provider is used instead (without accuracy filtering).
    """
    try:
        import my.location.all

        for loc in my.location.all.locations():
            if loc.accuracy is not None and loc.accuracy > config.require_accuracy:
                continue
            yield ((loc.lat, loc.lon), loc.dt)

    except Exception as e:
        # NOTE: the try block also covers the iteration above, so a failure
        # part-way through falls back *after* some items were already yielded
        from my.core.warnings import high

        logger.exception("Could not setup via_location using my.location.all provider, falling back to legacy google implementation", exc_info=e)
        high("Setup my.google.takeout.parser, then my.location.all for better google takeout/location data")

        import my.location.google

        for gloc in my.location.google.locations():
            yield ((gloc.lat, gloc.lon), gloc.dt)
|
||||||
|
|
||||||
|
# TODO: could use heapmerge or sort the underlying iterators somehow?
|
||||||
|
# see https://github.com/karlicoss/HPI/pull/237#discussion_r858372934
|
||||||
|
def _sorted_locations() -> List[Tuple[LatLon, datetime]]:
    """Return every pair produced by _locations(), ordered by datetime (ascending)."""
    # sorted() already builds and returns a new list, so no list() wrapper is needed
    return sorted(_locations(), key=lambda x: x[1])
|
||||||
|
|
||||||
|
|
||||||
|
# Note: this takes a while: since the upstream _locations isn't sorted, this
|
||||||
|
# has to do an iterative sort of the entire my.location.all list
|
||||||
|
def _iter_local_dates() -> Iterator[DayWithZone]:
|
||||||
|
finder = _timezone_finder(fast=config.fast) # rely on the default
|
||||||
|
#pdt = None
|
||||||
|
# TODO: warnings doesnt actually warn?
|
||||||
warnings = []
|
warnings = []
|
||||||
# todo allow to skip if not noo many errors in row?
|
# todo allow to skip if not noo many errors in row?
|
||||||
for l in locations(start=start, stop=stop):
|
for (lat, lon), dt in _sorted_locations():
|
||||||
# TODO right. its _very_ slow...
|
# TODO right. its _very_ slow...
|
||||||
zone = finder.timezone_at(lng=l.lon, lat=l.lat)
|
zone = finder.timezone_at(lat=lat, lng=lon)
|
||||||
if zone is None:
|
if zone is None:
|
||||||
warnings.append(f"Couldn't figure out tz for {l}")
|
warnings.append(f"Couldn't figure out tz for {lat}, {lon}")
|
||||||
continue
|
continue
|
||||||
tz = pytz.timezone(zone)
|
tz = pytz.timezone(zone)
|
||||||
# TODO this is probably a bit expensive... test & benchmark
|
# TODO this is probably a bit expensive... test & benchmark
|
||||||
ldt = l.dt.astimezone(tz)
|
ldt = dt.astimezone(tz)
|
||||||
ndate = ldt.date()
|
ndate = ldt.date()
|
||||||
if pdt is not None and ndate < pdt.date():
|
#if pdt is not None and ndate < pdt.date():
|
||||||
# TODO for now just drop and collect the stats
|
# # TODO for now just drop and collect the stats
|
||||||
# I guess we'd have minor drops while air travel...
|
# # I guess we'd have minor drops while air travel...
|
||||||
warnings.append("local time goes backwards {ldt} ({tz}) < {pdt}")
|
# warnings.append("local time goes backwards {ldt} ({tz}) < {pdt}")
|
||||||
continue
|
# continue
|
||||||
pdt = ldt
|
#pdt = ldt
|
||||||
z = tz.zone; assert z is not None
|
z = tz.zone; assert z is not None
|
||||||
yield DayWithZone(day=ndate, zone=z)
|
yield DayWithZone(day=ndate, zone=z)
|
||||||
|
|
||||||
|
|
||||||
def most_common(l):
|
def most_common(lst: List[DayWithZone]) -> DayWithZone:
|
||||||
res, count = Counter(l).most_common(1)[0] # type: ignore[var-annotated]
|
res, _ = Counter(lst).most_common(1)[0] # type: ignore[var-annotated]
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@mcachew(cache_path=cache_dir())
|
def _iter_tz_depends_on() -> str:
|
||||||
|
"""
|
||||||
|
Since you might get new data which specifies a new timezone sometime
|
||||||
|
in the day, this causes _iter_tzs to refresh every 6 hours, like:
|
||||||
|
2022-04-26_00
|
||||||
|
2022-04-26_06
|
||||||
|
2022-04-26_12
|
||||||
|
2022-04-26_18
|
||||||
|
"""
|
||||||
|
day = str(date.today())
|
||||||
|
hr = datetime.now().hour
|
||||||
|
hr_truncated = hr // 6 * 6
|
||||||
|
return "{}_{}".format(day, hr_truncated)
|
||||||
|
|
||||||
|
|
||||||
|
# refresh _iter_tzs every 6 hours -- don't think a better depends_on is possible dynamically
|
||||||
|
@mcachew(logger=logger, depends_on=_iter_tz_depends_on)
|
||||||
def _iter_tzs() -> Iterator[DayWithZone]:
|
def _iter_tzs() -> Iterator[DayWithZone]:
|
||||||
for d, gr in groupby(_iter_local_dates(), key=lambda p: p.day):
|
# since we have no control over what order the locations are returned,
|
||||||
|
# we need to sort them first before we can do a groupby
|
||||||
|
local_dates: List[DayWithZone] = list(_iter_local_dates())
|
||||||
|
local_dates.sort(key=lambda p: p.day)
|
||||||
|
for d, gr in groupby(local_dates, key=lambda p: p.day):
|
||||||
logger.info('processed %s', d)
|
logger.info('processed %s', d)
|
||||||
zone = most_common(list(gr)).zone
|
zone = most_common(list(gr)).zone
|
||||||
yield DayWithZone(day=d, zone=zone)
|
yield DayWithZone(day=d, zone=zone)
|
||||||
|
@ -106,6 +163,7 @@ def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
|
||||||
break
|
break
|
||||||
return None if zone is None else pytz.timezone(zone)
|
return None if zone is None else pytz.timezone(zone)
|
||||||
|
|
||||||
|
|
||||||
# ok to cache, there are only a few home locations?
|
# ok to cache, there are only a few home locations?
|
||||||
@lru_cache(maxsize=None)
|
@lru_cache(maxsize=None)
|
||||||
def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
|
def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
|
||||||
|
@ -119,8 +177,10 @@ def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
|
||||||
return pytz.timezone(zone)
|
return pytz.timezone(zone)
|
||||||
|
|
||||||
|
|
||||||
# TODO expose? to main as well?
|
|
||||||
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
||||||
|
'''
|
||||||
|
Given a datetime, returns the timezone for that date.
|
||||||
|
'''
|
||||||
res = _get_day_tz(d=dt.date())
|
res = _get_day_tz(d=dt.date())
|
||||||
if res is not None:
|
if res is not None:
|
||||||
return res
|
return res
|
||||||
|
@ -129,6 +189,9 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
||||||
loc = home.get_location(dt)
|
loc = home.get_location(dt)
|
||||||
return _get_home_tz(loc=loc)
|
return _get_home_tz(loc=loc)
|
||||||
|
|
||||||
|
# expose as 'public' function
|
||||||
|
get_tz = _get_tz
|
||||||
|
|
||||||
|
|
||||||
def localize(dt: datetime) -> tzdatetime:
|
def localize(dt: datetime) -> tzdatetime:
|
||||||
tz = _get_tz(dt)
|
tz = _get_tz(dt)
|
||||||
|
@ -144,11 +207,13 @@ def stats() -> Stats:
|
||||||
# TODO not sure what would be a good stat() for this module...
|
# TODO not sure what would be a good stat() for this module...
|
||||||
# might be nice to print some actual timezones?
|
# might be nice to print some actual timezones?
|
||||||
# there aren't really any great iterables to expose
|
# there aren't really any great iterables to expose
|
||||||
|
import os
|
||||||
|
VIA_LOCATION_START_YEAR = int(os.environ.get("VIA_LOCATION_START_YEAR", 1990))
|
||||||
def localized_years():
|
def localized_years():
|
||||||
last = datetime.now().year + 2
|
last = datetime.now().year + 2
|
||||||
# note: deliberately take + 2 years, so the iterator exhausts. otherwise stuff might never get cached
|
# note: deliberately take + 2 years, so the iterator exhausts. otherwise stuff might never get cached
|
||||||
# need to think about it...
|
# need to think about it...
|
||||||
for Y in range(1990, last):
|
for Y in range(VIA_LOCATION_START_YEAR, last):
|
||||||
dt = datetime.fromisoformat(f'{Y}-01-01 01:01:01')
|
dt = datetime.fromisoformat(f'{Y}-01-01 01:01:01')
|
||||||
yield localize(dt)
|
yield localize(dt)
|
||||||
return stat(localized_years)
|
return stat(localized_years)
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from datetime import datetime, timedelta, date, timezone
|
from datetime import datetime, timedelta, date, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import sys
|
|
||||||
|
|
||||||
import pytest # type: ignore
|
import pytest # type: ignore
|
||||||
import pytz # type: ignore
|
import pytz # type: ignore
|
||||||
|
@ -80,7 +79,7 @@ def prepare(tmp_path: Path):
|
||||||
from .common import reset_modules
|
from .common import reset_modules
|
||||||
reset_modules()
|
reset_modules()
|
||||||
|
|
||||||
LTZ._FASTER = True
|
LTZ.config.fast = True
|
||||||
|
|
||||||
from .location import _prepare_google_config
|
from .location import _prepare_google_config
|
||||||
google = _prepare_google_config(tmp_path)
|
google = _prepare_google_config(tmp_path)
|
||||||
|
@ -98,7 +97,8 @@ def prepare(tmp_path: Path):
|
||||||
|
|
||||||
class time:
|
class time:
|
||||||
class tz:
|
class tz:
|
||||||
pass # just rely on the default..
|
class via_location:
|
||||||
|
pass # just rely on the defaults...
|
||||||
|
|
||||||
import my.core.cfg as C
|
import my.core.cfg as C
|
||||||
with C.tmp_config() as config:
|
with C.tmp_config() as config:
|
||||||
|
|
7
tox.ini
7
tox.ini
|
@ -100,6 +100,9 @@ commands =
|
||||||
hpi module install my.goodreads
|
hpi module install my.goodreads
|
||||||
hpi module install my.pdfs
|
hpi module install my.pdfs
|
||||||
hpi module install my.smscalls
|
hpi module install my.smscalls
|
||||||
|
hpi module install my.location.gpslogger
|
||||||
|
hpi module install my.location.via_ip
|
||||||
|
hpi module install my.google.takeout.parser
|
||||||
|
|
||||||
# todo fuck. -p my.github isn't checking the subpackages?? wtf...
|
# todo fuck. -p my.github isn't checking the subpackages?? wtf...
|
||||||
# guess it wants .pyi file??
|
# guess it wants .pyi file??
|
||||||
|
@ -118,6 +121,10 @@ commands =
|
||||||
-p my.body.exercise.cross_trainer \
|
-p my.body.exercise.cross_trainer \
|
||||||
-p my.bluemaestro \
|
-p my.bluemaestro \
|
||||||
-p my.location.google \
|
-p my.location.google \
|
||||||
|
-p my.location.google_takeout \
|
||||||
|
-p my.location.via_ip \
|
||||||
|
-p my.location.gpslogger \
|
||||||
|
-p my.ip.common \
|
||||||
-p my.time.tz.via_location \
|
-p my.time.tz.via_location \
|
||||||
-p my.calendar.holidays \
|
-p my.calendar.holidays \
|
||||||
-p my.arbtt \
|
-p my.arbtt \
|
||||||
|
|
Loading…
Add table
Reference in a new issue