location: add all.py, using takeout/gpslogger/ip
This commit is contained in:
parent
66a00c6ada
commit
ca10d524a4
12 changed files with 357 additions and 27 deletions
11
my/config.py
11
my/config.py
|
@ -72,10 +72,19 @@ class location:
|
|||
# and we can't import the types from the module itself, otherwise would be circular. common module?
|
||||
home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
|
||||
|
||||
class via_ip:
|
||||
accuracy: float
|
||||
|
||||
class gpslogger:
|
||||
export_path: Paths = ''
|
||||
accuracy: float
|
||||
|
||||
|
||||
class time:
|
||||
class tz:
|
||||
pass
|
||||
class via_location:
|
||||
fast: bool
|
||||
require_accuracy: float
|
||||
|
||||
|
||||
class orgmode:
|
||||
|
|
28
my/ip/all.py
Normal file
28
my/ip/all.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
"""
|
||||
An example all.py stub module that provides ip data
|
||||
|
||||
To use this, you'd add IP providers that yield IPs to the 'ips' function
|
||||
|
||||
For an example of how this could be used, see https://github.com/seanbreckenridge/HPI/tree/master/my/ip
|
||||
"""
|
||||
|
||||
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from my.core.common import Stats
|
||||
|
||||
from .common import IP
|
||||
|
||||
|
||||
def ips() -> Iterator[IP]:
    """
    Stub provider: yields nothing.

    Add your own IP providers here so that this function yields their
    IP items.
    """
    yield from iter(())
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return whatever my.core.stat reports for the ips provider."""
    from my.core import stat

    summary = dict(stat(ips))
    return summary
|
39
my/ip/common.py
Normal file
39
my/ip/common.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]]
|
||||
"""
|
||||
|
||||
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||
|
||||
from my.core import __NOT_HPI_MODULE__
|
||||
|
||||
import ipaddress
|
||||
from typing import NamedTuple, Iterator
|
||||
from datetime import datetime
|
||||
|
||||
import ipgeocache
|
||||
|
||||
from my.core import Json
|
||||
|
||||
|
||||
class IP(NamedTuple):
    """An IP address paired with a datetime."""

    dt: datetime
    addr: str  # an IP address

    # TODO: could cache? not sure if it's worth it
    def ipgeocache(self) -> Json:
        """Look this address up with ipgeocache.get and return the result."""
        # inside the method body the name `ipgeocache` resolves to the imported
        # module, not to this method (class attributes are not in function scope)
        info = ipgeocache.get(self.addr)
        return info

    @property
    def tz(self) -> str:
        """The "timezone" entry of the ipgeocache lookup for this address."""
        info = self.ipgeocache()
        zone: str = info["timezone"]
        return zone
|
||||
|
||||
|
||||
def drop_private(ips: Iterator[IP]) -> Iterator[IP]:
    """
    Helper function that can be used to filter out private IPs

    Yields, in their original order, only the items whose addr is not
    reported as private by the stdlib ipaddress module.
    """
    for item in ips:
        parsed = ipaddress.ip_address(item.addr)
        if not parsed.is_private:
            yield item
|
46
my/location/all.py
Normal file
46
my/location/all.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
"""
|
||||
Merges location data from multiple sources
|
||||
"""
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from my.core import Stats, LazyLogger
|
||||
from my.core.source import import_source
|
||||
|
||||
from my.location.via_ip import locations
|
||||
|
||||
from .common import Location
|
||||
|
||||
|
||||
logger = LazyLogger(__name__, level="warning")
|
||||
|
||||
|
||||
def locations() -> Iterator[Location]:
    """
    Yield locations from every source in turn: takeout first, then
    gpslogger, then the IP-derived ones.
    """
    sources = (_takeout_locations, _gpslogger_locations, _ip_locations)
    for source in sources:
        yield from source()
|
||||
|
||||
|
||||
@import_source(module_name="my.location.via_ip")
def _ip_locations() -> Iterator[Location]:
    """Yield the locations of my.location.via_ip; the module is imported on first iteration."""
    from . import via_ip as source

    yield from source.locations()
|
||||
|
||||
|
||||
@import_source(module_name="my.location.google_takeout")
def _takeout_locations() -> Iterator[Location]:
    """Yield the locations of my.location.google_takeout; the module is imported on first iteration."""
    from . import google_takeout as source

    yield from source.locations()
|
||||
|
||||
|
||||
@import_source(module_name="my.location.gpslogger")
def _gpslogger_locations() -> Iterator[Location]:
    """Yield the locations of my.location.gpslogger; the module is imported on first iteration."""
    from . import gpslogger as source

    yield from source.locations()
|
||||
|
||||
def stats() -> Stats:
    """Return whatever my.core.stat reports for the merged locations provider."""
    from my.core import stat

    merged_stats = dict(stat(locations))
    return merged_stats
|
17
my/location/common.py
Normal file
17
my/location/common.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
from datetime import date, datetime
|
||||
from typing import Union, Tuple, NamedTuple, Optional
|
||||
|
||||
from my.core import __NOT_HPI_MODULE__
|
||||
|
||||
DateIsh = Union[datetime, date, str]
|
||||
|
||||
LatLon = Tuple[float, float]
|
||||
|
||||
|
||||
# TODO: add timezone to this? can use timezonefinder in tz provider instead though
|
||||
class Location(NamedTuple):
    """
    A single location sample, as yielded by the location providers.

    Note the field order: lon comes before lat, so prefer keyword
    arguments when constructing one.
    """

    lon: float
    lat: float
    dt: datetime
    # may be None
    # NOTE(review): units look like meters (providers default to 50.0 and 15_000 for "~15km") -- confirm
    accuracy: Optional[float]
    # may be None; providers without altitude data pass elevation=None
    elevation: Optional[float]
|
|
@ -1,6 +1,9 @@
|
|||
"""
|
||||
Location data from Google Takeout
|
||||
|
||||
DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead
|
||||
"""
|
||||
|
||||
REQUIRES = [
|
||||
'geopy', # checking that coordinates are valid
|
||||
'ijson',
|
||||
|
@ -20,6 +23,10 @@ from ..core.common import LazyLogger, mcachew
|
|||
from ..core.cachew import cache_dir
|
||||
from ..core import kompress
|
||||
|
||||
from my.core.warnings import high
|
||||
|
||||
high("Please set up my.google.takeout.parser module for better takeout support")
|
||||
|
||||
|
||||
# otherwise uses ijson
|
||||
# todo move to config??
|
||||
|
|
33
my/location/google_takeout.py
Normal file
33
my/location/google_takeout.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
"""
|
||||
Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google
|
||||
"""
|
||||
|
||||
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from my.google.takeout.parser import events, _cachew_depends_on
|
||||
from google_takeout_parser.models import Location as GoogleLocation
|
||||
|
||||
from my.core.common import mcachew, LazyLogger, Stats
|
||||
from .common import Location
|
||||
|
||||
logger = LazyLogger(__name__)
|
||||
|
||||
|
||||
@mcachew(
    depends_on=_cachew_depends_on,
    logger=logger,
)
def locations() -> Iterator[Location]:
    """
    Yield a Location for every GoogleLocation event from the takeout parser.

    Events of any other type, and exceptions, are skipped. Elevation is
    always None because it is not read from the event.
    """
    for event in events():
        if isinstance(event, Exception) or not isinstance(event, GoogleLocation):
            continue
        yield Location(
            lon=event.lng,
            lat=event.lat,
            dt=event.dt,
            accuracy=event.accuracy,
            elevation=None,
        )
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return whatever my.core.stat reports for the takeout locations provider."""
    from my.core import stat

    result: Stats = {}
    result.update(stat(locations))
    return result
|
75
my/location/gpslogger.py
Normal file
75
my/location/gpslogger.py
Normal file
|
@ -0,0 +1,75 @@
|
|||
"""
|
||||
Parse [[https://github.com/mendhak/gpslogger][gpslogger]] .gpx (xml) files
|
||||
"""
|
||||
|
||||
REQUIRES = ["gpxpy"]
|
||||
|
||||
from my.config import location
|
||||
from my.core import Paths, dataclass
|
||||
|
||||
|
||||
@dataclass
class config(location.gpslogger):
    """Settings for the gpslogger module, layered over the user's location.gpslogger config."""

    # path[s]/glob to the synced gpx (XML) files
    # (declared without a value here; expected to come from the user's location.gpslogger config)
    export_path: Paths

    # default accuracy for gpslogger; attached to every Location this module yields
    # NOTE(review): presumably meters -- confirm
    # NOTE(review): as a class attribute this default shadows any accuracy set on
    # location.gpslogger -- confirm that is intended
    accuracy: float = 50.0
|
||||
|
||||
|
||||
from itertools import chain
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Sequence, List
|
||||
|
||||
import gpxpy # type: ignore[import]
|
||||
from more_itertools import unique_everseen
|
||||
|
||||
from my.core import Stats, LazyLogger
|
||||
from my.core.common import get_files, mcachew
|
||||
from my.utils.input_source import InputSource
|
||||
from .common import Location
|
||||
|
||||
|
||||
logger = LazyLogger(__name__, level="warning")
|
||||
|
||||
|
||||
def inputs() -> Sequence[Path]:
    """Return the *.gpx files that get_files finds for config.export_path."""
    gpx_files = get_files(config.export_path, glob="*.gpx")
    return gpx_files
|
||||
|
||||
|
||||
def _cachew_depends_on(from_paths: InputSource) -> List[float]:
    """Cache key for locations(): the modification time of each input file, in input order."""
    mtimes: List[float] = []
    for path in from_paths():
        mtimes.append(path.stat().st_mtime)
    return mtimes
|
||||
|
||||
|
||||
# TODO: could use a better cachew key/this has to recompute every file whenever the newest one changes
|
||||
@mcachew(depends_on=_cachew_depends_on, logger=logger)
def locations(from_paths: InputSource = inputs) -> Iterator[Location]:
    """
    Yield the locations parsed from every input file, keeping only the
    first location seen for each distinct dt.
    """

    def _by_timestamp(loc: Location) -> datetime:
        return loc.dt

    per_file = map(_extract_locations, from_paths())
    yield from unique_everseen(chain.from_iterable(per_file), key=_by_timestamp)
|
||||
|
||||
|
||||
def _extract_locations(path: Path) -> Iterator[Location]:
    """
    Parse one .gpx file and yield a Location for each track point that has a time.

    Points without a time are skipped. Every yielded location carries
    config.accuracy, and its dt has tzinfo replaced with UTC.
    """
    with path.open("r") as gpx_file:
        parsed = gpxpy.parse(gpx_file)
        points = (
            point
            for track in parsed.tracks
            for segment in track.segments
            for point in segment.points
        )
        for point in points:
            stamp = point.time
            if stamp is None:
                continue
            # hmm - for gpslogger, seems that timezone is always SimpleTZ('Z'), which
            # specifies UTC -- see https://github.com/tkrajina/gpxpy/blob/cb243b22841bd2ce9e603fe3a96672fc75edecf2/gpxpy/gpxfield.py#L38
            yield Location(
                lat=point.latitude,
                lon=point.longitude,
                accuracy=config.accuracy,
                elevation=point.elevation,
                dt=datetime.replace(stamp, tzinfo=timezone.utc),
            )
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return whatever my.core.stat reports for the gpslogger locations provider."""
    from my.core import stat

    collected = stat(locations)
    return {key: value for key, value in collected.items()}
|
|
@ -2,17 +2,13 @@
|
|||
Simple location provider, serving as a fallback when more detailed data isn't available
|
||||
'''
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, date, time, timezone
|
||||
from datetime import datetime, time, timezone
|
||||
from functools import lru_cache
|
||||
from typing import Sequence, Tuple, Union, cast
|
||||
|
||||
from my.config import location as user_config
|
||||
|
||||
|
||||
DateIsh = Union[datetime, date, str]
|
||||
|
||||
# todo hopefully reasonable? might be nice to add name or something too
|
||||
LatLon = Tuple[float, float]
|
||||
from my.location.common import LatLon, DateIsh
|
||||
|
||||
@dataclass
|
||||
class Config(user_config):
|
||||
|
|
39
my/location/via_ip.py
Normal file
39
my/location/via_ip.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
Converts IP addresses provided by my.ip.all to estimated locations
|
||||
"""
|
||||
|
||||
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
|
||||
|
||||
from my.core import dataclass, Stats
|
||||
from my.config import location
|
||||
|
||||
|
||||
@dataclass
class config(location.via_ip):
    """Settings for IP-derived locations, layered over the user's location.via_ip config."""

    # no real science to this, just a guess of ~15km accuracy for IP addresses
    # (a float, to agree with location.via_ip.accuracy and Location.accuracy)
    accuracy: float = 15_000.0
|
||||
|
||||
|
||||
from typing import Iterator
|
||||
|
||||
from .common import Location
|
||||
from my.ip.all import ips
|
||||
|
||||
|
||||
def locations() -> Iterator[Location]:
    """
    Yield one estimated Location per IP from my.ip.all.ips.

    The "loc" entry of the ipgeocache lookup is read as "<lat>,<lon>".
    Each location gets the IP's dt, config.accuracy, and no elevation.
    """
    for ip in ips():
        coords: str = ip.ipgeocache()["loc"]
        lat_text, _, lon_text = coords.partition(",")
        latitude = float(lat_text)
        longitude = float(lon_text)
        yield Location(
            lat=latitude,
            lon=longitude,
            dt=ip.dt,
            accuracy=config.accuracy,
            elevation=None,
        )
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Return whatever my.core.stat reports for the IP-derived locations provider."""
    from my.core import stat

    ip_location_stats = dict(stat(locations))
    return ip_location_stats
|
|
@ -7,27 +7,34 @@ REQUIRES = [
|
|||
]
|
||||
|
||||
|
||||
from my.config import time
|
||||
from my.core import dataclass
|
||||
|
||||
|
||||
@dataclass
class config(time.tz.via_location):
    """Settings for location-based timezone detection, layered over the user's time.tz.via_location config."""

    # less precise, but faster
    # (passed as `fast` to _timezone_finder, which then picks TimezoneFinderL)
    fast: bool = True

    # if the accuracy for the location is more than 5km, don't use
    # NOTE(review): no use of this setting is visible in the shown hunks -- confirm where it is applied
    require_accuracy: float = 5_000
|
||||
|
||||
|
||||
from collections import Counter
|
||||
from datetime import date, datetime
|
||||
from functools import lru_cache
|
||||
from itertools import groupby
|
||||
from typing import Iterator, NamedTuple, Optional
|
||||
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List
|
||||
|
||||
from more_itertools import seekable
|
||||
import pytz
|
||||
|
||||
from ...core.common import LazyLogger, mcachew, tzdatetime
|
||||
from ...core.cachew import cache_dir
|
||||
from ...location.google import locations
|
||||
from my.core.common import LazyLogger, mcachew, tzdatetime
|
||||
|
||||
logger = LazyLogger(__name__, level='warning')
|
||||
|
||||
logger = LazyLogger(__name__, level='debug')
|
||||
|
||||
|
||||
# todo should move to config? not sure
|
||||
_FASTER: bool = True
|
||||
@lru_cache(2)
|
||||
def _timezone_finder(fast: bool):
|
||||
def _timezone_finder(fast: bool) -> Any:
|
||||
if fast:
|
||||
# less precise, but faster
|
||||
from timezonefinder import TimezoneFinderL as Finder # type: ignore
|
||||
|
@ -46,20 +53,40 @@ class DayWithZone(NamedTuple):
|
|||
zone: Zone
|
||||
|
||||
|
||||
def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
|
||||
finder = _timezone_finder(fast=_FASTER) # rely on the default
|
||||
from my.location.common import LatLon
|
||||
|
||||
# for backwards compatibility
|
||||
def _locations() -> Iterator[Tuple[LatLon, datetime]]:
    """
    Yield ((lat, lon), dt) pairs used for timezone lookups.

    Prefers my.location.all. If that provider fails before producing any
    item, the error is logged, a warning is emitted, and the legacy
    my.location.google provider is used instead. A failure after items were
    already yielded is re-raised: falling back at that point would replay
    the whole legacy stream on top of the partial output.
    """
    produced = False
    try:
        import my.location.all

        for loc in my.location.all.locations():
            # set before yielding so an error at any later point is not
            # mistaken for a setup failure
            produced = True
            yield ((loc.lat, loc.lon), loc.dt)
        return
    except Exception as e:
        if produced:
            raise
        from my.core.warnings import high

        logger.exception("Could not setup via_location using my.location.all provider, falling back to legacy google implementation", exc_info=e)
        high("Setup my.google.takeout.parser, then my.location.all for better google takeout/location data")

    import my.location.google

    for loc in my.location.google.locations():
        yield ((loc.lat, loc.lon), loc.dt)
|
||||
|
||||
|
||||
def _iter_local_dates() -> Iterator[DayWithZone]:
|
||||
finder = _timezone_finder(fast=config.fast) # rely on the default
|
||||
pdt = None
|
||||
warnings = []
|
||||
# todo allow to skip if not too many errors in row?
|
||||
for l in locations(start=start, stop=stop):
|
||||
for (lat, lon), dt in _locations():
|
||||
# TODO right. its _very_ slow...
|
||||
zone = finder.timezone_at(lng=l.lon, lat=l.lat)
|
||||
zone = finder.timezone_at(lat=lat, lng=lon)
|
||||
if zone is None:
|
||||
warnings.append(f"Couldn't figure out tz for {l}")
|
||||
warnings.append(f"Couldn't figure out tz for {lat}, {lon}")
|
||||
continue
|
||||
tz = pytz.timezone(zone)
|
||||
# TODO this is probably a bit expensive... test & benchmark
|
||||
ldt = l.dt.astimezone(tz)
|
||||
ldt = dt.astimezone(tz)
|
||||
ndate = ldt.date()
|
||||
if pdt is not None and ndate < pdt.date():
|
||||
# TODO for now just drop and collect the stats
|
||||
|
@ -71,12 +98,13 @@ def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
|
|||
yield DayWithZone(day=ndate, zone=z)
|
||||
|
||||
|
||||
def most_common(l):
|
||||
res, count = Counter(l).most_common(1)[0] # type: ignore[var-annotated]
|
||||
def most_common(lst: List[DayWithZone]) -> DayWithZone:
|
||||
res, _ = Counter(lst).most_common(1)[0] # type: ignore[var-annotated]
|
||||
return res
|
||||
|
||||
|
||||
@mcachew(cache_path=cache_dir())
|
||||
# refresh _iter_tzs once per day -- don't think a better depends_on is possible dynamically
|
||||
@mcachew(logger=logger, depends_on=lambda: str(date.today()))
|
||||
def _iter_tzs() -> Iterator[DayWithZone]:
|
||||
for d, gr in groupby(_iter_local_dates(), key=lambda p: p.day):
|
||||
logger.info('processed %s', d)
|
||||
|
@ -106,6 +134,7 @@ def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
|
|||
break
|
||||
return None if zone is None else pytz.timezone(zone)
|
||||
|
||||
|
||||
# ok to cache, there are only a few home locations?
|
||||
@lru_cache(maxsize=None)
|
||||
def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
|
||||
|
@ -119,8 +148,10 @@ def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
|
|||
return pytz.timezone(zone)
|
||||
|
||||
|
||||
# TODO expose? to main as well?
|
||||
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
||||
'''
|
||||
Given a datetime, returns the timezone for that date.
|
||||
'''
|
||||
res = _get_day_tz(d=dt.date())
|
||||
if res is not None:
|
||||
return res
|
||||
|
@ -129,6 +160,9 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
|
|||
loc = home.get_location(dt)
|
||||
return _get_home_tz(loc=loc)
|
||||
|
||||
# expose as 'public' function
|
||||
get_tz = _get_tz
|
||||
|
||||
|
||||
def localize(dt: datetime) -> tzdatetime:
|
||||
tz = _get_tz(dt)
|
||||
|
|
7
tox.ini
7
tox.ini
|
@ -100,6 +100,9 @@ commands =
|
|||
hpi module install my.goodreads
|
||||
hpi module install my.pdfs
|
||||
hpi module install my.smscalls
|
||||
hpi module install my.location.gpslogger
|
||||
hpi module install my.location.via_ip
|
||||
hpi module install my.google.takeout.parser
|
||||
|
||||
# todo fuck. -p my.github isn't checking the subpackages?? wtf...
|
||||
# guess it wants .pyi file??
|
||||
|
@ -118,6 +121,10 @@ commands =
|
|||
-p my.body.exercise.cross_trainer \
|
||||
-p my.bluemaestro \
|
||||
-p my.location.google \
|
||||
-p my.location.google_takeout \
|
||||
-p my.location.via_ip \
|
||||
-p my.location.gpslogger \
|
||||
-p my.ip.common \
|
||||
-p my.time.tz.via_location \
|
||||
-p my.calendar.holidays \
|
||||
-p my.arbtt \
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue