location: add all.py, using takeout/gpslogger/ip

This commit is contained in:
Sean Breckenridge 2022-04-25 18:21:52 -07:00
parent 66a00c6ada
commit ca10d524a4
12 changed files with 357 additions and 27 deletions

View file

@ -72,10 +72,19 @@ class location:
# and we can't import the types from the module itself, otherwise would be circular. common module? # and we can't import the types from the module itself, otherwise would be circular. common module?
home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0) home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
class via_ip:
accuracy: float
class gpslogger:
export_path: Paths = ''
accuracy: float
class time: class time:
class tz: class tz:
pass class via_location:
fast: bool
require_accuracy: float
class orgmode: class orgmode:

28
my/ip/all.py Normal file
View file

@ -0,0 +1,28 @@
"""
An example all.py stub module that provides ip data
To use this, you'd add IP providers that yield IPs to the 'ips' function
For an example of how this could be used, see https://github.com/seanbreckenridge/HPI/tree/master/my/ip
"""
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from typing import Iterator
from my.core.common import Stats
from .common import IP
def ips() -> Iterator[IP]:
    """
    Yield IPs from every provider.

    This stub has no providers, so it yields nothing; add
    'yield from ...' lines for your own IP sources here.
    """
    yield from []
def stats() -> Stats:
    """Return the summary that my.core.stat computes for ips."""
    from my.core import stat

    return {**stat(ips)}

39
my/ip/common.py Normal file
View file

@ -0,0 +1,39 @@
"""
Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]]
"""
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from my.core import __NOT_HPI_MODULE__
import ipaddress
from typing import NamedTuple, Iterator
from datetime import datetime
import ipgeocache
from my.core import Json
class IP(NamedTuple):
    """An IP address together with the datetime it is associated with."""

    dt: datetime
    addr: str  # an IP address

    # TODO: could cache? not sure if it's worth it
    def ipgeocache(self) -> Json:
        """Return what ipgeocache.get gives for this address."""
        # inside the method body, `ipgeocache` resolves to the imported module
        # (class-level names are not part of a method's scope), not to this method
        return ipgeocache.get(self.addr)

    @property
    def tz(self) -> str:
        """The "timezone" entry of the ipgeocache data for this address."""
        timezone_name: str = self.ipgeocache()["timezone"]
        return timezone_name
def drop_private(ips: Iterator[IP]) -> Iterator[IP]:
    """
    Filter helper: yields only the IPs whose address is not private
    """
    for candidate in ips:
        if not ipaddress.ip_address(candidate.addr).is_private:
            yield candidate

46
my/location/all.py Normal file
View file

@ -0,0 +1,46 @@
"""
Merges location data from multiple sources
"""
from typing import Iterator
from my.core import Stats, LazyLogger
from my.core.source import import_source
from my.location.via_ip import locations
from .common import Location
logger = LazyLogger(__name__, level="warning")
def locations() -> Iterator[Location]:
    """Yield locations from the takeout, gpslogger and IP sources, in that order."""
    for source in (_takeout_locations, _gpslogger_locations, _ip_locations):
        yield from source()
@import_source(module_name="my.location.via_ip")
def _ip_locations() -> Iterator[Location]:
    """Yield the locations provided by my.location.via_ip."""
    # imported lazily, inside the function wrapped by import_source
    from .via_ip import locations as ip_locations

    yield from ip_locations()
@import_source(module_name="my.location.google_takeout")
def _takeout_locations() -> Iterator[Location]:
    """Yield the locations provided by my.location.google_takeout."""
    # imported lazily, inside the function wrapped by import_source
    from .google_takeout import locations as takeout_locations

    yield from takeout_locations()
@import_source(module_name="my.location.gpslogger")
def _gpslogger_locations() -> Iterator[Location]:
    """Yield the locations provided by my.location.gpslogger."""
    # imported lazily, inside the function wrapped by import_source
    from .gpslogger import locations as gpslogger_locations

    yield from gpslogger_locations()
def stats() -> Stats:
    """Return the summary that my.core.stat computes for the merged locations."""
    from my.core import stat

    return {**stat(locations)}

17
my/location/common.py Normal file
View file

@ -0,0 +1,17 @@
from datetime import date, datetime
from typing import Union, Tuple, NamedTuple, Optional
from my.core import __NOT_HPI_MODULE__
DateIsh = Union[datetime, date, str]
LatLon = Tuple[float, float]
# TODO: add timezone to this? can use timezonefinder in tz provider instead though
class Location(NamedTuple):
    """A single located point in time, shared by all location providers."""

    # NOTE: positional order is (lon, lat), the reverse of the LatLon alias --
    # construct with keyword arguments to avoid mixing them up
    lon: float
    lat: float
    dt: datetime
    # None when the provider has no value for these
    accuracy: Optional[float]
    elevation: Optional[float]

View file

@ -1,6 +1,9 @@
""" """
Location data from Google Takeout Location data from Google Takeout
DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead
""" """
REQUIRES = [ REQUIRES = [
'geopy', # checking that coordinates are valid 'geopy', # checking that coordinates are valid
'ijson', 'ijson',
@ -20,6 +23,10 @@ from ..core.common import LazyLogger, mcachew
from ..core.cachew import cache_dir from ..core.cachew import cache_dir
from ..core import kompress from ..core import kompress
from my.core.warnings import high
high("Please set up my.google.takeout.parser module for better takeout support")
# otherwise uses ijson # otherwise uses ijson
# todo move to config?? # todo move to config??

View file

@ -0,0 +1,33 @@
"""
Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google
"""
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
from typing import Iterator
from my.google.takeout.parser import events, _cachew_depends_on
from google_takeout_parser.models import Location as GoogleLocation
from my.core.common import mcachew, LazyLogger, Stats
from .common import Location
logger = LazyLogger(__name__)
@mcachew(depends_on=_cachew_depends_on, logger=logger)
def locations() -> Iterator[Location]:
    """
    Yield a Location for every GoogleLocation among the takeout events.

    Anything else coming out of events() (including exceptions) is skipped;
    elevation is always None for this source.
    """
    for event in events():
        if isinstance(event, Exception) or not isinstance(event, GoogleLocation):
            continue
        yield Location(
            lon=event.lng,
            lat=event.lat,
            dt=event.dt,
            accuracy=event.accuracy,
            elevation=None,
        )
def stats() -> Stats:
    """Return the summary that my.core.stat computes for the takeout locations."""
    from my.core import stat

    summary: Stats = {**stat(locations)}
    return summary

75
my/location/gpslogger.py Normal file
View file

@ -0,0 +1,75 @@
"""
Parse [[https://github.com/mendhak/gpslogger][gpslogger]] .gpx (xml) files
"""
REQUIRES = ["gpxpy"]
from my.config import location
from my.core import Paths, dataclass
@dataclass
class config(location.gpslogger):
    """Settings for the gpslogger module, layered over the user's location.gpslogger config."""

    # path[s]/glob to the synced gpx (XML) files
    export_path: Paths

    # default accuracy for gpslogger
    accuracy: float = 50.0
from itertools import chain
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator, Sequence, List
import gpxpy # type: ignore[import]
from more_itertools import unique_everseen
from my.core import Stats, LazyLogger
from my.core.common import get_files, mcachew
from my.utils.input_source import InputSource
from .common import Location
logger = LazyLogger(__name__, level="warning")
def inputs() -> Sequence[Path]:
    """Return the *.gpx files found under config.export_path."""
    gpx_files = get_files(config.export_path, glob="*.gpx")
    return gpx_files
def _cachew_depends_on(from_paths: InputSource) -> List[float]:
    """Return the modification time of every input file, used as the cache key."""
    mtimes: List[float] = []
    for gpx_path in from_paths():
        mtimes.append(gpx_path.stat().st_mtime)
    return mtimes
# TODO: could use a better cachew key/this has to recompute every file whenever the newest one changes
@mcachew(depends_on=_cachew_depends_on, logger=logger)
def locations(from_paths: InputSource = inputs) -> Iterator[Location]:
    """
    Yield the locations of all input files, keeping only the first
    location seen for each distinct datetime.
    """
    per_file = map(_extract_locations, from_paths())
    merged = chain.from_iterable(per_file)
    yield from unique_everseen(merged, key=lambda item: item.dt)
# Parses one gpslogger .gpx file with gpxpy and yields a Location for every
# track point that carries a timestamp.
def _extract_locations(path: Path) -> Iterator[Location]:
with path.open("r") as gf:
gpx_obj = gpxpy.parse(gf)
for track in gpx_obj.tracks:
for segment in track.segments:
for point in segment.points:
# points without a timestamp are skipped
if point.time is None:
continue
# hmm - for gpslogger, seems that timezone is always SimpleTZ('Z'), which
# specifies UTC -- see https://github.com/tkrajina/gpxpy/blob/cb243b22841bd2ce9e603fe3a96672fc75edecf2/gpxpy/gpxfield.py#L38
yield Location(
lat=point.latitude,
lon=point.longitude,
# accuracy comes from the module config, not from the point itself
accuracy=config.accuracy,
elevation=point.elevation,
# tzinfo is overwritten with UTC; replace() leaves the clock time unchanged
dt=datetime.replace(point.time, tzinfo=timezone.utc),
)
def stats() -> Stats:
    """Return the summary that my.core.stat computes for the gpslogger locations."""
    from my.core import stat

    summary: Stats = {**stat(locations)}
    return summary

View file

@ -2,17 +2,13 @@
Simple location provider, serving as a fallback when more detailed data isn't available Simple location provider, serving as a fallback when more detailed data isn't available
''' '''
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime, date, time, timezone from datetime import datetime, time, timezone
from functools import lru_cache from functools import lru_cache
from typing import Sequence, Tuple, Union, cast from typing import Sequence, Tuple, Union, cast
from my.config import location as user_config from my.config import location as user_config
from my.location.common import LatLon, DateIsh
DateIsh = Union[datetime, date, str]
# todo hopefully reasonable? might be nice to add name or something too
LatLon = Tuple[float, float]
@dataclass @dataclass
class Config(user_config): class Config(user_config):

39
my/location/via_ip.py Normal file
View file

@ -0,0 +1,39 @@
"""
Converts IP addresses provided by my.ip.all to estimated locations
"""
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from my.core import dataclass, Stats
from my.config import location
@dataclass
class config(location.via_ip):
    """Settings for the via_ip module, layered over the user's location.via_ip config."""

    # no real science to this, just a guess of ~15km accuracy for IP addresses
    # (a float, matching the user config stub and Location.accuracy)
    accuracy: float = 15_000.0
from typing import Iterator
from .common import Location
from my.ip.all import ips
def locations() -> Iterator[Location]:
    """
    Yield an estimated Location for every IP from my.ip.all.

    Coordinates come from the "loc" entry of the ipgeocache data; accuracy is
    the configured guess and elevation is always None.
    """
    for ip in ips():
        # "loc" is split at the first comma: latitude before it, longitude after
        coords: str = ip.ipgeocache()["loc"]
        lat_text, _, lon_text = coords.partition(",")
        yield Location(
            lat=float(lat_text),
            lon=float(lon_text),
            dt=ip.dt,
            accuracy=config.accuracy,
            elevation=None,
        )
def stats() -> Stats:
    """Return the summary that my.core.stat computes for the IP-based locations."""
    from my.core import stat

    summary: Stats = {**stat(locations)}
    return summary

View file

@ -7,27 +7,34 @@ REQUIRES = [
] ]
from my.config import time
from my.core import dataclass
@dataclass
class config(time.tz.via_location):
    """Settings for deriving timezones from location data."""

    # less precise, but faster
    fast: bool = True

    # if the accuracy for the location is more than 5km, don't use
    require_accuracy: float = 5_000
from collections import Counter from collections import Counter
from datetime import date, datetime from datetime import date, datetime
from functools import lru_cache from functools import lru_cache
from itertools import groupby from itertools import groupby
from typing import Iterator, NamedTuple, Optional from typing import Iterator, NamedTuple, Optional, Tuple, Any, List
from more_itertools import seekable from more_itertools import seekable
import pytz import pytz
from ...core.common import LazyLogger, mcachew, tzdatetime from my.core.common import LazyLogger, mcachew, tzdatetime
from ...core.cachew import cache_dir
from ...location.google import locations
logger = LazyLogger(__name__, level='warning')
logger = LazyLogger(__name__, level='debug')
# todo should move to config? not sure
_FASTER: bool = True
@lru_cache(2) @lru_cache(2)
def _timezone_finder(fast: bool): def _timezone_finder(fast: bool) -> Any:
if fast: if fast:
# less precise, but faster # less precise, but faster
from timezonefinder import TimezoneFinderL as Finder # type: ignore from timezonefinder import TimezoneFinderL as Finder # type: ignore
@ -46,20 +53,40 @@ class DayWithZone(NamedTuple):
zone: Zone zone: Zone
def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]: from my.location.common import LatLon
finder = _timezone_finder(fast=_FASTER) # rely on the default
# for backwards compatibility
def _locations() -> Iterator[Tuple[LatLon, datetime]]:
    """
    Yield ((lat, lon), dt) pairs used to work out the timezone.

    Locations come from my.location.all; those whose accuracy is known and
    larger than config.require_accuracy are skipped. If that provider raises,
    the error is logged, a warning is emitted and the legacy
    my.location.google provider is used instead.

    NOTE: the try block spans the whole iteration, so an exception raised
    part-way through also triggers the fallback after some locations have
    already been yielded.
    """
    try:
        import my.location.all

        for loc in my.location.all.locations():
            # honour require_accuracy; locations with unknown accuracy are kept
            if loc.accuracy is not None and loc.accuracy > config.require_accuracy:
                continue
            yield ((loc.lat, loc.lon), loc.dt)
    except Exception as e:
        from my.core.warnings import high

        logger.exception("Could not setup via_location using my.location.all provider, falling back to legacy google implemetation", exc_info=e)
        high("Setup my.google.takeout.parser, then my.location.all for better google takeout/location data")

        import my.location.google

        # separate loop variable: the legacy provider has its own location type
        for gloc in my.location.google.locations():
            yield ((gloc.lat, gloc.lon), gloc.dt)
def _iter_local_dates() -> Iterator[DayWithZone]:
finder = _timezone_finder(fast=config.fast) # rely on the default
pdt = None pdt = None
warnings = [] warnings = []
# todo allow to skip if not noo many errors in row? # todo allow to skip if not noo many errors in row?
for l in locations(start=start, stop=stop): for (lat, lon), dt in _locations():
# TODO right. its _very_ slow... # TODO right. its _very_ slow...
zone = finder.timezone_at(lng=l.lon, lat=l.lat) zone = finder.timezone_at(lat=lat, lng=lon)
if zone is None: if zone is None:
warnings.append(f"Couldn't figure out tz for {l}") warnings.append(f"Couldn't figure out tz for {lat}, {lon}")
continue continue
tz = pytz.timezone(zone) tz = pytz.timezone(zone)
# TODO this is probably a bit expensive... test & benchmark # TODO this is probably a bit expensive... test & benchmark
ldt = l.dt.astimezone(tz) ldt = dt.astimezone(tz)
ndate = ldt.date() ndate = ldt.date()
if pdt is not None and ndate < pdt.date(): if pdt is not None and ndate < pdt.date():
# TODO for now just drop and collect the stats # TODO for now just drop and collect the stats
@ -71,12 +98,13 @@ def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
yield DayWithZone(day=ndate, zone=z) yield DayWithZone(day=ndate, zone=z)
def most_common(l): def most_common(lst: List[DayWithZone]) -> DayWithZone:
res, count = Counter(l).most_common(1)[0] # type: ignore[var-annotated] res, _ = Counter(lst).most_common(1)[0] # type: ignore[var-annotated]
return res return res
@mcachew(cache_path=cache_dir()) # refresh _iter_tzs once per day -- don't think a better depends_on is possible dynamically
@mcachew(logger=logger, depends_on=lambda: str(date.today()))
def _iter_tzs() -> Iterator[DayWithZone]: def _iter_tzs() -> Iterator[DayWithZone]:
for d, gr in groupby(_iter_local_dates(), key=lambda p: p.day): for d, gr in groupby(_iter_local_dates(), key=lambda p: p.day):
logger.info('processed %s', d) logger.info('processed %s', d)
@ -106,6 +134,7 @@ def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
break break
return None if zone is None else pytz.timezone(zone) return None if zone is None else pytz.timezone(zone)
# ok to cache, there are only a few home locations? # ok to cache, there are only a few home locations?
@lru_cache(maxsize=None) @lru_cache(maxsize=None)
def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]: def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
@ -119,8 +148,10 @@ def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
return pytz.timezone(zone) return pytz.timezone(zone)
# TODO expose? to main as well?
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]: def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
'''
Given a datetime, returns the timezone for that date.
'''
res = _get_day_tz(d=dt.date()) res = _get_day_tz(d=dt.date())
if res is not None: if res is not None:
return res return res
@ -129,6 +160,9 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
loc = home.get_location(dt) loc = home.get_location(dt)
return _get_home_tz(loc=loc) return _get_home_tz(loc=loc)
# expose as 'public' function
get_tz = _get_tz
def localize(dt: datetime) -> tzdatetime: def localize(dt: datetime) -> tzdatetime:
tz = _get_tz(dt) tz = _get_tz(dt)

View file

@ -100,6 +100,9 @@ commands =
hpi module install my.goodreads hpi module install my.goodreads
hpi module install my.pdfs hpi module install my.pdfs
hpi module install my.smscalls hpi module install my.smscalls
hpi module install my.location.gpslogger
hpi module install my.location.via_ip
hpi module install my.google.takeout.parser
# todo fuck. -p my.github isn't checking the subpackages?? wtf... # todo fuck. -p my.github isn't checking the subpackages?? wtf...
# guess it wants .pyi file?? # guess it wants .pyi file??
@ -118,6 +121,10 @@ commands =
-p my.body.exercise.cross_trainer \ -p my.body.exercise.cross_trainer \
-p my.bluemaestro \ -p my.bluemaestro \
-p my.location.google \ -p my.location.google \
-p my.location.google_takeout \
-p my.location.via_ip \
-p my.location.gpslogger \
-p my.ip.common \
-p my.time.tz.via_location \ -p my.time.tz.via_location \
-p my.calendar.holidays \ -p my.calendar.holidays \
-p my.arbtt \ -p my.arbtt \