location: add all.py, using takeout/gpslogger/ip

This commit is contained in:
Sean Breckenridge 2022-04-25 18:21:52 -07:00
parent 66a00c6ada
commit ca10d524a4
12 changed files with 357 additions and 27 deletions

View file

@ -72,10 +72,19 @@ class location:
# and we can't import the types from the module itself, otherwise would be circular. common module?
home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
class via_ip:
accuracy: float
class gpslogger:
export_path: Paths = ''
accuracy: float
class time:
class tz:
pass
class via_location:
fast: bool
require_accuracy: float
class orgmode:

28
my/ip/all.py Normal file
View file

@ -0,0 +1,28 @@
"""
An example all.py stub module that provides ip data
To use this, you'd add IP providers that yield IPs to the 'ips' function
For an example of how this could be used, see https://github.com/seanbreckenridge/HPI/tree/master/my/ip
"""
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from typing import Iterator
from my.core.common import Stats
from .common import IP
def ips() -> Iterator[IP]:
    """
    Stub provider: yields no IPs.

    Add your own IP sources to the tuple below; each one is an iterable of IP.
    """
    # no providers are configured in this stub, so the loop body never runs
    for provider_ips in ():
        yield from provider_ips
def stats() -> Stats:
    """
    Collect the stat() summary for the ips provider.
    """
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(ips))
    return summary

39
my/ip/common.py Normal file
View file

@ -0,0 +1,39 @@
"""
Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]]
"""
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from my.core import __NOT_HPI_MODULE__
import ipaddress
from typing import NamedTuple, Iterator
from datetime import datetime
import ipgeocache
from my.core import Json
class IP(NamedTuple):
    """
    A datetime paired with an IP address.
    """

    dt: datetime
    # an IP address, as a string
    addr: str

    # TODO: could cache? not sure if it's worth it
    def ipgeocache(self) -> Json:
        """
        Return whatever ipgeocache.get gives back for this address.
        """
        # inside a method the bare name refers to the ipgeocache *module*,
        # not to this method (class attributes aren't in scope here)
        geo_info = ipgeocache.get(self.addr)
        return geo_info

    @property
    def tz(self) -> str:
        """
        The "timezone" entry of the ipgeocache data for this address.
        """
        zone_name: str = self.ipgeocache()["timezone"]
        return zone_name
def drop_private(ips: Iterator[IP]) -> Iterator[IP]:
    """
    Lazily pass through only the IPs whose address is not private

    Useful as a filter in front of providers, e.g. drop_private(some_source())
    """

    def _is_public(ip: IP) -> bool:
        return not ipaddress.ip_address(ip.addr).is_private

    yield from filter(_is_public, ips)

46
my/location/all.py Normal file
View file

@ -0,0 +1,46 @@
"""
Merges location data from multiple sources
"""
from typing import Iterator
from my.core import Stats, LazyLogger
from my.core.source import import_source
from my.location.via_ip import locations
from .common import Location
logger = LazyLogger(__name__, level="warning")
def locations() -> Iterator[Location]:
    """
    Yield locations from each source in turn:
    google takeout first, then gpslogger, then IP-based estimates.
    """
    sources = (_takeout_locations, _gpslogger_locations, _ip_locations)
    for source in sources:
        yield from source()
@import_source(module_name="my.location.via_ip")
def _ip_locations() -> Iterator[Location]:
    # the import lives inside the wrapped function, so it only runs when this source is iterated
    # NOTE(review): presumably import_source deals with the module being unavailable -- confirm in my.core.source
    from . import via_ip

    provider = via_ip.locations
    yield from provider()
@import_source(module_name="my.location.google_takeout")
def _takeout_locations() -> Iterator[Location]:
    # the import lives inside the wrapped function, so it only runs when this source is iterated
    # NOTE(review): presumably import_source deals with the module being unavailable -- confirm in my.core.source
    from . import google_takeout

    provider = google_takeout.locations
    yield from provider()
@import_source(module_name="my.location.gpslogger")
def _gpslogger_locations() -> Iterator[Location]:
    # the import lives inside the wrapped function, so it only runs when this source is iterated
    # NOTE(review): presumably import_source deals with the module being unavailable -- confirm in my.core.source
    from . import gpslogger

    provider = gpslogger.locations
    yield from provider()
def stats() -> Stats:
    """
    Collect the stat() summary for the merged locations.
    """
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(locations))
    return summary

17
my/location/common.py Normal file
View file

@ -0,0 +1,17 @@
from datetime import date, datetime
from typing import Union, Tuple, NamedTuple, Optional
from my.core import __NOT_HPI_MODULE__
DateIsh = Union[datetime, date, str]
LatLon = Tuple[float, float]
# TODO: add timezone to this? can use timezonefinder in tz provider instead though
class Location(NamedTuple):
    """
    A single timestamped position, shared by the my.location.* providers.

    NOTE: positionally, lon comes *before* lat -- construct with keyword arguments
    to avoid mixing them up.
    """
    # longitude
    lon: float
    # latitude
    lat: float
    # when the position was recorded
    dt: datetime
    # accuracy of the position, if known
    # NOTE(review): looks like meters, going by the "~15km" / 15_000 default in via_ip -- confirm
    accuracy: Optional[float]
    # elevation, or None when the source has no such data
    elevation: Optional[float]

View file

@ -1,6 +1,9 @@
"""
Location data from Google Takeout
DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead
"""
REQUIRES = [
'geopy', # checking that coordinates are valid
'ijson',
@ -20,6 +23,10 @@ from ..core.common import LazyLogger, mcachew
from ..core.cachew import cache_dir
from ..core import kompress
from my.core.warnings import high
high("Please set up my.google.takeout.parser module for better takeout support")
# otherwise uses ijson
# todo move to config??

View file

@ -0,0 +1,33 @@
"""
Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google
"""
REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
from typing import Iterator
from my.google.takeout.parser import events, _cachew_depends_on
from google_takeout_parser.models import Location as GoogleLocation
from my.core.common import mcachew, LazyLogger, Stats
from .common import Location
logger = LazyLogger(__name__)
@mcachew(
    depends_on=_cachew_depends_on,
    logger=logger,
)
def locations() -> Iterator[Location]:
    """
    Convert the location events from the google takeout parser into Location objects.

    Takeout data has no elevation here, so that field is always None.
    """
    for event in events():
        # skip anything that isn't a location event (or that is an Exception)
        if not isinstance(event, GoogleLocation) or isinstance(event, Exception):
            continue
        yield Location(
            lon=event.lng,
            lat=event.lat,
            dt=event.dt,
            accuracy=event.accuracy,
            elevation=None,
        )
def stats() -> Stats:
    """
    Collect the stat() summary for the takeout locations.
    """
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(locations))
    return summary

75
my/location/gpslogger.py Normal file
View file

@ -0,0 +1,75 @@
"""
Parse [[https://github.com/mendhak/gpslogger][gpslogger]] .gpx (xml) files
"""
REQUIRES = ["gpxpy"]
from my.config import location
from my.core import Paths, dataclass
@dataclass
class config(location.gpslogger):
    """
    Settings for the gpslogger module, layered on top of the user's my.config.location.gpslogger
    """

    # path[s]/glob to the synced gpx (XML) files
    # (no default here, so it has to come from the user config)
    export_path: Paths

    # default accuracy for gpslogger; attached to every Location this module yields
    # NOTE(review): presumably meters -- confirm
    accuracy: float = 50.0
from itertools import chain
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator, Sequence, List
import gpxpy # type: ignore[import]
from more_itertools import unique_everseen
from my.core import Stats, LazyLogger
from my.core.common import get_files, mcachew
from my.utils.input_source import InputSource
from .common import Location
logger = LazyLogger(__name__, level="warning")
def inputs() -> Sequence[Path]:
    """
    Resolve config.export_path to the .gpx files it refers to.
    """
    gpx_files = get_files(config.export_path, glob="*.gpx")
    return gpx_files
def _cachew_depends_on(from_paths: InputSource) -> List[float]:
    """
    Cache key for locations(): the modification time of every input file, in input order.
    """
    mtimes: List[float] = []
    for gpx_path in from_paths():
        mtimes.append(gpx_path.stat().st_mtime)
    return mtimes
# TODO: could use a better cachew key/this has to recompute every file whenever the newest one changes
@mcachew(depends_on=_cachew_depends_on, logger=logger)
def locations(from_paths: InputSource = inputs) -> Iterator[Location]:
    """
    Yield the locations from every input file, keeping only the first one seen for each timestamp.

    from_paths: callable returning the gpx files to read (defaults to inputs)
    """

    def _timestamp(loc: Location) -> datetime:
        return loc.dt

    per_file = map(_extract_locations, from_paths())
    merged = chain.from_iterable(per_file)
    yield from unique_everseen(merged, key=_timestamp)
def _extract_locations(path: Path) -> Iterator[Location]:
    """
    Parse one gpx file and yield a Location for every track point that has a timestamp.

    Every Location gets config.accuracy as its accuracy.
    """
    with path.open("r") as gpx_file:
        parsed = gpxpy.parse(gpx_file)
        for track in parsed.tracks:
            for segment in track.segments:
                # points without a timestamp are dropped
                timestamped = (pt for pt in segment.points if pt.time is not None)
                for pt in timestamped:
                    # hmm - for gpslogger, seems that timezone is always SimpleTZ('Z'), which
                    # specifies UTC -- see https://github.com/tkrajina/gpxpy/blob/cb243b22841bd2ce9e603fe3a96672fc75edecf2/gpxpy/gpxfield.py#L38
                    # so the tzinfo is swapped for the stdlib UTC (this replaces it, it doesn't convert)
                    utc_dt = datetime.replace(pt.time, tzinfo=timezone.utc)
                    yield Location(
                        lat=pt.latitude,
                        lon=pt.longitude,
                        accuracy=config.accuracy,
                        elevation=pt.elevation,
                        dt=utc_dt,
                    )
def stats() -> Stats:
    """
    Collect the stat() summary for the gpslogger locations.
    """
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(locations))
    return summary

View file

@ -2,17 +2,13 @@
Simple location provider, serving as a fallback when more detailed data isn't available
'''
from dataclasses import dataclass
from datetime import datetime, date, time, timezone
from datetime import datetime, time, timezone
from functools import lru_cache
from typing import Sequence, Tuple, Union, cast
from my.config import location as user_config
DateIsh = Union[datetime, date, str]
# todo hopefully reasonable? might be nice to add name or something too
LatLon = Tuple[float, float]
from my.location.common import LatLon, DateIsh
@dataclass
class Config(user_config):

39
my/location/via_ip.py Normal file
View file

@ -0,0 +1,39 @@
"""
Converts IP addresses provided by my.ip.all to estimated locations
"""
REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
from my.core import dataclass, Stats
from my.config import location
@dataclass
class config(location.via_ip):
    """
    Settings for the via_ip module, layered on top of the user's my.config.location.via_ip
    """

    # no real science to this, just a guess of ~15km accuracy for IP addresses
    # declared as float to agree with the accuracy field in the user config and on Location
    accuracy: float = 15_000.0
from typing import Iterator
from .common import Location
from my.ip.all import ips
def locations() -> Iterator[Location]:
    """
    Turn each IP from my.ip.all into an estimated Location.

    The coordinates come from the "loc" entry of the ipgeocache data, a "lat,lon" string.
    Accuracy is config.accuracy; there is no elevation data.
    """
    for ip in ips():
        geo_info = ip.ipgeocache()
        coords: str = geo_info["loc"]
        lat_text, _sep, lon_text = coords.partition(",")
        yield Location(
            lat=float(lat_text),
            lon=float(lon_text),
            dt=ip.dt,
            accuracy=config.accuracy,
            elevation=None,
        )
def stats() -> Stats:
    """
    Collect the stat() summary for the IP-based locations.
    """
    from my.core import stat

    summary: Stats = {}
    summary.update(stat(locations))
    return summary

View file

@ -7,27 +7,34 @@ REQUIRES = [
]
from my.config import time
from my.core import dataclass
@dataclass
class config(time.tz.via_location):
    """
    Settings for the via_location timezone provider, layered on top of the user's my.config.time.tz.via_location
    """

    # less precise, but faster
    # (passed to _timezone_finder to pick which finder implementation is used)
    fast: bool = True

    # if the accuracy for the location is more than 5km, don't use
    # NOTE(review): so the unit is presumably meters -- confirm
    require_accuracy: float = 5_000
from collections import Counter
from datetime import date, datetime
from functools import lru_cache
from itertools import groupby
from typing import Iterator, NamedTuple, Optional
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List
from more_itertools import seekable
import pytz
from ...core.common import LazyLogger, mcachew, tzdatetime
from ...core.cachew import cache_dir
from ...location.google import locations
from my.core.common import LazyLogger, mcachew, tzdatetime
logger = LazyLogger(__name__, level='warning')
logger = LazyLogger(__name__, level='debug')
# todo should move to config? not sure
_FASTER: bool = True
@lru_cache(2)
def _timezone_finder(fast: bool):
def _timezone_finder(fast: bool) -> Any:
if fast:
# less precise, but faster
from timezonefinder import TimezoneFinderL as Finder # type: ignore
@ -46,20 +53,40 @@ class DayWithZone(NamedTuple):
zone: Zone
def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
finder = _timezone_finder(fast=_FASTER) # rely on the default
from my.location.common import LatLon
# for backwards compatibility
def _locations() -> Iterator[Tuple[LatLon, datetime]]:
    '''
    Yield ((lat, lon), dt) pairs for every known location.

    Uses my.location.all; if that fails for any reason, logs the error, warns,
    and falls back to the legacy my.location.google module.
    '''
    try:
        import my.location.all

        for loc in my.location.all.locations():
            yield ((loc.lat, loc.lon), loc.dt)

    # deliberately broad: any failure in the new provider should trigger the fallback
    # NOTE: the try covers the iteration as well as the import, so if the provider fails
    # part-way through, the fallback locations are yielded after the ones already produced
    except Exception as e:
        from my.core.warnings import high

        logger.exception("Could not setup via_location using my.location.all provider, falling back to legacy google implementation", exc_info=e)
        high("Setup my.google.takeout.parser, then my.location.all for better google takeout/location data")

        import my.location.google

        for loc in my.location.google.locations():
            yield ((loc.lat, loc.lon), loc.dt)
def _iter_local_dates() -> Iterator[DayWithZone]:
finder = _timezone_finder(fast=config.fast) # rely on the default
pdt = None
warnings = []
# todo allow to skip if not too many errors in a row?
for l in locations(start=start, stop=stop):
for (lat, lon), dt in _locations():
# TODO right. its _very_ slow...
zone = finder.timezone_at(lng=l.lon, lat=l.lat)
zone = finder.timezone_at(lat=lat, lng=lon)
if zone is None:
warnings.append(f"Couldn't figure out tz for {l}")
warnings.append(f"Couldn't figure out tz for {lat}, {lon}")
continue
tz = pytz.timezone(zone)
# TODO this is probably a bit expensive... test & benchmark
ldt = l.dt.astimezone(tz)
ldt = dt.astimezone(tz)
ndate = ldt.date()
if pdt is not None and ndate < pdt.date():
# TODO for now just drop and collect the stats
@ -71,12 +98,13 @@ def _iter_local_dates(start=0, stop=None) -> Iterator[DayWithZone]:
yield DayWithZone(day=ndate, zone=z)
def most_common(l):
res, count = Counter(l).most_common(1)[0] # type: ignore[var-annotated]
def most_common(lst: List[DayWithZone]) -> DayWithZone:
res, _ = Counter(lst).most_common(1)[0] # type: ignore[var-annotated]
return res
@mcachew(cache_path=cache_dir())
# refresh _iter_tzs once per day -- don't think a better depends_on is possible dynamically
@mcachew(logger=logger, depends_on=lambda: str(date.today()))
def _iter_tzs() -> Iterator[DayWithZone]:
for d, gr in groupby(_iter_local_dates(), key=lambda p: p.day):
logger.info('processed %s', d)
@ -106,6 +134,7 @@ def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
break
return None if zone is None else pytz.timezone(zone)
# ok to cache, there are only a few home locations?
@lru_cache(maxsize=None)
def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
@ -119,8 +148,10 @@ def _get_home_tz(loc) -> Optional[pytz.BaseTzInfo]:
return pytz.timezone(zone)
# TODO expose? to main as well?
def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
'''
Given a datetime, returns the timezone for that date.
'''
res = _get_day_tz(d=dt.date())
if res is not None:
return res
@ -129,6 +160,9 @@ def _get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
loc = home.get_location(dt)
return _get_home_tz(loc=loc)
# expose as 'public' function
get_tz = _get_tz
def localize(dt: datetime) -> tzdatetime:
tz = _get_tz(dt)

View file

@ -100,6 +100,9 @@ commands =
hpi module install my.goodreads
hpi module install my.pdfs
hpi module install my.smscalls
hpi module install my.location.gpslogger
hpi module install my.location.via_ip
hpi module install my.google.takeout.parser
# todo fuck. -p my.github isn't checking the subpackages?? wtf...
# guess it wants .pyi file??
@ -118,6 +121,10 @@ commands =
-p my.body.exercise.cross_trainer \
-p my.bluemaestro \
-p my.location.google \
-p my.location.google_takeout \
-p my.location.via_ip \
-p my.location.gpslogger \
-p my.ip.common \
-p my.time.tz.via_location \
-p my.calendar.holidays \
-p my.arbtt \