location fallback (#263)
see https://github.com/karlicoss/HPI/issues/262 * move home to fallback/via_home.py * move via_ip to fallback * add fallback model * add stub via_ip file * add fallback_locations for via_ip * use protocol for locations * estimate_from helper, via_home estimator, all.py * via_home: add accuracy, cache history * add datasources to gpslogger/google_takeout * tz/via_location.py: update import to fallback * denylist docs/installation instructions * tz.via_location: let user customize cachew refresh time * add via_ip.estimate_location using binary search * use estimate_location in via_home.get_location * tests: add gpslogger to location config stub * tests: install tz related libs in test env * tz: add regression test for broken windows dates * vendorize bisect_left from python src: it doesn't have a 'key' parameter until python3.10
This commit is contained in:
parent
6dc5e7575f
commit
98b086f746
25 changed files with 1166 additions and 190 deletions
106
tests/core/test_denylist.py
Normal file
106
tests/core/test_denylist.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
import warnings
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import NamedTuple, Iterator
|
||||
|
||||
from my.core.denylist import DenyList
|
||||
|
||||
|
||||
class IP(NamedTuple):
    """Minimal record used as sample input for the denylist test."""

    # address string; the test denies records by this field
    addr: str
    # naive datetime attached to the record; the test also denies by this field
    dt: datetime
|
||||
|
||||
|
||||
def data() -> Iterator[IP]:
    """Yield ten sample ``IP`` records, dated the 1st of each month from January to October 2020."""
    # random IP addresses; the position in this tuple (1-based) is the month
    sample_addrs = (
        "67.98.113.0",
        "59.40.113.87",
        "161.235.192.228",
        "165.243.139.87",
        "69.69.141.154",
        "50.72.224.80",
        "221.67.89.168",
        "177.113.119.251",
        "93.200.246.215",
        "127.105.171.61",
    )
    for month, addr in enumerate(sample_addrs, start=1):
        yield IP(addr=addr, dt=datetime(2020, month, 1))
|
||||
|
||||
|
||||
def test_denylist(tmp_path: Path) -> None:
    """Walk a DenyList through load, deny, write/reload and filter, then check the JSON on disk."""
    deny_file = (tmp_path / "denylist.json").absolute()
    # NOTE(review): the extent of this `with` block is reconstructed (indentation
    # was lost in the source view); everything below is assumed to sit inside it
    with warnings.catch_warnings(record=True):

        # start from an empty denylist (though the file does not have to exist for denylist to work)
        deny_file.write_text("[]")

        denylist = DenyList(deny_file)

        denylist.load()
        assert dict(denylist._deny_map) == {}
        assert denylist._deny_raw_list == []

        # nothing is denied yet, so filtering passes every record through
        assert list(denylist.filter(data())) == list(data())
        assert len(denylist._deny_map) == 0
        assert len(denylist._deny_raw_list) == 0

        # deny a single address
        denylist.deny(key="addr", value="67.98.113.0")
        # write and reload to update _deny_map, _deny_raw_list
        denylist.write()
        denylist.load()

        assert len(denylist._deny_map) == 1
        assert len(denylist._deny_raw_list) == 1

        assert denylist._deny_raw_list == [{"addr": "67.98.113.0"}]

        kept = list(denylist.filter(data()))
        assert len(kept) == 9
        assert "67.98.113.0" not in [record.addr for record in kept]

        assert dict(denylist._deny_map) == {"addr": {"67.98.113.0"}}

        # invert=True yields the denied records instead of the kept ones
        rejected = list(denylist.filter(data(), invert=True))
        assert len(rejected) == 1

        assert rejected[0] == IP(addr="67.98.113.0", dt=datetime(2020, 1, 1))

        # add some non-JSON primitive data
        denylist.deny(key="dt", value=datetime(2020, 2, 1))

        # test internal behaviour: _deny_map doesn't get updated by a call to .deny
        #
        # if we change this just update the test, it is only here to pin
        # the current behaviour
        assert len(denylist._deny_map) == 1

        # write and load to update _deny_map
        denylist.write()
        denylist.load()

        assert len(denylist._deny_map) == 2
        assert len(denylist._deny_raw_list) == 2

        # the datetime shows up as an ISO-format string after the round trip
        assert denylist._deny_raw_list[-1] == {"dt": "2020-02-01T00:00:00"}

        kept = list(denylist.filter(data()))
        assert len(kept) == 8

        assert "59.40.113.87" not in [record.addr for record in kept]

        # finally, check what was actually persisted to disk
        with open(deny_file, "r") as fh:
            on_disk = json.loads(fh.read())

        expected = [
            {"addr": "67.98.113.0"},
            {"dt": "2020-02-01T00:00:00"},
        ]
        assert on_disk == expected
|
|
@ -1,7 +1,5 @@
|
|||
from pathlib import Path
|
||||
|
||||
from more_itertools import one
|
||||
|
||||
import pytest # type: ignore
|
||||
|
||||
|
||||
|
@ -20,26 +18,11 @@ def test() -> None:
|
|||
|
||||
@pytest.fixture(autouse=True)
|
||||
def prepare(tmp_path: Path):
|
||||
from .common import reset_modules
|
||||
reset_modules()
|
||||
|
||||
user_config = _prepare_google_config(tmp_path)
|
||||
from .shared_config import temp_config
|
||||
user_config = temp_config(tmp_path)
|
||||
|
||||
import my.core.cfg as C
|
||||
with C.tmp_config() as config:
|
||||
config.google = user_config # type: ignore
|
||||
config.google = user_config.google
|
||||
yield
|
||||
|
||||
|
||||
def _prepare_google_config(tmp_path: Path):
|
||||
from .common import testdata
|
||||
track = one(testdata().rglob('italy-slovenia-2017-07-29.json'))
|
||||
|
||||
# todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
|
||||
import zipfile
|
||||
with zipfile.ZipFile(tmp_path / 'takeout.zip', 'w') as zf:
|
||||
zf.writestr('Takeout/Location History/Location History.json', track.read_bytes())
|
||||
|
||||
class google_config:
|
||||
takeout_path = tmp_path
|
||||
return google_config
|
||||
|
|
125
tests/location_fallback.py
Normal file
125
tests/location_fallback.py
Normal file
|
@ -0,0 +1,125 @@
|
|||
"""
|
||||
To test my.location.fallback_location.all
|
||||
"""
|
||||
|
||||
from typing import Iterator
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
from more_itertools import ilen
|
||||
|
||||
from my.ip.common import IP
|
||||
|
||||
def data() -> Iterator[IP]:
    """Yield the fixed IP samples that this module installs in place of ``my.ip.all.ips``."""
    # random IP addresses, as (address, month, day, hour); all are UTC, in 2020
    samples = (
        ("67.98.113.0", 1, 1, 12),
        ("67.98.112.0", 1, 15, 12),
        ("59.40.113.87", 2, 1, 12),
        ("59.40.139.87", 2, 1, 16),
        ("161.235.192.228", 3, 1, 12),
    )
    for addr, month, day, hour in samples:
        yield IP(addr=addr, dt=datetime(2020, month, day, hour, 0, 0, tzinfo=timezone.utc))
|
||||
|
||||
# redefine the my.ip.all function using data for testing
|
||||
import my.ip.all as ip_module
|
||||
ip_module.ips = data
|
||||
|
||||
from my.location.fallback import via_ip
|
||||
|
||||
# these are all tests for the bisect algorithm defined in via_ip.py
|
||||
# to make sure we can correctly find IPs that are within the 'for_duration' of a given datetime
|
||||
|
||||
def test_ip_fallback() -> None:
    """Check via_ip's time-window lookup and how it combines with via_home in fallback.all.

    Relies on the module-level override of ``my.ip.all.ips`` with ``data`` and on
    the config installed by the ``prepare`` fixture imported at the bottom of this
    module.
    """
    # make sure that the data override works
    assert ilen(ip_module.ips()) == ilen(data())
    assert ilen(ip_module.ips()) == ilen(via_ip.fallback_locations())
    assert ilen(via_ip.fallback_locations()) == 5
    assert ilen(via_ip._sorted_fallback_locations()) == 5

    # confirm duration from via_ip since that is used for bisect
    assert via_ip.config.for_duration == timedelta(hours=24)

    # basic tests

    # try estimating slightly before the first IP
    est = list(via_ip.estimate_location(datetime(2020, 1, 1, 11, 59, 59, tzinfo=timezone.utc)))
    assert len(est) == 0

    # during the duration for the first IP
    est = list(via_ip.estimate_location(datetime(2020, 1, 1, 12, 30, 0, tzinfo=timezone.utc)))
    assert len(est) == 1

    # right after the 'for_duration' for an IP
    est = list(via_ip.estimate_location(datetime(2020, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + via_ip.config.for_duration + timedelta(seconds=1)))
    assert len(est) == 0

    # on 2/1/2020, there's one IP if before 16:00 (the second IP that day is at 16:00)
    est = list(via_ip.estimate_location(datetime(2020, 2, 1, 12, 30, 0, tzinfo=timezone.utc)))
    assert len(est) == 1

    # and two if after 16:00
    est = list(via_ip.estimate_location(datetime(2020, 2, 1, 17, 0, 0, tzinfo=timezone.utc)))
    assert len(est) == 2

    # the 12:00 IP should 'expire' before the 16:00 IP, use 3:30PM on the next day
    est = list(via_ip.estimate_location(datetime(2020, 2, 2, 15, 30, 0, tzinfo=timezone.utc)))
    assert len(est) == 1

    use_dt = datetime(2020, 3, 1, 12, 15, 0, tzinfo=timezone.utc)

    # test last IP
    est = list(via_ip.estimate_location(use_dt))
    assert len(est) == 1

    # datetime should be the IP's, not the passed datetime (if via_home, it uses the passed dt)
    assert est[0].dt != use_dt

    # test interop with other fallback estimators/all.py
    #
    # redefine fallback_estimators to prevent possible namespace packages the user
    # may have installed from having side effects testing this
    #
    # imported under an alias so the module does not shadow the builtin all()
    from my.location.fallback import all as fallback_all
    from my.location.fallback import via_home

    def _fe() -> Iterator[fallback_all.LocationEstimator]:
        yield via_ip.estimate_location
        yield via_home.estimate_location

    fallback_all.fallback_estimators = _fe
    assert ilen(fallback_all.fallback_estimators()) == 2

    # test that all.estimate_location can draw on both estimators
    #
    # just passing via_ip should give one IP
    from my.location.fallback.common import _iter_estimate_from
    raw_est = list(_iter_estimate_from(use_dt, (via_ip.estimate_location,)))
    assert len(raw_est) == 1
    assert raw_est[0].datasource == "via_ip"
    assert raw_est[0].accuracy == 15_000

    # passing home should give one
    home_est = list(_iter_estimate_from(use_dt, (via_home.estimate_location,)))
    assert len(home_est) == 1
    assert home_est[0].accuracy == 30_000

    # make sure ip accuracy is more accurate
    assert raw_est[0].accuracy < home_est[0].accuracy

    # passing both should give two
    raw_est = list(_iter_estimate_from(use_dt, (via_ip.estimate_location, via_home.estimate_location)))
    assert len(raw_est) == 2

    # shouldn't raise value error
    all_est = fallback_all.estimate_location(use_dt)
    # should have used the IP from via_ip since it was more accurate
    assert all_est.datasource == "via_ip"

    # test that a home defined in shared_config.py is used if no IP is found
    loc = fallback_all.estimate_location(datetime(2021, 1, 1, 12, 30, 0, tzinfo=timezone.utc))
    assert loc.datasource == "via_home"

    # test a different home using location.fallback.all
    bulgaria = fallback_all.estimate_location(datetime(2006, 1, 1, 12, 30, 0, tzinfo=timezone.utc))
    assert bulgaria.datasource == "via_home"
    assert (bulgaria.lat, bulgaria.lon) == (42.697842, 23.325973)
    assert (loc.lat, loc.lon) != (bulgaria.lat, bulgaria.lon)
|
||||
|
||||
|
||||
# re-use prepare fixture for overriding config from shared_config.py
|
||||
from .tz import prepare
|
65
tests/shared_config.py
Normal file
65
tests/shared_config.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
# Defines some shared config for tests
|
||||
|
||||
from datetime import datetime, date, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from typing import Any, NamedTuple
|
||||
import my.time.tz.via_location as LTZ
|
||||
from more_itertools import one
|
||||
|
||||
|
||||
class SharedConfig(NamedTuple):
    """Bundle of the three config sections that ``temp_config`` hands back to tests."""

    # takeout config class produced by _prepare_google_config
    google: Any
    # location config class (home history and fallback settings)
    location: Any
    # time config class (tz.via_location settings)
    time: Any
|
||||
|
||||
|
||||
def _prepare_google_config(tmp_path: Path):
    """Create a takeout zip under *tmp_path* and return a config class pointing at it.

    The single ``italy-slovenia-2017-07-29.json`` track found under the test
    data directory is written into ``tmp_path / 'takeout.zip'`` as
    ``Takeout/Location History/Location History.json``.

    Returns a class whose ``takeout_path`` attribute is *tmp_path*.

    Raises RuntimeError if ``one`` cannot pick exactly one matching track.
    """
    from .common import testdata
    try:
        track = one(testdata().rglob('italy-slovenia-2017-07-29.json'))
    except ValueError as e:
        # chain explicitly so the original ValueError is reported as the direct cause
        raise RuntimeError('testdata not found, setup git submodules?') from e

    # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
    import zipfile
    with zipfile.ZipFile(tmp_path / 'takeout.zip', 'w') as zf:
        zf.writestr('Takeout/Location History/Location History.json', track.read_bytes())

    class google_config:
        takeout_path = tmp_path
    return google_config
|
||||
|
||||
|
||||
# pass tmp_path from pytest to this helper function
|
||||
# see tests/tz.py as an example
|
||||
def temp_config(temp_path: Path) -> Any:
    """Reset modules and build the shared test config rooted at *temp_path*.

    Returns a ``SharedConfig`` carrying the ``google``, ``location`` and ``time``
    config classes; the caller is expected to assign them onto a temporary
    config (see tests/tz.py).
    """
    from .common import reset_modules
    reset_modules()

    LTZ.config.fast = True

    # NOTE(review): the nesting of these config classes is reconstructed
    # (indentation was lost in the source view) — confirm against the original

    class time:
        class tz:
            class via_location:
                pass  # just rely on the defaults...

    class location:
        home_accuracy = 30_000
        # note: order doesn't matter, will be sorted in the data provider
        home = (
            # supports ISO strings
            ('2005-12-04', (42.697842, 23.325973)),  # Bulgaria, Sofia
            # supports date/datetime objects
            (date(year=1980, month=2, day=15), (40.7128, -74.0060)),  # NY
            # check tz handling..
            (datetime.fromtimestamp(1600000000, tz=timezone.utc), (55.7558, 37.6173)),  # Moscow, Russia
        )

        class via_ip:
            accuracy = 15_000

        class gpslogger:
            pass

    google = _prepare_google_config(temp_path)
    return SharedConfig(google=google, location=location, time=time)
|
47
tests/tz.py
47
tests/tz.py
|
@ -1,4 +1,5 @@
|
|||
from datetime import datetime, timedelta, date, timezone
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import pytest # type: ignore
|
||||
|
@ -46,8 +47,15 @@ def test_tz() -> None:
|
|||
tz = LTZ._get_tz(D('20201001 14:15:16'))
|
||||
assert tz is not None
|
||||
|
||||
tz = LTZ._get_tz(datetime.min)
|
||||
assert tz is not None
|
||||
on_windows = sys.platform == 'win32'
|
||||
if not on_windows:
|
||||
tz = LTZ._get_tz(datetime.min)
|
||||
assert tz is not None
|
||||
else:
|
||||
# seems this fails because Windows doesn't support the same date ranges
|
||||
# https://stackoverflow.com/a/41400321/
|
||||
with pytest.raises(OSError):
|
||||
LTZ._get_tz(datetime.min)
|
||||
|
||||
|
||||
def test_policies() -> None:
|
||||
|
@ -73,36 +81,15 @@ def D(dstr: str) -> datetime:
|
|||
return datetime.strptime(dstr, '%Y%m%d %H:%M:%S')
|
||||
|
||||
|
||||
# TODO copy pasted from location.py, need to extract some common provider
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def prepare(tmp_path: Path):
|
||||
from .common import reset_modules
|
||||
reset_modules()
|
||||
|
||||
LTZ.config.fast = True
|
||||
|
||||
from .location import _prepare_google_config
|
||||
google = _prepare_google_config(tmp_path)
|
||||
|
||||
class location:
|
||||
home = (
|
||||
# supports ISO strings
|
||||
('2005-12-04' , (42.697842, 23.325973)), # Bulgaria, Sofia
|
||||
# supports date/datetime objects
|
||||
(date(year=1980, month=2, day=15) , (40.7128 , -74.0060 )), # NY
|
||||
# check tz handling..
|
||||
(datetime.fromtimestamp(1600000000, tz=timezone.utc), (55.7558 , 37.6173 )), # Moscow, Russia
|
||||
)
|
||||
# note: order doesn't matter, will be sorted in the data provider
|
||||
|
||||
class time:
|
||||
class tz:
|
||||
class via_location:
|
||||
pass # just rely on the defaults...
|
||||
from .shared_config import temp_config
|
||||
conf = temp_config(tmp_path)
|
||||
|
||||
import my.core.cfg as C
|
||||
with C.tmp_config() as config:
|
||||
config.google = google
|
||||
config.time = time
|
||||
config.location = location
|
||||
config.google = conf.google
|
||||
config.time = conf.time
|
||||
config.location = conf.location
|
||||
yield
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue