handle errors defensively
This commit is contained in:
parent
b694667fb9
commit
a0916ed6bd
3 changed files with 56 additions and 40 deletions
10
ci.sh
10
ci.sh
|
@ -1,10 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
cd "$(this_dir)" || exit
|
|
||||||
|
|
||||||
. ~/bash_ci
|
|
||||||
|
|
||||||
ci_run mypy location
|
|
||||||
ci_run pylint -E location
|
|
||||||
|
|
||||||
ci_report_errors
|
|
|
@ -1,16 +1,21 @@
|
||||||
from typing import NamedTuple, Iterator, List, Iterable, Collection, Sequence, Deque, Any
|
from typing import NamedTuple, Iterator, List, Iterable, Collection, Sequence, Deque, Any, Optional
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import os
|
|
||||||
from os import listdir
|
|
||||||
from os.path import join
|
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
import logging
|
import logging
|
||||||
import csv
|
import csv
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
|
||||||
|
from kython import kompress
|
||||||
|
|
||||||
|
|
||||||
|
# pip3 install geopy
|
||||||
|
import geopy # type: ignore
|
||||||
import geopy.distance # type: ignore
|
import geopy.distance # type: ignore
|
||||||
# pip3 install ijson
|
# pip3 install ijson
|
||||||
import ijson # type: ignore
|
import ijson # type: ignore
|
||||||
|
@ -18,8 +23,10 @@ import ijson # type: ignore
|
||||||
def get_logger():
|
def get_logger():
|
||||||
return logging.getLogger("location")
|
return logging.getLogger("location")
|
||||||
|
|
||||||
TAKEOUTS_PATH = "/path/to/takeout"
|
|
||||||
CACHE_PATH = "/L/data/.cache/location.picklel"
|
TAKEOUTS_PATH = Path("/path/to/takeout")
|
||||||
|
CACHE_PATH = Path("/L/data/.cache/location.picklel")
|
||||||
|
|
||||||
|
|
||||||
Tag = str
|
Tag = str
|
||||||
|
|
||||||
|
@ -27,36 +34,52 @@ class Location(NamedTuple):
|
||||||
dt: datetime
|
dt: datetime
|
||||||
lat: float
|
lat: float
|
||||||
lon: float
|
lon: float
|
||||||
alt: float
|
alt: Optional[float]
|
||||||
tag: Tag
|
tag: Tag
|
||||||
|
|
||||||
|
|
||||||
def tagger(dt: datetime, lat: float, lon: float) -> Tag:
|
def tagger(dt: datetime, point: geopy.Point) -> Tag:
|
||||||
TAGS = [
|
TAGS = [
|
||||||
# removed
|
# removed
|
||||||
]
|
]
|
||||||
for coord, dist, tag in TAGS:
|
for coord, dist, tag in TAGS:
|
||||||
if geopy.distance.distance(coord, (lat, lon)).m < dist:
|
if geopy.distance.distance(coord, point).m < dist:
|
||||||
return tag
|
return tag
|
||||||
else:
|
else:
|
||||||
return "other"
|
return "other"
|
||||||
|
|
||||||
# TODO hope they are sorted...
|
# TODO hope they are sorted...
|
||||||
# TODO that could also serve as basis for timezone provider.
|
# TODO that could also serve as basis for tz provider
|
||||||
def load_locations() -> Iterator[Location]:
|
def load_locations() -> Iterator[Location]:
|
||||||
last_takeout = max([f for f in listdir(TAKEOUTS_PATH) if re.match('takeout.*.zip', f)])
|
logger = get_logger() # TODO count errors?
|
||||||
jdata = None
|
|
||||||
with ZipFile(join(TAKEOUTS_PATH, last_takeout)).open('Takeout/Location History/Location History.json') as fo:
|
last_takeout = max(TAKEOUTS_PATH.glob('takeout*.zip'))
|
||||||
cc = 0
|
|
||||||
|
# TODO wonder if old takeouts could contribute as well??
|
||||||
|
total = 0
|
||||||
|
errors = 0
|
||||||
|
with kompress.open(last_takeout, 'Takeout/Location History/Location History.json') as fo:
|
||||||
for j in ijson.items(fo, 'locations.item'):
|
for j in ijson.items(fo, 'locations.item'):
|
||||||
dt = datetime.fromtimestamp(int(j["timestampMs"]) / 1000) # TODO utc??
|
dt = datetime.utcfromtimestamp(int(j["timestampMs"]) / 1000)
|
||||||
if cc % 10000 == 0:
|
if total % 10000 == 0:
|
||||||
print(f'processing {dt}')
|
logger.info('processing item %d %s', total, dt)
|
||||||
cc += 1
|
total += 1
|
||||||
|
|
||||||
|
dt = pytz.utc.localize(dt)
|
||||||
|
try:
|
||||||
lat = float(j["latitudeE7"] / 10000000)
|
lat = float(j["latitudeE7"] / 10000000)
|
||||||
lon = float(j["longitudeE7"] / 10000000)
|
lon = float(j["longitudeE7"] / 10000000)
|
||||||
alt = float(j["altitude"])
|
point = geopy.Point(lat, lon) # kinda sanity check that coordinates are ok
|
||||||
tag = tagger(dt, lat, lon)
|
except Exception as e:
|
||||||
|
logger.exception(e)
|
||||||
|
errors += 1
|
||||||
|
if float(errors) / total > 0.01:
|
||||||
|
raise RuntimeError('too many errors! aborting')
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
|
||||||
|
alt = j.get("altitude", None)
|
||||||
|
tag = tagger(dt, point) # TODO take accuracy into account??
|
||||||
yield Location(
|
yield Location(
|
||||||
dt=dt,
|
dt=dt,
|
||||||
lat=lat,
|
lat=lat,
|
||||||
|
@ -71,7 +94,7 @@ def iter_locations(cached: bool=False) -> Iterator[Location]:
|
||||||
|
|
||||||
import pickle as dill # type: ignore
|
import pickle as dill # type: ignore
|
||||||
if cached:
|
if cached:
|
||||||
with open(CACHE_PATH, 'rb') as fo:
|
with CACHE_PATH.open('rb') as fo:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
# TODO shit really?? it can't load now, do I need to adjust pythonpath or something?...
|
# TODO shit really?? it can't load now, do I need to adjust pythonpath or something?...
|
||||||
|
@ -180,9 +203,10 @@ def get_groups(cached: bool=False) -> List[LocInterval]:
|
||||||
|
|
||||||
def update_cache():
|
def update_cache():
|
||||||
import pickle as dill # type: ignore
|
import pickle as dill # type: ignore
|
||||||
CACHE_PATH_TMP = CACHE_PATH + '.tmp'
|
CACHE_PATH_TMP = CACHE_PATH.with_suffix('.tmp')
|
||||||
# TODO maybe, also keep on /tmp first?
|
# TODO maybe, also keep on /tmp first?
|
||||||
with open(CACHE_PATH_TMP, 'wb', 2 ** 20) as fo:
|
|
||||||
|
with CACHE_PATH_TMP.open('wb', 2 ** 20) as fo:
|
||||||
for loc in iter_locations(cached=False):
|
for loc in iter_locations(cached=False):
|
||||||
dill.dump(loc, fo)
|
dill.dump(loc, fo)
|
||||||
os.rename(CACHE_PATH_TMP, CACHE_PATH)
|
CACHE_PATH_TMP.rename(CACHE_PATH)
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
from location import get_logger, get_locations, iter_locations, get_groups
|
import sys
|
||||||
|
import logging
|
||||||
|
|
||||||
logger = get_logger()
|
from location import get_logger, get_locations, iter_locations, get_groups
|
||||||
|
|
||||||
from kython.klogging import setup_logzero
|
from kython.klogging import setup_logzero
|
||||||
|
|
||||||
setup_logzero(logger)
|
logger = get_logger()
|
||||||
|
setup_logzero(logger, level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
if len(sys.argv) > 1:
|
if len(sys.argv) > 1:
|
||||||
cmd = sys.argv[1]
|
cmd = sys.argv[1]
|
||||||
|
|
Loading…
Add table
Reference in a new issue