handle errors defensively

This commit is contained in:
Dima Gerasimov 2019-04-12 20:46:57 +01:00
parent b694667fb9
commit a0916ed6bd
3 changed files with 56 additions and 40 deletions

10
ci.sh
View file

@ -1,10 +0,0 @@
#!/bin/bash
cd "$(this_dir)" || exit
. ~/bash_ci
ci_run mypy location
ci_run pylint -E location
ci_report_errors

View file

@ -1,16 +1,21 @@
from typing import NamedTuple, Iterator, List, Iterable, Collection, Sequence, Deque, Any from typing import NamedTuple, Iterator, List, Iterable, Collection, Sequence, Deque, Any, Optional
from collections import deque from collections import deque
from itertools import islice from itertools import islice
from datetime import datetime from datetime import datetime
import os
from os import listdir
from os.path import join
from zipfile import ZipFile from zipfile import ZipFile
import logging import logging
import csv import csv
import re import re
import json import json
from pathlib import Path
import pytz
from kython import kompress
# pipe install geopy
import geopy # type: ignore
import geopy.distance # type: ignore import geopy.distance # type: ignore
# pip3 install ijson # pip3 install ijson
import ijson # type: ignore import ijson # type: ignore
@ -18,8 +23,10 @@ import ijson # type: ignore
def get_logger(): def get_logger():
return logging.getLogger("location") return logging.getLogger("location")
TAKEOUTS_PATH = "/path/to/takeout"
CACHE_PATH = "/L/data/.cache/location.picklel" TAKEOUTS_PATH = Path("/path/to/takeout")
CACHE_PATH = Path("/L/data/.cache/location.picklel")
Tag = str Tag = str
@ -27,36 +34,52 @@ class Location(NamedTuple):
dt: datetime dt: datetime
lat: float lat: float
lon: float lon: float
alt: float alt: Optional[float]
tag: Tag tag: Tag
def tagger(dt: datetime, lat: float, lon: float) -> Tag: def tagger(dt: datetime, point: geopy.Point) -> Tag:
TAGS = [ TAGS = [
# removed # removed
] ]
for coord, dist, tag in TAGS: for coord, dist, tag in TAGS:
if geopy.distance.distance(coord, (lat, lon)).m < dist: if geopy.distance.distance(coord, point).m < dist:
return tag return tag
else: else:
return "other" return "other"
# TODO hope they are sorted... # TODO hope they are sorted...
# TODO that could also serve as basis for timezone provider. # TODO that could also serve as basis for tz provider
def load_locations() -> Iterator[Location]: def load_locations() -> Iterator[Location]:
last_takeout = max([f for f in listdir(TAKEOUTS_PATH) if re.match('takeout.*.zip', f)]) logger = get_logger() # TODO count errors?
jdata = None
with ZipFile(join(TAKEOUTS_PATH, last_takeout)).open('Takeout/Location History/Location History.json') as fo: last_takeout = max(TAKEOUTS_PATH.glob('takeout*.zip'))
cc = 0
# TODO wonder if old takeouts could contribute as well??
total = 0
errors = 0
with kompress.open(last_takeout, 'Takeout/Location History/Location History.json') as fo:
for j in ijson.items(fo, 'locations.item'): for j in ijson.items(fo, 'locations.item'):
dt = datetime.fromtimestamp(int(j["timestampMs"]) / 1000) # TODO utc?? dt = datetime.utcfromtimestamp(int(j["timestampMs"]) / 1000)
if cc % 10000 == 0: if total % 10000 == 0:
print(f'processing {dt}') logger.info('processing item %d %s', total, dt)
cc += 1 total += 1
dt = pytz.utc.localize(dt)
try:
lat = float(j["latitudeE7"] / 10000000) lat = float(j["latitudeE7"] / 10000000)
lon = float(j["longitudeE7"] / 10000000) lon = float(j["longitudeE7"] / 10000000)
alt = float(j["altitude"]) point = geopy.Point(lat, lon) # kinda sanity check that coordinates are ok
tag = tagger(dt, lat, lon) except Exception as e:
logger.exception(e)
errors += 1
if float(errors) / total > 0.01:
raise RuntimeError('too many errors! aborting')
else:
continue
alt = j.get("altitude", None)
tag = tagger(dt, point) # TODO take accuracy into account??
yield Location( yield Location(
dt=dt, dt=dt,
lat=lat, lat=lat,
@ -71,7 +94,7 @@ def iter_locations(cached: bool=False) -> Iterator[Location]:
import pickle as dill # type: ignore import pickle as dill # type: ignore
if cached: if cached:
with open(CACHE_PATH, 'rb') as fo: with CACHE_PATH.open('rb') as fo:
while True: while True:
try: try:
# TODO shit really?? it can't load now, do I need to adjust pythonpath or something?... # TODO shit really?? it can't load now, do I need to adjust pythonpath or something?...
@ -180,9 +203,10 @@ def get_groups(cached: bool=False) -> List[LocInterval]:
def update_cache(): def update_cache():
import pickle as dill # type: ignore import pickle as dill # type: ignore
CACHE_PATH_TMP = CACHE_PATH + '.tmp' CACHE_PATH_TMP = CACHE_PATH.with_suffix('.tmp')
# TODO maybe, also keep on /tmp first? # TODO maybe, also keep on /tmp first?
with open(CACHE_PATH_TMP, 'wb', 2 ** 20) as fo:
with CACHE_PATH_TMP.open('wb', 2 ** 20) as fo:
for loc in iter_locations(cached=False): for loc in iter_locations(cached=False):
dill.dump(loc, fo) dill.dump(loc, fo)
os.rename(CACHE_PATH_TMP, CACHE_PATH) CACHE_PATH_TMP.rename(CACHE_PATH)

View file

@ -1,12 +1,14 @@
from location import get_logger, get_locations, iter_locations, get_groups import sys
import logging
logger = get_logger() from location import get_logger, get_locations, iter_locations, get_groups
from kython.klogging import setup_logzero from kython.klogging import setup_logzero
setup_logzero(logger) logger = get_logger()
setup_logzero(logger, level=logging.INFO)
import sys
if len(sys.argv) > 1: if len(sys.argv) > 1:
cmd = sys.argv[1] cmd = sys.argv[1]