Collect from zip; iteratively
This commit is contained in:
parent
70a09a80ba
commit
96c8c324f3
1 changed file with 18 additions and 10 deletions
|
@ -1,13 +1,21 @@
|
||||||
from typing import NamedTuple, Iterator, List, Iterable, Collection, Sequence
|
from typing import NamedTuple, Iterator, List, Iterable, Collection, Sequence
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from os import listdir
|
||||||
|
from os.path import join
|
||||||
|
from zipfile import ZipFile
|
||||||
import logging
|
import logging
|
||||||
import csv
|
import csv
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
import geopy.distance # type: ignore
|
import geopy.distance # type: ignore
|
||||||
|
# pip3 install ijson
|
||||||
|
import ijson # type: ignore
|
||||||
|
|
||||||
def get_logger():
|
def get_logger():
|
||||||
return logging.getLogger("location")
|
return logging.getLogger("location")
|
||||||
|
|
||||||
PATH = "/L/data/location/location.csv"
|
TAKEOUTS_PATH = "/path/to/takeout"
|
||||||
CACHE_PATH = "/L/.cache/location.cache"
|
CACHE_PATH = "/L/.cache/location.cache"
|
||||||
|
|
||||||
# TODO need to cache?
|
# TODO need to cache?
|
||||||
|
@ -35,15 +43,14 @@ def tagger(dt: datetime, lat: float, lon: float) -> Tag:
|
||||||
# TODO hope they are sorted...
|
# TODO hope they are sorted...
|
||||||
# TODO that could also serve as basis for timezone provider.
|
# TODO that could also serve as basis for timezone provider.
|
||||||
def iter_locations() -> Iterator[Location]:
|
def iter_locations() -> Iterator[Location]:
|
||||||
with open(PATH) as fo:
|
last_takeout = max([f for f in listdir(TAKEOUTS_PATH) if re.match('takeout.*.zip', f)])
|
||||||
reader = csv.reader(fo)
|
jdata = None
|
||||||
next(reader) # skip header
|
with ZipFile(join(TAKEOUTS_PATH, last_takeout)).open('Takeout/Location History/Location History.json') as fo:
|
||||||
for ll in reader:
|
for j in ijson.items(fo, 'locations.item'):
|
||||||
[ts, lats, lons] = ll
|
# TODO eh, not very streaming?..
|
||||||
# TODO hmm, is it local??
|
dt = datetime.fromtimestamp(int(j["timestampMs"]) / 1000) # TODO utc??
|
||||||
dt = datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
|
lat = float(j["latitudeE7"] / 10000000)
|
||||||
lat = float(lats)
|
lon = float(j["longitudeE7"] / 10000000)
|
||||||
lon = float(lons)
|
|
||||||
tag = tagger(dt, lat, lon)
|
tag = tagger(dt, lat, lon)
|
||||||
yield Location(
|
yield Location(
|
||||||
dt=dt,
|
dt=dt,
|
||||||
|
@ -65,6 +72,7 @@ class LocInterval(NamedTuple):
|
||||||
from_: Location
|
from_: Location
|
||||||
to: Location
|
to: Location
|
||||||
|
|
||||||
|
# TODO could cache groups too?... using 16% cpu is a bit annoying.. could also use some sliding window here
|
||||||
def get_groups(cached: bool=False) -> List[LocInterval]:
|
def get_groups(cached: bool=False) -> List[LocInterval]:
|
||||||
locs = get_locations(cached=cached)
|
locs = get_locations(cached=cached)
|
||||||
i = 0
|
i = 0
|
||||||
|
|
Loading…
Add table
Reference in a new issue