From 001f030fff732bf59f7f7b74ca19f0d17bc513ea Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 1 May 2019 00:08:38 +0100 Subject: [PATCH] fully switch to kython.kcache --- location/__init__.py | 43 +++++++++++++++++++------------------------ location/__main__.py | 3 +-- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/location/__init__.py b/location/__init__.py index 5f5bd01..a07a0d3 100644 --- a/location/__init__.py +++ b/location/__init__.py @@ -26,6 +26,7 @@ def get_logger(): TAKEOUTS_PATH = Path("/path/to/takeout") +CACHE_PATH = Path('/L/data/.cache/location.sqlite') Tag = str @@ -37,7 +38,7 @@ class Location(NamedTuple): alt: Optional[float] tag: Tag -dbcache = make_dbcache('/L/data/.cache/location.sqlite', hashf=mtime_hash, type_=Location) +dbcache = make_dbcache(CACHE_PATH, hashf=mtime_hash, type_=Location, chunk_by=10000) def tagger(dt: datetime, point: geopy.Point) -> Tag: @@ -51,7 +52,6 @@ def tagger(dt: datetime, point: geopy.Point) -> Tag: return "other" -# TODO careful, might not fit in glumov ram... def _iter_locations_fo(fo) -> Iterator[Location]: logger = get_logger() total = 0 @@ -89,29 +89,26 @@ def _iter_locations_fo(fo) -> Iterator[Location]: # TODO hope they are sorted... # TODO that could also serve as basis for tz provider @dbcache -def _iter_locations(path: Path) -> List[Location]: +def _iter_locations(path: Path) -> Iterator[Location]: limit = None - # TODO FIXME support archives - with path.open('r') as fo: - return list(islice(_iter_locations_fo(fo), 0, limit)) + if path.suffix == '.json': + ctx = path.open('r') + else: # must be a takeout archive + ctx = kompress.open(path, 'Takeout/Location History/Location History.json') + + with ctx as fo: + yield from islice(_iter_locations_fo(fo), 0, limit) # TODO wonder if old takeouts could contribute as well?? - # with kompress.open(last_takeout, 'Takeout/Location History/Location History.json') as fo: - # return _iter_locations_fo(fo) -# TODO shit.. should support iterator.. -def iter_locations() -> List[Location]: +def iter_locations() -> Iterator[Location]: last_takeout = max(TAKEOUTS_PATH.glob('takeout*.zip')) - last_takeout = Path('/L/tmp/LocationHistory.json') return _iter_locations(last_takeout) - import sys - sys.path.append('/L/Dropbox/data/location_provider') # jeez.. otherwise it refuses to unpickle :( - -def get_locations(cached: bool=False) -> Sequence[Location]: - return list(iter_locations(cached=cached)) +def get_locations() -> Sequence[Location]: + return list(iter_locations()) class LocInterval(NamedTuple): from_: Location @@ -203,12 +200,10 @@ def get_groups() -> List[LocInterval]: dump_group() return groups -def update_cache(): - import pickle as dill # type: ignore - CACHE_PATH_TMP = CACHE_PATH.with_suffix('.tmp') - # TODO maybe, also keep on /tmp first? - with CACHE_PATH_TMP.open('wb', 2 ** 20) as fo: - for loc in iter_locations(cached=False): - dill.dump(loc, fo) - CACHE_PATH_TMP.rename(CACHE_PATH) +def update_cache(): + # TODO perhaps set hash to null instead, that's a bit less intrusive + if CACHE_PATH.exists(): + CACHE_PATH.unlink() + for _ in iter_locations(): + pass diff --git a/location/__main__.py b/location/__main__.py index e246c3c..a796ea6 100644 --- a/location/__main__.py +++ b/location/__main__.py @@ -13,19 +13,18 @@ def main(): setup_logzero(get_kcache_logger(), level=logging.DEBUG) - if len(sys.argv) > 1: cmd = sys.argv[1] # TODO ok, update cache makes sense just to refresh in case of code changes... if cmd == "update_cache": from location import update_cache, get_locations update_cache() - get_locations(cached=True) else: raise RuntimeError(f"Unknown command {cmd}") else: for p in get_groups(): print(p) + # shit. ok, 4 gigs of ram is def too much for glumov... # TODO need datetime! if __name__ == '__main__':