fully switch to kython.kcache

This commit is contained in:
Dima Gerasimov 2019-05-01 00:08:38 +01:00
parent 22333d5078
commit 001f030fff
2 changed files with 20 additions and 26 deletions

View file

@@ -26,6 +26,7 @@ def get_logger():
TAKEOUTS_PATH = Path("/path/to/takeout") TAKEOUTS_PATH = Path("/path/to/takeout")
CACHE_PATH = Path('/L/data/.cache/location.sqlite')
Tag = str Tag = str
@@ -37,7 +38,7 @@ class Location(NamedTuple):
alt: Optional[float] alt: Optional[float]
tag: Tag tag: Tag
dbcache = make_dbcache('/L/data/.cache/location.sqlite', hashf=mtime_hash, type_=Location) dbcache = make_dbcache(CACHE_PATH, hashf=mtime_hash, type_=Location, chunk_by=10000)
def tagger(dt: datetime, point: geopy.Point) -> Tag: def tagger(dt: datetime, point: geopy.Point) -> Tag:
@@ -51,7 +52,6 @@ def tagger(dt: datetime, point: geopy.Point) -> Tag:
return "other" return "other"
# TODO careful, might not fit in glumov ram...
def _iter_locations_fo(fo) -> Iterator[Location]: def _iter_locations_fo(fo) -> Iterator[Location]:
logger = get_logger() logger = get_logger()
total = 0 total = 0
@@ -89,29 +89,26 @@ def _iter_locations_fo(fo) -> Iterator[Location]:
# TODO hope they are sorted... # TODO hope they are sorted...
# TODO that could also serve as basis for tz provider # TODO that could also serve as basis for tz provider
@dbcache @dbcache
def _iter_locations(path: Path) -> List[Location]: def _iter_locations(path: Path) -> Iterator[Location]:
limit = None limit = None
# TODO FIXME support archives
with path.open('r') as fo:
return list(islice(_iter_locations_fo(fo), 0, limit))
if path.suffix == '.json':
ctx = path.open('r')
else: # must be a takeout archive
ctx = kompress.open(path, 'Takeout/Location History/Location History.json')
with ctx as fo:
yield from islice(_iter_locations_fo(fo), 0, limit)
# TODO wonder if old takeouts could contribute as well?? # TODO wonder if old takeouts could contribute as well??
# with kompress.open(last_takeout, 'Takeout/Location History/Location History.json') as fo:
# return _iter_locations_fo(fo)
# TODO shit.. should support iterator.. def iter_locations() -> Iterator[Location]:
def iter_locations() -> List[Location]:
last_takeout = max(TAKEOUTS_PATH.glob('takeout*.zip')) last_takeout = max(TAKEOUTS_PATH.glob('takeout*.zip'))
last_takeout = Path('/L/tmp/LocationHistory.json')
return _iter_locations(last_takeout) return _iter_locations(last_takeout)
import sys
sys.path.append('/L/Dropbox/data/location_provider') # jeez.. otherwise it refuses to unpickle :(
def get_locations() -> Sequence[Location]:
def get_locations(cached: bool=False) -> Sequence[Location]: return list(iter_locations())
return list(iter_locations(cached=cached))
class LocInterval(NamedTuple): class LocInterval(NamedTuple):
from_: Location from_: Location
@@ -203,12 +200,10 @@ def get_groups() -> List[LocInterval]:
dump_group() dump_group()
return groups return groups
def update_cache():
import pickle as dill # type: ignore
CACHE_PATH_TMP = CACHE_PATH.with_suffix('.tmp')
# TODO maybe, also keep on /tmp first?
with CACHE_PATH_TMP.open('wb', 2 ** 20) as fo: def update_cache():
for loc in iter_locations(cached=False): # TODO perhaps set hash to null instead, that's a bit less intrusive
dill.dump(loc, fo) if CACHE_PATH.exists():
CACHE_PATH_TMP.rename(CACHE_PATH) CACHE_PATH.unlink()
for _ in iter_locations():
pass

View file

@@ -13,19 +13,18 @@ def main():
setup_logzero(get_kcache_logger(), level=logging.DEBUG) setup_logzero(get_kcache_logger(), level=logging.DEBUG)
if len(sys.argv) > 1: if len(sys.argv) > 1:
cmd = sys.argv[1] cmd = sys.argv[1]
# TODO ok, update cache makes sense just to refresh in case of code changes... # TODO ok, update cache makes sense just to refresh in case of code changes...
if cmd == "update_cache": if cmd == "update_cache":
from location import update_cache, get_locations from location import update_cache, get_locations
update_cache() update_cache()
get_locations(cached=True)
else: else:
raise RuntimeError(f"Unknown command {cmd}") raise RuntimeError(f"Unknown command {cmd}")
else: else:
for p in get_groups(): for p in get_groups():
print(p) print(p)
# shit. ok, 4 gigs of ram is def too much for glumov...
# TODO need datetime! # TODO need datetime!
if __name__ == '__main__': if __name__ == '__main__':