more takeout tweaks and comments

This commit is contained in:
Dima Gerasimov 2020-04-24 15:57:44 +01:00
parent 21e82f0cd6
commit 60ccca52ad
4 changed files with 35 additions and 11 deletions

View file

@ -27,7 +27,7 @@ from ..takeout import get_last_takeout
from ..kython import kompress from ..kython import kompress
logger = LazyLogger(__package__) logger = LazyLogger(__name__)
def cache_path(*args, **kwargs): def cache_path(*args, **kwargs):

View file

@ -20,7 +20,9 @@ class Watched(NamedTuple):
def get_watched(): def get_watched():
path = 'Takeout/My Activity/YouTube/MyActivity.html' # TODO need to use a glob? to make up for old takeouts that didn't start with Takeout/
path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
# TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
last = get_last_takeout(path=path) last = get_last_takeout(path=path)
watches: List[Watched] = [] watches: List[Watched] = []
@ -33,6 +35,7 @@ def get_watched():
dd = fo.read().decode('utf8') dd = fo.read().decode('utf8')
parser.feed(dd) parser.feed(dd)
# TODO hmm they already come sorted.. wonder if should just rely on it..
return list(sorted(watches, key=lambda e: e.when)) return list(sorted(watches, key=lambda e: e.when))

View file

@ -1,24 +1,27 @@
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional, Iterable
from .common import get_files from .common import get_files
from .kython.kompress import kopen, kexists from .kython.kompress import kopen, kexists
from my.config import google as config from my.config import google as config
def get_last_takeout(*, path: Optional[str]=None) -> Path: def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
""" """
Ok, sometimes google splits takeout into two zip archives Sometimes google splits takeout into multiple archives, so we need to detect the ones that contain the path we need
I guess I could detect it (they've got 001/002 etc suffixes), but for now that works fine..
""" """
# TODO FIXME zip is not great.. # TODO FIXME zip is not great..
# allow a lambda expression? that way the user could restrict it # allow a lambda expression? that way the user could restrict it
for takeout in reversed(get_files(config.takeout_path, glob='*.zip')): for takeout in get_files(config.takeout_path, glob='*.zip'):
if path is None or kexists(takeout, path): if path is None or kexists(takeout, path):
return takeout yield takeout
else:
continue
raise RuntimeError(f'Not found: {path}') def get_last_takeout(*, path: Optional[str]=None) -> Path:
# TODO more_itertools?
matching = list(get_takeouts(path=path))
return matching[-1]
# TODO might be a good idea to merge across multiple takeouts... # TODO might be a good idea to merge across multiple takeouts...
# perhaps even a special takeout module that deals with all of this automatically? # perhaps even a special takeout module that deals with all of this automatically?

18
tests/takeout.py Normal file
View file

@ -0,0 +1,18 @@
#!/usr/bin/env python3
from itertools import islice
from my.core.cachew import disable_cachew
# NOTE(review): cachew is switched off before the takeout module is imported --
# presumably so the perf test below exercises uncached parsing; confirm that
# this ordering is actually required before reordering these lines.
disable_cachew()
import my.location.takeout as LT
def ilen(it):
    """Return the number of items produced by the iterable *it*.

    The iterable is consumed in the process. Items are counted one at a
    time instead of being collected into a list first, so memory use stays
    constant regardless of how many items the iterable yields.
    """
    # TODO more_itertools?
    return sum(1 for _ in it)
def test_location_perf():
    """Print how many locations are yielded when taking at most the first 10000."""
    # Reference timing: 2.80 s for 10 iterations and 10K points
    # TODO try switching to jq and see how it goes? not sure..
    limit = 10000
    first_points = islice(LT.iter_locations(), 0, limit)
    print(ilen(first_points))