more takeout tweaks and comments
This commit is contained in:
parent
21e82f0cd6
commit
60ccca52ad
4 changed files with 35 additions and 11 deletions
|
@ -27,7 +27,7 @@ from ..takeout import get_last_takeout
|
||||||
from ..kython import kompress
|
from ..kython import kompress
|
||||||
|
|
||||||
|
|
||||||
logger = LazyLogger(__package__)
|
logger = LazyLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def cache_path(*args, **kwargs):
|
def cache_path(*args, **kwargs):
|
||||||
|
|
|
@ -20,7 +20,9 @@ class Watched(NamedTuple):
|
||||||
|
|
||||||
|
|
||||||
def get_watched():
|
def get_watched():
|
||||||
path = 'Takeout/My Activity/YouTube/MyActivity.html'
|
# TODO need to use a glob? to make up for old takeouts that didn't start with Takeout/
|
||||||
|
path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
|
||||||
|
# TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
|
||||||
last = get_last_takeout(path=path)
|
last = get_last_takeout(path=path)
|
||||||
|
|
||||||
watches: List[Watched] = []
|
watches: List[Watched] = []
|
||||||
|
@ -33,6 +35,7 @@ def get_watched():
|
||||||
dd = fo.read().decode('utf8')
|
dd = fo.read().decode('utf8')
|
||||||
parser.feed(dd)
|
parser.feed(dd)
|
||||||
|
|
||||||
|
# TODO hmm they already come sorted.. wonder if we should just rely on it..
|
||||||
return list(sorted(watches, key=lambda e: e.when))
|
return list(sorted(watches, key=lambda e: e.when))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,24 +1,27 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional, Iterable
|
||||||
|
|
||||||
from .common import get_files
|
from .common import get_files
|
||||||
from .kython.kompress import kopen, kexists
|
from .kython.kompress import kopen, kexists
|
||||||
|
|
||||||
from my.config import google as config
|
from my.config import google as config
|
||||||
|
|
||||||
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
|
||||||
"""
|
"""
|
||||||
Ok, sometimes google splits takeout into two zip archives
|
Sometimes google splits takeout into multiple archives, so we need to detect the ones that contain the path we need
|
||||||
I guess I could detect it (they've got 001/002 etc suffixes), but for now that works fine..
|
|
||||||
"""
|
"""
|
||||||
# TODO FIXME zip is not great..
|
# TODO FIXME zip is not great..
|
||||||
# allow a lambda expression? that way the user could restrict it
|
# allow a lambda expression? that way the user could restrict it
|
||||||
for takeout in reversed(get_files(config.takeout_path, glob='*.zip')):
|
for takeout in get_files(config.takeout_path, glob='*.zip'):
|
||||||
if path is None or kexists(takeout, path):
|
if path is None or kexists(takeout, path):
|
||||||
return takeout
|
yield takeout
|
||||||
else:
|
|
||||||
continue
|
|
||||||
raise RuntimeError(f'Not found: {path}')
|
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
||||||
|
# TODO more_itertools?
|
||||||
|
matching = list(get_takeouts(path=path))
|
||||||
|
return matching[-1]
|
||||||
|
|
||||||
|
|
||||||
# TODO might be a good idea to merge across multiple takeouts...
|
# TODO might be a good idea to merge across multiple takeouts...
|
||||||
# perhaps even a special takeout module that deals with all of this automatically?
|
# perhaps even a special takeout module that deals with all of this automatically?
|
||||||
|
|
18
tests/takeout.py
Normal file
18
tests/takeout.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from itertools import islice
|
||||||
|
|
||||||
|
from my.core.cachew import disable_cachew
|
||||||
|
disable_cachew()
|
||||||
|
|
||||||
|
import my.location.takeout as LT
|
||||||
|
|
||||||
|
|
||||||
|
def ilen(it):
|
||||||
|
# TODO more_itertools?
|
||||||
|
return len(list(it))
|
||||||
|
|
||||||
|
|
||||||
|
def test_location_perf():
|
||||||
|
# 2.80 s for 10 iterations and 10K points
|
||||||
|
# TODO try switching to jq and see how it goes? not sure..
|
||||||
|
print(ilen(islice(LT.iter_locations(), 0, 10000)))
|
Loading…
Add table
Reference in a new issue