more takeout tweaks and comments
This commit is contained in:
parent
21e82f0cd6
commit
60ccca52ad
4 changed files with 35 additions and 11 deletions
|
@ -27,7 +27,7 @@ from ..takeout import get_last_takeout
|
|||
from ..kython import kompress
|
||||
|
||||
|
||||
logger = LazyLogger(__package__)
|
||||
logger = LazyLogger(__name__)
|
||||
|
||||
|
||||
def cache_path(*args, **kwargs):
|
||||
|
|
|
@ -20,7 +20,9 @@ class Watched(NamedTuple):
|
|||
|
||||
|
||||
def get_watched():
|
||||
path = 'Takeout/My Activity/YouTube/MyActivity.html'
|
||||
# TODO need to use a glob? to make up for old takeouts that didn't start with Takeout/
|
||||
path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
|
||||
# TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
|
||||
last = get_last_takeout(path=path)
|
||||
|
||||
watches: List[Watched] = []
|
||||
|
@ -33,6 +35,7 @@ def get_watched():
|
|||
dd = fo.read().decode('utf8')
|
||||
parser.feed(dd)
|
||||
|
||||
# TODO hmm they already come sorted.. wonder if should just rely on it..
|
||||
return list(sorted(watches, key=lambda e: e.when))
|
||||
|
||||
|
||||
|
|
|
@ -1,24 +1,27 @@
|
|||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from typing import Optional, Iterable
|
||||
|
||||
from .common import get_files
|
||||
from .kython.kompress import kopen, kexists
|
||||
|
||||
from my.config import google as config
|
||||
|
||||
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
||||
def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
|
||||
"""
|
||||
Ok, sometimes google splits takeout into two zip archives
|
||||
I guess I could detect it (they've got 001/002 etc suffixes), but for now that works fine..
|
||||
Sometimes google splits takeout into multiple archives, so we need to detect the ones that contain the path we need
|
||||
"""
|
||||
# TODO FIXME zip is not great..
|
||||
# allow a lambda expression? that way the user could restrict it
|
||||
for takeout in reversed(get_files(config.takeout_path, glob='*.zip')):
|
||||
for takeout in get_files(config.takeout_path, glob='*.zip'):
|
||||
if path is None or kexists(takeout, path):
|
||||
return takeout
|
||||
else:
|
||||
continue
|
||||
raise RuntimeError(f'Not found: {path}')
|
||||
yield takeout
|
||||
|
||||
|
||||
def get_last_takeout(*, path: Optional[str]=None) -> Path:
    """
    Return the last takeout archive yielded by get_takeouts(path=path).

    If path is given, only archives that get_takeouts reports as containing
    it are considered; if path is None, every archive qualifies.
    NOTE(review): "last" is assumed to mean "most recent", i.e. get_files is
    presumed to return archives oldest-first -- confirm against get_files.

    Raises RuntimeError if no archive matches, instead of leaking a bare
    IndexError from indexing an empty list.
    """
    # TODO more_itertools?
    last: Optional[Path] = None
    # keep only the final match; no need to materialize every archive in a list
    for takeout in get_takeouts(path=path):
        last = takeout
    if last is None:
        raise RuntimeError(f'Not found: {path}')
    return last
|
||||
|
||||
|
||||
# TODO might be a good idea to merge across multiple takeouts...
|
||||
# perhaps even a special takeout module that deals with all of this automatically?
|
||||
|
|
18
tests/takeout.py
Normal file
18
tests/takeout.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env python3
|
||||
from itertools import islice
|
||||
|
||||
from my.core.cachew import disable_cachew
|
||||
disable_cachew()
|
||||
|
||||
import my.location.takeout as LT
|
||||
|
||||
|
||||
def ilen(it):
    """
    Count the items produced by the iterable `it`.

    The iterable is consumed in the process, so a one-shot iterator or
    generator is exhausted afterwards.
    """
    # TODO more_itertools?
    # count lazily rather than building a throwaway list just to call len() on it
    return sum(1 for _ in it)
|
||||
|
||||
|
||||
def test_location_perf():
    """
    Rough performance smoke test: read at most the first 10000 items from
    LT.iter_locations() and print how many were actually read.
    """
    # 2.80 s for 10 iterations and 10K points
    # TODO try switching to jq and see how it goes? not sure..
    limit = 10000
    first_points = islice(LT.iter_locations(), limit)
    count = ilen(first_points)
    print(count)
|
Loading…
Add table
Reference in a new issue