diff --git a/my/media/youtube.py b/my/media/youtube.py index ddf659c..ba7385f 100755 --- a/my/media/youtube.py +++ b/my/media/youtube.py @@ -6,16 +6,7 @@ from pathlib import Path from kython.ktakeout import TakeoutHTMLParser from kython.kompress import open as kopen -from ..common import get_files - -from mycfg import paths - - -def _get_last_takeout(): - # TODO FIXME might be a good idea to merge across multiple taekouts... - # perhaps even a special takeout module that deals with all of this automatically? - # e.g. accumulate, filter and maybe report useless takeouts? - return max(get_files(paths.google.takeout_path, glob='*.zip')) +from ..takeout import get_last_takeout class Watched(NamedTuple): @@ -29,7 +20,8 @@ class Watched(NamedTuple): def get_watched(): - last = _get_last_takeout() + path = 'Takeout/My Activity/YouTube/MyActivity.html' + last = get_last_takeout(path=path) watches: List[Watched] = [] def cb(dt, url, title): @@ -37,18 +29,13 @@ def get_watched(): parser = TakeoutHTMLParser(cb) - with kopen(last, 'Takeout/My Activity/YouTube/MyActivity.html') as fo: + with kopen(last, path) as fo: dd = fo.read().decode('utf8') parser.feed(dd) return list(sorted(watches, key=lambda e: e.when)) -def test(): - watched = get_watched() - assert len(watched) > 1000 - - def main(): # TODO shit. a LOT of watches... for w in get_watched(): diff --git a/my/takeout.py b/my/takeout.py new file mode 100644 index 0000000..03378e2 --- /dev/null +++ b/my/takeout.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import Optional + +from .common import get_files + +from mycfg import paths + +from kython.kompress import open as kopen + +def get_last_takeout(*, path: Optional[str]=None) -> Path: + """ + Ok, sometimes google splits takeout into two zip archives + I guess I could detect it (they've got 001/002 etc suffixes), but for now that works fine.. 
+ """ + for takeout in reversed(get_files(paths.google.takeout_path, glob='*.zip')): + if path is None: + return takeout + else: + try: + kopen(takeout, path) + return takeout + except: + # TODO eh, a bit horrible, but works for now.. + continue + raise RuntimeError(f'Not found: {path}') + +# TODO might be a good idea to merge across multiple takeouts... +# perhaps even a special takeout module that deals with all of this automatically? +# e.g. accumulate, filter and maybe report useless takeouts? + diff --git a/tests/youtube.py b/tests/youtube.py new file mode 100644 index 0000000..23a3452 --- /dev/null +++ b/tests/youtube.py @@ -0,0 +1,11 @@ +# TODO move elsewhere? + +# these tests would only make sense with some existing data? although some of them would work for everyone.. +# not sure what's a good way of handling this.. + +from my.media.youtube import get_watched + + +def test(): + watched = get_watched() + assert len(watched) > 1000