#!/usr/bin/env python3
"""YouTube activity entries extracted from a Google Takeout archive.

Reads ``Takeout/My Activity/YouTube/MyActivity.html`` out of a ``*.zip``
archive found under ``BDIR`` and exposes the parsed entries as ``Watched``
records.
"""
# Provenance: introduced as media/youtube.py by commit
# d4480adb71162fa81aabb2abf723c42891dd2a3a ("initial youtube handler",
# Dima Gerasimov, Sun, 19 May 2019 11:00:14 +0100).
from datetime import datetime
from typing import NamedTuple, List
from pathlib import Path

from kython.ktakeout import TakeoutHTMLParser
from kython.kompress import open as kopen

# Directory searched (non-recursively) for takeout ``*.zip`` archives.
BDIR = Path("/L/backups/takeout/karlicoss_gmail_com/")


class Watched(NamedTuple):
    """One activity entry: a URL and the timestamp reported for it."""

    url: str
    # TODO title
    when: datetime

    @property
    def eid(self) -> str:
        """Return an identifier made of the URL and the ISO-formatted timestamp."""
        return f'{self.url}-{self.when.isoformat()}'


def get_watched() -> List[Watched]:
    """Return the entries of the selected takeout archive, ordered by time.

    The archive used is the greatest ``*.zip`` path directly under ``BDIR``
    (pathlib ordering); presumably the archive names sort chronologically,
    so this is the most recent one -- TODO confirm.

    Returns:
        A new list of ``Watched`` sorted ascending by ``when``. The sort is
        stable, so entries with equal timestamps keep the order in which the
        parser reported them.

    Raises:
        ValueError: if ``BDIR`` contains no ``*.zip`` archive.
    """
    archives = list(BDIR.glob('*.zip'))
    if not archives:
        # A bare max() over an empty glob raises ValueError with an opaque
        # "empty sequence" message; keep the type, make the message actionable.
        raise ValueError(f'no *.zip takeout archives found in {BDIR}')
    last = max(archives)

    watches: List[Watched] = []

    def cb(dt, url):
        # Handed to TakeoutHTMLParser; presumably invoked once per parsed
        # entry with its timestamp and URL -- verify against kython.ktakeout.
        watches.append(Watched(url=url, when=dt))

    parser = TakeoutHTMLParser(cb)

    # kopen is given the archive and a member path; the handle is read as
    # bytes and decoded here before being fed to the parser.
    with kopen(last, 'Takeout/My Activity/YouTube/MyActivity.html') as fo:
        dd = fo.read().decode('utf8')
        parser.feed(dd)

    # sorted() already returns a new list, so no extra list() copy is needed.
    return sorted(watches, key=lambda e: e.when)


def test() -> None:
    """Smoke test against the real archive: expects more than 1000 entries."""
    watched = get_watched()
    assert len(watched) > 1000


def main() -> None:
    """Print every entry returned by ``get_watched``, one per line."""
    # TODO: there are a LOT of watches...
    for w in get_watched():
        print(w)


if __name__ == '__main__':
    main()