From d4480adb71162fa81aabb2abf723c42891dd2a3a Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 19 May 2019 11:00:14 +0100 Subject: [PATCH 1/4] initial youtube handler --- media/youtube.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100755 media/youtube.py diff --git a/media/youtube.py b/media/youtube.py new file mode 100755 index 0000000..3c7a83a --- /dev/null +++ b/media/youtube.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +from datetime import datetime +from typing import NamedTuple, List +from pathlib import Path + +from kython.ktakeout import TakeoutHTMLParser +from kython.kompress import open as kopen + +BDIR = Path("/L/backups/takeout/karlicoss_gmail_com/") + +class Watched(NamedTuple): + url: str + # TODO title + when: datetime + + @property + def eid(self) -> str: + return f'{self.url}-{self.when.isoformat()}' + +def get_watched(): + last = max(BDIR.glob('*.zip')) + + watches: List[Watched] = [] + def cb(dt, url): + watches.append(Watched(url=url, when=dt)) + + parser = TakeoutHTMLParser(cb) + + with kopen(last, 'Takeout/My Activity/YouTube/MyActivity.html') as fo: + dd = fo.read().decode('utf8') + parser.feed(dd) + + return list(sorted(watches, key=lambda e: e.when)) + + +def test(): + watched = get_watched() + assert len(watched) > 1000 + + +def main(): + # TODO shit. a LOT of watches... + for w in get_watched(): + print(w) + +if __name__ == '__main__': + main() From a72f3129fd0319007d7b1dc10c07a7d0fba454bf Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 19 May 2019 11:30:43 +0100 Subject: [PATCH 2/4] support title --- media/youtube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/media/youtube.py b/media/youtube.py index 3c7a83a..d0ebcc2 100755 --- a/media/youtube.py +++ b/media/youtube.py @@ -10,7 +10,7 @@ BDIR = Path("/L/backups/takeout/karlicoss_gmail_com/") class Watched(NamedTuple): url: str - # TODO title + title: str when: datetime @property @@ -21,8 +21,8 @@ def get_watched(): last = max(BDIR.glob('*.zip')) watches: List[Watched] = [] - def cb(dt, url): - watches.append(Watched(url=url, when=dt)) + def cb(dt, url, title): + watches.append(Watched(url=url, title=title, when=dt)) parser = TakeoutHTMLParser(cb) From 8371ccb42fdae9e902926243a0aa4c9cde0ca7d9 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 13 Aug 2019 21:39:28 +0100 Subject: [PATCH 3/4] imdb provider --- media/__init__.py | 0 media/imdb.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++ media/movies.py | 1 + 3 files changed, 52 insertions(+) create mode 100644 media/__init__.py create mode 100644 media/imdb.py create mode 100644 media/movies.py diff --git a/media/__init__.py b/media/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/media/imdb.py b/media/imdb.py new file mode 100644 index 0000000..00802a3 --- /dev/null +++ b/media/imdb.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import csv +import json +from typing import Iterator, List, NamedTuple + +from ..paths import BACKUPS + + +BDIR = BACKUPS / 'imdb' + + +def get_last(): + # TODO wonder where did json come from.. + return max(BDIR.glob('*.csv')) + + +class Movie(NamedTuple): + created: str + title: str + rating: int + + +def iter_movies() -> Iterator[Movie]: + last = get_last() + + with last.open() as fo: + reader = csv.DictReader(fo) + for i, line in enumerate(reader): + # print(line) + # id_ = "n" + str(i) + title = line['Title'] + rating = line['You rated'] + created = line['created'] + # TODO const?? + yield Movie(created=created, title=title, rating=rating) + + +def get_movies() -> List[Movie]: + return list(sorted(iter_movies(), key=lambda m: m.created)) + + +def test(): + assert len(get_movies()) > 10 + + +def main(): + for movie in get_movies(): + print(movie) + +if __name__ == '__main__': + main() diff --git a/media/movies.py b/media/movies.py new file mode 100644 index 0000000..ef06b92 --- /dev/null +++ b/media/movies.py @@ -0,0 +1 @@ +from .imdb import get_movies From fb3c6aebe791b8ac9343df242cb1a6dd4d5c0555 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 13 Aug 2019 21:59:31 +0100 Subject: [PATCH 4/4] extract created date --- media/imdb.py | 9 +++++---- media/movies.py | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/media/imdb.py b/media/imdb.py index 00802a3..ce47c51 100644 --- a/media/imdb.py +++ b/media/imdb.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import csv import json +from datetime import datetime from typing import Iterator, List, NamedTuple from ..paths import BACKUPS @@ -15,7 +16,7 @@ def get_last(): class Movie(NamedTuple): - created: str + created: datetime title: str rating: int @@ -26,11 +27,11 @@ def iter_movies() -> Iterator[Movie]: with last.open() as fo: reader = csv.DictReader(fo) for i, line in enumerate(reader): - # print(line) - # id_ = "n" + str(i) + # TODO extract directors?? title = line['Title'] rating = line['You rated'] - created = line['created'] + createds = line['created'] + created = datetime.strptime(createds, '%a %b %d %H:%M:%S %Y') # TODO const?? yield Movie(created=created, title=title, rating=rating) diff --git a/media/movies.py b/media/movies.py index ef06b92..448a987 100644 --- a/media/movies.py +++ b/media/movies.py @@ -1 +1,3 @@ from .imdb import get_movies + +# TODO extract items from org mode? perhaps not very high priority