diff --git a/media/__init__.py b/media/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/media/imdb.py b/media/imdb.py
new file mode 100644
index 0000000..ce47c51
--- /dev/null
+++ b/media/imdb.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""Rated movies read from the IMDb CSV exports kept under BACKUPS / 'imdb'."""
+import csv
+import json
+from datetime import datetime
+from typing import Iterator, List, NamedTuple
+
+from ..paths import BACKUPS
+
+
+# Directory that is searched for the IMDb CSV exports.
+BDIR = BACKUPS / 'imdb'
+
+
+def get_last():
+    """Return the ``*.csv`` file in BDIR whose path compares greatest.
+
+    Raises:
+        ValueError: if BDIR contains no ``*.csv`` file.
+    """
+    # TODO wonder where did json come from..
+    last = max(BDIR.glob('*.csv'), default=None)
+    if last is None:
+        raise ValueError(f'no *.csv exports found in {BDIR}')
+    return last
+
+
+class Movie(NamedTuple):
+    """One row of the IMDb export."""
+
+    created: datetime  # parsed from the 'created' column
+    title: str  # the 'Title' column
+    rating: int  # the 'You rated' column, converted to int
+
+
+def iter_movies() -> Iterator[Movie]:
+    """Yield a Movie for each row of the file returned by get_last().
+
+    Raises:
+        KeyError: if a row lacks one of the columns read below.
+        ValueError: if 'created' does not match the expected timestamp
+            format or 'You rated' is not an integer.
+    """
+    last = get_last()
+
+    # newline='' is what the csv module asks for when it is handed a file.
+    with last.open(newline='') as fo:
+        for line in csv.DictReader(fo):
+            # TODO extract directors??
+            # TODO const??
+            yield Movie(
+                created=datetime.strptime(line['created'], '%a %b %d %H:%M:%S %Y'),
+                title=line['Title'],
+                # DictReader yields strings; Movie.rating is declared as int.
+                rating=int(line['You rated']),
+            )
+
+
+def get_movies() -> List[Movie]:
+    """Return every movie from iter_movies(), sorted by ``created`` ascending."""
+    # sorted() already returns a new list, so no extra list() wrapper is needed.
+    return sorted(iter_movies(), key=lambda m: m.created)
+
+
+def test():
+    """Check that the export yields more than 10 movies."""
+    assert len(get_movies()) > 10
+
+
+def main():
+    """Print every movie, one per line."""
+    for movie in get_movies():
+        print(movie)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/media/movies.py b/media/movies.py
new file mode 100644
index 0000000..448a987
--- /dev/null
+++ b/media/movies.py
@@ -0,0 +1,3 @@
+from .imdb import get_movies
+
+# TODO extract items from org mode? perhaps not very high priority
diff --git a/media/youtube.py b/media/youtube.py
new file mode 100755
index 0000000..d0ebcc2
--- /dev/null
+++ b/media/youtube.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+from datetime import datetime
+from typing import NamedTuple, List
+from pathlib import Path
+
+from kython.ktakeout import TakeoutHTMLParser
+from kython.kompress import open as kopen
+
+# Directory that is searched for the Takeout ``*.zip`` archives.
+BDIR = Path("/L/backups/takeout/karlicoss_gmail_com/")
+
+
+class Watched(NamedTuple):
+    """One watched-video record collected by get_watched()."""
+
+    url: str
+    title: str
+    when: datetime
+
+    @property
+    def eid(self) -> str:
+        """Identifier made of the URL and the ISO-formatted ``when``."""
+        return f'{self.url}-{self.when.isoformat()}'
+
+
+def get_watched() -> List[Watched]:
+    """Return the records parsed from the last archive, sorted by ``when``.
+
+    The archive used is the ``*.zip`` file in BDIR whose path compares greatest.
+
+    Raises:
+        ValueError: if BDIR contains no ``*.zip`` file.
+    """
+    last = max(BDIR.glob('*.zip'), default=None)
+    if last is None:
+        raise ValueError(f'no *.zip archives found in {BDIR}')
+
+    watches: List[Watched] = []
+
+    def cb(dt, url, title):
+        # Handed to TakeoutHTMLParser; presumably called once per parsed entry.
+        watches.append(Watched(url=url, title=title, when=dt))
+
+    parser = TakeoutHTMLParser(cb)
+
+    with kopen(last, 'Takeout/My Activity/YouTube/MyActivity.html') as fo:
+        dd = fo.read().decode('utf8')
+        parser.feed(dd)
+
+    return sorted(watches, key=lambda e: e.when)
+
+
+def test():
+    """Check that the archive yields more than 1000 records."""
+    watched = get_watched()
+    assert len(watched) > 1000
+
+
+def main():
+    """Print every record, one per line."""
+    # TODO a LOT of watches...
+    for w in get_watched():
+        print(w)
+
+
+if __name__ == '__main__':
+    main()