From d4480adb71162fa81aabb2abf723c42891dd2a3a Mon Sep 17 00:00:00 2001
From: Dima Gerasimov <karlicoss@gmail.com>
Date: Sun, 19 May 2019 11:00:14 +0100
Subject: [PATCH 1/4] initial youtube handler

---
 media/youtube.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100755 media/youtube.py

diff --git a/media/youtube.py b/media/youtube.py
new file mode 100755
index 0000000..3c7a83a
--- /dev/null
+++ b/media/youtube.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+from datetime import datetime
+from typing import NamedTuple, List
+from pathlib import Path
+
+from kython.ktakeout import TakeoutHTMLParser
+from kython.kompress import open as kopen
+
+BDIR = Path("/L/backups/takeout/karlicoss_gmail_com/")
+
+class Watched(NamedTuple):
+    url: str
+    # TODO title
+    when: datetime
+
+    @property
+    def eid(self) -> str:
+        return f'{self.url}-{self.when.isoformat()}'
+
+def get_watched():
+    last = max(BDIR.glob('*.zip'))
+
+    watches: List[Watched] = []
+    def cb(dt, url):
+        watches.append(Watched(url=url, when=dt))
+
+    parser = TakeoutHTMLParser(cb)
+
+    with kopen(last, 'Takeout/My Activity/YouTube/MyActivity.html') as fo:
+        dd = fo.read().decode('utf8')
+        parser.feed(dd)
+
+    return list(sorted(watches, key=lambda e: e.when))
+
+
+def test():
+    watched = get_watched()
+    assert len(watched) > 1000
+
+
+def main():
+    # TODO: the history contains a LOT of watches, so this prints a very long list
+    for w in get_watched():
+        print(w)
+
+if __name__ == '__main__':
+    main()

From a72f3129fd0319007d7b1dc10c07a7d0fba454bf Mon Sep 17 00:00:00 2001
From: Dima Gerasimov <karlicoss@gmail.com>
Date: Sun, 19 May 2019 11:30:43 +0100
Subject: [PATCH 2/4] support title

---
 media/youtube.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/media/youtube.py b/media/youtube.py
index 3c7a83a..d0ebcc2 100755
--- a/media/youtube.py
+++ b/media/youtube.py
@@ -10,7 +10,7 @@ BDIR = Path("/L/backups/takeout/karlicoss_gmail_com/")
 
 class Watched(NamedTuple):
     url: str
-    # TODO title
+    title: str
     when: datetime
 
     @property
@@ -21,8 +21,8 @@ def get_watched():
     last = max(BDIR.glob('*.zip'))
 
     watches: List[Watched] = []
-    def cb(dt, url):
-        watches.append(Watched(url=url, when=dt))
+    def cb(dt, url, title):
+        watches.append(Watched(url=url, title=title, when=dt))
 
     parser = TakeoutHTMLParser(cb)
 

From 8371ccb42fdae9e902926243a0aa4c9cde0ca7d9 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov <karlicoss@gmail.com>
Date: Tue, 13 Aug 2019 21:39:28 +0100
Subject: [PATCH 3/4] imdb provider

---
 media/__init__.py |  0
 media/imdb.py     | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 media/movies.py   |  1 +
 3 files changed, 52 insertions(+)
 create mode 100644 media/__init__.py
 create mode 100644 media/imdb.py
 create mode 100644 media/movies.py

diff --git a/media/__init__.py b/media/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/media/imdb.py b/media/imdb.py
new file mode 100644
index 0000000..00802a3
--- /dev/null
+++ b/media/imdb.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+import csv
+import json
+from typing import Iterator, List, NamedTuple
+
+from ..paths import BACKUPS
+
+
+BDIR = BACKUPS / 'imdb'
+
+
+def get_last():
+    # TODO: find out where the json came from; only *.csv files are picked up here
+    return max(BDIR.glob('*.csv'))
+
+
+class Movie(NamedTuple):
+    created: str
+    title: str
+    rating: int
+
+
+def iter_movies() -> Iterator[Movie]:
+    last = get_last()
+
+    with last.open() as fo:
+        reader = csv.DictReader(fo)
+        for i, line in enumerate(reader):
+            # print(line)
+            # id_ = "n" + str(i)
+            title = line['Title']
+            rating = line['You rated']
+            created = line['created']
+            # TODO const??
+            yield Movie(created=created, title=title, rating=rating)
+
+
+def get_movies() -> List[Movie]:
+    return list(sorted(iter_movies(), key=lambda m: m.created))
+
+
+def test():
+    assert len(get_movies()) > 10
+
+
+def main():
+    for movie in get_movies():
+        print(movie)
+
+if __name__ == '__main__':
+    main()
diff --git a/media/movies.py b/media/movies.py
new file mode 100644
index 0000000..ef06b92
--- /dev/null
+++ b/media/movies.py
@@ -0,0 +1 @@
+from .imdb import get_movies

From fb3c6aebe791b8ac9343df242cb1a6dd4d5c0555 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov <karlicoss@gmail.com>
Date: Tue, 13 Aug 2019 21:59:31 +0100
Subject: [PATCH 4/4] extract created date

---
 media/imdb.py   | 9 +++++----
 media/movies.py | 2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/media/imdb.py b/media/imdb.py
index 00802a3..ce47c51 100644
--- a/media/imdb.py
+++ b/media/imdb.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import csv
 import json
+from datetime import datetime
 from typing import Iterator, List, NamedTuple
 
 from ..paths import BACKUPS
@@ -15,7 +16,7 @@ def get_last():
 
 
 class Movie(NamedTuple):
-    created: str
+    created: datetime
     title: str
     rating: int
 
@@ -26,11 +27,11 @@ def iter_movies() -> Iterator[Movie]:
     with last.open() as fo:
         reader = csv.DictReader(fo)
         for i, line in enumerate(reader):
-            # print(line)
-            # id_ = "n" + str(i)
+            # TODO extract directors??
             title = line['Title']
             rating = line['You rated']
-            created = line['created']
+            createds = line['created']
+            created = datetime.strptime(createds, '%a %b %d %H:%M:%S %Y')
             # TODO const??
             yield Movie(created=created, title=title, rating=rating)
 
diff --git a/media/movies.py b/media/movies.py
index ef06b92..448a987 100644
--- a/media/movies.py
+++ b/media/movies.py
@@ -1 +1,3 @@
 from .imdb import get_movies
+
+# TODO extract items from org mode? perhaps not very high priority