diff --git a/my/kython/ktakeout.py b/my/kython/ktakeout.py index 96a3f58..30688e3 100644 --- a/my/kython/ktakeout.py +++ b/my/kython/ktakeout.py @@ -3,7 +3,7 @@ import re from pathlib import Path from datetime import datetime from html.parser import HTMLParser -from typing import List, Dict, Optional, Any +from typing import List, Dict, Optional, Any, Callable, Iterable, Tuple from collections import OrderedDict from urllib.parse import unquote import pytz @@ -49,10 +49,15 @@ class State(Enum): PARSING_DATE = 3 +Url = str +Title = str +Parsed = Tuple[datetime, Url, Title] +Callback = Callable[[datetime, Url, Title], None] + # would be easier to use beautiful soup, but ends up in a big memory footprint.. class TakeoutHTMLParser(HTMLParser): - def __init__(self, callback) -> None: + def __init__(self, callback: Callback) -> None: super().__init__() self.state: State = State.OUTSIDE @@ -118,3 +123,16 @@ class TakeoutHTMLParser(HTMLParser): self.state = State.OUTSIDE return + + +def read_html(tpath: Path, file: str) -> Iterable[Parsed]: + from .kompress import kopen + results: List[Parsed] = [] + def cb(dt: datetime, url: Url, title: Title) -> None: + results.append((dt, url, title)) + parser = TakeoutHTMLParser(callback=cb) + with kopen(tpath, file) as fo: + # TODO careful, what if it's a string already? make as_utf method? + data = fo.read().decode('utf8') + parser.feed(data) + return results diff --git a/my/media/youtube.py b/my/media/youtube.py index 4e23f5b..6331190 100755 --- a/my/media/youtube.py +++ b/my/media/youtube.py @@ -2,10 +2,7 @@ from datetime import datetime from typing import NamedTuple, List -# TODO ugh. reuse it in mypkg/releaste takeout parser separately? 
-from ..kython.ktakeout import TakeoutHTMLParser - -from ..kython.kompress import kopen +from ..kython.ktakeout import read_html from ..takeout import get_last_takeout @@ -26,15 +23,9 @@ def get_watched(): last = get_last_takeout(path=path) watches: List[Watched] = [] - def cb(dt, url, title): + for dt, url, title in read_html(last, path): watches.append(Watched(url=url, title=title, when=dt)) - parser = TakeoutHTMLParser(cb) - - with kopen(last, path) as fo: - dd = fo.read().decode('utf8') - parser.feed(dd) - # TODO hmm they already come sorted.. wonder if should just rely on it.. return list(sorted(watches, key=lambda e: e.when)) diff --git a/my/takeout.py b/my/takeout.py index 26404eb..e38e493 100644 --- a/my/takeout.py +++ b/my/takeout.py @@ -3,6 +3,7 @@ from typing import Optional, Iterable from .common import get_files from .kython.kompress import kopen, kexists +from .kython.ktakeout import read_html from my.config import google as config diff --git a/tests/takeout.py b/tests/takeout.py index 6f7c8d8..6acca9b 100644 --- a/tests/takeout.py +++ b/tests/takeout.py @@ -19,30 +19,35 @@ def test_location_perf(): print(ilen(islice(LT.iter_locations(), 0, 10000))) -def test_parser(): - from my.kython.ktakeout import TakeoutHTMLParser +# in theory should support any HTML takeout file? 
+# although IIRC bookmarks and search-history.html weren't working +import pytest # type: ignore +@pytest.mark.parametrize( + 'path', [ + 'YouTube/history/watch-history.html', + 'My Activity/YouTube/MyActivity.html', + 'My Activity/Chrome/MyActivity.html', + 'My Activity/Search/MyActivity.html', + ] +) +def test_parser(path: str): + path = 'Takeout/' + path + from my.kython.ktakeout import read_html from my.takeout import get_last_takeout - # 4s for parsing with HTMLParser (30K results) - path = 'Takeout/My Activity/Chrome/MyActivity.html' tpath = get_last_takeout(path=path) results = [] - def cb(dt, url, title): - results.append((dt, url, title)) + for res in read_html(tpath, path): + results.append(res) - parser = TakeoutHTMLParser(cb) - - with kopen(tpath, path) as fo: - dd = fo.read().decode('utf8') - parser.feed(dd) print(len(results)) def parse_takeout_xmllint(data: str): # without xmllint (splitting by '