From adadffef16263585dade3752a3d1fd33a7955690 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Fri, 24 Apr 2020 16:11:19 +0100
Subject: [PATCH] add takeout parser test

---
Reviewer note (ignored by `git am`; not part of the commit message):
line breaks and indentation were restored from a whitespace-flattened copy
of this patch. Line counts follow the hunk headers (6 context + 1 added,
3 context + 20 added = 21 insertions). Blank-context placement in the first
hunk and the nesting of `parser.feed(dd)` (inside the `with`) and
`print(len(results))` (after the `with`) are inferred from Python syntax --
confirm against the original commit.

 tests/takeout.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/takeout.py b/tests/takeout.py
index d7bd3ca..bbe6271 100644
--- a/tests/takeout.py
+++ b/tests/takeout.py
@@ -5,6 +5,7 @@ from my.core.cachew import disable_cachew
 disable_cachew()
 
 import my.location.takeout as LT
+from my.kython.kompress import kopen
 
 
 def ilen(it):
@@ -16,3 +17,23 @@ def test_location_perf():
     # 2.80 s for 10 iterations and 10K points
     # TODO try switching to jq and see how it goes? not sure..
     print(ilen(islice(LT.iter_locations(), 0, 10000)))
+
+
+def test_parser():
+    from my.kython.ktakeout import TakeoutHTMLParser
+    from my.takeout import get_last_takeout
+
+    # 4s for parsing with HTMLParser (30K results)
+    path = 'Takeout/My Activity/Chrome/MyActivity.html'
+    tpath = get_last_takeout(path=path)
+
+    results = []
+    def cb(dt, url, title):
+        results.append((dt, url, title))
+
+    parser = TakeoutHTMLParser(cb)
+
+    with kopen(tpath, path) as fo:
+        dd = fo.read().decode('utf8')
+        parser.feed(dd)
+    print(len(results))