From e8e4994c02f1073a3acfa9f59c5ec36623573368 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov <karlicoss@gmail.com>
Date: Sun, 11 Oct 2020 22:16:39 +0100
Subject: [PATCH] google.takeout.paths: return Optional if there are no
 takeouts

---
 my/bluemaestro/__init__.py |  8 ++++++--
 my/github/ghexport.py      |  5 ++++-
 my/google/takeout/paths.py |  4 ++--
 my/location/google.py      | 14 ++++++++------
 my/media/youtube.py        | 16 ++++++++--------
 tests/takeout.py           |  2 ++
 6 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/my/bluemaestro/__init__.py b/my/bluemaestro/__init__.py
index 3acc43e..5230431 100755
--- a/my/bluemaestro/__init__.py
+++ b/my/bluemaestro/__init__.py
@@ -139,7 +139,7 @@ def measurements(dbs=inputs()) -> Iterable[Measurement]:
     # for k, v in merged.items():
     #     yield Point(dt=k, temp=v) # meh?
 
-from ..core.common import stat, Stats
+from ..core import stat, Stats
 def stats() -> Stats:
     return stat(measurements)
 
@@ -155,7 +155,11 @@ def dataframe() -> DataFrameT:
     # todo not sure why x axis time ticks are weird...  df[:6269] works, whereas df[:6269] breaks...
     # either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh?
     import pandas as pd # type: ignore
-    df = pd.DataFrame(p._asdict() for p in measurements())
+    df = pd.DataFrame(
+        (p._asdict() for p in measurements()),
+        # todo meh. without explicit columns an empty frame has no 'dt' column, so set_index('dt') below fails on empty inputs...
+        columns=list(Measurement._fields),
+    )
     # todo not sure how it would handle mixed timezones??
     return df.set_index('dt')
 
diff --git a/my/github/ghexport.py b/my/github/ghexport.py
index 5d02f9e..f4007cd 100644
--- a/my/github/ghexport.py
+++ b/my/github/ghexport.py
@@ -68,7 +68,10 @@ def _dal() -> dal.DAL:
 @mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
 def events(dal=_dal()) -> Results:
     for d in dal.events():
-        yield _parse_event(d)
+        if isinstance(d, Exception):
+            yield d
+        else:
+            yield _parse_event(d)
 
 
 def stats():
diff --git a/my/google/takeout/paths.py b/my/google/takeout/paths.py
index 994d5d3..36b3e0c 100644
--- a/my/google/takeout/paths.py
+++ b/my/google/takeout/paths.py
@@ -37,8 +37,8 @@ def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
             yield takeout
 
 
-def get_last_takeout(*, path: Optional[str]=None) -> Path:
-    return last(get_takeouts(path=path))
+def get_last_takeout(*, path: Optional[str]=None) -> Optional[Path]:
+    return last(get_takeouts(path=path), default=None)
 
 
 # TODO might be a good idea to merge across multiple takeouts...
diff --git a/my/location/google.py b/my/location/google.py
index 0bd68f3..b8cfcfc 100644
--- a/my/location/google.py
+++ b/my/location/google.py
@@ -10,7 +10,7 @@ from datetime import datetime, timezone
 from itertools import islice
 from pathlib import Path
 from subprocess import Popen, PIPE
-from typing import Any, Collection, Iterator, NamedTuple, Optional, Sequence, IO, Tuple
+from typing import Any, Collection, Iterable, NamedTuple, Optional, Sequence, IO, Tuple
 import re
 
 # pip3 install geopy
@@ -39,7 +39,7 @@ class Location(NamedTuple):
 TsLatLon = Tuple[int, int, int]
 
 
-def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
+def _iter_via_ijson(fo) -> Iterable[TsLatLon]:
     # ijson version takes 25 seconds for 1M items (without processing)
     try:
         # pip3 install ijson cffi
@@ -58,7 +58,7 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
 
 
 # todo ugh. fragile, not sure, maybe should do some assert in advance?
-def _iter_via_grep(fo) -> Iterator[TsLatLon]:
+def _iter_via_grep(fo) -> Iterable[TsLatLon]:
     # grep version takes 5 seconds for 1M items (without processing)
     x = [-1, -1, -1]
     for i, line in enumerate(fo):
@@ -78,7 +78,7 @@ def _iter_via_grep(fo) -> Iterator[TsLatLon]:
 # would need to find out a way to know when to stop? process in some sort of sqrt progression??
 
 
-def _iter_locations_fo(fit) -> Iterator[Location]:
+def _iter_locations_fo(fit) -> Iterable[Location]:
     total = 0
     errors = 0
 
@@ -119,7 +119,7 @@ _LOCATION_JSON = 'Takeout/Location History/Location History.json'
 # TODO hope they are sorted... (could assert for it)
 # todo configure cache automatically?
 @mcachew(cache_dir(), logger=logger)
-def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
+def _iter_locations(path: Path, start=0, stop=None) -> Iterable[Location]:
     ctx: IO[str]
     if path.suffix == '.json':
         # todo: to support, should perhaps provide it as input= to Popen
@@ -146,11 +146,13 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
     # todo wonder if old takeouts could contribute as well??
 
 
-def locations(**kwargs) -> Iterator[Location]:
+def locations(**kwargs) -> Iterable[Location]:
     # NOTE: if this import isn't lazy, tests/tz.py breaks because it can't override config
     # very weird, as if this function captures the values of globals somehow?? investigate later.
     from ..google.takeout.paths import get_last_takeout
     last_takeout = get_last_takeout(path=_LOCATION_JSON)
+    if last_takeout is None:
+        return []
 
     return _iter_locations(path=last_takeout, **kwargs)
 
diff --git a/my/media/youtube.py b/my/media/youtube.py
index faeb09a..8212f12 100755
--- a/my/media/youtube.py
+++ b/my/media/youtube.py
@@ -21,6 +21,9 @@ def watched() -> Iterable[Watched]:
     path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
     # TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
     last = get_last_takeout(path=path)
+    if last is None:
+        return []
+
 
     watches: List[Watched] = []
     for dt, url, title in read_html(last, path):
@@ -30,14 +33,11 @@ def watched() -> Iterable[Watched]:
     return list(sorted(watches, key=lambda e: e.when))
 
 
+from ..core import stat, Stats
+def stats() -> Stats:
+    return stat(watched)
+
+
 # todo deprecate
 get_watched = watched
 
-
-def main():
-    # TODO shit. a LOT of watches...
-    for w in get_watched():
-        print(w)
-
-if __name__ == '__main__':
-    main()
diff --git a/tests/takeout.py b/tests/takeout.py
index 69e16de..2cfb7a3 100644
--- a/tests/takeout.py
+++ b/tests/takeout.py
@@ -33,6 +33,7 @@ import pytest # type: ignore
 def test_parser(path: str):
     path = 'Takeout/' + path
     tpath = get_last_takeout(path=path)
+    assert tpath is not None
     results = list(read_html(tpath, path))
     # TODO assert len > 100 or something?
     print(len(results))
@@ -41,6 +42,7 @@ def test_parser(path: str):
 def test_myactivity_search():
     path = 'Takeout/My Activity/Search/MyActivity.html'
     tpath = get_last_takeout(path=path)
+    assert tpath is not None
     results = list(read_html(tpath, path))
 
     res = (