From e8e4994c02f1073a3acfa9f59c5ec36623573368 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 11 Oct 2020 22:16:39 +0100 Subject: [PATCH] google.takeout.paths: return Optional if there are no takeouts --- my/bluemaestro/__init__.py | 8 ++++++-- my/github/ghexport.py | 5 ++++- my/google/takeout/paths.py | 4 ++-- my/location/google.py | 14 ++++++++------ my/media/youtube.py | 16 ++++++++-------- tests/takeout.py | 2 ++ 6 files changed, 30 insertions(+), 19 deletions(-) diff --git a/my/bluemaestro/__init__.py b/my/bluemaestro/__init__.py index 3acc43e..5230431 100755 --- a/my/bluemaestro/__init__.py +++ b/my/bluemaestro/__init__.py @@ -139,7 +139,7 @@ def measurements(dbs=inputs()) -> Iterable[Measurement]: # for k, v in merged.items(): # yield Point(dt=k, temp=v) # meh? -from ..core.common import stat, Stats +from ..core import stat, Stats def stats() -> Stats: return stat(measurements) @@ -155,7 +155,11 @@ def dataframe() -> DataFrameT: # todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks... # either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh? import pandas as pd # type: ignore - df = pd.DataFrame(p._asdict() for p in measurements()) + df = pd.DataFrame( + (p._asdict() for p in measurements()), + # todo meh. otherwise fails on empty inputs... + columns=list(Measurement._fields), + ) # todo not sure how it would handle mixed timezones?? return df.set_index('dt') diff --git a/my/github/ghexport.py b/my/github/ghexport.py index 5d02f9e..f4007cd 100644 --- a/my/github/ghexport.py +++ b/my/github/ghexport.py @@ -68,7 +68,10 @@ def _dal() -> dal.DAL: @mcachew(config.cache_dir, hashf=lambda dal: dal.sources) def events(dal=_dal()) -> Results: for d in dal.events(): - yield _parse_event(d) + if isinstance(d, Exception): + yield d + else: + yield _parse_event(d) def stats(): diff --git a/my/google/takeout/paths.py b/my/google/takeout/paths.py index 994d5d3..36b3e0c 100644 --- a/my/google/takeout/paths.py +++ b/my/google/takeout/paths.py @@ -37,8 +37,8 @@ def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]: yield takeout -def get_last_takeout(*, path: Optional[str]=None) -> Path: - return last(get_takeouts(path=path)) +def get_last_takeout(*, path: Optional[str]=None) -> Optional[Path]: + return last(get_takeouts(path=path), default=None) # TODO might be a good idea to merge across multiple takeouts... diff --git a/my/location/google.py b/my/location/google.py index 0bd68f3..b8cfcfc 100644 --- a/my/location/google.py +++ b/my/location/google.py @@ -10,7 +10,7 @@ from datetime import datetime, timezone from itertools import islice from pathlib import Path from subprocess import Popen, PIPE -from typing import Any, Collection, Iterator, NamedTuple, Optional, Sequence, IO, Tuple +from typing import Any, Collection, Iterable, NamedTuple, Optional, Sequence, IO, Tuple import re # pip3 install geopy @@ -39,7 +39,7 @@ class Location(NamedTuple): TsLatLon = Tuple[int, int, int] -def _iter_via_ijson(fo) -> Iterator[TsLatLon]: +def _iter_via_ijson(fo) -> Iterable[TsLatLon]: # ijson version takes 25 seconds for 1M items (without processing) try: # pip3 install ijson cffi @@ -58,7 +58,7 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]: # todo ugh. fragile, not sure, maybe should do some assert in advance? -def _iter_via_grep(fo) -> Iterator[TsLatLon]: +def _iter_via_grep(fo) -> Iterable[TsLatLon]: # grep version takes 5 seconds for 1M items (without processing) x = [-1, -1, -1] for i, line in enumerate(fo): @@ -78,7 +78,7 @@ def _iter_via_grep(fo) -> Iterator[TsLatLon]: # would need to find out a way to know when to stop? process in some sort of sqrt progression?? -def _iter_locations_fo(fit) -> Iterator[Location]: +def _iter_locations_fo(fit) -> Iterable[Location]: total = 0 errors = 0 @@ -119,7 +119,7 @@ _LOCATION_JSON = 'Takeout/Location History/Location History.json' # TODO hope they are sorted... (could assert for it) # todo configure cache automatically? @mcachew(cache_dir(), logger=logger) -def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]: +def _iter_locations(path: Path, start=0, stop=None) -> Iterable[Location]: ctx: IO[str] if path.suffix == '.json': # todo: to support, should perhaps provide it as input= to Popen @@ -146,11 +146,13 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]: # todo wonder if old takeouts could contribute as well?? -def locations(**kwargs) -> Iterator[Location]: +def locations(**kwargs) -> Iterable[Location]: # NOTE: if this import isn't lazy, tests/tz.py breaks because it can't override config # very weird, as if this function captures the values of globals somehow?? investigate later. from ..google.takeout.paths import get_last_takeout last_takeout = get_last_takeout(path=_LOCATION_JSON) + if last_takeout is None: + return [] return _iter_locations(path=last_takeout, **kwargs) diff --git a/my/media/youtube.py b/my/media/youtube.py index faeb09a..8212f12 100755 --- a/my/media/youtube.py +++ b/my/media/youtube.py @@ -21,6 +21,9 @@ def watched() -> Iterable[Watched]: path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last # TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json last = get_last_takeout(path=path) + if last is None: + return [] + watches: List[Watched] = [] for dt, url, title in read_html(last, path): @@ -30,14 +33,11 @@ def watched() -> Iterable[Watched]: return list(sorted(watches, key=lambda e: e.when)) +from ..core import stat, Stats +def stats() -> Stats: + return stat(watched) + + # todo deprecate get_watched = watched - -def main(): - # TODO shit. a LOT of watches... - for w in get_watched(): - print(w) - -if __name__ == '__main__': - main() diff --git a/tests/takeout.py b/tests/takeout.py index 69e16de..2cfb7a3 100644 --- a/tests/takeout.py +++ b/tests/takeout.py @@ -33,6 +33,7 @@ import pytest # type: ignore def test_parser(path: str): path = 'Takeout/' + path tpath = get_last_takeout(path=path) + assert tpath is not None results = list(read_html(tpath, path)) # TODO assert len > 100 or something? print(len(results)) @@ -41,6 +42,7 @@ def test_parser(path: str): def test_myactivity_search(): path = 'Takeout/My Activity/Search/MyActivity.html' tpath = get_last_takeout(path=path) + assert tpath is not None results = list(read_html(tpath, path)) res = (