google.takeout.paths: get_last_takeout returns None (Optional[Path]) if there are no takeouts

This commit is contained in:
Dima Gerasimov 2020-10-11 22:16:39 +01:00 committed by karlicoss
parent 4666378f7e
commit e8e4994c02
6 changed files with 30 additions and 19 deletions

View file

@ -139,7 +139,7 @@ def measurements(dbs=inputs()) -> Iterable[Measurement]:
# for k, v in merged.items(): # for k, v in merged.items():
# yield Point(dt=k, temp=v) # meh? # yield Point(dt=k, temp=v) # meh?
from ..core.common import stat, Stats from ..core import stat, Stats
def stats() -> Stats: def stats() -> Stats:
return stat(measurements) return stat(measurements)
@ -155,7 +155,11 @@ def dataframe() -> DataFrameT:
# todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks... # todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks...
# either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh? # either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh?
import pandas as pd # type: ignore import pandas as pd # type: ignore
df = pd.DataFrame(p._asdict() for p in measurements()) df = pd.DataFrame(
(p._asdict() for p in measurements()),
# todo meh. explicit columns are needed, otherwise set_index('dt') fails on empty inputs (no 'dt' column)...
columns=list(Measurement._fields),
)
# todo not sure how it would handle mixed timezones?? # todo not sure how it would handle mixed timezones??
return df.set_index('dt') return df.set_index('dt')

View file

@ -68,7 +68,10 @@ def _dal() -> dal.DAL:
@mcachew(config.cache_dir, hashf=lambda dal: dal.sources) @mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
def events(dal=_dal()) -> Results: def events(dal=_dal()) -> Results:
for d in dal.events(): for d in dal.events():
yield _parse_event(d) if isinstance(d, Exception):
yield d
else:
yield _parse_event(d)
def stats(): def stats():

View file

@ -37,8 +37,8 @@ def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
yield takeout yield takeout
def get_last_takeout(*, path: Optional[str]=None) -> Path: def get_last_takeout(*, path: Optional[str]=None) -> Optional[Path]:
return last(get_takeouts(path=path)) return last(get_takeouts(path=path), default=None)
# TODO might be a good idea to merge across multiple takeouts... # TODO might be a good idea to merge across multiple takeouts...

View file

@ -10,7 +10,7 @@ from datetime import datetime, timezone
from itertools import islice from itertools import islice
from pathlib import Path from pathlib import Path
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
from typing import Any, Collection, Iterator, NamedTuple, Optional, Sequence, IO, Tuple from typing import Any, Collection, Iterable, NamedTuple, Optional, Sequence, IO, Tuple
import re import re
# pip3 install geopy # pip3 install geopy
@ -39,7 +39,7 @@ class Location(NamedTuple):
TsLatLon = Tuple[int, int, int] TsLatLon = Tuple[int, int, int]
def _iter_via_ijson(fo) -> Iterator[TsLatLon]: def _iter_via_ijson(fo) -> Iterable[TsLatLon]:
# ijson version takes 25 seconds for 1M items (without processing) # ijson version takes 25 seconds for 1M items (without processing)
try: try:
# pip3 install ijson cffi # pip3 install ijson cffi
@ -58,7 +58,7 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
# todo ugh. fragile, not sure, maybe should do some assert in advance? # todo ugh. fragile, not sure, maybe should do some assert in advance?
def _iter_via_grep(fo) -> Iterator[TsLatLon]: def _iter_via_grep(fo) -> Iterable[TsLatLon]:
# grep version takes 5 seconds for 1M items (without processing) # grep version takes 5 seconds for 1M items (without processing)
x = [-1, -1, -1] x = [-1, -1, -1]
for i, line in enumerate(fo): for i, line in enumerate(fo):
@ -78,7 +78,7 @@ def _iter_via_grep(fo) -> Iterator[TsLatLon]:
# would need to find out a way to know when to stop? process in some sort of sqrt progression?? # would need to find out a way to know when to stop? process in some sort of sqrt progression??
def _iter_locations_fo(fit) -> Iterator[Location]: def _iter_locations_fo(fit) -> Iterable[Location]:
total = 0 total = 0
errors = 0 errors = 0
@ -119,7 +119,7 @@ _LOCATION_JSON = 'Takeout/Location History/Location History.json'
# TODO hope they are sorted... (could assert for it) # TODO hope they are sorted... (could assert for it)
# todo configure cache automatically? # todo configure cache automatically?
@mcachew(cache_dir(), logger=logger) @mcachew(cache_dir(), logger=logger)
def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]: def _iter_locations(path: Path, start=0, stop=None) -> Iterable[Location]:
ctx: IO[str] ctx: IO[str]
if path.suffix == '.json': if path.suffix == '.json':
# todo: to support, should perhaps provide it as input= to Popen # todo: to support, should perhaps provide it as input= to Popen
@ -146,11 +146,13 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
# todo wonder if old takeouts could contribute as well?? # todo wonder if old takeouts could contribute as well??
def locations(**kwargs) -> Iterator[Location]: def locations(**kwargs) -> Iterable[Location]:
# NOTE: if this import isn't lazy, tests/tz.py breaks because it can't override config # NOTE: if this import isn't lazy, tests/tz.py breaks because it can't override config
# very weird, as if this function captures the values of globals somehow?? investigate later. # very weird, as if this function captures the values of globals somehow?? investigate later.
from ..google.takeout.paths import get_last_takeout from ..google.takeout.paths import get_last_takeout
last_takeout = get_last_takeout(path=_LOCATION_JSON) last_takeout = get_last_takeout(path=_LOCATION_JSON)
if last_takeout is None:
return []
return _iter_locations(path=last_takeout, **kwargs) return _iter_locations(path=last_takeout, **kwargs)

View file

@ -21,6 +21,9 @@ def watched() -> Iterable[Watched]:
path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
# TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json # TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
last = get_last_takeout(path=path) last = get_last_takeout(path=path)
if last is None:
return []
watches: List[Watched] = [] watches: List[Watched] = []
for dt, url, title in read_html(last, path): for dt, url, title in read_html(last, path):
@ -30,14 +33,11 @@ def watched() -> Iterable[Watched]:
return list(sorted(watches, key=lambda e: e.when)) return list(sorted(watches, key=lambda e: e.when))
from ..core import stat, Stats
def stats() -> Stats:
return stat(watched)
# todo deprecate # todo deprecate
get_watched = watched get_watched = watched
def main():
# TODO shit. a LOT of watches...
for w in get_watched():
print(w)
if __name__ == '__main__':
main()

View file

@ -33,6 +33,7 @@ import pytest # type: ignore
def test_parser(path: str): def test_parser(path: str):
path = 'Takeout/' + path path = 'Takeout/' + path
tpath = get_last_takeout(path=path) tpath = get_last_takeout(path=path)
assert tpath is not None
results = list(read_html(tpath, path)) results = list(read_html(tpath, path))
# TODO assert len > 100 or something? # TODO assert len > 100 or something?
print(len(results)) print(len(results))
@ -41,6 +42,7 @@ def test_parser(path: str):
def test_myactivity_search(): def test_myactivity_search():
path = 'Takeout/My Activity/Search/MyActivity.html' path = 'Takeout/My Activity/Search/MyActivity.html'
tpath = get_last_takeout(path=path) tpath = get_last_takeout(path=path)
assert tpath is not None
results = list(read_html(tpath, path)) results = list(read_html(tpath, path))
res = ( res = (