google.takeout.paths: return Optional if there are no takeouts
This commit is contained in:
parent
4666378f7e
commit
e8e4994c02
6 changed files with 30 additions and 19 deletions
|
@ -139,7 +139,7 @@ def measurements(dbs=inputs()) -> Iterable[Measurement]:
|
||||||
# for k, v in merged.items():
|
# for k, v in merged.items():
|
||||||
# yield Point(dt=k, temp=v) # meh?
|
# yield Point(dt=k, temp=v) # meh?
|
||||||
|
|
||||||
from ..core.common import stat, Stats
|
from ..core import stat, Stats
|
||||||
def stats() -> Stats:
|
def stats() -> Stats:
|
||||||
return stat(measurements)
|
return stat(measurements)
|
||||||
|
|
||||||
|
@ -155,7 +155,11 @@ def dataframe() -> DataFrameT:
|
||||||
# todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks...
|
# todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks...
|
||||||
# either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh?
|
# either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh?
|
||||||
import pandas as pd # type: ignore
|
import pandas as pd # type: ignore
|
||||||
df = pd.DataFrame(p._asdict() for p in measurements())
|
df = pd.DataFrame(
|
||||||
|
(p._asdict() for p in measurements()),
|
||||||
|
# todo meh. otherwise fails on empty inputs...
|
||||||
|
columns=list(Measurement._fields),
|
||||||
|
)
|
||||||
# todo not sure how it would handle mixed timezones??
|
# todo not sure how it would handle mixed timezones??
|
||||||
return df.set_index('dt')
|
return df.set_index('dt')
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,10 @@ def _dal() -> dal.DAL:
|
||||||
@mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
|
@mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
|
||||||
def events(dal=_dal()) -> Results:
|
def events(dal=_dal()) -> Results:
|
||||||
for d in dal.events():
|
for d in dal.events():
|
||||||
yield _parse_event(d)
|
if isinstance(d, Exception):
|
||||||
|
yield d
|
||||||
|
else:
|
||||||
|
yield _parse_event(d)
|
||||||
|
|
||||||
|
|
||||||
def stats():
|
def stats():
|
||||||
|
|
|
@ -37,8 +37,8 @@ def get_takeouts(*, path: Optional[str]=None) -> Iterable[Path]:
|
||||||
yield takeout
|
yield takeout
|
||||||
|
|
||||||
|
|
||||||
def get_last_takeout(*, path: Optional[str]=None) -> Path:
|
def get_last_takeout(*, path: Optional[str]=None) -> Optional[Path]:
|
||||||
return last(get_takeouts(path=path))
|
return last(get_takeouts(path=path), default=None)
|
||||||
|
|
||||||
|
|
||||||
# TODO might be a good idea to merge across multiple takeouts...
|
# TODO might be a good idea to merge across multiple takeouts...
|
||||||
|
|
|
@ -10,7 +10,7 @@ from datetime import datetime, timezone
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from subprocess import Popen, PIPE
|
from subprocess import Popen, PIPE
|
||||||
from typing import Any, Collection, Iterator, NamedTuple, Optional, Sequence, IO, Tuple
|
from typing import Any, Collection, Iterable, NamedTuple, Optional, Sequence, IO, Tuple
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# pip3 install geopy
|
# pip3 install geopy
|
||||||
|
@ -39,7 +39,7 @@ class Location(NamedTuple):
|
||||||
TsLatLon = Tuple[int, int, int]
|
TsLatLon = Tuple[int, int, int]
|
||||||
|
|
||||||
|
|
||||||
def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
|
def _iter_via_ijson(fo) -> Iterable[TsLatLon]:
|
||||||
# ijson version takes 25 seconds for 1M items (without processing)
|
# ijson version takes 25 seconds for 1M items (without processing)
|
||||||
try:
|
try:
|
||||||
# pip3 install ijson cffi
|
# pip3 install ijson cffi
|
||||||
|
@ -58,7 +58,7 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
|
||||||
|
|
||||||
|
|
||||||
# todo ugh. fragile, not sure, maybe should do some assert in advance?
|
# todo ugh. fragile, not sure, maybe should do some assert in advance?
|
||||||
def _iter_via_grep(fo) -> Iterator[TsLatLon]:
|
def _iter_via_grep(fo) -> Iterable[TsLatLon]:
|
||||||
# grep version takes 5 seconds for 1M items (without processing)
|
# grep version takes 5 seconds for 1M items (without processing)
|
||||||
x = [-1, -1, -1]
|
x = [-1, -1, -1]
|
||||||
for i, line in enumerate(fo):
|
for i, line in enumerate(fo):
|
||||||
|
@ -78,7 +78,7 @@ def _iter_via_grep(fo) -> Iterator[TsLatLon]:
|
||||||
# would need to find out a way to know when to stop? process in some sort of sqrt progression??
|
# would need to find out a way to know when to stop? process in some sort of sqrt progression??
|
||||||
|
|
||||||
|
|
||||||
def _iter_locations_fo(fit) -> Iterator[Location]:
|
def _iter_locations_fo(fit) -> Iterable[Location]:
|
||||||
total = 0
|
total = 0
|
||||||
errors = 0
|
errors = 0
|
||||||
|
|
||||||
|
@ -119,7 +119,7 @@ _LOCATION_JSON = 'Takeout/Location History/Location History.json'
|
||||||
# TODO hope they are sorted... (could assert for it)
|
# TODO hope they are sorted... (could assert for it)
|
||||||
# todo configure cache automatically?
|
# todo configure cache automatically?
|
||||||
@mcachew(cache_dir(), logger=logger)
|
@mcachew(cache_dir(), logger=logger)
|
||||||
def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
|
def _iter_locations(path: Path, start=0, stop=None) -> Iterable[Location]:
|
||||||
ctx: IO[str]
|
ctx: IO[str]
|
||||||
if path.suffix == '.json':
|
if path.suffix == '.json':
|
||||||
# todo: to support, should perhaps provide it as input= to Popen
|
# todo: to support, should perhaps provide it as input= to Popen
|
||||||
|
@ -146,11 +146,13 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
|
||||||
# todo wonder if old takeouts could contribute as well??
|
# todo wonder if old takeouts could contribute as well??
|
||||||
|
|
||||||
|
|
||||||
def locations(**kwargs) -> Iterator[Location]:
|
def locations(**kwargs) -> Iterable[Location]:
|
||||||
# NOTE: if this import isn't lazy, tests/tz.py breaks because it can't override config
|
# NOTE: if this import isn't lazy, tests/tz.py breaks because it can't override config
|
||||||
# very weird, as if this function captures the values of globals somehow?? investigate later.
|
# very weird, as if this function captures the values of globals somehow?? investigate later.
|
||||||
from ..google.takeout.paths import get_last_takeout
|
from ..google.takeout.paths import get_last_takeout
|
||||||
last_takeout = get_last_takeout(path=_LOCATION_JSON)
|
last_takeout = get_last_takeout(path=_LOCATION_JSON)
|
||||||
|
if last_takeout is None:
|
||||||
|
return []
|
||||||
|
|
||||||
return _iter_locations(path=last_takeout, **kwargs)
|
return _iter_locations(path=last_takeout, **kwargs)
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,9 @@ def watched() -> Iterable[Watched]:
|
||||||
path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
|
path = 'Takeout/My Activity/YouTube/MyActivity.html' # looks like this one doesn't have retention? so enough to use the last
|
||||||
# TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
|
# TODO YouTube/history/watch-history.html, also YouTube/history/watch-history.json
|
||||||
last = get_last_takeout(path=path)
|
last = get_last_takeout(path=path)
|
||||||
|
if last is None:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
watches: List[Watched] = []
|
watches: List[Watched] = []
|
||||||
for dt, url, title in read_html(last, path):
|
for dt, url, title in read_html(last, path):
|
||||||
|
@ -30,14 +33,11 @@ def watched() -> Iterable[Watched]:
|
||||||
return list(sorted(watches, key=lambda e: e.when))
|
return list(sorted(watches, key=lambda e: e.when))
|
||||||
|
|
||||||
|
|
||||||
|
from ..core import stat, Stats
|
||||||
|
def stats() -> Stats:
|
||||||
|
return stat(watched)
|
||||||
|
|
||||||
|
|
||||||
# todo deprecate
|
# todo deprecate
|
||||||
get_watched = watched
|
get_watched = watched
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
# TODO shit. a LOT of watches...
|
|
||||||
for w in get_watched():
|
|
||||||
print(w)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ import pytest # type: ignore
|
||||||
def test_parser(path: str):
|
def test_parser(path: str):
|
||||||
path = 'Takeout/' + path
|
path = 'Takeout/' + path
|
||||||
tpath = get_last_takeout(path=path)
|
tpath = get_last_takeout(path=path)
|
||||||
|
assert tpath is not None
|
||||||
results = list(read_html(tpath, path))
|
results = list(read_html(tpath, path))
|
||||||
# TODO assert len > 100 or something?
|
# TODO assert len > 100 or something?
|
||||||
print(len(results))
|
print(len(results))
|
||||||
|
@ -41,6 +42,7 @@ def test_parser(path: str):
|
||||||
def test_myactivity_search():
|
def test_myactivity_search():
|
||||||
path = 'Takeout/My Activity/Search/MyActivity.html'
|
path = 'Takeout/My Activity/Search/MyActivity.html'
|
||||||
tpath = get_last_takeout(path=path)
|
tpath = get_last_takeout(path=path)
|
||||||
|
assert tpath is not None
|
||||||
results = list(read_html(tpath, path))
|
results = list(read_html(tpath, path))
|
||||||
|
|
||||||
res = (
|
res = (
|
||||||
|
|
Loading…
Add table
Reference in a new issue