enhance instapaper provider

This commit is contained in:
Dima Gerasimov 2019-03-09 11:08:02 +00:00
parent a017316cbd
commit b63a15e6aa

View file

@@ -2,66 +2,108 @@ from datetime import datetime
import json import json
from pathlib import Path from pathlib import Path
import pytz import pytz
from typing import NamedTuple, Optional from typing import NamedTuple, Optional, List, Dict
from collections import OrderedDict
from kython import group_by_key
# Directory that is scanned for Instapaper backup files (*.json).
BDIR = Path('/L/backups/instapaper/')

# Identifier aliases: both bookmark ids and highlight ids are handled as strings.
Bid = str
Hid = str
class Highlight(NamedTuple):
    """A single highlight, denormalised with the url/title of its bookmark."""

    # timezone-aware (UTC) moment derived from the highlight's 'time' field
    dt: datetime
    # identifier of the highlight itself
    uid: Hid
    # identifier of the bookmark this highlight was made in
    bid: Bid
    # the highlighted passage
    text: str
    # note attached to the highlight, if any
    note: Optional[str]
    # copied from the owning bookmark
    url: str
    title: str
class Bookmark(NamedTuple):
    """A saved article as recorded in the backup's 'bookmarks' list."""

    # bookmark identifier, stringified
    bid: Bid
    # raw 'time' value from the backup (presumably a Unix timestamp -- TODO confirm)
    time: int
    # address and title of the saved article
    url: str
    title: str
class Page(NamedTuple):
    """A bookmark bundled with the highlights that were made in it."""

    # the saved article
    bookmark: Bookmark
    # highlights whose ``bid`` matches the bookmark
    highlights: List[Highlight]
def get_files():
    """Return the ``.json`` files found directly inside BDIR, in sorted path order."""
    backups = [path for path in BDIR.iterdir() if path.suffix == '.json']
    backups.sort()
    return backups
def dkey(x):
    """Return a callable that extracts item ``x`` from its argument (``d -> d[x]``).

    Intended as a ``key=`` function, e.g. ``sorted(items, key=dkey('time'))``.
    The returned callable raises whatever ``d[x]`` raises (``KeyError`` for a
    missing dict key).
    """
    # local import: keeps the helper self-contained without touching file-level imports
    from operator import itemgetter

    return itemgetter(x)
def get_stuff(all=True):
    """Merge every backup file into one bookmark mapping and one highlight mapping.

    Files are processed in the order returned by ``get_files()``; inside each
    file bookmarks and highlights are visited in ascending ``'time'`` order.
    Both results are insertion-ordered, so an entry keeps the position of its
    first appearance.

    Args:
        all: currently unused; kept so existing callers keep working.

    Returns:
        ``(bookmarks, highlights)`` where ``bookmarks`` maps ``Bid -> Bookmark``
        and ``highlights`` maps ``Hid -> Highlight``.

    Raises:
        KeyError: a highlight refers to a bookmark id that has not been seen.
        AssertionError: the same highlight id resolves to different content
            in two backups.
    """
    all_bks: Dict[Bid, Bookmark] = OrderedDict()
    all_hls: Dict[Hid, Highlight] = OrderedDict()
    # TODO can restore url by bookmark id
    for f in get_files():
        with f.open('r') as fo:
            j = json.load(fo)

        for b in sorted(j['bookmarks'], key=dkey('time')):
            bid = str(b['bookmark_id'])
            # Bookmarks may legitimately differ between backups (reading progress
            # changes), so the latest snapshot simply overwrites the earlier one
            # instead of being compared against it.
            all_bks[bid] = Bookmark(
                bid=bid,
                time=b['time'],
                url=b['url'],
                title=b['title'],
            )

        for raw in sorted(j['highlights'], key=dkey('time')):
            # ids are normalised to str so they match the Hid/Bid aliases
            hid = str(raw['highlight_id'])
            bid = str(raw['bookmark_id'])
            # TODO just reference to bookmark in highlight?
            bk = all_bks[bid]
            # aware UTC datetime; avoids the deprecated utcfromtimestamp()
            dt = datetime.fromtimestamp(raw['time'], tz=pytz.utc)
            hl = Highlight(
                uid=hid,
                bid=bk.bid,
                dt=dt,
                text=raw['text'],
                note=raw['note'],
                url=bk.url,
                title=bk.title,
            )
            prev = all_hls.get(hid)
            # a highlight is expected to be identical in every backup containing it
            assert prev is None or prev == hl, '%s vs %s' % (prev, hl)
            all_hls[hid] = hl
    return all_bks, all_hls
def iter_highlights():
    """Return an iterator over the Highlight objects collected by ``get_stuff()``.

    Highlights come out in the insertion order of the mapping built by
    ``get_stuff()``.
    """
    _, hls = get_stuff()
    # Iterate the values: iterating the mapping itself would yield the
    # highlight ids (its keys) rather than the Highlight objects.
    return iter(hls.values())
def get_highlights():
    """Collect everything produced by ``iter_highlights()`` into a list, order preserved."""
    return [*iter_highlights()]
def get_todos():
    """Return the items of ``get_highlights()`` whose note marks them as a todo.

    An item qualifies when its ``note`` is not None and, ignoring leading
    whitespace and letter case, starts with ``todo``.
    """
    todos = []
    for hl in get_highlights():
        note = hl.note
        if note is None:
            continue
        if note.lstrip().lower().startswith('todo'):
            todos.append(hl)
    return todos
def get_pages() -> List[Page]:
    """Build one Page per bookmark, each carrying its highlights sorted by ``dt``.

    Bookmarks without any highlight still produce a Page (with an empty list).
    ``group_by_key`` is used as a mapping from bookmark id to its highlights
    (presumably a dict of lists -- verify against kython).
    """
    bookmarks, highlights = get_stuff()
    by_bookmark = group_by_key(highlights.values(), key=lambda hl: hl.bid)
    # TODO how to make sure there are no dangling bookmarks?
    return [
        Page(
            bookmark=bookmark,
            highlights=sorted(by_bookmark.get(bid, []), key=lambda hl: hl.dt),
        )
        for bid, bookmark in bookmarks.items()
    ]
def main():
    """Print every todo returned by ``get_todos()``, one per line."""
    todos = get_todos()
    for todo in todos:
        print(todo)