enhance instapaper provider
This commit is contained in:
parent
a017316cbd
commit
b63a15e6aa
1 changed file with 66 additions and 24 deletions
|
@ -2,66 +2,108 @@ from datetime import datetime
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pytz
|
import pytz
|
||||||
from typing import NamedTuple, Optional
|
from typing import NamedTuple, Optional, List, Dict
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
from kython import group_by_key
|
||||||
|
|
||||||
BDIR = Path('/L/backups/instapaper/')
|
BDIR = Path('/L/backups/instapaper/')
|
||||||
|
|
||||||
|
Bid = str
|
||||||
|
Hid = str
|
||||||
|
|
||||||
class Highlight(NamedTuple):
|
class Highlight(NamedTuple):
|
||||||
dt: datetime
|
dt: datetime
|
||||||
uid: str
|
uid: Hid
|
||||||
|
bid: Bid
|
||||||
text: str
|
text: str
|
||||||
note: Optional[str]
|
note: Optional[str]
|
||||||
url: str
|
url: str
|
||||||
title: str
|
title: str
|
||||||
|
|
||||||
|
class Bookmark(NamedTuple):
|
||||||
|
bid: Bid
|
||||||
|
time: int
|
||||||
|
url: str
|
||||||
|
title: str
|
||||||
|
|
||||||
|
class Page(NamedTuple):
|
||||||
|
bookmark: Bookmark
|
||||||
|
highlights: List[Highlight]
|
||||||
|
|
||||||
def get_files():
|
def get_files():
|
||||||
return sorted(f for f in BDIR.iterdir() if f.suffix == '.json')
|
return sorted(f for f in BDIR.iterdir() if f.suffix == '.json')
|
||||||
|
|
||||||
def get_stuff():
|
def dkey(x):
|
||||||
all_bks = {}
|
return lambda d: d[x]
|
||||||
all_hls = {}
|
|
||||||
|
def get_stuff(all=True):
|
||||||
|
all_bks: Dict[Bid, Bookmark] = OrderedDict()
|
||||||
|
all_hls: Dict[Hid, Highlight] = OrderedDict()
|
||||||
# TODO can restore url by bookmark id
|
# TODO can restore url by bookmark id
|
||||||
for f in get_files():
|
for f in get_files():
|
||||||
with f.open('r') as fo:
|
with f.open('r') as fo:
|
||||||
j = json.load(fo)
|
j = json.load(fo)
|
||||||
# TODO what are bookmarks??
|
for b in sorted(j['bookmarks'], key=dkey('time')):
|
||||||
for b in j['bookmarks']:
|
bid = str(b['bookmark_id'])
|
||||||
bid = b['bookmark_id']
|
|
||||||
prev = all_bks.get(bid, None)
|
prev = all_bks.get(bid, None)
|
||||||
# assert prev is None or prev == b, '%s vs %s' % (prev, b)
|
# assert prev is None or prev == b, '%s vs %s' % (prev, b)
|
||||||
# TODO shit, ok progress can change apparently
|
# TODO shit, ok progress can change apparently
|
||||||
all_bks[bid] = b
|
all_bks[bid] = Bookmark(
|
||||||
|
bid=bid,
|
||||||
|
time=b['time'],
|
||||||
|
url=b['url'],
|
||||||
|
title=b['title'],
|
||||||
|
)
|
||||||
hls = j['highlights']
|
hls = j['highlights']
|
||||||
for h in hls:
|
for h in sorted(hls, key=dkey('time')):
|
||||||
hid = h['highlight_id']
|
hid = h['highlight_id']
|
||||||
prev = all_hls.get(hid, None)
|
bid = str(h['bookmark_id'])
|
||||||
assert prev is None or prev == h
|
# TODO just reference to bookmark in highlight?
|
||||||
all_hls[hid] = h
|
bk = all_bks[bid]
|
||||||
return all_bks, all_hls
|
|
||||||
|
|
||||||
def iter_highlights():
|
|
||||||
bks, hls = get_stuff()
|
|
||||||
for h in hls.values():
|
|
||||||
bid = h['bookmark_id']
|
|
||||||
bk = bks[bid]
|
|
||||||
dt = pytz.utc.localize(datetime.utcfromtimestamp(h['time']))
|
dt = pytz.utc.localize(datetime.utcfromtimestamp(h['time']))
|
||||||
yield Highlight(
|
h = Highlight(
|
||||||
uid=str(h['highlight_id']),
|
uid=hid,
|
||||||
|
bid=bk.bid,
|
||||||
dt=dt,
|
dt=dt,
|
||||||
text=h['text'],
|
text=h['text'],
|
||||||
note=h['note'],
|
note=h['note'],
|
||||||
url=bk['url'],
|
url=bk.url,
|
||||||
title=bk['title'],
|
title=bk.title,
|
||||||
)
|
)
|
||||||
|
prev = all_hls.get(hid, None)
|
||||||
|
assert prev is None or prev == h
|
||||||
|
all_hls[hid] = h
|
||||||
|
|
||||||
|
return all_bks, all_hls
|
||||||
|
|
||||||
|
def iter_highlights():
|
||||||
|
return iter(get_stuff()[1])
|
||||||
|
|
||||||
|
|
||||||
def get_highlights():
|
def get_highlights():
|
||||||
return sorted(iter_highlights(), key=lambda h: h.dt)
|
return list(iter_highlights())
|
||||||
|
|
||||||
|
|
||||||
def get_todos():
|
def get_todos():
|
||||||
def is_todo(h):
|
def is_todo(h):
|
||||||
return h.note is not None and h.note.lstrip().lower().startswith('todo')
|
return h.note is not None and h.note.lstrip().lower().startswith('todo')
|
||||||
return list(filter(is_todo, get_highlights()))
|
return list(filter(is_todo, get_highlights()))
|
||||||
|
|
||||||
|
|
||||||
|
def get_pages() -> List[Page]:
|
||||||
|
bms, hls = get_stuff()
|
||||||
|
groups = group_by_key(hls.values(), key=lambda h: h.bid)
|
||||||
|
pages = []
|
||||||
|
# TODO how to make sure there are no dangling bookmarks?
|
||||||
|
for bid, bm in bms.items():
|
||||||
|
pages.append(Page(
|
||||||
|
bookmark=bm,
|
||||||
|
highlights=sorted(groups.get(bid, []), key=lambda b: b.dt),
|
||||||
|
))
|
||||||
|
return pages
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
for h in get_todos():
|
for h in get_todos():
|
||||||
print(h)
|
print(h)
|
||||||
|
|
Loading…
Add table
Reference in a new issue