HPI/instapaper/__init__.py
2019-02-06 23:53:49 +00:00

70 lines
1.8 KiB
Python

from datetime import datetime
import json
from pathlib import Path
import pytz
from typing import NamedTuple, Optional
# Directory that get_files() scans for backup files with a ``.json`` suffix.
BDIR = Path('/L/backups/instapaper/')
class Highlight(NamedTuple):
    """One highlight record combined with the URL and title of its bookmark."""

    # Timezone-aware UTC timestamp built from the highlight's 'time' field.
    dt: datetime
    # The highlight's 'highlight_id', converted to a string.
    uid: str
    # The highlighted text.
    text: str
    # Note attached to the highlight; None when there is no note.
    note: Optional[str]
    # URL of the bookmark the highlight belongs to.
    url: str
    # Title of the bookmark the highlight belongs to.
    title: str
def get_files():
    """Return the entries directly under ``BDIR`` whose suffix is ``.json``, sorted."""
    backups = [entry for entry in BDIR.iterdir() if entry.suffix == '.json']
    backups.sort()
    return backups
def get_stuff(files=None):
    """Merge backup files into bookmark and highlight lookup tables.

    Args:
        files: Optional iterable of ``pathlib.Path`` objects pointing at the
            JSON backups to read, processed in the given order. When omitted
            (``None``), the result of ``get_files()`` is used.

    Returns:
        A ``(all_bks, all_hls)`` tuple of dicts: bookmarks keyed by their
        ``'bookmark_id'`` and highlights keyed by their ``'highlight_id'``.
        For a bookmark seen in several files, the entry from the last file
        processed wins.

    Raises:
        AssertionError: if the same highlight id appears with different
            content in two backups.
        KeyError: if a backup lacks the ``'bookmarks'`` or ``'highlights'``
            key, or an entry lacks its id key.
    """
    if files is None:
        files = get_files()

    all_bks = {}
    all_hls = {}
    # TODO can restore url by bookmark id
    for f in files:
        # JSON is UTF-8; be explicit instead of relying on the locale default.
        with f.open('r', encoding='utf-8') as fo:
            j = json.load(fo)

        # TODO what are bookmarks??
        for b in j['bookmarks']:
            # Bookmarks are not required to be identical across backups
            # (reading progress can change), so the later one overwrites.
            all_bks[b['bookmark_id']] = b

        for h in j['highlights']:
            hid = h['highlight_id']
            prev = all_hls.get(hid)
            # Highlights are expected to be immutable between backups.
            assert prev is None or prev == h, '%s vs %s' % (prev, h)
            all_hls[hid] = h
    return all_bks, all_hls
def iter_highlights():
    """Yield a ``Highlight`` for every highlight returned by ``get_stuff()``.

    Each highlight is joined with its bookmark (looked up by
    ``'bookmark_id'``) to obtain the URL and title.

    Yields:
        Highlight: one per stored highlight, in the iteration order of the
        highlights dict; ``dt`` is a timezone-aware UTC datetime.

    Raises:
        KeyError: if a highlight refers to a bookmark id that is not present
            in the bookmarks table, or a required field is missing.
    """
    bks, hls = get_stuff()
    for h in hls.values():
        bk = bks[h['bookmark_id']]
        # Build the aware UTC datetime in one step; this avoids the naive
        # intermediate from datetime.utcfromtimestamp(), which is deprecated
        # since Python 3.12.
        dt = datetime.fromtimestamp(h['time'], tz=pytz.utc)
        yield Highlight(
            uid=str(h['highlight_id']),
            dt=dt,
            text=h['text'],
            note=h['note'],
            url=bk['url'],
            title=bk['title'],
        )
def get_highlights():
    """Return every highlight from ``iter_highlights()`` as a list ordered by ``dt``."""
    highlights = list(iter_highlights())
    highlights.sort(key=lambda item: item.dt)
    return highlights
def get_todos():
    """Return the highlights whose note starts with "todo".

    The match ignores leading whitespace and letter case; highlights without
    a note are skipped. Order follows ``get_highlights()``.
    """
    todos = []
    for hl in get_highlights():
        note = hl.note
        if note is None:
            continue
        if note.lstrip().lower().startswith('todo'):
            todos.append(hl)
    return todos
def main():
    """Print each todo highlight on its own line."""
    todos = get_todos()
    for todo in todos:
        print(todo)


# Run the todo listing only when executed as a script, not on import.
if __name__ == '__main__':
    main()