"""Access to exported Instapaper data: bookmarks and their highlights.

The data is read from the ``*.json`` files found in the export directory,
which is either set explicitly via :func:`configure` or taken from the
package-level ``paths`` module.
"""
from datetime import datetime
import json
from pathlib import Path
from typing import NamedTuple, Optional, List, Dict, Iterator, Tuple
from collections import OrderedDict

import pytz

from .common import group_by_key, PathIsh


# Directory with the JSON exports; None means "use the fallback from paths".
_export_dir: Optional[Path] = None


def configure(*, export_dir: Optional[PathIsh]=None) -> None:
    """Override the directory the export files are read from.

    Passing ``None`` (the default) leaves the current setting untouched.
    """
    global _export_dir
    if export_dir is not None:
        _export_dir = Path(export_dir)


def _get_files() -> List[Path]:
    """Return all ``*.json`` files in the export directory, sorted by path."""
    export_dir = _export_dir
    if export_dir is None:
        # fallback to my_configuration
        from . import paths
        export_dir = paths.instapexport.export_dir
    return sorted(Path(export_dir).glob('*.json'))


# Both ids are kept as strings (see the str() conversions in get_stuff).
Bid = str
Hid = str


class Highlight(NamedTuple):
    """A single highlight together with the url/title of its bookmark."""

    dt: datetime          # timezone-aware (UTC) time taken from the export
    uid: Hid              # highlight id
    bid: Bid              # id of the bookmark the highlight belongs to
    text: str             # highlighted text
    note: Optional[str]   # note attached to the highlight, if any
    url: str              # copied from the owning bookmark
    title: str            # copied from the owning bookmark

    @property
    def instapaper_link(self) -> str:
        """Link to this highlight on instapaper.com."""
        return f'https://www.instapaper.com/read/{self.bid}/{self.uid}'


class Bookmark(NamedTuple):
    """A bookmarked page."""

    bid: Bid       # bookmark id
    dt: datetime   # timezone-aware (UTC) time taken from the export
    url: str
    title: str

    @property
    def instapaper_link(self) -> str:
        """Link to this bookmark on instapaper.com."""
        return f'https://www.instapaper.com/read/{self.bid}'


class Page(NamedTuple):
    """A bookmark paired with its highlights."""

    bookmark: Bookmark
    highlights: List[Highlight]


BDict = Dict[Bid, Bookmark]
HDict = Dict[Hid, Highlight]


def get_stuff(limit=0) -> Tuple[BDict, HDict]:
    """Load bookmarks and highlights from the export files.

    Files are processed in sorted order, so data from later files overrides
    earlier data for the same bookmark id.

    :param limit: number of files to read, counted from the end of the sorted
        file list (the slice used is ``files[-limit:]``); ``0`` reads all of
        them.
    :return: ``(bookmarks, highlights)`` ordered dicts keyed by bookmark id
        and highlight id respectively.
    :raises KeyError: if a highlight refers to a bookmark id that has not
        been seen in the files read so far.
    :raises AssertionError: if the same highlight id shows up with different
        contents.
    """
    def make_dt(ts) -> datetime:
        # Aware UTC datetime; equivalent to localizing utcfromtimestamp(),
        # which is deprecated since Python 3.12.
        return datetime.fromtimestamp(ts, tz=pytz.utc)

    def by_time(item):
        return item['time']

    all_bks: BDict = OrderedDict()
    all_hls: HDict = OrderedDict()
    # TODO can restore url by bookmark id
    for f in _get_files()[-limit:]:
        # JSON is UTF-8 by specification; don't depend on the locale default.
        with f.open('r', encoding='utf-8') as fo:
            j = json.load(fo)

        for b in sorted(j['bookmarks'], key=by_time):
            bid = str(b['bookmark_id'])
            # No consistency check against a previously seen bookmark here:
            # TODO apparently bookmark data (e.g. progress) can change
            # between exports, so the latest one simply wins.
            all_bks[bid] = Bookmark(
                bid=bid,
                dt=make_dt(b['time']),
                url=b['url'],
                title=b['title'],
            )

        for raw in sorted(j['highlights'], key=by_time):
            hid = str(raw['highlight_id'])
            bid = str(raw['bookmark_id'])
            # TODO just reference the bookmark in the highlight?
            bk = all_bks[bid]
            hl = Highlight(
                uid=hid,
                bid=bk.bid,
                dt=make_dt(raw['time']),
                text=raw['text'],
                note=raw['note'],
                url=bk.url,
                title=bk.title,
            )
            prev = all_hls.get(hid, None)
            assert prev is None or prev == hl, f'{prev} vs {hl}'
            all_hls[hid] = hl

    return all_bks, all_hls


def iter_highlights(**kwargs) -> Iterator[Highlight]:
    """Iterate over all highlights; ``kwargs`` are passed to get_stuff()."""
    _, highlights = get_stuff(**kwargs)
    return iter(highlights.values())


def get_highlights(**kwargs) -> List[Highlight]:
    """Return all highlights as a list; ``kwargs`` go to get_stuff()."""
    return list(iter_highlights(**kwargs))


def get_todos() -> List[Highlight]:
    """Return highlights whose note starts with 'todo'.

    Leading whitespace in the note is ignored and the comparison is
    case-insensitive; highlights without a note never match.
    """
    def is_todo(h):
        note = (h.note or '').lstrip().lower()
        return note.startswith('todo')

    return [h for h in iter_highlights() if is_todo(h)]


def get_pages(**kwargs) -> List[Page]:
    """Return one Page per bookmark, with its highlights sorted by time.

    Bookmarks without highlights get an empty list. ``kwargs`` are passed
    to get_stuff().
    """
    bms, hls = get_stuff(**kwargs)
    groups = group_by_key(hls.values(), key=lambda h: h.bid)
    # TODO how to make sure there are no dangling bookmarks?
    return [
        Page(
            bookmark=bm,
            highlights=sorted(groups.get(bid, []), key=lambda h: h.dt),
        )
        for bid, bm in bms.items()
    ]


def test_get_todos():
    """Smoke test: print every todo highlight."""
    for t in get_todos():
        print(t)


def main():
    """Print every todo highlight."""
    for h in get_todos():
        print(h)