From de29758462243c8c53d4064cb7a2fcb78f0ef30b Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 6 Feb 2019 23:50:58 +0000 Subject: [PATCH 1/8] instapaper highlights provider --- instapaper/__init__.py | 70 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 instapaper/__init__.py diff --git a/instapaper/__init__.py b/instapaper/__init__.py new file mode 100644 index 0000000..e40cd44 --- /dev/null +++ b/instapaper/__init__.py @@ -0,0 +1,70 @@ +from datetime import datetime +import json +from pathlib import Path +import pytz +from typing import NamedTuple, Optional + +BDIR = Path('/L/backups/instapaper/') + +class Highlight(NamedTuple): + dt: datetime + hid: str + text: str + note: Optional[str] + url: str + title: str + +def get_files(): + return sorted(f for f in BDIR.iterdir() if f.suffix == '.json') + +def get_stuff(): + all_bks = {} + all_hls = {} + # TODO can restore url by bookmark id + for f in get_files(): + with f.open('r') as fo: + j = json.load(fo) + # TODO what are bookmarks?? + for b in j['bookmarks']: + bid = b['bookmark_id'] + prev = all_bks.get(bid, None) + # assert prev is None or prev == b, '%s vs %s' % (prev, b) + # TODO shit, ok progress can change apparently + all_bks[bid] = b + hls = j['highlights'] + for h in hls: + hid = h['highlight_id'] + prev = all_hls.get(hid, None) + assert prev is None or prev == h + all_hls[hid] = h + return all_bks, all_hls + +def iter_highlights(): + bks, hls = get_stuff() + for h in hls.values(): + bid = h['bookmark_id'] + bk = bks[bid] + dt = pytz.utc.localize(datetime.utcfromtimestamp(h['time'])) + yield Highlight( + hid=str(h['highlight_id']), + dt=dt, + text=h['text'], + note=h['note'], + url=bk['url'], + title=bk['title'], + ) + +def get_highlights(): + return sorted(iter_highlights(), key=lambda h: h.dt) + +def get_todos(): + def is_todo(h): + return h.note is not None and h.note.lstrip().lower().startswith('todo') + return list(filter(is_todo, get_highlights())) + +def main(): + for h in get_todos(): + print(h) + +if __name__ == '__main__': + main() From a017316cbd541f2751ae6bf0a83b20cd67de8090 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 6 Feb 2019 23:53:49 +0000 Subject: [PATCH 2/8] rename hid to uid --- instapaper/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/instapaper/__init__.py b/instapaper/__init__.py index e40cd44..f800477 100644 --- a/instapaper/__init__.py +++ b/instapaper/__init__.py @@ -8,7 +8,7 @@ BDIR = Path('/L/backups/instapaper/') class Highlight(NamedTuple): dt: datetime - hid: str + uid: str text: str note: Optional[str] url: str @@ -46,7 +46,7 @@ def iter_highlights(): bk = bks[bid] dt = pytz.utc.localize(datetime.utcfromtimestamp(h['time'])) yield Highlight( - hid=str(h['highlight_id']), + uid=str(h['highlight_id']), dt=dt, text=h['text'], note=h['note'], From b63a15e6aa34aa69f6bd25a0cf998e114f90bac0 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 9 Mar 2019 11:08:02 +0000 Subject: [PATCH 3/8] enhance instapaper provider --- instapaper/__init__.py | 90 +++++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 24 deletions(-) diff --git a/instapaper/__init__.py b/instapaper/__init__.py index f800477..686da34 100644 --- a/instapaper/__init__.py +++ b/instapaper/__init__.py @@ -2,66 +2,108 @@ from datetime import datetime import json from pathlib import Path import pytz -from typing import NamedTuple, Optional +from typing import NamedTuple, Optional, List, Dict +from collections import OrderedDict + +from kython import group_by_key BDIR = Path('/L/backups/instapaper/') +Bid = str +Hid = str + class Highlight(NamedTuple): dt: datetime - uid: str + uid: Hid + bid: Bid text: str note: Optional[str] url: str title: str +class Bookmark(NamedTuple): + bid: Bid + time: int + url: str + title: str + +class Page(NamedTuple): + bookmark: Bookmark + highlights: List[Highlight] + def get_files(): return sorted(f for f in BDIR.iterdir() if f.suffix == '.json') -def get_stuff(): - all_bks = {} - all_hls = {} +def dkey(x): + return lambda d: d[x] + +def get_stuff(all=True): + all_bks: Dict[Bid, Bookmark] = OrderedDict() + all_hls: Dict[Hid, Highlight] = OrderedDict() # TODO can restore url by bookmark id for f in get_files(): with f.open('r') as fo: j = json.load(fo) - # TODO what are bookmarks?? - for b in j['bookmarks']: - bid = b['bookmark_id'] + for b in sorted(j['bookmarks'], key=dkey('time')): + bid = str(b['bookmark_id']) prev = all_bks.get(bid, None) # assert prev is None or prev == b, '%s vs %s' % (prev, b) # TODO shit, ok progress can change apparently - all_bks[bid] = b + all_bks[bid] = Bookmark( + bid=bid, + time=b['time'], + url=b['url'], + title=b['title'], + ) hls = j['highlights'] - for h in hls: + for h in sorted(hls, key=dkey('time')): hid = h['highlight_id'] + bid = str(h['bookmark_id']) + # TODO just reference to bookmark in hightlight? + bk = all_bks[bid] + dt = pytz.utc.localize(datetime.utcfromtimestamp(h['time'])) + h = Highlight( + uid=hid, + bid=bk.bid, + dt=dt, + text=h['text'], + note=h['note'], + url=bk.url, + title=bk.title, + ) prev = all_hls.get(hid, None) assert prev is None or prev == h all_hls[hid] = h + return all_bks, all_hls def iter_highlights(): - bks, hls = get_stuff() - for h in hls.values(): - bid = h['bookmark_id'] - bk = bks[bid] - dt = pytz.utc.localize(datetime.utcfromtimestamp(h['time'])) - yield Highlight( - uid=str(h['highlight_id']), - dt=dt, - text=h['text'], - note=h['note'], - url=bk['url'], - title=bk['title'], - ) + return iter(get_stuff()[1]) + def get_highlights(): - return sorted(iter_highlights(), key=lambda h: h.dt) + return list(iter_highlights()) + def get_todos(): def is_todo(h): return h.note is not None and h.note.lstrip().lower().startswith('todo') return list(filter(is_todo, get_highlights())) + +def get_pages() -> List[Page]: + bms, hls = get_stuff() + groups = group_by_key(hls.values(), key=lambda h: h.bid) + pages = [] + # TODO how to make sure there are no dangling bookmarks? + for bid, bm in bms.items(): + pages.append(Page( + bookmark=bm, + highlights=sorted(groups.get(bid, []), key=lambda b: b.dt), + )) + return pages + + def main(): for h in get_todos(): print(h) From 3ec273729c4ae4c301ed194a03390f163054ec2c Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Mon, 11 Mar 2019 17:29:20 +0000 Subject: [PATCH 4/8] instapaper links --- instapaper/__init__.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/instapaper/__init__.py b/instapaper/__init__.py index 686da34..05e9fd7 100644 --- a/instapaper/__init__.py +++ b/instapaper/__init__.py @@ -21,12 +21,22 @@ class Highlight(NamedTuple): url: str title: str + @property + def instapaper_link(self) -> str: + return f'https://www.instapaper.com/read/{self.bid}/{self.uid}' + + class Bookmark(NamedTuple): bid: Bid - time: int + dt: datetime url: str title: str + @property + def instapaper_link(self) -> str: + return f'https://www.instapaper.com/read/{self.bid}' + + class Page(NamedTuple): bookmark: Bookmark highlights: List[Highlight] @@ -37,6 +47,11 @@ def get_files(): def dkey(x): return lambda d: d[x] + +def make_dt(time) -> datetime: + return pytz.utc.localize(datetime.utcfromtimestamp(time)) + + def get_stuff(all=True): all_bks: Dict[Bid, Bookmark] = OrderedDict() all_hls: Dict[Hid, Highlight] = OrderedDict() @@ -51,7 +66,7 @@ def get_stuff(all=True): # TODO shit, ok progress can change apparently all_bks[bid] = Bookmark( bid=bid, - time=b['time'], + dt=make_dt(b['time']), url=b['url'], title=b['title'], ) @@ -61,11 +76,10 @@ def get_stuff(all=True): bid = str(h['bookmark_id']) # TODO just reference to bookmark in hightlight? bk = all_bks[bid] - dt = pytz.utc.localize(datetime.utcfromtimestamp(h['time'])) h = Highlight( uid=hid, bid=bk.bid, - dt=dt, + dt=make_dt(h['time']), text=h['text'], note=h['note'], url=bk.url, From 03b937fd3b9a1f68bbbbd5ffb3b72d0fa1cea0a5 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Mon, 11 Mar 2019 23:23:03 +0000 Subject: [PATCH 5/8] Fix ruci --- instapaper/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instapaper/__init__.py b/instapaper/__init__.py index 05e9fd7..c648655 100644 --- a/instapaper/__init__.py +++ b/instapaper/__init__.py @@ -61,7 +61,7 @@ def get_stuff(all=True): j = json.load(fo) for b in sorted(j['bookmarks'], key=dkey('time')): bid = str(b['bookmark_id']) - prev = all_bks.get(bid, None) + prevb = all_bks.get(bid, None) # assert prev is None or prev == b, '%s vs %s' % (prev, b) # TODO shit, ok progress can change apparently all_bks[bid] = Bookmark( From 71e04643a48142ed87f697a76836a93460d00df9 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 12 Mar 2019 14:24:36 +0000 Subject: [PATCH 6/8] fix todos retrieval --- instapaper/__init__.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/instapaper/__init__.py b/instapaper/__init__.py index c648655..05102e4 100644 --- a/instapaper/__init__.py +++ b/instapaper/__init__.py @@ -2,7 +2,7 @@ from datetime import datetime import json from pathlib import Path import pytz -from typing import NamedTuple, Optional, List, Dict +from typing import NamedTuple, Optional, List, Dict, Iterator, Tuple from collections import OrderedDict from kython import group_by_key @@ -52,9 +52,13 @@ def make_dt(time) -> datetime: return pytz.utc.localize(datetime.utcfromtimestamp(time)) -def get_stuff(all=True): - all_bks: Dict[Bid, Bookmark] = OrderedDict() - all_hls: Dict[Hid, Highlight] = OrderedDict() +BDict = Dict[Bid, Bookmark] +HDict = Dict[Hid, Highlight] + + +def get_stuff(all=True) -> Tuple[BDict, HDict]: + all_bks: BDict = OrderedDict() + all_hls: HDict = OrderedDict() # TODO can restore url by bookmark id for f in get_files(): with f.open('r') as fo: @@ -91,18 +95,18 @@ def get_stuff(all=True): return all_bks, all_hls -def iter_highlights(): - return iter(get_stuff()[1]) +def iter_highlights() -> Iterator[Highlight]: + return iter(get_stuff()[1].values()) -def get_highlights(): +def get_highlights() -> List[Highlight]: return list(iter_highlights()) -def get_todos(): +def get_todos() -> List[Highlight]: def is_todo(h): return h.note is not None and h.note.lstrip().lower().startswith('todo') - return list(filter(is_todo, get_highlights())) + return list(filter(is_todo, iter_highlights())) def get_pages() -> List[Page]: @@ -118,6 +122,11 @@ def get_pages() -> List[Page]: return pages +def test_get_todos(): + for t in get_todos(): + print(t) + + def main(): for h in get_todos(): print(h) From 5427abb82a8e2c97301ffda609522b436c1fd98b Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Thu, 14 Mar 2019 08:00:11 +0000 Subject: [PATCH 7/8] hid should be str --- instapaper/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instapaper/__init__.py b/instapaper/__init__.py index 05102e4..a44560a 100644 --- a/instapaper/__init__.py +++ b/instapaper/__init__.py @@ -76,7 +76,7 @@ def get_stuff(all=True) -> Tuple[BDict, HDict]: ) hls = j['highlights'] for h in sorted(hls, key=dkey('time')): - hid = h['highlight_id'] + hid = str(h['highlight_id']) bid = str(h['bookmark_id']) # TODO just reference to bookmark in hightlight? bk = all_bks[bid] From c6fffaac63c7ece76579e61bde7cf214ad85e6c4 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 19 May 2019 11:00:00 +0100 Subject: [PATCH 8/8] add limit for instapaper --- instapaper/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/instapaper/__init__.py b/instapaper/__init__.py index a44560a..b1c479b 100644 --- a/instapaper/__init__.py +++ b/instapaper/__init__.py @@ -56,11 +56,11 @@ BDict = Dict[Bid, Bookmark] HDict = Dict[Hid, Highlight] -def get_stuff(all=True) -> Tuple[BDict, HDict]: +def get_stuff(limit=0) -> Tuple[BDict, HDict]: all_bks: BDict = OrderedDict() all_hls: HDict = OrderedDict() # TODO can restore url by bookmark id - for f in get_files(): + for f in get_files()[-limit:]: with f.open('r') as fo: j = json.load(fo) for b in sorted(j['bookmarks'], key=dkey('time')): @@ -95,12 +95,12 @@ def get_stuff(all=True) -> Tuple[BDict, HDict]: return all_bks, all_hls -def iter_highlights() -> Iterator[Highlight]: - return iter(get_stuff()[1].values()) +def iter_highlights(**kwargs) -> Iterator[Highlight]: + return iter(get_stuff(**kwargs)[1].values()) -def get_highlights() -> List[Highlight]: - return list(iter_highlights()) +def get_highlights(**kwargs) -> List[Highlight]: + return list(iter_highlights(**kwargs)) def get_todos() -> List[Highlight]: @@ -109,8 +109,8 @@ def get_todos() -> List[Highlight]: return list(filter(is_todo, iter_highlights())) -def get_pages() -> List[Page]: - bms, hls = get_stuff() +def get_pages(**kwargs) -> List[Page]: + bms, hls = get_stuff(**kwargs) groups = group_by_key(hls.values(), key=lambda h: h.bid) pages = [] # TODO how to make sure there are no dangling bookmarks?