extract highlighted thing

This commit is contained in:
Dima Gerasimov 2018-10-12 19:54:51 +01:00
parent fd2715ab74
commit 942c55fe16

View file

@ -5,15 +5,14 @@ from pytz import UTC
from datetime import datetime from datetime import datetime
import os import os
# TODO maybe, it should generate some kind of html snippet?
_PATH = '/L/backups/hypothesis/' _PATH = '/L/backups/hypothesis/'
class Hypothesis(NamedTuple): class Entry(NamedTuple):
dt: datetime dt: datetime
text: str summary: str
tag: str content: str
link: str
eid: str
# TODO guarantee order? # TODO guarantee order?
def _iter(): def _iter():
@ -24,10 +23,26 @@ def _iter():
for i in j: for i in j:
dts = i['created'] dts = i['created']
title = ' '.join(i['document']['title']) title = ' '.join(i['document']['title'])
selectors = i['target'][0].get('selector', None)
if selectors is None:
# TODO warn?...
selectors = []
content = None
for s in selectors:
if 'exact' in s:
content = s['exact']
break
eid = i['id']
link = i['uri']
dt = datetime.strptime(dts[:-3] + dts[-2:], '%Y-%m-%dT%H:%M:%S.%f%z') dt = datetime.strptime(dts[:-3] + dts[-2:], '%Y-%m-%dT%H:%M:%S.%f%z')
yield Hypothesis(dt, title, 'hyp') yield Entry(
dt,
title,
content,
link,
eid,
)
@lru_cache()
def get_entries(): def get_entries():
return list(_iter()) return list(_iter())