adjust hypothesis module to use model.py

This commit is contained in:
Dima Gerasimov 2019-09-23 01:42:42 +01:00
parent c44f9293bf
commit 7a97c61e0f

View file

@ -1,85 +1,49 @@
from functools import lru_cache from functools import lru_cache
from . import paths
@lru_cache()
def hypexport():
    """
    Import ``model.py`` from the hypexport repo via kython's ``import_file``.

    The result is memoized by ``lru_cache``, so the file is imported only on
    the first call; later calls return the same object.
    """
    # function-scope import, same as before
    from kython import import_file

    model_py = paths.hypexport.repo / 'model.py'
    return import_file(model_py)
Annotation = hypexport().Annotation
def get_model():
    """
    Construct the hypexport ``Model`` from the ``*.json`` files found in the
    export directory, passed in sorted order.
    """
    export_dir = paths.hypexport.export_dir
    # sorted() already returns a new list, so no extra list() wrapper is needed
    json_files = sorted(export_dir.glob('*.json'))
    return hypexport().Model(json_files)
from kython import listdir_abs from kython import listdir_abs
from typing import Dict, List, NamedTuple, Optional, Sequence from typing import Dict, List, NamedTuple, Optional, Sequence
from pathlib import Path from pathlib import Path
import json
from pytz import UTC
from datetime import datetime from datetime import datetime
import os
from kython import group_by_key, the, cproperty from kython import group_by_key, the, cproperty
_PATH = '/L/backups/hypothesis/'
Url = str
class Entry(NamedTuple):
    """
    Single entry of the hypothesis export.
    """
    dt: datetime
    summary: str
    # Might be None, for instance if the page was only marked with tags.
    # Not sure if we want to handle that case somehow separately.
    content: Optional[str]
    link: Url
    eid: str
    annotation: Optional[str]
    context: Url
    tags: Sequence[str]
    hyp_link: Url

    @property
    def title(self):
        # TODO eh, `title` just mirrors `summary` -- remove one of them?...
        return self.summary
class Page(NamedTuple): class Page(NamedTuple):
highlights: Sequence[Entry] """
Represents annotated page along with the annotations
"""
annotations: Sequence[Annotation]
@cproperty @cproperty
def link(self): def link(self):
return the(h.link for h in self.highlights) return the(h.link for h in self.annotations)
@cproperty @cproperty
def title(self): def title(self):
return the(h.summary for h in self.highlights) return the(h.title for h in self.annotations)
@cproperty @cproperty
def dt(self): def dt(self) -> datetime:
return min(h.dt for h in self.highlights) return min(h.dt for h in self.annotations)
# TODO guarantee order?
def _iter(): def _iter():
last = max(listdir_abs(_PATH)) yield from get_model().iter_annotations()
with Path(last).open() as fo:
j = json.load(fo)
for i in j:
dts = i['created']
title = ' '.join(i['document']['title'])
selectors = i['target'][0].get('selector', None)
if selectors is None:
# TODO warn?...
selectors = []
content: Optional[str] = None
for s in selectors:
if 'exact' in s:
content = s['exact']
break
eid = i['id']
link = i['uri']
dt = datetime.strptime(dts[:-3] + dts[-2:], '%Y-%m-%dT%H:%M:%S.%f%z')
txt = i['text']
annotation = None if len(txt.strip()) == 0 else txt
context = i['links']['incontext']
yield Entry(
dt=dt,
summary=title,
content=content,
link=link,
eid=eid,
annotation=annotation,
context=context, # TODO FIXME is context used anywhere?
tags=tuple(i['tags']),
hyp_link=context,
)
def get_pages() -> List[Page]: def get_pages() -> List[Page]:
@ -87,23 +51,24 @@ def get_pages() -> List[Page]:
pages = [] pages = []
for link, group in grouped.items(): for link, group in grouped.items():
sgroup = tuple(sorted(group, key=lambda e: e.dt)) sgroup = tuple(sorted(group, key=lambda e: e.dt))
pages.append(Page(highlights=sgroup)) pages.append(Page(annotations=sgroup))
pages = list(sorted(pages, key=lambda p: p.dt)) pages = list(sorted(pages, key=lambda p: p.dt))
# TODO fixme page tag?? # TODO fixme page tag??
return pages return pages
# TODO is it even necessary?
def get_entries(): def get_entries():
return list(_iter()) return list(_iter())
def get_todos(): def get_todos():
def is_todo(e: Entry) -> bool: def is_todo(e: Annotation) -> bool:
if any(t.lower() == 'todo' for t in e.tags): if any(t.lower() == 'todo' for t in e.tags):
return True return True
if e.annotation is None: if e.text is None:
return False return False
return e.annotation.lstrip().lower().startswith('todo') return e.text.lstrip().lower().startswith('todo')
return list(filter(is_todo, get_entries())) return list(filter(is_todo, get_entries()))