initial work on polar

This commit is contained in:
Dima Gerasimov 2019-05-03 22:53:59 +01:00
commit d24f7e18f2

172
reading/polar.py Executable file
View file

@ -0,0 +1,172 @@
#!/usr/bin/python3
from pathlib import Path
import logging
from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional
import json
from kython.kerror import ResT, echain, unwrap, sort_res_by
from kython.klogging import setup_logzero
BDIR = Path('/L/zzz_syncthing/data/.polar')
def get_logger():
return logging.getLogger('polar-provider')
def _get_datas() -> List[Path]:
return list(sorted(BDIR.glob('*/state.json')))
Uid = str
class Error(Exception):
def __init__(self, p: Path, *args, **kwargs) -> None:
super().__init__(*args, **kwargs) # type: ignore
self.uid: Uid = p.parent.name
ResultItem = ResT['Item', Error]
class Item(NamedTuple):
uid: Uid
ResultBook = ResT['Book', Error]
class Book(NamedTuple):
uid: Uid
filename: str
title: Optional[str]
items: Sequence[ResultItem]
from kython.konsume import zoom, akeq
class Loader:
def __init__(self, p: Path) -> None:
self.path = p
self.uid = self.path.parent.name
self.err = Error(p)
self.logger = get_logger()
def error(self, cause, extra):
return echain(Error(self.path, extra), cause)
def load_item(self, meta) -> Iterator[ResultItem]:
# TODO this should be destructive zoom?
try:
meta['notes'].zoom()
meta['pagemarks'].zoom()
if 'notes' in meta:
# TODO something nicer?
meta['notes'].zoom()
meta['comments'].zoom()
meta['questions'].zoom()
meta['flashcards'].zoom()
meta['textHighlights'].zoom()
meta['areaHighlights'].zoom()
meta['screenshots'].zoom()
meta['thumbnails'].zoom()
meta['readingProgress'].zoom()
# TODO want to ignore the whold subtree..
pi = meta['pageInfo'].zoom()
pi['num'].zoom()
except Exception as exx:
err = self.error(exx, meta)
self.logger.exception(err)
yield err
from pprint import pprint
# pprint(notes)
# try:
# pm, notes, comm, que, flash, text, area, screens, thumb, rp, pi = zoom(
# meta,
# )
# except Exception as exx:
# yield echain(self.err, exx)
# return
# def aempty(x):
# akeq(x)
# try:
# aempty(pm)
# aempty(que)
# aempty(flash)
# aempty(text)
# aempty(area) # TODO these should be yieldy?
# aempty(screens)
# aempty(rp)
# akeq(pi, 'num')
# except Exception as ex:
# # TODO make it a method?
# yield echain(self.err, ex)
# aempty(notes)
# yield Item(self.uid)
def load_items(self, metas) -> Iterator[ResultItem]:
from kython.konsume import wrap
for p, meta in metas.items():
with wrap(meta) as meta:
yield from self.load_item(meta)
def load(self) -> Iterator[ResultBook]:
self.logger.info('processing %s', self.path)
j = json.loads(self.path.read_text())
try:
di = j['docInfo']
filename = di['filename']
title = di.get('title', None)
tags = di['tags']
pm = j['pageMetas']
except Exception as ex:
self.logger.exception(ex)
yield echain(self.err, ex)
return
# TODO should I group by book???
yield Book(
uid=self.uid,
filename=filename,
title=title,
items=list(self.load_items(pm)),
)
# "textHighlights": {},
# "comments": {},
# TODO
# "pagemarks": {},
# "notes": {},
# "questions": {},
# "flashcards": {},
# "areaHighlights": {},
# "screenshots": {},
# "thumbnails": {},
# "readingProgress": {},
# "pageInfo": {
# "num": 1
# }
def iter_entries() -> Iterator[ResultBook]:
for d in _get_datas():
yield from Loader(d).load()
def main():
logger = get_logger()
setup_logzero(logger, level=logging.DEBUG)
for entry in iter_entries():
logger.info('processed %s', entry.uid)
for i in entry.items:
try:
ii = unwrap(i)
except Error as e:
logger.exception(e)
else:
logger.info(ii)
if __name__ == '__main__':
main()