This commit is contained in:
Dima Gerasimov 2019-05-04 00:49:20 +01:00
parent 9380a4e8e2
commit d76ba4e77f

View file

@ -1,8 +1,10 @@
#!/usr/bin/python3 #!/usr/bin/python3
from pathlib import Path from pathlib import Path
from datetime import datetime
import logging import logging
from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional
import json import json
import pytz
from kython.kerror import ResT, echain, unwrap, sort_res_by from kython.kerror import ResT, echain, unwrap, sort_res_by
from kython.klogging import setup_logzero from kython.klogging import setup_logzero
@ -20,6 +22,9 @@ def _get_datas() -> List[Path]:
return list(sorted(BDIR.glob('*/state.json'))) return list(sorted(BDIR.glob('*/state.json')))
def parse_dt(s: str) -> datetime:
    """Convert a timestamp string into a timezone-aware UTC ``datetime``.

    The input has to match ``%Y-%m-%dT%H:%M:%S.%fZ`` exactly, i.e. it must
    carry fractional seconds and a literal trailing ``Z``
    (for example ``2019-05-04T00:49:20.123Z``). ``strptime`` raises
    ``ValueError`` for anything else.
    """
    naive = datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ')
    # The trailing 'Z' is matched as a plain character, so the parsed value
    # carries no tzinfo yet; attach UTC explicitly.
    return pytz.utc.localize(naive)
Uid = str Uid = str
class Error(Exception): class Error(Exception):
@ -27,28 +32,26 @@ class Error(Exception):
super().__init__(*args, **kwargs) # type: ignore super().__init__(*args, **kwargs) # type: ignore
self.uid: Uid = p.parent.name self.uid: Uid = p.parent.name
# TODO not sure if I even need comment?
# Ok I guess handling comment-level errors is a bit too much.. # Ok I guess handling comment-level errors is a bit too much..
Cid = str Cid = str
class Comment(NamedTuple):
    """A single comment attached to a highlight."""

    # Comment identifier.
    cid: Cid
    # Creation time; the loader parses it from the exported timestamp string.
    created: datetime
    # Comment body; the loader stores the raw HTML value here.
    comment: str
Hid = str Hid = str
class Highlight(NamedTuple):
    """A highlighted text selection together with the comments made on it."""

    # Highlight identifier.
    hid: Hid
    # Creation time; the loader parses it from the exported timestamp string.
    created: datetime
    # The highlighted text.
    selection: str
    # Comments attached to this highlight (the loader passes a tuple).
    comments: Sequence[Comment]
Result = ResT['Book', Error]
ResultBook = ResT['Book', Error]
class Book(NamedTuple): class Book(NamedTuple):
uid: Uid uid: Uid
created: datetime
filename: str filename: str
title: Optional[str] title: Optional[str]
items: Sequence[Highlight] items: Sequence[Highlight]
@ -61,7 +64,7 @@ class Loader:
self.err = Error(p) self.err = Error(p)
self.logger = get_logger() self.logger = get_logger()
def error(self, cause, extra=''):
    """Build an ``Error`` for this loader's path and chain ``cause`` onto it.

    ``extra`` is forwarded to ``Error`` as an additional positional argument
    and defaults to an empty string. The value produced by ``echain`` is
    returned to the caller, not raised.
    """
    wrapped = Error(self.path, extra)
    # NOTE(review): echain comes from kython.kerror; presumably it links
    # `cause` to `wrapped` and hands `wrapped` back -- confirm.
    return echain(wrapped, cause)
def load_item(self, meta) -> Iterator[Highlight]: def load_item(self, meta) -> Iterator[Highlight]:
@ -104,7 +107,7 @@ class Loader:
cmap[hlid] = ccs cmap[hlid] = ccs
ccs.append(Comment( ccs.append(Comment(
cid=cid.value, cid=cid.value,
created=crt.value, created=parse_dt(crt.value),
comment=html.value, # TODO perhaps coonvert from html to text or org? comment=html.value, # TODO perhaps coonvert from html to text or org?
)) ))
v.consume() v.consume()
@ -132,7 +135,7 @@ class Loader:
yield Highlight( yield Highlight(
hid=hid, hid=hid,
created=crt, created=parse_dt(crt),
selection=text, selection=text,
comments=tuple(comments), comments=tuple(comments),
) )
@ -148,49 +151,42 @@ class Loader:
with wrap(meta) as meta: with wrap(meta) as meta:
yield from self.load_item(meta) yield from self.load_item(meta)
def load(self) -> Iterator[Result]:
    """Read this loader's JSON state file and yield one ``Book`` built from it.

    Nothing is caught here: a JSON decoding failure, a missing required key
    (``docInfo``, ``added``, ``filename``, ``tags``, ``pageMetas``) or a
    timestamp that ``parse_dt`` rejects propagates to whoever iterates the
    generator.
    """
    self.logger.info('processing %s', self.path)
    state = json.loads(self.path.read_text())

    # TODO konsume here as well?
    doc_info = state['docInfo']
    added = doc_info['added']
    filename = doc_info['filename']
    title = doc_info.get('title', None)  # title is optional
    # Not used below, but the lookup still fails on documents without tags.
    tags = doc_info['tags']
    page_metas = state['pageMetas']

    # Highlights are materialised eagerly, before the book is handed out.
    highlights = list(self.load_items(page_metas))
    book = Book(
        uid=self.uid,
        created=parse_dt(added),
        filename=filename,
        title=title,
        items=highlights,
    )
    yield book
# "textHighlights": {},
# "comments": {},
# TODO
# "pagemarks": {},
# "notes": {},
# "questions": {},
# "flashcards": {},
# "areaHighlights": {},
# "screenshots": {},
# "thumbnails": {},
# "readingProgress": {},
# "pageInfo": {
# "num": 1
# }
def iter_entries() -> Iterator[Result]:
    """Yield the load result for every state file found by ``_get_datas``.

    Each file is handled by its own ``Loader``. If loading raises, the
    exception is wrapped via ``Loader.error``, logged, and yielded in place
    of a book, so one broken file does not stop the iteration.
    """
    log = get_logger()
    for state_path in _get_datas():
        book_loader = Loader(state_path)
        try:
            yield from book_loader.load()
        except Exception as exc:
            # Turn the failure into a value the consumer can inspect.
            wrapped = book_loader.error(exc)
            log.exception(wrapped)
            yield wrapped
def get_entries() -> List[Result]:
    """Collect everything from ``iter_entries`` into a list ordered by ``created``.

    The sort key is the ``created`` attribute of each entry, i.e. the book's
    own timestamp rather than that of its first annotation.
    """
    # TODO decide whether ordering by the first annotation would be better
    # NOTE(review): sort_res_by comes from kython.kerror; presumably it applies
    # the key only to successful results -- confirm how errors are placed.
    def created_of(entry):
        return entry.created

    ordered = sort_res_by(iter_entries(), key=created_of)
    return list(ordered)
def main(): def main():
logger = get_logger() logger = get_logger()