parse dt
This commit is contained in:
parent
9380a4e8e2
commit
d76ba4e77f
1 changed file with 34 additions and 38 deletions
|
@ -1,8 +1,10 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional
|
from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional
|
||||||
import json
|
import json
|
||||||
|
import pytz
|
||||||
|
|
||||||
from kython.kerror import ResT, echain, unwrap, sort_res_by
|
from kython.kerror import ResT, echain, unwrap, sort_res_by
|
||||||
from kython.klogging import setup_logzero
|
from kython.klogging import setup_logzero
|
||||||
|
@ -20,6 +22,9 @@ def _get_datas() -> List[Path]:
|
||||||
return list(sorted(BDIR.glob('*/state.json')))
|
return list(sorted(BDIR.glob('*/state.json')))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_dt(s: str) -> datetime:
    """Parse a timestamp such as ``2019-05-01T12:34:56.789Z`` into an aware UTC datetime.

    Raises ValueError (from ``strptime``) when ``s`` does not match the
    ``%Y-%m-%dT%H:%M:%S.%fZ`` format.
    """
    # The trailing 'Z' is matched as a literal character, so strptime yields
    # a naive datetime; UTC is attached explicitly afterwards.
    naive = datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ')
    return pytz.utc.localize(naive)
|
||||||
|
|
||||||
Uid = str
|
Uid = str
|
||||||
|
|
||||||
class Error(Exception):
|
class Error(Exception):
|
||||||
|
@ -27,28 +32,26 @@ class Error(Exception):
|
||||||
super().__init__(*args, **kwargs) # type: ignore
|
super().__init__(*args, **kwargs) # type: ignore
|
||||||
self.uid: Uid = p.parent.name
|
self.uid: Uid = p.parent.name
|
||||||
|
|
||||||
# TODO not sure if I even need comment?
|
|
||||||
# Ok I guess handling comment-level errors is a bit too much..
|
# Ok I guess handling comment-level errors is a bit too much..
|
||||||
|
|
||||||
Cid = str
|
Cid = str
|
||||||
class Comment(NamedTuple):
|
class Comment(NamedTuple):
|
||||||
cid: Cid
|
cid: Cid
|
||||||
created: str # TODO datetime (parse iso)
|
created: datetime
|
||||||
comment: str
|
comment: str
|
||||||
|
|
||||||
Hid = str
|
Hid = str
|
||||||
class Highlight(NamedTuple):
|
class Highlight(NamedTuple):
|
||||||
hid: Hid
|
hid: Hid
|
||||||
created: str # TODO datetime
|
created: datetime
|
||||||
selection: str
|
selection: str
|
||||||
comments: Sequence[Comment]
|
comments: Sequence[Comment]
|
||||||
|
|
||||||
|
|
||||||
|
Result = ResT['Book', Error]
|
||||||
ResultBook = ResT['Book', Error]
|
|
||||||
|
|
||||||
class Book(NamedTuple):
|
class Book(NamedTuple):
|
||||||
uid: Uid
|
uid: Uid
|
||||||
|
created: datetime
|
||||||
filename: str
|
filename: str
|
||||||
title: Optional[str]
|
title: Optional[str]
|
||||||
items: Sequence[Highlight]
|
items: Sequence[Highlight]
|
||||||
|
@ -61,7 +64,7 @@ class Loader:
|
||||||
self.err = Error(p)
|
self.err = Error(p)
|
||||||
self.logger = get_logger()
|
self.logger = get_logger()
|
||||||
|
|
||||||
def error(self, cause, extra):
|
def error(self, cause, extra=''):
|
||||||
return echain(Error(self.path, extra), cause)
|
return echain(Error(self.path, extra), cause)
|
||||||
|
|
||||||
def load_item(self, meta) -> Iterator[Highlight]:
|
def load_item(self, meta) -> Iterator[Highlight]:
|
||||||
|
@ -104,7 +107,7 @@ class Loader:
|
||||||
cmap[hlid] = ccs
|
cmap[hlid] = ccs
|
||||||
ccs.append(Comment(
|
ccs.append(Comment(
|
||||||
cid=cid.value,
|
cid=cid.value,
|
||||||
created=crt.value,
|
created=parse_dt(crt.value),
|
||||||
comment=html.value, # TODO perhaps convert from html to text or org?
|
comment=html.value, # TODO perhaps convert from html to text or org?
|
||||||
))
|
))
|
||||||
v.consume()
|
v.consume()
|
||||||
|
@ -132,7 +135,7 @@ class Loader:
|
||||||
|
|
||||||
yield Highlight(
|
yield Highlight(
|
||||||
hid=hid,
|
hid=hid,
|
||||||
created=crt,
|
created=parse_dt(crt),
|
||||||
selection=text,
|
selection=text,
|
||||||
comments=tuple(comments),
|
comments=tuple(comments),
|
||||||
)
|
)
|
||||||
|
@ -148,49 +151,42 @@ class Loader:
|
||||||
with wrap(meta) as meta:
|
with wrap(meta) as meta:
|
||||||
yield from self.load_item(meta)
|
yield from self.load_item(meta)
|
||||||
|
|
||||||
def load(self) -> Iterator[ResultBook]:
|
def load(self) -> Iterator[Result]:
|
||||||
self.logger.info('processing %s', self.path)
|
self.logger.info('processing %s', self.path)
|
||||||
j = json.loads(self.path.read_text())
|
j = json.loads(self.path.read_text())
|
||||||
|
|
||||||
try:
|
# TODO konsume here as well?
|
||||||
di = j['docInfo']
|
di = j['docInfo']
|
||||||
|
added = di['added']
|
||||||
filename = di['filename']
|
filename = di['filename']
|
||||||
title = di.get('title', None)
|
title = di.get('title', None)
|
||||||
tags = di['tags']
|
tags = di['tags']
|
||||||
pm = j['pageMetas']
|
pm = j['pageMetas']
|
||||||
except Exception as ex:
|
|
||||||
err = self.error(ex, j)
|
|
||||||
self.logger.exception(err)
|
|
||||||
yield err
|
|
||||||
return
|
|
||||||
|
|
||||||
# TODO should I group by book???
|
|
||||||
yield Book(
|
yield Book(
|
||||||
uid=self.uid,
|
uid=self.uid,
|
||||||
|
created=parse_dt(added),
|
||||||
filename=filename,
|
filename=filename,
|
||||||
title=title,
|
title=title,
|
||||||
items=list(self.load_items(pm)),
|
items=list(self.load_items(pm)),
|
||||||
)
|
)
|
||||||
# "textHighlights": {},
|
|
||||||
# "comments": {},
|
|
||||||
# TODO
|
|
||||||
# "pagemarks": {},
|
|
||||||
# "notes": {},
|
|
||||||
# "questions": {},
|
|
||||||
# "flashcards": {},
|
|
||||||
# "areaHighlights": {},
|
|
||||||
# "screenshots": {},
|
|
||||||
# "thumbnails": {},
|
|
||||||
# "readingProgress": {},
|
|
||||||
# "pageInfo": {
|
|
||||||
# "num": 1
|
|
||||||
# }
|
|
||||||
|
|
||||||
|
|
||||||
def iter_entries() -> Iterator[ResultBook]:
|
def iter_entries() -> Iterator[Result]:
|
||||||
|
logger = get_logger()
|
||||||
for d in _get_datas():
|
for d in _get_datas():
|
||||||
yield from Loader(d).load()
|
loader = Loader(d)
|
||||||
|
try:
|
||||||
|
yield from loader.load()
|
||||||
|
except Exception as ee:
|
||||||
|
err = loader.error(ee)
|
||||||
|
logger.exception(err)
|
||||||
|
yield err
|
||||||
|
|
||||||
|
def get_entries() -> List[Result]:
    """Return every result produced by ``iter_entries()`` as a list ordered by ``created``.

    Ordering is delegated to ``sort_res_by``; how error results are placed
    relative to successfully loaded ones is decided by that helper.
    """
    # TODO: the original note wonders whether sorting by first annotation
    # would be the reasonable order; for now the key is the ``created`` field.
    def created_of(entry):
        return entry.created

    ordered = sort_res_by(iter_entries(), key=created_of)
    return list(ordered)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
Loading…
Add table
Reference in a new issue