From b2b7eee480678875c0209464e9c21a1febc883a7 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 07:42:21 +0100 Subject: [PATCH 01/10] polar: add test against custom public repos --- tests/extra/polar.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tests/extra/polar.py diff --git a/tests/extra/polar.py b/tests/extra/polar.py new file mode 100644 index 0000000..e8c1af0 --- /dev/null +++ b/tests/extra/polar.py @@ -0,0 +1,30 @@ +from pathlib import Path + +ROOT = Path(__file__).parent.parent.absolute() + + +import pytest # type: ignore + +# todo maybe search fot info.json recursively? +@pytest.mark.parametrize('dotpolar', [ + 'data/polar/BojanKV_polar/.polar', + 'data/polar/TheCedarPrince_KnowledgeRepository', + 'data/polar/coelias_polardocs', + 'data/polar/warkdarrior_polar-document-repository' +]) +def test_hpi(dotpolar: str): + pdir = Path(ROOT / dotpolar) + class user_config: + export_dir = pdir + + import my.config + setattr(my.config, 'polar', user_config) + import sys + M = 'my.reading.polar' + if M in sys.modules: + del sys.modules[M] + # TODO maybe set config directly against polar module? + + import my.reading.polar as polar + from my.reading.polar import get_entries + assert len(list(get_entries())) > 10 From 8f86d7706b8c59b98208f86760c24957b9b3fa6e Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 07:57:51 +0100 Subject: [PATCH 02/10] core: use appdirs for ~/.config detection --- my/core/init.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/my/core/init.py b/my/core/init.py index e3a5e7a..4070f4d 100644 --- a/my/core/init.py +++ b/my/core/init.py @@ -30,6 +30,7 @@ def setup_config() -> None: import os import warnings from typing import Optional + import appdirs # type: ignore[import] # not sure if that's necessary, i.e. could rely on PYTHONPATH instead # on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path? @@ -37,9 +38,7 @@ def setup_config() -> None: if mvar is not None: mycfg_dir = Path(mvar) else: - # TODO use appdir?? - cfg_dir = Path('~/.config').expanduser() - mycfg_dir = cfg_dir / 'my' + mycfg_dir = Path(appdirs.user_config_dir('my')) if not mycfg_dir.exists(): warnings.warn(f"my.config package isn't found! (expected at {mycfg_dir}). This is likely to result in issues.") From f3d5064ff2ef9eda0b0fe9ac404d631bb7eef75c Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 08:14:06 +0100 Subject: [PATCH 03/10] polar: allow properly specifying polar_dir, with ~ as a default --- doc/MODULES.org | 12 ++++++++++++ my/core/__init__.py | 2 +- my/reading/polar.py | 40 ++++++++++++++++++++++++++++++++++------ tests/extra/polar.py | 13 ++++++++----- 4 files changed, 55 insertions(+), 12 deletions(-) diff --git a/doc/MODULES.org b/doc/MODULES.org index ddff2bd..0e01188 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -33,6 +33,7 @@ modules = [ ('twint' , 'my.twitter.twint' ), ('twitter', 'my.twitter.archive' ), ('lastfm' , 'my.lastfm' ), + ('polar' , 'my.reading.polar' ), ] def indent(s, spaces=4): @@ -117,4 +118,15 @@ for cls, p in modules: """ export_path: Paths #+end_src +- [[file:../my/reading/polar.py][my.reading.polar]] + + [[https://github.com/burtonator/polar-books][Polar]] articles and highlights + + #+begin_src python + class polar: + ''' + Polar config is optional, you only need it if you want to specify custom 'polar_dir' + ''' + polar_dir: Path = Path('~/.polar').expanduser() + #+end_src :end: diff --git a/my/core/__init__.py b/my/core/__init__.py index bc12b60..4515235 100644 --- a/my/core/__init__.py +++ b/my/core/__init__.py @@ -1,4 +1,4 @@ # this file only keeps the most common & critical types/utility functions from .common import PathIsh, Paths, Json -from .common import get_files +from .common import get_files, LazyLogger from .cfg import make_config diff --git a/my/reading/polar.py b/my/reading/polar.py index 7ba4fc2..4f79fcf 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -1,23 +1,51 @@ """ [[https://github.com/burtonator/polar-books][Polar]] articles and highlights """ - from pathlib import Path +from typing import Type, Any, cast, TYPE_CHECKING + + +import my.config + +if not TYPE_CHECKING: + user_config = getattr(my.config, 'polar', None) +else: + # mypy can't handle dynamic base classes... https://github.com/python/mypy/issues/2477 + user_config = object + +# by default, Polar doesn't need any config, so perhaps makes sense to make it defensive here +if user_config is None: + class user_config: # type: ignore[no-redef] + pass + + +from dataclasses import dataclass +@dataclass +class polar(user_config): + ''' + Polar config is optional, you only need it if you want to specify custom 'polar_dir' + ''' + polar_dir: Path = Path('~/.polar').expanduser() + + +from ..core import make_config +config = make_config(polar) + +# todo not sure where it keeps stuff on Windows? +# https://github.com/burtonator/polar-bookshelf/issues/296 + from datetime import datetime from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional import json import pytz -from ..common import LazyLogger, get_files +from ..core import get_files, LazyLogger from ..error import Res, echain, unwrap, sort_res_by from ..kython.konsume import wrap, zoom, ignore -_POLAR_DIR = Path('~').expanduser() / '.polar' - - logger = LazyLogger(__name__) @@ -173,7 +201,7 @@ class Loader: def iter_entries() -> Iterator[Result]: - for d in get_files(_POLAR_DIR, glob='*/state.json'): + for d in get_files(config.polar_dir, glob='*/state.json'): loader = Loader(d) try: yield from loader.load() diff --git a/tests/extra/polar.py b/tests/extra/polar.py index e8c1af0..709f44f 100644 --- a/tests/extra/polar.py +++ b/tests/extra/polar.py @@ -7,18 +7,21 @@ import pytest # type: ignore # todo maybe search fot info.json recursively? @pytest.mark.parametrize('dotpolar', [ + '', 'data/polar/BojanKV_polar/.polar', 'data/polar/TheCedarPrince_KnowledgeRepository', 'data/polar/coelias_polardocs', 'data/polar/warkdarrior_polar-document-repository' ]) def test_hpi(dotpolar: str): - pdir = Path(ROOT / dotpolar) - class user_config: - export_dir = pdir + if dotpolar != '': + pdir = Path(ROOT / dotpolar) + class user_config: + export_dir = pdir + + import my.config + setattr(my.config, 'polar', user_config) - import my.config - setattr(my.config, 'polar', user_config) import sys M = 'my.reading.polar' if M in sys.modules: From 0f27071dcc0df2bb5d6e5c494b23d3fef2229d2d Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 08:40:12 +0100 Subject: [PATCH 04/10] polar: minor improvements, konsume: more type annotations --- my/core/common.py | 3 ++- my/kython/konsume.py | 28 ++++++++++++++++------- my/reading/polar.py | 53 +++++++++++++++++++++----------------------- tests/extra/polar.py | 6 ++--- 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/my/core/common.py b/my/core/common.py index 918f4b2..fcbeabb 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -134,7 +134,8 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!") paths.extend(map(Path, do_glob(ss))) else: - assert src.is_file(), src + if not src.is_file(): + raise RuntimeError(f"Expected '{src}' to exist") # todo assert matches glob?? paths.append(src) diff --git a/my/kython/konsume.py b/my/kython/konsume.py index 6e829d3..679755c 100644 --- a/my/kython/konsume.py +++ b/my/kython/konsume.py @@ -11,7 +11,7 @@ def zoom(w, *keys): # TODO need to support lists class Zoomable: - def __init__(self, parent, *args, **kwargs): + def __init__(self, parent, *args, **kwargs) -> None: super().__init__(*args, **kwargs) # type: ignore self.parent = parent @@ -21,19 +21,19 @@ class Zoomable: def dependants(self): raise NotImplementedError - def ignore(self): + def ignore(self) -> None: self.consume_all() - def consume_all(self): + def consume_all(self) -> None: for d in self.dependants: d.consume_all() self.consume() - def consume(self): + def consume(self) -> None: assert self.parent is not None self.parent._remove(self) - def zoom(self): + def zoom(self) -> 'Zoomable': self.consume() return self @@ -56,6 +56,8 @@ class Wdict(Zoomable, OrderedDict): def this_consumed(self): return len(self) == 0 + # TODO specify mypy type for the index special method? + class Wlist(Zoomable, list): def _remove(self, xx): @@ -83,7 +85,8 @@ class Wvalue(Zoomable): def __repr__(self): return 'WValue{' + repr(self.value) + '}' -def _wrap(j, parent=None): +from typing import Tuple +def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]: res: Zoomable cc: List[Zoomable] if isinstance(j, dict): @@ -109,13 +112,14 @@ def _wrap(j, parent=None): raise RuntimeError(f'Unexpected type: {type(j)} {j}') from contextlib import contextmanager +from typing import Iterator class UnconsumedError(Exception): pass # TODO think about error policy later... @contextmanager -def wrap(j, throw=True): +def wrap(j, throw=True) -> Iterator[Zoomable]: w, children = _wrap(j) yield w @@ -128,28 +132,33 @@ def wrap(j, throw=True): # TODO log? pass - +from typing import cast def test_unconsumed(): import pytest # type: ignore with pytest.raises(UnconsumedError): with wrap({'a': 1234}) as w: + w = cast(Wdict, w) pass with pytest.raises(UnconsumedError): with wrap({'c': {'d': 2222}}) as w: + w = cast(Wdict, w) d = w['c']['d'].zoom() def test_consumed(): with wrap({'a': 1234}) as w: + w = cast(Wdict, w) a = w['a'].zoom() with wrap({'c': {'d': 2222}}) as w: + w = cast(Wdict, w) c = w['c'].zoom() d = c['d'].zoom() def test_types(): # (string, number, object, array, boolean or nul with wrap({'string': 'string', 'number': 3.14, 'boolean': True, 'null': None, 'list': [1, 2, 3]}) as w: + w = cast(Wdict, w) w['string'].zoom() w['number'].consume() w['boolean'].zoom() @@ -159,5 +168,8 @@ def test_types(): def test_consume_all(): with wrap({'aaa': {'bbb': {'hi': 123}}}) as w: + w = cast(Wdict, w) aaa = w['aaa'].zoom() aaa['bbb'].consume_all() + +# TODO type check this... diff --git a/my/reading/polar.py b/my/reading/polar.py index 4f79fcf..a38662d 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -35,27 +35,20 @@ config = make_config(polar) # https://github.com/burtonator/polar-bookshelf/issues/296 from datetime import datetime -from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional +from typing import List, Dict, Iterable, NamedTuple, Sequence, Optional import json import pytz -from ..core import get_files, LazyLogger - -from ..error import Res, echain, unwrap, sort_res_by -from ..kython.konsume import wrap, zoom, ignore +from ..core import LazyLogger, Json +from ..core.common import isoparse +from ..error import Res, echain, sort_res_by +from ..kython.konsume import wrap, zoom, ignore, Zoomable, Wdict logger = LazyLogger(__name__) -# TODO use core.isoparse -def parse_dt(s: str) -> datetime: - return pytz.utc.localize(datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ')) - -Uid = str - - # Ok I guess handling comment-level errors is a bit too much.. Cid = str class Comment(NamedTuple): @@ -71,6 +64,8 @@ class Highlight(NamedTuple): comments: Sequence[Comment] + +Uid = str class Book(NamedTuple): uid: Uid created: datetime @@ -80,8 +75,6 @@ class Book(NamedTuple): # think about it later. items: Sequence[Highlight] -Error = Exception # for backwards compat with Orger; can remove later - Result = Res[Book] class Loader: @@ -89,12 +82,13 @@ class Loader: self.path = p self.uid = self.path.parent.name - def error(self, cause, extra='') -> Exception: + def error(self, cause: Exception, extra: str ='') -> Exception: if len(extra) > 0: extra = '\n' + extra return echain(Exception(f'while processing {self.path}{extra}'), cause) - def load_item(self, meta) -> Iterator[Highlight]: + def load_item(self, meta: Zoomable) -> Iterable[Highlight]: + meta = cast(Wdict, meta) # TODO this should be destructive zoom? meta['notes'].zoom() meta['pagemarks'].zoom() @@ -134,7 +128,7 @@ class Loader: cmap[hlid] = ccs ccs.append(Comment( cid=cid.value, - created=parse_dt(crt.value), + created=isoparse(crt.value), text=html.value, # TODO perhaps coonvert from html to text or org? )) v.consume() @@ -162,7 +156,7 @@ class Loader: yield Highlight( hid=hid, - created=parse_dt(crt), + created=isoparse(crt), selection=text, comments=tuple(comments), ) @@ -174,12 +168,12 @@ class Loader: # TODO sort by date? - def load_items(self, metas) -> Iterator[Highlight]: + def load_items(self, metas: Json) -> Iterable[Highlight]: for p, meta in metas.items(): with wrap(meta, throw=False) as meta: yield from self.load_item(meta) - def load(self) -> Iterator[Result]: + def load(self) -> Iterable[Result]: logger.info('processing %s', self.path) j = json.loads(self.path.read_text()) @@ -193,14 +187,15 @@ class Loader: yield Book( uid=self.uid, - created=parse_dt(added), + created=isoparse(added), filename=filename, title=title, items=list(self.load_items(pm)), ) -def iter_entries() -> Iterator[Result]: +def iter_entries() -> Iterable[Result]: + from ..core import get_files for d in get_files(config.polar_dir, glob='*/state.json'): loader = Loader(d) try: @@ -213,16 +208,18 @@ def iter_entries() -> Iterator[Result]: def get_entries() -> List[Result]: # sorting by first annotation is reasonable I guess??? + # todo perhaps worth making it a pattern? X() returns iterable, get_X returns reasonably sorted list? return list(sort_res_by(iter_entries(), key=lambda e: e.created)) def main(): - for entry in iter_entries(): - try: - ee = unwrap(entry) - except Error as e: + for e in iter_entries(): + if isinstance(e, Exception): logger.exception(e) else: - logger.info('processed %s', ee.uid) - for i in ee.items: + logger.info('processed %s', e.uid) + for i in e.items: logger.info(i) + + +Error = Exception # for backwards compat with Orger; can remove later diff --git a/tests/extra/polar.py b/tests/extra/polar.py index 709f44f..3ed0342 100644 --- a/tests/extra/polar.py +++ b/tests/extra/polar.py @@ -1,6 +1,6 @@ from pathlib import Path -ROOT = Path(__file__).parent.parent.absolute() +ROOT = Path(__file__).parent.absolute() import pytest # type: ignore @@ -17,7 +17,7 @@ def test_hpi(dotpolar: str): if dotpolar != '': pdir = Path(ROOT / dotpolar) class user_config: - export_dir = pdir + polar_dir = pdir import my.config setattr(my.config, 'polar', user_config) @@ -30,4 +30,4 @@ def test_hpi(dotpolar: str): import my.reading.polar as polar from my.reading.polar import get_entries - assert len(list(get_entries())) > 10 + assert len(list(get_entries())) > 1 From 87ad9d38bbe1fd1217f23479ae5eccf13999ac55 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 09:52:18 +0100 Subject: [PATCH 05/10] polar: add test for orger integration --- my/core/common.py | 7 +++++++ tests/extra/polar.py | 48 ++++++++++++++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/my/core/common.py b/my/core/common.py index fcbeabb..985ca67 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -246,3 +246,10 @@ def isoparse(s: str) -> tzdatetime: assert s.endswith('Z'), s s = s[:-1] + '+00:00' return fromisoformat(s) + + +import re +# https://stackoverflow.com/a/295466/706389 +def get_valid_filename(s: str) -> str: + s = str(s).strip().replace(' ', '_') + return re.sub(r'(?u)[^-\w.]', '', s) diff --git a/tests/extra/polar.py b/tests/extra/polar.py index 3ed0342..1656132 100644 --- a/tests/extra/polar.py +++ b/tests/extra/polar.py @@ -1,19 +1,49 @@ from pathlib import Path +import sys +from importlib import reload +from my.core.common import get_valid_filename ROOT = Path(__file__).parent.absolute() +OUTPUTS = ROOT / 'outputs' import pytest # type: ignore -# todo maybe search fot info.json recursively? -@pytest.mark.parametrize('dotpolar', [ + +def test_hpi(prepare: str) -> None: + import my.reading.polar as polar + reload(polar) + from my.reading.polar import get_entries + assert len(list(get_entries())) > 1 + + +def test_orger(prepare: str, tmp_path: Path) -> None: + import my.reading.polar as polar + reload(polar) + # TODO hmm... ok, need to document reload() + + from my.core.common import import_from, import_file + om = import_file(ROOT / 'orger/modules/polar.py') + # reload(om) + + pv = om.PolarView() # type: ignore + # TODO hmm. worth making public? + OUTPUTS.mkdir(exist_ok=True) + out = OUTPUTS / (get_valid_filename(prepare) + '.org') + pv._run(to=out) + + +PARAMS = [ '', 'data/polar/BojanKV_polar/.polar', 'data/polar/TheCedarPrince_KnowledgeRepository', 'data/polar/coelias_polardocs', 'data/polar/warkdarrior_polar-document-repository' -]) -def test_hpi(dotpolar: str): +] + +@pytest.fixture(params=PARAMS) +def prepare(request): + dotpolar = request.param if dotpolar != '': pdir = Path(ROOT / dotpolar) class user_config: @@ -22,12 +52,4 @@ def test_hpi(dotpolar: str): import my.config setattr(my.config, 'polar', user_config) - import sys - M = 'my.reading.polar' - if M in sys.modules: - del sys.modules[M] - # TODO maybe set config directly against polar module? - - import my.reading.polar as polar - from my.reading.polar import get_entries - assert len(list(get_entries())) > 1 + yield dotpolar From 759b0e1324cdd643d0f8139005bd23f26815165a Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 10:11:09 +0100 Subject: [PATCH 06/10] polar: expose a proper filename --- my/reading/polar.py | 21 +++++++++++++++------ tests/extra/polar.py | 10 +++------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/my/reading/polar.py b/my/reading/polar.py index a38662d..5b2ecae 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -19,13 +19,14 @@ if user_config is None: pass +from ..core import PathIsh from dataclasses import dataclass @dataclass class polar(user_config): ''' Polar config is optional, you only need it if you want to specify custom 'polar_dir' ''' - polar_dir: Path = Path('~/.polar').expanduser() + polar_dir: PathIsh = Path('~/.polar').expanduser() from ..core import make_config @@ -67,14 +68,19 @@ class Highlight(NamedTuple): Uid = str class Book(NamedTuple): - uid: Uid created: datetime - filename: str + uid: Uid + path: Path title: Optional[str] # TODO hmmm. I think this needs to be defensive as well... # think about it later. items: Sequence[Highlight] + @property + def filename(self) -> str: + # TODO deprecate + return str(self.path) + Result = Res[Book] class Loader: @@ -180,15 +186,18 @@ class Loader: # TODO konsume here as well? di = j['docInfo'] added = di['added'] - filename = di['filename'] + filename = di['filename'] # TODO here title = di.get('title', None) tags = di['tags'] pm = j['pageMetas'] + + path = Path(config.polar_dir) / 'stash' / filename + yield Book( - uid=self.uid, created=isoparse(added), - filename=filename, + uid=self.uid, + path=path, title=title, items=list(self.load_items(pm)), ) diff --git a/tests/extra/polar.py b/tests/extra/polar.py index 1656132..606e32e 100644 --- a/tests/extra/polar.py +++ b/tests/extra/polar.py @@ -11,17 +11,10 @@ import pytest # type: ignore def test_hpi(prepare: str) -> None: - import my.reading.polar as polar - reload(polar) from my.reading.polar import get_entries assert len(list(get_entries())) > 1 - def test_orger(prepare: str, tmp_path: Path) -> None: - import my.reading.polar as polar - reload(polar) - # TODO hmm... ok, need to document reload() - from my.core.common import import_from, import_file om = import_file(ROOT / 'orger/modules/polar.py') # reload(om) @@ -52,4 +45,7 @@ def prepare(request): import my.config setattr(my.config, 'polar', user_config) + import my.reading.polar as polar + reload(polar) + # TODO hmm... ok, need to document reload() yield dotpolar From 844ebf28c12bd8bfe89541d75da94ad4e1572a67 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 11:47:37 +0100 Subject: [PATCH 07/10] polar: extract book tags --- my/reading/polar.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/my/reading/polar.py b/my/reading/polar.py index 5b2ecae..bcd712f 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -76,6 +76,8 @@ class Book(NamedTuple): # think about it later. items: Sequence[Highlight] + tags: Sequence[str] + @property def filename(self) -> str: # TODO deprecate @@ -188,9 +190,11 @@ class Loader: added = di['added'] filename = di['filename'] # TODO here title = di.get('title', None) - tags = di['tags'] + tags_dict = di['tags'] pm = j['pageMetas'] + # todo defensive? + tags = tuple(t['label'] for t in tags_dict.values()) path = Path(config.polar_dir) / 'stash' / filename @@ -200,6 +204,7 @@ class Loader: path=path, title=title, items=list(self.load_items(pm)), + tags=tags, ) From 3d8002c8c90c737ed10740faddcfea423178d10a Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 12:40:15 +0100 Subject: [PATCH 08/10] polar: support configuring defensive behaviour, support for highlight tags --- my/kython/konsume.py | 14 +++++++++++++- my/reading/polar.py | 18 +++++++++++++++--- tests/extra/polar.py | 20 ++++++++++---------- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/my/kython/konsume.py b/my/kython/konsume.py index 679755c..5fa9f3a 100644 --- a/my/kython/konsume.py +++ b/my/kython/konsume.py @@ -127,7 +127,9 @@ def wrap(j, throw=True) -> Iterator[Zoomable]: for c in children: if not c.this_consumed(): # TODO hmm. how does it figure out if it's consumed??? if throw: - raise UnconsumedError(str(c)) + raise UnconsumedError(f''' +Expected {c} to be fully consumed by the parser. +'''.lstrip()) else: # TODO log? pass @@ -172,4 +174,14 @@ def test_consume_all(): aaa = w['aaa'].zoom() aaa['bbb'].consume_all() + +def test_zoom() -> None: + import pytest # type: ignore + with wrap({'aaa': 'whatever'}) as w: + w = cast(Wdict, w) + with pytest.raises(KeyError): + w['nosuchkey'].zoom() + w['aaa'].zoom() + + # TODO type check this... diff --git a/my/reading/polar.py b/my/reading/polar.py index bcd712f..e7f0825 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -27,6 +27,7 @@ class polar(user_config): Polar config is optional, you only need it if you want to specify custom 'polar_dir' ''' polar_dir: PathIsh = Path('~/.polar').expanduser() + defensive: bool = True # pass False if you want it to fail faster on errors (useful for debugging) from ..core import make_config @@ -63,7 +64,7 @@ class Highlight(NamedTuple): created: datetime selection: str comments: Sequence[Comment] - + tags: Sequence[str] Uid = str @@ -98,7 +99,7 @@ class Loader: def load_item(self, meta: Zoomable) -> Iterable[Highlight]: meta = cast(Wdict, meta) # TODO this should be destructive zoom? - meta['notes'].zoom() + meta['notes'].zoom() # TODO ??? is it deliberate? meta['pagemarks'].zoom() if 'notes' in meta: # TODO something nicer? @@ -153,6 +154,16 @@ class Loader: updated = h['lastUpdated'].zoom().value h['rects'].ignore() + # TODO make it more generic.. + htags: List[str] = [] + if 'tags' in h: + ht = h['tags'].zoom() + for k, v in list(ht.items()): + ctag = v.zoom() + ctag['id'].consume() + ct = ctag['label'].zoom() + htags.append(ct.value) + h['textSelections'].ignore() h['notes'].consume() h['questions'].consume() @@ -167,6 +178,7 @@ class Loader: created=isoparse(crt), selection=text, comments=tuple(comments), + tags=tuple(htags), ) h.consume() @@ -178,7 +190,7 @@ class Loader: def load_items(self, metas: Json) -> Iterable[Highlight]: for p, meta in metas.items(): - with wrap(meta, throw=False) as meta: + with wrap(meta, throw=not config.defensive) as meta: yield from self.load_item(meta) def load(self) -> Iterable[Result]: diff --git a/tests/extra/polar.py b/tests/extra/polar.py index 606e32e..b0611f9 100644 --- a/tests/extra/polar.py +++ b/tests/extra/polar.py @@ -27,23 +27,23 @@ def test_orger(prepare: str, tmp_path: Path) -> None: PARAMS = [ + # 'data/polar/BojanKV_polar/.polar', '', - 'data/polar/BojanKV_polar/.polar', - 'data/polar/TheCedarPrince_KnowledgeRepository', - 'data/polar/coelias_polardocs', - 'data/polar/warkdarrior_polar-document-repository' + # 'data/polar/TheCedarPrince_KnowledgeRepository', + # 'data/polar/coelias_polardocs', + # 'data/polar/warkdarrior_polar-document-repository' ] @pytest.fixture(params=PARAMS) def prepare(request): dotpolar = request.param - if dotpolar != '': - pdir = Path(ROOT / dotpolar) - class user_config: - polar_dir = pdir + class user_config: + if dotpolar != '': # defaul + polar_dir = Path(ROOT / dotpolar) + defensive = False - import my.config - setattr(my.config, 'polar', user_config) + import my.config + setattr(my.config, 'polar', user_config) import my.reading.polar as polar reload(polar) From 8277b33c18f869952f928fd46a389097abe45d63 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 12:52:22 +0100 Subject: [PATCH 09/10] polar: add highlight colors --- my/reading/polar.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/my/reading/polar.py b/my/reading/polar.py index e7f0825..10d2e74 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -65,6 +65,7 @@ class Highlight(NamedTuple): selection: str comments: Sequence[Comment] tags: Sequence[str] + color: Optional[str] = None Uid = str @@ -106,10 +107,12 @@ class Loader: notes = meta['notes'].zoom() else: notes = [] # TODO FIXME dict? - comments = meta['comments'].zoom() + comments = list(meta['comments'].zoom().values()) if 'comments' in meta else [] meta['questions'].zoom() meta['flashcards'].zoom() highlights = meta['textHighlights'].zoom() + + # TODO could be useful to at least add a meta bout area highlights/screens meta['areaHighlights'].zoom() meta['screenshots'].zoom() meta['thumbnails'].zoom() @@ -122,7 +125,7 @@ class Loader: # TODO how to make it nicer? cmap: Dict[Hid, List[Comment]] = {} - vals = list(comments.values()) + vals = list(comments) for v in vals: cid = v['id'].zoom() v['guid'].zoom() @@ -168,7 +171,7 @@ class Loader: h['notes'].consume() h['questions'].consume() h['flashcards'].consume() - h['color'].consume() + color = h['color'].zoom().value h['images'].ignore() # TODO eh, quite excessive \ns... text = h['text'].zoom()['TEXT'].zoom().value @@ -179,6 +182,7 @@ class Loader: selection=text, comments=tuple(comments), tags=tuple(htags), + color=color, ) h.consume() From 65138808e7d0501c961227b0d6fc1c35cf01fa7d Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 15 May 2020 13:17:02 +0100 Subject: [PATCH 10/10] polar: handle few more attributes defensively --- my/kython/konsume.py | 14 ++++++++++++++ my/reading/polar.py | 15 ++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/my/kython/konsume.py b/my/kython/konsume.py index 5fa9f3a..bdf9d4b 100644 --- a/my/kython/konsume.py +++ b/my/kython/konsume.py @@ -127,6 +127,7 @@ def wrap(j, throw=True) -> Iterator[Zoomable]: for c in children: if not c.this_consumed(): # TODO hmm. how does it figure out if it's consumed??? if throw: + # TODO need to keep a full path or something... raise UnconsumedError(f''' Expected {c} to be fully consumed by the parser. '''.lstrip()) @@ -175,6 +176,19 @@ def test_consume_all(): aaa['bbb'].consume_all() +def test_consume_few(): + import pytest + pytest.skip('Will think about it later..') + with wrap({ + 'important': 123, + 'unimportant': 'whatever' + }) as w: + w = cast(Wdict, w) + w['important'].zoom() + w.consume_all() + # TODO hmm, we want smth like this to work.. + + def test_zoom() -> None: import pytest # type: ignore with wrap({'aaa': 'whatever'}) as w: diff --git a/my/reading/polar.py b/my/reading/polar.py index 10d2e74..2db5e4d 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -101,7 +101,10 @@ class Loader: meta = cast(Wdict, meta) # TODO this should be destructive zoom? meta['notes'].zoom() # TODO ??? is it deliberate? - meta['pagemarks'].zoom() + + meta['pagemarks'].consume_all() + + if 'notes' in meta: # TODO something nicer? notes = meta['notes'].zoom() @@ -113,15 +116,17 @@ class Loader: highlights = meta['textHighlights'].zoom() # TODO could be useful to at least add a meta bout area highlights/screens - meta['areaHighlights'].zoom() + meta['areaHighlights'].consume_all() meta['screenshots'].zoom() meta['thumbnails'].zoom() if 'readingProgress' in meta: - meta['readingProgress'].zoom() + meta['readingProgress'].consume_all() - # TODO want to ignore the whold subtree.. + # TODO want to ignore the whole subtree.. pi = meta['pageInfo'].zoom() pi['num'].zoom() + if 'dimensions' in pi: + pi['dimensions'].consume_all() # TODO how to make it nicer? cmap: Dict[Hid, List[Comment]] = {} @@ -207,7 +212,7 @@ class Loader: filename = di['filename'] # TODO here title = di.get('title', None) tags_dict = di['tags'] - pm = j['pageMetas'] + pm = j['pageMetas'] # TODO FIXME handle this too # todo defensive? tags = tuple(t['label'] for t in tags_dict.values())