get rid of porg dependency, use orgparse directly

2020-11-06 04:04:05 +00:00 · 2020-11-06 04:04:05 +00:00 · a6e5908e6d
commit a6e5908e6d
parent 62e1bdc39a
8 changed files with 101 additions and 54 deletions
--- a/my/body/blood.py
+++ b/my/body/blood.py
@ -7,11 +7,11 @@ from typing import Iterable, NamedTuple, Optional

 from ..core.common import listify
 from ..core.error import Res, echain
-from ..core.orgmode import parse_org_datetime
+from ..core.orgmode import parse_org_datetime, one_table


 import pandas as pd # type: ignore
-import porg
+import orgparse


 from my.config import blood as config
@ -47,14 +47,13 @@ def try_float(s: str) -> Optional[float]:
        return None
    return float(x)

-
 def glucose_ketones_data() -> Iterable[Result]:
-    o = porg.Org.from_file(str(config.blood_log))
-    tbl = o.xpath('//table')
+    o = orgparse.load(config.blood_log)
+    tbl = one_table(o)
    # todo some sort of sql-like interface for org tables might be ideal?
-    for l in tbl.lines:
-        kets = l['ket'].strip()
-        glus = l['glu'].strip()
+    for l in tbl.as_dicts:
+        kets = l['ket']
+        glus = l['glu']
        extra = l['notes']
        dt = parse_org_datetime(l['datetime'])
        try:
@ -75,9 +74,9 @@ def glucose_ketones_data() -> Iterable[Result]:


 def blood_tests_data() -> Iterable[Result]:
-    o = porg.Org.from_file(str(config.blood_tests_log))
-    tbl = o.xpath('//table')
-    for d in tbl.lines:
+    o = orgparse.load(config.blood_tests_log)
+    tbl = one_table(o)
+    for d in tbl.as_dicts:
        try:
            dt = parse_org_datetime(d['datetime'])
            assert isinstance(dt, datetime), dt
--- a/my/body/exercise/cross_trainer.py
+++ b/my/body/exercise/cross_trainer.py
@ -9,6 +9,7 @@ from datetime import datetime, timedelta
 from typing import Optional

 from ...core.pandas import DataFrameT, check_dataframe as cdf
+from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable

 from my.config import exercise as config

@ -26,11 +27,15 @@ def tzify(d: datetime) -> datetime:
 def cross_trainer_data():
    # FIXME some manual entries in python
    # I guess just convert them to org
+    import orgparse
+    # todo should use all org notes and just query from them?
+    wlog = orgparse.load(config.workout_log)

-    from porg import Org
-    # FIXME should use all org notes and just query from them?
-    wlog = Org.from_file(config.workout_log)
-    cross_table = wlog.xpath('//org[heading="Cross training"]//table')
+    [table] = collect(
+        wlog,
+        lambda n: [] if n.heading != 'Cross training' else [x for x in n.body_rich if isinstance(x, Table)]
+    )
+    cross_table = TypedTable(table)

    def maybe(f):
        def parse(s):
@ -46,13 +51,12 @@ def cross_trainer_data():
    # todo eh. not sure if there is a way of getting around writing code...
    # I guess would be nice to have a means of specifying type in the column? maybe multirow column names??
    # need to look up org-mode standard..
-    from ...core.orgmode import parse_org_datetime
    mappers = {
        'duration': lambda s: parse_mm_ss(s),
        'date'    : lambda s: tzify(parse_org_datetime(s)),
        'comment' : str,
    }
-    for row in cross_table.lines:
+    for row in cross_table.as_dicts:
        # todo make more defensive, fallback on nan for individual fields??
        try:
            d = {}
--- a/my/body/weight.py
+++ b/my/body/weight.py
@ -45,7 +45,7 @@ def from_orgmode() -> Iterator[Result]:
            log.exception(e)
            yield e
            continue
-        # todo perhaps, better to use timezone provider
+        # FIXME use timezone provider
        created = config.default_timezone.localize(created)
        yield Entry(
            dt=created,
--- a/my/core/orgmode.py
+++ b/my/core/orgmode.py
@ -2,8 +2,6 @@
 Various helpers for reading org-mode data
 """
 from datetime import datetime
-
-
 def parse_org_datetime(s: str) -> datetime:
    s = s.strip('[]')
    for fmt, cl in [
@ -19,3 +17,36 @@ def parse_org_datetime(s: str) -> datetime:
            continue
    else:
        raise RuntimeError(f"Bad datetime string {s}")
+
+
+from orgparse import OrgNode
+from typing import Iterable, TypeVar, Callable
+V = TypeVar('V')
+
+def collect(n: OrgNode, cfun: Callable[[OrgNode], Iterable[V]]) -> Iterable[V]:
+    yield from cfun(n)
+    for c in n.children:
+        yield from collect(c, cfun)
+
+from more_itertools import one
+from orgparse.extra import Table
+def one_table(o: OrgNode) -> Table:
+    return one(collect(o, lambda n: (x for x in n.body_rich if isinstance(x, Table))))
+
+
+from typing import Iterator, Dict, Any
+class TypedTable(Table):
+    def __new__(cls, orig: Table) -> 'TypedTable':
+        tt = super().__new__(TypedTable)
+        tt.__dict__ = orig.__dict__
+        blocks = list(orig.blocks)
+        header = blocks[0] # first block is schema
+        if len(header) == 2:
+            # TODO later interpret first line as types
+            header = header[1:]
+        tt._blocks = [header, *blocks[1:]]
+        return tt
+
+    @property
+    def blocks(self):
+        return getattr(self, '_blocks')
--- a/my/orgmode.py
+++ b/my/orgmode.py
@ -5,14 +5,14 @@ from datetime import datetime, date
 from pathlib import Path
 from typing import List, Sequence, Iterable, NamedTuple, Optional

-from .core import PathIsh
+from .core import PathIsh, get_files
 from .core.common import mcachew
 from .core.cachew import cache_dir
+from .core.orgmode import collect

 from my.config import orgmode as user_config

-
-from porg import Org
+import orgparse


 # temporary? hack to cache org-mode notes
@ -28,12 +28,33 @@ def _sanitize(p: Path) -> str:
    return re.sub(r'\W', '_', str(p))


-def to_note(x: Org) -> OrgNote:
+from typing import Tuple
+_rgx = re.compile(orgparse.date.gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
+def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]:
+    heading = n.heading
+    # meh.. support in orgparse?
+    pp = {} if n.is_root() else n.properties # type: ignore
+    createds = pp.get('CREATED', None)
+    if createds is None:
+        # try to guess from heading
+        m = _rgx.search(heading)
+        if m is not None:
+            createds = m.group(0) # could be None
+    if createds is None:
+        return (None, heading)
+    [odt] = orgparse.date.OrgDate.list_from_str(createds)
+    dt = odt.start
+    # todo a bit hacky..
+    heading = heading.replace(createds + ' ', '')
+    return (dt, heading)
+
+
+def to_note(x: orgparse.OrgNode) -> OrgNote:
    # ugh. hack to merely make it cacheable
+    heading = x.heading
    created: Optional[datetime]
    try:
-        # TODO(porg) not sure if created should ever throw... maybe warning/log?
-        c = x.created
+        c, heading = _created(x)
        if isinstance(c, datetime):
            created = c
        else:
@ -43,12 +64,11 @@ def to_note(x: Org) -> OrgNote:
        created = None
    return OrgNote(
        created=created,
-        heading=x.heading, # todo include the rest?
+        heading=heading, # todo include the body?
        tags=list(x.tags),
    )


-# todo move to porg?
 class Query:
    def __init__(self, files: Sequence[Path]) -> None:
        self.files = files
@ -59,27 +79,20 @@ class Query:
        depends_on=lambda _, f: (f, f.stat().st_mtime),
    )
    def _iterate(self, f: Path) -> Iterable[OrgNote]:
-        o = Org.from_file(f)
-        for x in o.iterate():
+        o = orgparse.load(f)
+        for x in o:
            yield to_note(x)

    def all(self) -> Iterable[OrgNote]:
+        # TODO  build a virtual hierarchy from it?
        for f in self.files:
            yield from self._iterate(f)

-    # TODO very confusing names...
-    # TODO careful... maybe use orgparse iterate instead?? ugh.
-    def get_all(self):
-        return self._xpath_all('//org')
-
-    def query_all(self, query):
-        for of in self.files:
-            org = Org.from_file(str(of))
-            yield from query(org)
-
-    def _xpath_all(self, query: str):
-        return self.query_all(lambda x: x.xpath_all(query))
+    def collect_all(self, collector) -> Iterable[orgparse.OrgNode]:
+        for f in self.files:
+            o = orgparse.load(f)
+            yield from collect(o, collector)


 def query() -> Query:
-    return Query(files=list(user_config.files))
+    return Query(files=get_files(user_config.paths))
--- a/tests/config.py
+++ b/tests/config.py
@ -6,9 +6,9 @@ def setup_notes_path(notes: Path) -> None:
    from my.cfg import config

    class user_config:
-        roots = [notes]
+        paths = [notes]
    config.orgmode = user_config # type: ignore[misc,assignment]
-    # TODO FIXME ugh. this belongs to tz provider or global config or someting
+    # TODO  ugh. this belongs to tz provider or global config or something
    import pytz
    class user_config_2:
        default_timezone = pytz.timezone('Europe/London')
--- a/tests/orgmode.py
+++ b/tests/orgmode.py
@ -1,7 +1,7 @@
 from my import orgmode
-
+from my.core.orgmode import collect

 def test() -> None:
    # meh
-    results = list(orgmode.query().query_all(lambda x: x.with_tag('python')))
+    results = list(orgmode.query().collect_all(lambda n: [n] if 'python' in n.tags else []))
    assert len(results) > 5
--- a/tox.ini
+++ b/tox.ini
@ -20,6 +20,9 @@ commands =
    # my.calendar.holidays dep
    pip install workalendar

+    # my.body.weight dep
+    pip install orgparse
+
    python3 -m pytest                              \
        tests/core.py                              \
        tests/misc.py                              \
@ -29,10 +32,8 @@ commands =
        tests/bluemaestro.py                       \
        tests/location.py                          \
        tests/tz.py                                \
-        tests/calendar.py
-    # TODO add; once I figure out porg depdencency?? tests/config.py
-    # TODO run demo.py? just make sure with_my is a bit cleverer?
-    # TODO e.g. under CI, rely on installing
+        tests/calendar.py                          \
+        tests/config.py
    hpi modules


@ -45,6 +46,7 @@ commands = ./demo.py
 whitelist_externals = bash
 commands =
    pip install -e .[testing] .[optional]
+    pip install orgparse
    pip install git+https://github.com/karlicoss/ghexport
    pip install git+https://github.com/karlicoss/hypexport
    pip install git+https://github.com/karlicoss/instapexport
@ -52,8 +54,6 @@ commands =
    pip install git+https://github.com/karlicoss/rexport
    pip install git+https://github.com/karlicoss/endoexport

-    pip install git+https://github.com/karlicoss/porg
-
    # ugh fuck. soo... need to reset HOME, otherwise user's site-packages are somehow leaking into mypy's path...
    # see https://github.com/python/mypy/blob/f6fb60ef69738cbfe2dfe56c747eca8f03735d8e/mypy/modulefinder.py#L487
    # this is particularly annoying when user's config is leaking and mypy isn't running against the repository config
@ -82,5 +82,5 @@ commands =
 [testenv:mypy]
 skip_install = true
 commands =
-    pip install -e .[testing] .[optional]
+    pip install -e .[testing] .[optional] orgparse
    ./lint