get rid of porg dependency, use orgparse directly

2020-11-06 04:04:05 +00:00 · 2020-11-06 04:04:05 +00:00 · a6e5908e6d
commit a6e5908e6d
parent 62e1bdc39a
8 changed files with 101 additions and 54 deletions
--- a/my/body/blood.py
+++ b/my/body/blood.py
@ -7,11 +7,11 @@ from typing import Iterable, NamedTuple, Optional
 from ..core.common import listify
 from ..core.error import Res, echain
-from ..core.orgmode import parse_org_datetime
+from ..core.orgmode import parse_org_datetime, one_table
 import pandas as pd # type: ignore
-import porg
+import orgparse
 from my.config import blood as config
@ -47,14 +47,13 @@ def try_float(s: str) -> Optional[float]:
        return None
    return float(x)
 def glucose_ketones_data() -> Iterable[Result]:
-    o = porg.Org.from_file(str(config.blood_log))
+    o = orgparse.load(config.blood_log)
-    tbl = o.xpath('//table')
+    tbl = one_table(o)
    # todo some sort of sql-like interface for org tables might be ideal?
-    for l in tbl.lines:
+    for l in tbl.as_dicts:
-        kets = l['ket'].strip()
+        kets = l['ket']
-        glus = l['glu'].strip()
+        glus = l['glu']
        extra = l['notes']
        dt = parse_org_datetime(l['datetime'])
        try:
@ -75,9 +74,9 @@ def glucose_ketones_data() -> Iterable[Result]:
 def blood_tests_data() -> Iterable[Result]:
-    o = porg.Org.from_file(str(config.blood_tests_log))
+    o = orgparse.load(config.blood_tests_log)
-    tbl = o.xpath('//table')
+    tbl = one_table(o)
-    for d in tbl.lines:
+    for d in tbl.as_dicts:
        try:
            dt = parse_org_datetime(d['datetime'])
            assert isinstance(dt, datetime), dt
--- a/my/body/exercise/cross_trainer.py
+++ b/my/body/exercise/cross_trainer.py
@ -9,6 +9,7 @@ from datetime import datetime, timedelta
 from typing import Optional
 from ...core.pandas import DataFrameT, check_dataframe as cdf
 from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable
 from my.config import exercise as config
@ -26,11 +27,15 @@ def tzify(d: datetime) -> datetime:
 def cross_trainer_data():
    # FIXME some manual entries in python
    # I guess just convert them to org
    import orgparse
    # todo should use all org notes and just query from them?
    wlog = orgparse.load(config.workout_log)
-    from porg import Org
+    [table] = collect(
-    # FIXME should use all org notes and just query from them?
+        wlog,
-    wlog = Org.from_file(config.workout_log)
+        lambda n: [] if n.heading != 'Cross training' else [x for x in n.body_rich if isinstance(x, Table)]
-    cross_table = wlog.xpath('//org[heading="Cross training"]//table')
+    )
    cross_table = TypedTable(table)
    def maybe(f):
        def parse(s):
@ -46,13 +51,12 @@ def cross_trainer_data():
    # todo eh. not sure if there is a way of getting around writing code...
    # I guess would be nice to have a means of specifying type in the column? maybe multirow column names??
    # need to look up org-mode standard..
    from ...core.orgmode import parse_org_datetime
    mappers = {
        'duration': lambda s: parse_mm_ss(s),
        'date'    : lambda s: tzify(parse_org_datetime(s)),
        'comment' : str,
    }
-    for row in cross_table.lines:
+    for row in cross_table.as_dicts:
        # todo make more defensive, fallback on nan for individual fields??
        try:
            d = {}
--- a/my/body/weight.py
+++ b/my/body/weight.py
@ -45,7 +45,7 @@ def from_orgmode() -> Iterator[Result]:
            log.exception(e)
            yield e
            continue
-        # todo perhaps, better to use timezone provider
+        # FIXME use timezone provider
        created = config.default_timezone.localize(created)
        yield Entry(
            dt=created,
--- a/my/core/orgmode.py
+++ b/my/core/orgmode.py
@ -2,8 +2,6 @@
 Various helpers for reading org-mode data
 """
 from datetime import datetime
 def parse_org_datetime(s: str) -> datetime:
    s = s.strip('[]')
    for fmt, cl in [
@ -19,3 +17,36 @@ def parse_org_datetime(s: str) -> datetime:
            continue
    else:
        raise RuntimeError(f"Bad datetime string {s}")
 from orgparse import OrgNode
 from typing import Iterable, TypeVar, Callable
 V = TypeVar('V')
 def collect(n: OrgNode, cfun: Callable[[OrgNode], Iterable[V]]) -> Iterable[V]:
    """
    Lazily gather values from an org tree rooted at ``n``.

    Yields everything ``cfun`` returns for ``n`` itself, then recurses into each of
    ``n.children`` in order, so a node's own results precede those of its descendants.
    """
    yield from cfun(n)
    for c in n.children:
        yield from collect(c, cfun)
 from more_itertools import one
 from orgparse.extra import Table
 def one_table(o: OrgNode) -> Table:
    """
    Return the single table found in ``o`` or any of its descendants.

    Every node's ``body_rich`` is scanned for ``Table`` instances; the resulting
    stream is handed to ``more_itertools.one``, which is expected to reject the
    "no tables" and "more than one table" cases (see its documentation).
    """
    return one(collect(o, lambda n: (x for x in n.body_rich if isinstance(x, Table))))
 from typing import Iterator, Dict, Any
 class TypedTable(Table):
    """
    View over an existing ``Table`` whose first block (the schema) may span two rows.

    When the first block has exactly two rows, the first row is dropped so that only
    the second one remains as the header; all other blocks are kept as they are.
    """
    def __new__(cls, orig: Table) -> 'TypedTable':
        tt = super().__new__(TypedTable)
        # Share (not copy) the original table's attribute dict: the two objects are
        # backed by the same state, so the '_blocks' assignment below is visible on
        # ``orig`` as well.
        tt.__dict__ = orig.__dict__
        blocks = list(orig.blocks)
        header = blocks[0] # first block is schema
        if len(header) == 2:
            # TODO later interpret first line as types
            header = header[1:]
        tt._blocks = [header, *blocks[1:]]
        # NOTE(review): because a TypedTable instance is returned, Python will go on to
        # call __init__ with ``orig`` as the argument -- confirm Table.__init__ tolerates that.
        return tt
    @property
    def blocks(self):
        # Overrides the parent's ``blocks`` with the adjusted list prepared in __new__.
        return getattr(self, '_blocks')
--- a/my/orgmode.py
+++ b/my/orgmode.py
@ -5,14 +5,14 @@ from datetime import datetime, date
 from pathlib import Path
 from typing import List, Sequence, Iterable, NamedTuple, Optional
-from .core import PathIsh
+from .core import PathIsh, get_files
 from .core.common import mcachew
 from .core.cachew import cache_dir
 from .core.orgmode import collect
 from my.config import orgmode as user_config
-
+import orgparse
 from porg import Org
 # temporary? hack to cache org-mode notes
@ -28,12 +28,33 @@ def _sanitize(p: Path) -> str:
    return re.sub(r'\W', '_', str(p))
-def to_note(x: Org) -> OrgNote:
+from typing import Tuple
 _rgx = re.compile(orgparse.date.gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
 def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]:
    """
    Work out the creation timestamp of a node.

    Returns a ``(created, heading)`` pair: ``created`` is ``None`` when no timestamp
    could be found, and ``heading`` is the node's heading with the timestamp text
    (plus one trailing space) removed when a timestamp was found.

    The ``CREATED`` property is preferred; otherwise the heading is searched with
    ``_rgx`` (an inactive-timestamp pattern). Unpacking into ``[odt]`` raises
    ``ValueError`` unless exactly one date is parsed from the timestamp string.
    """
    heading = n.heading
    # meh.. support in orgparse?
    # properties are not read for the root node; an empty mapping is used instead
    pp = {} if n.is_root() else n.properties # type: ignore
    createds = pp.get('CREATED', None)
    if createds is None:
        # try to guess from heading
        m = _rgx.search(heading)
        if m is not None:
            createds = m.group(0) # could be None
    if createds is None:
        return (None, heading)
    [odt] = orgparse.date.OrgDate.list_from_str(createds)
    dt = odt.start
    # todo a bit hacky..
    # drops every occurrence of the timestamp text followed by a single space
    heading = heading.replace(createds + ' ', '')
    return (dt, heading)
 def to_note(x: orgparse.OrgNode) -> OrgNote:
    # ugh. hack to merely make it cacheable
    heading = x.heading
    created: Optional[datetime]
    try:
-        # TODO(porg) not sure if created should ever throw... maybe warning/log?
+        c, heading = _created(x)
        c = x.created
        if isinstance(c, datetime):
            created = c
        else:
@ -43,12 +64,11 @@ def to_note(x: Org) -> OrgNote:
        created = None
    return OrgNote(
        created=created,
-        heading=x.heading, # todo include the rest?
+        heading=heading, # todo include the body?
        tags=list(x.tags),
    )
 # todo move to porg?
 class Query:
    def __init__(self, files: Sequence[Path]) -> None:
        self.files = files
@ -59,27 +79,20 @@ class Query:
        depends_on=lambda _, f: (f, f.stat().st_mtime),
    )
    def _iterate(self, f: Path) -> Iterable[OrgNote]:
-        o = Org.from_file(f)
+        o = orgparse.load(f)
-        for x in o.iterate():
+        for x in o:
            yield to_note(x)
    def all(self) -> Iterable[OrgNote]:
        # TODO  build a virtual hierarchy from it?
        for f in self.files:
            yield from self._iterate(f)
-    # TODO very confusing names...
+    def collect_all(self, collector) -> Iterable[orgparse.OrgNode]:
-    # TODO careful... maybe use orgparse iterate instead?? ugh.
+        for f in self.files:
-    def get_all(self):
+            o = orgparse.load(f)
-        return self._xpath_all('//org')
+            yield from collect(o, collector)
    def query_all(self, query):
        for of in self.files:
            org = Org.from_file(str(of))
            yield from query(org)
    def _xpath_all(self, query: str):
        return self.query_all(lambda x: x.xpath_all(query))
 def query() -> Query:
-    return Query(files=list(user_config.files))
+    return Query(files=get_files(user_config.paths))
--- a/tests/config.py
+++ b/tests/config.py
@ -6,9 +6,9 @@ def setup_notes_path(notes: Path) -> None:
    from my.cfg import config
    class user_config:
-        roots = [notes]
+        paths = [notes]
    config.orgmode = user_config # type: ignore[misc,assignment]
-    # TODO FIXME ugh. this belongs to tz provider or global config or someting
+    # TODO  ugh. this belongs to tz provider or global config or something
    import pytz
    class user_config_2:
        default_timezone = pytz.timezone('Europe/London')
--- a/tests/orgmode.py
+++ b/tests/orgmode.py
@ -1,7 +1,7 @@
 from my import orgmode
-
+from my.core.orgmode import collect
 def test() -> None:
    # meh
-    results = list(orgmode.query().query_all(lambda x: x.with_tag('python')))
+    results = list(orgmode.query().collect_all(lambda n: [n] if 'python' in n.tags else []))
    assert len(results) > 5
--- a/tox.ini
+++ b/tox.ini
@ -20,6 +20,9 @@ commands =
    # my.calendar.holidays dep
    pip install workalendar
    # my.body.weight dep
    pip install orgparse
    python3 -m pytest                              \
        tests/core.py                              \
        tests/misc.py                              \
@ -29,10 +32,8 @@ commands =
        tests/bluemaestro.py                       \
        tests/location.py                          \
        tests/tz.py                                \
-        tests/calendar.py
+        tests/calendar.py                          \
-    # TODO add; once I figure out porg depdencency?? tests/config.py
+        tests/config.py
    # TODO run demo.py? just make sure with_my is a bit cleverer?
    # TODO e.g. under CI, rely on installing
    hpi modules
@ -45,6 +46,7 @@ commands = ./demo.py
 whitelist_externals = bash
 commands =
    pip install -e .[testing] .[optional]
    pip install orgparse
    pip install git+https://github.com/karlicoss/ghexport
    pip install git+https://github.com/karlicoss/hypexport
    pip install git+https://github.com/karlicoss/instapexport
@ -52,8 +54,6 @@ commands =
    pip install git+https://github.com/karlicoss/rexport
    pip install git+https://github.com/karlicoss/endoexport
    pip install git+https://github.com/karlicoss/porg
    # ugh fuck. soo... need to reset HOME, otherwise user's site-packages are somehow leaking into mypy's path...
    # see https://github.com/python/mypy/blob/f6fb60ef69738cbfe2dfe56c747eca8f03735d8e/mypy/modulefinder.py#L487
    # this is particularly annoying when user's config is leaking and mypy isn't running against the repository config
@ -82,5 +82,5 @@ commands =
 [testenv:mypy]
 skip_install = true
 commands =
-    pip install -e .[testing] .[optional]
+    pip install -e .[testing] .[optional] orgparse
    ./lint