From a6e5908e6de034ff16bfd808425fcb9f941be9cc Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 6 Nov 2020 04:04:05 +0000 Subject: [PATCH] get rid of porg dependency, use orgparse directly --- my/body/blood.py | 21 ++++++----- my/body/exercise/cross_trainer.py | 16 +++++---- my/body/weight.py | 2 +- my/core/orgmode.py | 35 ++++++++++++++++-- my/orgmode.py | 59 +++++++++++++++++++------------ tests/config.py | 4 +-- tests/orgmode.py | 4 +-- tox.ini | 14 ++++---- 8 files changed, 101 insertions(+), 54 deletions(-) diff --git a/my/body/blood.py b/my/body/blood.py index ee3bf03..51a9193 100755 --- a/my/body/blood.py +++ b/my/body/blood.py @@ -7,11 +7,11 @@ from typing import Iterable, NamedTuple, Optional from ..core.common import listify from ..core.error import Res, echain -from ..core.orgmode import parse_org_datetime +from ..core.orgmode import parse_org_datetime, one_table import pandas as pd # type: ignore -import porg +import orgparse from my.config import blood as config @@ -47,14 +47,13 @@ def try_float(s: str) -> Optional[float]: return None return float(x) - def glucose_ketones_data() -> Iterable[Result]: - o = porg.Org.from_file(str(config.blood_log)) - tbl = o.xpath('//table') + o = orgparse.load(config.blood_log) + tbl = one_table(o) # todo some sort of sql-like interface for org tables might be ideal? - for l in tbl.lines: - kets = l['ket'].strip() - glus = l['glu'].strip() + for l in tbl.as_dicts: + kets = l['ket'] + glus = l['glu'] extra = l['notes'] dt = parse_org_datetime(l['datetime']) try: @@ -75,9 +74,9 @@ def glucose_ketones_data() -> Iterable[Result]: def blood_tests_data() -> Iterable[Result]: - o = porg.Org.from_file(str(config.blood_tests_log)) - tbl = o.xpath('//table') - for d in tbl.lines: + o = orgparse.load(config.blood_tests_log) + tbl = one_table(o) + for d in tbl.as_dicts: try: dt = parse_org_datetime(d['datetime']) assert isinstance(dt, datetime), dt diff --git a/my/body/exercise/cross_trainer.py b/my/body/exercise/cross_trainer.py index 3fe2112..fd92666 100644 --- a/my/body/exercise/cross_trainer.py +++ b/my/body/exercise/cross_trainer.py @@ -9,6 +9,7 @@ from datetime import datetime, timedelta from typing import Optional from ...core.pandas import DataFrameT, check_dataframe as cdf +from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable from my.config import exercise as config @@ -26,11 +27,15 @@ def tzify(d: datetime) -> datetime: def cross_trainer_data(): # FIXME some manual entries in python # I guess just convert them to org + import orgparse + # todo should use all org notes and just query from them? + wlog = orgparse.load(config.workout_log) - from porg import Org - # FIXME should use all org notes and just query from them? - wlog = Org.from_file(config.workout_log) - cross_table = wlog.xpath('//org[heading="Cross training"]//table') + [table] = collect( + wlog, + lambda n: [] if n.heading != 'Cross training' else [x for x in n.body_rich if isinstance(x, Table)] + ) + cross_table = TypedTable(table) def maybe(f): def parse(s): @@ -46,13 +51,12 @@ def cross_trainer_data(): # todo eh. not sure if there is a way of getting around writing code... # I guess would be nice to have a means of specifying type in the column? maybe multirow column names?? # need to look up org-mode standard.. - from ...core.orgmode import parse_org_datetime mappers = { 'duration': lambda s: parse_mm_ss(s), 'date' : lambda s: tzify(parse_org_datetime(s)), 'comment' : str, } - for row in cross_table.lines: + for row in cross_table.as_dicts: # todo make more defensive, fallback on nan for individual fields?? try: d = {} diff --git a/my/body/weight.py b/my/body/weight.py index 01c3132..1135cdc 100644 --- a/my/body/weight.py +++ b/my/body/weight.py @@ -45,7 +45,7 @@ def from_orgmode() -> Iterator[Result]: log.exception(e) yield e continue - # todo perhaps, better to use timezone provider + # FIXME use timezone provider created = config.default_timezone.localize(created) yield Entry( dt=created, diff --git a/my/core/orgmode.py b/my/core/orgmode.py index 3f4ac25..668017c 100644 --- a/my/core/orgmode.py +++ b/my/core/orgmode.py @@ -2,8 +2,6 @@ Various helpers for reading org-mode data """ from datetime import datetime - - def parse_org_datetime(s: str) -> datetime: s = s.strip('[]') for fmt, cl in [ @@ -19,3 +17,36 @@ def parse_org_datetime(s: str) -> datetime: continue else: raise RuntimeError(f"Bad datetime string {s}") + + +from orgparse import OrgNode +from typing import Iterable, TypeVar, Callable +V = TypeVar('V') + +def collect(n: OrgNode, cfun: Callable[[OrgNode], Iterable[V]]) -> Iterable[V]: + yield from cfun(n) + for c in n.children: + yield from collect(c, cfun) + +from more_itertools import one +from orgparse.extra import Table +def one_table(o: OrgNode) -> Table: + return one(collect(o, lambda n: (x for x in n.body_rich if isinstance(x, Table)))) + + +from typing import Iterator, Dict, Any +class TypedTable(Table): + def __new__(cls, orig: Table) -> 'TypedTable': + tt = super().__new__(TypedTable) + tt.__dict__ = orig.__dict__ + blocks = list(orig.blocks) + header = blocks[0] # fist block is schema + if len(header) == 2: + # TODO later interpret first line as types + header = header[1:] + tt._blocks = [header, *blocks[1:]] + return tt + + @property + def blocks(self): + return getattr(self, '_blocks') diff --git a/my/orgmode.py b/my/orgmode.py index f9ed265..4860a4e 100644 --- a/my/orgmode.py +++ b/my/orgmode.py @@ -5,14 +5,14 @@ from datetime import datetime, date from pathlib import Path from typing import List, Sequence, Iterable, NamedTuple, Optional -from .core import PathIsh +from .core import PathIsh, get_files from .core.common import mcachew from .core.cachew import cache_dir +from .core.orgmode import collect from my.config import orgmode as user_config - -from porg import Org +import orgparse # temporary? hack to cache org-mode notes @@ -28,12 +28,33 @@ def _sanitize(p: Path) -> str: return re.sub(r'\W', '_', str(p)) -def to_note(x: Org) -> OrgNote: +from typing import Tuple +_rgx = re.compile(orgparse.date.gene_timestamp_regex(brtype='inactive'), re.VERBOSE) +def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]: + heading = n.heading + # meh.. support in orgparse? + pp = {} if n.is_root() else n.properties # type: ignore + createds = pp.get('CREATED', None) + if createds is None: + # try to guess from heading + m = _rgx.search(heading) + if m is not None: + createds = m.group(0) # could be None + if createds is None: + return (None, heading) + [odt] = orgparse.date.OrgDate.list_from_str(createds) + dt = odt.start + # todo a bit hacky.. + heading = heading.replace(createds + ' ', '') + return (dt, heading) + + +def to_note(x: orgparse.OrgNode) -> OrgNote: # ugh. hack to merely make it cacheable + heading = x.heading created: Optional[datetime] try: - # TODO(porg) not sure if created should ever throw... maybe warning/log? - c = x.created + c, heading = _created(x) if isinstance(c, datetime): created = c else: @@ -43,12 +64,11 @@ def to_note(x: Org) -> OrgNote: created = None return OrgNote( created=created, - heading=x.heading, # todo include the rest? + heading=heading, # todo include the body? tags=list(x.tags), ) -# todo move to porg? class Query: def __init__(self, files: Sequence[Path]) -> None: self.files = files @@ -59,27 +79,20 @@ class Query: depends_on=lambda _, f: (f, f.stat().st_mtime), ) def _iterate(self, f: Path) -> Iterable[OrgNote]: - o = Org.from_file(f) - for x in o.iterate(): + o = orgparse.load(f) + for x in o: yield to_note(x) def all(self) -> Iterable[OrgNote]: + # TODO build a virtual hierarchy from it? for f in self.files: yield from self._iterate(f) - # TODO very confusing names... - # TODO careful... maybe use orgparse iterate instead?? ugh. - def get_all(self): - return self._xpath_all('//org') - - def query_all(self, query): - for of in self.files: - org = Org.from_file(str(of)) - yield from query(org) - - def _xpath_all(self, query: str): - return self.query_all(lambda x: x.xpath_all(query)) + def collect_all(self, collector) -> Iterable[orgparse.OrgNode]: + for f in self.files: + o = orgparse.load(f) + yield from collect(o, collector) def query() -> Query: - return Query(files=list(user_config.files)) + return Query(files=get_files(user_config.paths)) diff --git a/tests/config.py b/tests/config.py index 6e36517..995190f 100644 --- a/tests/config.py +++ b/tests/config.py @@ -6,9 +6,9 @@ def setup_notes_path(notes: Path) -> None: from my.cfg import config class user_config: - roots = [notes] + paths = [notes] config.orgmode = user_config # type: ignore[misc,assignment] - # TODO FIXME ugh. this belongs to tz provider or global config or someting + # TODO ugh. this belongs to tz provider or global config or someting import pytz class user_config_2: default_timezone = pytz.timezone('Europe/London') diff --git a/tests/orgmode.py b/tests/orgmode.py index 6467f0d..aa30114 100644 --- a/tests/orgmode.py +++ b/tests/orgmode.py @@ -1,7 +1,7 @@ from my import orgmode - +from my.core.orgmode import collect def test() -> None: # meh - results = list(orgmode.query().query_all(lambda x: x.with_tag('python'))) + results = list(orgmode.query().collect_all(lambda n: [n] if 'python' in n.tags else [])) assert len(results) > 5 diff --git a/tox.ini b/tox.ini index 9d20a77..7531cfd 100644 --- a/tox.ini +++ b/tox.ini @@ -20,6 +20,9 @@ commands = # my.calendar.holidays dep pip install workalendar + # my.body.weight dep + pip install orgparse + python3 -m pytest \ tests/core.py \ tests/misc.py \ @@ -29,10 +32,8 @@ commands = tests/bluemaestro.py \ tests/location.py \ tests/tz.py \ - tests/calendar.py - # TODO add; once I figure out porg depdencency?? tests/config.py - # TODO run demo.py? just make sure with_my is a bit cleverer? - # TODO e.g. under CI, rely on installing + tests/calendar.py \ + tests/config.py hpi modules @@ -45,6 +46,7 @@ commands = ./demo.py whitelist_externals = bash commands = pip install -e .[testing] .[optional] + pip install orgparse pip install git+https://github.com/karlicoss/ghexport pip install git+https://github.com/karlicoss/hypexport pip install git+https://github.com/karlicoss/instapexport @@ -52,8 +54,6 @@ commands = pip install git+https://github.com/karlicoss/rexport pip install git+https://github.com/karlicoss/endoexport - pip install git+https://github.com/karlicoss/porg - # ugh fuck. soo... need to reset HOME, otherwise user's site-packages are somehow leaking into mypy's path... # see https://github.com/python/mypy/blob/f6fb60ef69738cbfe2dfe56c747eca8f03735d8e/mypy/modulefinder.py#L487 # this is particularly annoying when user's config is leaking and mypy isn't running against the repository config @@ -82,5 +82,5 @@ commands = [testenv:mypy] skip_install = true commands = - pip install -e .[testing] .[optional] + pip install -e .[testing] .[optional] orgparse ./lint