From 1ca2d116ec673da86c1b6bfcce2745a2556ff5ae Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Sun, 13 Sep 2020 23:02:17 +0100
Subject: [PATCH] my.body.exercise: cleanup & error handling for merging cross
 trainer stuff

---
 my/body/exercise.py | 138 +++++++++++++++++++++++++++++++-------------
 1 file changed, 99 insertions(+), 39 deletions(-)

diff --git a/my/body/exercise.py b/my/body/exercise.py
index 2ce53bd..fed6b8b 100644
--- a/my/body/exercise.py
+++ b/my/body/exercise.py
@@ -6,45 +6,71 @@ For now it's worth keeping it here as an example and perhaps utility functions m
 '''
 from datetime import datetime, timedelta
+from typing import Optional
 
 from my.config import exercise as config
 
 
-# todo predataframe?? entries??
-def cross_trainer_data():
-    # FIXME manual entries
-
-    from porg import Org
-    # TODO FIXME should use all org notes and just query from them?
-    wlog = Org.from_file(config.workout_log)
-    cross_table = wlog.xpath('//org[heading="Cross training"]//table')
-    return cross_table.lines
-    # todo hmm, converting an org table directly to pandas kinda makes sense?
-    # could have a '.dataframe' method in orgparse, optional dependency
-
-
 import pytz
 # FIXME how to attach it properly?
 tz = pytz.timezone('Europe/London')
 
+def tzify(d: datetime) -> datetime:
+    assert d.tzinfo is None, d
+    return tz.localize(d)
+
+
+# todo predataframe?? entries??
+def cross_trainer_data():
+    # FIXME some manual entries in python
+    # I guess just convert them to org
+
+    from porg import Org
+    # FIXME should use all org notes and just query from them?
+    wlog = Org.from_file(config.workout_log)
+    cross_table = wlog.xpath('//org[heading="Cross training"]//table')
+
+    def maybe(f):
+        def parse(s):
+            if len(s) == 0:
+                return None
+            return f(s)
+        return parse
+
+    def parse_mm_ss(x: str) -> timedelta:
+        hs, ms = x.split(':')
+        return timedelta(seconds=int(hs) * 60 + int(ms))
+
+    # todo eh. not sure if there is a way of getting around writing code...
+    # I guess would be nice to have a means of specifying type in the column? maybe multirow column names??
+    # need to look up org-mode standard..
+    from ..core.orgmode import parse_org_datetime
+    mappers = {
+        'duration': lambda s: parse_mm_ss(s),
+        'date'    : lambda s: tzify(parse_org_datetime(s)),
+    }
+    for row in cross_table.lines:
+        # todo make more defensive, fallback on nan for individual fields??
+        try:
+            d = {}
+            for k, v in row.items():
+                mapper = mappers.get(k, maybe(float))
+                d[k] = mapper(v)
+            yield d
+        except Exception as e:
+            # todo add parsing context
+            yield {'error': str(e)}
+
+    # todo hmm, converting an org table directly to pandas kinda makes sense?
+    # could have a '.dataframe' method in orgparse, optional dependency
+
+
 def cross_trainer_manual_dataframe():
     '''
     Only manual org-mode entries
     '''
     import pandas as pd
     df = pd.DataFrame(cross_trainer_data())
-
-    from ..core.orgmode import parse_org_datetime
-    df['date'] = df['date'].apply(parse_org_datetime)
-
-    def tzify(d: datetime) -> datetime:
-        assert d.tzinfo is None, d
-        return tz.localize(d)
-
-    df['date'] = df['date'].apply(tzify)
-
-    # TODO convert duration as well
-
-
     return df

@@ -58,15 +84,19 @@ def cross_trainer_dataframe():
     edf = EDF()
     edf = edf[edf['sport'].str.contains('Cross training')]
 
+    # Normalise and assume single bout of exercise per day
     # TODO this could be useful for other providers..
+    # todo hmm maybe this bit is not really that necessary for this function??
+    # just let it fail further down
     grouped = edf.set_index('start_time').groupby(lambda t: t.date())
     singles = []
     for day, grp in grouped:
         if len(grp) != 1:
             # FIXME yield runtimeerror
             continue
-        singles.append(grp)
+        else:
+            singles.append(grp)
     edf = pd.concat(singles)
     edf = edf.reset_index()

@@ -75,25 +105,44 @@ def cross_trainer_dataframe():
     rows = []
     idxs = []
     for i, row in mdf.iterrows():
-        # todo rename 'date'??
         mdate = row['date']
         close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < timedelta(hours=3)]
+        idx: Optional[int]
+        rd = row.to_dict()
+        # todo in case of error, 'start date' becomes 'date'??
         if len(close) == 0:
-            # FIXME emit warning -- nothing matched
-            continue
-        if len(close) > 1:
-            # FIXME emit warning
-            continue
-        loc = close.index[0]
-        # FIXME check and make defensive
-        # assert loc not in idxs, (loc, row)
-        idxs.append(loc)
-        rows.append(row)
+            idx = None
+            d = {
+                **rd,
+                'error': 'no endomondo matches',
+            }
+        elif len(close) > 1:
+            idx = None
+            d = {
+                **rd,
+                'error': 'multiple endomondo matches',
+                # todo add info on which exactly??
+            }
+        else:
+            idx = close.index[0]
+            d = rd
+
+        if idx in idxs:
+            # todo might be a good idea to remove the original match as well?
+            idx = None
+            d = {
+                **rd,
+                'error': 'manual entry matched multiple times',
+            }
+        idxs.append(idx)
+        rows.append(d)
 
     mdf = pd.DataFrame(rows, index=idxs)
 
-    df = edf.join(mdf, rsuffix='_manual')
+    # todo careful about 'how'? we need it to preserve the errors
+    # maybe pd.merge is better suited for this??
+    df = edf.join(mdf, how='outer', rsuffix='_manual')
 
     # TODO arbitrate kcal, duration, avg hr
-    # compare power and hr?
+    # compare power and hr? add 'quality' function??
     return df

@@ -102,6 +151,17 @@ def stats():
     return stat(cross_trainer_data())
 
 
+def compare_manual():
+    df = cross_trainer_dataframe()
+    df = df.set_index('start_time')
+
+    df = df[[
+        'kcal'    , 'kcal_manual',
+        'duration', 'duration_manual',
+    ]].dropna()
+    print(df.to_string())
+
+
 def pd_date_diff(a, b) -> timedelta:
     # ugh. pandas complains when we subtract timestamps in different timezones
     assert a.tzinfo is not None, a
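
Note (illustration, not part of the patch): the per-cell parsing strategy that cross_trainer_data now uses, a mapper per known column, maybe(float) as the fallback, and an 'error' dict when a row fails, can be sketched standalone. The table row and its values below are made up; maybe() and parse_mm_ss() just mirror the helpers added above.

    # standalone sketch of the defensive cell parsing; the row dict is hypothetical
    from datetime import timedelta

    def maybe(f):
        # empty org table cells become None instead of raising
        def parse(s):
            if len(s) == 0:
                return None
            return f(s)
        return parse

    def parse_mm_ss(x: str) -> timedelta:
        mins, secs = x.split(':')
        return timedelta(seconds=int(mins) * 60 + int(secs))

    mappers = {'duration': parse_mm_ss}

    row = {'duration': '30:00', 'kcal': '450', 'comment': ''}  # made-up row
    try:
        parsed = {k: mappers.get(k, maybe(float))(v) for k, v in row.items()}
    except Exception as e:
        parsed = {'error': str(e)}  # same per-row fallback the patch uses
    print(parsed)
    # {'duration': datetime.timedelta(seconds=1800), 'kcal': 450.0, 'comment': None}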