body.exercise: add cardio summary, move cross trainer to a separate file

2020-09-17 21:35:18 +01:00 · 2020-09-17 21:35:18 +01:00 · f02c572cc0
commit f02c572cc0
parent eb14d5988d
2 changed files with 58 additions and 8 deletions
--- a/my/body/exercise/cardio.py
+++ b/my/body/exercise/cardio.py
@ -0,0 +1,50 @@
+'''
+Cardio data, filtered from Endomondo and inferred from other data sources
+'''
+from ...core.pandas import check_dataframe as cdf
+
+import pandas as pd # type: ignore
+
+
+# Sport names (as they appear in the 'sport' column of the Endomondo dataframe)
+# that are counted as cardio.
+CARDIO     = {
+    'Running',
+    'Running, treadmill',
+    'Cross training',
+    'Walking',
+    'Skating',
+    'Spinning',
+    'Skiing',
+    'Table tennis',
+    'Rope jumping',
+}
+# Sport names that are explicitly not cardio; rows with these sports are dropped.
+# A sport listed in neither set is kept, but flagged with an error.
+# todo if it has HR data, take it into account??
+NOT_CARDIO = {
+    'Other',
+}
+
+
+@cdf
+def endomondo_cardio() -> pd.DataFrame:
+    '''
+    Endomondo workouts restricted to cardio activities.
+
+    - rows whose sport is in CARDIO are kept as is
+    - rows whose sport is in NOT_CARDIO are dropped
+    - rows whose sport is in neither set are kept, and get an 'error' value asking
+      to classify the sport (unless the row already carries an error)
+    '''
+    assert len(CARDIO.intersection(NOT_CARDIO)) == 0, (CARDIO, NOT_CARDIO)
+
+    # endomondo sits at the top level of the package, next to 'core',
+    # so it needs three dots from my.body.exercise (two dots would point at my.body)
+    from ...endomondo import dataframe as EDF
+    df = EDF()
+
+    # not sure...
+    # df = df[df['heart_rate_avg'].notna()]
+
+    is_cardio  = df['sport'].isin(CARDIO)
+    not_cardio = df['sport'].isin(NOT_CARDIO)
+    neither    = ~is_cardio & ~not_cardio
+    # if neither -- count, but warn? or show error?
+
+    # todo error about the rest??
+    # todo append errors?
+    unexpected = neither
+    if 'error' in df.columns:
+        # rows that already have an error (presumably upstream failures, where 'sport'
+        # is missing) keep their original message instead of having it overwritten
+        unexpected = neither & df['error'].isna()
+    df.loc[unexpected, 'error'] = 'Unexpected exercise type, please mark as cardio or non-cardio'
+    df = df[is_cardio | neither]
+
+    return df
+
+
+def dataframe():
+    '''
+    Module entry point: currently returns the Endomondo-based cardio dataframe only.
+    '''
+    return endomondo_cardio()
--- a/my/body/exercise/cross_trainer.py
+++ b/my/body/exercise/cross_trainer.py
@ -0,0 +1,182 @@
+'''
+My cross trainer exercise data, arbitrated between different sources (mainly, Endomondo and various manual plaintext notes)
+
+This is probably too specific to my needs, so later I will move it away to a personal 'layer'.
+For now it's worth keeping it here as an example and perhaps utility functions might be useful for other HPI modules.
+'''
+
+from datetime import datetime, timedelta
+from typing import Optional
+
+from ...core.pandas import check_dataframe as cdf
+
+from my.config import exercise as config
+
+
+import pytz
+# Timezone that naive datetimes are localized to by tzify() below.
+# FIXME how to attach it properly?
+tz = pytz.timezone('Europe/London')
+
+def tzify(d: datetime) -> datetime:
+    '''
+    Localize a naive datetime to the module timezone; an aware datetime trips the assertion.
+    '''
+    assert d.tzinfo is None, d
+    localized = tz.localize(d)
+    return localized
+
+
+# todo predataframe?? entries??
+def cross_trainer_data():
+    '''
+    Yield one dict per row of the "Cross training" table found in the org-mode workout log.
+
+    The 'duration', 'date' and 'comment' columns have dedicated converters; any other
+    column is parsed as a float, with an empty cell turned into None.
+    If a row fails to convert, {'error': <exception message>} is yielded in its place.
+    '''
+    # FIXME some manual entries in python
+    # I guess just convert them to org
+
+    from porg import Org
+    # FIXME should use all org notes and just query from them?
+    workout_log = Org.from_file(config.workout_log)
+    table = workout_log.xpath('//org[heading="Cross training"]//table')
+
+    def float_or_none(cell):
+        # default converter: empty cell means the value is missing
+        return None if len(cell) == 0 else float(cell)
+
+    def parse_mm_ss(x: str) -> timedelta:
+        # expects exactly 'minutes:seconds'
+        minutes, seconds = x.split(':')
+        return timedelta(minutes=int(minutes), seconds=int(seconds))
+
+    # todo eh. not sure if there is a way of getting around writing code...
+    # I guess would be nice to have a means of specifying type in the column? maybe multirow column names??
+    # need to look up org-mode standard..
+    from ...core.orgmode import parse_org_datetime
+    converters = {
+        'duration': parse_mm_ss,
+        'date'    : lambda s: tzify(parse_org_datetime(s)),
+        'comment' : str,
+    }
+    for line in table.lines:
+        # todo make more defensive, fallback on nan for individual fields??
+        try:
+            # todo have something smarter... e.g. allow pandas to infer the type??
+            yield {
+                column: converters.get(column, float_or_none)(cell)
+                for column, cell in line.items()
+            }
+        except Exception as e:
+            # todo add parsing context
+            yield {'error': str(e)}
+
+    # todo hmm, converting an org table directly to pandas kinda makes sense?
+    # could have a '.dataframe' method in orgparse, optional dependency
+
+
+@cdf
+def cross_trainer_manual_dataframe():
+    '''
+    Dataframe built from the manual org-mode entries alone (no Endomondo data)
+    '''
+    from pandas import DataFrame
+    return DataFrame(cross_trainer_data())
+
+# Maximum gap between a manual entry's date and an Endomondo start time
+# for the two to be treated as the same workout.
+# this should be enough?..
+_DELTA = timedelta(hours=10)
+
+# todo check error handling by introducing typos (e.g. especially dates) in org-mode
+@cdf
+def dataframe():
+    '''
+    Endomondo cross trainer workouts, joined with manually logged data (which Endomondo can't capture).
+
+    Each manual entry is matched against the Endomondo workouts that started within _DELTA of it:
+    - exactly one match: the manual columns are attached to that workout
+      (columns present on both sides get the '_manual' suffix)
+    - no match, several matches, or a workout already claimed by an earlier manual entry:
+      the manual entry is kept with 'error' set and is not attached to any workout
+    Manual entries without any Endomondo match additionally get 'start_time', 'duration'
+    and 'heart_rate_avg' filled in from the manual data.
+    '''
+    import pandas as pd
+
+    from ...endomondo import dataframe as EDF
+    edf = EDF()
+    # na=False: a row without a sport would otherwise put NaN into the mask,
+    # and pandas refuses to index with a mask containing NaN
+    edf = edf[edf['sport'].str.contains('Cross training', na=False)]
+
+    mdf = cross_trainer_manual_dataframe()
+    # TODO need to always remember to split errors???
+    # on the other hand, dfs are always untyped. so it's not too bad??
+    # now for each manual entry, find a 'close enough' endomondo entry
+    # ideally it's a 1-1 (or 0-1) relationship, but there might be errors
+    rows = []
+    idxs = []
+    NO_ENDOMONDO = 'no endomondo matches'
+    for _, row in mdf.iterrows():
+        rd = row.to_dict()
+        mdate = row['date']
+        if pd.isna(mdate):
+            # todo error handling got to be easier. seriously, mypy friendly dataframes would be amazing
+            idxs.append(None)
+            rows.append(rd) # presumably has an error set
+            continue
+
+        idx: Optional[int]
+        close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < _DELTA]
+        if len(close) == 0:
+            idx = None
+            d = {
+                **rd,
+                'error': NO_ENDOMONDO,
+            }
+        elif len(close) > 1:
+            idx = None
+            d = {
+                **rd,
+                'error': f'one manual, many endomondo: {close}',
+            }
+        else:
+            idx = close.index[0]
+            d = rd
+
+            if idx in idxs:
+                # this workout was already claimed by an earlier manual entry
+                # todo might be a good idea to remove the original match as well?
+                idx = None
+                d = {
+                    **rd,
+                    'error': 'one endomondo, many manual',
+                }
+        idxs.append(idx)
+        rows.append(d)
+    # index manual rows by the endomondo row they matched (None if unmatched), so join() lines them up
+    mdf = pd.DataFrame(rows, index=idxs)
+
+    # todo careful about 'how'? we need it to preserve the errors
+    # maybe pd.merge is better suited for this??
+    df = edf.join(mdf, how='outer', rsuffix='_manual')
+    # todo reindex? so we dont' have Nan leftovers
+
+    if 'error' not in df.columns:
+        # no errors on either side, i.e. every manual entry matched exactly one workout:
+        # nothing to backfill
+        return df
+
+    # todo set date anyway? maybe just squeeze into the index??
+    noendo = df['error'] == NO_ENDOMONDO
+    if noendo.any():
+        # meh. otherwise the column type ends up object
+        # (named endo_tz to avoid shadowing the module-level tz)
+        endo_tz = df[noendo]['start_time'].dtype.tz
+        df.loc[noendo, 'start_time'    ] = df[noendo]['date'           ].dt.tz_convert(endo_tz)
+        df.loc[noendo, 'duration'      ] = df[noendo]['duration_manual']
+        df.loc[noendo, 'heart_rate_avg'] = df[noendo]['hr_avg'         ]
+
+    # todo set sport?? set source?
+    return df
+# TODO arbitrate kcal, duration, avg hr
+# compare power and hr? add 'quality' function??
+# TODO wtf?? where is speed coming from??
+
+
+def stats():
+    '''
+    Hand the manual entries generator over to core.stat and return whatever it reports.
+    '''
+    from ...core import stat
+    return stat(cross_trainer_data)
+
+
+def compare_manual():
+    '''
+    Print Endomondo vs manual kcal and duration side by side (indexed by start time),
+    only for the rows where all four values are present.
+    '''
+    columns = [
+        'kcal'    , 'kcal_manual',
+        'duration', 'duration_manual',
+    ]
+    by_start = dataframe().set_index('start_time')
+    complete = by_start[columns].dropna()
+    print(complete.to_string())
+
+
+def pd_date_diff(a, b) -> timedelta:
+    '''
+    Difference a - b between two timezone-aware pandas timestamps, as a plain timedelta.
+
+    Goes through python datetimes, since pandas complains when subtracting
+    timestamps that are in different timezones. Both arguments must be tz-aware.
+    '''
+    for stamp in (a, b):
+        assert stamp.tzinfo is not None, stamp
+    left, right = a.to_pydatetime(), b.to_pydatetime()
+    return left - right