my.body.exercise: cleanup & error handling for merging cross trainer stuff

This commit is contained in:
Dima Gerasimov 2020-09-13 23:02:17 +01:00 committed by karlicoss
parent 0b947e7d14
commit 1ca2d116ec

View file

@ -6,45 +6,71 @@ For now it's worth keeping it here as an example and perhaps utility functions m
'''
from datetime import datetime, timedelta
from typing import Optional
from my.config import exercise as config
# todo predataframe?? entries??
def cross_trainer_data():
    '''
    Return the lines of the table found under the "Cross training" heading
    of the org-mode workout log configured in ``config.workout_log``.
    '''
    # FIXME manual entries
    from porg import Org

    # TODO FIXME should use all org notes and just query from them?
    workout_log = Org.from_file(config.workout_log)
    table = workout_log.xpath('//org[heading="Cross training"]//table')
    return table.lines
# todo hmm, converting an org table directly to pandas kinda makes sense?
# could have a '.dataframe' method in orgparse, optional dependency
import pytz

# FIXME how to attach it properly?
tz = pytz.timezone('Europe/London')


def tzify(d: datetime) -> datetime:
    '''
    Localize a naive datetime to the module-level timezone ``tz``.

    Asserts that ``d`` carries no tzinfo yet; the offending value is used as
    the assertion message.
    '''
    is_naive = d.tzinfo is None
    assert is_naive, d
    localized = tz.localize(d)
    return localized
# todo predataframe?? entries??
# todo predataframe?? entries??
def cross_trainer_data():
    '''
    Yield one dict per row of the "Cross training" table in the org-mode
    workout log (``config.workout_log``).

    Column handling:
    - 'duration' is parsed from ``mm:ss`` into a timedelta
    - 'date' is parsed with parse_org_datetime and localized via tzify
    - any other column is converted to float, or None when the cell is empty

    A row that fails to parse is not raised; instead a dict of the form
    ``{'error': <message>}`` is yielded in its place.
    '''
    # FIXME some manual entries in python
    # I guess just convert them to org
    from porg import Org

    # FIXME should use all org notes and just query from them?
    workout_log = Org.from_file(config.workout_log)
    table = workout_log.xpath('//org[heading="Cross training"]//table')

    def maybe(convert):
        # wrap a converter so that empty cells map to None instead of failing
        def convert_nonempty(cell):
            if len(cell) > 0:
                return convert(cell)
            return None
        return convert_nonempty

    def parse_mm_ss(x: str) -> timedelta:
        minutes, seconds = x.split(':')
        return timedelta(seconds=int(minutes) * 60 + int(seconds))

    # todo eh. not sure if there is a way of getting around writing code...
    # I guess would be nice to have a means of specifying type in the column? maybe multirow column names??
    # need to look up org-mode standard..
    from ..core.orgmode import parse_org_datetime

    def parse_date(cell):
        return tzify(parse_org_datetime(cell))

    mappers = {
        'duration': parse_mm_ss,
        'date': parse_date,
    }
    # columns without a dedicated mapper are treated as optional floats
    optional_float = maybe(float)

    for row in table.lines:
        # todo make more defensive, fallback on nan for individual fields??
        try:
            parsed = {
                column: mappers.get(column, optional_float)(cell)
                for column, cell in row.items()
            }
            yield parsed
        except Exception as e:
            # todo add parsing context
            yield {'error': str(e)}
# todo hmm, converting an org table directly to pandas kinda makes sense?
# could have a '.dataframe' method in orgparse, optional dependency
def cross_trainer_manual_dataframe():
    '''
    Only manual org-mode entries

    Returns a pandas DataFrame built directly from the rows yielded by
    cross_trainer_data(). Rows that failed to parse show up with an
    'error' column instead of the regular fields.
    '''
    import pandas as pd

    # cross_trainer_data() already parses 'date' (and localizes it via tzify)
    # and converts 'duration', so no further conversion happens here:
    # re-running tzify would trip its "must be naive" assertion, and indexing
    # df['date'] would raise KeyError when no row produced a 'date' column.
    df = pd.DataFrame(cross_trainer_data())
    return df
@ -58,15 +84,19 @@ def cross_trainer_dataframe():
edf = EDF()
edf = edf[edf['sport'].str.contains('Cross training')]
# Normalise and assume single bout of exercise per day
# TODO this could be useful for other providers..
# todo hmm maybe this bit is not really that necessary for this function??
# just let it fail further down
grouped = edf.set_index('start_time').groupby(lambda t: t.date())
singles = []
for day, grp in grouped:
if len(grp) != 1:
# FIXME yield runtimeerror
continue
singles.append(grp)
else:
singles.append(grp)
edf = pd.concat(singles)
edf = edf.reset_index()
@ -75,25 +105,44 @@ def cross_trainer_dataframe():
rows = []
idxs = []
for i, row in mdf.iterrows():
# todo rename 'date'??
mdate = row['date']
close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < timedelta(hours=3)]
idx: Optional[int]
rd = row.to_dict()
# todo in case of error, 'start date' becomes 'date'??
if len(close) == 0:
# FIXME emit warning -- nothing matched
continue
if len(close) > 1:
# FIXME emit warning
continue
loc = close.index[0]
# FIXME check and make defensive
# assert loc not in idxs, (loc, row)
idxs.append(loc)
rows.append(row)
idx = None
d = {
**rd,
'error': 'no endomondo matches',
}
elif len(close) > 1:
idx = None
d = {
**rd,
'error': 'multiple endomondo matches',
# todo add info on which exactly??
}
else:
idx = close.index[0]
d = rd
if idx in idxs:
# todo might be a good idea to remove the original match as well?
idx = None
d = {
**rd,
'error': 'manual entry matched multiple times',
}
idxs.append(idx)
rows.append(d)
mdf = pd.DataFrame(rows, index=idxs)
df = edf.join(mdf, rsuffix='_manual')
# todo careful about 'how'? we need it to preserve the errors
# maybe pd.merge is better suited for this??
df = edf.join(mdf, how='outer', rsuffix='_manual')
# TODO arbitrate kcal, duration, avg hr
# compare power and hr?
# compare power and hr? add 'quality' function??
return df
@ -102,6 +151,17 @@ def stats():
return stat(cross_trainer_data())
def compare_manual():
    '''
    Print the kcal and duration columns next to their '_manual' counterparts
    from cross_trainer_dataframe(), indexed by 'start_time'.

    Rows with a missing value in any of the four columns are dropped.
    '''
    columns = [
        'kcal', 'kcal_manual',
        'duration', 'duration_manual',
    ]
    merged = cross_trainer_dataframe().set_index('start_time')
    complete = merged[columns].dropna()
    print(complete.to_string())
def pd_date_diff(a, b) -> timedelta:
# ugh. pandas complains when we subtract timestamps in different timezones
assert a.tzinfo is not None, a