my.body.exercise: cleanup & error handling for merging cross trainer stuff
This commit is contained in:
parent
0b947e7d14
commit
1ca2d116ec
1 changed files with 99 additions and 39 deletions
|
@ -6,45 +6,71 @@ For now it's worth keeping it here as an example and perhaps utility functions m
|
|||
'''
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
from my.config import exercise as config
|
||||
|
||||
|
||||
# todo predataframe?? entries??
|
||||
def cross_trainer_data():
    '''
    Lines of the table found under the "Cross training" heading of the org-mode workout log
    '''
    # FIXME manual entries

    from porg import Org

    # TODO FIXME should use all org notes and just query from them?
    workout_log = Org.from_file(config.workout_log)
    table = workout_log.xpath('//org[heading="Cross training"]//table')
    lines = table.lines
    return lines
|
||||
# todo hmm, converting an org table directly to pandas kinda makes sense?
|
||||
# could have a '.dataframe' method in orgparse, optional dependency
|
||||
|
||||
|
||||
import pytz
|
||||
# FIXME how to attach it properly?
tz = pytz.timezone('Europe/London')


def tzify(d: datetime) -> datetime:
    '''
    Localize a naive datetime with the module-level timezone (tz)
    '''
    # only naive datetimes are accepted: an already aware one trips the assertion
    is_naive = d.tzinfo is None
    assert is_naive, d
    localized = tz.localize(d)
    return localized
|
||||
|
||||
|
||||
# todo predataframe?? entries??
|
||||
def cross_trainer_data():
    '''
    Yield one dict per line of the "Cross training" table in the org-mode workout log.

    Column handling:
    - 'duration' is parsed as mm:ss into a timedelta
    - 'date' is passed through parse_org_datetime and then tzify
    - any other column is converted with float; an empty cell becomes None

    A line that fails to convert is not raised: it is yielded as {'error': <message>} instead.
    '''
    # FIXME some manual entries in python
    # I guess just convert them to org

    from porg import Org
    # FIXME should use all org notes and just query from them?
    wlog = Org.from_file(config.workout_log)
    cross_table = wlog.xpath('//org[heading="Cross training"]//table')

    def maybe(f):
        # wrap a parser so that an empty cell maps to None instead of being fed to f
        def parse(s):
            if len(s) == 0:
                return None
            return f(s)
        return parse

    def parse_mm_ss(x: str) -> timedelta:
        minutes, seconds = x.split(':')
        return timedelta(seconds=int(minutes) * 60 + int(seconds))

    # todo eh. not sure if there is a way of getting around writing code...
    # I guess would be nice to have a means of specifying type in the column? maybe multirow column names??
    # need to look up org-mode standard..
    from ..core.orgmode import parse_org_datetime
    mappers = {
        'duration': parse_mm_ss,
        'date'    : lambda s: tzify(parse_org_datetime(s)),
    }
    # built once: fallback parser for every column without a dedicated mapper
    default_mapper = maybe(float)

    for row in cross_table.lines:
        # todo make more defensive, fallback on nan for individual fields??
        try:
            d = {k: mappers.get(k, default_mapper)(v) for k, v in row.items()}
        except Exception as e:
            # todo add parsing context
            yield {'error': str(e)}
        else:
            # yield outside of the try body, so an exception thrown into the generator
            # by the consumer is not swallowed and reported as a parsing error
            yield d
|
||||
|
||||
# todo hmm, converting an org table directly to pandas kinda makes sense?
|
||||
# could have a '.dataframe' method in orgparse, optional dependency
|
||||
|
||||
|
||||
def cross_trainer_manual_dataframe():
    '''
    Only manual org-mode entries
    '''
    import pandas as pd
    # cross_trainer_data already converts each row while yielding it
    # ('date' goes through parse_org_datetime + tzify, 'duration' through the mm:ss parser),
    # so the values must not be parsed/localized a second time here:
    # tzify asserts that its input is naive, and error rows carry no 'date' at all
    return pd.DataFrame(cross_trainer_data())
|
||||
|
||||
|
||||
|
@ -58,15 +84,19 @@ def cross_trainer_dataframe():
|
|||
edf = EDF()
|
||||
edf = edf[edf['sport'].str.contains('Cross training')]
|
||||
|
||||
|
||||
# Normalise and assume single bout of exercise per day
|
||||
# TODO this could be useful for other providers..
|
||||
# todo hmm maybe this bit is not really that necessary for this function??
|
||||
# just let it fail further down
|
||||
grouped = edf.set_index('start_time').groupby(lambda t: t.date())
|
||||
singles = []
|
||||
for day, grp in grouped:
|
||||
if len(grp) != 1:
|
||||
# FIXME yield runtimeerror
|
||||
continue
|
||||
singles.append(grp)
|
||||
else:
|
||||
singles.append(grp)
|
||||
edf = pd.concat(singles)
|
||||
edf = edf.reset_index()
|
||||
|
||||
|
@ -75,25 +105,44 @@ def cross_trainer_dataframe():
|
|||
rows = []
|
||||
idxs = []
|
||||
for i, row in mdf.iterrows():
|
||||
# todo rename 'date'??
|
||||
mdate = row['date']
|
||||
close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < timedelta(hours=3)]
|
||||
idx: Optional[int]
|
||||
rd = row.to_dict()
|
||||
# todo in case of error, 'start date' becomes 'date'??
|
||||
if len(close) == 0:
|
||||
# FIXME emit warning -- nothing matched
|
||||
continue
|
||||
if len(close) > 1:
|
||||
# FIXME emit warning
|
||||
continue
|
||||
loc = close.index[0]
|
||||
# FIXME check and make defensive
|
||||
# assert loc not in idxs, (loc, row)
|
||||
idxs.append(loc)
|
||||
rows.append(row)
|
||||
idx = None
|
||||
d = {
|
||||
**rd,
|
||||
'error': 'no endomondo matches',
|
||||
}
|
||||
elif len(close) > 1:
|
||||
idx = None
|
||||
d = {
|
||||
**rd,
|
||||
'error': 'multiple endomondo matches',
|
||||
# todo add info on which exactly??
|
||||
}
|
||||
else:
|
||||
idx = close.index[0]
|
||||
d = rd
|
||||
|
||||
if idx in idxs:
|
||||
# todo might be a good idea to remove the original match as well?
|
||||
idx = None
|
||||
d = {
|
||||
**rd,
|
||||
'error': 'manual entry matched multiple times',
|
||||
}
|
||||
idxs.append(idx)
|
||||
rows.append(d)
|
||||
mdf = pd.DataFrame(rows, index=idxs)
|
||||
|
||||
df = edf.join(mdf, rsuffix='_manual')
|
||||
# todo careful about 'how'? we need it to preserve the errors
|
||||
# maybe pd.merge is better suited for this??
|
||||
df = edf.join(mdf, how='outer', rsuffix='_manual')
|
||||
# TODO arbitrate kcal, duration, avg hr
|
||||
# compare power and hr?
|
||||
# compare power and hr? add 'quality' function??
|
||||
return df
|
||||
|
||||
|
||||
|
@ -102,6 +151,17 @@ def stats():
|
|||
return stat(cross_trainer_data())
|
||||
|
||||
|
||||
def compare_manual():
    '''
    Print kcal and duration next to their '_manual' counterparts, indexed by start_time.

    Rows where any of the four columns is missing are dropped before printing.
    '''
    columns = [
        'kcal'    , 'kcal_manual',
        'duration', 'duration_manual',
    ]
    merged = cross_trainer_dataframe().set_index('start_time')
    complete = merged[columns].dropna()
    print(complete.to_string())
|
||||
|
||||
|
||||
def pd_date_diff(a, b) -> timedelta:
|
||||
# ugh. pandas complains when we subtract timestamps in different timezones
|
||||
assert a.tzinfo is not None, a
|
||||
|
|
Loading…
Add table
Reference in a new issue