my.body.exercise: cleanup & error handling for merging cross trainer stuff
This commit is contained in:
parent
0b947e7d14
commit
1ca2d116ec
1 changed files with 99 additions and 39 deletions
|
@ -6,45 +6,71 @@ For now it's worth keeping it here as an example and perhaps utility functions m
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from my.config import exercise as config
|
from my.config import exercise as config
|
||||||
|
|
||||||
|
|
||||||
# todo predataframe?? entries??
def cross_trainer_data():
    """Yield the raw rows of the 'Cross training' table from the org workout log."""
    # FIXME manual entries
    from porg import Org

    # TODO FIXME should use all org notes and just query from them?
    workout_log = Org.from_file(config.workout_log)
    table = workout_log.xpath('//org[heading="Cross training"]//table')
    return table.lines

# todo hmm, converting an org table directly to pandas kinda makes sense?
# could have a '.dataframe' method in orgparse, optional dependency
|
|
||||||
|
|
||||||
|
|
||||||
import pytz

# FIXME how to attach it properly?
# NOTE(review): hardcoded tz — presumably the author's home timezone; confirm.
tz = pytz.timezone('Europe/London')


def tzify(d: datetime) -> datetime:
    """Localize a naive datetime to the module timezone.

    Rejects already-aware datetimes: double-localizing would silently
    shift the instant, so we insist on naive input.
    """
    assert d.tzinfo is None, d
    return tz.localize(d)
||||||
|
|
||||||
|
|
||||||
|
# todo predataframe?? entries??
def cross_trainer_data():
    """Parse the 'Cross training' org-mode table into dicts, one per row.

    Yields a dict per table row with values coerced by column name
    ('duration' -> timedelta, 'date' -> tz-aware datetime, everything
    else -> float or None for empty cells).  A row that fails to parse
    yields ``{'error': <message>}`` instead of raising, so one bad row
    doesn't kill the whole stream.
    """
    # FIXME some manual entries in python
    # I guess just convert them to org
    from porg import Org

    # FIXME should use all org notes and just query from them?
    wlog = Org.from_file(config.workout_log)
    cross_table = wlog.xpath('//org[heading="Cross training"]//table')

    def maybe(f):
        # Wrap a parser so that empty cells map to None instead of erroring.
        def parse(s):
            if len(s) == 0:
                return None
            return f(s)
        return parse

    def parse_mm_ss(x: str) -> timedelta:
        # "MM:SS" string -> timedelta
        hs, ms = x.split(':')
        return timedelta(seconds=int(hs) * 60 + int(ms))

    # todo eh. not sure if there is a way of getting around writing code...
    # I guess would be nice to have a means of specifying type in the column? maybe multirow column names??
    # need to look up org-mode standard..
    from ..core.orgmode import parse_org_datetime
    mappers = {
        # fix: parse_mm_ss is already a unary callable — the lambda wrapper was redundant
        'duration': parse_mm_ss,
        'date'    : lambda s: tzify(parse_org_datetime(s)),
    }
    for row in cross_table.lines:
        # todo make more defensive, fallback on nan for individual fields??
        try:
            d = {}
            for k, v in row.items():
                # columns without an explicit mapper are treated as optional floats
                mapper = mappers.get(k, maybe(float))
                d[k] = mapper(v)
            yield d
        except Exception as e:
            # todo add parsing context
            yield {'error': str(e)}

# todo hmm, converting an org table directly to pandas kinda makes sense?
# could have a '.dataframe' method in orgparse, optional dependency
|
||||||
|
|
||||||
|
|
||||||
def cross_trainer_manual_dataframe():
    '''
    Only manual org-mode entries
    '''
    import pandas as pd
    # rows are the dicts produced by cross_trainer_data (including error rows)
    return pd.DataFrame(cross_trainer_data())
|
||||||
|
|
||||||
|
|
||||||
|
@ -58,15 +84,19 @@ def cross_trainer_dataframe():
|
||||||
edf = EDF()
|
edf = EDF()
|
||||||
edf = edf[edf['sport'].str.contains('Cross training')]
|
edf = edf[edf['sport'].str.contains('Cross training')]
|
||||||
|
|
||||||
|
|
||||||
# Normalise and assume single bout of exercise per day
|
# Normalise and assume single bout of exercise per day
|
||||||
# TODO this could be useful for other providers..
|
# TODO this could be useful for other providers..
|
||||||
|
# todo hmm maybe this bit is not really that necessary for this function??
|
||||||
|
# just let it fail further down
|
||||||
grouped = edf.set_index('start_time').groupby(lambda t: t.date())
|
grouped = edf.set_index('start_time').groupby(lambda t: t.date())
|
||||||
singles = []
|
singles = []
|
||||||
for day, grp in grouped:
|
for day, grp in grouped:
|
||||||
if len(grp) != 1:
|
if len(grp) != 1:
|
||||||
# FIXME yield runtimeerror
|
# FIXME yield runtimeerror
|
||||||
continue
|
continue
|
||||||
singles.append(grp)
|
else:
|
||||||
|
singles.append(grp)
|
||||||
edf = pd.concat(singles)
|
edf = pd.concat(singles)
|
||||||
edf = edf.reset_index()
|
edf = edf.reset_index()
|
||||||
|
|
||||||
|
@ -75,25 +105,44 @@ def cross_trainer_dataframe():
|
||||||
rows = []
|
rows = []
|
||||||
idxs = []
|
idxs = []
|
||||||
for i, row in mdf.iterrows():
|
for i, row in mdf.iterrows():
|
||||||
# todo rename 'date'??
|
|
||||||
mdate = row['date']
|
mdate = row['date']
|
||||||
close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < timedelta(hours=3)]
|
close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < timedelta(hours=3)]
|
||||||
|
idx: Optional[int]
|
||||||
|
rd = row.to_dict()
|
||||||
|
# todo in case of error, 'start date' becomes 'date'??
|
||||||
if len(close) == 0:
|
if len(close) == 0:
|
||||||
# FIXME emit warning -- nothing matched
|
idx = None
|
||||||
continue
|
d = {
|
||||||
if len(close) > 1:
|
**rd,
|
||||||
# FIXME emit warning
|
'error': 'no endomondo matches',
|
||||||
continue
|
}
|
||||||
loc = close.index[0]
|
elif len(close) > 1:
|
||||||
# FIXME check and make defensive
|
idx = None
|
||||||
# assert loc not in idxs, (loc, row)
|
d = {
|
||||||
idxs.append(loc)
|
**rd,
|
||||||
rows.append(row)
|
'error': 'multiple endomondo matches',
|
||||||
|
# todo add info on which exactly??
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
idx = close.index[0]
|
||||||
|
d = rd
|
||||||
|
|
||||||
|
if idx in idxs:
|
||||||
|
# todo might be a good idea to remove the original match as well?
|
||||||
|
idx = None
|
||||||
|
d = {
|
||||||
|
**rd,
|
||||||
|
'error': 'manual entry matched multiple times',
|
||||||
|
}
|
||||||
|
idxs.append(idx)
|
||||||
|
rows.append(d)
|
||||||
mdf = pd.DataFrame(rows, index=idxs)
|
mdf = pd.DataFrame(rows, index=idxs)
|
||||||
|
|
||||||
df = edf.join(mdf, rsuffix='_manual')
|
# todo careful about 'how'? we need it to preserve the errors
|
||||||
|
# maybe pd.merge is better suited for this??
|
||||||
|
df = edf.join(mdf, how='outer', rsuffix='_manual')
|
||||||
# TODO arbitrate kcal, duration, avg hr
|
# TODO arbitrate kcal, duration, avg hr
|
||||||
# compare power and hr?
|
# compare power and hr? add 'quality' function??
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
@ -102,6 +151,17 @@ def stats():
|
||||||
return stat(cross_trainer_data())
|
return stat(cross_trainer_data())
|
||||||
|
|
||||||
|
|
||||||
|
def compare_manual():
    """Print manual vs. endomondo kcal/duration side by side for eyeballing."""
    df = cross_trainer_dataframe()
    columns = [
        'kcal', 'kcal_manual',
        'duration', 'duration_manual',
    ]
    # only keep rows where both sources have values, indexed by workout start
    comparison = df.set_index('start_time')[columns].dropna()
    print(comparison.to_string())
|
||||||
|
|
||||||
|
|
||||||
def pd_date_diff(a, b) -> timedelta:
|
def pd_date_diff(a, b) -> timedelta:
|
||||||
# ugh. pandas complains when we subtract timestamps in different timezones
|
# ugh. pandas complains when we subtract timestamps in different timezones
|
||||||
assert a.tzinfo is not None, a
|
assert a.tzinfo is not None, a
|
||||||
|
|
Loading…
Add table
Reference in a new issue