split into data and plotting parts

This commit is contained in:
Dima Gerasimov 2018-08-18 16:47:33 +01:00
parent 35924b82b0
commit d531cba77a
2 changed files with 266 additions and 0 deletions

View file

@ -0,0 +1,138 @@
from kython import json_load
from datetime import datetime
from os.path import join
from functools import lru_cache
from datetime import timedelta, datetime
from typing import List, Dict, Iterator
# Shorthand used throughout this module: turn a POSIX timestamp into a
# naive datetime (datetime.fromtimestamp with no tz uses local time).
fromts = datetime.fromtimestamp
def hhmm(minutes):
    """Format an integer minute count as a zero-padded ``HH:MM`` string."""
    hours, remainder = divmod(minutes, 60)
    return f'{hours:02d}:{remainder:02d}'
# Directory that iter_datas() lists for ``*.json`` session files.
PATH = "/L/backups/emfit/"
# Sleep-epoch class code for "awake"; sleep_start / sleep_end skip epochs
# carrying this code.
AWAKE = 4
class Emfit:
    """Accessors over one decoded Emfit sleep-session JSON document.

    ``jj`` is the decoded JSON mapping; every property reads directly from it.
    Timestamps found in the document are converted with ``fromts``.
    """

    def __init__(self, jj):
        self.jj = jj
        # Per-instance memo for sleep_start / sleep_end. Stored on the
        # instance instead of using lru_cache on the methods, which would key
        # on ``self`` and keep instances alive in a module-wide cache.
        self._sleep_start = None
        self._sleep_end = None

    @property
    def hrv_morning(self):
        """Value stored under ``hrv_rmssd_morning``."""
        return self.jj['hrv_rmssd_morning']

    @property
    def hrv_evening(self):
        """Value stored under ``hrv_rmssd_evening``."""
        return self.jj['hrv_rmssd_evening']

    @property
    def date(self):
        """Calendar date on which the session ended."""
        return self.end.date()

    @property
    def start(self):
        """Session start (``time_start``) as a datetime."""
        return fromts(self.jj['time_start'])

    @property
    def end(self):
        """Session end (``time_end``) as a datetime."""
        return fromts(self.jj['time_end'])

    @property
    def epochs(self):
        """Raw ``sleep_epoch_datapoints``: ``[[timestamp, number]]`` pairs."""
        return self.jj['sleep_epoch_datapoints']

    @staticmethod
    def _first_non_awake(epochs) -> datetime:
        """Return the time of the first epoch in *epochs* that is not AWAKE.

        Raises:
            RuntimeError: if every epoch is AWAKE (or there are none).
        """
        for ts, phase in epochs:
            if phase != AWAKE:
                return fromts(ts)
        raise RuntimeError('no non-awake epoch in sleep_epoch_datapoints')

    @property
    def sleep_start(self) -> datetime:
        """Time of the first non-awake epoch (computed once per instance).

        Raises:
            RuntimeError: if the session contains no non-awake epoch.
        """
        if self._sleep_start is None:
            self._sleep_start = self._first_non_awake(self.epochs)
        return self._sleep_start

    @property
    def sleep_end(self) -> datetime:
        """Time of the last non-awake epoch (computed once per instance).

        Raises:
            RuntimeError: if the session contains no non-awake epoch.
        """
        if self._sleep_end is None:
            self._sleep_end = self._first_non_awake(reversed(self.epochs))
        return self._sleep_end

    # 'sleep_epoch_datapoints'
    # [[timestamp, number]]

    # so it's actual sleep, without awake
    # ok, so I need time_asleep
    @property
    def sleep_minutes(self):
        """``sleep_duration`` converted to whole minutes (floor division by 60)."""
        return self.jj['sleep_duration'] // 60

    @property
    def hrv_lf(self):
        """Value stored under ``hrv_lf``."""
        return self.jj['hrv_lf']

    @property
    def hrv_hf(self):
        """Value stored under ``hrv_hf``."""
        return self.jj['hrv_hf']

    @property
    def summary(self):
        """One-line text summary: sleep length plus the HRV figures."""
        return f"for {hhmm(self.sleep_minutes)} hrv: [{self.hrv_morning:.0f} {self.hrv_evening:.0f} {self.hrv_morning - self.hrv_evening:3.0f} {self.hrv_lf}/{self.hrv_hf}]"

    # measured_datapoints
    # [[timestamp, pulse, breath?, ??? hrv?]] # every 4 seconds?
    @property
    def sleep_hr(self):
        """Pulse samples that fall strictly between sleep_start and sleep_end.

        Returns:
            ``(timestamps, pulses)``: two parallel lists taken from
            ``measured_datapoints``; timestamps are left as raw values.
        """
        points = self.jj['measured_datapoints']
        if not points:
            # No samples: nothing to filter, so the sleep window is not needed.
            return [], []

        # The window is the same for every sample; resolve it once.
        window_start = self.sleep_start
        window_end = self.sleep_end

        tss = []
        res = []
        for ts, pulse, _br, _activity in points:
            # TODO unclear what the fourth field is. It can't be HRV, it's
            # about 500 ms on average. It is "act" in the csv, so it must be
            # activity ("measured_activity_avg": 595 is about that), which
            # also fits tossturn datapoints only having a timestamp.
            if window_start < fromts(ts) < window_end:
                tss.append(ts)
                res.append(pulse)
        return tss, res

    @property
    def hrv(self):
        """RMSSD series from ``hrv_rmssd_datapoints``.

        Returns:
            ``(timestamps, rmssd_values)``: two parallel lists.
        """
        tss = []
        res = []
        # Each row has six fields; only the first two are used here.
        # csv columns: timestamp,rmssd,tp,lfn,hfn,r_hrv
        # TP is total_power??
        # erm. looks like there is a discrepancy between csv and json data.
        # right, so web is using api v 1. what if i use v1??
        # definitely a discrepancy between v1 and v4. have no idea how to resolve it :(
        # also if one of them is indeed tp value, it must have been rounded.
        # TODO what is the meaning of the rest???
        # they don't look like HR data.
        for ts, rmssd, _, _, _almost_always_zero, _ in self.jj['hrv_rmssd_datapoints']:
            tss.append(ts)
            res.append(rmssd)
        return tss, res

    @property
    def sleep_hr_coverage(self):
        """Percentage of expected in-sleep pulse samples that are not None.

        The expected count assumes one sample every 4 seconds (see the note
        on measured_datapoints above; TODO confirm the sampling interval).
        Returns 0.0 when ``sleep_minutes`` is zero instead of dividing by zero.
        """
        expected_samples = self.sleep_minutes * 60 / 4
        if not expected_samples:
            return 0.0
        _, hrs = self.sleep_hr
        covered_samples = sum(1 for h in hrs if h is not None)
        return covered_samples / expected_samples * 100
def iter_datas() -> Iterator[Emfit]:
    """Yield an ``Emfit`` for each ``*.json`` file in ``PATH``, in sorted filename order."""
    import os

    json_names = (name for name in sorted(os.listdir(PATH)) if name.endswith('.json'))
    for name in json_names:
        # The document is fully parsed inside the ``with`` block, so the
        # yielded object does not depend on the open handle.
        with open(join(PATH, name), 'r') as fo:
            session = Emfit(json_load(fo))
        yield session
def get_datas() -> List[Emfit]:
    """Return every session from ``iter_datas`` as a list ordered by ``start``."""
    # sorted() consumes any iterable and already returns a new list, so no
    # extra list() wrappers are needed around it.
    return sorted(iter_datas(), key=lambda e: e.start)

128
plot.py Normal file
View file

@ -0,0 +1,128 @@
import matplotlib.dates as md # type: ignore
import numpy as np # type: ignore
import seaborn as sns # type: ignore
import matplotlib.pyplot as plt
def plot_file(jj, f=None):
    """Plot the sleep-phase timeline of one session and save it as a PNG.

    Args:
        jj: either the raw session mapping (must contain
            ``'sleep_epoch_datapoints'``, a sequence of ``[timestamp, phase]``
            pairs) or a wrapper object exposing that mapping as a ``jj``
            attribute.
        f: output path without the ``.png`` suffix. When omitted, the name is
            the ISO date of the last plotted epoch, or ``'sleep_phases'`` if
            there are no epochs.
    """
    from datetime import datetime

    # Accept a wrapper object as well as the bare mapping.
    data = getattr(jj, 'jj', jj)
    pts = data['sleep_epoch_datapoints']
    tss = [datetime.fromtimestamp(p[0]) for p in pts]
    vals = [p[1] for p in pts]

    if f is None:
        f = tss[-1].date().isoformat() if tss else 'sleep_phases'

    fig, ax = plt.subplots(figsize=(20, 10))
    try:
        ax.plot(tss, vals)

        # Tick every 15 minutes, labelled as HH:MM.
        ax.xaxis.set_major_locator(md.MinuteLocator(interval=15))
        ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))

        ax.set_xlabel('time')
        ax.set_ylabel('phase')
        ax.set_title('Sleep phases')
        ax.grid(True)

        fig.savefig(f"{f}.png")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
def plot_all():
    """Call ``plot_file`` once for every session produced by ``iter_datas``."""
    for session in iter_datas():
        plot_file(session)
# def stats():
# for jj in iter_datas():
# # TODO timezone??
# # TODO getinterval on 16 aug -- err it's pretty stupid. I shouldn't count bed exit interval...
# st = fromts(jj['time_start'])
# en = fromts(jj['time_end'])
# tfmt = "%Y-%m-%d %a"
# tot_mins = 0
# res = []
# res.append(f"{st.strftime(tfmt)} -- {en.strftime(tfmt)}")
# for cls in ['rem', 'light', 'deep']:
# mins = jj[f'sleep_class_{cls}_duration'] // 60
# res += [cls, hhmm(mins)]
# tot_mins += mins
# res += ["total", hhmm(tot_mins)]
# print(*res)
def stats():
    """Print one summary row per session, with week and gap markers.

    Sessions come from ``get_datas`` (ordered by start). Between consecutive
    rows the calendar is advanced one day at a time: ``---`` is printed each
    time the day reached is a Monday, and a blank row is printed for every
    day that is passed without reaching the next session's date.
    Prints nothing when there are no sessions.
    """
    datas = get_datas()
    if not datas:
        # Avoid IndexError on datas[0] when no session files exist.
        return

    cur = datas[0].date
    for jj in datas:
        while cur < jj.date:
            cur += timedelta(days=1)
            if cur.weekday() == 0:
                print("---")
            if cur != jj.date:
                print(" ")
        print(f"{jj.date.strftime('%m.%d %a')} {jj.hrv_morning:.0f} {jj.hrv_evening:.0f} {jj.hrv_morning - jj.hrv_evening:3.0f} {hhmm(jj.sleep_minutes)} {jj.hrv_lf}/{jj.hrv_hf} {jj.sleep_hr_coverage:3.0f}")
def plot_recovery_vs_hr_percentage():
    """Show a regression plot of HRV recovery against sleep HR coverage.

    "Recovery" is ``hrv_morning - hrv_evening``; "percentage" is
    ``sleep_hr_coverage``. One point is plotted per session.
    """
    sns.set(color_codes=True)
    # Build (recovery, coverage) per session in a single pass so each
    # session's values are read together.
    pairs = [
        (session.hrv_morning - session.hrv_evening, session.sleep_hr_coverage)
        for session in get_datas()
    ]
    recovery = [pair[0] for pair in pairs]
    coverage = [pair[1] for pair in pairs]
    axes = sns.regplot(x=recovery, y=coverage)
    axes.set(xlabel='recovery', ylabel='percentage')
    plt.show()
def plot_hr():
    """Show the in-sleep pulse series of the last session from ``get_datas``.

    Only every 10th sample is plotted.
    """
    jj = get_datas()[-1]
    tss, uu = jj.sleep_hr
    tss = tss[::10]
    uu = uu[::10]
    plt.figure(figsize=(15, 4))
    # Pass the data as keywords: newer seaborn releases no longer accept
    # x/y positionally, and this matches the regplot call in this file.
    ax = sns.pointplot(x=tss, y=uu, markers=" ")
    ax.set(ylim=(None, 1000))
    plt.show()
# TODO ok, would be nice to have that every morning in timeline
# also timeline should have dynamic filters? maybe by tags
# then I could enable emfit feed and slog feed (pulled from all org notes) and see the correlation? also could pull workouts provider (and wlog) -- actually wlog processing could be moved to timeline too
# TODO could plot 'recovery' thing and see if it is impacted by workouts
# TODO time_start, time_end
# plot_hrv()
# stats()
# plot_recovery_vs_hr_percentage()
# stats()
# import matplotlib
# matplotlib.use('Agg')
# TODO maybe rmssd should only be computed if we have a reasonable chunk of datas
# also, trust it only if it's stable
# plot_timestamped([p[0] for p in pts], [p[1] for p in pts], mavgs=[]).savefig('res.png')
# TODO X axes: show hours and only two dates
# TODO 4 is awake, 3 REM, 2 light, 1 deep
# deviation beyond 25-75 or 75-25 is bad??