From 9795c058fbddd50c875a5b725650b711185a738f Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 9 Aug 2017 23:12:35 +0100 Subject: [PATCH] Lots of plots! --- main.py | 287 ++++++++++++++++++++------------------------------------ 1 file changed, 102 insertions(+), 185 deletions(-) diff --git a/main.py b/main.py index 28bf2c2..a700c10 100755 --- a/main.py +++ b/main.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3.6 # TODO +from kython import * +from kython.plotting import * from csv import DictReader from itertools import islice @@ -20,200 +22,115 @@ import matplotlib.pylab as pylab pylab.rcParams['figure.figsize'] = (32.0, 24.0) pylab.rcParams['font.size'] = 10 - - -dimensions = 3 # Number of dimensions to reduce to -jawboneDataFile = "/L/Dropbox/backups/jawbone/2017.csv" # Data File Path - jawboneDataFeatures = "Jawbone/features.csv" # Data File Path featureDesc: Dict[str, str] = {} for x in genfromtxt(jawboneDataFeatures, dtype='unicode', delimiter=','): featureDesc[x[0]] = x[1] -def filterData_Jawbone (data): - #Removes null data (and corresponding features) - data = data[0:,:] - # for i in range(16): - # data = np.delete(data, 0, 1) - # print(data) - h, w = data.shape - data = np.where((data == ''), 0, data) - allZero = [np.all(np.delete([0 if col[i] == '' else col[i] for col in data], [0]).astype(float) - == 0) for i in range(w)] - allSame = [np.all(np.delete([0 if col[i] == '' else col[i] for col in data], [0]).astype(float) - == np.delete([0 if col[i] == '' else col[i] for col in data], [0]).astype(float)[0]) for i in range(w)] - empty = np.logical_or(allZero, allSame) - n = [i for i in range(np.array(empty).size) if empty[i] == True] - return np.delete(data, n, axis=1) +def _safe_float(s: str): + if len(s) == 0: + return None + return float(s) -dataAll = filterData_Jawbone(genfromtxt(jawboneDataFile, dtype='unicode', delimiter=',')) -features = dataAll[0] -features = [ - 's_light', # 'light sleep' from app - 's_awake', # 'woke up' from app (how many times you were awake) - 's_deep' # 'sound sleep' from app +def _safe_int(s: str): + if len(s) == 0: + return None + return int(float(s)) # TODO meh + +def _safe_mins(s: float): + if s is None: + return None + return s / 60 + +class SleepData(NamedTuple): + date: str + asleep_time: float + awake_time: float + total: float + awake: float # 'awake for' from app, time awake duing sleep (seconds) + awakenings: int + light: float # 'light sleep' from app (seconds) + deep: float # 'deep sleep' from app (sec) + quality: float # ??? + + @classmethod + def from_jawbone_dict(cls, d: Dict[str, Any]): + return cls( + date=d['DATE'], + asleep_time=_safe_mins(_safe_float(d['s_asleep_time'])), + awake_time=_safe_mins(_safe_float(d['s_awake_time'])), + total=_safe_mins(_safe_float(d['s_duration'])), + light=_safe_mins(_safe_float(d['s_light'])), + deep =_safe_mins(_safe_float(d['s_deep'])), + awake=_safe_mins(_safe_float(d['s_awake'])), + awakenings=_safe_int(d['s_awakenings']), + quality=_safe_float(d['s_quality']), + ) + + def is_bad(self): + return self.deep is None and self.light is None + + # @property + # def total(self) -> float: + # return self.light + self.deep + + + +def iter_useful(data_file: str): + from csv import DictReader + with open(data_file) as fo: + reader = DictReader(fo) + for d in reader: + dt = SleepData.from_jawbone_dict(d) + if not dt.is_bad(): + yield dt + +files = [ + "/L/Dropbox/backups/jawbone/2015.csv", + "/L/Dropbox/backups/jawbone/2016.csv", + "/L/Dropbox/backups/jawbone/2017.csv", ] -# TODO filter more carefully... +useful = concat(*(list(iter_useful(f)) for f in files)) -def getIndex (data, features): - index = [] - for f in features: - index.append(np.where((data[0] == f) == True)[0][0]) - return index +# for u in useful: +# print(f"{u.total} {u.asleep_time} {u.awake_time}") +# # pprint(u.total) +# pprint(u) +# pprint("---") -def getFeatures (data, features): - h, w = data.shape - index = getIndex(data, features) - extracted = np.zeros(h-1) - for i in index: - temp = np.delete([0 if col[i] == '' else col[i] for col in data], [0]).astype(float) - temp /= np.amax(temp) - extracted = np.vstack((extracted, temp)) - extracted = np.delete(extracted, 0, 0) - return extracted +dates = [parse_date(u.date, yearfirst=True, dayfirst=False) for u in useful] +# TODO filter outliers? +for attr, lims, mavg, fig in [ + # ('light', (0, 400), 5, None), + # ('deep', (0, 600), 5, None), + # ('total', (200, 600), 5, None), + ('awake_time', (0, 1200), None, 1), + ('asleep_time', (-100, 1000), None, 1), + # ('awakenings', (0, 5)), +]: + dates_wkd = [d for d in dates if d.weekday() < 5] + dates_wke = [d for d in dates if d.weekday() >= 5] + for dts, dn in [ + (dates, 'total'), + # (dates_wkd, 'weekday'), + # (dates_wke, 'weekend') + ]: + mavgs = [] + if mavg is not None: + mavgs.append((mavg, 'green')) + fig = plot_timestamped( + dts, + [getattr(u, attr) for u in useful], + marker='.', + ratio=(16, 4), + mavgs=mavgs, + ylimits=lims, + ytick_size=60, + figure=1, + ) + # plt.savefig(f'{attr}_{dn}.png') -# print(dataAll) -data = getFeatures(dataAll, features) - - -def remNull(x, y): - nx = np.where(x == 0) - ny = np.where(y == 0) - nulli = np.concatenate((nx[0], ny[0])) - x = np.delete(x, nulli, 0) - y = np.delete(y, nulli, 0) - return x, y - -def calculateVar(x, y) -> float: - x, y = remNull(x,y) - if len(x) == 0: - # TODO needs date? - print("Warning") - return 0.0 # TODO ??? - meanX = np.mean(x) - meanY = np.mean(y) - n = float(x.shape[0]) - print(n) - return ((1/n)*(np.sum((x-meanX)*(y-meanY)))) - # return ((1/(n + 1))*(np.sum((x-meanX)*(y-meanY)))) # TODO fixme.. - -def calculateCov(data): - h, w = data.shape - cov = np.zeros([h, h]) - - for i in range(h): - for j in range(h): - cov[i][j] = calculateVar(data[i], data[j]) - return cov - - -# In[119]: -# a = np.array([[1, 2, 3], [1, 2, 3]]) -# print(a) -# print(calculateCov(a)) -# print(np.cov(a)) -# print("VAR") -# print(np.var(a[0])) - -# print("DATA") -# print(data) - -# print("NPCOV") -# print(np.cov(data)) - -# cov = calculateCov (data) -# print("COV") -# print(cov) -cov = np.cov(data) # TODO ??? - - -# In[120]: - - -def plotFeatures (title, label1, label2, feature1, feature2): - plt.scatter(feature1, feature2) - - plt.title(title) - plt.xlabel(label1) - plt.ylabel(label2) - - plt.xlim(0, 1) - plt.ylim(0, 1) - - plt.show() - -def plotMatrix(data): - r, c = data.shape - c=2 - fig = plt.figure() - plotID = 1 - for i in range(c): - for j in range(c): - f1 = getFeature(data, data[0][i]) - f2 = getFeature(data, data[0][j]) - ax = fig.add_subplot( c, c, plotID ) - ax.scatter(f1, f2) - ax.set_title(data[0][i] + ' vs ' + data[0][j]) - ax.axis('off') - plotID += 1 - plt.show() - -def plotMatrix1(features, data): - for f in features: - print(f"{f}: {featureDesc[f]}") - r, c = data.shape - fig = plt.figure() - plotID = 1 - for i in range(r): - for j in range(r): - ax = fig.add_subplot( r, r, plotID ) - x,y = remNull(data[i], data[j]) - ax.scatter(x, y, s=2) - ax.set_title(features[i] + ' vs ' + features[j], fontsize=15) - ax.tick_params(axis='x', which='major', labelsize=8) - ax.tick_params(axis='y', which='major', labelsize=8) -# ax.set_xlim(0,1) -# ax.set_ylim(0,1) - plotID += 1 - plt.show() - - -# In[121]: - - -# plotMatrix1(features, data) - - -# In[ ]: - - -def rankF(features, cov): - n = len(features) - eigenV = np.linalg.eig(cov) - eigVal = np.matrix(eigenV[0]) - eigVec = np.matrix(eigenV[1]) - order = (n-1) - np.argsort(eigVal) - - rankFeatures = np.empty(n, dtype='