From 8e8d9702f378f8f3b2487d9d452bbeb9d49578cc Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 3 Oct 2020 23:04:10 +0100 Subject: [PATCH] my.bluemaestro: investigation of weird timestamps --- my/bluemaestro/__init__.py | 77 +++++++++++++++++++++++++++----------- tests/bluemaestro.py | 16 +++++++- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/my/bluemaestro/__init__.py b/my/bluemaestro/__init__.py index 8ff0b59..9afba0b 100755 --- a/my/bluemaestro/__init__.py +++ b/my/bluemaestro/__init__.py @@ -5,8 +5,9 @@ # todo eh, most of it belongs to DAL -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path +import re import sqlite3 from typing import Iterable, NamedTuple, Sequence, Set @@ -28,19 +29,20 @@ class Measurement(NamedTuple): temp: float +# fixme: later, rely on the timezone provider +# NOTE: the timezone should be set with respect to the export date!!! +import pytz # type: ignore +tz = pytz.timezone('Europe/London') + + @mcachew(cache_path=cache_dir() / 'bluemaestro.cache') def measurements(dbs=inputs()) -> Iterable[Measurement]: emitted: Set[datetime] = set() + + # tables are immutable, so can save on processing.. + processed_tables: Set[str] = set() for f in dbs: logger.debug('processing %s', f) - # err = f'{f}: mismatch: {v} vs {value}' - # if abs(v - value) > 0.4: - # logger.warning(err) - # # TODO mm. dunno how to mark errors properly.. - # # raise AssertionError(err) - # else: - # pass - # with sqlite3.connect(f'file:{db}?immutable=1', uri=True) as c: tot = 0 new = 0 # todo assert increasing timestamp? @@ -48,33 +50,66 @@ def measurements(dbs=inputs()) -> Iterable[Measurement]: try: # try old format first # todo Humidity, Pressure, Dewpoint - datas = db.execute('SELECT Time, Temperature FROM data ORDER BY log_index') + datas = db.execute(f'SELECT "{f.name}" as name, Time, Temperature FROM data ORDER BY log_index') oldfmt = True except sqlite3.OperationalError: - # ok, must be new format? 
- log_tables = list(c[0] for c in db.execute('SELECT name FROM sqlite_sequence WHERE name LIKE "%_log"')) + # Right, this looks really bad. + # The device doesn't have internal time & what it does is: + # 1. every X seconds, record a datapoint, store it in the internal memory + # 2. on sync, take the phone's datetime ('now') and then ASSIGN the timestamps to the collected data + # as now, now - X, now - 2X, etc + # + # that basically means that for example, hourly timestamps are completely useless? because their error is about 1h + # yep, confirmed on some historic exports. seriously, what the fuck??? + # + # The device _does_ have an internal clock, but it's basically set to 0 every time you update settings + # So, e.g. if, say, at 17:15 you set the interval to 3600, the 'real' timestamps would be + # 17:15, 18:15, 19:15, etc + # But depending on when you export, you might get + # 17:35, 18:35, 19:35; or 17:55, 18:55, 19:55, etc + # basically all you guaranteed is that the 'correct' interval is within the frequency + # it doesn't seem to keep the reference time in the database + # + # UPD: fucking hell, so you can set the reference date in the settings (calcReferenceUnix field in meta db) + # but it's not set by default. + + log_tables = [c[0] for c in db.execute('SELECT name FROM sqlite_sequence WHERE name LIKE "%_log"')] + log_tables = [t for t in log_tables if t not in processed_tables] + processed_tables |= set(log_tables) + + # todo use later? + frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables] + + # todo could just filter out the older datapoints?? dunno. + # eh. a bit horrible, but seems the easiest way to do it? # todo could exclude logs that we already processed?? # todo humiReadings, pressReadings, dewpReadings - query = ' UNION '.join(f'SELECT unix, tempReadings FROM {t}' for t in log_tables) # todo order by? 
+ query = ' UNION '.join(f'SELECT "{t}" AS name, unix, tempReadings FROM {t}' for t in log_tables) if len(log_tables) > 0: # ugh. otherwise end up with syntax error.. - query = f'SELECT * FROM ({query}) ORDER BY unix' + query = f'SELECT * FROM ({query}) ORDER BY name, unix' datas = db.execute(query) oldfmt = False - # todo otherwise, union all dbs?... this is slightly insane... - for tsc, tempc in datas: + for i, (name, tsc, tempc) in enumerate(datas): if oldfmt: - # TODO FIXME is that utc??? + # TODO double check the timezone tss = tsc.replace('Juli', 'Jul').replace('Aug.', 'Aug') - dt = datetime.strptime(tss, '%Y-%b-%d %H:%M') + dt = tz.localize(datetime.strptime(tss, '%Y-%b-%d %H:%M')) temp = tempc else: - dt = datetime.utcfromtimestamp(tsc / 1000) # todo not sure if utc? - temp = tempc / 10 # for some reason it's in tenths of degrees?? + m = re.search(r'_(\d+)_', name) + assert m is not None + export_ts = int(m.group(1)) + edt = datetime.fromtimestamp(export_ts / 1000, tz=tz) + # right, seems that it stores local datetime + dt = datetime.fromtimestamp(tsc / 1000, tz=tz) + temp = tempc / 10 # for some reason it's in tenths of degrees + + # need to exclude bad databases? 
some have weird years like 2000 # sanity check - assert -40 <= temp <= 60, (f, dt, temp) + assert -60 <= temp <= 60, (f, dt, temp) tot += 1 if dt in emitted: diff --git a/tests/bluemaestro.py b/tests/bluemaestro.py index 127f0b7..65a94dd 100644 --- a/tests/bluemaestro.py +++ b/tests/bluemaestro.py @@ -8,7 +8,21 @@ disable_cachew() # meh def test() -> None: from my.bluemaestro import measurements res = list(measurements()) - assert len(res) > 1000 + + tp = [x for x in res if x.temp == 2.1] + assert len(tp) > 0 + for p in tp: + print(p) + dts = p.dt.strftime('%Y%m%d %H') + # check that timezone is set properly + assert dts == '20200824 22' + + # NOTE: broken at the moment due to weirdness with timestamping + # assert len(tp) == 1 # should be unique + + # 2.5 K + 4 K datapoints, somewhat overlapping + # NOTE: broken at the moment due to weirdness with timestamping + # assert len(res) < 6000 import pytest # type: ignore