#!/usr/bin/python3
"""
[[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor
"""
# todo eh, most of it belongs to DAL

from datetime import datetime, timedelta
from pathlib import Path
import re
import sqlite3
from typing import Iterable, NamedTuple, Sequence, Set

from ..core.common import mcachew, LazyLogger, get_files
from ..core.cachew import cache_dir

from my.config import bluemaestro as config


logger = LazyLogger('bluemaestro', level='debug')


def inputs() -> Sequence[Path]:
    return get_files(config.export_path)


class Measurement(NamedTuple):
    dt: datetime
    temp: float


# fixme: later, rely on the timezone provider
# NOTE: the timezone should be set with respect to the export date!!!
import pytz  # type: ignore
tz = pytz.timezone('Europe/London')


@mcachew(cache_path=cache_dir() / 'bluemaestro.cache')
def measurements(dbs=inputs()) -> Iterable[Measurement]:
    emitted: Set[datetime] = set()
    # tables are immutable, so can save on processing..
    processed_tables: Set[str] = set()
    for f in dbs:
        logger.debug('processing %s', f)
        tot = 0
        new = 0
        # todo assert increasing timestamp?
        with sqlite3.connect(f'file:{f}?immutable=1', uri=True) as db:
            try:
                # try the old format first
                # todo Humidity, Pressure, Dewpoint
                datas = db.execute(f'SELECT "{f.name}" as name, Time, Temperature FROM data ORDER BY log_index')
                oldfmt = True
            except sqlite3.OperationalError:
                # Right, this looks really bad.
                # The device doesn't have an internal time source & what it does is:
                # 1. every X seconds, record a datapoint and store it in internal memory
                # 2. on sync, take the phone's datetime ('now') and ASSIGN timestamps to the collected data
                #    as now, now - X, now - 2X, etc
                #
                # That basically means that, for example, hourly timestamps are nearly useless, since their error can be up to 1h.
                # Yep, confirmed on some historic exports. Seriously, what the fuck???
                #
                # The device _does_ have an internal clock, but it's reset to 0 every time you update the settings.
                # So if, say, at 17:15 you set the interval to 3600, the 'real' timestamps would be
                # 17:15, 18:15, 19:15, etc
                # But depending on when you export, you might get
                # 17:35, 18:35, 19:35; or 17:55, 18:55, 19:55, etc
                # Basically all you're guaranteed is that the timestamp error is within one interval;
                # the database doesn't seem to keep the reference time.
                #
                # UPD: fucking hell, so you *can* set the reference date in the settings (calcReferenceUnix field in the meta db),
                # but it's not set by default.
                log_tables = [c[0] for c in db.execute('SELECT name FROM sqlite_sequence WHERE name LIKE "%_log"')]
                log_tables = [t for t in log_tables if t not in processed_tables]
                processed_tables |= set(log_tables)

                # todo use later?
                frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]

                # todo could just filter out the older datapoints?? dunno.

                # eh. a bit horrible, but seems the easiest way to do it?
                # todo could exclude logs that we already processed??
                # todo humiReadings, pressReadings, dewpReadings
                query = ' UNION '.join(f'SELECT "{t}" AS name, unix, tempReadings FROM {t}' for t in log_tables)
                if len(log_tables) > 0:  # ugh. otherwise we end up with a syntax error..
                    query = f'SELECT * FROM ({query}) ORDER BY name, unix'
                datas = db.execute(query)
                oldfmt = False

            for i, (name, tsc, tempc) in enumerate(datas):
                if oldfmt:
                    # TODO double check the timezone
                    tss = tsc.replace('Juli', 'Jul').replace('Aug.', 'Aug')
                    dt = tz.localize(datetime.strptime(tss, '%Y-%b-%d %H:%M'))
                    temp = tempc
                else:
                    m = re.search(r'_(\d+)_', name)
                    assert m is not None
                    export_ts = int(m.group(1))
                    edt = datetime.fromtimestamp(export_ts / 1000, tz=tz)
                    # right, seems that it stores local datetime
                    dt = datetime.fromtimestamp(tsc / 1000, tz=tz)
                    temp = tempc / 10  # for some reason it's in tenths of a degree

                # need to exclude bad databases? some have weird years like 2000
                # sanity check
                assert -60 <= temp <= 60, (f, dt, temp)

                tot += 1
                if dt in emitted:
                    continue
                emitted.add(dt)
                new += 1
                p = Measurement(
                    dt=dt,
                    temp=temp,
                    # TODO use pressure and humidity as well
                )
                yield p
        logger.debug('%s: new %d/%d', f, new, tot)

    # logger.info('total items: %d', len(merged))
    # TODO assert frequency?
    # for k, v in merged.items():
    #     # TODO shit. quite a few of them have varying values... how is that freaking possible????
    #     # most of them are within 0.5 degree though... so just ignore?
    #     if isinstance(v, set) and len(v) > 1:
    #         print(k, v)
    # for k, v in merged.items():
    #     yield Point(dt=k, temp=v)  # meh?


from ..core.common import stat, Stats
def stats() -> Stats:
    return stat(measurements)


from ..core.pandas import DataFrameT
def dataframe() -> DataFrameT:
    """
    %matplotlib gtk
    from my.bluemaestro import dataframe
    dataframe().plot()
    """
    # todo not sure why the x axis time ticks are weird... some slices of the dataframe (e.g. df[:6269]) plot fine while others break...
    # either way, a plot is probably not the best representation for the temperature anyway.. maybe also use bokeh?
    import pandas as pd  # type: ignore
    return pd.DataFrame(p._asdict() for p in measurements()).set_index('dt')


# todo test against an older db?
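

# The comment in measurements() explains that the phone assigns timestamps on sync,
# so readings can drift by up to one recording interval. A minimal sketch of how the
# real sampling grid could be recovered *if* calcReferenceUnix were present in the
# meta table -- a hypothetical helper, not used by this module; it assumes the
# reference, interval and stored 'unix' values are all in milliseconds.
def _snap_to_grid(unix_ms: int, reference_ms: int, interval_ms: int) -> int:
    # snap a phone-assigned timestamp down onto the reference + k * interval grid
    offset = (unix_ms - reference_ms) % interval_ms
    return unix_ms - offset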
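

# A minimal usage sketch (not part of the original module): print the latest reading
# and a daily mean temperature. Assumes pandas is installed and config.export_path
# points at at least one exported database.
if __name__ == '__main__':
    df = dataframe()
    print('latest reading:', df.index[-1], df['temp'].iloc[-1])
    print(df['temp'].resample('D').mean().tail())  # daily average temperature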