bluemaesto: get rid of unnecessary file, move to top level
This commit is contained in:
parent d562f00dca
commit d77ab92d86
3 changed files with 31 additions and 33 deletions

my/bluemaestro.py (new executable file, 192 lines)
@@ -0,0 +1,192 @@
#!/usr/bin/python3
"""
[[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor
"""

# todo eh, most of it belongs to DAL

from datetime import datetime, timedelta
from pathlib import Path
import re
import sqlite3
from typing import Iterable, NamedTuple, Sequence, Set, Optional


from .core.common import mcachew, LazyLogger, get_files
from .core.cachew import cache_dir
from my.config import bluemaestro as config


logger = LazyLogger('bluemaestro', level='debug')


def inputs() -> Sequence[Path]:
    return get_files(config.export_path)


class Measurement(NamedTuple):
    dt: datetime
    temp: float      # Celsius
    humidity: float  # percent
    pressure: float  # mBar
    dewpoint: float  # Celsius


# fixme: later, rely on the timezone provider
# NOTE: the timezone should be set with respect to the export date!!!
import pytz  # type: ignore
tz = pytz.timezone('Europe/London')
# TODO when I change tz, check the diff


@mcachew(cache_path=cache_dir() / 'bluemaestro.cache')
def measurements(dbs=inputs()) -> Iterable[Measurement]:
    last: Optional[datetime] = None

    # tables are immutable, so can save on processing..
    processed_tables: Set[str] = set()
    for f in dbs:
        logger.debug('processing %s', f)
        tot = 0
        new = 0
        # todo assert increasing timestamp?
        with sqlite3.connect(f'file:{f}?immutable=1', uri=True) as db:
            try:
                datas = db.execute(f'SELECT "{f.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index')
                oldfmt = True
            except sqlite3.OperationalError:
                # Right, this looks really bad.
                # The device doesn't keep proper wall-clock time; what it does is:
                # 1. every X seconds, record a datapoint and store it in internal memory
                # 2. on sync, take the phone's datetime ('now') and ASSIGN timestamps to the collected data
                #    as now, now - X, now - 2X, etc
                #
                # That basically means that, for example, hourly timestamps are completely useless, because their error is about 1h.
                # Yep, confirmed on some historic exports. Seriously, what the fuck???
                #
                # The device _does_ have an internal clock, but it's basically reset to 0 every time you update the settings.
                # So if, say, at 17:15 you set the interval to 3600, the 'real' timestamps would be
                # 17:15, 18:15, 19:15, etc
                # But depending on when you export, you might get
                # 17:35, 18:35, 19:35; or 17:55, 18:55, 19:55, etc
                # Basically all you're guaranteed is that the 'correct' timestamp is within one interval of the reported one,
                # and it doesn't seem to keep the reference time in the database.
                #
                # UPD: fucking hell, so you actually can set the reference date in the settings (calcReferenceUnix field in the meta db),
                # but it's not set by default.
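                #
                # To make the drift concrete (purely hypothetical numbers, not from a real export):
                # with X = 3600s, suppose the device actually sampled at 15:10, 16:10 and 17:10.
                # Export at 17:45 and those points get labelled 17:45, 16:45, 15:45; export the same data
                # at 18:05 instead and they come back as 18:05, 17:05, 16:05. Either way each timestamp
                # can be off by up to one full interval, and two exports of the same data can disagree by as much.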

                log_tables = [c[0] for c in db.execute('SELECT name FROM sqlite_sequence WHERE name LIKE "%_log"')]
                log_tables = [t for t in log_tables if t not in processed_tables]
                processed_tables |= set(log_tables)

                # todo use later?
                frequencies = [list(db.execute(f'SELECT interval from {t.replace("_log", "_meta")}'))[0][0] for t in log_tables]

                # todo could just filter out the older datapoints?? dunno.

                # eh. a bit horrible, but seems the easiest way to do it?
                # note: for some reason everything in the new table is multiplied by 10
                query = ' UNION '.join(
                    f'SELECT "{t}" AS name, unix, tempReadings / 10.0, humiReadings / 10.0, pressReadings / 10.0, dewpReadings / 10.0 FROM {t}'
                    for t in log_tables
                )
                if len(log_tables) > 0: # ugh. otherwise end up with syntax error..
                    query = f'SELECT * FROM ({query}) ORDER BY name, unix'
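                # For illustration, with a single (hypothetical) log table named 'ABC_1577836800000_log',
                # the generated query would be roughly:
                #   SELECT * FROM (
                #     SELECT "ABC_1577836800000_log" AS name, unix, tempReadings / 10.0, humiReadings / 10.0, pressReadings / 10.0, dewpReadings / 10.0 FROM ABC_1577836800000_log
                #   ) ORDER BY name, unix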
                datas = db.execute(query)
                oldfmt = False

            for i, (name, tsc, temp, hum, pres, dewp) in enumerate(datas):
                # note: bluemaestro keeps local datetime
                if oldfmt:
                    tss = tsc.replace('Juli', 'Jul').replace('Aug.', 'Aug')
                    dt = datetime.strptime(tss, '%Y-%b-%d %H:%M')
                    dt = tz.localize(dt)
                else:
                    m = re.search(r'_(\d+)_', name)
                    assert m is not None
                    export_ts = int(m.group(1))
                    edt = datetime.fromtimestamp(export_ts / 1000, tz=tz)

                    dt = datetime.fromtimestamp(tsc / 1000, tz=tz)

                ## sanity checks (todo make defensive/configurable?)
                # not sure how that happens.. but basically such points had better be excluded
                assert dt.year >= 2015, (f, name, dt)
                assert -60 <= temp <= 60, (f, dt, temp)
                ##

                tot += 1
                if last is not None and last >= dt:
                    continue
                # todo for performance, pass 'last' to sqlite instead?
                last = dt
                new += 1
                p = Measurement(
                    dt=dt,
                    temp=temp,
                    pressure=pres,
                    humidity=hum,
                    dewpoint=dewp,
                )
                yield p
        logger.debug('%s: new %d/%d', f, new, tot)
    # logger.info('total items: %d', len(merged))
    # TODO assert frequency?
    # for k, v in merged.items():
    #     # TODO shit. quite a few of them have varying values... how is that freaking possible????
    #     # most of them are within 0.5 degree though... so just ignore?
    #     if isinstance(v, set) and len(v) > 1:
    #         print(k, v)
    # for k, v in merged.items():
    #     yield Point(dt=k, temp=v) # meh?

from .core import stat, Stats
def stats() -> Stats:
    return stat(measurements)


from .core.pandas import DataFrameT, check_dataframe as cdf
@cdf
def dataframe() -> DataFrameT:
    """
    %matplotlib gtk
    from my.bluemaestro import dataframe
    dataframe().plot()
    """
    # todo not sure why x axis time ticks are weird... df[:6269] works, whereas df[:6269] breaks...
    # either way, plot is not the best representation for the temperature I guess.. maybe also use bokeh?
    import pandas as pd  # type: ignore
    df = pd.DataFrame(
        (p._asdict() for p in measurements()),
        # todo meh. otherwise fails on empty inputs...
        columns=list(Measurement._fields),
    )
    # todo not sure how it would handle mixed timezones??
    return df.set_index('dt')

# todo test against an older db?


def check() -> None:
    temps = list(measurements())
    latest = temps[:-2]

    prev = latest[-2].dt
    last = latest[-1].dt

    # todo stat should expose a dataclass?
    # TODO ugh. might need to warn about points past 'now'??
    # the default shouldn't allow points in the future...
    #
    # TODO also needs to be filtered out on processing, should be rejected on the basis of export date?

    POINTS_STORED = 6000  # on device?
    FREQ_SEC = 60
    SECS_STORED = POINTS_STORED * FREQ_SEC
    HOURS_STORED = POINTS_STORED / (60 * 60 / FREQ_SEC)  # around 4 days
    NOW = datetime.now()
    assert NOW - last < timedelta(hours=HOURS_STORED / 2), f'old backup! {last}'


    assert last - prev < timedelta(minutes=3), f'bad interval! {last - prev}'
    single = (last - prev).seconds
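For context, a minimal sketch of how the new module can be used, assuming my.config.bluemaestro.export_path points at the exported sqlite databases (the snippet below is illustrative and not part of the diff):

from my.bluemaestro import measurements, dataframe, stats

# iterate over parsed datapoints (cached after the first run via the mcachew decorator)
for m in measurements():
    print(m.dt, m.temp, m.humidity)

# or load the whole history as a pandas dataframe indexed by datetime
df = dataframe()
print(df.describe())

# quick summary used by the stats machinery
print(stats())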