diff --git a/my/bluemaestro/__init__.py b/my/bluemaestro/__init__.py index 9afba0b..06bfdee 100755 --- a/my/bluemaestro/__init__.py +++ b/my/bluemaestro/__init__.py @@ -9,7 +9,7 @@ from datetime import datetime, timedelta from pathlib import Path import re import sqlite3 -from typing import Iterable, NamedTuple, Sequence, Set +from typing import Iterable, NamedTuple, Sequence, Set, Optional from ..core.common import mcachew, LazyLogger, get_files @@ -37,7 +37,7 @@ tz = pytz.timezone('Europe/London') @mcachew(cache_path=cache_dir() / 'bluemaestro.cache') def measurements(dbs=inputs()) -> Iterable[Measurement]: - emitted: Set[datetime] = set() + last: Optional[datetime] = None # tables are immutable, so can save on processing.. processed_tables: Set[str] = set() @@ -107,14 +107,18 @@ def measurements(dbs=inputs()) -> Iterable[Measurement]: dt = datetime.fromtimestamp(tsc / 1000, tz=tz) temp = tempc / 10 # for some reason it's in tenths of degrees - # need to exclude bad databases? some have weird years like 2000 - # sanity check + + ## sanity checks (todo make defensive/configurable?) + # not sure how that happens.. but basically they'd better be excluded + assert dt.year >= 2015, (f, name, dt) assert -60 <= temp <= 60, (f, dt, temp) + ## tot += 1 - if dt in emitted: + if last is not None and last >= dt: continue - emitted.add(dt) + # todo for performance, pass 'last' to sqlite instead? + last = dt new += 1 p = Measurement( dt=dt, diff --git a/tests/bluemaestro.py b/tests/bluemaestro.py index 65a94dd..e397f08 100644 --- a/tests/bluemaestro.py +++ b/tests/bluemaestro.py @@ -17,12 +17,10 @@ def test() -> None: # check that timezone is set properly assert dts == '20200824 22' - # NOTE: broken at the moment due to weirdness with timestamping - # assert len(tp) == 1 # should be unique + assert len(tp) == 1 # should be unique # 2.5 K + 4 K datapoints, somwhat overlapping - # NOTE: boken at the moment due to weirdness with timestamping - # assert len(res) < 6000 + assert len(res) < 6000 import pytest # type: ignore