diff --git a/my/common.py b/my/common.py index 2c241cd..89ee916 100644 --- a/my/common.py +++ b/my/common.py @@ -1,7 +1,7 @@ from pathlib import Path import functools import types -from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast +from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple from . import init @@ -46,6 +46,7 @@ def the(l: Iterable[T]) -> T: return first +# TODO more_itertools.bucket? def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]: res: Dict[K, List[T]] = {} for i in l: @@ -106,9 +107,11 @@ from .kython.klogging import setup_logger, LazyLogger Paths = Union[Sequence[PathIsh], PathIsh] -def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]: +def get_files(pp: Paths, glob: str, sort: bool=True) -> Tuple[Path, ...]: """ Helper function to avoid boilerplate. + + Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense """ # TODO FIXME mm, some wrapper to assert iterator isn't empty? sources: List[Path] = [] @@ -129,7 +132,7 @@ def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]: if sort: paths = list(sorted(paths)) - return paths + return tuple(paths) def mcachew(*args, **kwargs): diff --git a/my/emfit/__init__.py b/my/emfit/__init__.py index b245824..2b8f5a8 100755 --- a/my/emfit/__init__.py +++ b/my/emfit/__init__.py @@ -5,26 +5,21 @@ Consumes data exported by https://github.com/karlicoss/backup-emfit """ import json -import logging -from collections import OrderedDict as odict from dataclasses import dataclass from datetime import date, datetime, time, timedelta +from itertools import groupby from pathlib import Path from typing import Dict, Iterator, List, NamedTuple, Any, cast import pytz +from more_itertools import bucket -from ..common import get_files, LazyLogger, cproperty, group_by_key, mcachew +from ..common import get_files, LazyLogger, cproperty, mcachew from my.config import emfit as config -logger = LazyLogger('my.emfit', level='info') - - -# TODO FIXME remove? -import kython -timed = lambda f: kython.timed(f, logger=logger) +logger = LazyLogger(__name__, level='info') def hhmm(minutes): @@ -35,13 +30,10 @@ AWAKE = 4 Sid = str -# TODO use tz provider for that? -_TZ = pytz.timezone(config.tz) - # TODO use common tz thing? def fromts(ts) -> datetime: - dt = datetime.fromtimestamp(ts) - return _TZ.localize(dt) + dt = datetime.fromtimestamp(ts, tz=pytz.utc) + return dt class Mixin: @@ -295,14 +287,14 @@ class Emfit(Mixin): # TODO move to common? def dir_hash(path: Path): - mtimes = tuple(p.stat().st_mtime for p in sorted(path.glob('*.json'))) + mtimes = tuple(p.stat().st_mtime for p in get_files(path, glob='*.json')) return mtimes +# TODO take __file__ into account somehow? @mcachew(cache_path=config.cache_path, hashf=dir_hash, logger=logger) -def iter_datas_cached(path: Path) -> Iterator[Emfit]: - # TODO use get_files? - for f in sorted(path.glob('*.json')): +def iter_datas(path: Path=config.export_path) -> Iterator[Emfit]: + for f in get_files(path, glob='*.json'): sid = f.stem if sid in config.excluded_sids: continue @@ -311,20 +303,17 @@ def iter_datas_cached(path: Path) -> Iterator[Emfit]: yield from Emfit.make(em) -def iter_datas(path=config.export_path) -> Iterator[Emfit]: - yield from iter_datas_cached(path) - - def get_datas() -> List[Emfit]: return list(sorted(iter_datas(), key=lambda e: e.start)) # TODO move away old entries if there is a diff?? -@timed def by_night() -> Dict[date, Emfit]: - res: Dict[date, Emfit] = odict() + res: Dict[date, Emfit] = {} # TODO shit. I need some sort of interrupted sleep detection? - for dd, sleeps in group_by_key(get_datas(), key=lambda s: s.date).items(): + grouped = bucket(get_datas(), key=lambda s: s.date) + for dd in grouped: + sleeps = list(grouped[dd]) if len(sleeps) > 1: logger.warning("multiple sleeps per night, not handled yet: %s", sleeps) continue diff --git a/my/foursquare.py b/my/foursquare.py index 03cc312..ed55a24 100755 --- a/my/foursquare.py +++ b/my/foursquare.py @@ -15,10 +15,10 @@ from .common import get_files, LazyLogger from my.config import foursquare as config -logger = LazyLogger(__package__) +logger = LazyLogger(__name__) -def _get_exports() -> List[Path]: +def inputs(): return get_files(config.export_path, '*.json') @@ -62,7 +62,7 @@ class Place: def get_raw(fname=None): if fname is None: - fname = max(_get_exports()) + fname = max(inputs()) j = json.loads(Path(fname).read_text()) assert isinstance(j, list) diff --git a/my/rescuetime.py b/my/rescuetime.py index 3ee2730..5bf136c 100644 --- a/my/rescuetime.py +++ b/my/rescuetime.py @@ -18,7 +18,7 @@ from my.config import rescuetime as config log = LazyLogger(__package__, level='info') -def _get_exports() -> List[Path]: +def inputs(): return get_files(config.export_path, '*.json') @@ -28,7 +28,7 @@ Model = rescuexport.Model # TODO cache? def get_model(last=0) -> Model: - return Model(_get_exports()[-last:]) + return Model(inputs()[-last:]) def _without_errors(): diff --git a/setup.py b/setup.py index ddf25fb..233829a 100644 --- a/setup.py +++ b/setup.py @@ -4,8 +4,9 @@ from setuptools import setup, find_namespace_packages # type: ignore INSTALL_REQUIRES = [ - 'appdirs', - 'pytz', # even though it's not needed by the core, it's so common anyway... + 'pytz', # even though it's not needed by the core, it's so common anyway... + 'appdirs', # very common, and makes it portable + 'more-itertools', # it's just too useful and very common anyway ]