diff --git a/my/bluemaestro/__init__.py b/my/bluemaestro/__init__.py index 5e722db..aeea4c3 100755 --- a/my/bluemaestro/__init__.py +++ b/my/bluemaestro/__init__.py @@ -6,8 +6,7 @@ from itertools import chain, islice from pathlib import Path from typing import Any, Dict, Iterable, NamedTuple, Set -from cachew import cachew - +from ..common import mcachew # TODO move to common?? from kython import dictify @@ -16,10 +15,12 @@ from kython.klogging import LazyLogger from mycfg import paths +# TODO reuse common logger = LazyLogger('bluemaestro', level=logging.DEBUG) def get_backup_files(): + # TODO reuse common return list(sorted(chain.from_iterable(d.glob('*.db') for d in paths.bluemaestro.export_paths))) @@ -28,8 +29,7 @@ class Point(NamedTuple): temp: float -# TODO hmm, does cachew have py.typed? -@cachew(cache_path=paths.bluemaestro.cache) +@mcachew(cache_path=paths.bluemaestro.cache) def iter_points(dbs) -> Iterable[Point]: # I guess we can affort keeping them in sorted order points: Set[Point] = set() diff --git a/my/coding/github.py b/my/coding/github.py index 9f6c7fd..667f794 100644 --- a/my/coding/github.py +++ b/my/coding/github.py @@ -2,17 +2,17 @@ from typing import Dict, List, Union, Any, NamedTuple, Tuple, Optional, Iterator from datetime import datetime import json from pathlib import Path -import logging import pytz -from ..common import get_files +from ..common import get_files, mcachew from mycfg import paths import mycfg.repos.ghexport.model as ghexport def get_logger(): + import logging return logging.getLogger('my.github') # TODO __package__??? @@ -211,16 +211,7 @@ def iter_gdpr_events() -> Iterator[Res[Event]]: yield e -# TODO FIXME shit, cachew would need to support exceptions?? -# TODO ugh. union types are sort of inevitable.. - -# TODO cachew: perhaps warn when function got no params? might end up as TODO -# TODO instead of hash, use 'deps'?? makes it a bit less misleading.. - -# TODO make cahcew optional, warn if it's not available -# TODO dependencies should involve our package and source packages somehow? that's very hard in general of course -from cachew import cachew -@cachew(paths.github.cache_dir, hashf=lambda model: model.sources) +@mcachew(paths.github.cache_dir, hashf=lambda model: model.sources) def iter_backup_events(model=get_model()) -> Iterator[Event]: for d in model.events(): yield _parse_event(d) diff --git a/my/common.py b/my/common.py index bb90b63..e1389e3 100644 --- a/my/common.py +++ b/my/common.py @@ -72,12 +72,14 @@ def listify(fn=None, wrapper=list): return listify_return(fn) +# TODO FIXME use in bluemaestro # def dictify(fn=None, key=None, value=None): # def md(it): # return make_dict(it, key=key, value=value) # return listify(fn=fn, wrapper=md) +# TODO try importing logzero defensively? def setup_logger(logger, level=None, format=None, datefmt=None): import logging old_root = logging.root @@ -109,3 +111,18 @@ def get_files(pp: PathIsh, glob: str, sort=True) -> List[Path]: assert path.is_file(), path # TODO FIXME assert matches glob?? return [path] + + +def mcachew(*args, **kwargs): + """ + Stands for 'Maybe cachew'. + Defensive wrapper around @cachew to make it an optional dependency. + """ + try: + import cachew + except ModuleNotFoundError: + import warnings + warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew') + return lambda orig_func: orig_func + else: + return cachew.cachew(*args, **kwargs) diff --git a/my/location/takeout.py b/my/location/takeout.py index 77c94a3..6c81992 100644 --- a/my/location/takeout.py +++ b/my/location/takeout.py @@ -103,8 +103,11 @@ def _iter_locations_fo(fo, start, stop) -> Iterator[Location]: tag=tag ) -# TODO hope they are sorted... +# TODO hope they are sorted... (could assert for it) +# TODO actually, path includes timestamp already... so mtime_hash isn't _really_ necessary # TODO CACHEW_OFF env variable? +# TODO use mcachew +from cachew import cachew, mtime_hash @cachew(cache_path, hashf=mtime_hash, cls=Location, chunk_by=10000, logger=get_logger()) def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]: if path.suffix == '.json': diff --git a/my/reddit.py b/my/reddit.py index 22fa124..1daeda6 100755 --- a/my/reddit.py +++ b/my/reddit.py @@ -2,6 +2,8 @@ from pathlib import Path, PosixPath from typing import List, Sequence, Mapping +from .common import mcachew + from mycfg import paths import mycfg.repos.rexport.model as rexport @@ -30,9 +32,9 @@ def get_model(): model = rexport.Model(get_backup_files()) return model -import logging def get_logger(): + import logging return logging.getLogger('my.reddit') @@ -105,9 +107,7 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]: key=lambda s: s.save.sid, ) -from cachew import cachew -# TODO FIXME make defensive -@cachew('/L/data/.cache/reddit-events.cache') +@mcachew('/L/data/.cache/reddit-events.cache') def _get_events(backups: Sequence[Path]=get_backup_files(), parallel: bool=True) -> Iterator[Event]: # TODO cachew: let it transform return type? so you don't have to write a wrapper for lists? # parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?