Use defensive cachew

This commit is contained in:
Dima Gerasimov 2020-01-07 00:12:49 +00:00
parent dd93ba8310
commit 5611fce720
5 changed files with 32 additions and 21 deletions

View file

@ -6,8 +6,7 @@ from itertools import chain, islice
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Iterable, NamedTuple, Set from typing import Any, Dict, Iterable, NamedTuple, Set
from cachew import cachew from ..common import mcachew
# TODO move to common?? # TODO move to common??
from kython import dictify from kython import dictify
@ -16,10 +15,12 @@ from kython.klogging import LazyLogger
from mycfg import paths from mycfg import paths
# TODO reuse common
logger = LazyLogger('bluemaestro', level=logging.DEBUG) logger = LazyLogger('bluemaestro', level=logging.DEBUG)
def get_backup_files(): def get_backup_files():
# TODO reuse common
return list(sorted(chain.from_iterable(d.glob('*.db') for d in paths.bluemaestro.export_paths))) return list(sorted(chain.from_iterable(d.glob('*.db') for d in paths.bluemaestro.export_paths)))
@ -28,8 +29,7 @@ class Point(NamedTuple):
temp: float temp: float
# TODO hmm, does cachew have py.typed? @mcachew(cache_path=paths.bluemaestro.cache)
@cachew(cache_path=paths.bluemaestro.cache)
def iter_points(dbs) -> Iterable[Point]: def iter_points(dbs) -> Iterable[Point]:
# I guess we can affort keeping them in sorted order # I guess we can affort keeping them in sorted order
points: Set[Point] = set() points: Set[Point] = set()

View file

@ -2,17 +2,17 @@ from typing import Dict, List, Union, Any, NamedTuple, Tuple, Optional, Iterator
from datetime import datetime from datetime import datetime
import json import json
from pathlib import Path from pathlib import Path
import logging
import pytz import pytz
from ..common import get_files from ..common import get_files, mcachew
from mycfg import paths from mycfg import paths
import mycfg.repos.ghexport.model as ghexport import mycfg.repos.ghexport.model as ghexport
def get_logger(): def get_logger():
import logging
return logging.getLogger('my.github') # TODO __package__??? return logging.getLogger('my.github') # TODO __package__???
@ -211,16 +211,7 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:
yield e yield e
# TODO FIXME shit, cachew would need to support exceptions?? @mcachew(paths.github.cache_dir, hashf=lambda model: model.sources)
# TODO ugh. union types are sort of inevitable..
# TODO cachew: perhaps warn when function got no params? might end up as TODO
# TODO instead of hash, use 'deps'?? makes it a bit less misleading..
# TODO make cahcew optional, warn if it's not available
# TODO dependencies should involve our package and source packages somehow? that's very hard in general of course
from cachew import cachew
@cachew(paths.github.cache_dir, hashf=lambda model: model.sources)
def iter_backup_events(model=get_model()) -> Iterator[Event]: def iter_backup_events(model=get_model()) -> Iterator[Event]:
for d in model.events(): for d in model.events():
yield _parse_event(d) yield _parse_event(d)

View file

@ -72,12 +72,14 @@ def listify(fn=None, wrapper=list):
return listify_return(fn) return listify_return(fn)
# TODO FIXME use in bluemaestro
# def dictify(fn=None, key=None, value=None): # def dictify(fn=None, key=None, value=None):
# def md(it): # def md(it):
# return make_dict(it, key=key, value=value) # return make_dict(it, key=key, value=value)
# return listify(fn=fn, wrapper=md) # return listify(fn=fn, wrapper=md)
# TODO try importing logzero defensively?
def setup_logger(logger, level=None, format=None, datefmt=None): def setup_logger(logger, level=None, format=None, datefmt=None):
import logging import logging
old_root = logging.root old_root = logging.root
@ -109,3 +111,18 @@ def get_files(pp: PathIsh, glob: str, sort=True) -> List[Path]:
assert path.is_file(), path assert path.is_file(), path
# TODO FIXME assert matches glob?? # TODO FIXME assert matches glob??
return [path] return [path]
def mcachew(*args, **kwargs):
    """
    'Maybe cachew': decorator factory that applies @cachew when the
    library is installed, and degrades to a no-op decorator otherwise,
    keeping cachew a purely optional dependency.
    """
    try:
        from cachew import cachew as _cachew
    except ModuleNotFoundError:
        # cachew missing: tell the user once per call site, then hand back
        # an identity decorator so the wrapped function runs uncached.
        import warnings
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')

        def _passthrough(orig_func):
            return orig_func

        return _passthrough
    # cachew available: forward the decoration arguments unchanged.
    return _cachew(*args, **kwargs)

View file

@ -103,8 +103,11 @@ def _iter_locations_fo(fo, start, stop) -> Iterator[Location]:
tag=tag tag=tag
) )
# TODO hope they are sorted... # TODO hope they are sorted... (could assert for it)
# TODO actually, path includes timestamp already... so mtime_hash isn't _really_ necessary
# TODO CACHEW_OFF env variable? # TODO CACHEW_OFF env variable?
# TODO use mcachew
from cachew import cachew, mtime_hash
@cachew(cache_path, hashf=mtime_hash, cls=Location, chunk_by=10000, logger=get_logger()) @cachew(cache_path, hashf=mtime_hash, cls=Location, chunk_by=10000, logger=get_logger())
def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]: def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
if path.suffix == '.json': if path.suffix == '.json':

View file

@ -2,6 +2,8 @@
from pathlib import Path, PosixPath from pathlib import Path, PosixPath
from typing import List, Sequence, Mapping from typing import List, Sequence, Mapping
from .common import mcachew
from mycfg import paths from mycfg import paths
import mycfg.repos.rexport.model as rexport import mycfg.repos.rexport.model as rexport
@ -30,9 +32,9 @@ def get_model():
model = rexport.Model(get_backup_files()) model = rexport.Model(get_backup_files())
return model return model
import logging
def get_logger(): def get_logger():
import logging
return logging.getLogger('my.reddit') return logging.getLogger('my.reddit')
@ -105,9 +107,7 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
key=lambda s: s.save.sid, key=lambda s: s.save.sid,
) )
from cachew import cachew @mcachew('/L/data/.cache/reddit-events.cache')
# TODO FIXME make defensive
@cachew('/L/data/.cache/reddit-events.cache')
def _get_events(backups: Sequence[Path]=get_backup_files(), parallel: bool=True) -> Iterator[Event]: def _get_events(backups: Sequence[Path]=get_backup_files(), parallel: bool=True) -> Iterator[Event]:
# TODO cachew: let it transform return type? so you don't have to write a wrapper for lists? # TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
# parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it? # parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?