Use defensive cachew

Dima Gerasimov 2020-01-07 00:12:49 +00:00
parent dd93ba8310
commit 5611fce720
5 changed files with 32 additions and 21 deletions

View file

@@ -6,8 +6,7 @@ from itertools import chain, islice
 from pathlib import Path
 from typing import Any, Dict, Iterable, NamedTuple, Set
-from cachew import cachew
+from ..common import mcachew
 # TODO move to common??
 from kython import dictify
@@ -16,10 +15,12 @@ from kython.klogging import LazyLogger
 from mycfg import paths
 # TODO reuse common
 logger = LazyLogger('bluemaestro', level=logging.DEBUG)
 def get_backup_files():
     # TODO reuse common
     return list(sorted(chain.from_iterable(d.glob('*.db') for d in paths.bluemaestro.export_paths)))
@@ -28,8 +29,7 @@ class Point(NamedTuple):
     temp: float
 # TODO hmm, does cachew have py.typed?
-@cachew(cache_path=paths.bluemaestro.cache)
+@mcachew(cache_path=paths.bluemaestro.cache)
 def iter_points(dbs) -> Iterable[Point]:
     # I guess we can afford keeping them in sorted order
     points: Set[Point] = set()
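
The set plus the 'sorted order' comment above suggest iter_points deduplicates readings across all databases before emitting them in order. A minimal sketch of that pattern, assuming a hypothetical per-database reader (read_points is not part of this diff):

    points: Set[Point] = set()
    for db in dbs:
        points.update(read_points(db))  # read_points: hypothetical helper
    # Point is a NamedTuple, so sorted() orders tuples field by field
    yield from sorted(points)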

View file

@@ -2,17 +2,17 @@ from typing import Dict, List, Union, Any, NamedTuple, Tuple, Optional, Iterator
 from datetime import datetime
 import json
 from pathlib import Path
+import logging
 import pytz
-from ..common import get_files
+from ..common import get_files, mcachew
 from mycfg import paths
 import mycfg.repos.ghexport.model as ghexport
 def get_logger():
-    import logging
     return logging.getLogger('my.github')  # TODO __package__???
@@ -211,16 +211,7 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:
         yield e
-# TODO FIXME shit, cachew would need to support exceptions??
-# TODO ugh. union types are sort of inevitable..
-# TODO cachew: perhaps warn when function got no params? might end up as TODO
-# TODO instead of hash, use 'deps'?? makes it a bit less misleading..
-# TODO make cachew optional, warn if it's not available
-# TODO dependencies should involve our package and source packages somehow? that's very hard in general of course
-from cachew import cachew
-@cachew(paths.github.cache_dir, hashf=lambda model: model.sources)
+@mcachew(paths.github.cache_dir, hashf=lambda model: model.sources)
 def iter_backup_events(model=get_model()) -> Iterator[Event]:
     for d in model.events():
         yield _parse_event(d)
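
A note on the hashf argument above: cachew stores the value hashf returns alongside the cached rows and re-runs the wrapped function when that value changes; here model.sources (the list of backup files) acts as the cache key, so adding a new backup invalidates the cache. A rough sketch of the invalidation idea, simplified and not cachew's actual implementation:

    def cached(fn, hashf, cache):
        # cache: a plain dict standing in for cachew's sqlite storage
        def wrapper(*args):
            h = str(hashf(*args))
            if cache.get('hash') != h:  # inputs changed, recompute
                cache['hash'] = h
                cache['rows'] = list(fn(*args))
            yield from cache['rows']
        return wrapper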

View file

@@ -72,12 +72,14 @@ def listify(fn=None, wrapper=list):
     return listify_return(fn)
 # TODO FIXME use in bluemaestro
 # def dictify(fn=None, key=None, value=None):
 #     def md(it):
 #         return make_dict(it, key=key, value=value)
 #     return listify(fn=fn, wrapper=md)
+# TODO try importing logzero defensively?
 def setup_logger(logger, level=None, format=None, datefmt=None):
     import logging
     old_root = logging.root
@@ -109,3 +111,18 @@ def get_files(pp: PathIsh, glob: str, sort=True) -> List[Path]:
     assert path.is_file(), path
     # TODO FIXME assert matches glob??
     return [path]
+def mcachew(*args, **kwargs):
+    """
+    Stands for 'Maybe cachew'.
+    Defensive wrapper around @cachew to make it an optional dependency.
+    """
+    try:
+        import cachew
+    except ModuleNotFoundError:
+        import warnings
+        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
+        return lambda orig_func: orig_func
+    else:
+        return cachew.cachew(*args, **kwargs)
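
Usage of mcachew is identical to @cachew; when cachew isn't installed the decorator degrades to a no-op, so the function simply runs uncached (with a warning) instead of failing at import time. A minimal sketch, assuming the package is importable as 'my' and using an illustrative cache path:

    from typing import Iterator, NamedTuple
    from my.common import mcachew  # assumption: package importable as 'my'

    class Measurement(NamedTuple):
        value: float

    @mcachew(cache_path='/tmp/demo.cache')  # illustrative path
    def measurements() -> Iterator[Measurement]:
        for i in range(1000000):
            yield Measurement(value=float(i))

    # First call computes and stores the rows; later calls replay them from
    # the cache. Without cachew this still works, just without caching.
    list(measurements())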

View file

@@ -103,8 +103,11 @@ def _iter_locations_fo(fo, start, stop) -> Iterator[Location]:
             tag=tag
         )
-# TODO hope they are sorted...
+# TODO hope they are sorted... (could assert for it)
+# TODO actually, path includes timestamp already... so mtime_hash isn't _really_ necessary
+# TODO CACHEW_OFF env variable?
+# TODO use mcachew
 from cachew import cachew, mtime_hash
 @cachew(cache_path, hashf=mtime_hash, cls=Location, chunk_by=10000, logger=get_logger())
 def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
     if path.suffix == '.json':
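
The CACHEW_OFF TODO above hints at an explicit kill switch; one way it could later be folded into mcachew (the variable name comes from the TODO, the exact behavior is an assumption):

    import os

    def mcachew(*args, **kwargs):
        if os.environ.get('CACHEW_OFF'):
            return lambda orig_func: orig_func  # caching explicitly disabled
        try:
            import cachew
        except ModuleNotFoundError:
            import warnings
            warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
            return lambda orig_func: orig_func
        else:
            return cachew.cachew(*args, **kwargs)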

View file

@@ -2,6 +2,8 @@
 from pathlib import Path, PosixPath
 from typing import List, Sequence, Mapping
+from .common import mcachew
 from mycfg import paths
 import mycfg.repos.rexport.model as rexport
@@ -30,9 +32,9 @@ def get_model():
     model = rexport.Model(get_backup_files())
     return model
+import logging
 def get_logger():
-    import logging
     return logging.getLogger('my.reddit')
@@ -105,9 +107,7 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
         key=lambda s: s.save.sid,
     )
-from cachew import cachew
-# TODO FIXME make defensive
-@cachew('/L/data/.cache/reddit-events.cache')
+@mcachew('/L/data/.cache/reddit-events.cache')
 def _get_events(backups: Sequence[Path]=get_backup_files(), parallel: bool=True) -> Iterator[Event]:
     # TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
     # parallel = False  # NOTE: eh, not sure if still necessary? I think glumov didn't like it?
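
Regarding the 'wrapper for lists' TODO above: until cachew can transform the return type itself, a thin materializing wrapper covers the common case (get_events is an illustrative name, not part of this diff):

    from typing import List

    def get_events(*args, **kwargs) -> List[Event]:
        # materialize the cached generator for callers that want a list
        return list(_get_events(*args, **kwargs))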