Use defensive cachew

This commit is contained in:
Dima Gerasimov 2020-01-07 00:12:49 +00:00
parent dd93ba8310
commit 5611fce720
5 changed files with 32 additions and 21 deletions

View file

@ -6,8 +6,7 @@ from itertools import chain, islice
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Iterable, NamedTuple, Set from typing import Any, Dict, Iterable, NamedTuple, Set
from cachew import cachew from ..common import mcachew
# TODO move to common?? # TODO move to common??
from kython import dictify from kython import dictify
@ -16,10 +15,12 @@ from kython.klogging import LazyLogger
from mycfg import paths from mycfg import paths
# TODO reuse common
logger = LazyLogger('bluemaestro', level=logging.DEBUG) logger = LazyLogger('bluemaestro', level=logging.DEBUG)
def get_backup_files(): def get_backup_files():
# TODO reuse common
return list(sorted(chain.from_iterable(d.glob('*.db') for d in paths.bluemaestro.export_paths))) return list(sorted(chain.from_iterable(d.glob('*.db') for d in paths.bluemaestro.export_paths)))
@ -28,8 +29,7 @@ class Point(NamedTuple):
temp: float temp: float
# TODO hmm, does cachew have py.typed? @mcachew(cache_path=paths.bluemaestro.cache)
@cachew(cache_path=paths.bluemaestro.cache)
def iter_points(dbs) -> Iterable[Point]: def iter_points(dbs) -> Iterable[Point]:
# I guess we can affort keeping them in sorted order # I guess we can affort keeping them in sorted order
points: Set[Point] = set() points: Set[Point] = set()

View file

@ -2,17 +2,17 @@ from typing import Dict, List, Union, Any, NamedTuple, Tuple, Optional, Iterator
from datetime import datetime from datetime import datetime
import json import json
from pathlib import Path from pathlib import Path
import logging
import pytz import pytz
from ..common import get_files from ..common import get_files, mcachew
from mycfg import paths from mycfg import paths
import mycfg.repos.ghexport.model as ghexport import mycfg.repos.ghexport.model as ghexport
def get_logger(): def get_logger():
import logging
return logging.getLogger('my.github') # TODO __package__??? return logging.getLogger('my.github') # TODO __package__???
@ -211,16 +211,7 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:
yield e yield e
# TODO FIXME shit, cachew would need to support exceptions?? @mcachew(paths.github.cache_dir, hashf=lambda model: model.sources)
# TODO ugh. union types are sort of inevitable..
# TODO cachew: perhaps warn when function got no params? might end up as TODO
# TODO instead of hash, use 'deps'?? makes it a bit less misleading..
# TODO make cahcew optional, warn if it's not available
# TODO dependencies should involve our package and source packages somehow? that's very hard in general of course
from cachew import cachew
@cachew(paths.github.cache_dir, hashf=lambda model: model.sources)
def iter_backup_events(model=get_model()) -> Iterator[Event]: def iter_backup_events(model=get_model()) -> Iterator[Event]:
for d in model.events(): for d in model.events():
yield _parse_event(d) yield _parse_event(d)

View file

@ -72,12 +72,14 @@ def listify(fn=None, wrapper=list):
return listify_return(fn) return listify_return(fn)
# TODO FIXME use in bluemaestro
# def dictify(fn=None, key=None, value=None): # def dictify(fn=None, key=None, value=None):
# def md(it): # def md(it):
# return make_dict(it, key=key, value=value) # return make_dict(it, key=key, value=value)
# return listify(fn=fn, wrapper=md) # return listify(fn=fn, wrapper=md)
# TODO try importing logzero defensively?
def setup_logger(logger, level=None, format=None, datefmt=None): def setup_logger(logger, level=None, format=None, datefmt=None):
import logging import logging
old_root = logging.root old_root = logging.root
@ -109,3 +111,18 @@ def get_files(pp: PathIsh, glob: str, sort=True) -> List[Path]:
assert path.is_file(), path assert path.is_file(), path
# TODO FIXME assert matches glob?? # TODO FIXME assert matches glob??
return [path] return [path]
def mcachew(*args, **kwargs):
    """
    'Maybe cachew': decorator factory that applies @cachew when the
    library is installed, and degrades to a no-op decorator otherwise,
    keeping cachew a purely optional dependency.
    """
    try:
        from cachew import cachew as _cachew
    except ModuleNotFoundError:
        # cachew missing: tell the user once per call site, then hand back
        # an identity decorator so the wrapped function runs uncached.
        import warnings
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')

        def _passthrough(orig_func):
            return orig_func

        return _passthrough
    # cachew available: forward the decoration arguments unchanged.
    return _cachew(*args, **kwargs)

View file

@ -103,8 +103,11 @@ def _iter_locations_fo(fo, start, stop) -> Iterator[Location]:
tag=tag tag=tag
) )
# TODO hope they are sorted... # TODO hope they are sorted... (could assert for it)
# TODO actually, path includes timestamp already... so mtime_hash isn't _really_ necessary
# TODO CACHEW_OFF env variable? # TODO CACHEW_OFF env variable?
# TODO use mcachew
from cachew import cachew, mtime_hash
@cachew(cache_path, hashf=mtime_hash, cls=Location, chunk_by=10000, logger=get_logger()) @cachew(cache_path, hashf=mtime_hash, cls=Location, chunk_by=10000, logger=get_logger())
def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]: def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
if path.suffix == '.json': if path.suffix == '.json':

View file

@ -2,6 +2,8 @@
from pathlib import Path, PosixPath from pathlib import Path, PosixPath
from typing import List, Sequence, Mapping from typing import List, Sequence, Mapping
from .common import mcachew
from mycfg import paths from mycfg import paths
import mycfg.repos.rexport.model as rexport import mycfg.repos.rexport.model as rexport
@ -30,9 +32,9 @@ def get_model():
model = rexport.Model(get_backup_files()) model = rexport.Model(get_backup_files())
return model return model
import logging
def get_logger(): def get_logger():
import logging
return logging.getLogger('my.reddit') return logging.getLogger('my.reddit')
@ -105,9 +107,7 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
key=lambda s: s.save.sid, key=lambda s: s.save.sid,
) )
from cachew import cachew @mcachew('/L/data/.cache/reddit-events.cache')
# TODO FIXME make defensive
@cachew('/L/data/.cache/reddit-events.cache')
def _get_events(backups: Sequence[Path]=get_backup_files(), parallel: bool=True) -> Iterator[Event]: def _get_events(backups: Sequence[Path]=get_backup_files(), parallel: bool=True) -> Iterator[Event]:
# TODO cachew: let it transform return type? so you don't have to write a wrapper for lists? # TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
# parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it? # parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?