core: move mcachew into my.core.cachew; use better typing annotations (copied from cachew)

2023-06-07 22:06:29 +01:00 · 2023-06-07 22:06:29 +01:00 · 5fe21240b4
commit 5fe21240b4
parent f8cd31044e
7 changed files with 113 additions and 77 deletions
--- a/my/bluemaestro.py
+++ b/my/bluemaestro.py
@ -53,7 +53,7 @@ def is_bad_table(name: str) -> bool:
 from my.core.cachew import cache_dir
 from my.core.common import mcachew
-@mcachew(depends_on=lambda: inputs(), cache_path=cache_dir('bluemaestro'))
+@mcachew(depends_on=inputs, cache_path=cache_dir('bluemaestro'))
 def measurements() -> Iterable[Res[Measurement]]:
    # todo ideally this would be via arguments... but needs to be lazy
    dbs = inputs()
--- a/my/core/cachew.py
+++ b/my/core/cachew.py
@ -1,8 +1,16 @@
 from .common import assert_subpackage; assert_subpackage(__name__)
 from contextlib import contextmanager
 import logging
 from pathlib import Path
-from typing import Optional
+import sys
 from typing import Optional, Iterator, cast, TYPE_CHECKING, TypeVar, Callable, overload, Union, Any, Type
 import warnings
 import appdirs
 PathIsh = Union[str, Path]  # avoid circular import from .common
 def disable_cachew() -> None:
    try:
@ -12,10 +20,10 @@ def disable_cachew() -> None:
        return
    from cachew import settings
    settings.ENABLE = False
 from typing import Iterator
@contextmanager
 def disabled_cachew() -> Iterator[None]:
    try:
@ -25,20 +33,23 @@ def disabled_cachew() -> Iterator[None]:
        yield
        return
    from cachew.extra import disabled_cachew
    with disabled_cachew():
        yield
 def _appdirs_cache_dir() -> Path:
    """Return the appdirs user cache directory for 'my', creating it (with parents) if missing."""
    import appdirs
    cache_root = Path(appdirs.user_cache_dir('my'))
    cache_root.mkdir(parents=True, exist_ok=True)
    return cache_root
-from . import PathIsh
+_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
 def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
    from . import core_config as CC
    cdir_ = CC.config.get_cache_dir()
    sp: Optional[Path] = None
@ -55,9 +66,86 @@ def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
    # this logic is tested via test_cachew_dir_none
    if cdir_ is None:
        from .common import _CACHE_DIR_NONE_HACK
        cdir = _CACHE_DIR_NONE_HACK
    else:
        cdir = cdir_
    return cdir if sp is None else cdir / sp
 """See core.cachew.cache_dir for the explanation"""
 _cache_path_dflt = cast(str, object())
 # TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
 # todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
 # but it's a bit problematic.. doublewrap works by detecting if the first arg is callable
 # but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
 def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs):
    """
    Stands for 'Maybe cachew'.
    Defensive wrapper around @cachew to make it an optional dependency.

    cache_path: where to keep the cache. When omitted, cache_dir() is used.
        A str/Path located under _CACHE_DIR_NONE_HACK is replaced with None
        (treated as 'the user wanted to disable the cache'). Any other value
        is handed to cachew unchanged.
    kwargs: forwarded to cachew.cachew as is.

    Returns a decorator. If the cachew library can't be imported, a warning is
    emitted and the decorator returns the wrapped function untouched.
    """
    if cache_path is _cache_path_dflt:
        # wasn't specified... so we need to use cache_dir
        cache_path = cache_dir()
    if isinstance(cache_path, (str, Path)):
        try:
            # check that it starts with 'hack' path
            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
        except ValueError:
            # relative_to raises ValueError when the path isn't under the 'hack' dir;
            # catching only that keeps KeyboardInterrupt & co. from being swallowed
            pass  # no action needed, doesn't start with 'hack' string
        else:
            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
            # if it does, means that user wanted to disable cache
            cache_path = None
    try:
        import cachew
    except ModuleNotFoundError:
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
        return lambda orig_func: orig_func
    else:
        kwargs['cache_path'] = cache_path
        return cachew.cachew(**kwargs)
 # Typing-only declaration of mcachew: type checkers see the two overloads below,
 # while at runtime (else branch) mcachew is simply an alias for _mcachew_impl.
 if TYPE_CHECKING:
    R = TypeVar('R')
    # ParamSpec lives in typing from 3.10 on; older interpreters take it from typing_extensions
    if sys.version_info[:2] >= (3, 10):
        from typing import ParamSpec
    else:
        from typing_extensions import ParamSpec
    P = ParamSpec('P')
    CC = Callable[P, R]  # need to give it a name; if inlined into bound=, mypy runs into a bug
    # cache path is either a fixed path or a callable computing one
    PathProvider = Union[PathIsh, Callable[P, PathIsh]]
    # NOTE: in cachew, HashFunction type returns str
    # however in practice, cachew always calls str for its result
    # so perhaps better to switch it to Any in cachew as well
    HashFunction = Callable[P, Any]
    F = TypeVar('F', bound=Callable)
    # we need two versions due to @doublewrap
    # this is when we just annotate as @cachew without any args
    @overload  # type: ignore[no-overload-impl]
    def mcachew(fun: F) -> F:
        ...
    # this is when the decorator is called with arguments and returns the actual decorator
    @overload
    def mcachew(
        cache_path: Optional[PathProvider] = ...,
        *,
        force_file: bool = ...,
        cls: Optional[Type] = ...,
        depends_on: HashFunction = ...,
        logger: Optional[logging.Logger] = ...,
        chunk_by: int = ...,
        synthetic_key: Optional[str] = ...,
    ) -> Callable[[F], F]:
        ...
 else:
    mcachew = _mcachew_impl
--- a/my/core/common.py
+++ b/my/core/common.py
@ -239,70 +239,6 @@ def get_files(
    return tuple(paths)
 # TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
 # Typing-only declaration: describes the call signature of mcachew for type checkers
 if TYPE_CHECKING:
    from typing import Callable, TypeVar
    from typing_extensions import Protocol
    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
    # ok, that's actually a super nice 'pattern'
    F = TypeVar('F')
    # callable protocol: takes the cache options and returns a decorator that preserves the decorated type F
    class McachewType(Protocol):
        def __call__(
                self,
                cache_path: Any=None,
                *,
                hashf: Any=None, # todo deprecate
                depends_on: Any=None,
                force_file: bool=False,
                chunk_by: int=0,
                logger: Any=None,
        ) -> Callable[[F], F]:
            ...
    # annotation only, no value assigned here
    mcachew: McachewType
 _CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
 """See core.cachew.cache_dir for the explanation"""
 _cache_path_dflt = cast(str, object())
 # TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
 # todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
 # but it's a bit problematic.. doublewrap works by detecting if the first arg is callable
 # but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
 def mcachew(cache_path=_cache_path_dflt, **kwargs): # type: ignore[no-redef]
    """
    Stands for 'Maybe cachew'.
    Defensive wrapper around @cachew to make it an optional dependency.

    cache_path: where to keep the cache. When omitted, cache_dir() is used.
        A str/Path located under _CACHE_DIR_NONE_HACK is replaced with None
        (treated as 'the user wanted to disable the cache'). Any other value
        is handed to cachew unchanged.
    kwargs: forwarded to cachew.cachew as is.

    Returns a decorator. If the cachew library can't be imported, a warning is
    emitted and the decorator returns the wrapped function untouched.
    """
    if cache_path is _cache_path_dflt:
        # wasn't specified... so we need to use cache_dir
        from .cachew import cache_dir
        cache_path = cache_dir()
    if isinstance(cache_path, (str, Path)):
        try:
            # check that it starts with 'hack' path
            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
        except ValueError:
            # relative_to raises ValueError when the path isn't under the 'hack' dir;
            # catching only that keeps KeyboardInterrupt & co. from being swallowed
            pass # no action needed, doesn't start with 'hack' string
        else:
            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
            # if it does, means that user wanted to disable cache
            cache_path = None
    try:
        import cachew
    except ModuleNotFoundError:
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
        return lambda orig_func: orig_func
    else:
        kwargs['cache_path'] = cache_path
        return cachew.cachew(**kwargs)
@functools.lru_cache(1)
 def _magic():
    import magic # type: ignore
@ -663,4 +599,5 @@ def assert_never(value: NoReturn) -> NoReturn:
 ## legacy imports, keeping them here for backwards compatibility
 from functools import cached_property as cproperty
 from typing import Literal
-## 
+from .cachew import mcachew
 ##
--- a/my/emfit/init.py
+++ b/my/emfit/init.py
@ -32,8 +32,12 @@ def dir_hash(path: Path):
    return mtimes
 def _cachew_depends_on():
    """Value the cache depends on: dir_hash of the configured export path."""
    export_path = config.export_path
    return dir_hash(export_path)
 # TODO take __file__ into account somehow?
-@mcachew(cache_path=cache_dir() / 'emfit.cache', hashf=lambda: dir_hash(config.export_path))
+@mcachew(cache_path=cache_dir() / 'emfit.cache', depends_on=_cachew_depends_on)
 def datas() -> Iterable[Res[Emfit]]:
    import dataclasses
--- a/my/github/ghexport.py
+++ b/my/github/ghexport.py
@ -61,7 +61,7 @@ def _dal() -> dal.DAL:
    return dal.DAL(sources)
-@mcachew(depends_on=lambda: inputs())
+@mcachew(depends_on=inputs)
 def events() -> Results:
    from my.core.common import ensure_unique
    key = lambda e: object() if isinstance(e, Exception) else e.eid
--- a/my/orgmode.py
+++ b/my/orgmode.py
@ -78,14 +78,22 @@ def _sanitize(p: Path) -> str:
    return re.sub(r'\W', '_', str(p))
 def _cachew_cache_path(_self, f: Path) -> Path:
    """Cache location for file f: <cache_dir>/orgmode/<sanitized f>. The first argument is ignored."""
    orgmode_cache = cache_dir() / 'orgmode'
    return orgmode_cache / _sanitize(f)
 def _cachew_depends_on(_self, f: Path):
    return (f, f.stat().st_mtime)
 class Query:
    def __init__(self, files: Sequence[Path]) -> None:
        self.files = files
    # TODO yield errors?
    @mcachew(
-        cache_path=lambda _, f: cache_dir() / 'orgmode' / _sanitize(f), force_file=True,
+        cache_path=_cachew_cache_path, force_file=True,
-        depends_on=lambda _, f: (f, f.stat().st_mtime),
+        depends_on=_cachew_depends_on,
    )
    def _iterate(self, f: Path) -> Iterable[OrgNote]:
        o = orgparse.load(f)
--- a/my/rescuetime.py
+++ b/my/rescuetime.py
@ -28,10 +28,9 @@ DAL = dal.DAL
 Entry = dal.Entry
-@mcachew(depends_on=lambda: inputs())
+@mcachew(depends_on=inputs)
 def entries() -> Iterable[Res[Entry]]:
    """Yield everything produced by DAL(inputs()).entries()."""
    dal = DAL(inputs())
    # entries() is called once; the previous unused `it = dal.entries()` call was redundant
    yield from dal.entries()