core: move mcachew into my.core.cachew; use better typing annotations (copied from cachew)

2023-06-07 22:06:29 +01:00 · 2023-06-07 22:06:29 +01:00 · 5fe21240b4
commit 5fe21240b4
parent f8cd31044e
7 changed files with 113 additions and 77 deletions
--- a/my/bluemaestro.py
+++ b/my/bluemaestro.py
@ -53,7 +53,7 @@ def is_bad_table(name: str) -> bool:

 from my.core.cachew import cache_dir
 from my.core.common import mcachew
-@mcachew(depends_on=lambda: inputs(), cache_path=cache_dir('bluemaestro'))
+@mcachew(depends_on=inputs, cache_path=cache_dir('bluemaestro'))
 def measurements() -> Iterable[Res[Measurement]]:
    # todo ideally this would be via arguments... but needs to be lazy
    dbs = inputs()
--- a/my/core/cachew.py
+++ b/my/core/cachew.py
@ -1,8 +1,16 @@
 from .common import assert_subpackage; assert_subpackage(__name__)

 from contextlib import contextmanager
+import logging
 from pathlib import Path
-from typing import Optional
+import sys
+from typing import Optional, Iterator, cast, TYPE_CHECKING, TypeVar, Callable, overload, Union, Any, Type
+import warnings
+
+import appdirs
+
+PathIsh = Union[str, Path]  # avoid circular import from .common
+

 def disable_cachew() -> None:
    try:
@ -12,10 +20,10 @@ def disable_cachew() -> None:
        return

    from cachew import settings
+
    settings.ENABLE = False


-from typing import Iterator
@contextmanager
 def disabled_cachew() -> Iterator[None]:
    try:
@ -25,20 +33,23 @@ def disabled_cachew() -> Iterator[None]:
        yield
        return
    from cachew.extra import disabled_cachew
+
    with disabled_cachew():
        yield


 def _appdirs_cache_dir() -> Path:
-    import appdirs
    cd = Path(appdirs.user_cache_dir('my'))
    cd.mkdir(exist_ok=True, parents=True)
    return cd


-from . import PathIsh
+_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
+
+
 def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
    from . import core_config as CC
+
    cdir_ = CC.config.get_cache_dir()

    sp: Optional[Path] = None
@ -55,9 +66,86 @@ def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
    # this logic is tested via test_cachew_dir_none

    if cdir_ is None:
-        from .common import _CACHE_DIR_NONE_HACK
        cdir = _CACHE_DIR_NONE_HACK
    else:
        cdir = cdir_

    return cdir if sp is None else cdir / sp
+
+
+"""See core.cachew.cache_dir for the explanation"""
+
+
+_cache_path_dflt = cast(str, object())
+
+
+# TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
+# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
+# but it's a bit problematic.. doublewrap works by detecting if the first arg is callable
+# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
+def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs):
+    """
+    Stands for 'Maybe cachew'.
+    Defensive wrapper around @cachew to make it an optional dependency.
+    """
+    if cache_path is _cache_path_dflt:
+        # wasn't specified... so we need to use cache_dir
+        cache_path = cache_dir()
+
+    if isinstance(cache_path, (str, Path)):
+        try:
+            # check that it starts with 'hack' path
+            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
+        except:  # noqa: E722 bare except
+            pass  # no action needed, doesn't start with 'hack' string
+        else:
+            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
+            # if it does, means that user wanted to disable cache
+            cache_path = None
+    try:
+        import cachew
+    except ModuleNotFoundError:
+        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
+        return lambda orig_func: orig_func
+    else:
+        kwargs['cache_path'] = cache_path
+        return cachew.cachew(**kwargs)
+
+
+if TYPE_CHECKING:
+    R = TypeVar('R')
+    if sys.version_info[:2] >= (3, 10):
+        from typing import ParamSpec
+    else:
+        from typing_extensions import ParamSpec
+    P = ParamSpec('P')
+    CC = Callable[P, R]  # need to give it a name, if inlined into bound=, mypy runs into a bug
+    PathProvider = Union[PathIsh, Callable[P, PathIsh]]
+    # NOTE: in cachew, HashFunction type returns str
+    # however in practice, cachew always calls str for its result
+    # so perhaps better to switch it to Any in cachew as well
+    HashFunction = Callable[P, Any]
+
+    F = TypeVar('F', bound=Callable)
+
+    # we need two versions due to @doublewrap
+    # this is when we just annotate as @cachew without any args
+    @overload  # type: ignore[no-overload-impl]
+    def mcachew(fun: F) -> F:
+        ...
+
+    @overload
+    def mcachew(
+        cache_path: Optional[PathProvider] = ...,
+        *,
+        force_file: bool = ...,
+        cls: Optional[Type] = ...,
+        depends_on: HashFunction = ...,
+        logger: Optional[logging.Logger] = ...,
+        chunk_by: int = ...,
+        synthetic_key: Optional[str] = ...,
+    ) -> Callable[[F], F]:
+        ...
+
+else:
+    mcachew = _mcachew_impl
--- a/my/core/common.py
+++ b/my/core/common.py
@ -239,70 +239,6 @@ def get_files(
    return tuple(paths)


-# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
-if TYPE_CHECKING:
-    from typing import Callable, TypeVar
-    from typing_extensions import Protocol
-    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
-    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
-    # ok, that's actually a super nice 'pattern'
-    F = TypeVar('F')
-
-    class McachewType(Protocol):
-        def __call__(
-                self,
-                cache_path: Any=None,
-                *,
-                hashf: Any=None, # todo deprecate
-                depends_on: Any=None,
-                force_file: bool=False,
-                chunk_by: int=0,
-                logger: Any=None,
-        ) -> Callable[[F], F]:
-            ...
-
-    mcachew: McachewType
-
-
-_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
-"""See core.cachew.cache_dir for the explanation"""
-
-
-_cache_path_dflt = cast(str, object())
-# TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
-# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
-# but it's a bit problematic.. doublewrap works by defecting if the first arg is callable
-# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
-def mcachew(cache_path=_cache_path_dflt, **kwargs): # type: ignore[no-redef]
-    """
-    Stands for 'Maybe cachew'.
-    Defensive wrapper around @cachew to make it an optional dependency.
-    """
-    if cache_path is _cache_path_dflt:
-        # wasn't specified... so we need to use cache_dir
-        from .cachew import cache_dir
-        cache_path = cache_dir()
-
-    if isinstance(cache_path, (str, Path)):
-        try:
-            # check that it starts with 'hack' path
-            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
-        except: # noqa: E722 bare except
-            pass # no action needed, doesn't start with 'hack' string
-        else:
-            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
-            # if it does, means that user wanted to disable cache
-            cache_path = None
-    try:
-        import cachew
-    except ModuleNotFoundError:
-        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
-        return lambda orig_func: orig_func
-    else:
-        kwargs['cache_path'] = cache_path
-        return cachew.cachew(**kwargs)
-
-
@functools.lru_cache(1)
 def _magic():
    import magic # type: ignore
@ -663,4 +599,5 @@ def assert_never(value: NoReturn) -> NoReturn:
 ## legacy imports, keeping them here for backwards compatibility
 from functools import cached_property as cproperty
 from typing import Literal
-## 
+from .cachew import mcachew
+##
--- a/my/emfit/init.py
+++ b/my/emfit/init.py
@ -32,8 +32,12 @@ def dir_hash(path: Path):
    return mtimes


+def _cachew_depends_on():
+    return dir_hash(config.export_path)
+
+
 # TODO take __file__ into account somehow?
-@mcachew(cache_path=cache_dir() / 'emfit.cache', hashf=lambda: dir_hash(config.export_path))
+@mcachew(cache_path=cache_dir() / 'emfit.cache', depends_on=_cachew_depends_on)
 def datas() -> Iterable[Res[Emfit]]:
    import dataclasses

--- a/my/github/ghexport.py
+++ b/my/github/ghexport.py
@ -61,7 +61,7 @@ def _dal() -> dal.DAL:
    return dal.DAL(sources)


-@mcachew(depends_on=lambda: inputs())
+@mcachew(depends_on=inputs)
 def events() -> Results:
    from my.core.common import ensure_unique
    key = lambda e: object() if isinstance(e, Exception) else e.eid
--- a/my/orgmode.py
+++ b/my/orgmode.py
@ -78,14 +78,22 @@ def _sanitize(p: Path) -> str:
    return re.sub(r'\W', '_', str(p))


+def _cachew_cache_path(_self, f: Path) -> Path:
+    return cache_dir() / 'orgmode' / _sanitize(f)
+
+
+def _cachew_depends_on(_self, f: Path):
+    return (f, f.stat().st_mtime)
+
+ 
 class Query:
    def __init__(self, files: Sequence[Path]) -> None:
        self.files = files

    # TODO yield errors?
    @mcachew(
-        cache_path=lambda _, f: cache_dir() / 'orgmode' / _sanitize(f), force_file=True,
-        depends_on=lambda _, f: (f, f.stat().st_mtime),
+        cache_path=_cachew_cache_path, force_file=True,
+        depends_on=_cachew_depends_on,
    )
    def _iterate(self, f: Path) -> Iterable[OrgNote]:
        o = orgparse.load(f)
--- a/my/rescuetime.py
+++ b/my/rescuetime.py
@ -28,10 +28,9 @@ DAL = dal.DAL
 Entry = dal.Entry


-@mcachew(depends_on=lambda: inputs())
+@mcachew(depends_on=inputs)
 def entries() -> Iterable[Res[Entry]]:
    dal = DAL(inputs())
-    it = dal.entries()
    yield from dal.entries()