core: move mcachew into my.core.cachew; use better typing annotations (copied from cachew)

This commit is contained in:
Dima Gerasimov 2023-06-07 22:06:29 +01:00 committed by karlicoss
parent f8cd31044e
commit 5fe21240b4
7 changed files with 113 additions and 77 deletions

View file

@ -53,7 +53,7 @@ def is_bad_table(name: str) -> bool:
from my.core.cachew import cache_dir
from my.core.common import mcachew
@mcachew(depends_on=lambda: inputs(), cache_path=cache_dir('bluemaestro'))
@mcachew(depends_on=inputs, cache_path=cache_dir('bluemaestro'))
def measurements() -> Iterable[Res[Measurement]]:
# todo ideally this would be via arguments... but needs to be lazy
dbs = inputs()

View file

@ -1,8 +1,16 @@
from .common import assert_subpackage; assert_subpackage(__name__)
from contextlib import contextmanager
import logging
from pathlib import Path
from typing import Optional
import sys
from typing import Optional, Iterator, cast, TYPE_CHECKING, TypeVar, Callable, overload, Union, Any, Type
import warnings
import appdirs
PathIsh = Union[str, Path] # avoid circular import from .common
def disable_cachew() -> None:
try:
@ -12,10 +20,10 @@ def disable_cachew() -> None:
return
from cachew import settings
settings.ENABLE = False
from typing import Iterator
@contextmanager
def disabled_cachew() -> Iterator[None]:
try:
@ -25,20 +33,23 @@ def disabled_cachew() -> Iterator[None]:
yield
return
from cachew.extra import disabled_cachew
with disabled_cachew():
yield
def _appdirs_cache_dir() -> Path:
    """Return the appdirs user cache directory for 'my', creating it (and any parents) if missing."""
    import appdirs

    cache_root = Path(appdirs.user_cache_dir('my'))
    # idempotent: an already existing directory is not an error
    cache_root.mkdir(parents=True, exist_ok=True)
    return cache_root
from . import PathIsh
_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
from . import core_config as CC
cdir_ = CC.config.get_cache_dir()
sp: Optional[Path] = None
@ -55,9 +66,86 @@ def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
# this logic is tested via test_cachew_dir_none
if cdir_ is None:
from .common import _CACHE_DIR_NONE_HACK
cdir = _CACHE_DIR_NONE_HACK
else:
cdir = cdir_
return cdir if sp is None else cdir / sp
"""See core.cachew.cache_dir for the explanation"""
_cache_path_dflt = cast(str, object())
# TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
# but it's a bit problematic.. doublewrap works by detecting if the first arg is callable
# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs):
    """
    Stands for 'Maybe cachew'.
    Defensive wrapper around @cachew to make it an optional dependency.

    :param cache_path: forwarded to ``cachew.cachew`` as ``cache_path``.
        When omitted, ``cache_dir()`` is used. A ``str``/``Path`` located under
        ``_CACHE_DIR_NONE_HACK`` is replaced with ``None``.
    :param kwargs: forwarded to ``cachew.cachew`` unchanged.
    :return: the decorator produced by ``cachew.cachew``; if the ``cachew`` library
        is not installed, a warning is emitted and an identity decorator is returned.
    """
    if cache_path is _cache_path_dflt:
        # wasn't specified... so we need to use cache_dir
        cache_path = cache_dir()

    if isinstance(cache_path, (str, Path)):
        try:
            # check that it starts with 'hack' path
            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
        except ValueError:
            # relative_to raises ValueError when the path doesn't start with the 'hack' path;
            # no action needed in that case. Catching only ValueError (instead of a bare
            # except) keeps KeyboardInterrupt/SystemExit and genuine bugs visible.
            pass
        else:
            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
            # if it does, means that user wanted to disable cache
            cache_path = None

    try:
        import cachew
    except ModuleNotFoundError:
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
        # cachew is optional: fall back to a decorator that returns the function untouched
        return lambda orig_func: orig_func
    else:
        kwargs['cache_path'] = cache_path
        return cachew.cachew(**kwargs)
# Typing-only declarations for mcachew: type checkers see the overloads below,
# while at runtime (the else branch) mcachew is simply _mcachew_impl.
if TYPE_CHECKING:
    R = TypeVar('R')
    # ParamSpec is imported from typing on Python 3.10+, otherwise from typing_extensions
    if sys.version_info[:2] >= (3, 10):
        from typing import ParamSpec
    else:
        from typing_extensions import ParamSpec
    P = ParamSpec('P')
    CC = Callable[P, R]  # need to give it a name, if inlined into bound=, mypy runs into a bug
    # cache path is either a path-like value or a callable producing one
    PathProvider = Union[PathIsh, Callable[P, PathIsh]]
    # NOTE: in cachew, HashFunction type returns str
    # however in practice, cachew always calls str for its result
    # so perhaps better to switch it to Any in cachew as well
    HashFunction = Callable[P, Any]

    F = TypeVar('F', bound=Callable)

    # we need two versions due to @doublewrap
    # this is when we just annotate as @cachew without any args
    @overload  # type: ignore[no-overload-impl]
    def mcachew(fun: F) -> F:
        ...

    # this is when the decorator is called with arguments; it returns a decorator preserving F
    @overload
    def mcachew(
        cache_path: Optional[PathProvider] = ...,
        *,
        force_file: bool = ...,
        cls: Optional[Type] = ...,
        depends_on: HashFunction = ...,
        logger: Optional[logging.Logger] = ...,
        chunk_by: int = ...,
        synthetic_key: Optional[str] = ...,
    ) -> Callable[[F], F]:
        ...

else:
    # runtime binding: the actual implementation
    mcachew = _mcachew_impl

View file

@ -239,70 +239,6 @@ def get_files(
return tuple(paths)
# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
# Typing-only declaration of mcachew's signature; this branch is only seen by type checkers.
if TYPE_CHECKING:
    from typing import Callable, TypeVar
    from typing_extensions import Protocol
    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
    # ok, that's actually a super nice 'pattern'
    F = TypeVar('F')

    class McachewType(Protocol):
        # callable protocol: takes the cache options and returns a decorator
        # that preserves the decorated object's type F
        def __call__(
            self,
            cache_path: Any=None,
            *,
            hashf: Any=None, # todo deprecate
            depends_on: Any=None,
            force_file: bool=False,
            chunk_by: int=0,
            logger: Any=None,
        ) -> Callable[[F], F]:
            ...

    # annotation only: declares the type of the name mcachew for type checkers
    mcachew: McachewType
_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
"""See core.cachew.cache_dir for the explanation"""
_cache_path_dflt = cast(str, object())
# TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
# but it's a bit problematic.. doublewrap works by detecting if the first arg is callable
# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
def mcachew(cache_path=_cache_path_dflt, **kwargs): # type: ignore[no-redef]
    """
    Stands for 'Maybe cachew'.
    Defensive wrapper around @cachew to make it an optional dependency.

    :param cache_path: forwarded to ``cachew.cachew`` as ``cache_path``.
        When omitted, ``cache_dir()`` is used. A ``str``/``Path`` located under
        ``_CACHE_DIR_NONE_HACK`` is replaced with ``None``.
    :param kwargs: forwarded to ``cachew.cachew`` unchanged.
    :return: the decorator produced by ``cachew.cachew``; if the ``cachew`` library
        is not installed, a warning is emitted and an identity decorator is returned.
    """
    if cache_path is _cache_path_dflt:
        # wasn't specified... so we need to use cache_dir
        from .cachew import cache_dir
        cache_path = cache_dir()

    if isinstance(cache_path, (str, Path)):
        try:
            # check that it starts with 'hack' path
            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
        except ValueError:
            # relative_to raises ValueError when the path doesn't start with the 'hack' path;
            # no action needed in that case. Catching only ValueError (instead of a bare
            # except) keeps KeyboardInterrupt/SystemExit and genuine bugs visible.
            pass
        else:
            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
            # if it does, means that user wanted to disable cache
            cache_path = None

    try:
        import cachew
    except ModuleNotFoundError:
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
        # cachew is optional: fall back to a decorator that returns the function untouched
        return lambda orig_func: orig_func
    else:
        kwargs['cache_path'] = cache_path
        return cachew.cachew(**kwargs)
@functools.lru_cache(1)
def _magic():
import magic # type: ignore
@ -663,4 +599,5 @@ def assert_never(value: NoReturn) -> NoReturn:
## legacy imports, keeping them here for backwards compatibility
from functools import cached_property as cproperty
from typing import Literal
from .cachew import mcachew
##

View file

@ -32,8 +32,12 @@ def dir_hash(path: Path):
return mtimes
def _cachew_depends_on():
    """Return ``dir_hash`` of ``config.export_path``; passed to ``@mcachew`` as ``depends_on``."""
    export_path = config.export_path
    return dir_hash(export_path)
# TODO take __file__ into account somehow?
@mcachew(cache_path=cache_dir() / 'emfit.cache', hashf=lambda: dir_hash(config.export_path))
@mcachew(cache_path=cache_dir() / 'emfit.cache', depends_on=_cachew_depends_on)
def datas() -> Iterable[Res[Emfit]]:
import dataclasses

View file

@ -61,7 +61,7 @@ def _dal() -> dal.DAL:
return dal.DAL(sources)
@mcachew(depends_on=lambda: inputs())
@mcachew(depends_on=inputs)
def events() -> Results:
from my.core.common import ensure_unique
key = lambda e: object() if isinstance(e, Exception) else e.eid

View file

@ -78,14 +78,22 @@ def _sanitize(p: Path) -> str:
return re.sub(r'\W', '_', str(p))
def _cachew_cache_path(_self, f: Path) -> Path:
    """Return the cache file location for ``f``: ``cache_dir() / 'orgmode' / _sanitize(f)``."""
    orgmode_cache = cache_dir() / 'orgmode'
    return orgmode_cache / _sanitize(f)
def _cachew_depends_on(_self, f: Path):
return (f, f.stat().st_mtime)
class Query:
def __init__(self, files: Sequence[Path]) -> None:
self.files = files
# TODO yield errors?
@mcachew(
cache_path=lambda _, f: cache_dir() / 'orgmode' / _sanitize(f), force_file=True,
depends_on=lambda _, f: (f, f.stat().st_mtime),
cache_path=_cachew_cache_path, force_file=True,
depends_on=_cachew_depends_on,
)
def _iterate(self, f: Path) -> Iterable[OrgNote]:
o = orgparse.load(f)

View file

@ -28,10 +28,9 @@ DAL = dal.DAL
Entry = dal.Entry
@mcachew(depends_on=lambda: inputs())
@mcachew(depends_on=inputs)
def entries() -> Iterable[Res[Entry]]:
dal = DAL(inputs())
it = dal.entries()
yield from dal.entries()