core: move mcachew into my.core.cachew; use better typing annotations (copied from cachew)

This commit is contained in:
Dima Gerasimov 2023-06-07 22:06:29 +01:00 committed by karlicoss
parent f8cd31044e
commit 5fe21240b4
7 changed files with 113 additions and 77 deletions

View file

@ -53,7 +53,7 @@ def is_bad_table(name: str) -> bool:
from my.core.cachew import cache_dir from my.core.cachew import cache_dir
from my.core.common import mcachew from my.core.common import mcachew
@mcachew(depends_on=lambda: inputs(), cache_path=cache_dir('bluemaestro')) @mcachew(depends_on=inputs, cache_path=cache_dir('bluemaestro'))
def measurements() -> Iterable[Res[Measurement]]: def measurements() -> Iterable[Res[Measurement]]:
# todo ideally this would be via arguments... but needs to be lazy # todo ideally this would be via arguments... but needs to be lazy
dbs = inputs() dbs = inputs()

View file

@ -1,8 +1,16 @@
from .common import assert_subpackage; assert_subpackage(__name__) from .common import assert_subpackage; assert_subpackage(__name__)
from contextlib import contextmanager from contextlib import contextmanager
import logging
from pathlib import Path from pathlib import Path
from typing import Optional import sys
from typing import Optional, Iterator, cast, TYPE_CHECKING, TypeVar, Callable, overload, Union, Any, Type
import warnings
import appdirs
PathIsh = Union[str, Path] # avoid circular import from .common
def disable_cachew() -> None: def disable_cachew() -> None:
try: try:
@ -12,10 +20,10 @@ def disable_cachew() -> None:
return return
from cachew import settings from cachew import settings
settings.ENABLE = False settings.ENABLE = False
from typing import Iterator
@contextmanager @contextmanager
def disabled_cachew() -> Iterator[None]: def disabled_cachew() -> Iterator[None]:
try: try:
@ -25,20 +33,23 @@ def disabled_cachew() -> Iterator[None]:
yield yield
return return
from cachew.extra import disabled_cachew from cachew.extra import disabled_cachew
with disabled_cachew(): with disabled_cachew():
yield yield
def _appdirs_cache_dir() -> Path: def _appdirs_cache_dir() -> Path:
import appdirs
cd = Path(appdirs.user_cache_dir('my')) cd = Path(appdirs.user_cache_dir('my'))
cd.mkdir(exist_ok=True, parents=True) cd.mkdir(exist_ok=True, parents=True)
return cd return cd
from . import PathIsh _CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
def cache_dir(suffix: Optional[PathIsh] = None) -> Path: def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
from . import core_config as CC from . import core_config as CC
cdir_ = CC.config.get_cache_dir() cdir_ = CC.config.get_cache_dir()
sp: Optional[Path] = None sp: Optional[Path] = None
@ -55,9 +66,86 @@ def cache_dir(suffix: Optional[PathIsh] = None) -> Path:
# this logic is tested via test_cachew_dir_none # this logic is tested via test_cachew_dir_none
if cdir_ is None: if cdir_ is None:
from .common import _CACHE_DIR_NONE_HACK
cdir = _CACHE_DIR_NONE_HACK cdir = _CACHE_DIR_NONE_HACK
else: else:
cdir = cdir_ cdir = cdir_
return cdir if sp is None else cdir / sp return cdir if sp is None else cdir / sp
"""See core.cachew.cache_dir for the explanation"""
_cache_path_dflt = cast(str, object())
# TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
# but it's a bit problematic.. doublewrap works by detecting if the first arg is callable
# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs):
    """
    Stands for 'Maybe cachew'.
    Defensive wrapper around @cachew to make it an optional dependency.

    :param cache_path: cache location passed on to cachew; when omitted, cache_dir() is used.
        A str/Path located under _CACHE_DIR_NONE_HACK is replaced with None before it is passed on.
    :param kwargs: forwarded unchanged to cachew.cachew.
    :return: the decorator built by cachew.cachew, or an identity decorator
        (after emitting a warning) when the cachew library can't be imported.
    """
    if cache_path is _cache_path_dflt:
        # wasn't specified... so we need to use cache_dir
        cache_path = cache_dir()

    if isinstance(cache_path, (str, Path)):
        try:
            # check that it starts with 'hack' path
            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
        except ValueError:
            # relative_to raises ValueError when the path isn't under the 'hack' dir
            # no action needed, doesn't start with 'hack' string
            pass
        else:
            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
            # if it does, means that user wanted to disable cache
            cache_path = None

    try:
        import cachew
    except ModuleNotFoundError:
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
        # cachew is optional: fall back to leaving the decorated function untouched
        return lambda orig_func: orig_func
    else:
        kwargs['cache_path'] = cache_path
        return cachew.cachew(**kwargs)
# Typing-only declaration of mcachew: type checkers see the overloads below,
# while at runtime (the else branch) the name is bound to _mcachew_impl.
if TYPE_CHECKING:
    R = TypeVar('R')
    # ParamSpec is imported from typing on 3.10+, otherwise from typing_extensions
    if sys.version_info[:2] >= (3, 10):
        from typing import ParamSpec
    else:
        from typing_extensions import ParamSpec
    P = ParamSpec('P')
    CC = Callable[P, R]  # need to give it a name, if inlined into bound=, mypy runs into a bug
    # cache path is either a fixed path or a callable computing it from the arguments
    PathProvider = Union[PathIsh, Callable[P, PathIsh]]
    # NOTE: in cachew, HashFunction type returns str
    # however in practice, cachew always calls str for its result
    # so perhaps better to switch it to Any in cachew as well
    HashFunction = Callable[P, Any]

    F = TypeVar('F', bound=Callable)

    # we need two versions due to @doublewrap
    # this is when we just annotate as @cachew without any args
    @overload  # type: ignore[no-overload-impl]
    def mcachew(fun: F) -> F:
        ...

    # this is when the decorator is called with arguments: it returns a decorator preserving the type of the wrapped function
    @overload
    def mcachew(
        cache_path: Optional[PathProvider] = ...,
        *,
        force_file: bool = ...,
        cls: Optional[Type] = ...,
        depends_on: HashFunction = ...,
        logger: Optional[logging.Logger] = ...,
        chunk_by: int = ...,
        synthetic_key: Optional[str] = ...,
    ) -> Callable[[F], F]:
        ...

else:
    # runtime binding: the actual implementation
    mcachew = _mcachew_impl

View file

@ -239,70 +239,6 @@ def get_files(
return tuple(paths) return tuple(paths)
# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
# Typing-only declaration: tells type checkers that mcachew is a callable
# matching McachewType; nothing in this branch is evaluated at runtime.
if TYPE_CHECKING:
    from typing import Callable, TypeVar
    from typing_extensions import Protocol
    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
    # ok, that's actually a super nice 'pattern'
    F = TypeVar('F')

    # Call signature of mcachew: optional positional cache_path, keyword-only options,
    # returns a decorator that preserves the type of the decorated object.
    class McachewType(Protocol):
        def __call__(
            self,
            cache_path: Any=None,
            *,
            hashf: Any=None, # todo deprecate
            depends_on: Any=None,
            force_file: bool=False,
            chunk_by: int=0,
            logger: Any=None,
        ) -> Callable[[F], F]:
            ...

    mcachew: McachewType
_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack')
"""See core.cachew.cache_dir for the explanation"""
_cache_path_dflt = cast(str, object())
# TODO I don't really like 'mcachew', just 'cache' would be better... maybe?
# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work
# but it's a bit problematic.. doublewrap works by detecting if the first arg is callable
# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this
def mcachew(cache_path=_cache_path_dflt, **kwargs): # type: ignore[no-redef]
    """
    Stands for 'Maybe cachew'.
    Defensive wrapper around @cachew to make it an optional dependency.

    :param cache_path: cache location passed on to cachew; when omitted, cache_dir() is used.
        A str/Path located under _CACHE_DIR_NONE_HACK is replaced with None before it is passed on.
    :param kwargs: forwarded unchanged to cachew.cachew.
    :return: the decorator built by cachew.cachew, or an identity decorator
        (after emitting a warning) when the cachew library can't be imported.
    """
    if cache_path is _cache_path_dflt:
        # wasn't specified... so we need to use cache_dir
        from .cachew import cache_dir
        cache_path = cache_dir()

    if isinstance(cache_path, (str, Path)):
        try:
            # check that it starts with 'hack' path
            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
        except ValueError:
            # relative_to raises ValueError when the path isn't under the 'hack' dir
            # no action needed, doesn't start with 'hack' string
            pass
        else:
            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
            # if it does, means that user wanted to disable cache
            cache_path = None

    try:
        import cachew
    except ModuleNotFoundError:
        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
        # cachew is optional: fall back to leaving the decorated function untouched
        return lambda orig_func: orig_func
    else:
        kwargs['cache_path'] = cache_path
        return cachew.cachew(**kwargs)
@functools.lru_cache(1) @functools.lru_cache(1)
def _magic(): def _magic():
import magic # type: ignore import magic # type: ignore
@ -663,4 +599,5 @@ def assert_never(value: NoReturn) -> NoReturn:
## legacy imports, keeping them here for backwards compatibility ## legacy imports, keeping them here for backwards compatibility
from functools import cached_property as cproperty from functools import cached_property as cproperty
from typing import Literal from typing import Literal
from .cachew import mcachew
## ##

View file

@ -32,8 +32,12 @@ def dir_hash(path: Path):
return mtimes return mtimes
def _cachew_depends_on():
    """Value passed as depends_on= for the datas() cache: dir_hash of the configured export path."""
    export_path = config.export_path
    return dir_hash(export_path)
# TODO take __file__ into account somehow? # TODO take __file__ into account somehow?
@mcachew(cache_path=cache_dir() / 'emfit.cache', hashf=lambda: dir_hash(config.export_path)) @mcachew(cache_path=cache_dir() / 'emfit.cache', depends_on=_cachew_depends_on)
def datas() -> Iterable[Res[Emfit]]: def datas() -> Iterable[Res[Emfit]]:
import dataclasses import dataclasses

View file

@ -61,7 +61,7 @@ def _dal() -> dal.DAL:
return dal.DAL(sources) return dal.DAL(sources)
@mcachew(depends_on=lambda: inputs()) @mcachew(depends_on=inputs)
def events() -> Results: def events() -> Results:
from my.core.common import ensure_unique from my.core.common import ensure_unique
key = lambda e: object() if isinstance(e, Exception) else e.eid key = lambda e: object() if isinstance(e, Exception) else e.eid

View file

@ -78,14 +78,22 @@ def _sanitize(p: Path) -> str:
return re.sub(r'\W', '_', str(p)) return re.sub(r'\W', '_', str(p))
def _cachew_cache_path(_self, f: Path) -> Path:
    """Cache file location for org file `f`: <cache_dir>/orgmode/<sanitized f>."""
    sanitized = _sanitize(f)
    return cache_dir().joinpath('orgmode', sanitized)
def _cachew_depends_on(_self, f: Path):
    """Cache key for org file `f`: the path together with its modification time."""
    mtime = f.stat().st_mtime
    return (f, mtime)
class Query: class Query:
def __init__(self, files: Sequence[Path]) -> None: def __init__(self, files: Sequence[Path]) -> None:
self.files = files self.files = files
# TODO yield errors? # TODO yield errors?
@mcachew( @mcachew(
cache_path=lambda _, f: cache_dir() / 'orgmode' / _sanitize(f), force_file=True, cache_path=_cachew_cache_path, force_file=True,
depends_on=lambda _, f: (f, f.stat().st_mtime), depends_on=_cachew_depends_on,
) )
def _iterate(self, f: Path) -> Iterable[OrgNote]: def _iterate(self, f: Path) -> Iterable[OrgNote]:
o = orgparse.load(f) o = orgparse.load(f)

View file

@ -28,10 +28,9 @@ DAL = dal.DAL
Entry = dal.Entry Entry = dal.Entry
@mcachew(depends_on=lambda: inputs()) @mcachew(depends_on=inputs)
def entries() -> Iterable[Res[Entry]]: def entries() -> Iterable[Res[Entry]]:
dal = DAL(inputs()) dal = DAL(inputs())
it = dal.entries()
yield from dal.entries() yield from dal.entries()