diff --git a/my/bluemaestro.py b/my/bluemaestro.py index b50c77c..b49e9e0 100644 --- a/my/bluemaestro.py +++ b/my/bluemaestro.py @@ -53,7 +53,7 @@ def is_bad_table(name: str) -> bool: from my.core.cachew import cache_dir from my.core.common import mcachew -@mcachew(depends_on=lambda: inputs(), cache_path=cache_dir('bluemaestro')) +@mcachew(depends_on=inputs, cache_path=cache_dir('bluemaestro')) def measurements() -> Iterable[Res[Measurement]]: # todo ideally this would be via arguments... but needs to be lazy dbs = inputs() diff --git a/my/core/cachew.py b/my/core/cachew.py index dbc4d49..7dd62d2 100644 --- a/my/core/cachew.py +++ b/my/core/cachew.py @@ -1,8 +1,16 @@ from .common import assert_subpackage; assert_subpackage(__name__) from contextlib import contextmanager +import logging from pathlib import Path -from typing import Optional +import sys +from typing import Optional, Iterator, cast, TYPE_CHECKING, TypeVar, Callable, overload, Union, Any, Type +import warnings + +import appdirs + +PathIsh = Union[str, Path] # avoid circular import from .common + def disable_cachew() -> None: try: @@ -12,10 +20,10 @@ def disable_cachew() -> None: return from cachew import settings + settings.ENABLE = False -from typing import Iterator @contextmanager def disabled_cachew() -> Iterator[None]: try: @@ -25,20 +33,23 @@ def disabled_cachew() -> Iterator[None]: yield return from cachew.extra import disabled_cachew + with disabled_cachew(): yield def _appdirs_cache_dir() -> Path: - import appdirs cd = Path(appdirs.user_cache_dir('my')) cd.mkdir(exist_ok=True, parents=True) return cd -from . import PathIsh +_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack') + + def cache_dir(suffix: Optional[PathIsh] = None) -> Path: from . 
import core_config as CC + cdir_ = CC.config.get_cache_dir() sp: Optional[Path] = None @@ -55,9 +66,86 @@ def cache_dir(suffix: Optional[PathIsh] = None) -> Path: # this logic is tested via test_cachew_dir_none if cdir_ is None: - from .common import _CACHE_DIR_NONE_HACK cdir = _CACHE_DIR_NONE_HACK else: cdir = cdir_ return cdir if sp is None else cdir / sp + + +"""See core.cachew.cache_dir for the explanation""" + + +_cache_path_dflt = cast(str, object()) + + +# TODO I don't really like 'mcachew', just 'cache' would be better... maybe? +# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work +# but it's a bit problematic.. doublewrap works by detecting if the first arg is callable +# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this +def _mcachew_impl(cache_path=_cache_path_dflt, **kwargs): + """ + Stands for 'Maybe cachew'. + Defensive wrapper around @cachew to make it an optional dependency. + """ + if cache_path is _cache_path_dflt: + # wasn't specified... so we need to use cache_dir + cache_path = cache_dir() + + if isinstance(cache_path, (str, Path)): + try: + # check that it starts with 'hack' path + Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK) + except: # noqa: E722 bare except + pass # no action needed, doesn't start with 'hack' string + else: + # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead? + # if it does, means that user wanted to disable cache + cache_path = None + try: + import cachew + except ModuleNotFoundError: + warnings.warn('cachew library not found. You might want to install it to speed things up. 
See https://github.com/karlicoss/cachew') + return lambda orig_func: orig_func + else: + kwargs['cache_path'] = cache_path + return cachew.cachew(**kwargs) + + +if TYPE_CHECKING: + R = TypeVar('R') + if sys.version_info[:2] >= (3, 10): + from typing import ParamSpec + else: + from typing_extensions import ParamSpec + P = ParamSpec('P') + CC = Callable[P, R] # need to give it a name, if inlined into bound=, mypy runs into a bug + PathProvider = Union[PathIsh, Callable[P, PathIsh]] + # NOTE: in cachew, HashFunction type returns str + # however in practice, cachew always calls str for its result + # so perhaps better to switch it to Any in cachew as well + HashFunction = Callable[P, Any] + + F = TypeVar('F', bound=Callable) + + # we need two versions due to @doublewrap + # this is when we just annotate as @cachew without any args + @overload # type: ignore[no-overload-impl] + def mcachew(fun: F) -> F: + ... + + @overload + def mcachew( + cache_path: Optional[PathProvider] = ..., + *, + force_file: bool = ..., + cls: Optional[Type] = ..., + depends_on: HashFunction = ..., + logger: Optional[logging.Logger] = ..., + chunk_by: int = ..., + synthetic_key: Optional[str] = ..., + ) -> Callable[[F], F]: + ... + +else: + mcachew = _mcachew_impl diff --git a/my/core/common.py b/my/core/common.py index 359f451..8c670fa 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -239,70 +239,6 @@ def get_files( return tuple(paths) -# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff) -if TYPE_CHECKING: - from typing import Callable, TypeVar - from typing_extensions import Protocol - # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time.. 
- # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270 - # ok, that's actually a super nice 'pattern' - F = TypeVar('F') - - class McachewType(Protocol): - def __call__( - self, - cache_path: Any=None, - *, - hashf: Any=None, # todo deprecate - depends_on: Any=None, - force_file: bool=False, - chunk_by: int=0, - logger: Any=None, - ) -> Callable[[F], F]: - ... - - mcachew: McachewType - - -_CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack') -"""See core.cachew.cache_dir for the explanation""" - - -_cache_path_dflt = cast(str, object()) -# TODO I don't really like 'mcachew', just 'cache' would be better... maybe? -# todo ugh. I think it needs @doublewrap, otherwise @mcachew without args doesn't work -# but it's a bit problematic.. doublewrap works by defecting if the first arg is callable -# but here cache_path can also be a callable (for lazy/dynamic path)... so unclear how to detect this -def mcachew(cache_path=_cache_path_dflt, **kwargs): # type: ignore[no-redef] - """ - Stands for 'Maybe cachew'. - Defensive wrapper around @cachew to make it an optional dependency. - """ - if cache_path is _cache_path_dflt: - # wasn't specified... so we need to use cache_dir - from .cachew import cache_dir - cache_path = cache_dir() - - if isinstance(cache_path, (str, Path)): - try: - # check that it starts with 'hack' path - Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK) - except: # noqa: E722 bare except - pass # no action needed, doesn't start with 'hack' string - else: - # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead? - # if it does, means that user wanted to disable cache - cache_path = None - try: - import cachew - except ModuleNotFoundError: - warnings.warn('cachew library not found. You might want to install it to speed things up. 
See https://github.com/karlicoss/cachew') - return lambda orig_func: orig_func - else: - kwargs['cache_path'] = cache_path - return cachew.cachew(**kwargs) - - @functools.lru_cache(1) def _magic(): import magic # type: ignore @@ -663,4 +599,5 @@ def assert_never(value: NoReturn) -> NoReturn: ## legacy imports, keeping them here for backwards compatibility from functools import cached_property as cproperty from typing import Literal -## \ No newline at end of file +from .cachew import mcachew +## diff --git a/my/emfit/__init__.py b/my/emfit/__init__.py index acaa303..cde6ddc 100644 --- a/my/emfit/__init__.py +++ b/my/emfit/__init__.py @@ -32,8 +32,12 @@ def dir_hash(path: Path): return mtimes +def _cachew_depends_on(): + return dir_hash(config.export_path) + + # TODO take __file__ into account somehow? -@mcachew(cache_path=cache_dir() / 'emfit.cache', hashf=lambda: dir_hash(config.export_path)) +@mcachew(cache_path=cache_dir() / 'emfit.cache', depends_on=_cachew_depends_on) def datas() -> Iterable[Res[Emfit]]: import dataclasses diff --git a/my/github/ghexport.py b/my/github/ghexport.py index 67042fc..9eebbf0 100644 --- a/my/github/ghexport.py +++ b/my/github/ghexport.py @@ -61,7 +61,7 @@ def _dal() -> dal.DAL: return dal.DAL(sources) -@mcachew(depends_on=lambda: inputs()) +@mcachew(depends_on=inputs) def events() -> Results: from my.core.common import ensure_unique key = lambda e: object() if isinstance(e, Exception) else e.eid diff --git a/my/orgmode.py b/my/orgmode.py index bb186d1..8293b74 100644 --- a/my/orgmode.py +++ b/my/orgmode.py @@ -78,14 +78,22 @@ def _sanitize(p: Path) -> str: return re.sub(r'\W', '_', str(p)) +def _cachew_cache_path(_self, f: Path) -> Path: + return cache_dir() / 'orgmode' / _sanitize(f) + + +def _cachew_depends_on(_self, f: Path): + return (f, f.stat().st_mtime) + + class Query: def __init__(self, files: Sequence[Path]) -> None: self.files = files # TODO yield errors? 
@mcachew( - cache_path=lambda _, f: cache_dir() / 'orgmode' / _sanitize(f), force_file=True, - depends_on=lambda _, f: (f, f.stat().st_mtime), + cache_path=_cachew_cache_path, force_file=True, + depends_on=_cachew_depends_on, ) def _iterate(self, f: Path) -> Iterable[OrgNote]: o = orgparse.load(f) diff --git a/my/rescuetime.py b/my/rescuetime.py index 40aa6b7..c986d89 100644 --- a/my/rescuetime.py +++ b/my/rescuetime.py @@ -28,10 +28,9 @@ DAL = dal.DAL Entry = dal.Entry -@mcachew(depends_on=lambda: inputs()) +@mcachew(depends_on=inputs) def entries() -> Iterable[Res[Entry]]: dal = DAL(inputs()) - it = dal.entries() yield from dal.entries()