diff --git a/my/core/common.py b/my/core/common.py index f84a395..bfc3505 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -6,13 +6,11 @@ import functools from contextlib import contextmanager import os import sys -import types from typing import ( Any, Callable, Dict, Iterable, - Iterator, List, Optional, Sequence, @@ -21,9 +19,6 @@ from typing import ( TypeVar, Union, cast, - get_args, - get_type_hints, - get_origin, ) import warnings @@ -426,58 +421,8 @@ class DummyExecutor(Executor): self._shutdown = True -def _check_all_hashable(fun): - # TODO ok, take callable? - hints = get_type_hints(fun) - # TODO needs to be defensive like in cachew? - return_type = hints.get('return') - # TODO check if None - origin = get_origin(return_type) # Iterator etc? - (arg,) = get_args(return_type) - # options we wanna handle are simple type on the top level or union - arg_origin = get_origin(arg) - - if sys.version_info[:2] >= (3, 10): - is_uniontype = arg_origin is types.UnionType - else: - is_uniontype = False - - is_union = arg_origin is Union or is_uniontype - if is_union: - to_check = get_args(arg) - else: - to_check = (arg,) - - no_hash = [ - t - for t in to_check - # seems that objects that have not overridden hash have the attribute but it's set to None - if getattr(t, '__hash__', None) is None - ] - assert len(no_hash) == 0, f'Types {no_hash} are not hashable, this will result in significant performance downgrade for unique_everseen' - - -_UET = TypeVar('_UET') -_UEU = TypeVar('_UEU') - - -def unique_everseen( - fun: Callable[[], Iterable[_UET]], - key: Optional[Callable[[_UET], _UEU]] = None, -) -> Iterator[_UET]: - # TODO support normal iterable as well? - import more_itertools - - # NOTE: it has to take original callable, because otherwise we don't have access to generator type annotations - iterable = fun() - - if key is None: - # todo check key return type as well? but it's more likely to be hashable - if os.environ.get('HPI_CHECK_UNIQUE_EVERSEEN') is not None: - # TODO return better error here, e.g. if there is no return type it crashes - _check_all_hashable(fun) - - return more_itertools.unique_everseen(iterable=iterable, key=key) +# TODO deprecate and suggest to use one from my.core directly? not sure +from .utils.itertools import unique_everseen ### legacy imports, keeping them here for backwards compatibility diff --git a/my/core/tests/common.py b/my/core/tests/common.py index d6fb71e..a102ad3 100644 --- a/my/core/tests/common.py +++ b/my/core/tests/common.py @@ -1,4 +1,6 @@ +from contextlib import contextmanager import os +from typing import Iterator, Optional import pytest @@ -10,3 +12,20 @@ skip_if_uses_optional_deps = pytest.mark.skipif( V not in os.environ, reason=f'test only works when optional dependencies are installed. Set env variable {V}=true to override.', ) + + +# TODO maybe move to hpi core? +@contextmanager +def tmp_environ_set(key: str, value: Optional[str]) -> Iterator[None]: + prev_value = os.environ.get(key) + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + try: + yield + finally: + if prev_value is None: + os.environ.pop(key, None) + else: + os.environ[key] = prev_value diff --git a/my/core/utils/itertools.py b/my/core/utils/itertools.py index 3997310..e8802bb 100644 --- a/my/core/utils/itertools.py +++ b/my/core/utils/itertools.py @@ -4,12 +4,26 @@ Various helpers/transforms of iterators Ideally this should be as small as possible and we should rely on stdlib itertools or more_itertools """ -from typing import Callable, Dict, Iterable, Iterator, Sized, TypeVar, List, cast, TYPE_CHECKING +from collections.abc import Hashable +from typing import ( + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + Sized, + Union, + TypeVar, + cast, + TYPE_CHECKING, +) import warnings from ..compat import ParamSpec from decorator import decorator +import more_itertools T = TypeVar('T') K = TypeVar('K') @@ -165,7 +179,6 @@ def test_warn_if_empty_iterator() -> None: res1 = nonempty() assert len(w) == 0 # warning isn't emitted until iterator is consumed assert_type(res1, Iterator[str]) - # assert isinstance(res1, generator) # FIXME ??? how assert list(res1) == ['a', 'aba'] assert len(w) == 0 @@ -177,7 +190,6 @@ def test_warn_if_empty_iterator() -> None: res2 = empty() assert len(w) == 0 # warning isn't emitted until iterator is consumed assert_type(res2, Iterator[int]) - # assert isinstance(res1, generator) # FIXME ??? how assert list(res2) == [] assert len(w) == 1 @@ -191,7 +203,6 @@ def test_warn_if_empty_list() -> None: def nonempty() -> List[int]: return ll - with warnings.catch_warnings(record=True) as w: res1 = nonempty() assert len(w) == 0 @@ -199,12 +210,10 @@ def test_warn_if_empty_list() -> None: assert isinstance(res1, list) assert res1 is ll # object should be unchanged! - @warn_if_empty def empty() -> List[str]: return [] - with warnings.catch_warnings(record=True) as w: res2 = empty() assert len(w) == 1 @@ -218,3 +227,150 @@ def test_warn_if_empty_unsupported() -> None: @warn_if_empty # type: ignore[type-var] def bad_return_type() -> float: return 0.00 + + +_HT = TypeVar('_HT', bound=Hashable) + + +# NOTE: ideally we'do It = TypeVar('It', bound=Iterable[_HT]), and function would be It -> It +# Sadly this doesn't work in mypy, doesn't look like we can have double bound TypeVar +# Not a huge deal, since this function is for unique_eversee and +# we need to pass iterator to unique_everseen anyway +# TODO maybe contribute to more_itertools? https://github.com/more-itertools/more-itertools/issues/898 +def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]: + """ + NOTE: Despite Hashable bound, typing annotation doesn't guarantee runtime safety + Consider hashable type X, and Y that inherits from X, but not hashable + Then l: List[X] = [Y(...)] is a valid expression, and type checks against Hashable, + but isn't runtime hashable + """ + # Sadly this doesn't work 100% correctly with dataclasses atm... + # they all are considered hashable: https://github.com/python/mypy/issues/11463 + + if isinstance(iterable, Iterator): + + def res() -> Iterator[_HT]: + for i in iterable: + assert isinstance(i, Hashable), i + # ugh. need a cast due to https://github.com/python/mypy/issues/10817 + yield cast(_HT, i) + + return res() + else: + # hopefully, iterable that can be iterated over multiple times? + # not sure if should have 'allowlist' of types that don't have to be transformed instead? + for i in iterable: + assert isinstance(i, Hashable), i + return iterable + + +# TODO different policies -- error/warn/ignore? +def test_check_if_hashable() -> None: + from dataclasses import dataclass + from typing import Set, Tuple + import pytest + from ..compat import assert_type + + x1: List[int] = [1, 2] + r1 = check_if_hashable(x1) + # tgype: ignore[comparison-overlap] # object should be unchanged + assert r1 is x1 + assert_type(r1, Iterable[int]) + + x2: Iterator[Union[int, str]] = iter((123, 'aba')) + r2 = check_if_hashable(x2) + assert list(r2) == [123, 'aba'] + assert_type(r2, Iterable[Union[int, str]]) + + x3: Tuple[object, ...] = (789, 'aba') + r3 = check_if_hashable(x3) + # ttype: ignore[comparison-overlap] # object should be unchanged + assert r3 is x3 + assert_type(r3, Iterable[object]) + + x4: List[Set[int]] = [{1, 2, 3}, {4, 5, 6}] + with pytest.raises(Exception): + # should be rejected by mypy sice set isn't Hashable, but also throw at runtime + r4 = check_if_hashable(x4) # type: ignore[type-var] + + x5: Iterator[object] = iter([{1, 2}, {3, 4}]) + # here, we hide behind object, which is hashable + # so mypy can't really help us anything + r5 = check_if_hashable(x5) + with pytest.raises(Exception): + # note: this only throws when iterator is advanced + list(r5) + + # dataclass is unhashable by default! unless frozen=True and eq=True, or unsafe_hash=True + @dataclass(unsafe_hash=True) + class X: + a: int + + x6: List[X] = [X(a=123)] + r6 = check_if_hashable(x6) + assert x6 is r6 + + # inherited dataclass will not be hashable! + @dataclass + class Y(X): + b: str + + x7: List[Y] = [Y(a=123, b='aba')] + with pytest.raises(Exception): + # ideally that would also be rejected by mypy, but currently there is a bug + # which treats all dataclasses as hashable: https://github.com/python/mypy/issues/11463 + check_if_hashable(x7) + + +_UET = TypeVar('_UET') +_UEU = TypeVar('_UEU') + + +# NOTE: for historic reasons, this function had to accept Callable that retuns iterator +# instead of just iterator +# TODO maybe deprecated Callable support? not sure +def unique_everseen( + fun: Union[ + Callable[[], Iterable[_UET]], + Iterable[_UET] + ], + key: Optional[Callable[[_UET], _UEU]] = None, +) -> Iterator[_UET]: + import os + + if callable(fun): + iterable = fun() + else: + iterable = fun + + if key is None: + # todo check key return type as well? but it's more likely to be hashable + if os.environ.get('HPI_CHECK_UNIQUE_EVERSEEN') is not None: + iterable = check_if_hashable(iterable) + + return more_itertools.unique_everseen(iterable=iterable, key=key) + + +def test_unique_everseen() -> None: + import pytest + from ..tests.common import tmp_environ_set + + def fun_good() -> Iterator[int]: + yield 123 + + def fun_bad(): + return [{1, 2}, {1, 2}, {1, 3}] + + with tmp_environ_set('HPI_CHECK_UNIQUE_EVERSEEN', 'yes'): + assert list(unique_everseen(fun_good)) == [123] + + with pytest.raises(Exception): + # since function retuns a list rather than iterator, check happens immediately + # , even without advancing the iterator + unique_everseen(fun_bad) + + good_list = [4, 3, 2, 1, 2, 3, 4] + assert list(unique_everseen(good_list)) == [4, 3, 2, 1] + + with tmp_environ_set('HPI_CHECK_UNIQUE_EVERSEEN', None): + assert list(unique_everseen(fun_bad)) == [{1, 2}, {1, 3}]