core: cleanup my.core.common.unique_everseen
- move to my.core.utils.itertools - more robust check for hashable types -- now checks in runtime (since the one based on types purely isn't necessarily sound) - add more testing
This commit is contained in:
parent
06084a8787
commit
bcc4c15304
3 changed files with 183 additions and 63 deletions
|
@ -6,13 +6,11 @@ import functools
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import types
|
|
||||||
from typing import (
|
from typing import (
|
||||||
Any,
|
Any,
|
||||||
Callable,
|
Callable,
|
||||||
Dict,
|
Dict,
|
||||||
Iterable,
|
Iterable,
|
||||||
Iterator,
|
|
||||||
List,
|
List,
|
||||||
Optional,
|
Optional,
|
||||||
Sequence,
|
Sequence,
|
||||||
|
@ -21,9 +19,6 @@ from typing import (
|
||||||
TypeVar,
|
TypeVar,
|
||||||
Union,
|
Union,
|
||||||
cast,
|
cast,
|
||||||
get_args,
|
|
||||||
get_type_hints,
|
|
||||||
get_origin,
|
|
||||||
)
|
)
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
@ -426,58 +421,8 @@ class DummyExecutor(Executor):
|
||||||
self._shutdown = True
|
self._shutdown = True
|
||||||
|
|
||||||
|
|
||||||
def _check_all_hashable(fun):
|
# TODO deprecate and suggest to use one from my.core directly? not sure
|
||||||
# TODO ok, take callable?
|
from .utils.itertools import unique_everseen
|
||||||
hints = get_type_hints(fun)
|
|
||||||
# TODO needs to be defensive like in cachew?
|
|
||||||
return_type = hints.get('return')
|
|
||||||
# TODO check if None
|
|
||||||
origin = get_origin(return_type) # Iterator etc?
|
|
||||||
(arg,) = get_args(return_type)
|
|
||||||
# options we wanna handle are simple type on the top level or union
|
|
||||||
arg_origin = get_origin(arg)
|
|
||||||
|
|
||||||
if sys.version_info[:2] >= (3, 10):
|
|
||||||
is_uniontype = arg_origin is types.UnionType
|
|
||||||
else:
|
|
||||||
is_uniontype = False
|
|
||||||
|
|
||||||
is_union = arg_origin is Union or is_uniontype
|
|
||||||
if is_union:
|
|
||||||
to_check = get_args(arg)
|
|
||||||
else:
|
|
||||||
to_check = (arg,)
|
|
||||||
|
|
||||||
no_hash = [
|
|
||||||
t
|
|
||||||
for t in to_check
|
|
||||||
# seems that objects that have not overridden hash have the attribute but it's set to None
|
|
||||||
if getattr(t, '__hash__', None) is None
|
|
||||||
]
|
|
||||||
assert len(no_hash) == 0, f'Types {no_hash} are not hashable, this will result in significant performance downgrade for unique_everseen'
|
|
||||||
|
|
||||||
|
|
||||||
_UET = TypeVar('_UET')
|
|
||||||
_UEU = TypeVar('_UEU')
|
|
||||||
|
|
||||||
|
|
||||||
def unique_everseen(
|
|
||||||
fun: Callable[[], Iterable[_UET]],
|
|
||||||
key: Optional[Callable[[_UET], _UEU]] = None,
|
|
||||||
) -> Iterator[_UET]:
|
|
||||||
# TODO support normal iterable as well?
|
|
||||||
import more_itertools
|
|
||||||
|
|
||||||
# NOTE: it has to take original callable, because otherwise we don't have access to generator type annotations
|
|
||||||
iterable = fun()
|
|
||||||
|
|
||||||
if key is None:
|
|
||||||
# todo check key return type as well? but it's more likely to be hashable
|
|
||||||
if os.environ.get('HPI_CHECK_UNIQUE_EVERSEEN') is not None:
|
|
||||||
# TODO return better error here, e.g. if there is no return type it crashes
|
|
||||||
_check_all_hashable(fun)
|
|
||||||
|
|
||||||
return more_itertools.unique_everseen(iterable=iterable, key=key)
|
|
||||||
|
|
||||||
|
|
||||||
### legacy imports, keeping them here for backwards compatibility
|
### legacy imports, keeping them here for backwards compatibility
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
|
from contextlib import contextmanager
|
||||||
import os
|
import os
|
||||||
|
from typing import Iterator, Optional
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
@ -10,3 +12,20 @@ skip_if_uses_optional_deps = pytest.mark.skipif(
|
||||||
V not in os.environ,
|
V not in os.environ,
|
||||||
reason=f'test only works when optional dependencies are installed. Set env variable {V}=true to override.',
|
reason=f'test only works when optional dependencies are installed. Set env variable {V}=true to override.',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO maybe move to hpi core?
|
||||||
|
@contextmanager
|
||||||
|
def tmp_environ_set(key: str, value: Optional[str]) -> Iterator[None]:
|
||||||
|
prev_value = os.environ.get(key)
|
||||||
|
if value is None:
|
||||||
|
os.environ.pop(key, None)
|
||||||
|
else:
|
||||||
|
os.environ[key] = value
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
if prev_value is None:
|
||||||
|
os.environ.pop(key, None)
|
||||||
|
else:
|
||||||
|
os.environ[key] = prev_value
|
||||||
|
|
|
@ -4,12 +4,26 @@ Various helpers/transforms of iterators
|
||||||
Ideally this should be as small as possible and we should rely on stdlib itertools or more_itertools
|
Ideally this should be as small as possible and we should rely on stdlib itertools or more_itertools
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Callable, Dict, Iterable, Iterator, Sized, TypeVar, List, cast, TYPE_CHECKING
|
from collections.abc import Hashable
|
||||||
|
from typing import (
|
||||||
|
Callable,
|
||||||
|
Dict,
|
||||||
|
Iterable,
|
||||||
|
Iterator,
|
||||||
|
List,
|
||||||
|
Optional,
|
||||||
|
Sized,
|
||||||
|
Union,
|
||||||
|
TypeVar,
|
||||||
|
cast,
|
||||||
|
TYPE_CHECKING,
|
||||||
|
)
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from ..compat import ParamSpec
|
from ..compat import ParamSpec
|
||||||
|
|
||||||
from decorator import decorator
|
from decorator import decorator
|
||||||
|
import more_itertools
|
||||||
|
|
||||||
T = TypeVar('T')
|
T = TypeVar('T')
|
||||||
K = TypeVar('K')
|
K = TypeVar('K')
|
||||||
|
@ -165,7 +179,6 @@ def test_warn_if_empty_iterator() -> None:
|
||||||
res1 = nonempty()
|
res1 = nonempty()
|
||||||
assert len(w) == 0 # warning isn't emitted until iterator is consumed
|
assert len(w) == 0 # warning isn't emitted until iterator is consumed
|
||||||
assert_type(res1, Iterator[str])
|
assert_type(res1, Iterator[str])
|
||||||
# assert isinstance(res1, generator) # FIXME ??? how
|
|
||||||
assert list(res1) == ['a', 'aba']
|
assert list(res1) == ['a', 'aba']
|
||||||
assert len(w) == 0
|
assert len(w) == 0
|
||||||
|
|
||||||
|
@ -177,7 +190,6 @@ def test_warn_if_empty_iterator() -> None:
|
||||||
res2 = empty()
|
res2 = empty()
|
||||||
assert len(w) == 0 # warning isn't emitted until iterator is consumed
|
assert len(w) == 0 # warning isn't emitted until iterator is consumed
|
||||||
assert_type(res2, Iterator[int])
|
assert_type(res2, Iterator[int])
|
||||||
# assert isinstance(res1, generator) # FIXME ??? how
|
|
||||||
assert list(res2) == []
|
assert list(res2) == []
|
||||||
assert len(w) == 1
|
assert len(w) == 1
|
||||||
|
|
||||||
|
@ -191,7 +203,6 @@ def test_warn_if_empty_list() -> None:
|
||||||
def nonempty() -> List[int]:
|
def nonempty() -> List[int]:
|
||||||
return ll
|
return ll
|
||||||
|
|
||||||
|
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
res1 = nonempty()
|
res1 = nonempty()
|
||||||
assert len(w) == 0
|
assert len(w) == 0
|
||||||
|
@ -199,12 +210,10 @@ def test_warn_if_empty_list() -> None:
|
||||||
assert isinstance(res1, list)
|
assert isinstance(res1, list)
|
||||||
assert res1 is ll # object should be unchanged!
|
assert res1 is ll # object should be unchanged!
|
||||||
|
|
||||||
|
|
||||||
@warn_if_empty
|
@warn_if_empty
|
||||||
def empty() -> List[str]:
|
def empty() -> List[str]:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
res2 = empty()
|
res2 = empty()
|
||||||
assert len(w) == 1
|
assert len(w) == 1
|
||||||
|
@ -218,3 +227,150 @@ def test_warn_if_empty_unsupported() -> None:
|
||||||
@warn_if_empty # type: ignore[type-var]
|
@warn_if_empty # type: ignore[type-var]
|
||||||
def bad_return_type() -> float:
|
def bad_return_type() -> float:
|
||||||
return 0.00
|
return 0.00
|
||||||
|
|
||||||
|
|
||||||
|
_HT = TypeVar('_HT', bound=Hashable)
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE: ideally we'do It = TypeVar('It', bound=Iterable[_HT]), and function would be It -> It
|
||||||
|
# Sadly this doesn't work in mypy, doesn't look like we can have double bound TypeVar
|
||||||
|
# Not a huge deal, since this function is for unique_eversee and
|
||||||
|
# we need to pass iterator to unique_everseen anyway
|
||||||
|
# TODO maybe contribute to more_itertools? https://github.com/more-itertools/more-itertools/issues/898
|
||||||
|
def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]:
|
||||||
|
"""
|
||||||
|
NOTE: Despite Hashable bound, typing annotation doesn't guarantee runtime safety
|
||||||
|
Consider hashable type X, and Y that inherits from X, but not hashable
|
||||||
|
Then l: List[X] = [Y(...)] is a valid expression, and type checks against Hashable,
|
||||||
|
but isn't runtime hashable
|
||||||
|
"""
|
||||||
|
# Sadly this doesn't work 100% correctly with dataclasses atm...
|
||||||
|
# they all are considered hashable: https://github.com/python/mypy/issues/11463
|
||||||
|
|
||||||
|
if isinstance(iterable, Iterator):
|
||||||
|
|
||||||
|
def res() -> Iterator[_HT]:
|
||||||
|
for i in iterable:
|
||||||
|
assert isinstance(i, Hashable), i
|
||||||
|
# ugh. need a cast due to https://github.com/python/mypy/issues/10817
|
||||||
|
yield cast(_HT, i)
|
||||||
|
|
||||||
|
return res()
|
||||||
|
else:
|
||||||
|
# hopefully, iterable that can be iterated over multiple times?
|
||||||
|
# not sure if should have 'allowlist' of types that don't have to be transformed instead?
|
||||||
|
for i in iterable:
|
||||||
|
assert isinstance(i, Hashable), i
|
||||||
|
return iterable
|
||||||
|
|
||||||
|
|
||||||
|
# TODO different policies -- error/warn/ignore?
|
||||||
|
def test_check_if_hashable() -> None:
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Set, Tuple
|
||||||
|
import pytest
|
||||||
|
from ..compat import assert_type
|
||||||
|
|
||||||
|
x1: List[int] = [1, 2]
|
||||||
|
r1 = check_if_hashable(x1)
|
||||||
|
# tgype: ignore[comparison-overlap] # object should be unchanged
|
||||||
|
assert r1 is x1
|
||||||
|
assert_type(r1, Iterable[int])
|
||||||
|
|
||||||
|
x2: Iterator[Union[int, str]] = iter((123, 'aba'))
|
||||||
|
r2 = check_if_hashable(x2)
|
||||||
|
assert list(r2) == [123, 'aba']
|
||||||
|
assert_type(r2, Iterable[Union[int, str]])
|
||||||
|
|
||||||
|
x3: Tuple[object, ...] = (789, 'aba')
|
||||||
|
r3 = check_if_hashable(x3)
|
||||||
|
# ttype: ignore[comparison-overlap] # object should be unchanged
|
||||||
|
assert r3 is x3
|
||||||
|
assert_type(r3, Iterable[object])
|
||||||
|
|
||||||
|
x4: List[Set[int]] = [{1, 2, 3}, {4, 5, 6}]
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
# should be rejected by mypy sice set isn't Hashable, but also throw at runtime
|
||||||
|
r4 = check_if_hashable(x4) # type: ignore[type-var]
|
||||||
|
|
||||||
|
x5: Iterator[object] = iter([{1, 2}, {3, 4}])
|
||||||
|
# here, we hide behind object, which is hashable
|
||||||
|
# so mypy can't really help us anything
|
||||||
|
r5 = check_if_hashable(x5)
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
# note: this only throws when iterator is advanced
|
||||||
|
list(r5)
|
||||||
|
|
||||||
|
# dataclass is unhashable by default! unless frozen=True and eq=True, or unsafe_hash=True
|
||||||
|
@dataclass(unsafe_hash=True)
|
||||||
|
class X:
|
||||||
|
a: int
|
||||||
|
|
||||||
|
x6: List[X] = [X(a=123)]
|
||||||
|
r6 = check_if_hashable(x6)
|
||||||
|
assert x6 is r6
|
||||||
|
|
||||||
|
# inherited dataclass will not be hashable!
|
||||||
|
@dataclass
|
||||||
|
class Y(X):
|
||||||
|
b: str
|
||||||
|
|
||||||
|
x7: List[Y] = [Y(a=123, b='aba')]
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
# ideally that would also be rejected by mypy, but currently there is a bug
|
||||||
|
# which treats all dataclasses as hashable: https://github.com/python/mypy/issues/11463
|
||||||
|
check_if_hashable(x7)
|
||||||
|
|
||||||
|
|
||||||
|
_UET = TypeVar('_UET')
|
||||||
|
_UEU = TypeVar('_UEU')
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE: for historic reasons, this function had to accept Callable that retuns iterator
|
||||||
|
# instead of just iterator
|
||||||
|
# TODO maybe deprecated Callable support? not sure
|
||||||
|
def unique_everseen(
|
||||||
|
fun: Union[
|
||||||
|
Callable[[], Iterable[_UET]],
|
||||||
|
Iterable[_UET]
|
||||||
|
],
|
||||||
|
key: Optional[Callable[[_UET], _UEU]] = None,
|
||||||
|
) -> Iterator[_UET]:
|
||||||
|
import os
|
||||||
|
|
||||||
|
if callable(fun):
|
||||||
|
iterable = fun()
|
||||||
|
else:
|
||||||
|
iterable = fun
|
||||||
|
|
||||||
|
if key is None:
|
||||||
|
# todo check key return type as well? but it's more likely to be hashable
|
||||||
|
if os.environ.get('HPI_CHECK_UNIQUE_EVERSEEN') is not None:
|
||||||
|
iterable = check_if_hashable(iterable)
|
||||||
|
|
||||||
|
return more_itertools.unique_everseen(iterable=iterable, key=key)
|
||||||
|
|
||||||
|
|
||||||
|
def test_unique_everseen() -> None:
|
||||||
|
import pytest
|
||||||
|
from ..tests.common import tmp_environ_set
|
||||||
|
|
||||||
|
def fun_good() -> Iterator[int]:
|
||||||
|
yield 123
|
||||||
|
|
||||||
|
def fun_bad():
|
||||||
|
return [{1, 2}, {1, 2}, {1, 3}]
|
||||||
|
|
||||||
|
with tmp_environ_set('HPI_CHECK_UNIQUE_EVERSEEN', 'yes'):
|
||||||
|
assert list(unique_everseen(fun_good)) == [123]
|
||||||
|
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
# since function retuns a list rather than iterator, check happens immediately
|
||||||
|
# , even without advancing the iterator
|
||||||
|
unique_everseen(fun_bad)
|
||||||
|
|
||||||
|
good_list = [4, 3, 2, 1, 2, 3, 4]
|
||||||
|
assert list(unique_everseen(good_list)) == [4, 3, 2, 1]
|
||||||
|
|
||||||
|
with tmp_environ_set('HPI_CHECK_UNIQUE_EVERSEEN', None):
|
||||||
|
assert list(unique_everseen(fun_bad)) == [{1, 2}, {1, 3}]
|
||||||
|
|
Loading…
Add table
Reference in a new issue