core: cleanup my.core.common.unique_everseen

- move to my.core.utils.itertools
- more robust check for hashable types -- now checks in runtime (since the one based on types purely isn't necessarily sound)
- add more testing
This commit is contained in:
Dima Gerasimov 2024-08-15 13:28:45 +03:00 committed by karlicoss
parent 06084a8787
commit bcc4c15304
3 changed files with 183 additions and 63 deletions

View file

@ -4,12 +4,26 @@ Various helpers/transforms of iterators
Ideally this should be as small as possible and we should rely on stdlib itertools or more_itertools
"""
from typing import Callable, Dict, Iterable, Iterator, Sized, TypeVar, List, cast, TYPE_CHECKING
from collections.abc import Hashable
from typing import (
Callable,
Dict,
Iterable,
Iterator,
List,
Optional,
Sized,
Union,
TypeVar,
cast,
TYPE_CHECKING,
)
import warnings
from ..compat import ParamSpec
from decorator import decorator
import more_itertools
T = TypeVar('T')
K = TypeVar('K')
@ -165,7 +179,6 @@ def test_warn_if_empty_iterator() -> None:
res1 = nonempty()
assert len(w) == 0 # warning isn't emitted until iterator is consumed
assert_type(res1, Iterator[str])
# assert isinstance(res1, generator) # FIXME ??? how
assert list(res1) == ['a', 'aba']
assert len(w) == 0
@ -177,7 +190,6 @@ def test_warn_if_empty_iterator() -> None:
res2 = empty()
assert len(w) == 0 # warning isn't emitted until iterator is consumed
assert_type(res2, Iterator[int])
# assert isinstance(res1, generator) # FIXME ??? how
assert list(res2) == []
assert len(w) == 1
@ -191,7 +203,6 @@ def test_warn_if_empty_list() -> None:
def nonempty() -> List[int]:
return ll
with warnings.catch_warnings(record=True) as w:
res1 = nonempty()
assert len(w) == 0
@ -199,12 +210,10 @@ def test_warn_if_empty_list() -> None:
assert isinstance(res1, list)
assert res1 is ll # object should be unchanged!
@warn_if_empty
def empty() -> List[str]:
return []
with warnings.catch_warnings(record=True) as w:
res2 = empty()
assert len(w) == 1
@ -218,3 +227,150 @@ def test_warn_if_empty_unsupported() -> None:
@warn_if_empty # type: ignore[type-var]
def bad_return_type() -> float:
return 0.00
_HT = TypeVar('_HT', bound=Hashable)
# NOTE: ideally we'do It = TypeVar('It', bound=Iterable[_HT]), and function would be It -> It
# Sadly this doesn't work in mypy, doesn't look like we can have double bound TypeVar
# Not a huge deal, since this function is for unique_eversee and
# we need to pass iterator to unique_everseen anyway
# TODO maybe contribute to more_itertools? https://github.com/more-itertools/more-itertools/issues/898
def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]:
"""
NOTE: Despite Hashable bound, typing annotation doesn't guarantee runtime safety
Consider hashable type X, and Y that inherits from X, but not hashable
Then l: List[X] = [Y(...)] is a valid expression, and type checks against Hashable,
but isn't runtime hashable
"""
# Sadly this doesn't work 100% correctly with dataclasses atm...
# they all are considered hashable: https://github.com/python/mypy/issues/11463
if isinstance(iterable, Iterator):
def res() -> Iterator[_HT]:
for i in iterable:
assert isinstance(i, Hashable), i
# ugh. need a cast due to https://github.com/python/mypy/issues/10817
yield cast(_HT, i)
return res()
else:
# hopefully, iterable that can be iterated over multiple times?
# not sure if should have 'allowlist' of types that don't have to be transformed instead?
for i in iterable:
assert isinstance(i, Hashable), i
return iterable
# TODO different policies -- error/warn/ignore?
def test_check_if_hashable() -> None:
from dataclasses import dataclass
from typing import Set, Tuple
import pytest
from ..compat import assert_type
x1: List[int] = [1, 2]
r1 = check_if_hashable(x1)
# tgype: ignore[comparison-overlap] # object should be unchanged
assert r1 is x1
assert_type(r1, Iterable[int])
x2: Iterator[Union[int, str]] = iter((123, 'aba'))
r2 = check_if_hashable(x2)
assert list(r2) == [123, 'aba']
assert_type(r2, Iterable[Union[int, str]])
x3: Tuple[object, ...] = (789, 'aba')
r3 = check_if_hashable(x3)
# ttype: ignore[comparison-overlap] # object should be unchanged
assert r3 is x3
assert_type(r3, Iterable[object])
x4: List[Set[int]] = [{1, 2, 3}, {4, 5, 6}]
with pytest.raises(Exception):
# should be rejected by mypy sice set isn't Hashable, but also throw at runtime
r4 = check_if_hashable(x4) # type: ignore[type-var]
x5: Iterator[object] = iter([{1, 2}, {3, 4}])
# here, we hide behind object, which is hashable
# so mypy can't really help us anything
r5 = check_if_hashable(x5)
with pytest.raises(Exception):
# note: this only throws when iterator is advanced
list(r5)
# dataclass is unhashable by default! unless frozen=True and eq=True, or unsafe_hash=True
@dataclass(unsafe_hash=True)
class X:
a: int
x6: List[X] = [X(a=123)]
r6 = check_if_hashable(x6)
assert x6 is r6
# inherited dataclass will not be hashable!
@dataclass
class Y(X):
b: str
x7: List[Y] = [Y(a=123, b='aba')]
with pytest.raises(Exception):
# ideally that would also be rejected by mypy, but currently there is a bug
# which treats all dataclasses as hashable: https://github.com/python/mypy/issues/11463
check_if_hashable(x7)
_UET = TypeVar('_UET')
_UEU = TypeVar('_UEU')
# NOTE: for historic reasons, this function had to accept Callable that retuns iterator
# instead of just iterator
# TODO maybe deprecated Callable support? not sure
def unique_everseen(
fun: Union[
Callable[[], Iterable[_UET]],
Iterable[_UET]
],
key: Optional[Callable[[_UET], _UEU]] = None,
) -> Iterator[_UET]:
import os
if callable(fun):
iterable = fun()
else:
iterable = fun
if key is None:
# todo check key return type as well? but it's more likely to be hashable
if os.environ.get('HPI_CHECK_UNIQUE_EVERSEEN') is not None:
iterable = check_if_hashable(iterable)
return more_itertools.unique_everseen(iterable=iterable, key=key)
def test_unique_everseen() -> None:
import pytest
from ..tests.common import tmp_environ_set
def fun_good() -> Iterator[int]:
yield 123
def fun_bad():
return [{1, 2}, {1, 2}, {1, 3}]
with tmp_environ_set('HPI_CHECK_UNIQUE_EVERSEEN', 'yes'):
assert list(unique_everseen(fun_good)) == [123]
with pytest.raises(Exception):
# since function retuns a list rather than iterator, check happens immediately
# , even without advancing the iterator
unique_everseen(fun_bad)
good_list = [4, 3, 2, 1, 2, 3, 4]
assert list(unique_everseen(good_list)) == [4, 3, 2, 1]
with tmp_environ_set('HPI_CHECK_UNIQUE_EVERSEEN', None):
assert list(unique_everseen(fun_bad)) == [{1, 2}, {1, 3}]