core: cleanup my.core.common.unique_everseen
- move to my.core.utils.itertools
- more robust check for hashable types -- it now checks at runtime (since a check based purely on types isn't necessarily sound)
- add more testing
This commit is contained in:
parent
06084a8787
commit
bcc4c15304
3 changed files with 183 additions and 63 deletions
|
@ -6,13 +6,11 @@ import functools
|
|||
from contextlib import contextmanager
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
|
@ -21,9 +19,6 @@ from typing import (
|
|||
TypeVar,
|
||||
Union,
|
||||
cast,
|
||||
get_args,
|
||||
get_type_hints,
|
||||
get_origin,
|
||||
)
|
||||
import warnings
|
||||
|
||||
|
@ -426,58 +421,8 @@ class DummyExecutor(Executor):
|
|||
self._shutdown = True
|
||||
|
||||
|
||||
def _check_all_hashable(fun):
    """Check that the item type(s) in the return annotation of ``fun`` are hashable.

    The return annotation is expected to be a generic with exactly one type
    argument (e.g. ``Iterator[X]``). If that argument is a union on the top
    level (``Union[X, Y]`` or ``X | Y``), every member of the union is checked;
    otherwise the argument itself is checked.

    Args:
        fun: callable whose annotations can be resolved by ``typing.get_type_hints``.

    Raises:
        ValueError: the return annotation is missing or doesn't have exactly one type argument.
        AssertionError: at least one of the item types has ``__hash__`` set to ``None``.
    """
    # TODO ok, take callable?
    hints = get_type_hints(fun)
    # TODO needs to be defensive like in cachew?
    return_type = hints.get('return')
    type_args = get_args(return_type)
    if len(type_args) != 1:
        # previously this surfaced as an opaque tuple unpacking error (also a ValueError)
        raise ValueError(
            f'{fun!r} needs a return annotation with exactly one type argument (e.g. Iterator[X]), got {return_type!r}'
        )
    (arg,) = type_args

    # options we wanna handle are simple type on the top level or union
    arg_origin = get_origin(arg)

    # types.UnionType (origin of 'X | Y') is only available starting from python 3.10
    if sys.version_info[:2] >= (3, 10):
        is_uniontype = arg_origin is types.UnionType
    else:
        is_uniontype = False

    is_union = arg_origin is Union or is_uniontype
    to_check = get_args(arg) if is_union else (arg,)

    def _has_no_hash(t):
        # parametrized aliases (e.g. List[int]) are hashable objects themselves,
        # so unwrap them to the runtime class (list) before looking at __hash__
        origin = get_origin(t)
        runtime_type = t if origin is None else origin
        # seems that objects that have not overridden hash have the attribute but it's set to None
        return getattr(runtime_type, '__hash__', None) is None

    no_hash = [t for t in to_check if _has_no_hash(t)]
    if no_hash:
        # explicit raise instead of assert, so the check isn't stripped under 'python -O'
        raise AssertionError(f'Types {no_hash} are not hashable, this will result in significant performance downgrade for unique_everseen')
|
||||
|
||||
|
||||
_UET = TypeVar('_UET')
_UEU = TypeVar('_UEU')


def unique_everseen(
    fun: Callable[[], Iterable[_UET]],
    key: Optional[Callable[[_UET], _UEU]] = None,
) -> Iterator[_UET]:
    """Call ``fun`` and pass its result through ``more_itertools.unique_everseen``.

    ``fun`` is a zero-argument callable returning the iterable to process;
    ``key`` is forwarded to ``more_itertools.unique_everseen`` as is.

    When ``key`` is not given and the ``HPI_CHECK_UNIQUE_EVERSEEN`` environment
    variable is set, the return annotation of ``fun`` is additionally passed
    through ``_check_all_hashable``.
    """
    # TODO support normal iterable as well?
    import more_itertools

    # NOTE: the original callable is required (rather than its result), because
    # the type annotations are only accessible through the callable
    items = fun()

    # todo check key return type as well? but it's more likely to be hashable
    check_requested = 'HPI_CHECK_UNIQUE_EVERSEEN' in os.environ
    if key is None and check_requested:
        # TODO return better error here, e.g. if there is no return type it crashes
        _check_all_hashable(fun)

    return more_itertools.unique_everseen(iterable=items, key=key)
|
||||
# TODO deprecate and suggest to use one from my.core directly? not sure
|
||||
from .utils.itertools import unique_everseen
|
||||
|
||||
|
||||
### legacy imports, keeping them here for backwards compatibility
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue