core: add helper for more_itertools to check that all types involved are hashable

Otherwise unique_everseen has to fall back to a slower path for unhashable items, and its performance may degrade from linear to quadratic.

For now, the check is hidden behind the HPI_CHECK_UNIQUE_EVERSEEN flag.

Also switch some modules to use it.
This commit is contained in:
karlicoss 2023-10-31 00:42:17 +00:00
parent d6786084ca
commit 71cb66df5f
8 changed files with 90 additions and 23 deletions

View file

@ -10,8 +10,6 @@ from pathlib import Path
import sqlite3
from typing import Iterator, Sequence, Optional, Dict, Union
from more_itertools import unique_everseen
from my.core import (
get_files,
Paths,
@ -22,6 +20,7 @@ from my.core import (
Res,
assert_never,
)
from my.core.common import unique_everseen
from my.core.cachew import mcachew
from my.core.error import echain
from my.core.sqlite import sqlite_connect_immutable, select
@ -196,7 +195,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
@mcachew(depends_on=inputs)
def messages() -> Iterator[Res[Message]]:
id2user: Dict[str, User] = {}
for x in unique_everseen(_entities()):
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x
continue

View file

@ -7,7 +7,7 @@ import json
from pathlib import Path
from typing import Iterator, Sequence, Dict, Union
from more_itertools import bucket, unique_everseen
from more_itertools import bucket
from my.core import (
get_files,
@ -17,6 +17,7 @@ from my.core import (
assert_never,
make_logger,
)
from my.core.common import unique_everseen
from my.config import instagram as user_config
@ -196,7 +197,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]:
# TODO basically copy pasted from android.py... hmm
def messages() -> Iterator[Res[Message]]:
id2user: Dict[str, User] = {}
for x in unique_everseen(_entities()):
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x
continue