my.hypothesis: expose data as iterators instead of lists

Also add an adapter to support migrating in a backwards-compatible manner.
This commit is contained in:
Dima Gerasimov 2023-10-28 17:46:47 +01:00 committed by karlicoss
parent 4f7c9b4a71
commit d88a1b9933
2 changed files with 64 additions and 42 deletions

View file

@ -6,9 +6,9 @@ import os
import inspect
import re
from types import ModuleType
from typing import List
from typing import Iterator, List, Optional, TypeVar
from my.core import warnings
from . import warnings
def handle_legacy_import(
@ -108,3 +108,40 @@ def _get_dal(cfg, module_name: str):
from importlib import import_module
return import_module(f'my.config.repos.{module_name}.dal')
V = TypeVar('V')


# named to be kinda consistent with more_itertools, e.g. more_itertools.always_iterable
class always_supports_sequence(Iterator[V]):
    """
    Helper to make migration from Sequence/List to Iterable/Iterator type backwards compatible

    Wraps an iterator and forwards iteration to it. The first time the wrapper
    is used like a list (``len(x)`` or ``x[i]``), whatever is still left in the
    iterator is drained into a cached list and a deprecation warning is emitted.
    From that point on iteration is served from the cached list, so the wrapper
    keeps yielding items even though the underlying iterator is exhausted.
    """

    def __init__(self, it: Iterator[V]) -> None:
        self.it = it
        # filled lazily by `aslist` on first list-like access
        self._list: Optional[List[V]] = None
        # cursor over self._list; only used once the list has been materialised
        self._lit: Optional[Iterator[V]] = None

    def __iter__(self) -> Iterator[V]:
        if self._list is not None:
            # the wrapped iterator has been drained: restart from the cached items
            self._lit = iter(self._list)
        return self

    def __next__(self) -> V:
        if self._list is None:
            # nothing materialised yet: stay fully lazy
            return next(self.it)
        if self._lit is None:
            self._lit = iter(self._list)
        return next(self._lit)

    def __getattr__(self, name):
        # Only reached when normal lookup fails. If our own attributes are
        # missing the instance is uninitialised (e.g. created via __new__), and
        # delegating through self.it would recurse forever.
        if name in ('it', '_list', '_lit'):
            raise AttributeError(name)
        return getattr(self.it, name)

    @property
    def aslist(self) -> List[V]:
        """Remaining items as a list; materialised (with a warning) on first access."""
        if self._list is None:
            qualname = getattr(self.it, '__qualname__', '<no qualname>')  # defensive just in case
            warnings.medium(f'Using {qualname} as list is deprecated. Migrate to iterative processing or call list() explicitly.')
            self._list = list(self.it)
            # CPython's list() constructor calls iter() first and only then asks
            # for the length, so an iteration may already be in flight when we
            # get here: point the cursor at the freshly cached items.
            self._lit = iter(self._list)
        return self._list

    def __len__(self) -> int:
        return len(self.aslist)

    def __getitem__(self, i: int) -> V:
        return self.aslist[i]

View file

@ -5,21 +5,23 @@ REQUIRES = [
'git+https://github.com/karlicoss/hypexport',
]
from dataclasses import dataclass
from datetime import datetime
from typing import Callable
from .core import Paths
from my.config import hypothesis as user_config
REQUIRES = [
'git+https://github.com/karlicoss/hypexport',
]
from pathlib import Path
from typing import Iterator, Sequence
from my.core import (
get_files,
stat,
Paths,
Res,
Stats,
)
from my.core.cfg import make_config
from my.core.hpi_compat import always_supports_sequence
import my.config
@dataclass
class hypothesis(user_config):
class hypothesis(my.config.hypothesis):
'''
Uses [[https://github.com/karlicoss/hypexport][hypexport]] outputs
'''
@ -28,7 +30,6 @@ class hypothesis(user_config):
export_path: Paths
from .core.cfg import make_config
config = make_config(hypothesis)
@ -39,37 +40,28 @@ except ModuleNotFoundError as e:
dal = pre_pip_dal_handler('hypexport', e, config, requires=REQUIRES)
############################
from typing import List
from .core.error import Res, sort_res_by
Highlight = dal.Highlight
Page = dal.Page
def inputs() -> Sequence[Path]:
    """Return the files that ``get_files`` resolves for ``config.export_path``."""
    export_files = get_files(config.export_path)
    return export_files
def _dal() -> dal.DAL:
    """Build a ``dal.DAL`` over the files returned by ``inputs()``."""
    # inputs() already wraps get_files(config.export_path), so there is no need
    # to re-import get_files here or to keep a second, unreachable return.
    return dal.DAL(inputs())
# TODO they are in reverse chronological order...
def highlights() -> Iterator[Res[Highlight]]:
    """
    Iterate over the highlights reported by the DAL.

    The result is wrapped in ``always_supports_sequence`` so that legacy callers
    which still treat the return value as a list keep working.
    """
    # Only this definition is kept: the earlier list-returning variant was
    # immediately rebound by this one and therefore never callable.
    return always_supports_sequence(_dal().highlights())
# TODO eh. always provide iterators? although sort_res_by could be neat too...
def pages() -> Iterator[Res[Page]]:
    """
    Iterate over the pages reported by the DAL.

    The result is wrapped in ``always_supports_sequence`` so that legacy callers
    which still treat the return value as a list keep working.
    """
    # Only this definition is kept: the earlier list-returning variant was
    # immediately rebound by this one and therefore never callable.
    return always_supports_sequence(_dal().pages())
from .core import stat, Stats
def stats() -> Stats:
return {
**stat(highlights),
@ -77,12 +69,5 @@ def stats() -> Stats:
}
def _main() -> None:
    """Print every item produced by ``get_pages()``, one ``print`` call each."""
    all_pages = get_pages()
    for entry in all_pages:
        print(entry)
if __name__ == '__main__':
_main()
get_highlights = highlights # todo deprecate
get_pages = pages # todo deprecate