core: add hpi query command (#157)

- restructure query code for cli, some test fixes - initial query_range implementation refactored functions in query some more to allow re-use in range_range, select() pretty much just calls out to a bunch of handlers now
2021-04-06 09:19:58 -07:00 · 2021-04-06 09:19:58 -07:00 · fb49243005
commit fb49243005
parent b94120deaf
11 changed files with 902 additions and 134 deletions
--- a/my/core/main.py
+++ b/my/core/main.py
@ -3,7 +3,7 @@ import importlib
 import os
 import sys
 import traceback
-from typing import Optional, Sequence, Iterable, List
+from typing import Optional, Sequence, Iterable, List, Type, Any
 from pathlib import Path
 from subprocess import check_call, run, PIPE, CompletedProcess
@ -329,6 +329,66 @@ def module_install(*, user: bool, module: str) -> None:
    check_call(cmd)
 # handle the 'hpi query' call
 # can raise a QueryException, caught in the click command
 def query_hpi_functions(
    *,
    output: str = 'json',
    qualified_names: List[str],
    order_key: Optional[str],
    order_by_value_type: Optional[Type],
    after: Any,
    before: Any,
    within: Any,
    reverse: bool = False,
    limit: Optional[int],
    drop_unsorted: bool,
    wrap_unsorted: bool,
    raise_exceptions: bool,
    drop_exceptions: bool,
 ) -> None:
    from itertools import chain
    from .query import locate_qualified_function
    from .query_range import select_range, RangeTuple
    # chain list of functions from user, in the order they wrote them on the CLI
    input_src = chain(*(locate_qualified_function(f)() for f in qualified_names))
    res = list(select_range(
        input_src,
        order_key=order_key,
        order_by_value_type=order_by_value_type,
        unparsed_range=RangeTuple(after=after, before=before, within=within),
        reverse=reverse,
        limit=limit,
        drop_unsorted=drop_unsorted,
        wrap_unsorted=wrap_unsorted,
        raise_exceptions=raise_exceptions,
        drop_exceptions=drop_exceptions))
    if output == 'json':
        from .serialize import dumps
        click.echo(dumps(res))
    elif output == 'pprint':
        from pprint import pprint
        pprint(res)
    else:
        # output == 'repl'
        eprint(f"\nInteract with the results by using the {click.style('res', fg='green')} variable\n")
        try:
            import IPython  # type: ignore[import]
        except ModuleNotFoundError:
            eprint("'repl' typically uses ipython, install it with 'python3 -m pip install ipython'. falling back to stdlib...")
            import code
            code.interact(local=locals())
        else:
            IPython.embed()
@click.group()
 def main() -> None:
    '''
@ -434,6 +494,147 @@ def module_install_cmd(user: bool, module: str) -> None:
    module_install(user=user, module=module)
@main.command(name='query', short_help='query the results of a HPI function')
@click.option('-o',
              '--output',
              default='json',
              type=click.Choice(['json', 'pprint', 'repl']),
              help='what to do with the result [default: json]')
@click.option('-k',
              '--order-key',
              default=None,
              type=str,
              help='order by an object attribute or dict key on the individual objects returned by the HPI function')
@click.option('-t',
              '--order-type',
              default=None,
              type=click.Choice(['datetime', 'date', 'int', 'float']),
              help='order by searching for some type on the iterable')
@click.option('--after',
              default=None,
              type=str,
              help='while ordering, filter items for the key or type larger than or equal to this')
@click.option('--before',
              default=None,
              type=str,
              help='while ordering, filter items for the key or type smaller than this')
@click.option('--within',
              default=None,
              type=str,
              help="a range 'after' or 'before' to filter items by. see above for further explanation")
@click.option('--recent',
              default=None,
              type=str,
              help="a shorthand for '--order-type datetime --reverse --before now --within'. e.g. --recent 5d")
@click.option('--reverse/--no-reverse',
              default=False,
              help='reverse the results returned from the functions')
@click.option('--limit',
              default=None,
              type=int,
              help='limit the number of items returned from the (functions)')
@click.option('--drop-unsorted',
              default=False,
              is_flag=True,
              help="If the order an item can't be determined while ordering, drop those items from the results")
@click.option('--wrap-unsorted',
              default=False,
              is_flag=True,
              help="If the order an item can't be determined while ordering, drop those items from the results")
@click.option('--raise-exceptions',
              default=False,
              is_flag=True,
              help="If any errors are returned (as objects, not raised) from the functions, raise them")
@click.option('--drop-exceptions',
              default=False,
              is_flag=True,
              help='Ignore any errors returned as objects from the functions')
@click.argument('FUNCTION_NAME', nargs=-1, required=True)
 def query_cmd(
    function_name: Sequence[str],
    output: str,
    order_key: Optional[str],
    order_type: Optional[str],
    after: Optional[str],
    before: Optional[str],
    within: Optional[str],
    recent: Optional[str],
    reverse: bool,
    limit: Optional[int],
    drop_unsorted: bool,
    wrap_unsorted: bool,
    raise_exceptions: bool,
    drop_exceptions: bool,
 ) -> None:
    '''
    This allows you to query the results from one or more functions in HPI
    By default this runs with '-o json', converting the results
    to JSON and printing them to STDOUT
    You can specify '-o pprint' to just print the objects using their
    repr, or '-o repl' to drop into a ipython shell with access to the results
    While filtering using --order-key datetime, the --after, --before and --within
    flags parse the input to their datetime and timedelta equivalents. datetimes can
    be epoch time, the string 'now', or an date formatted in the ISO format. timedelta
    (durations) are parsed from a similar format to the GNU 'sleep' command, e.g.
    1w2d8h5m20s -> 1 week, 2 days, 8 hours, 5 minutes, 20 seconds
    As an example, to query reddit comments I've made in the last month
    \b
    hpi query --order-type datetime --before now --within 4w my.reddit.comments
    or...
    hpi query --recent 4w my.reddit.comments
    \b
    Can also query within a range. To filter comments between 2016 and 2018:
    hpi query --order-type datetime --after '2016-01-01 00:00:00' --before '2019-01-01 00:00:00' my.reddit.comments
    '''
    from datetime import datetime, date
    chosen_order_type: Optional[Type]
    if order_type == "datetime":
        chosen_order_type = datetime
    if order_type == "date":
        chosen_order_type = date
    elif order_type == "int":
        chosen_order_type = int
    elif order_type == "float":
        chosen_order_type = float
    else:
        chosen_order_type = None
    if recent is not None:
        before = "now"
        chosen_order_type = chosen_order_type or datetime  # dont override if the user specified date
        within = recent
        reverse = not reverse
    from .query import QueryException
    try:
        query_hpi_functions(
            output=output,
            qualified_names=list(function_name),
            order_key=order_key,
            order_by_value_type=chosen_order_type,
            after=after,
            before=before,
            within=within,
            reverse=reverse,
            limit=limit,
            drop_unsorted=drop_unsorted,
            wrap_unsorted=wrap_unsorted,
            raise_exceptions=raise_exceptions,
            drop_exceptions=drop_exceptions)
    except QueryException as qe:
        eprint(str(qe))
        sys.exit(1)
 # todo: add more tests?
 # its standard click practice to have the function click calls be a separate
 # function from the decorated function, as it allows the application-specific code to be
--- a/my/core/common.py
+++ b/my/core/common.py
@ -352,21 +352,12 @@ class classproperty(Generic[_R]):
 tzdatetime = datetime
 fromisoformat: Callable[[str], datetime]
 import sys
 if sys.version_info[:2] >= (3, 7):
    # prevent mypy on py3.6 from complaining...
    fromisoformat_real = datetime.fromisoformat
    fromisoformat = fromisoformat_real
 else:
    from .py37 import fromisoformat
 # TODO doctests?
 def isoparse(s: str) -> tzdatetime:
    """
    Parses timestamps formatted like 2020-05-01T10:32:02.925961Z
    """
    from .compat import fromisoformat
    # TODO could use dateutil? but it's quite slow as far as I remember..
    # TODO support non-utc.. somehow?
    assert s.endswith('Z'), s
--- a/my/core/compat.py
+++ b/my/core/compat.py
@ -2,6 +2,8 @@
 Some backwards compatibility stuff/deprecation helpers
 '''
 from types import ModuleType
 from typing import Callable
 from datetime import datetime
 from . import warnings
 from .common import LazyLogger
@ -10,6 +12,16 @@ from .common import LazyLogger
 logger = LazyLogger('my.core.compat')
 fromisoformat: Callable[[str], datetime]
 import sys
 if sys.version_info[:2] >= (3, 7):
    # prevent mypy on py3.6 from complaining...
    fromisoformat_real = datetime.fromisoformat
    fromisoformat = fromisoformat_real
 else:
    from .py37 import fromisoformat
 def pre_pip_dal_handler(
        name: str,
        e: ModuleNotFoundError,
--- a/my/core/error.py
+++ b/my/core/error.py
@ -127,7 +127,7 @@ def attach_dt(e: Exception, *, dt: Optional[datetime]) -> Exception:
 # todo it might be problematic because might mess with timezones (when it's converted to string, it's converted to a shift)
 def extract_error_datetime(e: Exception) -> Optional[datetime]:
-    from .common import fromisoformat
+    from .compat import fromisoformat
    import re
    for x in reversed(e.args):
        if isinstance(x, datetime):
--- a/my/core/query.py
+++ b/my/core/query.py
@ -5,12 +5,11 @@ The main entrypoint to this library is the 'select' function below; try:
 python3 -c "from my.core.query import select; help(select)"
 """
 import re
 import dataclasses
 import importlib
 import inspect
 import itertools
-from datetime import datetime, date, timedelta
+from datetime import datetime
 from typing import TypeVar, Tuple, Optional, Union, Callable, Iterable, Iterator, Dict, Any, NamedTuple, List
 import more_itertools
@ -24,8 +23,6 @@ T = TypeVar("T")
 ET = Res[T]
 # e.g. ("my.reddit", "comments")
 Locator = Tuple[str, str]
 U = TypeVar("U")
 # In a perfect world, the return value from a OrderFunc would just be U,
 # not Optional[U]. However, since this has to deal with so many edge
@ -34,16 +31,13 @@ U = TypeVar("U")
 OrderFunc = Callable[[ET], Optional[U]]
 Where = Callable[[ET], bool]
 DateLike = Union[datetime, date]
 # the generated OrderFunc couldn't handle sorting this
 class Unsortable(NamedTuple):
    obj: Any
-
+class QueryException(ValueError):
 class QueryException(KeyError):
    """Used to differentiate query-related errors, so the CLI interface is more expressive"""
    pass
@ -65,6 +59,60 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterab
    raise QueryException(f"Could not find function {function_name} in {module_name}")
 def locate_qualified_function(qualified_name: str) -> Callable[[], Iterable[ET]]:
    """
    As an example, 'my.reddit.comments' -> locate_function('my.reddit', 'comments')
    """
    if "." not in qualified_name:
        raise QueryException("Could not find a '.' in the function name, e.g. my.reddit.comments")
    rdot_index = qualified_name.rindex(".")
    return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1:])
 def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optional[OrderFunc]:
    """
    Attempts to find an attribute which matches the 'where_function' on the object,
    using some getattr/dict checks. Returns a function which when called with
    this object returns the value which the 'where' matched against
    As an example:
    from typing import NamedTuple
    from datetime import datetime
    from my.core.query import attribute_func
    class A(NamedTuple):
        x: int
        y: datetime
    val = A(x=4, y=datetime.now())
    val.y
    > datetime.datetime(2021, 4, 5, 10, 52, 14, 395195)
    orderfunc = attribute_func(val, where=lambda o: isinstance(o, datetime))
    orderfunc(val)
    > datetime.datetime(2021, 4, 5, 10, 52, 14, 395195)
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            if where(v):
                return lambda o: o.get(k, default)  # type: ignore[union-attr]
    elif dataclasses.is_dataclass(obj):
        for (field_name, _annotation) in obj.__annotations__.items():
            if where(getattr(obj, field_name)):
                return lambda o: getattr(o, field_name, default)
    elif is_namedtuple(obj):
        assert hasattr(obj, '_fields'), "Could not find '_fields' on attribute which is assumed to be a NamedTuple"
        for field_name in getattr(obj, '_fields'):
            if where(getattr(obj, field_name)):
                return lambda o: getattr(o, field_name, default)
    # try using inspect.getmembers (like 'dir()') even if the dataclass/NT checks failed,
    # since the attribute one is searching for might be a @property
    for k, v in inspect.getmembers(obj):
        if where(v):
            return lambda o: getattr(o, k, default)
    return None
 def _generate_order_by_func(
        obj_res: Res[T],
        key: Optional[str] = None,
@ -119,7 +167,6 @@ pass 'drop_exceptions' to ignore exceptions""")
        # that you manually write an OrderFunc which
        # handles the edge cases, or provide a default
        # See tests for an example
        # TODO: write test
        if isinstance(obj, dict):
            if key in obj:  # acts as predicate instead of where_function
                return lambda o: o.get(key, default)  # type: ignore[union-attr]
@ -130,31 +177,16 @@ pass 'drop_exceptions' to ignore exceptions""")
    # Note: if the attribute you're ordering by is an Optional type,
    # and on some objects it'll return None, the getattr(o, field_name, default) won't
    # use the default, since it finds the attribute (it just happens to be set to None)
-    # should this do something like: 'lambda o: getattr(o, k, default) or default'
+    # perhaps this should do something like: 'lambda o: getattr(o, k, default) or default'
    # that would fix the case, but is additional work. Perhaps the user should instead
    # write a 'where' function, to check for that 'isinstance' on an Optional field,
-    # and not include those objects in the src iterable
+    # and not include those objects in the src iterable... becomes a bit messy with multiple sources
    # user must provide either a key or a where predicate
    if where_function is not None:
-        if isinstance(obj, dict):
+        func: Optional[OrderFunc] = attribute_func(obj, where_function, default)
-            for k, v in obj.items():
+        if func is not None:
-                if where_function(v):
+            return func
                    return lambda o: o.get(k, default)  # type: ignore[union-attr]
        elif dataclasses.is_dataclass(obj):
            for (field_name, _annotation) in obj.__annotations__.items():
                if where_function(getattr(obj, field_name)):
                    return lambda o: getattr(o, field_name, default)
        elif is_namedtuple(obj):
            assert hasattr(obj, '_fields'), "Could not find '_fields' on attribute which is assumed to be a NamedTuple"
            for field_name in getattr(obj, '_fields'):
                if where_function(getattr(obj, field_name)):
                    return lambda o: getattr(o, field_name, default)
        # try using inspect.getmembers (like 'dir()') even if the dataclass/NT checks failed,
        # since the attribute one is searching for might be a @property
        for k, v in inspect.getmembers(obj):
            if where_function(v):
                return lambda o: getattr(o, k, default)
    if default is not None:
        # warn here? it seems like you typically wouldn't want to just set the order by to
@ -206,6 +238,18 @@ def _drop_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Iterator[ET]:
        yield o
 # try getting the first value from the iterator
 # similar to my.core.common.warn_if_empty? this doesnt go through the whole iterator though
 def _peek_iter(itr: Iterator[ET]) -> Tuple[Optional[ET], Iterator[ET]]:
    itr = more_itertools.peekable(itr)
    try:
        first_item = itr.peek()
    except StopIteration:
        return None, itr
    else:
        return first_item, itr
 # similar to 'my.core.error.sort_res_by'?
 def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Tuple[Iterator[Unsortable], Iterator[ET]]:
    unsortable: List[Unsortable] = []
@ -241,8 +285,71 @@ def _handle_unsorted(
        return iter([]), itr
 # handles creating an order_value functon, using a lookup for
 # different types. ***This consumes the iterator***, so
 # you should definitely itertoolts.tee it beforehand
 # as to not exhaust the values
 def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: Optional[U] = None) -> OrderFunc:
    # TODO: add a kwarg to force lookup for every item? would sort of be like core.common.guess_datetime then
    order_by_lookup: Dict[Any, OrderFunc] = {}
    # need to go through a copy of the whole iterator here to
    # pre-generate functions to support sorting mixed types
    for obj_res in itr:
        key: Any = _determine_order_by_value_key(obj_res)
        if key not in order_by_lookup:
            keyfunc: Optional[OrderFunc] = _generate_order_by_func(
                obj_res,
                where_function=order_value,
                default=default,
                force_unsortable=True)
            # should never be none, as we have force_unsortable=True
            assert keyfunc is not None
            order_by_lookup[key] = keyfunc
    # todo: cache results from above _determine_order_by_value_key call and use here somehow?
    # would require additional state
    # order_by_lookup[_determine_order_by_value_key(o)] returns a function which
    # accepts o, and returns the value which sorted can use to order this by
    return lambda o: order_by_lookup[_determine_order_by_value_key(o)](o)
 # handles the arguments from the user, creating a order_value function
 # at least one of order_by, order_key or order_value must have a value
 def _handle_generate_order_by(
    itr,
    *,
    order_by: Optional[OrderFunc] = None,
    order_key: Optional[str] = None,
    order_value: Optional[Where] = None,
    default: Optional[U] = None,
 ) -> Tuple[Optional[OrderFunc], Iterator[ET]]:
    order_by_chosen: Optional[OrderFunc] = order_by  # if the user just supplied a function themselves
    if order_by is not None:
        return order_by, itr
    if order_key is not None:
        first_item, itr = _peek_iter(itr)
        if first_item is None:
            # signify the iterator was empty, return immediately from parent
            return None, itr
        # try to use a key, if it was supplied
        # order_key doesn't use local state - it just tries to find the passed
        # attribute, or default to the 'default' value. As mentioned above,
        # best used for items with a similar structure
        # note: this could fail if the first item doesn't have a matching attr/key?
        order_by_chosen = _generate_order_by_func(first_item, key=order_key, default=default)
        if order_by_chosen is None:
            raise QueryException(f"Error while ordering: could not find {order_key} on {first_item}")
        return order_by_chosen, itr
    if order_value is not None:
        itr, itr2 = itertools.tee(itr, 2)
        order_by_chosen = _generate_order_value_func(itr2, order_value, default)
        return order_by_chosen, itr
    raise QueryException("Could not determine a way to order src iterable - at least one of the order args must be set")
 def select(
-    src: Union[Locator, Iterable[ET], Callable[[], Iterable[ET]]],
+    src: Union[Iterable[ET], Callable[[], Iterable[ET]]],
    *,
    where: Optional[Where] = None,
    order_by: Optional[OrderFunc] = None,
@ -298,8 +405,8 @@ def select(
    The 'drop_exceptions' and 'raise_exceptions' let you ignore or raise when the src contains exceptions
-    src:            a locator to import a function from, an iterable of mixed types,
+    src:            an iterable of mixed types, or a function to be called,
-                    or a function to be called, as the input to this function
+                    as the input to this function
    where:          a predicate which filters the results before sorting
@ -333,10 +440,7 @@ def select(
    """
    it: Iterable[ET] = []  # default
-    # check if this is a locator
+    if callable(src):
    if type(src) == tuple and len(src) == 2:  # type: ignore[arg-type]
        it = locate_function(src[0], src[1])()  # type: ignore[index]
    elif callable(src):
        # hopefully this returns an iterable and not something that causes a bunch of lag when its called?
        # should typically not be the common case, but giving the option to
        # provide a function as input anyways
@ -344,7 +448,7 @@ def select(
    else:
        # assume it is already an iterable
        if not isinstance(src, Iterable):
-            low(f"""Input was neither a locator for a function, or a function itself.
+            low(f"""Input was neither a function, or some iterable
 Expected 'src' to be an Iterable, but found {type(src).__name__}...
 Will attempt to call iter() on the value""")
        it = src
@ -369,51 +473,21 @@ Will attempt to call iter() on the value""")
        itr = filter(where, itr)
    if order_by is not None or order_key is not None or order_value is not None:
-        # we have some sort of input that specifies we should reorder the iterator
+        order_by_chosen, itr = _handle_generate_order_by(itr, order_by=order_by,
                                                         order_key=order_key,
                                                         order_value=order_value,
                                                         default=default)
-        order_by_chosen: Optional[OrderFunc] = order_by  # if the user just supplied a function themselves
+        # signifies itr was filtered down to no data
-        if order_by is None:
+        if order_by_chosen is None:
-            itr = more_itertools.peekable(itr)
+            # previously would send an warning message here,
-            try:
+            # but sending the warning discourages this use-case
-                first_item = itr.peek()
+            # e.g. take this iterable and see if I've had an event in
-            except StopIteration:
+            # the last week, else notify me to do something
-                low("""While determining order_key, encountered empty iterable.
+            #
-Your 'src' may have been empty of the 'where' clause filtered the iterable to nothing""")
+            # low("""While determining order_key, encountered empty iterable.
-                # 'itr' is an empty iterable
+            # Your 'src' may have been empty of the 'where' clause filtered the iterable to nothing""")
-                return itr
+            return itr
            # try to use a key, if it was supplied
            # order_key doesn't use local state - it just tries to find the passed
            # attribute, or default to the 'default' value. As mentioned above,
            # best used for items with a similar structure
            # note: this could fail if the first item doesn't have a matching attr/key?
            if order_key is not None:
                order_by_chosen = _generate_order_by_func(first_item, key=order_key, default=default)
                if order_by_chosen is None:
                    raise QueryException(f"Error while ordering: could not find {order_key} on {first_item}")
            elif order_value is not None:
                itr1, itr2 = itertools.tee(itr, 2)
                # TODO: add a kwarg to force lookup for every item? would sort of be like core.common.guess_datetime then
                order_by_lookup: Dict[Any, OrderFunc] = {}
                # need to go through a copy of the whole iterator here to
                # pre-generate functions to support sorting mixed types
                for obj_res in itr1:
                    key: Any = _determine_order_by_value_key(obj_res)
                    if key not in order_by_lookup:
                        keyfunc: Optional[OrderFunc] = _generate_order_by_func(obj_res, where_function=order_value, default=default, force_unsortable=True)
                        # should never be none, as we have force_unsortable=True
                        assert keyfunc is not None
                        order_by_lookup[key] = keyfunc
                # set the 'itr' (iterator in higher scope)
                # to the copy (itertools.tee) of the iterator we haven't used yet
                itr = itr2
                # todo: cache results from above _determine_order_by_value_key call and use here somehow?
                # would require additional state
                # order_by_lookup[_determine_order_by_value_key(o)] returns a function which
                # accepts o, and returns the value which sorted can use to order this by
                order_by_chosen = lambda o: order_by_lookup[_determine_order_by_value_key(o)](o)
        assert order_by_chosen is not None
        # note: can't just attach sort unsortable values in the same iterable as the
@ -441,32 +515,6 @@ Your 'src' may have been empty of the 'where' clause filtered the iterable to no
    return itr
 timedelta_regex = re.compile(r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$")
 # https://stackoverflow.com/a/51916936
 def parse_timedelta_string(timedelta_str: str) -> timedelta:
    """
    This uses a syntax similar to the 'GNU sleep' command
    e.g.: 1w5d5h10m50s means '1 week, 5 days, 5 hours, 10 minutes, 50 seconds'
    """
    parts = timedelta_regex.match(timedelta_str)
    if parts is None:
        raise ValueError(f"Could not parse time duration from {timedelta_str}.\nValid examples: '8h', '1w2d8h5m20s', '2m4s'")
    time_params = {name: float(param) for name, param in parts.groupdict().items() if param}
    return timedelta(**time_params)  # type: ignore[arg-type]
 def test_parse_timedelta_string():
    import pytest
    with pytest.raises(ValueError, match=r"Could not parse time duration from"):
        parse_timedelta_string("5xxx")
    res = parse_timedelta_string("1w5d5h10m50s")
    assert res == timedelta(days=7.0 + 5.0, hours=5.0, minutes=10.0, seconds=50.0)
 # classes to use in tests, need to be defined at the top level
 # because of a mypy bug
@ -483,8 +531,6 @@ def test_basic_orders() -> None:
    import random
    import pytest
    def basic_iter() -> Iterator[_Int]:
        for v in range(1, 6):
            yield _Int(v)
@ -509,9 +555,8 @@ def test_basic_orders() -> None:
    res = list(select(input_items, where=filter_two, order_by=custom_order_by, limit=2))
    assert res == [_Int(1), _Int(3)]
-    # filter produces empty iterator
+    # filter produces empty iterator (previously this used to warn, doesn't anymore)
-    with pytest.warns(UserWarning, match=r"encountered empty iterable"):
+    res = list(select(input_items, where=lambda o: o is None, order_key="x"))
        res = list(select(input_items, where=lambda o: o is None, order_key="x"))
    assert len(res) == 0
@ -576,8 +621,6 @@ def _mixed_iter_errors() -> Iterator[Res[Union[_A, _B]]]:
 def test_order_value() -> None:
    default_order = list(_mixed_iter())
    # if the value for some attribute on this item is a datetime
    sorted_by_datetime = list(select(_mixed_iter(), order_value=lambda o: isinstance(o, datetime)))
    assert sorted_by_datetime == [
@ -595,7 +638,7 @@ def test_key_clash() -> None:
    import pytest
    # clashing keys causes errors if you use order_key
-    with pytest.raises(TypeError, match=r"not supported between instances of 'datetime.datetime' and 'int'") as te:
+    with pytest.raises(TypeError, match=r"not supported between instances of 'datetime.datetime' and 'int'"):
        list(select(_mixed_iter(), order_key="y"))
@ -613,7 +656,7 @@ def test_disabled_wrap_unsorted() -> None:
    import pytest
    # if disabled manually, should raise error
-    with pytest.raises(TypeError, match=r"not supported between instances of 'NoneType' and 'int'") as te2:
+    with pytest.raises(TypeError, match=r"not supported between instances of 'NoneType' and 'int'"):
        list(select(_mixed_iter(), order_key="z", wrap_unsorted=False))
@ -652,7 +695,7 @@ def test_wrap_unsortable_with_error_and_warning() -> None:
    from collections import Counter
    # by default should wrap unsortable (error)
-    with pytest.warns(UserWarning, match=r"encountered exception") as w:
+    with pytest.warns(UserWarning, match=r"encountered exception"):
        res = list(select(_mixed_iter_errors(), order_value=lambda o: isinstance(o, datetime)))
    assert Counter(map(lambda t: type(t).__name__, res)) == Counter({"_A": 4, "_B": 2, "Unsortable": 1})
    # compare the returned error wrapped in the Unsortable
@ -662,7 +705,6 @@ def test_wrap_unsortable_with_error_and_warning() -> None:
 def test_order_key_unsortable() -> None:
    import pytest
    from collections import Counter
    # both unsortable and items which dont match the order_by (order_key) in this case should be classified unsorted
--- a/my/core/query_range.py
+++ b/my/core/query_range.py
@ -0,0 +1,521 @@
 """
 An extension of the my.core.query.select function, allowing you to specify
 a type or key to filter the range by -- this creates a filter function
 given those values, coercing values on the iterable, returning you a
 filtered iterator
 See the select_range function below
 """
 import re
 import time
 from functools import lru_cache
 from datetime import datetime, timedelta, date
 from typing import Callable, Iterator, NamedTuple, Optional, Any, Type
 import more_itertools
 from .query import (
    QueryException,
    select,
    OrderFunc,
    Where,
    _handle_generate_order_by,
    ET,
 )
 from .compat import fromisoformat
 from .common import isoparse
 timedelta_regex = re.compile(r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$")
 # https://stackoverflow.com/a/51916936
 def parse_timedelta_string(timedelta_str: str) -> timedelta:
    """
    This uses a syntax similar to the 'GNU sleep' command
    e.g.: 1w5d5h10m50s means '1 week, 5 days, 5 hours, 10 minutes, 50 seconds'
    """
    parts = timedelta_regex.match(timedelta_str)
    if parts is None:
        raise ValueError(f"Could not parse time duration from {timedelta_str}.\nValid examples: '8h', '1w2d8h5m20s', '2m4s'")
    time_params = {name: float(param) for name, param in parts.groupdict().items() if param}
    return timedelta(**time_params)  # type: ignore[arg-type]
 def parse_timedelta_float(timedelta_str: str) -> float:
    return parse_timedelta_string(timedelta_str).total_seconds()
 def parse_datetime_float(date_str: str) -> float:
    """
    parses multiple possible representations of a datetime
    into a float, else raises a QueryException
    the query_cli interface compares floats instead of timestamps
    when comparing datetimes since handling it is unknown
    whether the sources the user is selecting from is tz-aware
    or not (or perhaps a mix of both?)
    """
    ds = date_str.strip()
    # special case
    if ds == "now":
        return time.time()
    # epoch timestamp
    try:
        # also handles epoch timestamps as integers
        ds_float = float(ds)
        # convert to make sure its a valid datetime
        datetime.fromtimestamp(ds_float)
    except ValueError:
        pass
    else:
        return ds_float
    try:
        # isoformat - default format when you call str() on datetime
        return fromisoformat(ds).timestamp()
    except ValueError:
        pass
    try:
        return isoparse(ds).timestamp()
    except (AssertionError, ValueError):
        raise QueryException(f"Was not able to parse {ds} into a datetime")
 # probably DateLike input? but a user could specify an order_key
 # which is an epoch timestamp or a float value which they
 # expect to be converted to a datetime to compare
@lru_cache(maxsize=None)
 def _datelike_to_float(dl: Any) -> float:
    if isinstance(dl, datetime):
        return dl.timestamp()
    elif isinstance(dl, date):
        # hmm... sets the hours/minutes/seconds to 0 -- make this configurable?
        return (datetime.combine(dl, datetime.min.time())).timestamp()
    else:
        try:
            return parse_datetime_float(dl)
        except QueryException as q:
            raise QueryException(f"While attempting to extract datetime from {dl}, to order by datetime:\n\n" + str(q))
 class RangeTuple(NamedTuple):
    """Can specify 0, 1 or 2 non-none items in a range -- but not all 3
    As an example, using datetimes/timedelta (some date, and possibly a duration)
    where 1 arg is not None
        - after is not None: filters it to any items 'after' the datetime
        - before is not None: filters to any items 'before' the datetime
        - within: filters to any items 'within' the timedelta, assuming you meant within the current
            timeframe, so before = time.time()
    when 2 args are not None:
        - after and within, filters anything after the initial 'after' time
            but 'within' the timeframe (parsed timedelta, e.g. 5d)
        - before and within, anything 'within' the timeframe, starting at the end
            of the timeframe -- 'before'
        - before and after - anything after 'after' and before 'before', acts as a time range
    """
    # technically doesn't need to be Optional[Any],
    # just to make it more clear these can be None
    after: Optional[Any]
    before: Optional[Any]
    within: Optional[Any]
 Converter = Callable[[Any], Any]
 def _parse_range(
    *,
    unparsed_range: RangeTuple,
    end_parser: Converter,
    within_parser: Converter,
    parsed_range: Optional[RangeTuple] = None,
    error_message: Optional[str] = None) -> Optional[RangeTuple]:
    if parsed_range is not None:
        return parsed_range
    err_msg = error_message or RangeTuple.__doc__
    after, before, within = None, None, None
    none_count = more_itertools.ilen(filter(lambda o: o is None, list(unparsed_range)))
    if none_count == 3:
        return None
    if none_count == 0:
        raise QueryException("Cannot specify 'after', 'before' and 'within' at the same time!\n\n" + err_msg)
    [after_str, before_str, within_str] = tuple(unparsed_range)
    after = end_parser(after_str) if after_str is not None else None
    before = end_parser(before_str) if before_str is not None else None
    within = within_parser(within_str) if within_str is not None else None
    return RangeTuple(after=after, before=before, within=within)
 def _create_range_filter(
    *,
    unparsed_range: RangeTuple,
    end_parser: Converter,
    within_parser: Converter,
    attr_func: Where,
    parsed_range: Optional[RangeTuple] = None,
    default_before: Optional[Any] = None,
    value_coercion_func: Optional[Converter] = None,
    error_message: Optional[str] = None,
 ) -> Optional[Where]:
    """
    Handles:
        - parsing the user input into values that are comparable to items the iterable returns
            - unparsed_range: tuple of raw values from user
            - end_parser: parses 'before' and 'after' (e.g. start/end dates)
            - within_parser: parser for the 'range' (e.g. timedelta)
            - error_message: allow overriding the default error message while parsing
        - converting items from the iterable to some coerced value, so that its comparable to
          the before, after and within parts of the range
            - if value_coercion_func is present, tries to use that
              to convert the value returned by the attr_func
    unparsed_range is a tuple of the input data from the user
    parsed_range can be passed if you've already parsed unparsed_range
    'default_before' specifies what to set if no before or after was specified in
    RangeTuple and we need an endpoint to end the range at. For example, if you wanted
    data from an iterable from the last week, you could specify default_before to be now (time.time()),
    and unparsed_range.within to be 7 days
    Creates a predicate that checks if some item from the iterator is
    within some range. this is typically used for datelike input, but the user could
    specify an integer or float item to order the values by/in some timeframe
    It requires the value you're comparing by to support comparable/addition operators (=, <, >, +, -)
    attr_func is a function which accepts the object from the iterator and returns
    the value to compare the range boundaries to. typically generated by _generate_order_by_func
    To force the values you're sorting by to be in some specified type,
    this allows a 'value_coercion_func', which optionally converts the value
    returned by attr_func to some shared type (see _datelike_to_float for an example)
    """
    rn = _parse_range(unparsed_range=unparsed_range,
                      end_parser=end_parser,
                      within_parser=within_parser,
                      parsed_range=parsed_range,
                      error_message=error_message)
    # user specified all 'None' items in the range, don't need to filter
    if rn is None:
        return None
    after = rn.after
    before = rn.before
    within = rn.within
    # hmm... not sure how to correctly manage
    # inclusivity here? Is [after, before) currently,
    # items are included on the lower bound but not the
    # upper bound
    # typically used for datetimes so doesnt have to
    # be exact in that case
    def generated_predicate(obj: Any) -> bool:
        ov: Any = attr_func(obj)
        if value_coercion_func is not None:
            ov = value_coercion_func(ov)
        if after is not None:
            if before is not None:
                # squeeze between before/after
                return ov >= after and ov < before
            elif within is not None:
                # after some start point + some range
                allow_before = after + within
                return ov >= after and ov < allow_before
            else:
                return ov >= after
        elif before is not None:
            if within is not None:
                allow_after = before - within
                # before a startpoint + some range
                return ov >= allow_after and ov < before
            else:
                # just before the startpoint
                return ov < before
        else:
            # only specified within, default before to now
            if default_before is None:
                raise QueryException("Only recieved a range length, with no start or end point to compare against")
            allow_after = default_before - within
            return ov >= allow_after and ov < default_before
    return generated_predicate
 # main interface to this file from my.core.__main__.py
 def select_range(
    itr: Iterator[ET],
    *,
    where: Optional[Where] = None,
    order_key: Optional[str] = None,
    order_value: Optional[Where] = None,
    order_by_value_type: Optional[Type] = None,
    unparsed_range: Optional[RangeTuple] = None,
    reverse: bool = False,
    limit: Optional[int] = None,
    drop_unsorted: bool = False,
    wrap_unsorted: bool = False,
    drop_exceptions: bool = False,
    raise_exceptions: bool = False,
 ) -> Iterator[ET]:
    """
    A specialized select function which offers generating functions
    to filter/query ranges from an iterable
    order_key and order_value are used in the same way they are in select
    If you specify order_by_value_type, it tries to search for an attribute
    on each object/type which has that type, ordering the iterable by that value
    unparsed_range is a tuple of length 3, specifying 'after', 'before', 'duration',
    i.e. some start point to allow the computed value we're ordering by, some
    end point and a duration (can use the RangeTuple NamedTuple to construct one)
    (this is typically parsed/created in my.core.__main__, from CLI flags
    If you specify a range, drop_unsorted is forced to be True
    """
    # if the user specified a range with no data, set the unparsed_range to None
    if unparsed_range == RangeTuple(None, None, None):
        unparsed_range = None
    # some operations to do before ordering/filtering
    if drop_exceptions or raise_exceptions or where is not None:
        # doesnt wrap unsortable items, because we pass no order related kwargs
        itr = select(itr, where=where, drop_exceptions=drop_exceptions, raise_exceptions=raise_exceptions)
    order_by_chosen: Optional[OrderFunc] = None
    # if the user didn't specify an attribute to order value, but specified a type
    # we should search for on each value in the iterator
    if order_value is None and order_by_value_type is not None:
        # search for that type on the iterator object
        order_value = lambda o: isinstance(o, order_by_value_type)  # type: ignore
    # if the user supplied a order_key, and/or we've generated an order_value, create
    # the function that accesses that type on each value in the iterator
    if order_key is not None or order_value is not None:
        order_by_chosen, itr = _handle_generate_order_by(itr, order_key=order_key, order_value=order_value)
        # signifies that itr is empty -- can early return here
        if order_by_chosen is None:
            return itr
    # test if the user is trying to specify a range to filter the items by
    if unparsed_range is not None:
        if order_by_chosen is None:
            raise QueryException("""Can't order by range if we have no way to order_by!
 Specify a type or a key to order the value by""")
        else:
            # force drop_unsorted=True so we can use _create_range_filter
            # sort the iterable by the generated order_by_chosen function
            itr = select(itr, order_by=order_by_chosen, drop_unsorted=True)
            filter_func: Optional[Where]
            if order_by_value_type in [datetime, date]:
                filter_func = _create_range_filter(
                    unparsed_range=unparsed_range,
                    end_parser=parse_datetime_float,
                    within_parser=parse_timedelta_float,
                    attr_func=order_by_chosen,  # type: ignore[arg-type]
                    default_before=time.time(),
                    value_coercion_func=_datelike_to_float)
            elif order_by_value_type in [int, float]:
                # allow primitives to be converted using the default int(), float() callables
                filter_func = _create_range_filter(
                    unparsed_range=unparsed_range,
                    end_parser=order_by_value_type,
                    within_parser=order_by_value_type,
                    attr_func=order_by_chosen,  # type: ignore[arg-type]
                    default_before=None,
                    value_coercion_func=order_by_value_type)
            else:
                # TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
                # would need to allow passing the end_parser, within parser, default before and value_coercion_func...
                # (seems like a lot?)
                raise QueryException("Sorting by custom types is currently unsupported")
            # use the created filter function
            # we've already applied drop_exceptions and kwargs related to unsortable values above
            itr = select(itr, where=filter_func, limit=limit, reverse=reverse)
    else:
        # wrap_unsorted may be used here if the user specified an order_key,
        # or manually passed a order_value function
        #
        # this select is also run if the user didn't specify anything to
        # order by, and is just returning the data in the same order as
        # as the srouce iterable
        # i.e. none of the range-related filtering code ran, this is just a select
        itr = select(itr,
                     order_by=order_by_chosen,
                     wrap_unsorted=wrap_unsorted,
                     drop_unsorted=drop_unsorted,
                     limit=limit,
                     reverse=reverse)
    return itr
 # re-use items from query for testing
 from .query import _A, _B, _Float, _mixed_iter_errors
 def test_filter_in_timeframe() -> None:
    from itertools import chain
    jan_1_2005 = datetime(year=2005, month=1, day=1, hour=1, minute=1, second=1)
    jan_1_2016 = datetime(year=2016, month=1, day=1, hour=1, minute=1, second=1)
    rng = RangeTuple(after=str(jan_1_2005), before=str(jan_1_2016), within=None)
    # items between 2005 and 2016
    res = list(select_range(_mixed_iter_errors(), order_by_value_type=datetime, unparsed_range=rng, drop_exceptions=True))
    assert res == [_A(x=datetime(2005, 4, 10, 4, 10, 1), y=2, z=-5),
                   _A(x=datetime(2005, 5, 10, 4, 10, 1), y=10, z=2),
                   _A(x=datetime(2009, 3, 10, 4, 10, 1), y=12, z=1),
                   _A(x=datetime(2009, 5, 10, 4, 10, 1), y=5, z=10),
                   _B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))]
    rng = RangeTuple(before=str(jan_1_2016), within="52w", after=None)
    # from 2016, going back 52 weeks (about a year?)
    res = list(select_range(_mixed_iter_errors(), order_by_value_type=datetime, unparsed_range=rng, drop_exceptions=True))
    assert res == [_B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))]
    # test passing just a within while using a datetime. should default to using current time
    recent_time = datetime.now() - timedelta(days=5)
    obj = _A(x=recent_time, y=2, z=-5)
    rng = RangeTuple(before=None, after=None, within="1w")
    res = list(select_range(chain(_mixed_iter_errors(), iter([obj])),
                            order_by_value_type=datetime,
                            unparsed_range=rng, drop_exceptions=True))
    assert res == [obj]
    # dont pass any range related stuff, use where/drop_exceptions and the limit flag
    # to make sure this falls through properly to using select kwargs
    using_range = list(select_range(_mixed_iter_errors(), drop_exceptions=True, limit=5))
    normal = list(select(_mixed_iter_errors(), limit=5, where=lambda o: not isinstance(o, Exception)))
    assert using_range == normal
 def test_query_range_float_value_type() -> None:
    def floaty_iter() -> Iterator[_Float]:
        for v in range(1, 6):
            yield _Float(float(v + 0.5))
    rng = RangeTuple(after=2, before=6.1, within=None)
    res = list(select_range(floaty_iter(), order_by_value_type=float, unparsed_range=rng, drop_exceptions=True))
    assert res == [_Float(2.5), _Float(3.5), _Float(4.5), _Float(5.5)]
 def test_range_predicate() -> None:
    from functools import partial
    def src() -> Iterator[str]:
        yield from map(str, range(15))
    identity = lambda o: o
    # convert any float values to ints
    coerce_int_parser = lambda o: int(float(o))
    int_filter_func = partial(_create_range_filter, attr_func=identity, end_parser=coerce_int_parser,
            within_parser=coerce_int_parser, value_coercion_func=coerce_int_parser)
    # filter from 0 to 5
    rn: Optional[RangeTuple] = RangeTuple("0", "5", None)
    zero_to_five_filter: Optional[Where] = int_filter_func(unparsed_range=rn)
    assert zero_to_five_filter is not None
    # this is just a Where function, given some input it return True/False if the value is allowed
    assert zero_to_five_filter(3) is True
    assert zero_to_five_filter(10) is False
    # this is expected, range_predicate is not inclusive on the far end
    assert list(filter(zero_to_five_filter, src())) == ["0", "1", "2", "3", "4"]
    # items less than 3, going 3.5 (converted to 3 by the coerce_int_parser) down
    rn = RangeTuple(None, 3, "3.5")
    assert list(filter(int_filter_func(unparsed_range=rn, attr_func=identity), src())) == ["0", "1", "2"]
 def test_parse_range() -> None:
    from functools import partial
    import pytest
    rn = RangeTuple("0", "5", None)
    res = _parse_range(unparsed_range=rn, end_parser=int, within_parser=int)
    assert res == RangeTuple(after=0, before=5, within=None)
    dt_parse_range = partial(_parse_range, end_parser=parse_datetime_float, within_parser=parse_timedelta_float)
    start_date = datetime.now()
    end_date = start_date + timedelta(seconds=60)
    # convert start items to strings, which need to be parsed back
    rn = RangeTuple(str(start_date), str(end_date.timestamp()), None)
    res2 = dt_parse_range(unparsed_range=rn)
    assert res2 == RangeTuple(after=start_date.timestamp(), before=end_date.timestamp(), within=None)
    # cant specify all three
    with pytest.raises(QueryException, match=r"Cannot specify 'after', 'before' and 'within'"):
        dt_parse_range(unparsed_range=RangeTuple(str(start_date), str(end_date.timestamp()), "7d"))
    # if you specify noting, should return None
    res3 = dt_parse_range(unparsed_range=RangeTuple(None, None, None))
    assert res3 is None
 def test_parse_timedelta_string() -> None:
    import pytest
    with pytest.raises(ValueError, match=r"Could not parse time duration from"):
        parse_timedelta_string("5xxx")
    res = parse_timedelta_string("1w5d5h10m50s")
    assert res == timedelta(days=7.0 + 5.0, hours=5.0, minutes=10.0, seconds=50.0)
 def test_parse_datetime_float() -> None:
    pnow = parse_datetime_float("now")
    sec_diff = abs((pnow - datetime.now().timestamp()))
    # should probably never fail? could mock time.time
    # but there seems to be issues with doing that use C-libraries (as time.time) does
    # https://docs.python.org/3/library/unittest.mock-examples.html#partial-mocking
    assert sec_diff < 60
    dt = datetime.now()
    dt_float_s = str(dt.timestamp())
    dt_int_s = str(int(dt.timestamp()))
    # float/int representations as strings
    assert dt.timestamp() == parse_datetime_float(dt_float_s)
    assert int(dt.timestamp()) == int(parse_datetime_float(dt_int_s))
    # test parsing isoformat
    assert dt.timestamp() == parse_datetime_float(str(dt))
--- a/my/location/home.py
+++ b/my/location/home.py
@ -6,7 +6,7 @@ from datetime import datetime, date, time, timezone
 from functools import lru_cache
 from typing import Sequence, Tuple, Union, cast
-from ..core.common import fromisoformat
+from ..core.compat import fromisoformat
 from my.config import location as user_config
--- a/my/taplog.py
+++ b/my/taplog.py
@ -6,7 +6,7 @@ from datetime import datetime
 from typing import NamedTuple, Dict, Optional, Iterable
 from .core import get_files
-from .core.common import fromisoformat
+from .core.compat import fromisoformat
 from my.config import taplog as user_config
--- a/my/time/tz/via_location.py
+++ b/my/time/tz/via_location.py
@ -140,7 +140,7 @@ def localize(dt: datetime) -> tzdatetime:
 from ...core import stat, Stats
 def stats() -> Stats:
-    from ...core.common import fromisoformat
+    from ...core.compat import fromisoformat
    # TODO not sure what would be a good stat() for this module...
    # might be nice to print some actual timezones?
    # there aren't really any great iterables to expose
--- a/setup.py
+++ b/setup.py
@ -56,7 +56,7 @@ def main():
            'optional': [
                # todo document these?
                'logzero',
-                'orjson',
+                'orjson',  # for my.core.serialize
                'cachew>=0.8.0',
                'mypy', # used for config checks
            ],
--- a/tests/core.py
+++ b/tests/core.py
@ -19,6 +19,7 @@ from my.core.discovery_pure import *
 from my.core.freezer        import *
 from my.core.stats          import *
 from my.core.query          import *
 from my.core.query_range    import *
 from my.core.serialize      import test_serialize_fallback
 from my.core.sqlite         import *
 from my.core.__main__       import *