From 4a5205d292966a0cab53b7c8734c6cf052181fb7 Mon Sep 17 00:00:00 2001 From: Sean Breckenridge Date: Tue, 27 Apr 2021 20:26:39 -0700 Subject: [PATCH] cli/query: add --stream flag allows you to do something like hpi query --stream my.reddit.comments to stream the JSON objects one per line, makes it nicer to pipe into 'jq'/'fzf' instead of having to process the giant list at the end --- my/core/__main__.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/my/core/__main__.py b/my/core/__main__.py index 4459f98..45a372b 100644 --- a/my/core/__main__.py +++ b/my/core/__main__.py @@ -410,6 +410,7 @@ def _locate_functions_or_prompt(qualified_names: List[str], prompt: bool = True) def query_hpi_functions( *, output: str = 'json', + stream_json: bool = False, qualified_names: List[str], order_key: Optional[str], order_by_value_type: Optional[Type], @@ -431,7 +432,7 @@ def query_hpi_functions( # chain list of functions from user, in the order they wrote them on the CLI input_src = chain(*(f() for f in _locate_functions_or_prompt(qualified_names))) - res = list(select_range( + res = select_range( input_src, order_key=order_key, order_by_value_type=order_by_value_type, @@ -441,12 +442,21 @@ def query_hpi_functions( drop_unsorted=drop_unsorted, wrap_unsorted=wrap_unsorted, raise_exceptions=raise_exceptions, - drop_exceptions=drop_exceptions)) + drop_exceptions=drop_exceptions) if output == 'json': from .serialize import dumps - click.echo(dumps(res)) + if stream_json: + for item in res: + # use sys.stdout directly + # the overhead from click.echo isn't a *lot*, but when called in a loop + # with potentially millions of items it makes a noticeable difference + sys.stdout.write(dumps(item)) + sys.stdout.write('\n') + sys.stdout.flush() + else: + click.echo(dumps(list(res))) elif output == 'pprint': from pprint import pprint @@ -575,6 +585,11 @@ def module_install_cmd(user: bool, module: str) -> None: default='json', 
type=click.Choice(['json', 'pprint', 'repl']), help='what to do with the result [default: json]') +@click.option('-s', + '--stream', + default=False, + is_flag=True, + help='stream json objects from the data source instead of printing a list at the end') @click.option('-k', '--order-key', default=None, @@ -628,6 +643,7 @@ def module_install_cmd(user: bool, module: str) -> None: def query_cmd( function_name: Sequence[str], output: str, + stream: bool, order_key: Optional[str], order_type: Optional[str], after: Optional[str], @@ -693,6 +709,7 @@ def query_cmd( try: query_hpi_functions( output=output, + stream_json=stream, qualified_names=list(function_name), order_key=order_key, order_by_value_type=chosen_order_type,