From 0ce44bf0d18f13b43e787030251f2a3cfdfa6045 Mon Sep 17 00:00:00 2001 From: seanbreckenridge Date: Sun, 1 May 2022 16:13:05 -0700 Subject: [PATCH] doctor: better quick option propagation for stats (#239) doctor: better quick option propagation for stats * use contextmanager for quick stats instead of editing global state directly * send quick to lots of stat related functions, so they could possibly be used without doctor, if someone wanted to * if a stats function has a 'quick' kwarg, send the value there as well * add an option to sort locations in my.time.tz.via_location --- doc/MODULES.org | 4 ++++ my/config.py | 1 + my/core/__main__.py | 16 +++++++++++----- my/core/common.py | 29 +++++++++++++++++++++++------ my/core/stats.py | 4 ++-- my/core/util.py | 2 +- my/time/tz/via_location.py | 21 ++++++++++++++++++--- 7 files changed, 60 insertions(+), 17 deletions(-) diff --git a/doc/MODULES.org b/doc/MODULES.org index 239a2be..2bcb052 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -189,6 +189,10 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http # less precise, but faster fast: bool = True + # sort locations by date + # incase multiple sources provide them out of order + sort_locations: bool = True + # if the accuracy for the location is more than 5km (this # isn't an accurate location, so shouldn't use it to determine # timezone), don't use diff --git a/my/config.py b/my/config.py index 0746803..7d31f5d 100644 --- a/my/config.py +++ b/my/config.py @@ -84,6 +84,7 @@ class time: class tz: class via_location: fast: bool + sort_locations: bool require_accuracy: float diff --git a/my/core/__main__.py b/my/core/__main__.py index 22068a6..faff852 100644 --- a/my/core/__main__.py +++ b/my/core/__main__.py @@ -215,11 +215,11 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li verbose = True vw = '' if verbose else '; pass --verbose to print more information' - from . 
import common - common.QUICK_STATS = quick # dirty, but hopefully OK for cli - tabulate_warnings() + import contextlib + + from .common import quick_stats from .util import get_stats, HPIModule from .stats import guess_stats from .error import warn_my_config_import_error @@ -256,15 +256,21 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li stats = get_stats(m) if stats is None: # then try guessing.. not sure if should log somehow? - stats = guess_stats(m) + stats = guess_stats(m, quick=quick) if stats is None: eprint(" - no 'stats' function, can't check the data") # todo point to a readme on the module structure or something? continue + quick_context = quick_stats() if quick else contextlib.nullcontext() + try: - res = stats() + kwargs = {} + if callable(stats) and 'quick' in inspect.signature(stats).parameters: + kwargs['quick'] = quick + with quick_context: + res = stats(**kwargs) assert res is not None, 'stats() returned None' except Exception as ee: warning(f' - {click.style("stats:", fg="red")} computing failed{vw}') diff --git a/my/core/common.py b/my/core/common.py index 92806d2..b7db362 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -2,6 +2,7 @@ from glob import glob as do_glob from pathlib import Path from datetime import datetime import functools +from contextlib import contextmanager import types from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple, TYPE_CHECKING import warnings @@ -425,16 +426,32 @@ def warn_if_empty(f): return wrapped # type: ignore -# hacky hook to speed up for 'hpi doctor' -# todo think about something better +# global state that turns on/off quick stats +# can use the 'quick_stats' contextmanager +# to enable/disable this in cli so that module 'stats' +# functions don't have to implement custom 'quick' logic QUICK_STATS = False +# incase user wants to use the stats functions/quick option +# elsewhere -- can use this decorator instead of 
editing +# the global state directly +@contextmanager +def quick_stats(): + global QUICK_STATS + prev = QUICK_STATS + try: + QUICK_STATS = True + yield + finally: + QUICK_STATS = prev + + C = TypeVar('C') Stats = Dict[str, Any] StatsFun = Callable[[], Stats] # todo not sure about return type... -def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats: +def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False) -> Stats: if callable(func): fr = func() fname = func.__name__ @@ -451,13 +468,13 @@ def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats: rows=len(df), ) else: - res = _stat_iterable(fr) + res = _stat_iterable(fr, quick=quick) return { fname: res, } -def _stat_iterable(it: Iterable[C]) -> Any: +def _stat_iterable(it: Iterable[C], quick: bool=False) -> Any: from more_itertools import ilen, take, first # todo not sure if there is something in more_itertools to compute this? @@ -476,7 +493,7 @@ def _stat_iterable(it: Iterable[C]) -> Any: eit = funcit() count: Any - if QUICK_STATS: + if quick or QUICK_STATS: initial = take(100, eit) count = len(initial) if first(eit, None) is not None: # todo can actually be none... diff --git a/my/core/stats.py b/my/core/stats.py index 9750061..3a93f68 100644 --- a/my/core/stats.py +++ b/my/core/stats.py @@ -13,12 +13,12 @@ from .common import StatsFun, Stats, stat # TODO maybe could be enough to annotate OUTPUTS or something like that? # then stats could just use them as hints? 
-def guess_stats(module_name: str) -> Optional[StatsFun]: +def guess_stats(module_name: str, quick: bool=False) -> Optional[StatsFun]: providers = guess_data_providers(module_name) if len(providers) == 0: return None def auto_stats() -> Stats: - return {k: stat(v) for k, v in providers.items()} + return {k: stat(v, quick=quick) for k, v in providers.items()} return auto_stats diff --git a/my/core/util.py b/my/core/util.py index 222cdec..a6204d9 100644 --- a/my/core/util.py +++ b/my/core/util.py @@ -20,7 +20,7 @@ def get_stats(module: str) -> Optional[StatsFun]: # todo detect via ast? try: mod = import_module(module) - except Exception as e: + except Exception: return None return getattr(mod, 'stats', None) diff --git a/my/time/tz/via_location.py b/my/time/tz/via_location.py index 0e91193..d31f04b 100644 --- a/my/time/tz/via_location.py +++ b/my/time/tz/via_location.py @@ -16,6 +16,10 @@ class config(time.tz.via_location): # less precise, but faster fast: bool = True + # sort locations by date + # incase multiple sources provide them out of order + sort_locations: bool = True + # if the accuracy for the location is more than 5km, don't use require_accuracy: float = 5_000 @@ -24,7 +28,7 @@ from collections import Counter from datetime import date, datetime from functools import lru_cache from itertools import groupby -from typing import Iterator, NamedTuple, Optional, Tuple, Any, List +from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable from more_itertools import seekable import pytz @@ -87,8 +91,12 @@ def _iter_local_dates() -> Iterator[DayWithZone]: #pdt = None # TODO: warnings doesnt actually warn? warnings = [] + + locs: Iterable[Tuple[LatLon, datetime]] + locs = _sorted_locations() if config.sort_locations else _locations() + # todo allow to skip if not noo many errors in row? - for (lat, lon), dt in _sorted_locations(): + for (lat, lon), dt in locs: # TODO right. its _very_ slow... 
zone = finder.timezone_at(lat=lat, lng=lon) if zone is None: @@ -203,7 +211,14 @@ def localize(dt: datetime) -> tzdatetime: from ...core import stat, Stats -def stats() -> Stats: +def stats(quick: bool=False) -> Stats: + if quick: + prev, config.sort_locations = config.sort_locations, False + res = { + 'first': next(_iter_local_dates()) + } + config.sort_locations = prev + return res # TODO not sure what would be a good stat() for this module... # might be nice to print some actual timezones? # there aren't really any great iterables to expose