doctor: better quick option propagation for stats (#239)
doctor: better quick option propagation for stats
* use contextmanager for quick stats instead of editing global state directly
* send quick to lots of stat related functions, so they could possibly be used without doctor, if someone wanted to
* if a stats function has a 'quick' kwarg, send the value there as well
* add an option to sort locations in my.time.tz.via_location
This commit is contained in:
parent
f43eedd52a
commit
0ce44bf0d1
7 changed files with 60 additions and 17 deletions
|
@ -189,6 +189,10 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http
|
|||
# less precise, but faster
|
||||
fast: bool = True
|
||||
|
||||
# sort locations by date
|
||||
# in case multiple sources provide them out of order
|
||||
sort_locations: bool = True
|
||||
|
||||
# if the accuracy for the location is more than 5km (this
|
||||
# isn't an accurate location, so shouldn't use it to determine
|
||||
# timezone), don't use
|
||||
|
|
|
@ -84,6 +84,7 @@ class time:
|
|||
class tz:
|
||||
class via_location:
|
||||
fast: bool
|
||||
sort_locations: bool
|
||||
require_accuracy: float
|
||||
|
||||
|
||||
|
|
|
@ -215,11 +215,11 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li
|
|||
verbose = True
|
||||
vw = '' if verbose else '; pass --verbose to print more information'
|
||||
|
||||
from . import common
|
||||
common.QUICK_STATS = quick # dirty, but hopefully OK for cli
|
||||
|
||||
tabulate_warnings()
|
||||
|
||||
import contextlib
|
||||
|
||||
from .common import quick_stats
|
||||
from .util import get_stats, HPIModule
|
||||
from .stats import guess_stats
|
||||
from .error import warn_my_config_import_error
|
||||
|
@ -256,15 +256,21 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li
|
|||
stats = get_stats(m)
|
||||
if stats is None:
|
||||
# then try guessing.. not sure if should log somehow?
|
||||
stats = guess_stats(m)
|
||||
stats = guess_stats(m, quick=quick)
|
||||
|
||||
if stats is None:
|
||||
eprint(" - no 'stats' function, can't check the data")
|
||||
# todo point to a readme on the module structure or something?
|
||||
continue
|
||||
|
||||
quick_context = quick_stats() if quick else contextlib.nullcontext()
|
||||
|
||||
try:
|
||||
res = stats()
|
||||
kwargs = {}
|
||||
if callable(stats) and 'quick' in inspect.signature(stats).parameters:
|
||||
kwargs['quick'] = quick
|
||||
with quick_context:
|
||||
res = stats(**kwargs)
|
||||
assert res is not None, 'stats() returned None'
|
||||
except Exception as ee:
|
||||
warning(f' - {click.style("stats:", fg="red")} computing failed{vw}')
|
||||
|
|
|
@ -2,6 +2,7 @@ from glob import glob as do_glob
|
|||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import functools
|
||||
from contextlib import contextmanager
|
||||
import types
|
||||
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple, TYPE_CHECKING
|
||||
import warnings
|
||||
|
@ -425,16 +426,32 @@ def warn_if_empty(f):
|
|||
return wrapped # type: ignore
|
||||
|
||||
|
||||
# hacky hook to speed up for 'hpi doctor'
|
||||
# todo think about something better
|
||||
# global state that turns on/off quick stats
|
||||
# can use the 'quick_stats' contextmanager
|
||||
# to enable/disable this in cli so that module 'stats'
|
||||
# functions don't have to implement custom 'quick' logic
|
||||
QUICK_STATS = False


# in case a user wants to use the stats functions/quick option
# elsewhere -- this context manager can be used instead of
# editing the global state directly
@contextmanager
def quick_stats():
    """Temporarily switch the module-level ``QUICK_STATS`` flag on.

    The value the flag had on entry is remembered and put back when the
    ``with`` block exits, whether it finishes normally or raises.
    """
    global QUICK_STATS
    saved = QUICK_STATS
    QUICK_STATS = True
    try:
        yield
    finally:
        # always restore whatever was set before entering the context
        QUICK_STATS = saved
|
||||
|
||||
|
||||
C = TypeVar('C')
|
||||
Stats = Dict[str, Any]
|
||||
StatsFun = Callable[[], Stats]
|
||||
# todo not sure about return type...
|
||||
def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats:
|
||||
def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False) -> Stats:
|
||||
if callable(func):
|
||||
fr = func()
|
||||
fname = func.__name__
|
||||
|
@ -451,13 +468,13 @@ def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats:
|
|||
rows=len(df),
|
||||
)
|
||||
else:
|
||||
res = _stat_iterable(fr)
|
||||
res = _stat_iterable(fr, quick=quick)
|
||||
return {
|
||||
fname: res,
|
||||
}
|
||||
|
||||
|
||||
def _stat_iterable(it: Iterable[C]) -> Any:
|
||||
def _stat_iterable(it: Iterable[C], quick: bool=False) -> Any:
|
||||
from more_itertools import ilen, take, first
|
||||
|
||||
# todo not sure if there is something in more_itertools to compute this?
|
||||
|
@ -476,7 +493,7 @@ def _stat_iterable(it: Iterable[C]) -> Any:
|
|||
|
||||
eit = funcit()
|
||||
count: Any
|
||||
if QUICK_STATS:
|
||||
if quick or QUICK_STATS:
|
||||
initial = take(100, eit)
|
||||
count = len(initial)
|
||||
if first(eit, None) is not None: # todo can actually be none...
|
||||
|
|
|
@ -13,12 +13,12 @@ from .common import StatsFun, Stats, stat
|
|||
|
||||
# TODO maybe could be enough to annotate OUTPUTS or something like that?
|
||||
# then stats could just use them as hints?
|
||||
def guess_stats(module_name: str) -> Optional[StatsFun]:
|
||||
def guess_stats(module_name: str, quick: bool=False) -> Optional[StatsFun]:
|
||||
providers = guess_data_providers(module_name)
|
||||
if len(providers) == 0:
|
||||
return None
|
||||
def auto_stats() -> Stats:
|
||||
return {k: stat(v) for k, v in providers.items()}
|
||||
return {k: stat(v, quick=quick) for k, v in providers.items()}
|
||||
return auto_stats
|
||||
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ def get_stats(module: str) -> Optional[StatsFun]:
|
|||
# todo detect via ast?
|
||||
try:
|
||||
mod = import_module(module)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
return getattr(mod, 'stats', None)
|
||||
|
|
|
@ -16,6 +16,10 @@ class config(time.tz.via_location):
|
|||
# less precise, but faster
|
||||
fast: bool = True
|
||||
|
||||
# sort locations by date
|
||||
# in case multiple sources provide them out of order
|
||||
sort_locations: bool = True
|
||||
|
||||
# if the accuracy for the location is more than 5km, don't use
|
||||
require_accuracy: float = 5_000
|
||||
|
||||
|
@ -24,7 +28,7 @@ from collections import Counter
|
|||
from datetime import date, datetime
|
||||
from functools import lru_cache
|
||||
from itertools import groupby
|
||||
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List
|
||||
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable
|
||||
|
||||
from more_itertools import seekable
|
||||
import pytz
|
||||
|
@ -87,8 +91,12 @@ def _iter_local_dates() -> Iterator[DayWithZone]:
|
|||
#pdt = None
|
||||
# TODO: warnings doesnt actually warn?
|
||||
warnings = []
|
||||
|
||||
locs: Iterable[Tuple[LatLon, datetime]]
|
||||
locs = _sorted_locations() if config.sort_locations else _locations()
|
||||
|
||||
# todo allow to skip if not too many errors in a row?
|
||||
for (lat, lon), dt in _sorted_locations():
|
||||
for (lat, lon), dt in locs:
|
||||
# TODO right. its _very_ slow...
|
||||
zone = finder.timezone_at(lat=lat, lng=lon)
|
||||
if zone is None:
|
||||
|
@ -203,7 +211,14 @@ def localize(dt: datetime) -> tzdatetime:
|
|||
|
||||
|
||||
from ...core import stat, Stats
|
||||
def stats() -> Stats:
|
||||
def stats(quick: bool=False) -> Stats:
|
||||
if quick:
|
||||
prev, config.sort_locations = config.sort_locations, False
|
||||
res = {
|
||||
'first': next(_iter_local_dates())
|
||||
}
|
||||
config.sort_locations = prev
|
||||
return res
|
||||
# TODO not sure what would be a good stat() for this module...
|
||||
# might be nice to print some actual timezones?
|
||||
# there aren't really any great iterables to expose
|
||||
|
|
Loading…
Add table
Reference in a new issue