doctor: better quick option propagation for stats (#239)

doctor: better quick option propagation for stats

* use contextmanager for quick stats instead of editing global state
  directly
* send quick to lots of stat related functions, so they
could possibly be used without doctor, if someone wanted to
* if a stats function has a 'quick' kwarg, send the value
there as well
* add an option to sort locations in my.time.tz.via_location
This commit is contained in:
seanbreckenridge 2022-05-01 16:13:05 -07:00 committed by GitHub
parent f43eedd52a
commit 0ce44bf0d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 60 additions and 17 deletions

View file

@ -189,6 +189,10 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http
# less precise, but faster # less precise, but faster
fast: bool = True fast: bool = True
# sort locations by date
# in case multiple sources provide them out of order
sort_locations: bool = True
# if the accuracy for the location is more than 5km (this # if the accuracy for the location is more than 5km (this
# isn't an accurate location, so shouldn't use it to determine # isn't an accurate location, so shouldn't use it to determine
# timezone), don't use # timezone), don't use

View file

@ -84,6 +84,7 @@ class time:
class tz: class tz:
class via_location: class via_location:
fast: bool fast: bool
sort_locations: bool
require_accuracy: float require_accuracy: float

View file

@ -215,11 +215,11 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li
verbose = True verbose = True
vw = '' if verbose else '; pass --verbose to print more information' vw = '' if verbose else '; pass --verbose to print more information'
from . import common
common.QUICK_STATS = quick # dirty, but hopefully OK for cli
tabulate_warnings() tabulate_warnings()
import contextlib
from .common import quick_stats
from .util import get_stats, HPIModule from .util import get_stats, HPIModule
from .stats import guess_stats from .stats import guess_stats
from .error import warn_my_config_import_error from .error import warn_my_config_import_error
@ -256,15 +256,21 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li
stats = get_stats(m) stats = get_stats(m)
if stats is None: if stats is None:
# then try guessing.. not sure if should log somehow? # then try guessing.. not sure if should log somehow?
stats = guess_stats(m) stats = guess_stats(m, quick=quick)
if stats is None: if stats is None:
eprint(" - no 'stats' function, can't check the data") eprint(" - no 'stats' function, can't check the data")
# todo point to a readme on the module structure or something? # todo point to a readme on the module structure or something?
continue continue
quick_context = quick_stats() if quick else contextlib.nullcontext()
try: try:
res = stats() kwargs = {}
if callable(stats) and 'quick' in inspect.signature(stats).parameters:
kwargs['quick'] = quick
with quick_context:
res = stats(**kwargs)
assert res is not None, 'stats() returned None' assert res is not None, 'stats() returned None'
except Exception as ee: except Exception as ee:
warning(f' - {click.style("stats:", fg="red")} computing failed{vw}') warning(f' - {click.style("stats:", fg="red")} computing failed{vw}')

View file

@ -2,6 +2,7 @@ from glob import glob as do_glob
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
import functools import functools
from contextlib import contextmanager
import types import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple, TYPE_CHECKING from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple, TYPE_CHECKING
import warnings import warnings
@ -425,16 +426,32 @@ def warn_if_empty(f):
return wrapped # type: ignore return wrapped # type: ignore
# hacky hook to speed up for 'hpi doctor' # global state that turns on/off quick stats
# todo think about something better # can use the 'quick_stats' contextmanager
# to enable/disable this in cli so that module 'stats'
# functions don't have to implement custom 'quick' logic
QUICK_STATS = False QUICK_STATS = False
# in case the user wants to use the stats functions/quick option
# elsewhere -- can use this context manager instead of editing
# the global state directly
@contextmanager
def quick_stats():
    """Temporarily switch the module-level ``QUICK_STATS`` flag on.

    On entry the current value of ``QUICK_STATS`` is remembered and the
    flag is set to ``True``. On exit -- whether the ``with`` body finished
    normally or raised -- the remembered value is put back, so nested or
    repeated uses leave the flag exactly as they found it.
    """
    global QUICK_STATS
    saved = QUICK_STATS
    QUICK_STATS = True
    try:
        yield
    finally:
        # always restore, even if the body raised
        QUICK_STATS = saved
C = TypeVar('C') C = TypeVar('C')
Stats = Dict[str, Any] Stats = Dict[str, Any]
StatsFun = Callable[[], Stats] StatsFun = Callable[[], Stats]
# todo not sure about return type... # todo not sure about return type...
def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats: def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]], quick: bool=False) -> Stats:
if callable(func): if callable(func):
fr = func() fr = func()
fname = func.__name__ fname = func.__name__
@ -451,13 +468,13 @@ def stat(func: Union[Callable[[], Iterable[C]], Iterable[C]]) -> Stats:
rows=len(df), rows=len(df),
) )
else: else:
res = _stat_iterable(fr) res = _stat_iterable(fr, quick=quick)
return { return {
fname: res, fname: res,
} }
def _stat_iterable(it: Iterable[C]) -> Any: def _stat_iterable(it: Iterable[C], quick: bool=False) -> Any:
from more_itertools import ilen, take, first from more_itertools import ilen, take, first
# todo not sure if there is something in more_itertools to compute this? # todo not sure if there is something in more_itertools to compute this?
@ -476,7 +493,7 @@ def _stat_iterable(it: Iterable[C]) -> Any:
eit = funcit() eit = funcit()
count: Any count: Any
if QUICK_STATS: if quick or QUICK_STATS:
initial = take(100, eit) initial = take(100, eit)
count = len(initial) count = len(initial)
if first(eit, None) is not None: # todo can actually be none... if first(eit, None) is not None: # todo can actually be none...

View file

@ -13,12 +13,12 @@ from .common import StatsFun, Stats, stat
# TODO maybe could be enough to annotate OUTPUTS or something like that? # TODO maybe could be enough to annotate OUTPUTS or something like that?
# then stats could just use them as hints? # then stats could just use them as hints?
def guess_stats(module_name: str) -> Optional[StatsFun]: def guess_stats(module_name: str, quick: bool=False) -> Optional[StatsFun]:
providers = guess_data_providers(module_name) providers = guess_data_providers(module_name)
if len(providers) == 0: if len(providers) == 0:
return None return None
def auto_stats() -> Stats: def auto_stats() -> Stats:
return {k: stat(v) for k, v in providers.items()} return {k: stat(v, quick=quick) for k, v in providers.items()}
return auto_stats return auto_stats

View file

@ -20,7 +20,7 @@ def get_stats(module: str) -> Optional[StatsFun]:
# todo detect via ast? # todo detect via ast?
try: try:
mod = import_module(module) mod = import_module(module)
except Exception as e: except Exception:
return None return None
return getattr(mod, 'stats', None) return getattr(mod, 'stats', None)

View file

@ -16,6 +16,10 @@ class config(time.tz.via_location):
# less precise, but faster # less precise, but faster
fast: bool = True fast: bool = True
# sort locations by date
# in case multiple sources provide them out of order
sort_locations: bool = True
# if the accuracy for the location is more than 5km, don't use # if the accuracy for the location is more than 5km, don't use
require_accuracy: float = 5_000 require_accuracy: float = 5_000
@ -24,7 +28,7 @@ from collections import Counter
from datetime import date, datetime from datetime import date, datetime
from functools import lru_cache from functools import lru_cache
from itertools import groupby from itertools import groupby
from typing import Iterator, NamedTuple, Optional, Tuple, Any, List from typing import Iterator, NamedTuple, Optional, Tuple, Any, List, Iterable
from more_itertools import seekable from more_itertools import seekable
import pytz import pytz
@ -87,8 +91,12 @@ def _iter_local_dates() -> Iterator[DayWithZone]:
#pdt = None #pdt = None
# TODO: warnings doesnt actually warn? # TODO: warnings doesnt actually warn?
warnings = [] warnings = []
locs: Iterable[Tuple[LatLon, datetime]]
locs = _sorted_locations() if config.sort_locations else _locations()
# todo allow skipping if there are not too many errors in a row?
for (lat, lon), dt in _sorted_locations(): for (lat, lon), dt in locs:
# TODO right. its _very_ slow... # TODO right. its _very_ slow...
zone = finder.timezone_at(lat=lat, lng=lon) zone = finder.timezone_at(lat=lat, lng=lon)
if zone is None: if zone is None:
@ -203,7 +211,14 @@ def localize(dt: datetime) -> tzdatetime:
from ...core import stat, Stats from ...core import stat, Stats
def stats() -> Stats: def stats(quick: bool=False) -> Stats:
if quick:
prev, config.sort_locations = config.sort_locations, False
res = {
'first': next(_iter_local_dates())
}
config.sort_locations = prev
return res
# TODO not sure what would be a good stat() for this module... # TODO not sure what would be a good stat() for this module...
# might be nice to print some actual timezones? # might be nice to print some actual timezones?
# there aren't really any great iterables to expose # there aren't really any great iterables to expose