general: initial flake8 checks (for now manual)

fix fairly uncontroversial stuff in my.core like
- line spacing, which isn't too annoying (e.g. unlike many inline whitespace checks that break vertical formatting)
- unused imports/variables
- too broad except
This commit is contained in:
Dima Gerasimov 2022-06-05 18:43:36 +01:00 committed by karlicoss
parent fd0c65d176
commit 016f28250b
19 changed files with 124 additions and 58 deletions

37
misc/.flake8-karlicoss Normal file
View file

@ -0,0 +1,37 @@
[flake8]
ignore =
## these mess up vertical aligment
E126 # continuation line over-indented
E202 # whitespace before )
E203 # whitespace before ':' (e.g. in dict)
E221 # multiple spaces before operator
E241 # multiple spaces after ,
E251 # unexpected spaces after =
E261 # 2 spaces before comment. I actually think it's fine so TODO enable back later (TODO or not? still alignment)
E271 # multiple spaces after keyword
E272 # multiple spaces before keyword
##
E266 # 'too many leading # in the comment' -- this is just unnecessary pickiness, sometimes it's nice to format a comment
E302 # 2 blank lines
E501 # 'line too long' -- kinda annoying and the default 79 is shit anyway
E702 E704 # multiple statements on one line -- messes with : ... type declataions + sometimes asserts
E731 # suggests always using def instead of lambda
E402 # FIXME module level import -- we want it later
E252 # TODO later -- whitespace around equals?
# F541: f-string is missing placeholders -- perhaps too picky?
# F841 is pretty useful (unused variables). maybe worth making it an error on CI
# for imports: we might want to check these
# F401 good: unused imports
# E401: import order
# F811: redefinition of unused import
# todo from my.core import __NOT_HPI_MODULE__ this needs to be excluded from 'unused'
#
# as a reference:
# https://github.com/seanbreckenridge/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
# and this https://github.com/karlicoss/HPI/pull/151
# find ./my | entr flake8 --ignore=E402,E501,E741,W503,E266,E302,E305,E203,E261,E252,E251,E221,W291,E225,E303,E702,E202,F841,E731,E306,E127 E722,E231 my | grep -v __NOT_HPI_MODULE__

View file

@ -1,6 +1,6 @@
# this file only keeps the most common & critical types/utility functions
from .common import PathIsh, Paths, Json
from .common import get_files
from .common import get_files, PathIsh, Paths
from .common import Json
from .common import LazyLogger
from .common import warn_if_empty
from .common import stat, Stats
@ -8,11 +8,32 @@ from .common import datetime_naive, datetime_aware
from .common import assert_never
from .cfg import make_config
from .util import __NOT_HPI_MODULE__
from .error import Res, unwrap
# just for brevity in modules
# todo not sure about these.. maybe best to rely on regular imports.. perhaps compare?
from dataclasses import dataclass
from pathlib import Path
__all__ = [
'get_files', 'PathIsh', 'Paths',
'Json',
'LazyLogger',
'warn_if_empty',
'stat', 'Stats',
'datetime_aware', 'datetime_naive',
'assert_never',
'make_config',
'__NOT_HPI_MODULE__',
'Res', 'unwrap',
'dataclass', 'Path',
]

View file

@ -17,7 +17,7 @@ import click
def mypy_cmd() -> Optional[Sequence[str]]:
try:
# preferably, use mypy from current python env
import mypy
import mypy # noqa: F401 fine not to use it
except ImportError:
pass
else:
@ -63,6 +63,7 @@ def eprint(x: str) -> None:
def indent(x: str) -> str:
return ''.join(' ' + l for l in x.splitlines(keepends=True))
OK = ''
OFF = '🔲'
@ -178,7 +179,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module
try:
check_call(cmd)
info('syntax check: ' + ' '.join( cmd))
info('syntax check: ' + ' '.join(cmd))
except Exception as e:
errors.append(e)
tb(e)
@ -258,7 +259,7 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li
continue
try:
mod = importlib.import_module(m)
mod = importlib.import_module(m) # noqa: F841
except Exception as e:
# todo more specific command?
error(f'{click.style("FAIL", fg="red")}: {m:<50} loading failed{vw}')
@ -322,6 +323,7 @@ def tabulate_warnings() -> None:
'''
import warnings
orig = warnings.formatwarning
def override(*args, **kwargs) -> str:
res = orig(*args, **kwargs)
return ''.join(' ' + x for x in res.splitlines(keepends=True))

View file

@ -6,7 +6,7 @@ from typing import Optional
def disable_cachew() -> None:
try:
import cachew
import cachew # noqa: F401 # unused, it's fine
except ImportError:
# nothing to disable
return
@ -19,7 +19,7 @@ from typing import Iterator
@contextmanager
def disabled_cachew() -> Iterator[None]:
try:
import cachew
import cachew # noqa: F401 # unused, it's fine
except ImportError:
# nothing to disable
yield

View file

@ -12,8 +12,7 @@ from . import warnings as core_warnings
PathIsh = Union[Path, str]
# TODO only used in tests? not sure if useful at all.
# TODO port annotations to kython?..
def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
def import_file(p: PathIsh, name: Optional[str] = None) -> types.ModuleType:
p = Path(p)
if name is None:
name = p.stem
@ -48,11 +47,12 @@ T = TypeVar('T')
K = TypeVar('K')
V = TypeVar('V')
# TODO deprecate? more_itertools.one should be used
def the(l: Iterable[T]) -> T:
it = iter(l)
try:
first = next(it)
except StopIteration as ee:
except StopIteration:
raise RuntimeError('Empty iterator?')
assert all(e == first for e in it)
return first
@ -234,6 +234,7 @@ if TYPE_CHECKING:
# I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
# ok, that's actually a super nice 'pattern'
F = TypeVar('F')
class McachewType(Protocol):
def __call__(
self,
@ -273,7 +274,7 @@ def mcachew(cache_path=_cache_path_dflt, **kwargs): # type: ignore[no-redef]
try:
# check that it starts with 'hack' path
Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
except:
except: # noqa: E722 bare except
pass # no action needed, doesn't start with 'hack' string
else:
# todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
@ -336,8 +337,7 @@ class classproperty(Generic[_R]):
# def __get__(self) -> _R:
# return self.f()
# for now just serves documentation purposes... but one day might make it statically verifiable where possible?
# TODO e.g. maybe use opaque mypy alias?
# TODO deprecate in favor of datetime_aware
tzdatetime = datetime
@ -352,6 +352,8 @@ def isoparse(s: str) -> tzdatetime:
s = s[:-1] + '+00:00'
return datetime.fromisoformat(s)
# legacy import -- we should use compat directly instead
from .compat import Literal
@ -412,6 +414,7 @@ def warn_if_empty(f: FI) -> FI: ...
def warn_if_empty(f):
from functools import wraps
@wraps(f)
def wrapped(*args, **kwargs):
res = f(*args, **kwargs)
@ -474,6 +477,7 @@ def _stat_iterable(it: Iterable[C], quick: bool=False) -> Any:
total = 0
errors = 0
last = None
def funcit():
nonlocal errors, last, total
for x in it:
@ -542,7 +546,7 @@ def guess_datetime(x: Any) -> Optional[datetime]:
# todo hmm implement withoutexception..
try:
d = asdict(x)
except:
except: # noqa: E722 bare except
return None
for k, v in d.items():
if isinstance(v, datetime):
@ -591,8 +595,8 @@ def asdict(thing: Any) -> Json:
raise TypeError(f'Could not convert object {thing} to dict')
# for now just serves documentation purposes... but one day might make it statically verifiable where possible?
# TODO e.g. maybe use opaque mypy alias?
datetime_naive = datetime
datetime_aware = datetime

View file

@ -29,7 +29,7 @@ def pre_pip_dal_handler(
Specifying modules' dependencies in the config or in my/config/repos is deprecated!
Please install {' '.join(requires)} as PIP packages (see the corresponding README instructions).
'''.strip(), stacklevel=2)
except ModuleNotFoundError as ee:
except ModuleNotFoundError:
dal = None
if dal is None:
@ -83,7 +83,9 @@ else:
from typing import TypeVar, Callable
Cl = TypeVar('Cl')
R = TypeVar('R')
def cached_property(f: Callable[[Cl], R]) -> R:
import functools
return property(functools.lru_cache(maxsize=1)(f)) # type: ignore
del Cl
del R

View file

@ -95,8 +95,6 @@ class Config(user_config):
return spec
return None
enabled = self.enabled_modules
disabled = self.disabled_modules
on = matches(self.enabled_modules or [])
off = matches(self.disabled_modules or [])
@ -153,7 +151,7 @@ def test_active_modules() -> None:
with reset() as cc:
# if both are set, enable all
cc.disabled_modules = ['my.body.*']
cc.enabled_modules = ['my.body.exercise']
cc.enabled_modules = ['my.body.exercise']
assert cc._is_module_active('my.whatever' ) is None
assert cc._is_module_active('my.core' ) is None
with pytest.warns(UserWarning, match=r"conflicting regexes") as record_warnings:

View file

@ -64,7 +64,7 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]
k: Optional[K]
try:
k = key(i)
except Exception as e:
except Exception: # error white computing key? dunno, might be nice to handle...
k = None
group.append(i)
if k is not None:
@ -193,7 +193,6 @@ See {help_url} or check the corresponding module.py file for an example\
return False
def test_datetime_errors() -> None:
import pytz
dt_notz = datetime.now()
@ -207,7 +206,6 @@ def test_datetime_errors() -> None:
e2 = RuntimeError(f'something something {dt} something else')
assert extract_error_datetime(e2) == dt
e3 = RuntimeError(str(['one', '2019-11-27T08:56:00', 'three']))
assert extract_error_datetime(e3) is not None

View file

@ -38,6 +38,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c
# TODO need to take schema here...
cache: Dict[str, bool] = {}
def good(f, v) -> bool:
c = cache.get(f)
if c is not None:
@ -79,7 +80,6 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c
fields=fields,
)
from more_itertools import chunked
# "The optimal batch size is 5000 lines of line protocol."
# some chunking is def necessary, otherwise it fails

View file

@ -78,7 +78,7 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
tf = tarfile.open(pp)
# TODO pass encoding?
x = tf.extractfile(*args); assert x is not None
return x # type: ignore[return-value]
return x # type: ignore[return-value]
else:
return pp.open(mode, *args, **kwargs)

View file

@ -92,6 +92,7 @@ class Wvalue(Zoomable):
def __repr__(self):
return 'WValue{' + repr(self.value) + '}'
from typing import Tuple
def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
res: Zoomable
@ -118,6 +119,7 @@ def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
else:
raise RuntimeError(f'Unexpected type: {type(j)} {j}')
from contextlib import contextmanager
from typing import Iterator
@ -142,8 +144,9 @@ Expected {c} to be fully consumed by the parser.
# TODO log?
pass
from typing import cast
def test_unconsumed():
def test_unconsumed() -> None:
import pytest # type: ignore
with pytest.raises(UnconsumedError):
with wrap({'a': 1234}) as w:
@ -155,7 +158,7 @@ def test_unconsumed():
w = cast(Wdict, w)
d = w['c']['d'].zoom()
def test_consumed():
def test_consumed() -> None:
with wrap({'a': 1234}) as w:
w = cast(Wdict, w)
a = w['a'].zoom()
@ -165,7 +168,7 @@ def test_consumed():
c = w['c'].zoom()
d = c['d'].zoom()
def test_types():
def test_types() -> None:
# (string, number, object, array, boolean or nul
with wrap({'string': 'string', 'number': 3.14, 'boolean': True, 'null': None, 'list': [1, 2, 3]}) as w:
w = cast(Wdict, w)
@ -176,14 +179,14 @@ def test_types():
for x in list(w['list'].zoom()): # TODO eh. how to avoid the extra list thing?
x.consume()
def test_consume_all():
def test_consume_all() -> None:
with wrap({'aaa': {'bbb': {'hi': 123}}}) as w:
w = cast(Wdict, w)
aaa = w['aaa'].zoom()
aaa['bbb'].consume_all()
def test_consume_few():
def test_consume_few() -> None:
import pytest
pytest.skip('Will think about it later..')
with wrap({

View file

@ -26,7 +26,7 @@ else:
def check_dateish(s) -> Iterable[str]:
import pandas as pd # type: ignore
import pandas as pd # type: ignore # noqa: F811 not actually a redefinition
ctype = s.dtype
if str(ctype).startswith('datetime64'):
return
@ -140,7 +140,7 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Optional[Schema]=None) -> DataF
# https://github.com/pandas-dev/pandas/blob/fc9fdba6592bdb5d0d1147ce4d65639acd897565/pandas/core/frame.py#L562
# same for NamedTuple -- seems that it takes whatever schema the first NT has
# so we need to convert each individually... sigh
import pandas as pd
import pandas as pd # noqa: F811 not actually a redefinition
columns = None if schema is None else list(_as_columns(schema).keys())
return pd.DataFrame(to_jsons(it), columns=columns)
@ -148,7 +148,7 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Optional[Schema]=None) -> DataF
def test_as_dataframe() -> None:
import pytest
it = (dict(i=i, s=f'str{i}') for i in range(10))
with pytest.warns(UserWarning, match=r"No 'error' column") as record_warnings:
with pytest.warns(UserWarning, match=r"No 'error' column") as record_warnings: # noqa: F841
df = as_dataframe(it)
# todo test other error col policies
assert list(df.columns) == ['i', 's', 'error']
@ -156,6 +156,7 @@ def test_as_dataframe() -> None:
assert len(as_dataframe([])) == 0
from dataclasses import dataclass
@dataclass
class X:
x: int

View file

@ -520,7 +520,6 @@ Will attempt to call iter() on the value""")
return itr
# classes to use in tests, need to be defined at the top level
# because of a mypy bug
class _Int(NamedTuple):
@ -550,7 +549,7 @@ def test_basic_orders() -> None:
random.shuffle(input_items)
res = list(select(input_items, order_key="x"))
assert res == [_Int(1),_Int(2),_Int(3),_Int(4),_Int(5)]
assert res == [_Int(1), _Int(2), _Int(3), _Int(4), _Int(5)]
# default int ordering
def custom_order_by(obj: Any) -> Any:
@ -571,12 +570,10 @@ def test_order_key_multi_type() -> None:
for v in range(1, 6):
yield _Int(v)
def floaty_iter() -> Iterator[_Float]:
for v in range(1, 6):
yield _Float(float(v + 0.5))
res = list(select(itertools.chain(basic_iter(), floaty_iter()), order_key="x"))
assert res == [
_Int(1), _Float(1.5),

View file

@ -133,7 +133,8 @@ def _parse_range(
end_parser: Converter,
within_parser: Converter,
parsed_range: Optional[RangeTuple] = None,
error_message: Optional[str] = None) -> Optional[RangeTuple]:
error_message: Optional[str] = None
) -> Optional[RangeTuple]:
if parsed_range is not None:
return parsed_range
@ -388,7 +389,6 @@ def test_filter_in_timeframe() -> None:
_A(x=datetime(2009, 5, 10, 4, 10, 1), y=5, z=10),
_B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))]
rng = RangeTuple(before=str(jan_1_2016), within="52w", after=None)
# from 2016, going back 52 weeks (about a year?)
@ -438,8 +438,13 @@ def test_range_predicate() -> None:
# convert any float values to ints
coerce_int_parser = lambda o: int(float(o))
int_filter_func = partial(_create_range_filter, attr_func=identity, end_parser=coerce_int_parser,
within_parser=coerce_int_parser, value_coercion_func=coerce_int_parser)
int_filter_func = partial(
_create_range_filter,
attr_func=identity,
end_parser=coerce_int_parser,
within_parser=coerce_int_parser,
value_coercion_func=coerce_int_parser,
)
# filter from 0 to 5
rn: Optional[RangeTuple] = RangeTuple("0", "5", None)
@ -517,4 +522,3 @@ def test_parse_datetime_float() -> None:
# test parsing isoformat
assert dt.timestamp() == parse_datetime_float(str(dt))

View file

@ -151,8 +151,6 @@ def dumps(
def test_serialize_fallback() -> None:
import json as jsn # dont cause possible conflicts with module code
import pytest
# cant use a namedtuple here, since the default json.dump serializer
# serializes namedtuples as tuples, which become arrays
# just test with an array of mixed objects
@ -168,7 +166,6 @@ def test_serialize_fallback() -> None:
assert res == [5, 5.0]
# this needs to be defined here to prevent a mypy bug
# see https://github.com/python/mypy/issues/7281
class _A(NamedTuple):

View file

@ -3,9 +3,11 @@ Decorator to gracefully handle importing a data source, or warning
and yielding nothing (or a default) when its not available
"""
from typing import Any, Iterator, TypeVar, Callable, Optional, Iterable, Any, cast
from my.core.warnings import medium, warn
from functools import wraps
from typing import Any, Iterator, TypeVar, Callable, Optional, Iterable
import warnings
from .warnings import medium
# The factory function may produce something that has data
# similar to the shared model, but not exactly, so not
@ -55,7 +57,7 @@ def import_source(
medium(f"Module {factory_func.__qualname__} could not be imported, or isn't configured properly")
else:
medium(f"Module {module_name} ({factory_func.__qualname__}) could not be imported, or isn't configured properly")
warn(f"""If you don't want to use this module, to hide this message, add '{module_name}' to your core config disabled_modules in your config, like:
warnings.warn(f"""If you don't want to use this module, to hide this message, add '{module_name}' to your core config disabled_modules in your config, like:
class core:
disabled_modules = [{repr(module_name)}]
@ -71,4 +73,3 @@ class core:
yield from default
return wrapper
return decorator

View file

@ -6,7 +6,7 @@ import importlib
import inspect
import sys
import typing
from typing import Optional, Callable, Any, Iterator, Sequence, Dict
from typing import Optional, Callable, Any, Iterator, Sequence, Dict, List
from .common import StatsFun, Stats, stat
@ -17,6 +17,7 @@ def guess_stats(module_name: str, quick: bool=False) -> Optional[StatsFun]:
providers = guess_data_providers(module_name)
if len(providers) == 0:
return None
def auto_stats() -> Stats:
return {k: stat(v, quick=quick) for k, v in providers.items()}
return auto_stats
@ -69,17 +70,17 @@ def test_is_data_provider() -> None:
assert not idp("x")
def no_return_type():
return [1, 2 ,3]
return [1, 2, 3]
assert not idp(no_return_type)
lam = lambda: [1, 2]
assert not idp(lam)
def has_extra_args(count) -> typing.List[int]:
def has_extra_args(count) -> List[int]:
return list(range(count))
assert not idp(has_extra_args)
def has_return_type() -> typing.Sequence[str]:
def has_return_type() -> Sequence[str]:
return ['a', 'b', 'c']
assert idp(has_return_type)
@ -96,7 +97,6 @@ def test_is_data_provider() -> None:
assert not idp(producer_inputs)
# return any parameters the user is required to provide - those which don't have default values
def sig_required_params(sig: inspect.Signature) -> Iterator[inspect.Parameter]:
for param in sig.parameters.values():

View file

@ -161,7 +161,7 @@ def _walk_packages(path: Iterable[str], prefix: str='', onerror=None) -> Iterabl
path = getattr(sys.modules[mname], '__path__', None) or []
# don't traverse path items we've seen before
path = [p for p in path if not seen(p)]
yield from _walk_packages(path, mname+'.', onerror)
yield from _walk_packages(path, mname + '.', onerror)
# deprecate?
def get_modules() -> List[HPIModule]:

View file

@ -12,9 +12,6 @@ import warnings
import click
# just bring in the scope of this module for convenience
from warnings import warn
def _colorize(x: str, color: Optional[str]=None) -> str:
if color is None:
return x
@ -49,3 +46,7 @@ def high(message: str, *args, **kwargs) -> None:
'''
kwargs['color'] = 'red'
_warn(message, *args, **kwargs)
# NOTE: deprecated -- legacy import
from warnings import warn