general: initial flake8 checks (for now manual)

fix fairly uncontroversial stuff in my.core like - line spacing, which isn't too annoying (e.g. unlike many inline whitespace checks that break vertical formatting) - unused imports/variables - too broad except
2022-06-05 18:43:36 +01:00 · 2022-06-05 18:43:36 +01:00 · 016f28250b
commit 016f28250b
parent fd0c65d176
19 changed files with 124 additions and 58 deletions
--- a/misc/.flake8-karlicoss
+++ b/misc/.flake8-karlicoss
@ -0,0 +1,37 @@
+[flake8]
+ignore =
+   ## these mess up vertical aligment
+   E126 # continuation line over-indented
+   E202 # whitespace before )
+   E203 # whitespace before ':' (e.g. in dict)
+   E221 # multiple spaces before operator 
+   E241 # multiple spaces after ,
+   E251 # unexpected spaces after =
+   E261 # 2 spaces before comment. I actually think it's fine so TODO enable back later (TODO or not? still alignment)
+   E271 # multiple spaces after keyword
+   E272 # multiple spaces before keyword
+   ##
+   E266 # 'too many leading # in the comment' -- this is just unnecessary pickiness, sometimes it's nice to format a comment
+   E302 # 2 blank lines
+   E501 # 'line too long' -- kinda annoying and the default 79 is shit anyway
+   E702 E704 # multiple statements on one line -- messes with : ... type declataions + sometimes asserts
+   E731 # suggests always using def instead of lambda
+
+   E402 # FIXME module level import -- we want it later
+   E252 # TODO later -- whitespace around equals?
+# F541: f-string is missing placeholders -- perhaps too picky?
+
+# F841 is pretty useful (unused variables). maybe worth making it an error on CI
+
+
+# for imports: we might want to check these
+# F401 good: unused imports 
+# E401: import order
+# F811: redefinition of unused import
+# todo from my.core import __NOT_HPI_MODULE__ this needs to be excluded from 'unused'
+# 
+
+# as a reference:
+# https://github.com/seanbreckenridge/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg
+# and this https://github.com/karlicoss/HPI/pull/151
+# find ./my | entr flake8 --ignore=E402,E501,E741,W503,E266,E302,E305,E203,E261,E252,E251,E221,W291,E225,E303,E702,E202,F841,E731,E306,E127 E722,E231 my | grep -v __NOT_HPI_MODULE__
--- a/my/core/init.py
+++ b/my/core/init.py
@ -1,6 +1,6 @@
 # this file only keeps the most common & critical types/utility functions
-from .common import PathIsh, Paths, Json
-from .common import get_files
+from .common import get_files, PathIsh, Paths
+from .common import Json
 from .common import LazyLogger
 from .common import warn_if_empty
 from .common import stat, Stats
@ -8,11 +8,32 @@ from .common import datetime_naive, datetime_aware
 from .common import assert_never

 from .cfg import make_config
+
 from .util import __NOT_HPI_MODULE__

 from .error import Res, unwrap


 # just for brevity in modules
+# todo not sure about these.. maybe best to rely on regular imports.. perhaps compare?
 from dataclasses import dataclass
 from pathlib import Path
+
+
+__all__ = [
+    'get_files', 'PathIsh', 'Paths',
+    'Json',
+    'LazyLogger',
+    'warn_if_empty',
+    'stat', 'Stats',
+    'datetime_aware', 'datetime_naive',
+    'assert_never',
+
+    'make_config',
+
+    '__NOT_HPI_MODULE__',
+
+    'Res', 'unwrap',
+
+    'dataclass', 'Path',
+]
--- a/my/core/main.py
+++ b/my/core/main.py
@ -17,7 +17,7 @@ import click
 def mypy_cmd() -> Optional[Sequence[str]]:
    try:
        # preferably, use mypy from current python env
-        import mypy
+        import mypy  # noqa: F401 fine not to use it
    except ImportError:
        pass
    else:
@ -63,6 +63,7 @@ def eprint(x: str) -> None:
 def indent(x: str) -> str:
    return ''.join('   ' + l for l in x.splitlines(keepends=True))

+
 OK  = '✅'
 OFF = '🔲'

@ -178,7 +179,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module

        try:
            check_call(cmd)
-            info('syntax check: ' + ' '.join( cmd))
+            info('syntax check: ' + ' '.join(cmd))
        except Exception as e:
            errors.append(e)
            tb(e)
@ -258,7 +259,7 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li
            continue

        try:
-            mod = importlib.import_module(m)
+            mod = importlib.import_module(m)  # noqa: F841
        except Exception as e:
            # todo more specific command?
            error(f'{click.style("FAIL", fg="red")}: {m:<50} loading failed{vw}')
@ -322,6 +323,7 @@ def tabulate_warnings() -> None:
    '''
    import warnings
    orig = warnings.formatwarning
+
    def override(*args, **kwargs) -> str:
        res = orig(*args, **kwargs)
        return ''.join('  ' + x for x in res.splitlines(keepends=True))
--- a/my/core/cachew.py
+++ b/my/core/cachew.py
@ -6,7 +6,7 @@ from typing import Optional

 def disable_cachew() -> None:
    try:
-        import cachew
+        import cachew  # noqa: F401 # unused, it's fine
    except ImportError:
        # nothing to disable
        return
@ -19,7 +19,7 @@ from typing import Iterator
@contextmanager
 def disabled_cachew() -> Iterator[None]:
    try:
-        import cachew
+        import cachew  # noqa: F401 # unused, it's fine
    except ImportError:
        # nothing to disable
        yield
--- a/my/core/common.py
+++ b/my/core/common.py
@ -12,8 +12,7 @@ from . import warnings as core_warnings
 PathIsh = Union[Path, str]

 # TODO only used in tests? not sure if useful at all.
-# TODO port annotations to kython?..
-def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
+def import_file(p: PathIsh, name: Optional[str] = None) -> types.ModuleType:
    p = Path(p)
    if name is None:
        name = p.stem
@ -48,11 +47,12 @@ T = TypeVar('T')
 K = TypeVar('K')
 V = TypeVar('V')

+# TODO deprecate? more_itertools.one should be used
 def the(l: Iterable[T]) -> T:
    it = iter(l)
    try:
        first = next(it)
-    except StopIteration as ee:
+    except StopIteration:
        raise RuntimeError('Empty iterator?')
    assert all(e == first for e in it)
    return first
@ -234,6 +234,7 @@ if TYPE_CHECKING:
    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
    # ok, that's actually a super nice 'pattern'
    F = TypeVar('F')
+
    class McachewType(Protocol):
        def __call__(
                self,
@ -273,7 +274,7 @@ def mcachew(cache_path=_cache_path_dflt, **kwargs): # type: ignore[no-redef]
        try:
            # check that it starts with 'hack' path
            Path(cache_path).relative_to(_CACHE_DIR_NONE_HACK)
-        except:
+        except: # noqa: E722 bare except
            pass # no action needed, doesn't start with 'hack' string
        else:
            # todo show warning? tbh unclear how to detect when user stopped using 'old' way and using suffix instead?
@ -336,8 +337,7 @@ class classproperty(Generic[_R]):
 #     def __get__(self) -> _R:
 #         return self.f()

-# for now just serves documentation purposes... but one day might make it statically verifiable where possible?
-# TODO e.g. maybe use opaque mypy alias?
+# TODO deprecate in favor of datetime_aware
 tzdatetime = datetime


@ -352,6 +352,8 @@ def isoparse(s: str) -> tzdatetime:
    s = s[:-1] + '+00:00'
    return datetime.fromisoformat(s)

+
+# legacy import -- we should use compat directly instead
 from .compat import Literal


@ -412,6 +414,7 @@ def warn_if_empty(f: FI) -> FI: ...

 def warn_if_empty(f):
    from functools import wraps
+
    @wraps(f)
    def wrapped(*args, **kwargs):
        res = f(*args, **kwargs)
@ -474,6 +477,7 @@ def _stat_iterable(it: Iterable[C], quick: bool=False) -> Any:
    total = 0
    errors = 0
    last = None
+
    def funcit():
        nonlocal errors, last, total
        for x in it:
@ -542,7 +546,7 @@ def guess_datetime(x: Any) -> Optional[datetime]:
    # todo hmm implement withoutexception..
    try:
        d = asdict(x)
-    except:
+    except: # noqa: E722 bare except
        return None
    for k, v in d.items():
        if isinstance(v, datetime):
@ -591,8 +595,8 @@ def asdict(thing: Any) -> Json:
    raise TypeError(f'Could not convert object {thing} to dict')


-
-
+# for now just serves documentation purposes... but one day might make it statically verifiable where possible?
+# TODO e.g. maybe use opaque mypy alias?
 datetime_naive = datetime
 datetime_aware = datetime

--- a/my/core/compat.py
+++ b/my/core/compat.py
@ -29,7 +29,7 @@ def pre_pip_dal_handler(
 Specifying modules' dependencies in the config or in my/config/repos is deprecated!
 Please install {' '.join(requires)} as PIP packages (see the corresponding README instructions).
 '''.strip(), stacklevel=2)
-    except ModuleNotFoundError as ee:
+    except ModuleNotFoundError:
        dal = None

    if dal is None:
@ -83,7 +83,9 @@ else:
    from typing import TypeVar, Callable
    Cl = TypeVar('Cl')
    R = TypeVar('R')
+
    def cached_property(f: Callable[[Cl], R]) -> R:
+        import functools
        return property(functools.lru_cache(maxsize=1)(f)) # type: ignore
    del Cl
    del R
--- a/my/core/core_config.py
+++ b/my/core/core_config.py
@ -95,8 +95,6 @@ class Config(user_config):
                    return spec
            return None

-        enabled  = self.enabled_modules
-        disabled = self.disabled_modules
        on  = matches(self.enabled_modules  or [])
        off = matches(self.disabled_modules or [])

@ -153,7 +151,7 @@ def test_active_modules() -> None:
    with reset() as cc:
        # if both are set, enable all
        cc.disabled_modules = ['my.body.*']
-        cc.enabled_modules =  ['my.body.exercise']
+        cc.enabled_modules  = ['my.body.exercise']
        assert cc._is_module_active('my.whatever'     ) is None
        assert cc._is_module_active('my.core'         ) is None
        with pytest.warns(UserWarning, match=r"conflicting regexes") as record_warnings:
--- a/my/core/error.py
+++ b/my/core/error.py
@ -64,7 +64,7 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]
        k: Optional[K]
        try:
            k = key(i)
-        except Exception as e:
+        except Exception:  # error white computing key? dunno, might be nice to handle...
            k = None
        group.append(i)
        if k is not None:
@ -193,7 +193,6 @@ See {help_url} or check the corresponding module.py file for an example\
    return False


-
 def test_datetime_errors() -> None:
    import pytz
    dt_notz = datetime.now()
@ -207,7 +206,6 @@ def test_datetime_errors() -> None:
        e2 = RuntimeError(f'something something {dt} something else')
        assert extract_error_datetime(e2) == dt

-
    e3 = RuntimeError(str(['one', '2019-11-27T08:56:00', 'three']))
    assert extract_error_datetime(e3) is not None

--- a/my/core/influxdb.py
+++ b/my/core/influxdb.py
@ -38,6 +38,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c

    # TODO need to take schema here...
    cache: Dict[str, bool] = {}
+
    def good(f, v) -> bool:
        c = cache.get(f)
        if c is not None:
@ -79,7 +80,6 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c
                fields=fields,
            )

-
    from more_itertools import chunked
    # "The optimal batch size is 5000 lines of line protocol."
    # some chunking is def necessary, otherwise it fails
--- a/my/core/kompress.py
+++ b/my/core/kompress.py
@ -78,7 +78,7 @@ def kopen(path: PathIsh, *args, mode: str='rt', **kwargs) -> IO[str]:
        tf = tarfile.open(pp)
        # TODO pass encoding?
        x = tf.extractfile(*args); assert x is not None
-        return x  #  type: ignore[return-value]
+        return x  # type: ignore[return-value]
    else:
        return pp.open(mode, *args, **kwargs)

--- a/my/core/konsume.py
+++ b/my/core/konsume.py
@ -92,6 +92,7 @@ class Wvalue(Zoomable):
    def __repr__(self):
        return 'WValue{' + repr(self.value) + '}'

+
 from typing import Tuple
 def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
    res: Zoomable
@ -118,6 +119,7 @@ def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]:
    else:
        raise RuntimeError(f'Unexpected type: {type(j)} {j}')

+
 from contextlib import contextmanager
 from typing import Iterator

@ -142,8 +144,9 @@ Expected {c} to be fully consumed by the parser.
                # TODO log?
                pass

+
 from typing import cast
-def test_unconsumed():
+def test_unconsumed() -> None:
    import pytest # type: ignore
    with pytest.raises(UnconsumedError):
        with wrap({'a': 1234}) as w:
@ -155,7 +158,7 @@ def test_unconsumed():
            w = cast(Wdict, w)
            d = w['c']['d'].zoom()

-def test_consumed():
+def test_consumed() -> None:
    with wrap({'a': 1234}) as w:
        w = cast(Wdict, w)
        a = w['a'].zoom()
@ -165,7 +168,7 @@ def test_consumed():
        c = w['c'].zoom()
        d = c['d'].zoom()

-def test_types():
+def test_types() -> None:
    # (string, number, object, array, boolean or nul
    with wrap({'string': 'string', 'number': 3.14, 'boolean': True, 'null': None, 'list': [1, 2, 3]}) as w:
        w = cast(Wdict, w)
@ -176,14 +179,14 @@ def test_types():
        for x in list(w['list'].zoom()): # TODO eh. how to avoid the extra list thing?
            x.consume()

-def test_consume_all():
+def test_consume_all() -> None:
    with wrap({'aaa': {'bbb': {'hi': 123}}}) as w:
        w = cast(Wdict, w)
        aaa = w['aaa'].zoom()
        aaa['bbb'].consume_all()


-def test_consume_few():
+def test_consume_few() -> None:
    import pytest
    pytest.skip('Will think about it later..')
    with wrap({
--- a/my/core/pandas.py
+++ b/my/core/pandas.py
@ -26,7 +26,7 @@ else:


 def check_dateish(s) -> Iterable[str]:
-    import pandas as pd # type: ignore
+    import pandas as pd # type: ignore # noqa: F811 not actually a redefinition
    ctype = s.dtype
    if str(ctype).startswith('datetime64'):
        return
@ -140,7 +140,7 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Optional[Schema]=None) -> DataF
    #    https://github.com/pandas-dev/pandas/blob/fc9fdba6592bdb5d0d1147ce4d65639acd897565/pandas/core/frame.py#L562
    # same for NamedTuple -- seems that it takes whatever schema the first NT has
    # so we need to convert each individually... sigh
-    import pandas as pd
+    import pandas as pd # noqa: F811 not actually a redefinition
    columns = None if schema is None else list(_as_columns(schema).keys())
    return pd.DataFrame(to_jsons(it), columns=columns)

@ -148,7 +148,7 @@ def as_dataframe(it: Iterable[Res[Any]], schema: Optional[Schema]=None) -> DataF
 def test_as_dataframe() -> None:
    import pytest
    it = (dict(i=i, s=f'str{i}') for i in range(10))
-    with pytest.warns(UserWarning, match=r"No 'error' column") as record_warnings:
+    with pytest.warns(UserWarning, match=r"No 'error' column") as record_warnings:  # noqa: F841
        df = as_dataframe(it)
        # todo test other error col policies
    assert list(df.columns) == ['i', 's', 'error']
@ -156,6 +156,7 @@ def test_as_dataframe() -> None:
    assert len(as_dataframe([])) == 0

    from dataclasses import dataclass
+
    @dataclass
    class X:
        x: int
--- a/my/core/query.py
+++ b/my/core/query.py
@ -520,7 +520,6 @@ Will attempt to call iter() on the value""")
    return itr


-
 # classes to use in tests, need to be defined at the top level
 # because of a mypy bug
 class _Int(NamedTuple):
@ -550,7 +549,7 @@ def test_basic_orders() -> None:
    random.shuffle(input_items)

    res = list(select(input_items, order_key="x"))
-    assert res == [_Int(1),_Int(2),_Int(3),_Int(4),_Int(5)]
+    assert res == [_Int(1), _Int(2), _Int(3), _Int(4), _Int(5)]

    # default int ordering
    def custom_order_by(obj: Any) -> Any:
@ -571,12 +570,10 @@ def test_order_key_multi_type() -> None:
        for v in range(1, 6):
            yield _Int(v)

-
    def floaty_iter() -> Iterator[_Float]:
        for v in range(1, 6):
            yield _Float(float(v + 0.5))

-
    res = list(select(itertools.chain(basic_iter(), floaty_iter()), order_key="x"))
    assert res == [
        _Int(1), _Float(1.5),
--- a/my/core/query_range.py
+++ b/my/core/query_range.py
@ -133,7 +133,8 @@ def _parse_range(
    end_parser: Converter,
    within_parser: Converter,
    parsed_range: Optional[RangeTuple] = None,
-    error_message: Optional[str] = None) -> Optional[RangeTuple]:
+    error_message: Optional[str] = None
+) -> Optional[RangeTuple]:

    if parsed_range is not None:
        return parsed_range
@ -388,7 +389,6 @@ def test_filter_in_timeframe() -> None:
                   _A(x=datetime(2009, 5, 10, 4, 10, 1), y=5, z=10),
                   _B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))]

-
    rng = RangeTuple(before=str(jan_1_2016), within="52w", after=None)

    # from 2016, going back 52 weeks (about a year?)
@ -438,8 +438,13 @@ def test_range_predicate() -> None:

    # convert any float values to ints
    coerce_int_parser = lambda o: int(float(o))
-    int_filter_func = partial(_create_range_filter, attr_func=identity, end_parser=coerce_int_parser,
-            within_parser=coerce_int_parser, value_coercion_func=coerce_int_parser)
+    int_filter_func = partial(
+        _create_range_filter,
+        attr_func=identity,
+        end_parser=coerce_int_parser,
+        within_parser=coerce_int_parser,
+        value_coercion_func=coerce_int_parser,
+    )

    # filter from 0 to 5
    rn: Optional[RangeTuple] = RangeTuple("0", "5", None)
@ -517,4 +522,3 @@ def test_parse_datetime_float() -> None:

    # test parsing isoformat
    assert dt.timestamp() == parse_datetime_float(str(dt))
-
--- a/my/core/serialize.py
+++ b/my/core/serialize.py
@ -151,8 +151,6 @@ def dumps(
 def test_serialize_fallback() -> None:
    import json as jsn  # dont cause possible conflicts with module code

-    import pytest
-
    # cant use a namedtuple here, since the default json.dump serializer
    # serializes namedtuples as tuples, which become arrays
    # just test with an array of mixed objects
@ -168,7 +166,6 @@ def test_serialize_fallback() -> None:
        assert res == [5, 5.0]


-
 # this needs to be defined here to prevent a mypy bug
 # see https://github.com/python/mypy/issues/7281
 class _A(NamedTuple):
--- a/my/core/source.py
+++ b/my/core/source.py
@ -3,9 +3,11 @@ Decorator to gracefully handle importing a data source, or warning
 and yielding nothing (or a default) when its not available
 """

-from typing import Any, Iterator, TypeVar, Callable, Optional, Iterable, Any, cast
-from my.core.warnings import medium, warn
 from functools import wraps
+from typing import Any, Iterator, TypeVar, Callable, Optional, Iterable
+import warnings
+
+from .warnings import medium

 # The factory function may produce something that has data
 # similar to the shared model, but not exactly, so not
@ -55,7 +57,7 @@ def import_source(
                        medium(f"Module {factory_func.__qualname__} could not be imported, or isn't configured properly")
                    else:
                        medium(f"Module {module_name} ({factory_func.__qualname__}) could not be imported, or isn't configured properly")
-                        warn(f"""If you don't want to use this module, to hide this message, add '{module_name}' to your core config disabled_modules in your config, like:
+                        warnings.warn(f"""If you don't want to use this module, to hide this message, add '{module_name}' to your core config disabled_modules in your config, like:

 class core:
    disabled_modules = [{repr(module_name)}]
@ -71,4 +73,3 @@ class core:
                yield from default
        return wrapper
    return decorator
-
--- a/my/core/stats.py
+++ b/my/core/stats.py
@ -6,7 +6,7 @@ import importlib
 import inspect
 import sys
 import typing
-from typing import Optional, Callable, Any, Iterator, Sequence, Dict
+from typing import Optional, Callable, Any, Iterator, Sequence, Dict, List

 from .common import StatsFun, Stats, stat

@ -17,6 +17,7 @@ def guess_stats(module_name: str, quick: bool=False) -> Optional[StatsFun]:
    providers = guess_data_providers(module_name)
    if len(providers) == 0:
        return None
+
    def auto_stats() -> Stats:
        return {k: stat(v, quick=quick) for k, v in providers.items()}
    return auto_stats
@ -69,17 +70,17 @@ def test_is_data_provider() -> None:
    assert not idp("x")

    def no_return_type():
-        return [1, 2 ,3]
+        return [1, 2, 3]
    assert not idp(no_return_type)

    lam = lambda: [1, 2]
    assert not idp(lam)

-    def has_extra_args(count) -> typing.List[int]:
+    def has_extra_args(count) -> List[int]:
        return list(range(count))
    assert not idp(has_extra_args)

-    def has_return_type() -> typing.Sequence[str]:
+    def has_return_type() -> Sequence[str]:
        return ['a', 'b', 'c']
    assert idp(has_return_type)

@ -96,7 +97,6 @@ def test_is_data_provider() -> None:
    assert not idp(producer_inputs)


-
 # return any parameters the user is required to provide - those which don't have default values
 def sig_required_params(sig: inspect.Signature) -> Iterator[inspect.Parameter]:
    for param in sig.parameters.values():
--- a/my/core/util.py
+++ b/my/core/util.py
@ -161,7 +161,7 @@ def _walk_packages(path: Iterable[str], prefix: str='', onerror=None) -> Iterabl
            path = getattr(sys.modules[mname], '__path__', None) or []
            # don't traverse path items we've seen before
            path = [p for p in path if not seen(p)]
-            yield from _walk_packages(path, mname+'.', onerror)
+            yield from _walk_packages(path, mname + '.', onerror)

 # deprecate?
 def get_modules() -> List[HPIModule]:
--- a/my/core/warnings.py
+++ b/my/core/warnings.py
@ -12,9 +12,6 @@ import warnings
 import click


-# just bring in the scope of this module for convenience
-from warnings import warn
-
 def _colorize(x: str, color: Optional[str]=None) -> str:
    if color is None:
        return x
@ -49,3 +46,7 @@ def high(message: str, *args, **kwargs) -> None:
    '''
    kwargs['color'] = 'red'
    _warn(message, *args, **kwargs)
+
+
+# NOTE: deprecated -- legacy import
+from warnings import warn