From d3f9a8e8b69542361ad0838a1012a1ad10440b5b Mon Sep 17 00:00:00 2001 From: karlicoss Date: Sat, 19 Oct 2024 20:55:09 +0100 Subject: [PATCH 01/11] core: migrate code to benefit from 3.9 stuff (#401) for now keeping ruff on 3.8 target version, need to sort out modules as well --- my/core/__init__.py | 4 +- my/core/__main__.py | 97 +++++++++++++++++++++------------ my/core/_cpu_pool.py | 9 ++- my/core/_deprecated/kompress.py | 5 +- my/core/cachew.py | 29 +++++----- my/core/cfg.py | 20 +++++-- my/core/common.py | 36 ++++++------ my/core/compat.py | 7 ++- my/core/core_config.py | 33 ++++++----- my/core/denylist.py | 28 +++++----- my/core/discovery_pure.py | 19 ++++--- my/core/error.py | 43 ++++++++------- my/core/experimental.py | 6 +- my/core/freezer.py | 29 +++++----- my/core/hpi_compat.py | 13 +++-- my/core/influxdb.py | 29 +++++++--- my/core/init.py | 2 + my/core/kompress.py | 4 +- my/core/konsume.py | 39 +++++++------ my/core/mime.py | 11 ++-- my/core/orgmode.py | 8 ++- my/core/pandas.py | 7 +-- my/core/preinit.py | 1 + my/core/pytest.py | 4 +- my/core/query.py | 76 ++++++++++++-------------- my/core/query_range.py | 68 +++++++++++++---------- my/core/serialize.py | 20 ++++--- my/core/source.py | 12 +++- my/core/sqlite.py | 47 +++++++++------- my/core/stats.py | 34 +++++------- my/core/structure.py | 14 +++-- my/core/tests/auto_stats.py | 2 +- my/core/tests/common.py | 6 +- my/core/tests/denylist.py | 3 +- my/core/tests/test_cachew.py | 8 +-- my/core/tests/test_config.py | 2 +- my/core/time.py | 15 +++-- my/core/types.py | 13 +++-- my/core/util.py | 28 ++++++---- my/core/utils/concurrent.py | 7 ++- my/core/utils/imports.py | 14 ++--- my/core/utils/itertools.py | 59 +++++++++----------- my/core/warnings.py | 8 ++- 43 files changed, 515 insertions(+), 404 deletions(-) diff --git a/my/core/__init__.py b/my/core/__init__.py index ba633f6..cc549d5 100644 --- a/my/core/__init__.py +++ b/my/core/__init__.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING from .cfg import make_config from .common import PathIsh, Paths, get_files from .compat import assert_never -from .error import Res, unwrap, notnone +from .error import Res, notnone, unwrap from .logging import ( make_logger, ) @@ -52,7 +52,7 @@ __all__ = [ # you could put _init_hook.py next to your private my/config # that way you can configure logging/warnings/env variables on every HPI import try: - import my._init_hook # type: ignore[import-not-found] + import my._init_hook # type: ignore[import-not-found] # noqa: F401 except: pass ## diff --git a/my/core/__main__.py b/my/core/__main__.py index 2777008..00ac4ee 100644 --- a/my/core/__main__.py +++ b/my/core/__main__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import functools import importlib import inspect @@ -7,17 +9,18 @@ import shutil import sys import tempfile import traceback +from collections.abc import Iterable, Sequence from contextlib import ExitStack from itertools import chain from pathlib import Path from subprocess import PIPE, CompletedProcess, Popen, check_call, run -from typing import Any, Callable, Iterable, List, Optional, Sequence, Type +from typing import Any, Callable import click @functools.lru_cache -def mypy_cmd() -> Optional[Sequence[str]]: +def mypy_cmd() -> Sequence[str] | None: try: # preferably, use mypy from current python env import mypy # noqa: F401 fine not to use it @@ -32,7 +35,7 @@ def mypy_cmd() -> Optional[Sequence[str]]: return None -def run_mypy(cfg_path: Path) -> Optional[CompletedProcess]: +def run_mypy(cfg_path: Path) -> 
CompletedProcess | None: # todo dunno maybe use the same mypy config in repository? # I'd need to install mypy.ini then?? env = {**os.environ} @@ -63,21 +66,27 @@ def eprint(x: str) -> None: # err=True prints to stderr click.echo(x, err=True) + def indent(x: str) -> str: + # todo use textwrap.indent? return ''.join(' ' + l for l in x.splitlines(keepends=True)) -OK = '✅' +OK = '✅' OFF = '🔲' + def info(x: str) -> None: eprint(OK + ' ' + x) + def error(x: str) -> None: eprint('❌ ' + x) + def warning(x: str) -> None: - eprint('❗ ' + x) # todo yellow? + eprint('❗ ' + x) # todo yellow? + def tb(e: Exception) -> None: tb = ''.join(traceback.format_exception(Exception, e, e.__traceback__)) @@ -86,6 +95,7 @@ def tb(e: Exception) -> None: def config_create() -> None: from .preinit import get_mycfg_dir + mycfg_dir = get_mycfg_dir() created = False @@ -94,7 +104,8 @@ def config_create() -> None: my_config = mycfg_dir / 'my' / 'config' / '__init__.py' my_config.parent.mkdir(parents=True) - my_config.write_text(''' + my_config.write_text( + ''' ### HPI personal config ## see # https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules @@ -117,7 +128,8 @@ class example: ### you can insert your own configuration below ### but feel free to delete the stuff above if you don't need ti -'''.lstrip()) +'''.lstrip() + ) info(f'created empty config: {my_config}') created = True else: @@ -130,12 +142,13 @@ class example: # todo return the config as a result? def config_ok() -> bool: - errors: List[Exception] = [] + errors: list[Exception] = [] # at this point 'my' should already be imported, so doesn't hurt to extract paths from it import my + try: - paths: List[str] = list(my.__path__) + paths: list[str] = list(my.__path__) except Exception as e: errors.append(e) error('failed to determine module import path') @@ -145,19 +158,23 @@ def config_ok() -> bool: # first try doing as much as possible without actually importing my.config from .preinit import get_mycfg_dir + cfg_path = get_mycfg_dir() # alternative is importing my.config and then getting cfg_path from its __file__/__path__ # not sure which is better tbh ## check we're not using stub config import my.core + try: core_pkg_path = str(Path(my.core.__path__[0]).parent) if str(cfg_path).startswith(core_pkg_path): - error(f''' + error( + f''' Seems that the stub config is used ({cfg_path}). This is likely not going to work. See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-modules for more information -'''.strip()) +'''.strip() + ) errors.append(RuntimeError('bad config path')) except Exception as e: errors.append(e) @@ -189,7 +206,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module ## check types mypy_res = run_mypy(cfg_path) - if mypy_res is not None: # has mypy + if mypy_res is not None: # has mypy rc = mypy_res.returncode if rc == 0: info('mypy check : success') @@ -221,7 +238,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-module from .util import HPIModule, modules -def _modules(*, all: bool=False) -> Iterable[HPIModule]: +def _modules(*, all: bool = False) -> Iterable[HPIModule]: skipped = [] for m in modules(): if not all and m.skip_reason is not None: @@ -232,7 +249,7 @@ def _modules(*, all: bool=False) -> Iterable[HPIModule]: warning(f'Skipped {len(skipped)} modules: {skipped}. 
Pass --all if you want to see them.') -def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: List[str]) -> None: +def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: list[str]) -> None: if len(for_modules) > 0: # if you're checking specific modules, show errors # hopefully makes sense? @@ -256,7 +273,7 @@ def modules_check(*, verbose: bool, list_all: bool, quick: bool, for_modules: Li # todo add a --all argument to disregard is_active check? for mr in mods: skip = mr.skip_reason - m = mr.name + m = mr.name if skip is not None: eprint(f'{OFF} {click.style("SKIP", fg="yellow")}: {m:<50} {skip}') continue @@ -306,8 +323,8 @@ def list_modules(*, list_all: bool) -> None: tabulate_warnings() for mr in _modules(all=list_all): - m = mr.name - sr = mr.skip_reason + m = mr.name + sr = mr.skip_reason if sr is None: pre = OK suf = '' @@ -323,17 +340,20 @@ def tabulate_warnings() -> None: Helper to avoid visual noise in hpi modules/doctor ''' import warnings + orig = warnings.formatwarning def override(*args, **kwargs) -> str: res = orig(*args, **kwargs) return ''.join(' ' + x for x in res.splitlines(keepends=True)) + warnings.formatwarning = override # TODO loggers as well? def _requires(modules: Sequence[str]) -> Sequence[str]: from .discovery_pure import module_by_name + mods = [module_by_name(module) for module in modules] res = [] for mod in mods: @@ -360,7 +380,7 @@ def module_requires(*, module: Sequence[str]) -> None: click.echo(x) -def module_install(*, user: bool, module: Sequence[str], parallel: bool=False, break_system_packages: bool=False) -> None: +def module_install(*, user: bool, module: Sequence[str], parallel: bool = False, break_system_packages: bool = False) -> None: if isinstance(module, str): # legacy behavior, used to take a since argument module = [module] @@ -437,7 +457,7 @@ def _ui_getchar_pick(choices: Sequence[str], prompt: str = 'Select from: ') -> i return result_map[ch] -def _locate_functions_or_prompt(qualified_names: List[str], *, prompt: bool = True) -> Iterable[Callable[..., Any]]: +def _locate_functions_or_prompt(qualified_names: list[str], *, prompt: bool = True) -> Iterable[Callable[..., Any]]: from .query import QueryException, locate_qualified_function from .stats import is_data_provider @@ -487,6 +507,7 @@ def _locate_functions_or_prompt(qualified_names: List[str], *, prompt: bool = Tr def _warn_exceptions(exc: Exception) -> None: from my.core import make_logger + logger = make_logger('CLI', level='warning') logger.exception(f'hpi query: {exc}') @@ -498,14 +519,14 @@ def query_hpi_functions( *, output: str = 'json', stream: bool = False, - qualified_names: List[str], - order_key: Optional[str], - order_by_value_type: Optional[Type], + qualified_names: list[str], + order_key: str | None, + order_by_value_type: type | None, after: Any, before: Any, within: Any, reverse: bool = False, - limit: Optional[int], + limit: int | None, drop_unsorted: bool, wrap_unsorted: bool, warn_exceptions: bool, @@ -529,7 +550,8 @@ def query_hpi_functions( warn_exceptions=warn_exceptions, warn_func=_warn_exceptions, raise_exceptions=raise_exceptions, - drop_exceptions=drop_exceptions) + drop_exceptions=drop_exceptions, + ) if output == 'json': from .serialize import dumps @@ -563,7 +585,7 @@ def query_hpi_functions( # can ignore the mypy warning here, locations_to_gpx yields any errors # if you didnt pass it something that matches the LocationProtocol - for exc in locations_to_gpx(res, sys.stdout): # type: ignore[arg-type] + for exc in 
locations_to_gpx(res, sys.stdout): # type: ignore[arg-type] if warn_exceptions: _warn_exceptions(exc) elif raise_exceptions: @@ -580,6 +602,7 @@ def query_hpi_functions( except ModuleNotFoundError: eprint("'repl' typically uses ipython, install it with 'python3 -m pip install ipython'. falling back to stdlib...") import code + code.interact(local=locals()) else: IPython.embed() @@ -619,13 +642,13 @@ def main(*, debug: bool) -> None: @functools.lru_cache(maxsize=1) -def _all_mod_names() -> List[str]: +def _all_mod_names() -> list[str]: """Should include all modules, in case user is trying to diagnose issues""" # sort this, so that the order doesn't change while tabbing through return sorted([m.name for m in modules()]) -def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> List[str]: +def _module_autocomplete(ctx: click.Context, args: Sequence[str], incomplete: str) -> list[str]: return [m for m in _all_mod_names() if m.startswith(incomplete)] @@ -784,14 +807,14 @@ def query_cmd( function_name: Sequence[str], output: str, stream: bool, - order_key: Optional[str], - order_type: Optional[str], - after: Optional[str], - before: Optional[str], - within: Optional[str], - recent: Optional[str], + order_key: str | None, + order_type: str | None, + after: str | None, + before: str | None, + within: str | None, + recent: str | None, reverse: bool, - limit: Optional[int], + limit: int | None, drop_unsorted: bool, wrap_unsorted: bool, warn_exceptions: bool, @@ -827,7 +850,7 @@ def query_cmd( from datetime import date, datetime - chosen_order_type: Optional[Type] + chosen_order_type: type | None if order_type == "datetime": chosen_order_type = datetime elif order_type == "date": @@ -863,7 +886,8 @@ def query_cmd( wrap_unsorted=wrap_unsorted, warn_exceptions=warn_exceptions, raise_exceptions=raise_exceptions, - drop_exceptions=drop_exceptions) + drop_exceptions=drop_exceptions, + ) except QueryException as qe: eprint(str(qe)) sys.exit(1) @@ -878,6 +902,7 @@ def query_cmd( def test_requires() -> None: from click.testing import CliRunner + result = CliRunner().invoke(main, ['module', 'requires', 'my.github.ghexport', 'my.browser.export']) assert result.exit_code == 0 assert "github.com/karlicoss/ghexport" in result.output diff --git a/my/core/_cpu_pool.py b/my/core/_cpu_pool.py index 2369075..6b107a7 100644 --- a/my/core/_cpu_pool.py +++ b/my/core/_cpu_pool.py @@ -10,15 +10,18 @@ how many cores we want to dedicate to the DAL. Enabled by the env variable, specifying how many cores to dedicate e.g. "HPI_CPU_POOL=4 hpi query ..." 
""" + +from __future__ import annotations + import os from concurrent.futures import ProcessPoolExecutor -from typing import Optional, cast +from typing import cast _NOT_SET = cast(ProcessPoolExecutor, object()) -_INSTANCE: Optional[ProcessPoolExecutor] = _NOT_SET +_INSTANCE: ProcessPoolExecutor | None = _NOT_SET -def get_cpu_pool() -> Optional[ProcessPoolExecutor]: +def get_cpu_pool() -> ProcessPoolExecutor | None: global _INSTANCE if _INSTANCE is _NOT_SET: use_cpu_pool = os.environ.get('HPI_CPU_POOL') diff --git a/my/core/_deprecated/kompress.py b/my/core/_deprecated/kompress.py index cd27a7f..ce14fad 100644 --- a/my/core/_deprecated/kompress.py +++ b/my/core/_deprecated/kompress.py @@ -1,16 +1,17 @@ """ Various helpers for compression """ + # fmt: off from __future__ import annotations import io import pathlib -import sys +from collections.abc import Iterator, Sequence from datetime import datetime from functools import total_ordering from pathlib import Path -from typing import IO, Any, Iterator, Sequence, Union +from typing import IO, Any, Union PathIsh = Union[Path, str] diff --git a/my/core/cachew.py b/my/core/cachew.py index dc6ed79..9ccee09 100644 --- a/my/core/cachew.py +++ b/my/core/cachew.py @@ -1,16 +1,18 @@ -from .internal import assert_subpackage; assert_subpackage(__name__) +from __future__ import annotations + +from .internal import assert_subpackage + +assert_subpackage(__name__) import logging import sys +from collections.abc import Iterator from contextlib import contextmanager from pathlib import Path from typing import ( TYPE_CHECKING, Any, Callable, - Iterator, - Optional, - Type, TypeVar, Union, cast, @@ -21,7 +23,6 @@ import appdirs # type: ignore[import-untyped] from . import warnings - PathIsh = Union[str, Path] # avoid circular import from .common @@ -60,12 +61,12 @@ def _appdirs_cache_dir() -> Path: _CACHE_DIR_NONE_HACK = Path('/tmp/hpi/cachew_none_hack') -def cache_dir(suffix: Optional[PathIsh] = None) -> Path: +def cache_dir(suffix: PathIsh | None = None) -> Path: from . import core_config as CC cdir_ = CC.config.get_cache_dir() - sp: Optional[Path] = None + sp: Path | None = None if suffix is not None: sp = Path(suffix) # guess if you do need absolute, better path it directly instead of as suffix? @@ -144,21 +145,19 @@ if TYPE_CHECKING: # we need two versions due to @doublewrap # this is when we just annotate as @cachew without any args @overload # type: ignore[no-overload-impl] - def mcachew(fun: F) -> F: - ... + def mcachew(fun: F) -> F: ... @overload def mcachew( - cache_path: Optional[PathProvider] = ..., + cache_path: PathProvider | None = ..., *, force_file: bool = ..., - cls: Optional[Type] = ..., + cls: type | None = ..., depends_on: HashFunction = ..., - logger: Optional[logging.Logger] = ..., + logger: logging.Logger | None = ..., chunk_by: int = ..., - synthetic_key: Optional[str] = ..., - ) -> Callable[[F], F]: - ... + synthetic_key: str | None = ..., + ) -> Callable[[F], F]: ... else: mcachew = _mcachew_impl diff --git a/my/core/cfg.py b/my/core/cfg.py index a71a7e3..9851443 100644 --- a/my/core/cfg.py +++ b/my/core/cfg.py @@ -3,28 +3,32 @@ from __future__ import annotations import importlib import re import sys +from collections.abc import Iterator from contextlib import ExitStack, contextmanager -from typing import Any, Callable, Dict, Iterator, Optional, Type, TypeVar +from typing import Any, Callable, TypeVar -Attrs = Dict[str, Any] +Attrs = dict[str, Any] C = TypeVar('C') + # todo not sure about it, could be overthinking... 
# but short enough to change later # TODO document why it's necessary? -def make_config(cls: Type[C], migration: Callable[[Attrs], Attrs]=lambda x: x) -> C: +def make_config(cls: type[C], migration: Callable[[Attrs], Attrs] = lambda x: x) -> C: user_config = cls.__base__ old_props = { # NOTE: deliberately use gettatr to 'force' class properties here - k: getattr(user_config, k) for k in vars(user_config) + k: getattr(user_config, k) + for k in vars(user_config) } new_props = migration(old_props) from dataclasses import fields + params = { k: v for k, v in new_props.items() - if k in {f.name for f in fields(cls)} # type: ignore[arg-type] # see https://github.com/python/typing_extensions/issues/115 + if k in {f.name for f in fields(cls)} # type: ignore[arg-type] # see https://github.com/python/typing_extensions/issues/115 } # todo maybe return type here? return cls(**params) @@ -51,6 +55,8 @@ def _override_config(config: F) -> Iterator[F]: ModuleRegex = str + + @contextmanager def _reload_modules(modules: ModuleRegex) -> Iterator[None]: # need to use list here, otherwise reordering with set might mess things up @@ -81,13 +87,14 @@ def _reload_modules(modules: ModuleRegex) -> Iterator[None]: @contextmanager -def tmp_config(*, modules: Optional[ModuleRegex]=None, config=None): +def tmp_config(*, modules: ModuleRegex | None = None, config=None): if modules is None: assert config is None if modules is not None: assert config is not None import my.config + with ExitStack() as module_reload_stack, _override_config(my.config) as new_config: if config is not None: overrides = {k: v for k, v in vars(config).items() if not k.startswith('__')} @@ -102,6 +109,7 @@ def tmp_config(*, modules: Optional[ModuleRegex]=None, config=None): def test_tmp_config() -> None: class extra: data_path = '/path/to/data' + with tmp_config() as c: assert c.google != 'whatever' assert not hasattr(c, 'extra') diff --git a/my/core/common.py b/my/core/common.py index a2c2ad3..91fe9bd 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -1,20 +1,18 @@ +from __future__ import annotations + import os +from collections.abc import Iterable, Sequence from glob import glob as do_glob from pathlib import Path from typing import ( TYPE_CHECKING, Callable, Generic, - Iterable, - List, - Sequence, - Tuple, TypeVar, Union, ) -from . import compat -from . import warnings +from . import compat, warnings # some helper functions # TODO start deprecating this? soon we'd be able to use Path | str syntax which is shorter and more explicit @@ -24,20 +22,22 @@ Paths = Union[Sequence[PathIsh], PathIsh] DEFAULT_GLOB = '*' + + def get_files( pp: Paths, - glob: str=DEFAULT_GLOB, + glob: str = DEFAULT_GLOB, *, - sort: bool=True, - guess_compression: bool=True, -) -> Tuple[Path, ...]: + sort: bool = True, + guess_compression: bool = True, +) -> tuple[Path, ...]: """ Helper function to avoid boilerplate. Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense """ # TODO FIXME mm, some wrapper to assert iterator isn't empty? - sources: List[Path] + sources: list[Path] if isinstance(pp, Path): sources = [pp] elif isinstance(pp, str): @@ -54,7 +54,7 @@ def get_files( # TODO ugh. very flaky... -3 because [, get_files(), ] return traceback.extract_stack()[-3].filename - paths: List[Path] = [] + paths: list[Path] = [] for src in sources: if src.parts[0] == '~': src = src.expanduser() @@ -64,7 +64,7 @@ def get_files( if glob != DEFAULT_GLOB: warnings.medium(f"{caller()}: treating {gs} as glob path. 
Explicit glob={glob} argument is ignored!") paths.extend(map(Path, do_glob(gs))) - elif os.path.isdir(str(src)): + elif os.path.isdir(str(src)): # noqa: PTH112 # NOTE: we're using os.path here on purpose instead of src.is_dir # the reason is is_dir for archives might return True and then # this clause would try globbing insize the archives @@ -234,16 +234,14 @@ if not TYPE_CHECKING: return types.asdict(*args, **kwargs) # todo wrap these in deprecated decorator as well? + # TODO hmm how to deprecate these in runtime? + # tricky cause they are actually classes/types + from typing import Literal # noqa: F401 + from .cachew import mcachew # noqa: F401 # this is kinda internal, should just use my.core.logging.setup_logger if necessary from .logging import setup_logger - - # TODO hmm how to deprecate these in runtime? - # tricky cause they are actually classes/types - - from typing import Literal # noqa: F401 - from .stats import Stats from .types import ( Json, diff --git a/my/core/compat.py b/my/core/compat.py index 3273ff4..8f719a8 100644 --- a/my/core/compat.py +++ b/my/core/compat.py @@ -3,6 +3,8 @@ Contains backwards compatibility helpers for different python versions. If something is relevant to HPI itself, please put it in .hpi_compat instead ''' +from __future__ import annotations + import sys from typing import TYPE_CHECKING @@ -29,6 +31,7 @@ if not TYPE_CHECKING: @deprecated('use .removesuffix method on string directly instead') def removesuffix(text: str, suffix: str) -> str: return text.removesuffix(suffix) + ## ## used to have compat function before 3.8 for these, keeping for runtime back compatibility @@ -46,13 +49,13 @@ else: # bisect_left doesn't have a 'key' parameter (which we use) # till python3.10 if sys.version_info[:2] <= (3, 9): - from typing import Any, Callable, List, Optional, TypeVar + from typing import Any, Callable, List, Optional, TypeVar # noqa: UP035 X = TypeVar('X') # copied from python src # fmt: off - def bisect_left(a: List[Any], x: Any, lo: int=0, hi: Optional[int]=None, *, key: Optional[Callable[..., Any]]=None) -> int: + def bisect_left(a: list[Any], x: Any, lo: int=0, hi: int | None=None, *, key: Callable[..., Any] | None=None) -> int: if lo < 0: raise ValueError('lo must be non-negative') if hi is None: diff --git a/my/core/core_config.py b/my/core/core_config.py index 9036971..3f26c03 100644 --- a/my/core/core_config.py +++ b/my/core/core_config.py @@ -2,18 +2,21 @@ Bindings for the 'core' HPI configuration ''' +from __future__ import annotations + import re +from collections.abc import Sequence from dataclasses import dataclass from pathlib import Path -from typing import Optional, Sequence -from . import PathIsh, warnings +from . import warnings try: from my.config import core as user_config # type: ignore[attr-defined] except Exception as e: try: from my.config import common as user_config # type: ignore[attr-defined] + warnings.high("'common' config section is deprecated. Please rename it to 'core'.") except Exception as e2: # make it defensive, because it's pretty commonly used and would be annoying if it breaks hpi doctor etc. @@ -24,6 +27,7 @@ except Exception as e: _HPI_CACHE_DIR_DEFAULT = '' + @dataclass class Config(user_config): ''' @@ -34,7 +38,7 @@ class Config(user_config): cache_dir = '/your/custom/cache/path' ''' - cache_dir: Optional[PathIsh] = _HPI_CACHE_DIR_DEFAULT + cache_dir: Path | str | None = _HPI_CACHE_DIR_DEFAULT ''' Base directory for cachew. 
- if None , means cache is disabled @@ -44,7 +48,7 @@ class Config(user_config): NOTE: you shouldn't use this attribute in HPI modules directly, use Config.get_cache_dir()/cachew.cache_dir() instead ''' - tmp_dir: Optional[PathIsh] = None + tmp_dir: Path | str | None = None ''' Path to a temporary directory. This can be used temporarily while extracting zipfiles etc... @@ -52,34 +56,36 @@ class Config(user_config): - otherwise , use the specified directory as the base temporary directory ''' - enabled_modules : Optional[Sequence[str]] = None + enabled_modules: Sequence[str] | None = None ''' list of regexes/globs - None means 'rely on disabled_modules' ''' - disabled_modules: Optional[Sequence[str]] = None + disabled_modules: Sequence[str] | None = None ''' list of regexes/globs - None means 'rely on enabled_modules' ''' - def get_cache_dir(self) -> Optional[Path]: + def get_cache_dir(self) -> Path | None: cdir = self.cache_dir if cdir is None: return None if cdir == _HPI_CACHE_DIR_DEFAULT: from .cachew import _appdirs_cache_dir + return _appdirs_cache_dir() else: return Path(cdir).expanduser() def get_tmp_dir(self) -> Path: - tdir: Optional[PathIsh] = self.tmp_dir + tdir: Path | str | None = self.tmp_dir tpath: Path # use tempfile if unset if tdir is None: import tempfile + tpath = Path(tempfile.gettempdir()) / 'HPI' else: tpath = Path(tdir) @@ -87,10 +93,10 @@ class Config(user_config): tpath.mkdir(parents=True, exist_ok=True) return tpath - def _is_module_active(self, module: str) -> Optional[bool]: + def _is_module_active(self, module: str) -> bool | None: # None means the config doesn't specify anything # todo might be nice to return the 'reason' too? e.g. which option has matched - def matches(specs: Sequence[str]) -> Optional[str]: + def matches(specs: Sequence[str]) -> str | None: for spec in specs: # not sure because . (packages separate) matches anything, but I guess unlikely to clash if re.match(spec, module): @@ -106,10 +112,10 @@ class Config(user_config): return None else: return False - else: # not None + else: # not None if off is None: return True - else: # not None + else: # not None # fallback onto the 'enable everything', then the user will notice warnings.medium(f"[module]: conflicting regexes '{on}' and '{off}' are set in the config. 
Please only use one of them.") return True @@ -121,8 +127,8 @@ config = make_config(Config) ### tests start +from collections.abc import Iterator from contextlib import contextmanager as ctx -from typing import Iterator @ctx @@ -163,4 +169,5 @@ def test_active_modules() -> None: assert cc._is_module_active("my.body.exercise") is True assert len(record_warnings) == 1 + ### tests end diff --git a/my/core/denylist.py b/my/core/denylist.py index 92faf2c..c92f9a0 100644 --- a/my/core/denylist.py +++ b/my/core/denylist.py @@ -5,23 +5,25 @@ A helper module for defining denylists for sources programmatically For docs, see doc/DENYLIST.md """ +from __future__ import annotations + import functools import json import sys from collections import defaultdict +from collections.abc import Iterator, Mapping from pathlib import Path -from typing import Any, Dict, Iterator, List, Mapping, Set, TypeVar +from typing import Any, TypeVar import click from more_itertools import seekable -from my.core.common import PathIsh -from my.core.serialize import dumps -from my.core.warnings import medium +from .serialize import dumps +from .warnings import medium T = TypeVar("T") -DenyMap = Mapping[str, Set[Any]] +DenyMap = Mapping[str, set[Any]] def _default_key_func(obj: T) -> str: @@ -29,9 +31,9 @@ def _default_key_func(obj: T) -> str: class DenyList: - def __init__(self, denylist_file: PathIsh): + def __init__(self, denylist_file: Path | str) -> None: self.file = Path(denylist_file).expanduser().absolute() - self._deny_raw_list: List[Dict[str, Any]] = [] + self._deny_raw_list: list[dict[str, Any]] = [] self._deny_map: DenyMap = defaultdict(set) # deny cli, user can override these @@ -45,7 +47,7 @@ class DenyList: return deny_map: DenyMap = defaultdict(set) - data: List[Dict[str, Any]]= json.loads(self.file.read_text()) + data: list[dict[str, Any]] = json.loads(self.file.read_text()) self._deny_raw_list = data for ignore in data: @@ -112,7 +114,7 @@ class DenyList: self._load() self._deny_raw({key: self._stringify_value(value)}, write=write) - def _deny_raw(self, data: Dict[str, Any], *, write: bool = False) -> None: + def _deny_raw(self, data: dict[str, Any], *, write: bool = False) -> None: self._deny_raw_list.append(data) if write: self.write() @@ -131,7 +133,7 @@ class DenyList: def _deny_cli_remember( self, items: Iterator[T], - mem: Dict[str, T], + mem: dict[str, T], ) -> Iterator[str]: keyf = self._deny_cli_key_func or _default_key_func # i.e., convert each item to a string, and map str -> item @@ -157,10 +159,8 @@ class DenyList: # reset the iterator sit.seek(0) # so we can map the selected string from fzf back to the original objects - memory_map: Dict[str, T] = {} - picker = FzfPrompt( - executable_path=self.fzf_path, default_options="--no-multi" - ) + memory_map: dict[str, T] = {} + picker = FzfPrompt(executable_path=self.fzf_path, default_options="--no-multi") picked_l = picker.prompt( self._deny_cli_remember(itr, memory_map), "--read0", diff --git a/my/core/discovery_pure.py b/my/core/discovery_pure.py index b753de8..18a19c4 100644 --- a/my/core/discovery_pure.py +++ b/my/core/discovery_pure.py @@ -10,6 +10,8 @@ This potentially allows it to be: It should be free of external modules, importlib, exec, etc. etc. 
''' +from __future__ import annotations + REQUIRES = 'REQUIRES' NOT_HPI_MODULE_VAR = '__NOT_HPI_MODULE__' @@ -19,8 +21,9 @@ import ast import logging import os import re +from collections.abc import Iterable, Sequence from pathlib import Path -from typing import Any, Iterable, List, NamedTuple, Optional, Sequence, cast +from typing import Any, NamedTuple, Optional, cast ''' None means that requirements weren't defined (different from empty requirements) @@ -30,11 +33,11 @@ Requires = Optional[Sequence[str]] class HPIModule(NamedTuple): name: str - skip_reason: Optional[str] - doc: Optional[str] = None - file: Optional[Path] = None + skip_reason: str | None + doc: str | None = None + file: Path | None = None requires: Requires = None - legacy: Optional[str] = None # contains reason/deprecation warning + legacy: str | None = None # contains reason/deprecation warning def ignored(m: str) -> bool: @@ -55,13 +58,13 @@ def has_stats(src: Path) -> bool: def _has_stats(code: str) -> bool: a: ast.Module = ast.parse(code) for x in a.body: - try: # maybe assign + try: # maybe assign [tg] = cast(Any, x).targets if tg.id == 'stats': return True except: pass - try: # maybe def? + try: # maybe def? name = cast(Any, x).name if name == 'stats': return True @@ -144,7 +147,7 @@ def all_modules() -> Iterable[HPIModule]: def _iter_my_roots() -> Iterable[Path]: import my # doesn't import any code, because of namespace package - paths: List[str] = list(my.__path__) + paths: list[str] = list(my.__path__) if len(paths) == 0: # should probably never happen?, if this code is running, it was imported # because something was added to __path__ to match this name diff --git a/my/core/error.py b/my/core/error.py index ed26dda..b308869 100644 --- a/my/core/error.py +++ b/my/core/error.py @@ -3,19 +3,16 @@ Various error handling helpers See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail """ +from __future__ import annotations + import traceback +from collections.abc import Iterable, Iterator from datetime import datetime from itertools import tee from typing import ( Any, Callable, - Iterable, - Iterator, - List, Literal, - Optional, - Tuple, - Type, TypeVar, Union, cast, @@ -33,7 +30,7 @@ Res = ResT[T, Exception] ErrorPolicy = Literal["yield", "raise", "drop"] -def notnone(x: Optional[T]) -> T: +def notnone(x: T | None) -> T: assert x is not None return x @@ -60,13 +57,15 @@ def raise_exceptions(itr: Iterable[Res[T]]) -> Iterator[T]: yield o -def warn_exceptions(itr: Iterable[Res[T]], warn_func: Optional[Callable[[Exception], None]] = None) -> Iterator[T]: +def warn_exceptions(itr: Iterable[Res[T]], warn_func: Callable[[Exception], None] | None = None) -> Iterator[T]: # if not provided, use the 'warnings' module if warn_func is None: from my.core.warnings import medium + def _warn_func(e: Exception) -> None: # TODO: print traceback? but user could always --raise-exceptions as well medium(str(e)) + warn_func = _warn_func for o in itr: @@ -81,7 +80,7 @@ def echain(ex: E, cause: Exception) -> E: return ex -def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]: +def split_errors(l: Iterable[ResT[T, E]], ET: type[E]) -> tuple[Iterable[T], Iterable[E]]: # TODO would be nice to have ET=Exception default? but it causes some mypy complaints? vit, eit = tee(l) # TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type? 
@@ -99,7 +98,9 @@ def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Ite K = TypeVar('K') -def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T]]: + + +def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> list[Res[T]]: """ Sort a sequence potentially interleaved with errors/entries on which the key can't be computed. The general idea is: the error sticks to the non-error entry that follows it @@ -107,7 +108,7 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T] group = [] groups = [] for i in items: - k: Optional[K] + k: K | None try: k = key(i) except Exception: # error white computing key? dunno, might be nice to handle... @@ -117,10 +118,10 @@ def sort_res_by(items: Iterable[Res[T]], key: Callable[[Any], K]) -> List[Res[T] groups.append((k, group)) group = [] - results: List[Res[T]] = [] - for _v, grp in sorted(groups, key=lambda p: p[0]): # type: ignore[return-value, arg-type] # TODO SupportsLessThan?? + results: list[Res[T]] = [] + for _v, grp in sorted(groups, key=lambda p: p[0]): # type: ignore[return-value, arg-type] # TODO SupportsLessThan?? results.extend(grp) - results.extend(group) # handle last group (it will always be errors only) + results.extend(group) # handle last group (it will always be errors only) return results @@ -162,20 +163,20 @@ def test_sort_res_by() -> None: # helpers to associate timestamps with the errors (so something meaningful could be displayed on the plots, for example) # todo document it under 'patterns' somewhere... # todo proper typevar? -def set_error_datetime(e: Exception, dt: Optional[datetime]) -> None: +def set_error_datetime(e: Exception, dt: datetime | None) -> None: if dt is None: return e.args = (*e.args, dt) # todo not sure if should return new exception? 
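# attach_dt is the inline-friendly flavour of set_error_datetime: it returns
# the exception, so a provider can tag and yield an error in one expression,
# e.g. (parse_raw/raw_dt are hypothetical, only attach_dt is real):
#     try:
#         yield parse_raw(raw)
#     except Exception as e:
#         yield attach_dt(e, dt=raw_dt)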
-def attach_dt(e: Exception, *, dt: Optional[datetime]) -> Exception: +def attach_dt(e: Exception, *, dt: datetime | None) -> Exception: set_error_datetime(e, dt) return e # todo it might be problematic because might mess with timezones (when it's converted to string, it's converted to a shift) -def extract_error_datetime(e: Exception) -> Optional[datetime]: +def extract_error_datetime(e: Exception) -> datetime | None: import re for x in reversed(e.args): @@ -201,10 +202,10 @@ MODULE_SETUP_URL = 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#p def warn_my_config_import_error( - err: Union[ImportError, AttributeError], + err: ImportError | AttributeError, *, - help_url: Optional[str] = None, - module_name: Optional[str] = None, + help_url: str | None = None, + module_name: str | None = None, ) -> bool: """ If the user tried to import something from my.config but it failed, @@ -265,7 +266,7 @@ def test_datetime_errors() -> None: import pytz # noqa: I001 dt_notz = datetime.now() - dt_tz = datetime.now(tz=pytz.timezone('Europe/Amsterdam')) + dt_tz = datetime.now(tz=pytz.timezone('Europe/Amsterdam')) for dt in [dt_tz, dt_notz]: e1 = RuntimeError('whatever') assert extract_error_datetime(e1) is None diff --git a/my/core/experimental.py b/my/core/experimental.py index 1a78272..0a1c3b4 100644 --- a/my/core/experimental.py +++ b/my/core/experimental.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import sys import types -from typing import Any, Dict, Optional +from typing import Any # The idea behind this one is to support accessing "overlaid/shadowed" modules from namespace packages @@ -20,7 +22,7 @@ def import_original_module( file: str, *, star: bool = False, - globals: Optional[Dict[str, Any]] = None, + globals: dict[str, Any] | None = None, ) -> types.ModuleType: module_to_restore = sys.modules[module_name] diff --git a/my/core/freezer.py b/my/core/freezer.py index 93bceb7..4fb0e25 100644 --- a/my/core/freezer.py +++ b/my/core/freezer.py @@ -1,29 +1,29 @@ -from .internal import assert_subpackage; assert_subpackage(__name__) +from __future__ import annotations -import dataclasses as dcl +from .internal import assert_subpackage + +assert_subpackage(__name__) + +import dataclasses import inspect -from typing import Any, Type, TypeVar +from typing import Any, Generic, TypeVar D = TypeVar('D') -def _freeze_dataclass(Orig: Type[D]): - ofields = [(f.name, f.type, f) for f in dcl.fields(Orig)] # type: ignore[arg-type] # see https://github.com/python/typing_extensions/issues/115 +def _freeze_dataclass(Orig: type[D]): + ofields = [(f.name, f.type, f) for f in dataclasses.fields(Orig)] # type: ignore[arg-type] # see https://github.com/python/typing_extensions/issues/115 # extract properties along with their types - props = list(inspect.getmembers(Orig, lambda o: isinstance(o, property))) + props = list(inspect.getmembers(Orig, lambda o: isinstance(o, property))) pfields = [(name, inspect.signature(getattr(prop, 'fget')).return_annotation) for name, prop in props] # FIXME not sure about name? # NOTE: sadly passing bases=[Orig] won't work, python won't let us override properties with fields - RRR = dcl.make_dataclass('RRR', fields=[*ofields, *pfields]) + RRR = dataclasses.make_dataclass('RRR', fields=[*ofields, *pfields]) # todo maybe even declare as slots? return props, RRR -# todo need some decorator thingie? -from typing import Generic - - class Freezer(Generic[D]): ''' Some magic which converts dataclass properties into fields. 
@@ -31,13 +31,13 @@ class Freezer(Generic[D]): For now only supports dataclasses. ''' - def __init__(self, Orig: Type[D]) -> None: + def __init__(self, Orig: type[D]) -> None: self.Orig = Orig self.props, self.Frozen = _freeze_dataclass(Orig) def freeze(self, value: D) -> D: pvalues = {name: getattr(value, name) for name, _ in self.props} - return self.Frozen(**dcl.asdict(value), **pvalues) # type: ignore[call-overload] # see https://github.com/python/typing_extensions/issues/115 + return self.Frozen(**dataclasses.asdict(value), **pvalues) # type: ignore[call-overload] # see https://github.com/python/typing_extensions/issues/115 ### tests @@ -45,7 +45,7 @@ class Freezer(Generic[D]): # this needs to be defined here to prevent a mypy bug # see https://github.com/python/mypy/issues/7281 -@dcl.dataclass +@dataclasses.dataclass class _A: x: Any @@ -71,6 +71,7 @@ def test_freezer() -> None: assert fd['typed'] == 123 assert fd['untyped'] == [1, 2, 3] + ### # TODO shit. what to do with exceptions? diff --git a/my/core/hpi_compat.py b/my/core/hpi_compat.py index 949046d..3687483 100644 --- a/my/core/hpi_compat.py +++ b/my/core/hpi_compat.py @@ -3,11 +3,14 @@ Contains various backwards compatibility/deprecation helpers relevant to HPI its (as opposed to .compat module which implements compatibility between python versions) """ +from __future__ import annotations + import inspect import os import re +from collections.abc import Iterator, Sequence from types import ModuleType -from typing import Iterator, List, Optional, Sequence, TypeVar +from typing import TypeVar from . import warnings @@ -15,7 +18,7 @@ from . import warnings def handle_legacy_import( parent_module_name: str, legacy_submodule_name: str, - parent_module_path: List[str], + parent_module_path: list[str], ) -> bool: ### # this is to trick mypy into treating this as a proper namespace package @@ -122,8 +125,8 @@ class always_supports_sequence(Iterator[V]): def __init__(self, it: Iterator[V]) -> None: self._it = it - self._list: Optional[List[V]] = None - self._lit: Optional[Iterator[V]] = None + self._list: list[V] | None = None + self._lit: Iterator[V] | None = None def __iter__(self) -> Iterator[V]: # noqa: PYI034 if self._list is not None: @@ -142,7 +145,7 @@ class always_supports_sequence(Iterator[V]): return getattr(self._it, name) @property - def _aslist(self) -> List[V]: + def _aslist(self) -> list[V]: if self._list is None: qualname = getattr(self._it, '__qualname__', '') # defensive just in case warnings.medium(f'Using {qualname} as list is deprecated. Migrate to iterative processing or call list() explicitly.') diff --git a/my/core/influxdb.py b/my/core/influxdb.py index 25eeba1..78a439a 100644 --- a/my/core/influxdb.py +++ b/my/core/influxdb.py @@ -2,9 +2,14 @@ TODO doesn't really belong to 'core' morally, but can think of moving out later ''' -from .internal import assert_subpackage; assert_subpackage(__name__) +from __future__ import annotations -from typing import Any, Dict, Iterable, Optional +from .internal import assert_subpackage + +assert_subpackage(__name__) + +from collections.abc import Iterable +from typing import Any import click @@ -21,7 +26,7 @@ class config: RESET_DEFAULT = False -def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_col: str='dt') -> None: +def fill(it: Iterable[Any], *, measurement: str, reset: bool = RESET_DEFAULT, dt_col: str = 'dt') -> None: # todo infer dt column automatically, reuse in stat? # it doesn't like dots, ends up some syntax error? 
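# e.g. a measurement named 'my.coding.commits' ends up as 'my_coding_commits'
# below (module name purely illustrative)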
measurement = measurement.replace('.', '_') @@ -30,6 +35,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c db = config.db from influxdb import InfluxDBClient # type: ignore + client = InfluxDBClient() # todo maybe create if not exists? # client.create_database(db) @@ -40,7 +46,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c client.delete_series(database=db, measurement=measurement) # TODO need to take schema here... - cache: Dict[str, bool] = {} + cache: dict[str, bool] = {} def good(f, v) -> bool: c = cache.get(f) @@ -59,9 +65,9 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c def dit() -> Iterable[Json]: for i in it: d = asdict(i) - tags: Optional[Json] = None - tags_ = d.get('tags') # meh... handle in a more robust manner - if tags_ is not None and isinstance(tags_, dict): # FIXME meh. + tags: Json | None = None + tags_ = d.get('tags') # meh... handle in a more robust manner + if tags_ is not None and isinstance(tags_, dict): # FIXME meh. del d['tags'] tags = tags_ @@ -84,6 +90,7 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c } from more_itertools import chunked + # "The optimal batch size is 5000 lines of line protocol." # some chunking is def necessary, otherwise it fails inserted = 0 @@ -97,9 +104,9 @@ def fill(it: Iterable[Any], *, measurement: str, reset: bool=RESET_DEFAULT, dt_c # todo "Specify timestamp precision when writing to InfluxDB."? -def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> None: +def magic_fill(it, *, name: str | None = None, reset: bool = RESET_DEFAULT) -> None: if name is None: - assert callable(it) # generators have no name/module + assert callable(it) # generators have no name/module name = f'{it.__module__}:{it.__name__}' assert name is not None @@ -109,6 +116,7 @@ def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> No from itertools import tee from more_itertools import first, one + it, x = tee(it) f = first(x, default=None) if f is None: @@ -118,9 +126,11 @@ def magic_fill(it, *, name: Optional[str]=None, reset: bool=RESET_DEFAULT) -> No # TODO can we reuse pandas code or something? # from .pandas import _as_columns + schema = _as_columns(type(f)) from datetime import datetime + dtex = RuntimeError(f'expected single datetime field. schema: {schema}') dtf = one((f for f, t in schema.items() if t == datetime), too_short=dtex, too_long=dtex) @@ -137,6 +147,7 @@ def main() -> None: @click.argument('FUNCTION_NAME', type=str, required=True) def populate(*, function_name: str, reset: bool) -> None: from .__main__ import _locate_functions_or_prompt + [provider] = list(_locate_functions_or_prompt([function_name])) # todo could have a non-interactive version which populates from all data sources for the provider? magic_fill(provider, reset=reset) diff --git a/my/core/init.py b/my/core/init.py index 7a30955..644c7b4 100644 --- a/my/core/init.py +++ b/my/core/init.py @@ -19,6 +19,7 @@ def setup_config() -> None: from pathlib import Path from .preinit import get_mycfg_dir + mycfg_dir = get_mycfg_dir() if not mycfg_dir.exists(): @@ -43,6 +44,7 @@ See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-mo except ImportError as ex: # just in case... who knows what crazy setup users have import logging + logging.exception(ex) warnings.warn(f""" Importing 'my.config' failed! (error: {ex}). This is likely to result in issues. 
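The file-by-file changes above all follow one mechanical recipe: add "from __future__ import annotations" at the top, rewrite Optional[X] as X | None and List/Dict/Tuple/Type as their lowercase builtins (PEP 585), and import container ABCs from collections.abc rather than typing. The future import is what keeps the new syntax legal on the 3.9 target, since annotations stop being evaluated at runtime. A minimal sketch of the pattern (the function and names are illustrative, not from this repo):

from __future__ import annotations  # PEP 563: annotations become lazy strings

from collections.abc import Iterator  # typing.Iterator is deprecated since 3.9


def tail(path: str | None = None, n: int = 10) -> Iterator[str]:
    # the PEP 604 union above is fine on 3.9: with the future import it is
    # never evaluated; as a runtime expression (e.g. isinstance(x, int | str))
    # it would still require python 3.10
    if path is None:
        return
    with open(path) as f:
        yield from f.readlines()[-n:]
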
diff --git a/my/core/kompress.py b/my/core/kompress.py index 7cbf310..8accb2d 100644 --- a/my/core/kompress.py +++ b/my/core/kompress.py @@ -1,4 +1,6 @@ -from .internal import assert_subpackage; assert_subpackage(__name__) +from .internal import assert_subpackage + +assert_subpackage(__name__) from . import warnings diff --git a/my/core/konsume.py b/my/core/konsume.py index 0e4a2fe..6d24167 100644 --- a/my/core/konsume.py +++ b/my/core/konsume.py @@ -5,17 +5,21 @@ This can potentially allow both for safer defensive parsing, and let you know if TODO perhaps need to get some inspiration from linear logic to decide on a nice API... ''' +from __future__ import annotations + from collections import OrderedDict -from typing import Any, List +from typing import Any def ignore(w, *keys): for k in keys: w[k].ignore() + def zoom(w, *keys): return [w[k].zoom() for k in keys] + # TODO need to support lists class Zoomable: def __init__(self, parent, *args, **kwargs) -> None: @@ -40,7 +44,7 @@ class Zoomable: assert self.parent is not None self.parent._remove(self) - def zoom(self) -> 'Zoomable': + def zoom(self) -> Zoomable: self.consume() return self @@ -63,6 +67,7 @@ class Wdict(Zoomable, OrderedDict): def this_consumed(self): return len(self) == 0 + # TODO specify mypy type for the index special method? @@ -77,6 +82,7 @@ class Wlist(Zoomable, list): def this_consumed(self): return len(self) == 0 + class Wvalue(Zoomable): def __init__(self, parent, value: Any) -> None: super().__init__(parent) @@ -87,23 +93,20 @@ class Wvalue(Zoomable): return [] def this_consumed(self): - return True # TODO not sure.. + return True # TODO not sure.. def __repr__(self): return 'WValue{' + repr(self.value) + '}' -from typing import Tuple - - -def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]: +def _wrap(j, parent=None) -> tuple[Zoomable, list[Zoomable]]: res: Zoomable - cc: List[Zoomable] + cc: list[Zoomable] if isinstance(j, dict): res = Wdict(parent) cc = [res] for k, v in j.items(): - vv, c = _wrap(v, parent=res) + vv, c = _wrap(v, parent=res) res[k] = vv cc.extend(c) return res, cc @@ -122,13 +125,14 @@ def _wrap(j, parent=None) -> Tuple[Zoomable, List[Zoomable]]: raise RuntimeError(f'Unexpected type: {type(j)} {j}') +from collections.abc import Iterator from contextlib import contextmanager -from typing import Iterator class UnconsumedError(Exception): pass + # TODO think about error policy later... @contextmanager def wrap(j, *, throw=True) -> Iterator[Zoomable]: @@ -137,7 +141,7 @@ def wrap(j, *, throw=True) -> Iterator[Zoomable]: yield w for c in children: - if not c.this_consumed(): # TODO hmm. how does it figure out if it's consumed??? + if not c.this_consumed(): # TODO hmm. how does it figure out if it's consumed??? if throw: # TODO need to keep a full path or something... 
raise UnconsumedError(f''' @@ -153,6 +157,7 @@ from typing import cast def test_unconsumed() -> None: import pytest + with pytest.raises(UnconsumedError): with wrap({'a': 1234}) as w: w = cast(Wdict, w) @@ -163,6 +168,7 @@ def test_unconsumed() -> None: w = cast(Wdict, w) d = w['c']['d'].zoom() + def test_consumed() -> None: with wrap({'a': 1234}) as w: w = cast(Wdict, w) @@ -173,6 +179,7 @@ def test_consumed() -> None: c = w['c'].zoom() d = c['d'].zoom() + def test_types() -> None: # (string, number, object, array, boolean or nul with wrap({'string': 'string', 'number': 3.14, 'boolean': True, 'null': None, 'list': [1, 2, 3]}) as w: @@ -181,9 +188,10 @@ def test_types() -> None: w['number'].consume() w['boolean'].zoom() w['null'].zoom() - for x in list(w['list'].zoom()): # TODO eh. how to avoid the extra list thing? + for x in list(w['list'].zoom()): # TODO eh. how to avoid the extra list thing? x.consume() + def test_consume_all() -> None: with wrap({'aaa': {'bbb': {'hi': 123}}}) as w: w = cast(Wdict, w) @@ -193,11 +201,9 @@ def test_consume_all() -> None: def test_consume_few() -> None: import pytest + pytest.skip('Will think about it later..') - with wrap({ - 'important': 123, - 'unimportant': 'whatever' - }) as w: + with wrap({'important': 123, 'unimportant': 'whatever'}) as w: w = cast(Wdict, w) w['important'].zoom() w.consume_all() @@ -206,6 +212,7 @@ def test_consume_few() -> None: def test_zoom() -> None: import pytest + with wrap({'aaa': 'whatever'}) as w: w = cast(Wdict, w) with pytest.raises(KeyError): diff --git a/my/core/mime.py b/my/core/mime.py index cf5bdf5..8235960 100644 --- a/my/core/mime.py +++ b/my/core/mime.py @@ -2,11 +2,14 @@ Utils for mime/filetype handling """ -from .internal import assert_subpackage; assert_subpackage(__name__) +from __future__ import annotations + +from .internal import assert_subpackage + +assert_subpackage(__name__) import functools - -from .common import PathIsh +from pathlib import Path @functools.lru_cache(1) @@ -23,7 +26,7 @@ import mimetypes # todo do I need init()? # todo wtf? fastermime thinks it's mime is application/json even if the extension is xz?? # whereas magic detects correctly: application/x-zstd and application/x-xz -def fastermime(path: PathIsh) -> str: +def fastermime(path: Path | str) -> str: paths = str(path) # mimetypes is faster, so try it first (mime, _) = mimetypes.guess_type(paths) diff --git a/my/core/orgmode.py b/my/core/orgmode.py index 979f288..96c09a4 100644 --- a/my/core/orgmode.py +++ b/my/core/orgmode.py @@ -1,6 +1,7 @@ """ Various helpers for reading org-mode data """ + from datetime import datetime @@ -22,17 +23,20 @@ def parse_org_datetime(s: str) -> datetime: # TODO I guess want to borrow inspiration from bs4? 
element type <-> tag; and similar logic for find_one, find_all -from typing import Callable, Iterable, TypeVar +from collections.abc import Iterable +from typing import Callable, TypeVar from orgparse import OrgNode V = TypeVar('V') + def collect(n: OrgNode, cfun: Callable[[OrgNode], Iterable[V]]) -> Iterable[V]: yield from cfun(n) for c in n.children: yield from collect(c, cfun) + from more_itertools import one from orgparse.extra import Table @@ -46,7 +50,7 @@ class TypedTable(Table): tt = super().__new__(TypedTable) tt.__dict__ = orig.__dict__ blocks = list(orig.blocks) - header = blocks[0] # fist block is schema + header = blocks[0] # fist block is schema if len(header) == 2: # TODO later interpret first line as types header = header[1:] diff --git a/my/core/pandas.py b/my/core/pandas.py index 8f5fd29..d444965 100644 --- a/my/core/pandas.py +++ b/my/core/pandas.py @@ -7,17 +7,14 @@ from __future__ import annotations # todo not sure if belongs to 'core'. It's certainly 'more' core than actual modules, but still not essential # NOTE: this file is meant to be importable without Pandas installed import dataclasses +from collections.abc import Iterable, Iterator from datetime import datetime, timezone from pprint import pformat from typing import ( TYPE_CHECKING, Any, Callable, - Dict, - Iterable, - Iterator, Literal, - Type, TypeVar, ) @@ -178,7 +175,7 @@ def _to_jsons(it: Iterable[Res[Any]]) -> Iterable[Json]: Schema = Any -def _as_columns(s: Schema) -> Dict[str, Type]: +def _as_columns(s: Schema) -> dict[str, type]: # todo would be nice to extract properties; add tests for this as well if dataclasses.is_dataclass(s): return {f.name: f.type for f in dataclasses.fields(s)} # type: ignore[misc] # ugh, why mypy thinks f.type can return str?? diff --git a/my/core/preinit.py b/my/core/preinit.py index be5477b..eb3a34f 100644 --- a/my/core/preinit.py +++ b/my/core/preinit.py @@ -8,6 +8,7 @@ def get_mycfg_dir() -> Path: import os import appdirs # type: ignore[import-untyped] + # not sure if that's necessary, i.e. could rely on PYTHONPATH instead # on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path? 
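# e.g. MY_CONFIG=/path/to/my/config hpi doctor  (path illustrative; when
# unset, the appdirs-based default is used instead)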
mvar = os.environ.get('MY_CONFIG') diff --git a/my/core/pytest.py b/my/core/pytest.py index e514957..ad9e7d7 100644 --- a/my/core/pytest.py +++ b/my/core/pytest.py @@ -2,7 +2,9 @@ Helpers to prevent depending on pytest in runtime """ -from .internal import assert_subpackage; assert_subpackage(__name__) +from .internal import assert_subpackage + +assert_subpackage(__name__) import sys import typing diff --git a/my/core/query.py b/my/core/query.py index 45806fb..50724a7 100644 --- a/my/core/query.py +++ b/my/core/query.py @@ -5,23 +5,20 @@ The main entrypoint to this library is the 'select' function below; try: python3 -c "from my.core.query import select; help(select)" """ +from __future__ import annotations + import dataclasses import importlib import inspect import itertools +from collections.abc import Iterable, Iterator from datetime import datetime from typing import ( Any, Callable, - Dict, - Iterable, - Iterator, - List, NamedTuple, Optional, - Tuple, TypeVar, - Union, ) import more_itertools @@ -51,6 +48,7 @@ class Unsortable(NamedTuple): class QueryException(ValueError): """Used to differentiate query-related errors, so the CLI interface is more expressive""" + pass @@ -63,7 +61,7 @@ def locate_function(module_name: str, function_name: str) -> Callable[[], Iterab """ try: mod = importlib.import_module(module_name) - for (fname, f) in inspect.getmembers(mod, inspect.isfunction): + for fname, f in inspect.getmembers(mod, inspect.isfunction): if fname == function_name: return f # in case the function is defined dynamically, @@ -83,10 +81,10 @@ def locate_qualified_function(qualified_name: str) -> Callable[[], Iterable[ET]] if "." not in qualified_name: raise QueryException("Could not find a '.' in the function name, e.g. my.reddit.rexport.comments") rdot_index = qualified_name.rindex(".") - return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1:]) + return locate_function(qualified_name[:rdot_index], qualified_name[rdot_index + 1 :]) -def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optional[OrderFunc]: +def attribute_func(obj: T, where: Where, default: U | None = None) -> OrderFunc | None: """ Attempts to find an attribute which matches the 'where_function' on the object, using some getattr/dict checks. 
Returns a function which when called with @@ -133,11 +131,11 @@ def attribute_func(obj: T, where: Where, default: Optional[U] = None) -> Optiona def _generate_order_by_func( obj_res: Res[T], *, - key: Optional[str] = None, - where_function: Optional[Where] = None, - default: Optional[U] = None, + key: str | None = None, + where_function: Where | None = None, + default: U | None = None, force_unsortable: bool = False, -) -> Optional[OrderFunc]: +) -> OrderFunc | None: """ Accepts an object Res[T] (Instance of some class or Exception) @@ -202,7 +200,7 @@ pass 'drop_exceptions' to ignore exceptions""") # user must provide either a key or a where predicate if where_function is not None: - func: Optional[OrderFunc] = attribute_func(obj, where_function, default) + func: OrderFunc | None = attribute_func(obj, where_function, default) if func is not None: return func @@ -218,8 +216,6 @@ pass 'drop_exceptions' to ignore exceptions""") return None # couldn't compute a OrderFunc for this class/instance - - # currently using the 'key set' as a proxy for 'this is the same type of thing' def _determine_order_by_value_key(obj_res: ET) -> Any: """ @@ -244,7 +240,7 @@ def _drop_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Iterator[ET]: # try getting the first value from the iterator # similar to my.core.common.warn_if_empty? this doesn't go through the whole iterator though -def _peek_iter(itr: Iterator[ET]) -> Tuple[Optional[ET], Iterator[ET]]: +def _peek_iter(itr: Iterator[ET]) -> tuple[ET | None, Iterator[ET]]: itr = more_itertools.peekable(itr) try: first_item = itr.peek() @@ -255,9 +251,9 @@ def _peek_iter(itr: Iterator[ET]) -> Tuple[Optional[ET], Iterator[ET]]: # similar to 'my.core.error.sort_res_by'? -def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> Tuple[Iterator[Unsortable], Iterator[ET]]: - unsortable: List[Unsortable] = [] - sortable: List[ET] = [] +def _wrap_unsorted(itr: Iterator[ET], orderfunc: OrderFunc) -> tuple[Iterator[Unsortable], Iterator[ET]]: + unsortable: list[Unsortable] = [] + sortable: list[ET] = [] for o in itr: # if input to select was another select if isinstance(o, Unsortable): @@ -279,7 +275,7 @@ def _handle_unsorted( orderfunc: OrderFunc, drop_unsorted: bool, wrap_unsorted: bool -) -> Tuple[Iterator[Unsortable], Iterator[ET]]: +) -> tuple[Iterator[Unsortable], Iterator[ET]]: # prefer drop_unsorted to wrap_unsorted, if both were present if drop_unsorted: return iter([]), _drop_unsorted(itr, orderfunc) @@ -294,16 +290,16 @@ def _handle_unsorted( # different types. ***This consumes the iterator***, so # you should definitely itertoolts.tee it beforehand # as to not exhaust the values -def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: Optional[U] = None) -> OrderFunc: +def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: U | None = None) -> OrderFunc: # TODO: add a kwarg to force lookup for every item? 
would sort of be like core.common.guess_datetime then - order_by_lookup: Dict[Any, OrderFunc] = {} + order_by_lookup: dict[Any, OrderFunc] = {} # need to go through a copy of the whole iterator here to # pre-generate functions to support sorting mixed types for obj_res in itr: key: Any = _determine_order_by_value_key(obj_res) if key not in order_by_lookup: - keyfunc: Optional[OrderFunc] = _generate_order_by_func( + keyfunc: OrderFunc | None = _generate_order_by_func( obj_res, where_function=order_value, default=default, @@ -324,12 +320,12 @@ def _generate_order_value_func(itr: Iterator[ET], order_value: Where, default: O def _handle_generate_order_by( itr, *, - order_by: Optional[OrderFunc] = None, - order_key: Optional[str] = None, - order_value: Optional[Where] = None, - default: Optional[U] = None, -) -> Tuple[Optional[OrderFunc], Iterator[ET]]: - order_by_chosen: Optional[OrderFunc] = order_by # if the user just supplied a function themselves + order_by: OrderFunc | None = None, + order_key: str | None = None, + order_value: Where | None = None, + default: U | None = None, +) -> tuple[OrderFunc | None, Iterator[ET]]: + order_by_chosen: OrderFunc | None = order_by # if the user just supplied a function themselves if order_by is not None: return order_by, itr if order_key is not None: @@ -354,19 +350,19 @@ def _handle_generate_order_by( def select( - src: Union[Iterable[ET], Callable[[], Iterable[ET]]], + src: Iterable[ET] | Callable[[], Iterable[ET]], *, - where: Optional[Where] = None, - order_by: Optional[OrderFunc] = None, - order_key: Optional[str] = None, - order_value: Optional[Where] = None, - default: Optional[U] = None, + where: Where | None = None, + order_by: OrderFunc | None = None, + order_key: str | None = None, + order_value: Where | None = None, + default: U | None = None, reverse: bool = False, - limit: Optional[int] = None, + limit: int | None = None, drop_unsorted: bool = False, wrap_unsorted: bool = True, warn_exceptions: bool = False, - warn_func: Optional[Callable[[Exception], None]] = None, + warn_func: Callable[[Exception], None] | None = None, drop_exceptions: bool = False, raise_exceptions: bool = False, ) -> Iterator[ET]: @@ -617,7 +613,7 @@ class _B(NamedTuple): # move these to tests/? 
They are re-used so much in the tests below,
 # not sure where the best place for these is
-def _mixed_iter() -> Iterator[Union[_A, _B]]:
+def _mixed_iter() -> Iterator[_A | _B]:
     yield _A(x=datetime(year=2009, month=5, day=10, hour=4, minute=10, second=1), y=5, z=10)
     yield _B(y=datetime(year=2015, month=5, day=10, hour=4, minute=10, second=1))
     yield _A(x=datetime(year=2005, month=5, day=10, hour=4, minute=10, second=1), y=10, z=2)
@@ -626,7 +622,7 @@ def _mixed_iter() -> Iterator[Union[_A, _B]]:
     yield _A(x=datetime(year=2005, month=4, day=10, hour=4, minute=10, second=1), y=2, z=-5)


-def _mixed_iter_errors() -> Iterator[Res[Union[_A, _B]]]:
+def _mixed_iter_errors() -> Iterator[Res[_A | _B]]:
     m = _mixed_iter()
     yield from itertools.islice(m, 0, 3)
     yield RuntimeError("Unhandled error!")
diff --git a/my/core/query_range.py b/my/core/query_range.py
index 1f4a7ff..2a8d7bd 100644
--- a/my/core/query_range.py
+++ b/my/core/query_range.py
@@ -7,11 +7,14 @@ filtered iterator

 See the select_range function below
 """
+from __future__ import annotations
+
 import re
 import time
+from collections.abc import Iterator
 from datetime import date, datetime, timedelta
-from functools import lru_cache
-from typing import Any, Callable, Iterator, NamedTuple, Optional, Type
+from functools import cache
+from typing import Any, Callable, NamedTuple

 import more_itertools

@@ -25,7 +28,9 @@ from .query import (
     select,
 )

-timedelta_regex = re.compile(r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$")
+timedelta_regex = re.compile(
+    r"^((?P<weeks>[\.\d]+?)w)?((?P<days>[\.\d]+?)d)?((?P<hours>[\.\d]+?)h)?((?P<minutes>[\.\d]+?)m)?((?P<seconds>[\.\d]+?)s)?$"
+)


 # https://stackoverflow.com/a/51916936
@@ -88,7 +93,7 @@ def parse_datetime_float(date_str: str) -> float:
     # dateparser is a bit more lenient than the above, lets you type
     # all sorts of dates as inputs
     # https://github.com/scrapinghub/dateparser#how-to-use
-    res: Optional[datetime] = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
+    res: datetime | None = dateparser.parse(ds, settings={"DATE_ORDER": "YMD"})
     if res is not None:
         return res.timestamp()

@@ -98,7 +103,7 @@ def parse_datetime_float(date_str: str) -> float:


# probably DateLike input?
but a user could specify an order_key # which is an epoch timestamp or a float value which they # expect to be converted to a datetime to compare -@lru_cache(maxsize=None) +@cache def _datelike_to_float(dl: Any) -> float: if isinstance(dl, datetime): return dl.timestamp() @@ -130,11 +135,12 @@ class RangeTuple(NamedTuple): of the timeframe -- 'before' - before and after - anything after 'after' and before 'before', acts as a time range """ + # technically doesn't need to be Optional[Any], # just to make it more clear these can be None - after: Optional[Any] - before: Optional[Any] - within: Optional[Any] + after: Any | None + before: Any | None + within: Any | None Converter = Callable[[Any], Any] @@ -145,9 +151,9 @@ def _parse_range( unparsed_range: RangeTuple, end_parser: Converter, within_parser: Converter, - parsed_range: Optional[RangeTuple] = None, - error_message: Optional[str] = None -) -> Optional[RangeTuple]: + parsed_range: RangeTuple | None = None, + error_message: str | None = None, +) -> RangeTuple | None: if parsed_range is not None: return parsed_range @@ -176,11 +182,11 @@ def _create_range_filter( end_parser: Converter, within_parser: Converter, attr_func: Where, - parsed_range: Optional[RangeTuple] = None, - default_before: Optional[Any] = None, - value_coercion_func: Optional[Converter] = None, - error_message: Optional[str] = None, -) -> Optional[Where]: + parsed_range: RangeTuple | None = None, + default_before: Any | None = None, + value_coercion_func: Converter | None = None, + error_message: str | None = None, +) -> Where | None: """ Handles: - parsing the user input into values that are comparable to items the iterable returns @@ -272,17 +278,17 @@ def _create_range_filter( def select_range( itr: Iterator[ET], *, - where: Optional[Where] = None, - order_key: Optional[str] = None, - order_value: Optional[Where] = None, - order_by_value_type: Optional[Type] = None, - unparsed_range: Optional[RangeTuple] = None, + where: Where | None = None, + order_key: str | None = None, + order_value: Where | None = None, + order_by_value_type: type | None = None, + unparsed_range: RangeTuple | None = None, reverse: bool = False, - limit: Optional[int] = None, + limit: int | None = None, drop_unsorted: bool = False, wrap_unsorted: bool = False, warn_exceptions: bool = False, - warn_func: Optional[Callable[[Exception], None]] = None, + warn_func: Callable[[Exception], None] | None = None, drop_exceptions: bool = False, raise_exceptions: bool = False, ) -> Iterator[ET]: @@ -317,9 +323,10 @@ def select_range( drop_exceptions=drop_exceptions, raise_exceptions=raise_exceptions, warn_exceptions=warn_exceptions, - warn_func=warn_func) + warn_func=warn_func, + ) - order_by_chosen: Optional[OrderFunc] = None + order_by_chosen: OrderFunc | None = None # if the user didn't specify an attribute to order value, but specified a type # we should search for on each value in the iterator @@ -345,7 +352,7 @@ Specify a type or a key to order the value by""") # force drop_unsorted=True so we can use _create_range_filter # sort the iterable by the generated order_by_chosen function itr = select(itr, order_by=order_by_chosen, drop_unsorted=True) - filter_func: Optional[Where] + filter_func: Where | None if order_by_value_type in [datetime, date]: filter_func = _create_range_filter( unparsed_range=unparsed_range, @@ -353,7 +360,8 @@ Specify a type or a key to order the value by""") within_parser=parse_timedelta_float, attr_func=order_by_chosen, # type: ignore[arg-type] default_before=time.time(), - 
value_coercion_func=_datelike_to_float)
+            value_coercion_func=_datelike_to_float,
+        )
     elif order_by_value_type in [int, float]:
         # allow primitives to be converted using the default int(), float() callables
         filter_func = _create_range_filter(
@@ -362,7 +370,8 @@ Specify a type or a key to order the value by""")
             within_parser=order_by_value_type,
             attr_func=order_by_chosen,  # type: ignore[arg-type]
             default_before=None,
-            value_coercion_func=order_by_value_type)
+            value_coercion_func=order_by_value_type,
+        )
     else:
         # TODO: add additional kwargs to let the user sort by other values, by specifying the parsers?
         # would need to allow passing the end_parser, within parser, default before and value_coercion_func...
@@ -470,7 +479,7 @@ def test_range_predicate() -> None:

     # filter from 0 to 5
     rn: RangeTuple = RangeTuple("0", "5", None)
-    zero_to_five_filter: Optional[Where] = int_filter_func(unparsed_range=rn)
+    zero_to_five_filter: Where | None = int_filter_func(unparsed_range=rn)
     assert zero_to_five_filter is not None
     # this is just a Where function, given some input it returns True/False if the value is allowed
     assert zero_to_five_filter(3) is True
@@ -483,6 +492,7 @@ def test_range_predicate() -> None:
     rn = RangeTuple(None, 3, "3.5")
     assert list(filter(int_filter_func(unparsed_range=rn, attr_func=identity), src())) == ["0", "1", "2"]

+
 def test_parse_range() -> None:
     from functools import partial
diff --git a/my/core/serialize.py b/my/core/serialize.py
index ab11a20..e36da8f 100644
--- a/my/core/serialize.py
+++ b/my/core/serialize.py
@@ -1,9 +1,11 @@
+from __future__ import annotations
+
 import datetime
 from dataclasses import asdict, is_dataclass
 from decimal import Decimal
-from functools import lru_cache
+from functools import cache
 from pathlib import Path
-from typing import Any, Callable, NamedTuple, Optional
+from typing import Any, Callable, NamedTuple

 from .error import error_to_json
 from .pytest import parametrize
@@ -57,12 +59,12 @@ def _default_encode(obj: Any) -> Any:
 # could possibly run multiple times/raise warning if you provide different 'default'
 # functions or change the kwargs?
The alternative is to maintain all of this at the module
 # level, which is just as annoying
-@lru_cache(maxsize=None)
+@cache
 def _dumps_factory(**kwargs) -> Callable[[Any], str]:
     use_default: DefaultEncoder = _default_encode
     # if the user passed an additional 'default' parameter,
     # try using that to serialize before _default_encode
-    _additional_default: Optional[DefaultEncoder] = kwargs.get("default")
+    _additional_default: DefaultEncoder | None = kwargs.get("default")
     if _additional_default is not None and callable(_additional_default):

         def wrapped_default(obj: Any) -> Any:
@@ -78,9 +80,9 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:

     kwargs["default"] = use_default

-    prefer_factory: Optional[str] = kwargs.pop('_prefer_factory', None)
+    prefer_factory: str | None = kwargs.pop('_prefer_factory', None)

-    def orjson_factory() -> Optional[Dumps]:
+    def orjson_factory() -> Dumps | None:
         try:
             import orjson
         except ModuleNotFoundError:
@@ -95,7 +97,7 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:

         return _orjson_dumps

-    def simplejson_factory() -> Optional[Dumps]:
+    def simplejson_factory() -> Dumps | None:
         try:
             from simplejson import dumps as simplejson_dumps
         except ModuleNotFoundError:
@@ -115,7 +117,7 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:

         return _simplejson_dumps

-    def stdlib_factory() -> Optional[Dumps]:
+    def stdlib_factory() -> Dumps | None:
         import json

         from .warnings import high
@@ -150,7 +152,7 @@ def _dumps_factory(**kwargs) -> Callable[[Any], str]:

 def dumps(
     obj: Any,
-    default: Optional[DefaultEncoder] = None,
+    default: DefaultEncoder | None = None,
     **kwargs,
 ) -> str:
     """
diff --git a/my/core/source.py b/my/core/source.py
index 52c58c1..a309d13 100644
--- a/my/core/source.py
+++ b/my/core/source.py
@@ -3,9 +3,12 @@ Decorator to gracefully handle importing a data source, or warning
 and yielding nothing (or a default) when it's not available
 """

+from __future__ import annotations
+
 import warnings
+from collections.abc import Iterable, Iterator
 from functools import wraps
-from typing import Any, Callable, Iterable, Iterator, Optional, TypeVar
+from typing import Any, Callable, TypeVar

 from .warnings import medium

@@ -26,8 +29,8 @@ _DEFAULT_ITR = ()
 def import_source(
     *,
     default: Iterable[T] = _DEFAULT_ITR,
-    module_name: Optional[str] = None,
-    help_url: Optional[str] = None,
+    module_name: str | None = None,
+    help_url: str | None = None,
 ) -> Callable[..., Callable[..., Iterator[T]]]:
     """
     doesn't really play well with types, but is used to catch
@@ -50,6 +53,7 @@ def import_source(
             except (ImportError, AttributeError) as err:
                 from . 
import core_config as CC from .error import warn_my_config_import_error + suppressed_in_conf = False if module_name is not None and CC.config._is_module_active(module_name) is False: suppressed_in_conf = True @@ -72,5 +76,7 @@ class core: if not matched_config_err and isinstance(err, AttributeError): raise err yield from default + return wrapper + return decorator diff --git a/my/core/sqlite.py b/my/core/sqlite.py index 08a80e5..aa41ab3 100644 --- a/my/core/sqlite.py +++ b/my/core/sqlite.py @@ -1,12 +1,16 @@ -from .internal import assert_subpackage; assert_subpackage(__name__) +from __future__ import annotations +from .internal import assert_subpackage # noqa: I001 + +assert_subpackage(__name__) import shutil import sqlite3 +from collections.abc import Iterator from contextlib import contextmanager from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, Callable, Iterator, Literal, Optional, Tuple, Union, overload +from typing import Any, Callable, Literal, Union, overload from .common import PathIsh from .compat import assert_never @@ -22,6 +26,7 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None: conn.execute('CREATE TABLE testtable (col)') import pytest + with pytest.raises(sqlite3.OperationalError, match='readonly database'): with sqlite_connect_immutable(db) as conn: conn.execute('DROP TABLE testtable') @@ -33,6 +38,7 @@ def test_sqlite_connect_immutable(tmp_path: Path) -> None: SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any] + def dict_factory(cursor, row): fields = [column[0] for column in cursor.description] return dict(zip(fields, row)) @@ -40,8 +46,9 @@ def dict_factory(cursor, row): Factory = Union[SqliteRowFactory, Literal['row', 'dict']] + @contextmanager -def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Optional[Factory]=None) -> Iterator[sqlite3.Connection]: +def sqlite_connection(db: PathIsh, *, immutable: bool = False, row_factory: Factory | None = None) -> Iterator[sqlite3.Connection]: dbp = f'file:{db}' # https://www.sqlite.org/draft/uri.html#uriimmutable if immutable: @@ -97,30 +104,32 @@ def sqlite_copy_and_open(db: PathIsh) -> sqlite3.Connection: # and then the return type ends up as Iterator[Tuple[str, ...]], which isn't desirable :( # a bit annoying to have this copy-pasting, but hopefully not a big issue +# fmt: off @overload -def select(cols: Tuple[str ], rest: str, *, db: sqlite3.Connection) -> \ - Iterator[Tuple[Any ]]: ... +def select(cols: tuple[str ], rest: str, *, db: sqlite3.Connection) -> \ + Iterator[tuple[Any ]]: ... @overload -def select(cols: Tuple[str, str ], rest: str, *, db: sqlite3.Connection) -> \ - Iterator[Tuple[Any, Any ]]: ... +def select(cols: tuple[str, str ], rest: str, *, db: sqlite3.Connection) -> \ + Iterator[tuple[Any, Any ]]: ... @overload -def select(cols: Tuple[str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ - Iterator[Tuple[Any, Any, Any ]]: ... +def select(cols: tuple[str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ + Iterator[tuple[Any, Any, Any ]]: ... @overload -def select(cols: Tuple[str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ - Iterator[Tuple[Any, Any, Any, Any ]]: ... +def select(cols: tuple[str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ + Iterator[tuple[Any, Any, Any, Any ]]: ... @overload -def select(cols: Tuple[str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ - Iterator[Tuple[Any, Any, Any, Any, Any ]]: ... 
+def select(cols: tuple[str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ + Iterator[tuple[Any, Any, Any, Any, Any ]]: ... @overload -def select(cols: Tuple[str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ - Iterator[Tuple[Any, Any, Any, Any, Any, Any ]]: ... +def select(cols: tuple[str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ + Iterator[tuple[Any, Any, Any, Any, Any, Any ]]: ... @overload -def select(cols: Tuple[str, str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ - Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any ]]: ... +def select(cols: tuple[str, str, str, str, str, str, str ], rest: str, *, db: sqlite3.Connection) -> \ + Iterator[tuple[Any, Any, Any, Any, Any, Any, Any ]]: ... @overload -def select(cols: Tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \ - Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ... +def select(cols: tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> \ + Iterator[tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ... +# fmt: on def select(cols, rest, *, db): # db arg is last cause that results in nicer code formatting.. diff --git a/my/core/stats.py b/my/core/stats.py index 674a8d1..a553db3 100644 --- a/my/core/stats.py +++ b/my/core/stats.py @@ -2,10 +2,13 @@ Helpers for hpi doctor/stats functionality. ''' +from __future__ import annotations + import collections.abc import importlib import inspect import typing +from collections.abc import Iterable, Iterator, Sequence from contextlib import contextmanager from datetime import datetime from pathlib import Path @@ -13,20 +16,13 @@ from types import ModuleType from typing import ( Any, Callable, - Dict, - Iterable, - Iterator, - List, - Optional, Protocol, - Sequence, - Union, cast, ) from .types import asdict -Stats = Dict[str, Any] +Stats = dict[str, Any] class StatsFun(Protocol): @@ -55,10 +51,10 @@ def quick_stats(): def stat( - func: Union[Callable[[], Iterable[Any]], Iterable[Any]], + func: Callable[[], Iterable[Any]] | Iterable[Any], *, quick: bool = False, - name: Optional[str] = None, + name: str | None = None, ) -> Stats: """ Extracts various statistics from a passed iterable/callable, e.g.: @@ -153,8 +149,8 @@ def test_stat() -> None: # -def get_stats(module_name: str, *, guess: bool = False) -> Optional[StatsFun]: - stats: Optional[StatsFun] = None +def get_stats(module_name: str, *, guess: bool = False) -> StatsFun | None: + stats: StatsFun | None = None try: module = importlib.import_module(module_name) except Exception: @@ -167,7 +163,7 @@ def get_stats(module_name: str, *, guess: bool = False) -> Optional[StatsFun]: # TODO maybe could be enough to annotate OUTPUTS or something like that? # then stats could just use them as hints? 
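As a usage sketch for the `stat` helper whose signature is modernized above: the `Event` dataclass and `events` provider below are hypothetical, only `my.core.stats.stat` itself comes from this package, and the exact output shape is indicative.

from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime, timedelta

from my.core.stats import stat

@dataclass
class Event:  # hypothetical provider item
    dt: datetime

def events() -> Iterator[Event]:  # hypothetical data provider
    for i in range(100):
        yield Event(dt=datetime(2024, 1, 1) + timedelta(days=i))

# counts the items (and tries to guess the datetime range from the 'dt' field);
# prints something like {'events': {'count': 100, ...}}
print(stat(events))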
-def guess_stats(module: ModuleType) -> Optional[StatsFun]: +def guess_stats(module: ModuleType) -> StatsFun | None: """ If the module doesn't have explicitly defined 'stat' function, this is used to try to guess what could be included in stats automatically @@ -206,7 +202,7 @@ def test_guess_stats() -> None: } -def _guess_data_providers(module: ModuleType) -> Dict[str, Callable]: +def _guess_data_providers(module: ModuleType) -> dict[str, Callable]: mfunctions = inspect.getmembers(module, inspect.isfunction) return {k: v for k, v in mfunctions if is_data_provider(v)} @@ -263,7 +259,7 @@ def test_is_data_provider() -> None: lam = lambda: [1, 2] assert not idp(lam) - def has_extra_args(count) -> List[int]: + def has_extra_args(count) -> list[int]: return list(range(count)) assert not idp(has_extra_args) @@ -340,10 +336,10 @@ def test_type_is_iterable() -> None: assert not fun(None) assert not fun(int) assert not fun(Any) - assert not fun(Dict[int, int]) + assert not fun(dict[int, int]) - assert fun(List[int]) - assert fun(Sequence[Dict[str, str]]) + assert fun(list[int]) + assert fun(Sequence[dict[str, str]]) assert fun(Iterable[Any]) @@ -434,7 +430,7 @@ def test_stat_iterable() -> None: # experimental, not sure about it.. -def _guess_datetime(x: Any) -> Optional[datetime]: +def _guess_datetime(x: Any) -> datetime | None: # todo hmm implement without exception.. try: d = asdict(x) diff --git a/my/core/structure.py b/my/core/structure.py index fa26532..bb049e4 100644 --- a/my/core/structure.py +++ b/my/core/structure.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import atexit import os import shutil @@ -5,9 +7,9 @@ import sys import tarfile import tempfile import zipfile +from collections.abc import Generator, Sequence from contextlib import contextmanager from pathlib import Path -from typing import Generator, List, Sequence, Tuple, Union from .logging import make_logger @@ -42,10 +44,10 @@ TARGZ_EXT = {".tar.gz"} @contextmanager def match_structure( base: Path, - expected: Union[str, Sequence[str]], + expected: str | Sequence[str], *, partial: bool = False, -) -> Generator[Tuple[Path, ...], None, None]: +) -> Generator[tuple[Path, ...], None, None]: """ Given a 'base' directory or archive (zip/tar.gz), recursively search for one or more paths that match the pattern described in 'expected'. That can be a single string, or a list @@ -140,8 +142,8 @@ def match_structure( if not searchdir.is_dir(): raise NotADirectoryError(f"Expected either a zip/tar.gz archive or a directory, received {searchdir}") - matches: List[Path] = [] - possible_targets: List[Path] = [searchdir] + matches: list[Path] = [] + possible_targets: list[Path] = [searchdir] while len(possible_targets) > 0: p = possible_targets.pop(0) @@ -172,7 +174,7 @@ def warn_leftover_files() -> None: from . import core_config as CC base_tmp: Path = CC.config.get_tmp_dir() - leftover: List[Path] = list(base_tmp.iterdir()) + leftover: list[Path] = list(base_tmp.iterdir()) if leftover: logger.debug(f"at exit warning: Found leftover files in temporary directory '{leftover}'. 
this may be because you have multiple hpi processes running -- if so this can be ignored") diff --git a/my/core/tests/auto_stats.py b/my/core/tests/auto_stats.py index d10d4c4..fc49e03 100644 --- a/my/core/tests/auto_stats.py +++ b/my/core/tests/auto_stats.py @@ -2,11 +2,11 @@ Helper 'module' for test_guess_stats """ +from collections.abc import Iterable, Iterator, Sequence from contextlib import contextmanager from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path -from typing import Iterable, Iterator, Sequence @dataclass diff --git a/my/core/tests/common.py b/my/core/tests/common.py index 22a74d7..073ea5f 100644 --- a/my/core/tests/common.py +++ b/my/core/tests/common.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import os +from collections.abc import Iterator from contextlib import contextmanager -from typing import Iterator, Optional import pytest @@ -15,7 +17,7 @@ skip_if_uses_optional_deps = pytest.mark.skipif( # TODO maybe move to hpi core? @contextmanager -def tmp_environ_set(key: str, value: Optional[str]) -> Iterator[None]: +def tmp_environ_set(key: str, value: str | None) -> Iterator[None]: prev_value = os.environ.get(key) if value is None: os.environ.pop(key, None) diff --git a/my/core/tests/denylist.py b/my/core/tests/denylist.py index 2688319..73c3165 100644 --- a/my/core/tests/denylist.py +++ b/my/core/tests/denylist.py @@ -1,8 +1,9 @@ import json import warnings +from collections.abc import Iterator from datetime import datetime from pathlib import Path -from typing import Iterator, NamedTuple +from typing import NamedTuple from ..denylist import DenyList diff --git a/my/core/tests/test_cachew.py b/my/core/tests/test_cachew.py index 70ac76f..a0d2267 100644 --- a/my/core/tests/test_cachew.py +++ b/my/core/tests/test_cachew.py @@ -1,6 +1,6 @@ -from .common import skip_if_uses_optional_deps as pytestmark +from __future__ import annotations -from typing import List +from .common import skip_if_uses_optional_deps as pytestmark # TODO ugh, this is very messy.. need to sort out config overriding here @@ -16,7 +16,7 @@ def test_cachew() -> None: # TODO ugh. 
need doublewrap or something to avoid having to pass parens @mcachew() - def cf() -> List[int]: + def cf() -> list[int]: nonlocal called called += 1 return [1, 2, 3] @@ -43,7 +43,7 @@ def test_cachew_dir_none() -> None: called = 0 @mcachew(cache_path=cache_dir() / 'ctest') - def cf() -> List[int]: + def cf() -> list[int]: nonlocal called called += 1 return [called, called, called] diff --git a/my/core/tests/test_config.py b/my/core/tests/test_config.py index 78d1a62..f6d12ba 100644 --- a/my/core/tests/test_config.py +++ b/my/core/tests/test_config.py @@ -2,8 +2,8 @@ Various tests that are checking behaviour of user config wrt to various things """ -import sys import os +import sys from pathlib import Path import pytest diff --git a/my/core/time.py b/my/core/time.py index fa20a7c..a9b180d 100644 --- a/my/core/time.py +++ b/my/core/time.py @@ -1,5 +1,7 @@ -from functools import lru_cache -from typing import Dict, Sequence +from __future__ import annotations + +from collections.abc import Sequence +from functools import cache, lru_cache import pytz @@ -11,6 +13,7 @@ def user_forced() -> Sequence[str]: # https://stackoverflow.com/questions/36067621/python-all-possible-timezone-abbreviations-for-given-timezone-name-and-vise-ve try: from my.config import time as user_config + return user_config.tz.force_abbreviations # type: ignore[attr-defined] # noqa: TRY300 # note: noqa since we're catching case where config doesn't have attribute here as well except: @@ -19,15 +22,15 @@ def user_forced() -> Sequence[str]: @lru_cache(1) -def _abbr_to_timezone_map() -> Dict[str, pytz.BaseTzInfo]: +def _abbr_to_timezone_map() -> dict[str, pytz.BaseTzInfo]: # also force UTC to always correspond to utc # this makes more sense than Zulu it ends up by default timezones = [*pytz.all_timezones, 'UTC', *user_forced()] - res: Dict[str, pytz.BaseTzInfo] = {} + res: dict[str, pytz.BaseTzInfo] = {} for tzname in timezones: tz = pytz.timezone(tzname) - infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present? + infos = getattr(tz, '_tzinfos', []) # not sure if can rely on attr always present? for info in infos: abbr = info[-1] # todo could support this with a better error handling strategy? @@ -43,7 +46,7 @@ def _abbr_to_timezone_map() -> Dict[str, pytz.BaseTzInfo]: return res -@lru_cache(maxsize=None) +@cache def abbr_to_timezone(abbr: str) -> pytz.BaseTzInfo: return _abbr_to_timezone_map()[abbr] diff --git a/my/core/types.py b/my/core/types.py index b1cf103..dc19c19 100644 --- a/my/core/types.py +++ b/my/core/types.py @@ -1,14 +1,15 @@ -from .internal import assert_subpackage; assert_subpackage(__name__) +from __future__ import annotations + +from .internal import assert_subpackage + +assert_subpackage(__name__) from dataclasses import asdict as dataclasses_asdict from dataclasses import is_dataclass from datetime import datetime -from typing import ( - Any, - Dict, -) +from typing import Any -Json = Dict[str, Any] +Json = dict[str, Any] # for now just serves documentation purposes... but one day might make it statically verifiable where possible? 
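A quick sanity check of the timezone helpers above; this only relies on the 'UTC' entry that `_abbr_to_timezone_map` explicitly forces into the map, and thanks to `@cache` repeated lookups are effectively free:

import pytz

from my.core.time import abbr_to_timezone

# 'UTC' is forced to map to utc (see the comment in _abbr_to_timezone_map above)
tz = abbr_to_timezone('UTC')
assert tz == pytz.utc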
diff --git a/my/core/util.py b/my/core/util.py index a247f81..74e71e1 100644 --- a/my/core/util.py +++ b/my/core/util.py @@ -1,10 +1,12 @@ +from __future__ import annotations + import os import pkgutil import sys +from collections.abc import Iterable from itertools import chain from pathlib import Path from types import ModuleType -from typing import Iterable, List, Optional from .discovery_pure import HPIModule, _is_not_module_src, has_stats, ignored @@ -20,13 +22,14 @@ from .discovery_pure import NOT_HPI_MODULE_VAR assert NOT_HPI_MODULE_VAR in globals() # check name consistency -def is_not_hpi_module(module: str) -> Optional[str]: + +def is_not_hpi_module(module: str) -> str | None: ''' None if a module, otherwise returns reason ''' import importlib.util - path: Optional[str] = None + path: str | None = None try: # TODO annoying, this can cause import of the parent module? spec = importlib.util.find_spec(module) @@ -35,7 +38,7 @@ def is_not_hpi_module(module: str) -> Optional[str]: except Exception as e: # todo a bit misleading.. it actually shouldn't import in most cases, it's just the weird parent module import thing return "import error (possibly missing config entry)" # todo add exc message? - assert path is not None # not sure if can happen? + assert path is not None # not sure if can happen? if _is_not_module_src(Path(path)): return f"marked explicitly (via {NOT_HPI_MODULE_VAR})" @@ -57,9 +60,10 @@ def _iter_all_importables(pkg: ModuleType) -> Iterable[HPIModule]: def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModule]: - from .core_config import config - """Yield all importables under a given path and package.""" + + from .core_config import config # noqa: F401 + for dir_path, dirs, file_names in os.walk(pkg_pth): file_names.sort() # NOTE: sorting dirs in place is intended, it's the way you're supposed to do it with os.walk @@ -82,6 +86,7 @@ def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModu # TODO might need to make it defensive and yield Exception (otherwise hpi doctor might fail for no good reason) # use onerror=? + # ignored explicitly -> not HPI # if enabled in config -> HPI # if disabled in config -> HPI @@ -90,7 +95,7 @@ def _discover_path_importables(pkg_pth: Path, pkg_name: str) -> Iterable[HPIModu # TODO when do we need to recurse? -def _walk_packages(path: Iterable[str], prefix: str='', onerror=None) -> Iterable[HPIModule]: +def _walk_packages(path: Iterable[str], prefix: str = '', onerror=None) -> Iterable[HPIModule]: """ Modified version of https://github.com/python/cpython/blob/d50a0700265536a20bcce3fb108c954746d97625/Lib/pkgutil.py#L53, to avoid importing modules that are skipped @@ -153,8 +158,9 @@ def _walk_packages(path: Iterable[str], prefix: str='', onerror=None) -> Iterabl path = [p for p in path if not seen(p)] yield from _walk_packages(path, mname + '.', onerror) + # deprecate? 
-def get_modules() -> List[HPIModule]: +def get_modules() -> list[HPIModule]: return list(modules()) @@ -169,14 +175,14 @@ def test_module_detection() -> None: with reset() as cc: cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*'] mods = {m.name: m for m in modules()} - assert mods['my.demo'] .skip_reason == "has no 'stats()' function" + assert mods['my.demo'].skip_reason == "has no 'stats()' function" with reset() as cc: cc.disabled_modules = ['my.location.*', 'my.body.*', 'my.workouts.*', 'my.private.*', 'my.lastfm'] - cc.enabled_modules = ['my.demo'] + cc.enabled_modules = ['my.demo'] mods = {m.name: m for m in modules()} - assert mods['my.demo'] .skip_reason is None # not skipped + assert mods['my.demo'].skip_reason is None # not skipped assert mods['my.lastfm'].skip_reason == "suppressed in the user config" diff --git a/my/core/utils/concurrent.py b/my/core/utils/concurrent.py index 73944ec..515c3f1 100644 --- a/my/core/utils/concurrent.py +++ b/my/core/utils/concurrent.py @@ -1,6 +1,7 @@ -import sys +from __future__ import annotations + from concurrent.futures import Executor, Future -from typing import Any, Callable, Optional, TypeVar +from typing import Any, Callable, TypeVar from ..compat import ParamSpec @@ -15,7 +16,7 @@ class DummyExecutor(Executor): but also want to provide an option to run the code serially (e.g. for debugging) """ - def __init__(self, max_workers: Optional[int] = 1) -> None: + def __init__(self, max_workers: int | None = 1) -> None: self._shutdown = False self._max_workers = max_workers diff --git a/my/core/utils/imports.py b/my/core/utils/imports.py index 4666a5e..e0fb01d 100644 --- a/my/core/utils/imports.py +++ b/my/core/utils/imports.py @@ -1,27 +1,27 @@ +from __future__ import annotations + import importlib import importlib.util import sys from pathlib import Path from types import ModuleType -from typing import Optional - -from ..common import PathIsh # TODO only used in tests? not sure if useful at all. -def import_file(p: PathIsh, name: Optional[str] = None) -> ModuleType: +def import_file(p: Path | str, name: str | None = None) -> ModuleType: p = Path(p) if name is None: name = p.stem spec = importlib.util.spec_from_file_location(name, p) assert spec is not None, f"Fatal error; Could not create module spec from {name} {p}" foo = importlib.util.module_from_spec(spec) - loader = spec.loader; assert loader is not None + loader = spec.loader + assert loader is not None loader.exec_module(foo) return foo -def import_from(path: PathIsh, name: str) -> ModuleType: +def import_from(path: Path | str, name: str) -> ModuleType: path = str(path) sys.path.append(path) try: @@ -30,7 +30,7 @@ def import_from(path: PathIsh, name: str) -> ModuleType: sys.path.remove(path) -def import_dir(path: PathIsh, extra: str = '') -> ModuleType: +def import_dir(path: Path | str, extra: str = '') -> ModuleType: p = Path(path) if p.parts[0] == '~': p = p.expanduser() # TODO eh. not sure about this.. 
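The import helpers above can be exercised like so (a minimal sketch using a throwaway module; the file contents are hypothetical):

import tempfile
from pathlib import Path

from my.core.utils.imports import import_file

# write a one-off module and load it by path; the module name defaults to the file stem
with tempfile.TemporaryDirectory() as td:
    p = Path(td) / 'snippet.py'
    p.write_text('ANSWER = 42\n')
    mod = import_file(p)
    assert mod.ANSWER == 42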
diff --git a/my/core/utils/itertools.py b/my/core/utils/itertools.py index ae9402d..501ebbe 100644 --- a/my/core/utils/itertools.py +++ b/my/core/utils/itertools.py @@ -4,17 +4,13 @@ Various helpers/transforms of iterators Ideally this should be as small as possible and we should rely on stdlib itertools or more_itertools """ +from __future__ import annotations + import warnings -from collections.abc import Hashable +from collections.abc import Hashable, Iterable, Iterator, Sized from typing import ( TYPE_CHECKING, Callable, - Dict, - Iterable, - Iterator, - List, - Optional, - Sized, TypeVar, Union, cast, @@ -23,9 +19,8 @@ from typing import ( import more_itertools from decorator import decorator -from ..compat import ParamSpec from .. import warnings as core_warnings - +from ..compat import ParamSpec T = TypeVar('T') K = TypeVar('K') @@ -39,7 +34,7 @@ def _identity(v: T) -> V: # type: ignore[type-var] # ugh. nothing in more_itertools? # perhaps duplicates_everseen? but it doesn't yield non-unique elements? def ensure_unique(it: Iterable[T], *, key: Callable[[T], K]) -> Iterable[T]: - key2item: Dict[K, T] = {} + key2item: dict[K, T] = {} for i in it: k = key(i) pi = key2item.get(k, None) @@ -72,10 +67,10 @@ def make_dict( key: Callable[[T], K], # TODO make value optional instead? but then will need a typing override for it? value: Callable[[T], V] = _identity, -) -> Dict[K, V]: +) -> dict[K, V]: with_keys = ((key(i), i) for i in it) uniques = ensure_unique(with_keys, key=lambda p: p[0]) - res: Dict[K, V] = {} + res: dict[K, V] = {} for k, i in uniques: res[k] = i if value is None else value(i) return res @@ -93,8 +88,8 @@ def test_make_dict() -> None: d = make_dict(it, key=lambda i: i % 2, value=lambda i: i) # check type inference - d2: Dict[str, int] = make_dict(it, key=lambda i: str(i)) - d3: Dict[str, bool] = make_dict(it, key=lambda i: str(i), value=lambda i: i % 2 == 0) + d2: dict[str, int] = make_dict(it, key=lambda i: str(i)) + d3: dict[str, bool] = make_dict(it, key=lambda i: str(i), value=lambda i: i % 2 == 0) LFP = ParamSpec('LFP') @@ -102,7 +97,7 @@ LV = TypeVar('LV') @decorator -def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.kwargs) -> List[LV]: +def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.kwargs) -> list[LV]: """ Wraps a function's return value in wrapper (e.g. list) Useful when an algorithm can be expressed more cleanly as a generator @@ -115,7 +110,7 @@ def _listify(func: Callable[LFP, Iterable[LV]], *args: LFP.args, **kwargs: LFP.k # so seems easiest to just use specialize instantiations of decorator instead if TYPE_CHECKING: - def listify(func: Callable[LFP, Iterable[LV]]) -> Callable[LFP, List[LV]]: ... # noqa: ARG001 + def listify(func: Callable[LFP, Iterable[LV]]) -> Callable[LFP, list[LV]]: ... # noqa: ARG001 else: listify = _listify @@ -130,7 +125,7 @@ def test_listify() -> None: yield 2 res = it() - assert_type(res, List[int]) + assert_type(res, list[int]) assert res == [1, 2] @@ -201,24 +196,24 @@ def test_warn_if_empty_list() -> None: ll = [1, 2, 3] @warn_if_empty - def nonempty() -> List[int]: + def nonempty() -> list[int]: return ll with warnings.catch_warnings(record=True) as w: res1 = nonempty() assert len(w) == 0 - assert_type(res1, List[int]) + assert_type(res1, list[int]) assert isinstance(res1, list) assert res1 is ll # object should be unchanged! 
@warn_if_empty
-    def empty() -> List[str]:
+    def empty() -> list[str]:
         return []

     with warnings.catch_warnings(record=True) as w:
         res2 = empty()
         assert len(w) == 1
-        assert_type(res2, List[str])
+        assert_type(res2, list[str])
         assert isinstance(res2, list)
         assert res2 == []

@@ -242,7 +237,7 @@ def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]:
     """
     NOTE: Despite Hashable bound, typing annotation doesn't guarantee runtime safety
     Consider hashable type X, and Y that inherits from X, but not hashable
-    Then l: List[X] = [Y(...)] is a valid expression, and type checks against Hashable,
+    Then l: list[X] = [Y(...)] is a valid expression, and type checks against Hashable,
     but isn't runtime hashable
     """
     # Sadly this doesn't work 100% correctly with dataclasses atm...
@@ -268,28 +263,27 @@ def check_if_hashable(iterable: Iterable[_HT]) -> Iterable[_HT]:
 # TODO different policies -- error/warn/ignore?
 def test_check_if_hashable() -> None:
     from dataclasses import dataclass
-    from typing import Set, Tuple

     import pytest

     from ..compat import assert_type

-    x1: List[int] = [1, 2]
+    x1: list[int] = [1, 2]
     r1 = check_if_hashable(x1)
     assert_type(r1, Iterable[int])
     assert r1 is x1

-    x2: Iterator[Union[int, str]] = iter((123, 'aba'))
+    x2: Iterator[int | str] = iter((123, 'aba'))
     r2 = check_if_hashable(x2)
     assert_type(r2, Iterable[Union[int, str]])
     assert list(r2) == [123, 'aba']

-    x3: Tuple[object, ...] = (789, 'aba')
+    x3: tuple[object, ...] = (789, 'aba')
     r3 = check_if_hashable(x3)
     assert_type(r3, Iterable[object])
     assert r3 is x3  # object should be unchanged

-    x4: List[Set[int]] = [{1, 2, 3}, {4, 5, 6}]
+    x4: list[set[int]] = [{1, 2, 3}, {4, 5, 6}]
     with pytest.raises(Exception):
         # should be rejected by mypy since set isn't Hashable, but also throw at runtime
         r4 = check_if_hashable(x4)  # type: ignore[type-var]
@@ -307,7 +301,7 @@ def test_check_if_hashable() -> None:
     class X:
         a: int

-    x6: List[X] = [X(a=123)]
+    x6: list[X] = [X(a=123)]
     r6 = check_if_hashable(x6)
     assert x6 is r6

@@ -316,7 +310,7 @@ def test_check_if_hashable() -> None:
     class Y(X):
         b: str

-    x7: List[Y] = [Y(a=123, b='aba')]
+    x7: list[Y] = [Y(a=123, b='aba')]
     with pytest.raises(Exception):
         # ideally that would also be rejected by mypy, but currently there is a bug
         # which treats all dataclasses as hashable: https://github.com/python/mypy/issues/11463
@@ -331,11 +325,8 @@ _UEU = TypeVar('_UEU')
 # instead of just iterator
 # TODO maybe deprecate Callable support? not sure
 def unique_everseen(
-    fun: Union[
-        Callable[[], Iterable[_UET]],
-        Iterable[_UET]
-    ],
-    key: Optional[Callable[[_UET], _UEU]] = None,
+    fun: Callable[[], Iterable[_UET]] | Iterable[_UET],
+    key: Callable[[_UET], _UEU] | None = None,
 ) -> Iterator[_UET]:
     import os
diff --git a/my/core/warnings.py b/my/core/warnings.py
index 2ffc3e4..d67ec7d 100644
--- a/my/core/warnings.py
+++ b/my/core/warnings.py
@@ -5,14 +5,16 @@ since who looks at the terminal output?
E.g. would be nice to propagate the warnings in the UI (it's even a subclass of Exception!)
''' +from __future__ import annotations + import sys import warnings -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING import click -def _colorize(x: str, color: Optional[str] = None) -> str: +def _colorize(x: str, color: str | None = None) -> str: if color is None: return x @@ -24,7 +26,7 @@ def _colorize(x: str, color: Optional[str] = None) -> str: return click.style(x, fg=color) -def _warn(message: str, *args, color: Optional[str] = None, **kwargs) -> None: +def _warn(message: str, *args, color: str | None = None, **kwargs) -> None: stacklevel = kwargs.get('stacklevel', 1) kwargs['stacklevel'] = stacklevel + 2 # +1 for this function, +1 for medium/high wrapper warnings.warn(_colorize(message, color=color), *args, **kwargs) # noqa: B028 From 8496d131e7e44b3effcc289762a4218aa1457725 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 19 Oct 2024 22:10:40 +0100 Subject: [PATCH 02/11] general: migrate modules to use 3.9 features --- my/arbtt.py | 23 +++++++----- my/bluemaestro.py | 11 +++--- my/body/blood.py | 36 ++++++++++--------- my/body/exercise/all.py | 6 ++-- my/body/exercise/cardio.py | 1 - my/body/exercise/cross_trainer.py | 18 ++++++---- my/body/sleep/common.py | 5 +-- my/body/sleep/main.py | 5 ++- my/body/weight.py | 8 ++--- my/books/kobo.py | 7 ++-- my/browser/active_browser.py | 8 +++-- my/browser/all.py | 6 ++-- my/browser/export.py | 9 ++--- my/bumble/android.py | 26 +++++++------- my/calendar/holidays.py | 3 +- my/cfg.py | 1 - my/codeforces.py | 9 +++-- my/coding/commits.py | 34 +++++++++--------- my/common.py | 2 +- my/config.py | 36 ++++++++++--------- my/core/_deprecated/kompress.py | 2 +- my/core/common.py | 4 +-- my/demo.py | 6 ++-- my/emfit/__init__.py | 46 +++++++++++++----------- my/endomondo.py | 24 ++++++++----- my/error.py | 2 +- my/experimental/destructive_parsing.py | 9 ++--- my/fbmessenger/__init__.py | 1 + my/fbmessenger/all.py | 6 ++-- my/fbmessenger/android.py | 35 ++++++++++--------- my/fbmessenger/common.py | 18 ++++++---- my/fbmessenger/export.py | 9 +++-- my/foursquare.py | 9 +++-- my/github/all.py | 3 +- my/github/common.py | 21 ++++++----- my/github/gdpr.py | 3 +- my/github/ghexport.py | 25 +++++++++----- my/goodreads.py | 16 +++++---- my/google/maps/_android_protobuf.py | 4 +-- my/google/maps/android.py | 12 +++---- my/google/takeout/html.py | 28 ++++++++------- my/google/takeout/parser.py | 20 ++++++----- my/google/takeout/paths.py | 12 ++++--- my/hackernews/dogsheep.py | 12 +++---- my/hackernews/harmonic.py | 25 +++++++++----- my/hackernews/materialistic.py | 11 +++--- my/hypothesis.py | 10 +++--- my/instagram/all.py | 5 ++- my/instagram/android.py | 37 ++++++++++---------- my/instagram/common.py | 9 ++--- my/instagram/gdpr.py | 19 +++++----- my/instapaper.py | 10 ++++-- my/ip/all.py | 3 +- my/ip/common.py | 7 ++-- my/jawbone/__init__.py | 23 ++++++------ my/jawbone/plots.py | 17 ++++----- my/kobo.py | 31 +++++++++-------- my/kython/kompress.py | 3 +- my/lastfm.py | 14 +++++--- my/location/all.py | 5 ++- my/location/common.py | 11 +++--- my/location/fallback/all.py | 10 +++--- my/location/fallback/common.py | 31 +++++++++-------- my/location/fallback/via_home.py | 32 ++++++++--------- my/location/fallback/via_ip.py | 8 ++--- my/location/google.py | 21 ++++++----- my/location/google_takeout.py | 7 ++-- my/location/google_takeout_semantic.py | 11 +++--- my/location/gpslogger.py | 10 +++--- my/location/home.py | 4 +-- my/location/via_ip.py | 4 +-- my/materialistic.py | 1 + my/media/imdb.py | 10 +++--- my/media/youtube.py 
| 2 +- my/monzo/monzoexport.py | 5 +-- my/orgmode.py | 10 +++--- my/pdfs.py | 14 ++++---- my/photos/main.py | 29 +++++++++------- my/photos/utils.py | 15 ++++---- my/pinboard.py | 9 ++--- my/pocket.py | 12 ++++--- my/polar.py | 33 ++++++++++-------- my/reddit/__init__.py | 1 + my/reddit/all.py | 7 ++-- my/reddit/common.py | 10 +++--- my/reddit/pushshift.py | 12 +++---- my/reddit/rexport.py | 17 ++++----- my/rescuetime.py | 23 +++++++----- my/roamresearch.py | 29 +++++++++------- my/rss/all.py | 4 +-- my/rss/common.py | 16 +++++---- my/rss/feedbin.py | 8 ++--- my/rss/feedly.py | 3 +- my/rtm.py | 24 ++++++------- my/runnerup.py | 14 ++++---- my/simple.py | 5 ++- my/smscalls.py | 48 ++++++++++++++------------ my/stackexchange/gdpr.py | 20 ++++++++--- my/stackexchange/stexport.py | 3 +- my/taplog.py | 14 ++++---- my/telegram/telegram_backup.py | 30 ++++++++-------- my/tests/bluemaestro.py | 2 +- my/tests/body/weight.py | 6 ++-- my/tests/commits.py | 7 ++-- my/tests/location/fallback.py | 2 +- my/tests/reddit.py | 10 +++--- my/time/tz/common.py | 1 - my/time/tz/main.py | 1 + my/time/tz/via_location.py | 36 +++++++++---------- my/tinder/android.py | 18 +++++----- my/topcoder.py | 8 ++--- my/twitter/all.py | 6 ++-- my/twitter/android.py | 16 ++++----- my/twitter/archive.py | 3 +- my/twitter/common.py | 10 +++--- my/twitter/talon.py | 3 +- my/twitter/twint.py | 10 +++--- my/util/hpi_heartbeat.py | 11 +++--- my/vk/favorites.py | 13 +++---- my/vk/vk_messages_backup.py | 12 +++---- my/whatsapp/android.py | 23 ++++++------ my/youtube/takeout.py | 3 +- my/zotero.py | 22 ++++++------ my/zulip/organization.py | 2 +- ruff.toml | 16 ++++----- 125 files changed, 889 insertions(+), 739 deletions(-) diff --git a/my/arbtt.py b/my/arbtt.py index 2bcf291..5d4bf8e 100644 --- a/my/arbtt.py +++ b/my/arbtt.py @@ -2,20 +2,22 @@ [[https://github.com/nomeata/arbtt#arbtt-the-automatic-rule-based-time-tracker][Arbtt]] time tracking ''' +from __future__ import annotations + REQUIRES = ['ijson', 'cffi'] # NOTE likely also needs libyajl2 from apt or elsewhere? +from collections.abc import Iterable, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Sequence, Iterable, List, Optional def inputs() -> Sequence[Path]: try: from my.config import arbtt as user_config except ImportError: - from .core.warnings import low + from my.core.warnings import low low("Couldn't find 'arbtt' config section, falling back to the default capture.log (usually in HOME dir). Add 'arbtt' section with logfiles = '' to suppress this warning.") return [] else: @@ -55,7 +57,7 @@ class Entry: return fromisoformat(ds) @property - def active(self) -> Optional[str]: + def active(self) -> str | None: # NOTE: WIP, might change this in the future... 
ait = (w for w in self.json['windows'] if w['active']) a = next(ait, None) @@ -74,17 +76,18 @@ class Entry: def entries() -> Iterable[Entry]: inps = list(inputs()) - base: List[PathIsh] = ['arbtt-dump', '--format=json'] + base: list[PathIsh] = ['arbtt-dump', '--format=json'] - cmds: List[List[PathIsh]] + cmds: list[list[PathIsh]] if len(inps) == 0: cmds = [base] # rely on default else: # otherwise, 'merge' them cmds = [[*base, '--logfile', f] for f in inps] - import ijson.backends.yajl2_cffi as ijson # type: ignore - from subprocess import Popen, PIPE + from subprocess import PIPE, Popen + + import ijson.backends.yajl2_cffi as ijson # type: ignore for cmd in cmds: with Popen(cmd, stdout=PIPE) as p: out = p.stdout; assert out is not None @@ -93,8 +96,8 @@ def entries() -> Iterable[Entry]: def fill_influxdb() -> None: - from .core.influxdb import magic_fill from .core.freezer import Freezer + from .core.influxdb import magic_fill freezer = Freezer(Entry) fit = (freezer.freeze(e) for e in entries()) # TODO crap, influxdb doesn't like None https://github.com/influxdata/influxdb/issues/7722 @@ -106,6 +109,8 @@ def fill_influxdb() -> None: magic_fill(fit, name=f'{entries.__module__}:{entries.__name__}') -from .core import stat, Stats +from .core import Stats, stat + + def stats() -> Stats: return stat(entries) diff --git a/my/bluemaestro.py b/my/bluemaestro.py index 4c33fd1..8c739f0 100644 --- a/my/bluemaestro.py +++ b/my/bluemaestro.py @@ -2,14 +2,17 @@ [[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor """ +from __future__ import annotations + # todo most of it belongs to DAL... but considering so few people use it I didn't bother for now import re import sqlite3 from abc import abstractmethod +from collections.abc import Iterable, Sequence from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path -from typing import Iterable, Optional, Protocol, Sequence, Set +from typing import Protocol import pytz @@ -87,17 +90,17 @@ def measurements() -> Iterable[Res[Measurement]]: total = len(paths) width = len(str(total)) - last: Optional[datetime] = None + last: datetime | None = None # tables are immutable, so can save on processing.. - processed_tables: Set[str] = set() + processed_tables: set[str] = set() for idx, path in enumerate(paths): logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}') tot = 0 new = 0 # todo assert increasing timestamp? 
with sqlite_connect_immutable(path) as db: - db_dt: Optional[datetime] = None + db_dt: datetime | None = None try: datas = db.execute( f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index' diff --git a/my/body/blood.py b/my/body/blood.py index fb035eb..867568c 100644 --- a/my/body/blood.py +++ b/my/body/blood.py @@ -2,41 +2,42 @@ Blood tracking (manual org-mode entries) """ +from __future__ import annotations + +from collections.abc import Iterable from datetime import datetime -from typing import Iterable, NamedTuple, Optional +from typing import NamedTuple -from ..core.error import Res -from ..core.orgmode import parse_org_datetime, one_table - - -import pandas as pd import orgparse - +import pandas as pd from my.config import blood as config # type: ignore[attr-defined] +from ..core.error import Res +from ..core.orgmode import one_table, parse_org_datetime + class Entry(NamedTuple): dt: datetime - ketones : Optional[float]=None - glucose : Optional[float]=None + ketones : float | None=None + glucose : float | None=None - vitamin_d : Optional[float]=None - vitamin_b12 : Optional[float]=None + vitamin_d : float | None=None + vitamin_b12 : float | None=None - hdl : Optional[float]=None - ldl : Optional[float]=None - triglycerides: Optional[float]=None + hdl : float | None=None + ldl : float | None=None + triglycerides: float | None=None - source : Optional[str]=None - extra : Optional[str]=None + source : str | None=None + extra : str | None=None Result = Res[Entry] -def try_float(s: str) -> Optional[float]: +def try_float(s: str) -> float | None: l = s.split() if len(l) == 0: return None @@ -105,6 +106,7 @@ def blood_tests_data() -> Iterable[Result]: def data() -> Iterable[Result]: from itertools import chain + from ..core.error import sort_res_by datas = chain(glucose_ketones_data(), blood_tests_data()) return sort_res_by(datas, key=lambda e: e.dt) diff --git a/my/body/exercise/all.py b/my/body/exercise/all.py index e86a5af..d0df747 100644 --- a/my/body/exercise/all.py +++ b/my/body/exercise/all.py @@ -7,10 +7,10 @@ from ...core.pandas import DataFrameT, check_dataframe @check_dataframe def dataframe() -> DataFrameT: # this should be somehow more flexible... - from ...endomondo import dataframe as EDF - from ...runnerup import dataframe as RDF - import pandas as pd + + from ...endomondo import dataframe as EDF + from ...runnerup import dataframe as RDF return pd.concat([ EDF(), RDF(), diff --git a/my/body/exercise/cardio.py b/my/body/exercise/cardio.py index 083b972..d8a6afd 100644 --- a/my/body/exercise/cardio.py +++ b/my/body/exercise/cardio.py @@ -3,7 +3,6 @@ Cardio data, filtered from various data sources ''' from ...core.pandas import DataFrameT, check_dataframe - CARDIO = { 'Running', 'Running, treadmill', diff --git a/my/body/exercise/cross_trainer.py b/my/body/exercise/cross_trainer.py index edbb557..30f96f9 100644 --- a/my/body/exercise/cross_trainer.py +++ b/my/body/exercise/cross_trainer.py @@ -5,16 +5,18 @@ This is probably too specific to my needs, so later I will move it away to a per For now it's worth keeping it here as an example and perhaps utility functions might be useful for other HPI modules. 
''' -from datetime import datetime, timedelta -from typing import Optional +from __future__ import annotations -from ...core.pandas import DataFrameT, check_dataframe as cdf -from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable +from datetime import datetime, timedelta + +import pytz from my.config import exercise as config +from ...core.orgmode import Table, TypedTable, collect, parse_org_datetime +from ...core.pandas import DataFrameT +from ...core.pandas import check_dataframe as cdf -import pytz # FIXME how to attach it properly? tz = pytz.timezone('Europe/London') @@ -114,7 +116,7 @@ def dataframe() -> DataFrameT: rows.append(rd) # presumably has an error set continue - idx: Optional[int] + idx: int | None close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < _DELTA] if len(close) == 0: idx = None @@ -163,7 +165,9 @@ def dataframe() -> DataFrameT: # TODO wtf?? where is speed coming from?? -from ...core import stat, Stats +from ...core import Stats, stat + + def stats() -> Stats: return stat(cross_trainer_data) diff --git a/my/body/sleep/common.py b/my/body/sleep/common.py index 1100814..fc288e5 100644 --- a/my/body/sleep/common.py +++ b/my/body/sleep/common.py @@ -1,5 +1,6 @@ -from ...core import stat, Stats -from ...core.pandas import DataFrameT, check_dataframe as cdf +from ...core import Stats, stat +from ...core.pandas import DataFrameT +from ...core.pandas import check_dataframe as cdf class Combine: diff --git a/my/body/sleep/main.py b/my/body/sleep/main.py index 29b12a7..2460e03 100644 --- a/my/body/sleep/main.py +++ b/my/body/sleep/main.py @@ -1,7 +1,6 @@ -from ... import jawbone -from ... import emfit - +from ... import emfit, jawbone from .common import Combine + _combined = Combine([ jawbone, emfit, diff --git a/my/body/weight.py b/my/body/weight.py index 51e6513..d5478ef 100644 --- a/my/body/weight.py +++ b/my/body/weight.py @@ -2,14 +2,14 @@ Weight data (manually logged) ''' +from collections.abc import Iterator from dataclasses import dataclass from datetime import datetime -from typing import Any, Iterator - -from my.core import make_logger -from my.core.error import Res, extract_error_datetime, set_error_datetime +from typing import Any from my import orgmode +from my.core import make_logger +from my.core.error import Res, extract_error_datetime, set_error_datetime config = Any diff --git a/my/books/kobo.py b/my/books/kobo.py index 2a469d0..899ef31 100644 --- a/my/books/kobo.py +++ b/my/books/kobo.py @@ -1,7 +1,6 @@ -from ..core import warnings +from my.core import warnings warnings.high('my.books.kobo is deprecated! 
Please use my.kobo instead!') -from ..core.util import __NOT_HPI_MODULE__ - -from ..kobo import * # type: ignore[no-redef] +from my.core.util import __NOT_HPI_MODULE__ +from my.kobo import * # type: ignore[no-redef] diff --git a/my/browser/active_browser.py b/my/browser/active_browser.py index 6f335bd..8051f1b 100644 --- a/my/browser/active_browser.py +++ b/my/browser/active_browser.py @@ -19,16 +19,18 @@ class config(user_config.active_browser): export_path: Paths +from collections.abc import Iterator, Sequence from pathlib import Path -from typing import Sequence, Iterator -from my.core import get_files, Stats, make_logger -from browserexport.merge import read_visits, Visit +from browserexport.merge import Visit, read_visits from sqlite_backup import sqlite_backup +from my.core import Stats, get_files, make_logger + logger = make_logger(__name__) from .common import _patch_browserexport_logs + _patch_browserexport_logs(logger.level) diff --git a/my/browser/all.py b/my/browser/all.py index a7d12b4..feb973a 100644 --- a/my/browser/all.py +++ b/my/browser/all.py @@ -1,9 +1,9 @@ -from typing import Iterator +from collections.abc import Iterator + +from browserexport.merge import Visit, merge_visits from my.core import Stats from my.core.source import import_source -from browserexport.merge import merge_visits, Visit - src_export = import_source(module_name="my.browser.export") src_active = import_source(module_name="my.browser.active_browser") diff --git a/my/browser/export.py b/my/browser/export.py index 1b428b5..351cf6e 100644 --- a/my/browser/export.py +++ b/my/browser/export.py @@ -4,11 +4,12 @@ Parses browser history using [[http://github.com/seanbreckenridge/browserexport] REQUIRES = ["browserexport"] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Iterator, Sequence -import my.config +from browserexport.merge import Visit, read_and_merge + from my.core import ( Paths, Stats, @@ -18,10 +19,10 @@ from my.core import ( ) from my.core.cachew import mcachew -from browserexport.merge import read_and_merge, Visit - from .common import _patch_browserexport_logs +import my.config # isort: skip + @dataclass class config(my.config.browser.export): diff --git a/my/bumble/android.py b/my/bumble/android.py index 54a0441..3f9fa13 100644 --- a/my/bumble/android.py +++ b/my/bumble/android.py @@ -3,24 +3,24 @@ Bumble data from Android app database (in =/data/data/com.bumble.app/databases/C """ from __future__ import annotations +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime -from typing import Iterator, Sequence, Optional, Dict +from pathlib import Path from more_itertools import unique_everseen -from my.config import bumble as user_config +from my.core import Paths, get_files + +from my.config import bumble as user_config # isort: skip -from ..core import Paths @dataclass class config(user_config.android): # paths[s]/glob to the exported sqlite databases export_path: Paths -from ..core import get_files -from pathlib import Path def inputs() -> Sequence[Path]: return get_files(config.export_path) @@ -43,22 +43,24 @@ class _BaseMessage: @dataclass(unsafe_hash=True) class _Message(_BaseMessage): conversation_id: str - reply_to_id: Optional[str] + reply_to_id: str | None @dataclass(unsafe_hash=True) class Message(_BaseMessage): person: Person - reply_to: Optional[Message] + reply_to: Message | None import json -from typing import Union -from ..core import Res 
import sqlite3 -from ..core.sqlite import sqlite_connect_immutable, select +from typing import Union + from my.core.compat import assert_never +from ..core import Res +from ..core.sqlite import select, sqlite_connect_immutable + EntitiesRes = Res[Union[Person, _Message]] def _entities() -> Iterator[EntitiesRes]: @@ -120,8 +122,8 @@ _UNKNOWN_PERSON = "UNKNOWN_PERSON" def messages() -> Iterator[Res[Message]]: - id2person: Dict[str, Person] = {} - id2msg: Dict[str, Message] = {} + id2person: dict[str, Person] = {} + id2msg: dict[str, Message] = {} for x in unique_everseen(_entities(), key=_key): if isinstance(x, Exception): yield x diff --git a/my/calendar/holidays.py b/my/calendar/holidays.py index af51696..522672e 100644 --- a/my/calendar/holidays.py +++ b/my/calendar/holidays.py @@ -15,7 +15,8 @@ from my.core.time import zone_to_countrycode @lru_cache(1) def _calendar(): - from workalendar.registry import registry # type: ignore + from workalendar.registry import registry # type: ignore + # todo switch to using time.tz.main once _get_tz stabilizes? from ..time.tz import via_location as LTZ # TODO would be nice to do it dynamically depending on the past timezones... diff --git a/my/cfg.py b/my/cfg.py index e4020b4..9331e8a 100644 --- a/my/cfg.py +++ b/my/cfg.py @@ -1,7 +1,6 @@ import my.config as config from .core import __NOT_HPI_MODULE__ - from .core import warnings as W # still used in Promnesia, maybe in dashboard? diff --git a/my/codeforces.py b/my/codeforces.py index f2d150a..9c6b7c9 100644 --- a/my/codeforces.py +++ b/my/codeforces.py @@ -1,13 +1,12 @@ +import json +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone from functools import cached_property -import json from pathlib import Path -from typing import Dict, Iterator, Sequence - -from my.core import get_files, Res, datetime_aware from my.config import codeforces as config # type: ignore[attr-defined] +from my.core import Res, datetime_aware, get_files def inputs() -> Sequence[Path]: @@ -39,7 +38,7 @@ class Competition: class Parser: def __init__(self, *, inputs: Sequence[Path]) -> None: self.inputs = inputs - self.contests: Dict[ContestId, Contest] = {} + self.contests: dict[ContestId, Contest] = {} def _parse_allcontests(self, p: Path) -> Iterator[Contest]: j = json.loads(p.read_text()) diff --git a/my/coding/commits.py b/my/coding/commits.py index 31c366e..fe17dee 100644 --- a/my/coding/commits.py +++ b/my/coding/commits.py @@ -1,29 +1,32 @@ """ Git commits data for repositories on your filesystem """ + +from __future__ import annotations + REQUIRES = [ 'gitpython', ] - import shutil -from pathlib import Path -from datetime import datetime, timezone +from collections.abc import Iterator, Sequence from dataclasses import dataclass, field -from typing import List, Optional, Iterator, Set, Sequence, cast +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional, cast - -from my.core import PathIsh, LazyLogger, make_config +from my.core import LazyLogger, PathIsh, make_config from my.core.cachew import cache_dir, mcachew from my.core.warnings import high +from my.config import commits as user_config # isort: skip + -from my.config import commits as user_config @dataclass class commits_cfg(user_config): roots: Sequence[PathIsh] = field(default_factory=list) - emails: Optional[Sequence[str]] = None - names: Optional[Sequence[str]] = None + emails: Sequence[str] | None = None + names: Sequence[str] | None = None # experiment 
to make it lazy? @@ -40,7 +43,6 @@ def config() -> commits_cfg: import git from git.repo.fun import is_git_dir - log = LazyLogger(__name__, level='info') @@ -93,7 +95,7 @@ def _git_root(git_dir: PathIsh) -> Path: return gd # must be bare -def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Commit]: +def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Commit]: # without path might not handle pull heads properly for c in gr.iter_commits(rev=rev): if not by_me(c): @@ -120,7 +122,7 @@ def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Com def repo_commits(repo: PathIsh): gr = git.Repo(str(repo)) - emitted: Set[str] = set() + emitted: set[str] = set() for r in gr.references: yield from _repo_commits_aux(gr=gr, rev=r.path, emitted=emitted) @@ -141,14 +143,14 @@ def canonical_name(repo: Path) -> str: def _fd_path() -> str: # todo move it to core - fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd") + fd_path: str | None = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd") if fd_path is None: high("my.coding.commits requires 'fd' to be installed, See https://github.com/sharkdp/fd#installation") assert fd_path is not None return fd_path -def git_repos_in(roots: List[Path]) -> List[Path]: +def git_repos_in(roots: list[Path]) -> list[Path]: from subprocess import check_output outputs = check_output([ _fd_path(), @@ -172,7 +174,7 @@ def git_repos_in(roots: List[Path]) -> List[Path]: return repos -def repos() -> List[Path]: +def repos() -> list[Path]: return git_repos_in(list(map(Path, config().roots))) @@ -190,7 +192,7 @@ def _repo_depends_on(_repo: Path) -> int: raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}") -def _commits(_repos: List[Path]) -> Iterator[Commit]: +def _commits(_repos: list[Path]) -> Iterator[Commit]: for r in _repos: yield from _cached_commits(r) diff --git a/my/common.py b/my/common.py index 1b56fb5..22e9487 100644 --- a/my/common.py +++ b/my/common.py @@ -1,6 +1,6 @@ from .core.warnings import high + high("DEPRECATED! Please use my.core.common instead.") from .core import __NOT_HPI_MODULE__ - from .core.common import * diff --git a/my/config.py b/my/config.py index 2dd9cda..301bf49 100644 --- a/my/config.py +++ b/my/config.py @@ -9,17 +9,18 @@ This file is used for: - mypy: this file provides some type annotations - for loading the actual user config ''' + +from __future__ import annotations + #### NOTE: you won't need this line VVVV in your personal config -from my.core import init # noqa: F401 +from my.core import init # noqa: F401 # isort: skip ### from datetime import tzinfo from pathlib import Path -from typing import List - -from my.core import Paths, PathIsh +from my.core import PathIsh, Paths class hypothesis: @@ -75,14 +76,16 @@ class google: takeout_path: Paths = '' -from typing import Sequence, Union, Tuple -from datetime import datetime, date, timedelta +from collections.abc import Sequence +from datetime import date, datetime, timedelta +from typing import Union + DateIsh = Union[datetime, date, str] -LatLon = Tuple[float, float] +LatLon = tuple[float, float] class location: # todo ugh, need to think about it... mypy wants the type here to be general, otherwise it can't deduce # and we can't import the types from the module itself, otherwise would be circular. common module? 
- home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0) + home: LatLon | Sequence[tuple[DateIsh, LatLon]] = (1.0, -1.0) home_accuracy = 30_000.0 class via_ip: @@ -103,6 +106,8 @@ class location: from typing import Literal + + class time: class tz: policy: Literal['keep', 'convert', 'throw'] @@ -121,10 +126,9 @@ class arbtt: logfiles: Paths -from typing import Optional class commits: - emails: Optional[Sequence[str]] - names: Optional[Sequence[str]] + emails: Sequence[str] | None + names: Sequence[str] | None roots: Sequence[PathIsh] @@ -150,8 +154,8 @@ class tinder: class instagram: class android: export_path: Paths - username: Optional[str] - full_name: Optional[str] + username: str | None + full_name: str | None class gdpr: export_path: Paths @@ -169,7 +173,7 @@ class materialistic: class fbmessenger: class fbmessengerexport: export_db: PathIsh - facebook_id: Optional[str] + facebook_id: str | None class android: export_path: Paths @@ -247,7 +251,7 @@ class runnerup: class emfit: export_path: Path timezone: tzinfo - excluded_sids: List[str] + excluded_sids: list[str] class foursquare: @@ -270,7 +274,7 @@ class roamresearch: class whatsapp: class android: export_path: Paths - my_user_id: Optional[str] + my_user_id: str | None class harmonic: diff --git a/my/core/_deprecated/kompress.py b/my/core/_deprecated/kompress.py index ce14fad..c3f333f 100644 --- a/my/core/_deprecated/kompress.py +++ b/my/core/_deprecated/kompress.py @@ -11,7 +11,7 @@ from collections.abc import Iterator, Sequence from datetime import datetime from functools import total_ordering from pathlib import Path -from typing import IO, Any, Union +from typing import IO, Union PathIsh = Union[Path, str] diff --git a/my/core/common.py b/my/core/common.py index 91fe9bd..aa994ea 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -63,7 +63,7 @@ def get_files( if '*' in gs: if glob != DEFAULT_GLOB: warnings.medium(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!") - paths.extend(map(Path, do_glob(gs))) + paths.extend(map(Path, do_glob(gs))) # noqa: PTH207 elif os.path.isdir(str(src)): # noqa: PTH112 # NOTE: we're using os.path here on purpose instead of src.is_dir # the reason is is_dir for archives might return True and then @@ -157,7 +157,7 @@ def get_valid_filename(s: str) -> str: # TODO deprecate and suggest to use one from my.core directly? 
not sure -from .utils.itertools import unique_everseen +from .utils.itertools import unique_everseen # noqa: F401 ### legacy imports, keeping them here for backwards compatibility ## hiding behind TYPE_CHECKING so it works in runtime diff --git a/my/demo.py b/my/demo.py index 0c54792..fa80b2a 100644 --- a/my/demo.py +++ b/my/demo.py @@ -1,12 +1,14 @@ ''' Just a demo module for testing and documentation purposes ''' +from __future__ import annotations import json +from collections.abc import Iterable, Sequence from dataclasses import dataclass from datetime import datetime, timezone, tzinfo from pathlib import Path -from typing import Iterable, Optional, Protocol, Sequence +from typing import Protocol from my.core import Json, PathIsh, Paths, get_files @@ -20,7 +22,7 @@ class config(Protocol): # this is to check optional attribute handling timezone: tzinfo = timezone.utc - external: Optional[PathIsh] = None + external: PathIsh | None = None @property def external_module(self): diff --git a/my/emfit/__init__.py b/my/emfit/__init__.py index 9934903..0d50b06 100644 --- a/my/emfit/__init__.py +++ b/my/emfit/__init__.py @@ -4,31 +4,34 @@ Consumes data exported by https://github.com/karlicoss/emfitexport """ +from __future__ import annotations + REQUIRES = [ 'git+https://github.com/karlicoss/emfitexport', ] -from contextlib import contextmanager import dataclasses -from datetime import datetime, time, timedelta import inspect +from collections.abc import Iterable, Iterator +from contextlib import contextmanager +from datetime import datetime, time, timedelta from pathlib import Path -from typing import Any, Dict, Iterable, Iterator, List, Optional - -from my.core import ( - get_files, - stat, - Res, - Stats, -) -from my.core.cachew import cache_dir, mcachew -from my.core.error import set_error_datetime, extract_error_datetime -from my.core.pandas import DataFrameT - -from my.config import emfit as config +from typing import Any import emfitexport.dal as dal +from my.core import ( + Res, + Stats, + get_files, + stat, +) +from my.core.cachew import cache_dir, mcachew +from my.core.error import extract_error_datetime, set_error_datetime +from my.core.pandas import DataFrameT + +from my.config import emfit as config # isort: skip + Emfit = dal.Emfit @@ -85,7 +88,7 @@ def datas() -> Iterable[Res[Emfit]]: # TODO should be used for jawbone data as well? def pre_dataframe() -> Iterable[Res[Emfit]]: # TODO shit. I need some sort of interrupted sleep detection? - g: List[Emfit] = [] + g: list[Emfit] = [] def flush() -> Iterable[Res[Emfit]]: if len(g) == 0: @@ -112,10 +115,10 @@ def pre_dataframe() -> Iterable[Res[Emfit]]: def dataframe() -> DataFrameT: - dicts: List[Dict[str, Any]] = [] - last: Optional[Emfit] = None + dicts: list[dict[str, Any]] = [] + last: Emfit | None = None for s in pre_dataframe(): - d: Dict[str, Any] + d: dict[str, Any] if isinstance(s, Exception): edt = extract_error_datetime(s) d = { @@ -166,11 +169,12 @@ def stats() -> Stats: @contextmanager def fake_data(nights: int = 500) -> Iterator: - from my.core.cfg import tmp_config from tempfile import TemporaryDirectory import pytz + from my.core.cfg import tmp_config + with TemporaryDirectory() as td: tdir = Path(td) gen = dal.FakeData() @@ -187,7 +191,7 @@ def fake_data(nights: int = 500) -> Iterator: # TODO remove/deprecate it? I think used by timeline -def get_datas() -> List[Emfit]: +def get_datas() -> list[Emfit]: # todo ugh. 
run lint properly return sorted(datas(), key=lambda e: e.start) # type: ignore diff --git a/my/endomondo.py b/my/endomondo.py index 293a542..7732c00 100644 --- a/my/endomondo.py +++ b/my/endomondo.py @@ -7,13 +7,14 @@ REQUIRES = [ ] # todo use ast in setup.py or doctor to extract the corresponding pip packages? +from collections.abc import Iterable, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Sequence, Iterable + +from my.config import endomondo as user_config from .core import Paths, get_files -from my.config import endomondo as user_config @dataclass class endomondo(user_config): @@ -33,15 +34,17 @@ def inputs() -> Sequence[Path]: import endoexport.dal as dal from endoexport.dal import Point, Workout # noqa: F401 - from .core import Res + + # todo cachew? def workouts() -> Iterable[Res[Workout]]: _dal = dal.DAL(inputs()) yield from _dal.workouts() -from .core.pandas import check_dataframe, DataFrameT +from .core.pandas import DataFrameT, check_dataframe + @check_dataframe def dataframe(*, defensive: bool=True) -> DataFrameT: @@ -75,7 +78,9 @@ def dataframe(*, defensive: bool=True) -> DataFrameT: return df -from .core import stat, Stats +from .core import Stats, stat + + def stats() -> Stats: return { # todo pretty print stats? @@ -86,13 +91,16 @@ def stats() -> Stats: # TODO make sure it's possible to 'advise' functions and override stuff +from collections.abc import Iterator from contextlib import contextmanager -from typing import Iterator + + @contextmanager def fake_data(count: int=100) -> Iterator: - from my.core.cfg import tmp_config - from tempfile import TemporaryDirectory import json + from tempfile import TemporaryDirectory + + from my.core.cfg import tmp_config with TemporaryDirectory() as td: tdir = Path(td) fd = dal.FakeData() diff --git a/my/error.py b/my/error.py index c0b734c..e3c1e11 100644 --- a/my/error.py +++ b/my/error.py @@ -1,6 +1,6 @@ from .core.warnings import high + high("DEPRECATED! Please use my.core.error instead.") from .core import __NOT_HPI_MODULE__ - from .core.error import * diff --git a/my/experimental/destructive_parsing.py b/my/experimental/destructive_parsing.py index b389f7e..0c4092a 100644 --- a/my/experimental/destructive_parsing.py +++ b/my/experimental/destructive_parsing.py @@ -1,5 +1,6 @@ +from collections.abc import Iterator from dataclasses import dataclass -from typing import Any, Iterator, List, Tuple +from typing import Any from my.core.compat import NoneType, assert_never @@ -9,7 +10,7 @@ from my.core.compat import NoneType, assert_never class Helper: manager: 'Manager' item: Any # todo realistically, list or dict? could at least type as indexable or something - path: Tuple[str, ...] + path: tuple[str, ...] def pop_if_primitive(self, *keys: str) -> None: """ @@ -40,9 +41,9 @@ def is_empty(x) -> bool: class Manager: def __init__(self) -> None: - self.helpers: List[Helper] = [] + self.helpers: list[Helper] = [] - def helper(self, item: Any, *, path: Tuple[str, ...] = ()) -> Helper: + def helper(self, item: Any, *, path: tuple[str, ...] 
= ()) -> Helper: res = Helper(manager=self, item=item, path=path) self.helpers.append(res) return res diff --git a/my/fbmessenger/__init__.py b/my/fbmessenger/__init__.py index 40fb235..f729de9 100644 --- a/my/fbmessenger/__init__.py +++ b/my/fbmessenger/__init__.py @@ -20,6 +20,7 @@ REQUIRES = [ from my.core.hpi_compat import handle_legacy_import + is_legacy_import = handle_legacy_import( parent_module_name=__name__, legacy_submodule_name='export', diff --git a/my/fbmessenger/all.py b/my/fbmessenger/all.py index 13689db..a057dca 100644 --- a/my/fbmessenger/all.py +++ b/my/fbmessenger/all.py @@ -1,10 +1,10 @@ -from typing import Iterator -from my.core import Res, stat, Stats +from collections.abc import Iterator + +from my.core import Res, Stats from my.core.source import import_source from .common import Message, _merge_messages - src_export = import_source(module_name='my.fbmessenger.export') src_android = import_source(module_name='my.fbmessenger.android') diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py index effabab..a16d924 100644 --- a/my/fbmessenger/android.py +++ b/my/fbmessenger/android.py @@ -4,19 +4,20 @@ Messenger data from Android app database (in =/data/data/com.facebook.orca/datab from __future__ import annotations +import sqlite3 +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -import sqlite3 -from typing import Iterator, Sequence, Optional, Dict, Union, List +from typing import Union -from my.core import get_files, Paths, datetime_aware, Res, LazyLogger, make_config +from my.core import LazyLogger, Paths, Res, datetime_aware, get_files, make_config from my.core.common import unique_everseen from my.core.compat import assert_never from my.core.error import echain from my.core.sqlite import sqlite_connection -from my.config import fbmessenger as user_config +from my.config import fbmessenger as user_config # isort: skip logger = LazyLogger(__name__) @@ -27,7 +28,7 @@ class Config(user_config.android): # paths[s]/glob to the exported sqlite databases export_path: Paths - facebook_id: Optional[str] = None + facebook_id: str | None = None # hmm. this is necessary for default value (= None) to work @@ -42,13 +43,13 @@ def inputs() -> Sequence[Path]: @dataclass(unsafe_hash=True) class Sender: id: str - name: Optional[str] + name: str | None @dataclass(unsafe_hash=True) class Thread: id: str - name: Optional[str] # isn't set for groups or one to one messages + name: str | None # isn't set for groups or one to one messages # todo not sure about order of fields... 
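# ---- editor's aside: a minimal runnable sketch of what the Optional[X] -> X | None
# rewrites in this file rely on. `DemoSender` is a made-up stand-in for the Sender
# dataclass above. Because the file already has `from __future__ import annotations`
# (PEP 563), annotations are kept as strings and never evaluated, so the PEP 604 `|`
# syntax only has to parse on 3.9. Note the patch deliberately keeps typing.Union for
# runtime-evaluated aliases such as `Entity = Union[Sender, Thread, _Message]` below,
# since `|` between classes only works at runtime from 3.10.
from __future__ import annotations

from dataclasses import dataclass


@dataclass(unsafe_hash=True)
class DemoSender:
    id: str
    name: str | None  # stored as the string 'str | None', so no TypeError on 3.9


print(DemoSender.__annotations__['name'])  # -> 'str | None'
# ---- end of aside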
@@ -56,14 +57,14 @@ class Thread: class _BaseMessage: id: str dt: datetime_aware - text: Optional[str] + text: str | None @dataclass(unsafe_hash=True) class _Message(_BaseMessage): thread_id: str sender_id: str - reply_to_id: Optional[str] + reply_to_id: str | None # todo hmm, on the one hand would be kinda nice to inherit common.Message protocol here @@ -72,7 +73,7 @@ class _Message(_BaseMessage): class Message(_BaseMessage): thread: Thread sender: Sender - reply_to: Optional[Message] + reply_to: Message | None Entity = Union[Sender, Thread, _Message] @@ -110,7 +111,7 @@ def _normalise_thread_id(key) -> str: # NOTE: this is sort of copy pasted from other _process_db method # maybe later could unify them def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]: - senders: Dict[str, Sender] = {} + senders: dict[str, Sender] = {} for r in db.execute('SELECT CAST(id AS TEXT) AS id, name FROM contacts'): s = Sender( id=r['id'], # looks like it's server id? same used on facebook site @@ -127,7 +128,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]: # TODO can we get it from db? could infer as the most common id perhaps? self_id = config.facebook_id - thread_users: Dict[str, List[Sender]] = {} + thread_users: dict[str, list[Sender]] = {} for r in db.execute('SELECT CAST(thread_key AS TEXT) AS thread_key, CAST(contact_id AS TEXT) AS contact_id FROM participants'): thread_key = r['thread_key'] user_key = r['contact_id'] @@ -193,7 +194,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]: def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]: - senders: Dict[str, Sender] = {} + senders: dict[str, Sender] = {} for r in db.execute('''SELECT * FROM thread_users'''): # for messaging_actor_type == 'REDUCED_MESSAGING_ACTOR', name is None # but they are still referenced, so need to keep @@ -207,7 +208,7 @@ def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]: yield s self_id = config.facebook_id - thread_users: Dict[str, List[Sender]] = {} + thread_users: dict[str, list[Sender]] = {} for r in db.execute('SELECT * from thread_participants'): thread_key = r['thread_key'] user_key = r['user_key'] @@ -267,9 +268,9 @@ def contacts() -> Iterator[Res[Sender]]: def messages() -> Iterator[Res[Message]]: - senders: Dict[str, Sender] = {} - msgs: Dict[str, Message] = {} - threads: Dict[str, Thread] = {} + senders: dict[str, Sender] = {} + msgs: dict[str, Message] = {} + threads: dict[str, Thread] = {} for x in unique_everseen(_entities): if isinstance(x, Exception): yield x diff --git a/my/fbmessenger/common.py b/my/fbmessenger/common.py index 33d1b20..0f5a374 100644 --- a/my/fbmessenger/common.py +++ b/my/fbmessenger/common.py @@ -1,6 +1,9 @@ -from my.core import __NOT_HPI_MODULE__ +from __future__ import annotations -from typing import Iterator, Optional, Protocol +from my.core import __NOT_HPI_MODULE__ # isort: skip + +from collections.abc import Iterator +from typing import Protocol from my.core import datetime_aware @@ -10,7 +13,7 @@ class Thread(Protocol): def id(self) -> str: ... @property - def name(self) -> Optional[str]: ... + def name(self) -> str | None: ... class Sender(Protocol): @@ -18,7 +21,7 @@ class Sender(Protocol): def id(self) -> str: ... @property - def name(self) -> Optional[str]: ... + def name(self) -> str | None: ... class Message(Protocol): @@ -29,7 +32,7 @@ class Message(Protocol): def dt(self) -> datetime_aware: ... @property - def text(self) -> Optional[str]: ... 
+ def text(self) -> str | None: ... @property def thread(self) -> Thread: ... @@ -39,8 +42,11 @@ class Message(Protocol): from itertools import chain + from more_itertools import unique_everseen -from my.core import warn_if_empty, Res + +from my.core import Res, warn_if_empty + @warn_if_empty def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]: diff --git a/my/fbmessenger/export.py b/my/fbmessenger/export.py index 201fad8..3b06618 100644 --- a/my/fbmessenger/export.py +++ b/my/fbmessenger/export.py @@ -7,16 +7,15 @@ REQUIRES = [ 'git+https://github.com/karlicoss/fbmessengerexport', ] +from collections.abc import Iterator from contextlib import ExitStack, contextmanager from dataclasses import dataclass -from typing import Iterator - -from my.core import PathIsh, Res, stat, Stats -from my.core.warnings import high -from my.config import fbmessenger as user_config import fbmessengerexport.dal as messenger +from my.config import fbmessenger as user_config +from my.core import PathIsh, Res, Stats, stat +from my.core.warnings import high ### # support old style config diff --git a/my/foursquare.py b/my/foursquare.py index 394fdf3..3b418aa 100644 --- a/my/foursquare.py +++ b/my/foursquare.py @@ -2,15 +2,14 @@ Foursquare/Swarm checkins ''' -from datetime import datetime, timezone, timedelta -from itertools import chain import json +from datetime import datetime, timedelta, timezone +from itertools import chain -# TODO pytz for timezone??? - -from my.core import get_files, make_logger from my.config import foursquare as config +# TODO pytz for timezone??? +from my.core import get_files, make_logger logger = make_logger(__name__) diff --git a/my/github/all.py b/my/github/all.py index f885dde..f5e13cf 100644 --- a/my/github/all.py +++ b/my/github/all.py @@ -3,8 +3,7 @@ Unified Github data (merged from GDPR export and periodic API updates) """ from . import gdpr, ghexport - -from .common import merge_events, Results +from .common import Results, merge_events def events() -> Results: diff --git a/my/github/common.py b/my/github/common.py index e54bc4d..22ba47e 100644 --- a/my/github/common.py +++ b/my/github/common.py @@ -1,24 +1,27 @@ """ Github events and their metadata: comments/issues/pull requests """ -from ..core import __NOT_HPI_MODULE__ + +from __future__ import annotations + +from my.core import __NOT_HPI_MODULE__ # isort: skip +from collections.abc import Iterable from datetime import datetime, timezone -from typing import Optional, NamedTuple, Iterable, Set, Tuple +from typing import NamedTuple, Optional -from ..core import warn_if_empty, LazyLogger -from ..core.error import Res +from my.core import make_logger, warn_if_empty +from my.core.error import Res - -logger = LazyLogger(__name__) +logger = make_logger(__name__) class Event(NamedTuple): dt: datetime summary: str eid: str link: Optional[str] - body: Optional[str]=None + body: Optional[str] = None is_bot: bool = False @@ -27,7 +30,7 @@ Results = Iterable[Res[Event]] @warn_if_empty def merge_events(*sources: Results) -> Results: from itertools import chain - emitted: Set[Tuple[datetime, str]] = set() + emitted: set[tuple[datetime, str]] = set() for e in chain(*sources): if isinstance(e, Exception): yield e @@ -52,7 +55,7 @@ def parse_dt(s: str) -> datetime: # experimental way of supportint event ids... 
not sure class EventIds: @staticmethod - def repo_created(*, dts: str, name: str, ref_type: str, ref: Optional[str]) -> str: + def repo_created(*, dts: str, name: str, ref_type: str, ref: str | None) -> str: return f'{dts}_repocreated_{name}_{ref_type}_{ref}' @staticmethod diff --git a/my/github/gdpr.py b/my/github/gdpr.py index a56ff46..be56454 100644 --- a/my/github/gdpr.py +++ b/my/github/gdpr.py @@ -6,8 +6,9 @@ from __future__ import annotations import json from abc import abstractmethod +from collections.abc import Iterator, Sequence from pathlib import Path -from typing import Any, Iterator, Sequence +from typing import Any from my.core import Paths, Res, Stats, get_files, make_logger, stat, warnings from my.core.error import echain diff --git a/my/github/ghexport.py b/my/github/ghexport.py index 80106a5..3e17c10 100644 --- a/my/github/ghexport.py +++ b/my/github/ghexport.py @@ -1,13 +1,17 @@ """ Github data: events, comments, etc. (API data) """ + +from __future__ import annotations + REQUIRES = [ 'git+https://github.com/karlicoss/ghexport', ] + from dataclasses import dataclass -from my.core import Paths from my.config import github as user_config +from my.core import Paths @dataclass @@ -21,7 +25,9 @@ class github(user_config): ### -from my.core.cfg import make_config, Attrs +from my.core.cfg import Attrs, make_config + + def migration(attrs: Attrs) -> Attrs: export_dir = 'export_dir' if export_dir in attrs: # legacy name @@ -41,15 +47,14 @@ except ModuleNotFoundError as e: ############################ +from collections.abc import Sequence from functools import lru_cache from pathlib import Path -from typing import Tuple, Dict, Sequence, Optional -from my.core import get_files, LazyLogger +from my.core import LazyLogger, get_files from my.core.cachew import mcachew -from .common import Event, parse_dt, Results, EventIds - +from .common import Event, EventIds, Results, parse_dt logger = LazyLogger(__name__) @@ -82,7 +87,9 @@ def _events() -> Results: yield e -from my.core import stat, Stats +from my.core import Stats, stat + + def stats() -> Stats: return { **stat(events), @@ -99,7 +106,7 @@ def _log_if_unhandled(e) -> None: Link = str EventId = str Body = str -def _get_summary(e) -> Tuple[str, Optional[Link], Optional[EventId], Optional[Body]]: +def _get_summary(e) -> tuple[str, Link | None, EventId | None, Body | None]: # TODO would be nice to give access to raw event within timeline dts = e['created_at'] eid = e['id'] @@ -195,7 +202,7 @@ def _get_summary(e) -> Tuple[str, Optional[Link], Optional[EventId], Optional[Bo return tp, None, None, None -def _parse_event(d: Dict) -> Event: +def _parse_event(d: dict) -> Event: summary, link, eid, body = _get_summary(d) if eid is None: eid = d['id'] # meh diff --git a/my/goodreads.py b/my/goodreads.py index 864bd64..559efda 100644 --- a/my/goodreads.py +++ b/my/goodreads.py @@ -7,15 +7,18 @@ REQUIRES = [ from dataclasses import dataclass -from my.core import datetime_aware, Paths + from my.config import goodreads as user_config +from my.core import Paths, datetime_aware + @dataclass class goodreads(user_config): # paths[s]/glob to the exported JSON data export_path: Paths -from my.core.cfg import make_config, Attrs +from my.core.cfg import Attrs, make_config + def _migration(attrs: Attrs) -> Attrs: export_dir = 'export_dir' @@ -29,18 +32,19 @@ config = make_config(goodreads, migration=_migration) #############################3 -from my.core import get_files -from typing import Sequence, Iterator +from collections.abc import Iterator, 
Sequence from pathlib import Path +from my.core import get_files + + def inputs() -> Sequence[Path]: return get_files(config.export_path) from datetime import datetime + import pytz - - from goodrexport import dal diff --git a/my/google/maps/_android_protobuf.py b/my/google/maps/_android_protobuf.py index 1d43ae0..615623d 100644 --- a/my/google/maps/_android_protobuf.py +++ b/my/google/maps/_android_protobuf.py @@ -1,8 +1,8 @@ -from my.core import __NOT_HPI_MODULE__ +from my.core import __NOT_HPI_MODULE__ # isort: skip # NOTE: this tool was quite useful https://github.com/aj3423/aproto -from google.protobuf import descriptor_pool, descriptor_pb2, message_factory +from google.protobuf import descriptor_pb2, descriptor_pool, message_factory TYPE_STRING = descriptor_pb2.FieldDescriptorProto.TYPE_STRING TYPE_BYTES = descriptor_pb2.FieldDescriptorProto.TYPE_BYTES diff --git a/my/google/maps/android.py b/my/google/maps/android.py index 279231a..95ecacf 100644 --- a/my/google/maps/android.py +++ b/my/google/maps/android.py @@ -7,20 +7,20 @@ REQUIRES = [ "protobuf", # for parsing blobs from the database ] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Any, Iterator, Optional, Sequence +from typing import Any from urllib.parse import quote -from my.core import datetime_aware, get_files, LazyLogger, Paths, Res +from my.core import LazyLogger, Paths, Res, datetime_aware, get_files from my.core.common import unique_everseen from my.core.sqlite import sqlite_connection -import my.config - from ._android_protobuf import parse_labeled, parse_list, parse_place +import my.config # isort: skip logger = LazyLogger(__name__) @@ -59,8 +59,8 @@ class Place: updated_at: datetime_aware # TODO double check it's utc? 
title: str location: Location - address: Optional[str] - note: Optional[str] + address: str | None + note: str | None @property def place_url(self) -> str: diff --git a/my/google/takeout/html.py b/my/google/takeout/html.py index 750beac..3f2b5db 100644 --- a/my/google/takeout/html.py +++ b/my/google/takeout/html.py @@ -2,18 +2,22 @@ Google Takeout exports: browsing history, search/youtube/google play activity ''' -from enum import Enum +from __future__ import annotations + +from my.core import __NOT_HPI_MODULE__ # isort: skip + import re -from pathlib import Path +from collections.abc import Iterable from datetime import datetime +from enum import Enum from html.parser import HTMLParser -from typing import List, Optional, Any, Callable, Iterable, Tuple +from pathlib import Path +from typing import Any, Callable from urllib.parse import unquote import pytz -from ...core.time import abbr_to_timezone - +from my.core.time import abbr_to_timezone # NOTE: https://bugs.python.org/issue22377 %Z doesn't work properly _TIME_FORMATS = [ @@ -36,7 +40,7 @@ def parse_dt(s: str) -> datetime: s, tzabbr = s.rsplit(maxsplit=1) tz = abbr_to_timezone(tzabbr) - dt: Optional[datetime] = None + dt: datetime | None = None for fmt in _TIME_FORMATS: try: dt = datetime.strptime(s, fmt) @@ -73,7 +77,7 @@ class State(Enum): Url = str Title = str -Parsed = Tuple[datetime, Url, Title] +Parsed = tuple[datetime, Url, Title] Callback = Callable[[datetime, Url, Title], None] @@ -83,9 +87,9 @@ class TakeoutHTMLParser(HTMLParser): super().__init__() self.state: State = State.OUTSIDE - self.title_parts: List[str] = [] - self.title: Optional[str] = None - self.url: Optional[str] = None + self.title_parts: list[str] = [] + self.title: str | None = None + self.url: str | None = None self.callback = callback @@ -148,7 +152,7 @@ class TakeoutHTMLParser(HTMLParser): def read_html(tpath: Path, file: str) -> Iterable[Parsed]: - results: List[Parsed] = [] + results: list[Parsed] = [] def cb(dt: datetime, url: Url, title: Title) -> None: results.append((dt, url, title)) parser = TakeoutHTMLParser(callback=cb) @@ -156,5 +160,3 @@ def read_html(tpath: Path, file: str) -> Iterable[Parsed]: data = fo.read() parser.feed(data) return results - -from ...core import __NOT_HPI_MODULE__ diff --git a/my/google/takeout/parser.py b/my/google/takeout/parser.py index 170553a..80c2be1 100644 --- a/my/google/takeout/parser.py +++ b/my/google/takeout/parser.py @@ -14,24 +14,27 @@ the cachew cache REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] +import os +from collections.abc import Sequence from contextlib import ExitStack from dataclasses import dataclass -import os -from typing import List, Sequence, cast from pathlib import Path -from my.core import make_config, stat, Stats, get_files, Paths, make_logger +from typing import cast + +from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES + +from my.core import Paths, Stats, get_files, make_config, make_logger, stat from my.core.cachew import mcachew from my.core.error import ErrorPolicy from my.core.structure import match_structure - from my.core.time import user_forced -from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES + ABBR_TIMEZONES.extend(user_forced()) import google_takeout_parser -from google_takeout_parser.path_dispatch import TakeoutParser -from google_takeout_parser.merge import GoogleEventSet, CacheResults +from google_takeout_parser.merge import CacheResults, GoogleEventSet from google_takeout_parser.models import 
BaseEvent +from google_takeout_parser.path_dispatch import TakeoutParser # see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example from my.config import google as user_config @@ -56,6 +59,7 @@ logger = make_logger(__name__, level="warning") # patch the takeout parser logger to match the computed loglevel from google_takeout_parser.log import setup as setup_takeout_logger + setup_takeout_logger(logger.level) @@ -83,7 +87,7 @@ except ImportError: google_takeout_version = str(getattr(google_takeout_parser, '__version__', 'unknown')) -def _cachew_depends_on() -> List[str]: +def _cachew_depends_on() -> list[str]: exports = sorted([str(p) for p in inputs()]) # add google takeout parser pip version to hash, so this re-creates on breaking changes exports.insert(0, f"google_takeout_version: {google_takeout_version}") diff --git a/my/google/takeout/paths.py b/my/google/takeout/paths.py index 948cf2e..6a523e2 100644 --- a/my/google/takeout/paths.py +++ b/my/google/takeout/paths.py @@ -2,13 +2,17 @@ Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data ''' +from __future__ import annotations + +from my.core import __NOT_HPI_MODULE__ # isort: skip + from abc import abstractmethod +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional, Protocol from more_itertools import last -from my.core import __NOT_HPI_MODULE__, Paths, get_files +from my.core import Paths, get_files class config: @@ -33,7 +37,7 @@ def make_config() -> config: return combined_config() -def get_takeouts(*, path: Optional[str] = None) -> Iterable[Path]: +def get_takeouts(*, path: str | None = None) -> Iterable[Path]: """ Sometimes google splits takeout into multiple archives, so we need to detect the ones that contain the path we need """ @@ -45,7 +49,7 @@ def get_takeouts(*, path: Optional[str] = None) -> Iterable[Path]: yield takeout -def get_last_takeout(*, path: Optional[str] = None) -> Optional[Path]: +def get_last_takeout(*, path: str | None = None) -> Path | None: return last(get_takeouts(path=path), default=None) diff --git a/my/hackernews/dogsheep.py b/my/hackernews/dogsheep.py index de6c58d..8303284 100644 --- a/my/hackernews/dogsheep.py +++ b/my/hackernews/dogsheep.py @@ -3,14 +3,14 @@ Hackernews data via Dogsheep [[hacker-news-to-sqlite][https://github.com/dogshee """ from __future__ import annotations +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Iterator, Sequence, Optional -from my.core import get_files, Paths, Res, datetime_aware -from my.core.sqlite import sqlite_connection import my.config +from my.core import Paths, Res, datetime_aware, get_files +from my.core.sqlite import sqlite_connection from .common import hackernews_link @@ -33,9 +33,9 @@ class Item: id: str type: str created: datetime_aware # checked and it's utc - title: Optional[str] # only present for Story - text_html: Optional[str] # should be present for Comment and might for Story - url: Optional[str] # might be present for Story + title: str | None # only present for Story + text_html: str | None # should be present for Comment and might for Story + url: str | None # might be present for Story # todo process 'deleted'? fields? # todo process 'parent'? 
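# ---- editor's aside: a self-contained sketch (made-up names) of the PEP 585 change
# that the typing.Dict/List/Iterator -> dict/list/collections.abc rewrites above
# depend on. From Python 3.9, builtin containers and the collections.abc ABCs are
# subscriptable at runtime, so the typing aliases can be dropped entirely.
from collections.abc import Iterator, Sequence


def numbered(items: Sequence[str]) -> Iterator[tuple[int, str]]:
    # tuple[int, str] and Iterator[...] are valid runtime expressions on 3.9+
    yield from enumerate(items)


assert list(numbered(['story', 'comment'])) == [(0, 'story'), (1, 'comment')]
# ---- end of aside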
diff --git a/my/hackernews/harmonic.py b/my/hackernews/harmonic.py index 3b4ae61..08a82e6 100644 --- a/my/hackernews/harmonic.py +++ b/my/hackernews/harmonic.py @@ -1,17 +1,22 @@ """ [[https://play.google.com/store/apps/details?id=com.simon.harmonichackernews][Harmonic]] app for Hackernews """ + +from __future__ import annotations + REQUIRES = ['lxml', 'orjson'] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone -import orjson from pathlib import Path -from typing import Any, Dict, Iterator, List, Optional, Sequence, TypedDict, cast +from typing import Any, TypedDict, cast +import orjson from lxml import etree from more_itertools import one +import my.config from my.core import ( Paths, Res, @@ -22,8 +27,10 @@ from my.core import ( stat, ) from my.core.common import unique_everseen -import my.config -from .common import hackernews_link, SavedBase + +from .common import SavedBase, hackernews_link + +import my.config # isort: skip logger = make_logger(__name__) @@ -43,7 +50,7 @@ class Cached(TypedDict): created_at_i: int id: str points: int - test: Optional[str] + test: str | None title: str type: str # TODO Literal['story', 'comment']? comments are only in 'children' field tho url: str @@ -94,16 +101,16 @@ def _saved() -> Iterator[Res[Saved]]: # TODO defensive for each item! tr = etree.parse(path) - res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]'))) + res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]'))) cached_ids = [x.text.split('-')[0] for x in res] - cached: Dict[str, Cached] = {} + cached: dict[str, Cached] = {} for sid in cached_ids: - res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORY{sid}"]'))) + res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORY{sid}"]'))) j = orjson.loads(res.text) cached[sid] = j - res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_BOOKMARKS"]'))) + res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_BOOKMARKS"]'))) for x in res.text.split('-'): ids, item_timestamp = x.split('q') # not sure if timestamp is any useful? 
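# ---- editor's aside: a tiny sketch (made-up data) of the other spot where PEP 585
# matters in harmonic.py above: cast(List[Any], ...) -> cast(list[Any], ...). Unlike
# annotations, the first argument to typing.cast is evaluated eagerly, so
# `from __future__ import annotations` would not help here; subscriptable builtins
# (Python 3.9+) are what make `list[Any]` legal at runtime.
from typing import Any, cast

raw: Any = ['story-1', 'comment-2']
ids = cast(list[Any], raw)  # no typing.List import needed on 3.9+
assert ids[0] == 'story-1'
# ---- end of aside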
diff --git a/my/hackernews/materialistic.py b/my/hackernews/materialistic.py index 4d5cd47..ccf285b 100644 --- a/my/hackernews/materialistic.py +++ b/my/hackernews/materialistic.py @@ -1,19 +1,20 @@ """ [[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews """ +from collections.abc import Iterator, Sequence from datetime import datetime, timezone from pathlib import Path -from typing import Any, Dict, Iterator, NamedTuple, Sequence +from typing import Any, NamedTuple from more_itertools import unique_everseen -from my.core import get_files, datetime_aware, make_logger +from my.core import datetime_aware, get_files, make_logger from my.core.sqlite import sqlite_connection -from my.config import materialistic as config # todo migrate config to my.hackernews.materialistic - from .common import hackernews_link +# todo migrate config to my.hackernews.materialistic +from my.config import materialistic as config # isort: skip logger = make_logger(__name__) @@ -22,7 +23,7 @@ def inputs() -> Sequence[Path]: return get_files(config.export_path) -Row = Dict[str, Any] +Row = dict[str, Any] class Saved(NamedTuple): diff --git a/my/hypothesis.py b/my/hypothesis.py index 82104cd..15e854b 100644 --- a/my/hypothesis.py +++ b/my/hypothesis.py @@ -4,20 +4,22 @@ REQUIRES = [ 'git+https://github.com/karlicoss/hypexport', ] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Iterator, Sequence, TYPE_CHECKING +from typing import TYPE_CHECKING from my.core import ( - get_files, - stat, Paths, Res, Stats, + get_files, + stat, ) from my.core.cfg import make_config from my.core.hpi_compat import always_supports_sequence -import my.config + +import my.config # isort: skip @dataclass diff --git a/my/instagram/all.py b/my/instagram/all.py index 8007399..214e6ac 100644 --- a/my/instagram/all.py +++ b/my/instagram/all.py @@ -1,11 +1,10 @@ -from typing import Iterator +from collections.abc import Iterator -from my.core import Res, stat, Stats +from my.core import Res, Stats, stat from my.core.source import import_source from .common import Message, _merge_messages - src_gdpr = import_source(module_name='my.instagram.gdpr') @src_gdpr def _messages_gdpr() -> Iterator[Res[Message]]: diff --git a/my/instagram/android.py b/my/instagram/android.py index 96b75d2..12c11d3 100644 --- a/my/instagram/android.py +++ b/my/instagram/android.py @@ -3,30 +3,29 @@ Bumble data from Android app database (in =/data/data/com.instagram.android/data """ from __future__ import annotations +import json +import sqlite3 +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime -import json from pathlib import Path -import sqlite3 -from typing import Iterator, Sequence, Optional, Dict, Union from my.core import ( - get_files, - Paths, - make_config, - make_logger, - datetime_naive, Json, + Paths, Res, assert_never, + datetime_naive, + get_files, + make_config, + make_logger, ) -from my.core.common import unique_everseen from my.core.cachew import mcachew +from my.core.common import unique_everseen from my.core.error import echain -from my.core.sqlite import sqlite_connect_immutable, select - -from my.config import instagram as user_config +from my.core.sqlite import select, sqlite_connect_immutable +from my.config import instagram as user_config # isort: skip logger = make_logger(__name__) @@ -38,8 +37,8 @@ class instagram_android_config(user_config.android): # 
sadly doesn't seem easy to extract user's own handle/name from the db... # todo maybe makes more sense to keep in parent class? not sure... - username: Optional[str] = None - full_name: Optional[str] = None + username: str | None = None + full_name: str | None = None config = make_config(instagram_android_config) @@ -101,13 +100,13 @@ class MessageError(RuntimeError): return self.rest == other.rest -def _parse_message(j: Json) -> Optional[_Message]: +def _parse_message(j: Json) -> _Message | None: id = j['item_id'] t = j['item_type'] tid = j['thread_key']['thread_id'] uid = j['user_id'] created = datetime.fromtimestamp(int(j['timestamp']) / 1_000_000) - text: Optional[str] = None + text: str | None = None if t == 'text': text = j['text'] elif t == 'reel_share': @@ -133,7 +132,7 @@ def _parse_message(j: Json) -> Optional[_Message]: ) -def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]: +def _process_db(db: sqlite3.Connection) -> Iterator[Res[User | _Message]]: # TODO ugh. seems like no way to extract username? # sometimes messages (e.g. media_share) contain it in message field # but generally it's not present. ugh @@ -175,7 +174,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]: yield e -def _entities() -> Iterator[Res[Union[User, _Message]]]: +def _entities() -> Iterator[Res[User | _Message]]: # NOTE: definitely need to merge multiple, app seems to recycle old messages # TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data... # todo use TypedDict? @@ -194,7 +193,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]: @mcachew(depends_on=inputs) def messages() -> Iterator[Res[Message]]: - id2user: Dict[str, User] = {} + id2user: dict[str, User] = {} for x in unique_everseen(_entities): if isinstance(x, Exception): yield x diff --git a/my/instagram/common.py b/my/instagram/common.py index 4df07a1..17d130f 100644 --- a/my/instagram/common.py +++ b/my/instagram/common.py @@ -1,9 +1,10 @@ +from collections.abc import Iterator from dataclasses import replace from datetime import datetime from itertools import chain -from typing import Iterator, Dict, Any, Protocol +from typing import Any, Protocol -from my.core import warn_if_empty, Res +from my.core import Res, warn_if_empty class User(Protocol): @@ -40,7 +41,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]: # ugh. 
seems that GDPR thread ids are completely uncorrelated to any android ids (tried searching over all sqlite dump) # so the only way to correlate is to try and match messages # we also can't use unique_everseen here, otherwise will never get a chance to unify threads - mmap: Dict[str, Message] = {} + mmap: dict[str, Message] = {} thread_map = {} user_map = {} @@ -60,7 +61,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]: user_map[m.user.id] = mm.user else: # not emitted yet, need to emit - repls: Dict[str, Any] = {} + repls: dict[str, Any] = {} tid = thread_map.get(m.thread_id) if tid is not None: repls['thread_id'] = tid diff --git a/my/instagram/gdpr.py b/my/instagram/gdpr.py index 1415d55..7454a04 100644 --- a/my/instagram/gdpr.py +++ b/my/instagram/gdpr.py @@ -2,26 +2,27 @@ Instagram data (uses [[https://www.instagram.com/download/request][official GDPR export]]) """ +from __future__ import annotations + +import json +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime -import json from pathlib import Path -from typing import Iterator, Sequence, Dict, Union from more_itertools import bucket from my.core import ( - get_files, Paths, - datetime_naive, Res, assert_never, + datetime_naive, + get_files, make_logger, ) from my.core.common import unique_everseen -from my.config import instagram as user_config - +from my.config import instagram as user_config # isort: skip logger = make_logger(__name__) @@ -70,7 +71,7 @@ def _decode(s: str) -> str: return s.encode('latin-1').decode('utf8') -def _entities() -> Iterator[Res[Union[User, _Message]]]: +def _entities() -> Iterator[Res[User | _Message]]: # it's worth processing all previous export -- sometimes instagram removes some metadata from newer ones # NOTE: here there are basically two options # - process inputs as is (from oldest to newest) @@ -84,7 +85,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]: yield from _entitites_from_path(path) -def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]: +def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]: # TODO make sure it works both with plan directory # idelaly get_files should return the right thing, and we won't have to force ZipPath/match_structure here # e.g. possible options are: @@ -202,7 +203,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]: # TODO basically copy pasted from android.py... 
hmm def messages() -> Iterator[Res[Message]]: - id2user: Dict[str, User] = {} + id2user: dict[str, User] = {} for x in unique_everseen(_entities): if isinstance(x, Exception): yield x diff --git a/my/instapaper.py b/my/instapaper.py index df1f70b..d79e7e4 100644 --- a/my/instapaper.py +++ b/my/instapaper.py @@ -7,10 +7,10 @@ REQUIRES = [ from dataclasses import dataclass -from .core import Paths - from my.config import instapaper as user_config +from .core import Paths + @dataclass class instapaper(user_config): @@ -22,6 +22,7 @@ class instapaper(user_config): from .core.cfg import make_config + config = make_config(instapaper) @@ -39,9 +40,12 @@ Bookmark = dal.Bookmark Page = dal.Page -from typing import Sequence, Iterable +from collections.abc import Iterable, Sequence from pathlib import Path + from .core import get_files + + def inputs() -> Sequence[Path]: return get_files(config.export_path) diff --git a/my/ip/all.py b/my/ip/all.py index 46c1fec..e8277c1 100644 --- a/my/ip/all.py +++ b/my/ip/all.py @@ -9,10 +9,9 @@ For an example of how this could be used, see https://github.com/seanbreckenridg REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"] -from typing import Iterator +from collections.abc import Iterator from my.core import Stats, warn_if_empty - from my.ip.common import IP diff --git a/my/ip/common.py b/my/ip/common.py index 244ddc5..ef54ee3 100644 --- a/my/ip/common.py +++ b/my/ip/common.py @@ -2,11 +2,12 @@ Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]] """ -from my.core import __NOT_HPI_MODULE__ +from my.core import __NOT_HPI_MODULE__ # isort: skip import ipaddress -from typing import NamedTuple, Iterator, Tuple +from collections.abc import Iterator from datetime import datetime +from typing import NamedTuple import ipgeocache @@ -22,7 +23,7 @@ class IP(NamedTuple): return ipgeocache.get(self.addr) @property - def latlon(self) -> Tuple[float, float]: + def latlon(self) -> tuple[float, float]: loc: str = self.ipgeocache()["loc"] lat, _, lon = loc.partition(",") return float(lat), float(lon) diff --git a/my/jawbone/__init__.py b/my/jawbone/__init__.py index 35112ba..463d735 100644 --- a/my/jawbone/__init__.py +++ b/my/jawbone/__init__.py @@ -1,10 +1,11 @@ from __future__ import annotations -from typing import Dict, Any, List, Iterable import json +from collections.abc import Iterable +from datetime import date, datetime, time, timedelta from functools import lru_cache -from datetime import datetime, date, time, timedelta from pathlib import Path +from typing import Any import pytz @@ -14,7 +15,6 @@ logger = make_logger(__name__) from my.config import jawbone as config # type: ignore[attr-defined] - BDIR = config.export_dir PHASES_FILE = BDIR / 'phases.json' SLEEPS_FILE = BDIR / 'sleeps.json' @@ -24,7 +24,7 @@ GRAPHS_DIR = BDIR / 'graphs' XID = str # TODO how to shared with backup thing? -Phases = Dict[XID, Any] +Phases = dict[XID, Any] @lru_cache(1) def get_phases() -> Phases: return json.loads(PHASES_FILE.read_text()) @@ -89,7 +89,7 @@ class SleepEntry: # TODO might be useful to cache these?? @property - def phases(self) -> List[datetime]: + def phases(self) -> list[datetime]: # TODO make sure they are consistent with emfit? 
return [self._fromts(i['time']) for i in get_phases()[self.xid]] @@ -100,12 +100,13 @@ class SleepEntry: return str(self) -def load_sleeps() -> List[SleepEntry]: +def load_sleeps() -> list[SleepEntry]: sleeps = json.loads(SLEEPS_FILE.read_text()) return [SleepEntry(js) for js in sleeps] -from ..core.error import Res, set_error_datetime, extract_error_datetime +from ..core.error import Res, extract_error_datetime, set_error_datetime + def pre_dataframe() -> Iterable[Res[SleepEntry]]: from more_itertools import bucket @@ -129,9 +130,9 @@ def pre_dataframe() -> Iterable[Res[SleepEntry]]: def dataframe(): - dicts: List[Dict[str, Any]] = [] + dicts: list[dict[str, Any]] = [] for s in pre_dataframe(): - d: Dict[str, Any] + d: dict[str, Any] if isinstance(s, Exception): dt = extract_error_datetime(s) d = { @@ -181,7 +182,7 @@ def plot_one(sleep: SleepEntry, fig, axes, xlims=None, *, showtext=True): print(f"{sleep.xid} span: {span}") # pip install imageio - from imageio import imread # type: ignore + from imageio import imread # type: ignore img = imread(sleep.graph) # all of them are 300x300 images apparently @@ -260,8 +261,8 @@ def predicate(sleep: SleepEntry): # TODO move to dashboard def plot() -> None: - from matplotlib.figure import Figure # type: ignore[import-not-found] import matplotlib.pyplot as plt # type: ignore[import-not-found] + from matplotlib.figure import Figure # type: ignore[import-not-found] # TODO FIXME melatonin data melatonin_data = {} # type: ignore[var-annotated] diff --git a/my/jawbone/plots.py b/my/jawbone/plots.py index d26d606..5968412 100755 --- a/my/jawbone/plots.py +++ b/my/jawbone/plots.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 # TODO this should be in dashboard -from pathlib import Path # from kython.plotting import * from csv import DictReader +from pathlib import Path +from typing import Any, NamedTuple -from typing import Dict, Any, NamedTuple +import matplotlib.pylab as pylab # type: ignore # sleep = [] # with open('2017.csv', 'r') as fo: @@ -12,16 +13,14 @@ from typing import Dict, Any, NamedTuple # for line in islice(reader, 0, 10): # sleep # print(line) - -import matplotlib.pyplot as plt # type: ignore +import matplotlib.pyplot as plt # type: ignore from numpy import genfromtxt -import matplotlib.pylab as pylab # type: ignore pylab.rcParams['figure.figsize'] = (32.0, 24.0) pylab.rcParams['font.size'] = 10 jawboneDataFeatures = Path(__file__).parent / 'features.csv' # Data File Path -featureDesc: Dict[str, str] = {} +featureDesc: dict[str, str] = {} for x in genfromtxt(jawboneDataFeatures, dtype='unicode', delimiter=','): featureDesc[x[0]] = x[1] @@ -52,7 +51,7 @@ class SleepData(NamedTuple): quality: float # ??? @classmethod - def from_jawbone_dict(cls, d: Dict[str, Any]): + def from_jawbone_dict(cls, d: dict[str, Any]): return cls( date=d['DATE'], asleep_time=_safe_mins(_safe_float(d['s_asleep_time'])), @@ -75,7 +74,7 @@ class SleepData(NamedTuple): def iter_useful(data_file: str): - with open(data_file) as fo: + with Path(data_file).open() as fo: reader = DictReader(fo) for d in reader: dt = SleepData.from_jawbone_dict(d) @@ -95,6 +94,7 @@ files = [ ] from kython import concat, parse_date # type: ignore + useful = concat(*(list(iter_useful(str(f))) for f in files)) # for u in useful: @@ -108,6 +108,7 @@ dates = [parse_date(u.date, yearfirst=True, dayfirst=False) for u in useful] # TODO don't need this anymore? 
it's gonna be in dashboards package from kython.plotting import plot_timestamped # type: ignore + for attr, lims, mavg, fig in [ ('light', (0, 400), 5, None), ('deep', (0, 600), 5, None), diff --git a/my/kobo.py b/my/kobo.py index 85bc50f..b4a1575 100644 --- a/my/kobo.py +++ b/my/kobo.py @@ -7,21 +7,22 @@ REQUIRES = [ 'kobuddy', ] +from collections.abc import Iterator from dataclasses import dataclass -from typing import Iterator - -from my.core import ( - get_files, - stat, - Paths, - Stats, -) -from my.core.cfg import make_config -import my.config import kobuddy -from kobuddy import Highlight, get_highlights from kobuddy import * +from kobuddy import Highlight, get_highlights + +from my.core import ( + Paths, + Stats, + get_files, + stat, +) +from my.core.cfg import make_config + +import my.config # isort: skip @dataclass @@ -51,7 +52,7 @@ def stats() -> Stats: ## TODO hmm. not sure if all this really belongs here?... perhaps orger? -from typing import Callable, Union, List +from typing import Callable, Union # TODO maybe type over T? _Predicate = Callable[[str], bool] @@ -69,17 +70,17 @@ def from_predicatish(p: Predicatish) -> _Predicate: return p -def by_annotation(predicatish: Predicatish, **kwargs) -> List[Highlight]: +def by_annotation(predicatish: Predicatish, **kwargs) -> list[Highlight]: pred = from_predicatish(predicatish) - res: List[Highlight] = [] + res: list[Highlight] = [] for h in get_highlights(**kwargs): if pred(h.annotation): res.append(h) return res -def get_todos() -> List[Highlight]: +def get_todos() -> list[Highlight]: def with_todo(ann): if ann is None: ann = '' diff --git a/my/kython/kompress.py b/my/kython/kompress.py index 01e24e4..a5d9c29 100644 --- a/my/kython/kompress.py +++ b/my/kython/kompress.py @@ -1,5 +1,4 @@ -from my.core import __NOT_HPI_MODULE__ -from my.core import warnings +from my.core import __NOT_HPI_MODULE__, warnings warnings.high('my.kython.kompress is deprecated, please use "kompress" library directly. 
See https://github.com/karlicoss/kompress') diff --git a/my/lastfm.py b/my/lastfm.py index d20ebf3..cd9fa8b 100644 --- a/my/lastfm.py +++ b/my/lastfm.py @@ -3,9 +3,9 @@ Last.fm scrobbles ''' from dataclasses import dataclass -from my.core import Paths, Json, make_logger, get_files -from my.config import lastfm as user_config +from my.config import lastfm as user_config +from my.core import Json, Paths, get_files, make_logger logger = make_logger(__name__) @@ -19,13 +19,15 @@ class lastfm(user_config): from my.core.cfg import make_config + config = make_config(lastfm) -from datetime import datetime, timezone import json +from collections.abc import Iterable, Sequence +from datetime import datetime, timezone from pathlib import Path -from typing import NamedTuple, Sequence, Iterable +from typing import NamedTuple from my.core.cachew import mcachew @@ -76,7 +78,9 @@ def scrobbles() -> Iterable[Scrobble]: yield Scrobble(raw=raw) -from my.core import stat, Stats +from my.core import Stats, stat + + def stats() -> Stats: return stat(scrobbles) diff --git a/my/location/all.py b/my/location/all.py index fd88721..c6e8cab 100644 --- a/my/location/all.py +++ b/my/location/all.py @@ -2,14 +2,13 @@ Merges location data from multiple sources """ -from typing import Iterator +from collections.abc import Iterator -from my.core import Stats, LazyLogger +from my.core import LazyLogger, Stats from my.core.source import import_source from .common import Location - logger = LazyLogger(__name__, level="warning") diff --git a/my/location/common.py b/my/location/common.py index f406370..4c47ef0 100644 --- a/my/location/common.py +++ b/my/location/common.py @@ -1,12 +1,13 @@ -from datetime import date, datetime -from typing import Union, Tuple, Optional, Iterable, TextIO, Iterator, Protocol -from dataclasses import dataclass +from my.core import __NOT_HPI_MODULE__ # isort: skip -from my.core import __NOT_HPI_MODULE__ +from collections.abc import Iterable, Iterator +from dataclasses import dataclass +from datetime import date, datetime +from typing import Optional, Protocol, TextIO, Union DateIsh = Union[datetime, date, str] -LatLon = Tuple[float, float] +LatLon = tuple[float, float] class LocationProtocol(Protocol): diff --git a/my/location/fallback/all.py b/my/location/fallback/all.py index a5daa05..d340148 100644 --- a/my/location/fallback/all.py +++ b/my/location/fallback/all.py @@ -1,14 +1,16 @@ # TODO: add config here which passes kwargs to estimate_from (under_accuracy) # overwritable by passing the kwarg name here to the top-level estimate_location -from typing import Iterator, Optional +from __future__ import annotations + +from collections.abc import Iterator from my.core.source import import_source from my.location.fallback.common import ( - estimate_from, - FallbackLocation, DateExact, + FallbackLocation, LocationEstimator, + estimate_from, ) @@ -24,7 +26,7 @@ def fallback_estimators() -> Iterator[LocationEstimator]: yield _home_estimate -def estimate_location(dt: DateExact, *, first_match: bool=False, under_accuracy: Optional[int] = None) -> FallbackLocation: +def estimate_location(dt: DateExact, *, first_match: bool=False, under_accuracy: int | None = None) -> FallbackLocation: loc = estimate_from(dt, estimators=list(fallback_estimators()), first_match=first_match, under_accuracy=under_accuracy) # should never happen if the user has home configured if loc is None: diff --git a/my/location/fallback/common.py b/my/location/fallback/common.py index 13bc603..622b2f5 100644 --- 
a/my/location/fallback/common.py +++ b/my/location/fallback/common.py @@ -1,9 +1,12 @@ from __future__ import annotations -from dataclasses import dataclass -from typing import Optional, Callable, Sequence, Iterator, List, Union -from datetime import datetime, timedelta, timezone -from ..common import LocationProtocol, Location +from collections.abc import Iterator, Sequence +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import Callable, Union + +from ..common import Location, LocationProtocol + DateExact = Union[datetime, float, int] # float/int as epoch timestamps Second = float @@ -13,10 +16,10 @@ class FallbackLocation(LocationProtocol): lat: float lon: float dt: datetime - duration: Optional[Second] = None - accuracy: Optional[float] = None - elevation: Optional[float] = None - datasource: Optional[str] = None # which module provided this, useful for debugging + duration: Second | None = None + accuracy: float | None = None + elevation: float | None = None + datasource: str | None = None # which module provided this, useful for debugging def to_location(self, *, end: bool = False) -> Location: ''' @@ -43,9 +46,9 @@ class FallbackLocation(LocationProtocol): lon: float, dt: datetime, end_dt: datetime, - accuracy: Optional[float] = None, - elevation: Optional[float] = None, - datasource: Optional[str] = None, + accuracy: float | None = None, + elevation: float | None = None, + datasource: str | None = None, ) -> FallbackLocation: ''' Create FallbackLocation from a start date and an end date @@ -93,13 +96,13 @@ def estimate_from( estimators: LocationEstimators, *, first_match: bool = False, - under_accuracy: Optional[int] = None, -) -> Optional[FallbackLocation]: + under_accuracy: int | None = None, +) -> FallbackLocation | None: ''' first_match: if True, return the first location found under_accuracy: if set, only return locations with accuracy under this value ''' - found: List[FallbackLocation] = [] + found: list[FallbackLocation] = [] for loc in _iter_estimate_from(dt, estimators): if under_accuracy is not None and loc.accuracy is not None and loc.accuracy > under_accuracy: continue diff --git a/my/location/fallback/via_home.py b/my/location/fallback/via_home.py index e44c59d..f88fee0 100644 --- a/my/location/fallback/via_home.py +++ b/my/location/fallback/via_home.py @@ -2,25 +2,22 @@ Simple location provider, serving as a fallback when more detailed data isn't available ''' +from __future__ import annotations + +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, time, timezone -from functools import lru_cache -from typing import Sequence, Tuple, Union, cast, List, Iterator +from functools import cache +from typing import cast from my.config import location as user_config +from my.location.common import DateIsh, LatLon +from my.location.fallback.common import DateExact, FallbackLocation -from my.location.common import LatLon, DateIsh -from my.location.fallback.common import FallbackLocation, DateExact @dataclass class Config(user_config): - home: Union[ - LatLon, # either single, 'current' location - Sequence[Tuple[ # or, a sequence of location history - DateIsh, # date when you moved to - LatLon, # the location - ]] - ] + home: LatLon | Sequence[tuple[DateIsh, LatLon]] # default ~30km accuracy # this is called 'home_accuracy' since it lives on the base location.config object, @@ -29,13 +26,13 @@ class Config(user_config): # TODO could make current Optional and somehow 
determine from system settings?

     @property
-    def _history(self) -> Sequence[Tuple[datetime, LatLon]]:
+    def _history(self) -> Sequence[tuple[datetime, LatLon]]:
         home1 = self.home
         # todo ugh, can't test for isinstance LatLon, it's a tuple itself
-        home2: Sequence[Tuple[DateIsh, LatLon]]
+        home2: Sequence[tuple[DateIsh, LatLon]]
         if isinstance(home1[0], tuple):
             # already a sequence
-            home2 = cast(Sequence[Tuple[DateIsh, LatLon]], home1)
+            home2 = cast(Sequence[tuple[DateIsh, LatLon]], home1)
         else:
             # must be a pair of coordinates. also doesn't really matter which date to pick?
             loc = cast(LatLon, home1)
@@ -60,10 +57,11 @@ class Config(user_config):
 
 from ...core.cfg import make_config
 
+
 config = make_config(Config)
 
 
-@lru_cache(maxsize=None)
+@cache
 def get_location(dt: datetime) -> LatLon:
     '''
     Interpolates the location at dt
@@ -74,8 +72,8 @@ def get_location(dt: datetime) -> LatLon:
 
 
 # TODO: in python3.8, use functools.cached_property instead?
-@lru_cache(maxsize=None)
-def homes_cached() -> List[Tuple[datetime, LatLon]]:
+@cache
+def homes_cached() -> list[tuple[datetime, LatLon]]:
     return list(config._history)
 
diff --git a/my/location/fallback/via_ip.py b/my/location/fallback/via_ip.py
index 79a452c..732af67 100644
--- a/my/location/fallback/via_ip.py
+++ b/my/location/fallback/via_ip.py
@@ -7,8 +7,8 @@ REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"]
 from dataclasses import dataclass
 from datetime import timedelta
 
-from my.core import Stats, make_config
 from my.config import location
+from my.core import Stats, make_config
 from my.core.warnings import medium
 
 
@@ -24,13 +24,13 @@ class ip_config(location.via_ip):
 
 config = make_config(ip_config)
 
 
+from collections.abc import Iterator
 from functools import lru_cache
-from typing import Iterator, List
 
 from my.core import make_logger
 from my.core.compat import bisect_left
 from my.location.common import Location
-from my.location.fallback.common import FallbackLocation, DateExact, _datetime_timestamp
+from my.location.fallback.common import DateExact, FallbackLocation, _datetime_timestamp
 
 logger = make_logger(__name__, level="warning")
 
@@ -60,7 +60,7 @@ def locations() -> Iterator[Location]:
 
 
 @lru_cache(1)
-def _sorted_fallback_locations() -> List[FallbackLocation]:
+def _sorted_fallback_locations() -> list[FallbackLocation]:
     fl = list(filter(lambda l: l.duration is not None, fallback_locations()))
     logger.debug(f"Fallback locations: {len(fl)}, sorting...:")
     fl.sort(key=lambda l: l.dt.timestamp())
diff --git a/my/location/google.py b/my/location/google.py
index b966ec6..750c847 100644
--- a/my/location/google.py
+++ b/my/location/google.py
@@ -3,28 +3,27 @@ Location data from Google Takeout
 
 DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead
 """
+from __future__ import annotations
 
 REQUIRES = [
     'geopy', # checking that coordinates are valid
     'ijson',
 ]
 
+import re
+from collections.abc import Iterable, Sequence
 from datetime import datetime, timezone
 from itertools import islice
 from pathlib import Path
-from subprocess import Popen, PIPE
-from typing import Iterable, NamedTuple, Optional, Sequence, IO, Tuple
-import re
+from subprocess import PIPE, Popen
+from typing import IO, NamedTuple, Optional
 
 # pip3 install geopy
-import geopy # type: ignore
+import geopy  # type: ignore
 
-from my.core import stat, Stats, make_logger
+from my.core import Stats, make_logger, stat, warnings
 from my.core.cachew import cache_dir, mcachew
-from my.core import warnings
-
-
 warnings.high("Please set up
my.google.takeout.parser module for better takeout support") @@ -43,7 +42,7 @@ class Location(NamedTuple): alt: Optional[float] -TsLatLon = Tuple[int, int, int] +TsLatLon = tuple[int, int, int] def _iter_via_ijson(fo) -> Iterable[TsLatLon]: @@ -51,10 +50,10 @@ def _iter_via_ijson(fo) -> Iterable[TsLatLon]: # todo extract to common? try: # pip3 install ijson cffi - import ijson.backends.yajl2_cffi as ijson # type: ignore + import ijson.backends.yajl2_cffi as ijson # type: ignore except: warnings.medium("Falling back to default ijson because 'cffi' backend isn't found. It's up to 2x faster, you might want to check it out") - import ijson # type: ignore + import ijson # type: ignore for d in ijson.items(fo, 'locations.item'): yield ( diff --git a/my/location/google_takeout.py b/my/location/google_takeout.py index eb757ce..cb5bef3 100644 --- a/my/location/google_takeout.py +++ b/my/location/google_takeout.py @@ -4,13 +4,14 @@ Extracts locations using google_takeout_parser -- no shared code with the deprec REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] -from typing import Iterator +from collections.abc import Iterator -from my.google.takeout.parser import events, _cachew_depends_on from google_takeout_parser.models import Location as GoogleLocation -from my.core import stat, Stats, LazyLogger +from my.core import LazyLogger, Stats, stat from my.core.cachew import mcachew +from my.google.takeout.parser import _cachew_depends_on, events + from .common import Location logger = LazyLogger(__name__) diff --git a/my/location/google_takeout_semantic.py b/my/location/google_takeout_semantic.py index 5f2c055..7bddfa8 100644 --- a/my/location/google_takeout_semantic.py +++ b/my/location/google_takeout_semantic.py @@ -7,21 +7,24 @@ Extracts semantic location history using google_takeout_parser REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] +from collections.abc import Iterator from dataclasses import dataclass -from typing import Iterator, List -from my.google.takeout.parser import events, _cachew_depends_on as _parser_cachew_depends_on from google_takeout_parser.models import PlaceVisit as SemanticLocation -from my.core import make_config, stat, LazyLogger, Stats +from my.core import LazyLogger, Stats, make_config, stat from my.core.cachew import mcachew from my.core.error import Res +from my.google.takeout.parser import _cachew_depends_on as _parser_cachew_depends_on +from my.google.takeout.parser import events + from .common import Location logger = LazyLogger(__name__) from my.config import location as user_config + @dataclass class semantic_locations_config(user_config.google_takeout_semantic): # a value between 0 and 100, 100 being the most confident @@ -36,7 +39,7 @@ config = make_config(semantic_locations_config) # add config to cachew dependency so it recomputes on config changes -def _cachew_depends_on() -> List[str]: +def _cachew_depends_on() -> list[str]: dep = _parser_cachew_depends_on() dep.insert(0, f"require_confidence={config.require_confidence} accuracy={config.accuracy}") return dep diff --git a/my/location/gpslogger.py b/my/location/gpslogger.py index 6d158a0..bbbf70e 100644 --- a/my/location/gpslogger.py +++ b/my/location/gpslogger.py @@ -20,20 +20,20 @@ class config(location.gpslogger): accuracy: float = 50.0 -from itertools import chain +from collections.abc import Iterator, Sequence from datetime import datetime, timezone +from itertools import chain from pathlib import Path -from typing import Iterator, Sequence, List 
import gpxpy from gpxpy.gpx import GPXXMLSyntaxException from more_itertools import unique_everseen -from my.core import Stats, LazyLogger +from my.core import LazyLogger, Stats from my.core.cachew import mcachew from my.core.common import get_files -from .common import Location +from .common import Location logger = LazyLogger(__name__, level="warning") @@ -49,7 +49,7 @@ def inputs() -> Sequence[Path]: return sorted(get_files(config.export_path, glob="*.gpx", sort=False), key=_input_sort_key) -def _cachew_depends_on() -> List[float]: +def _cachew_depends_on() -> list[float]: return [p.stat().st_mtime for p in inputs()] diff --git a/my/location/home.py b/my/location/home.py index f6e6978..c82dda7 100644 --- a/my/location/home.py +++ b/my/location/home.py @@ -1,7 +1,7 @@ -from .fallback.via_home import * - from my.core.warnings import high +from .fallback.via_home import * + high( "my.location.home is deprecated, use my.location.fallback.via_home instead, or estimate locations using the higher-level my.location.fallback.all.estimate_location" ) diff --git a/my/location/via_ip.py b/my/location/via_ip.py index df48f8b..d465ad0 100644 --- a/my/location/via_ip.py +++ b/my/location/via_ip.py @@ -1,7 +1,7 @@ REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"] -from .fallback.via_ip import * - from my.core.warnings import high +from .fallback.via_ip import * + high("my.location.via_ip is deprecated, use my.location.fallback.via_ip instead") diff --git a/my/materialistic.py b/my/materialistic.py index 8a6a997..45af3f9 100644 --- a/my/materialistic.py +++ b/my/materialistic.py @@ -1,4 +1,5 @@ from .core.warnings import high + high("DEPRECATED! Please use my.hackernews.materialistic instead.") from .hackernews.materialistic import * diff --git a/my/media/imdb.py b/my/media/imdb.py index df31032..131f6a7 100644 --- a/my/media/imdb.py +++ b/my/media/imdb.py @@ -1,10 +1,12 @@ import csv +from collections.abc import Iterator from datetime import datetime -from typing import Iterator, List, NamedTuple +from typing import NamedTuple -from ..core import get_files +from my.core import get_files + +from my.config import imdb as config # isort: skip -from my.config import imdb as config def _get_last(): return max(get_files(config.export_path)) @@ -31,7 +33,7 @@ def iter_movies() -> Iterator[Movie]: yield Movie(created=created, title=title, rating=rating) -def get_movies() -> List[Movie]: +def get_movies() -> list[Movie]: return sorted(iter_movies(), key=lambda m: m.created) diff --git a/my/media/youtube.py b/my/media/youtube.py index 3ddbc14..9a38c43 100644 --- a/my/media/youtube.py +++ b/my/media/youtube.py @@ -1,4 +1,4 @@ -from my.core import __NOT_HPI_MODULE__ +from my.core import __NOT_HPI_MODULE__ # isort: skip from typing import TYPE_CHECKING diff --git a/my/monzo/monzoexport.py b/my/monzo/monzoexport.py index 3aa0cf5..f5e1cd1 100644 --- a/my/monzo/monzoexport.py +++ b/my/monzo/monzoexport.py @@ -5,16 +5,17 @@ REQUIRES = [ 'git+https://github.com/karlicoss/monzoexport', ] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Sequence, Iterator from my.core import ( Paths, get_files, make_logger, ) -import my.config + +import my.config # isort: skip @dataclass diff --git a/my/orgmode.py b/my/orgmode.py index cf14e43..10f53c0 100644 --- a/my/orgmode.py +++ b/my/orgmode.py @@ -1,15 +1,17 @@ ''' Programmatic access and queries to org-mode files on the filesystem ''' +from __future__ import annotations REQUIRES = [ 
'orgparse', ] import re +from collections.abc import Iterable, Sequence from datetime import datetime from pathlib import Path -from typing import Iterable, List, NamedTuple, Optional, Sequence, Tuple +from typing import NamedTuple, Optional import orgparse @@ -34,7 +36,7 @@ def make_config() -> config: class OrgNote(NamedTuple): created: Optional[datetime] heading: str - tags: List[str] + tags: list[str] def inputs() -> Sequence[Path]: @@ -45,7 +47,7 @@ def inputs() -> Sequence[Path]: _rgx = re.compile(orgparse.date.gene_timestamp_regex(brtype='inactive'), re.VERBOSE) -def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]: +def _created(n: orgparse.OrgNode) -> tuple[datetime | None, str]: heading = n.heading # meh.. support in orgparse? pp = {} if n.is_root() else n.properties @@ -68,7 +70,7 @@ def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]: def to_note(x: orgparse.OrgNode) -> OrgNote: # ugh. hack to merely make it cacheable heading = x.heading - created: Optional[datetime] + created: datetime | None try: c, heading = _created(x) if isinstance(c, datetime): diff --git a/my/pdfs.py b/my/pdfs.py index de9324d..eefd573 100644 --- a/my/pdfs.py +++ b/my/pdfs.py @@ -1,6 +1,7 @@ ''' PDF documents and annotations on your filesystem ''' +from __future__ import annotations as _annotations REQUIRES = [ 'git+https://github.com/0xabu/pdfannots', @@ -8,9 +9,10 @@ REQUIRES = [ ] import time +from collections.abc import Iterator, Sequence from datetime import datetime from pathlib import Path -from typing import Iterator, List, NamedTuple, Optional, Protocol, Sequence, TYPE_CHECKING +from typing import TYPE_CHECKING, NamedTuple, Optional, Protocol import pdfannots from more_itertools import bucket @@ -72,7 +74,7 @@ class Annotation(NamedTuple): created: Optional[datetime] # note: can be tz unaware in some bad pdfs... @property - def date(self) -> Optional[datetime]: + def date(self) -> datetime | None: # legacy name return self.created @@ -93,7 +95,7 @@ def _as_annotation(*, raw: pdfannots.Annotation, path: str) -> Annotation: ) -def get_annots(p: Path) -> List[Annotation]: +def get_annots(p: Path) -> list[Annotation]: b = time.time() with p.open('rb') as fo: doc = pdfannots.process_file(fo, emit_progress_to=None) @@ -150,17 +152,17 @@ class Pdf(NamedTuple): annotations: Sequence[Annotation] @property - def created(self) -> Optional[datetime]: + def created(self) -> datetime | None: annots = self.annotations return None if len(annots) == 0 else annots[-1].created @property - def date(self) -> Optional[datetime]: + def date(self) -> datetime | None: # legacy return self.created -def annotated_pdfs(*, filelist: Optional[Sequence[PathIsh]] = None) -> Iterator[Res[Pdf]]: +def annotated_pdfs(*, filelist: Sequence[PathIsh] | None = None) -> Iterator[Res[Pdf]]: if filelist is not None: # hacky... 
keeping it backwards compatible
         # https://github.com/karlicoss/HPI/pull/74
diff --git a/my/photos/main.py b/my/photos/main.py
index bf912e4..f98cb15 100644
--- a/my/photos/main.py
+++ b/my/photos/main.py
@@ -1,27 +1,30 @@
 """
 Photos and videos on your filesystem, their GPS and timestamps
 """
+
+from __future__ import annotations
+
 REQUIRES = [
     'geopy',
     'magic',
 ]
 # NOTE: also uses fdfind to search photos
 
+import json
+from collections.abc import Iterable, Iterator
 from concurrent.futures import ProcessPoolExecutor as Pool
 from datetime import datetime
-import json
 from pathlib import Path
-from typing import Optional, NamedTuple, Iterator, Iterable, List
+from typing import NamedTuple, Optional
 
 from geopy.geocoders import Nominatim # type: ignore
 
 from my.core import LazyLogger
-from my.core.error import Res, sort_res_by
 from my.core.cachew import cache_dir, mcachew
+from my.core.error import Res, sort_res_by
 from my.core.mime import fastermime
 
-from my.config import photos as config # type: ignore[attr-defined]
-
+from my.config import photos as config  # type: ignore[attr-defined]  # isort: skip
 
 logger = LazyLogger(__name__)
 
@@ -55,15 +58,15 @@ class Photo(NamedTuple):
         return f'{config.base_url}{self._basename}'
 
 
-from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref
+from .utils import Exif, ExifTags, convert_ref, dt_from_path, get_exif_from_file
 
 Result = Res[Photo]
 
-def _make_photo_aux(*args, **kwargs) -> List[Result]:
+def _make_photo_aux(*args, **kwargs) -> list[Result]:
     # for the process pool..
     return list(_make_photo(*args, **kwargs))
 
-def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Iterator[Result]:
+def _make_photo(photo: Path, mtype: str, *, parent_geo: LatLon | None) -> Iterator[Result]:
     exif: Exif
     if any(x in mtype for x in ['image/png', 'image/x-ms-bmp', 'video']):
         # TODO don't remember why..
@@ -77,7 +80,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite
             yield e
             exif = {}
 
-    def _get_geo() -> Optional[LatLon]:
+    def _get_geo() -> LatLon | None:
         meta = exif.get(ExifTags.GPSINFO, {})
         if ExifTags.LAT in meta and ExifTags.LON in meta:
             return LatLon(
@@ -87,7 +90,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite
         return parent_geo
 
     # TODO aware on unaware?
-    def _get_dt() -> Optional[datetime]:
+    def _get_dt() -> datetime | None:
         edt = exif.get(ExifTags.DATETIME, None)
         if edt is not None:
             dtimes = edt.replace(' 24', ' 00') # jeez maybe log it?
@@ -123,7 +126,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite
 
 def _candidates() -> Iterable[Res[str]]:
     # TODO that could be a bit slow if there are too many extra files?
-    from subprocess import Popen, PIPE
+    from subprocess import PIPE, Popen
     # TODO could extract this to common?
     # TODO would be nice to reuse get_files (or even let it use find)
     # that way would be easier to exclude
@@ -162,7 +165,7 @@ def _photos(candidates: Iterable[Res[str]]) -> Iterator[Result]:
     from functools import lru_cache
 
     @lru_cache(None)
-    def get_geo(d: Path) -> Optional[LatLon]:
+    def get_geo(d: Path) -> LatLon | None:
         geof = d / 'geo.json'
         if not geof.exists():
             if d == d.parent:
@@ -214,5 +217,7 @@ def print_all() -> None:
 
 # todo cachew -- invalidate if function code changed?
from ..core import Stats, stat + + def stats() -> Stats: return stat(photos) diff --git a/my/photos/utils.py b/my/photos/utils.py index c614c4a..e88def2 100644 --- a/my/photos/utils.py +++ b/my/photos/utils.py @@ -1,11 +1,13 @@ +from __future__ import annotations + +from ..core import __NOT_HPI_MODULE__ # isort: skip + from pathlib import Path -from typing import Dict import PIL.Image -from PIL.ExifTags import TAGS, GPSTAGS +from PIL.ExifTags import GPSTAGS, TAGS - -Exif = Dict +Exif = dict # TODO PIL.ExifTags.TAGS @@ -62,18 +64,15 @@ def convert_ref(cstr, ref: str) -> float: import re from datetime import datetime -from typing import Optional # TODO surely there is a library that does it?? # TODO this belongs to a private overlay or something # basically have a function that patches up dates after the files were yielded.. _DT_REGEX = re.compile(r'\D(\d{8})\D*(\d{6})\D') -def dt_from_path(p: Path) -> Optional[datetime]: +def dt_from_path(p: Path) -> datetime | None: name = p.stem mm = _DT_REGEX.search(name) if mm is None: return None dates = mm.group(1) + mm.group(2) return datetime.strptime(dates, "%Y%m%d%H%M%S") - -from ..core import __NOT_HPI_MODULE__ diff --git a/my/pinboard.py b/my/pinboard.py index ef4ca36..e98dc78 100644 --- a/my/pinboard.py +++ b/my/pinboard.py @@ -5,15 +5,16 @@ REQUIRES = [ 'git+https://github.com/karlicoss/pinbexport', ] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Iterator, Sequence - -from my.core import get_files, Paths, Res -import my.config import pinbexport.dal as pinbexport +from my.core import Paths, Res, get_files + +import my.config # isort: skip + @dataclass class config(my.config.pinboard): # TODO rename to pinboard.pinbexport? diff --git a/my/pocket.py b/my/pocket.py index b638fba..ff9a788 100644 --- a/my/pocket.py +++ b/my/pocket.py @@ -7,10 +7,10 @@ REQUIRES = [ from dataclasses import dataclass from typing import TYPE_CHECKING -from .core import Paths - from my.config import pocket as user_config +from .core import Paths + @dataclass class pocket(user_config): @@ -23,6 +23,7 @@ class pocket(user_config): from .core.cfg import make_config + config = make_config(pocket) @@ -37,7 +38,7 @@ except ModuleNotFoundError as e: Article = dal.Article -from typing import Sequence, Iterable +from collections.abc import Iterable, Sequence # todo not sure if should be defensive against empty? @@ -51,9 +52,12 @@ def articles() -> Iterable[Article]: yield from _dal().articles() -from .core import stat, Stats +from .core import Stats, stat + + def stats() -> Stats: from itertools import chain + from more_itertools import ilen return { **stat(articles), diff --git a/my/polar.py b/my/polar.py index e52bb14..2172014 100644 --- a/my/polar.py +++ b/my/polar.py @@ -1,11 +1,12 @@ """ [[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights """ +from __future__ import annotations + from pathlib import Path -from typing import cast, TYPE_CHECKING +from typing import TYPE_CHECKING, cast - -import my.config +import my.config # isort: skip # todo use something similar to tz.via_location for config fallback if not TYPE_CHECKING: @@ -20,8 +21,11 @@ if user_config is None: pass -from .core import PathIsh from dataclasses import dataclass + +from .core import PathIsh + + @dataclass class polar(user_config): ''' @@ -32,20 +36,21 @@ class polar(user_config): from .core import make_config + config = make_config(polar) # todo not sure where it keeps stuff on Windows? 
# https://github.com/burtonator/polar-bookshelf/issues/296 -from datetime import datetime -from typing import List, Dict, Iterable, NamedTuple, Sequence, Optional import json +from collections.abc import Iterable, Sequence +from datetime import datetime +from typing import NamedTuple -from .core import LazyLogger, Json, Res +from .core import Json, LazyLogger, Res from .core.compat import fromisoformat from .core.error import echain, sort_res_by -from .core.konsume import wrap, Zoomable, Wdict - +from .core.konsume import Wdict, Zoomable, wrap logger = LazyLogger(__name__) @@ -65,7 +70,7 @@ class Highlight(NamedTuple): comments: Sequence[Comment] tags: Sequence[str] page: int # 1-indexed - color: Optional[str] = None + color: str | None = None Uid = str @@ -73,7 +78,7 @@ class Book(NamedTuple): created: datetime uid: Uid path: Path - title: Optional[str] + title: str | None # TODO hmmm. I think this needs to be defensive as well... # think about it later. items: Sequence[Highlight] @@ -129,7 +134,7 @@ class Loader: pi['dimensions'].consume_all() # TODO how to make it nicer? - cmap: Dict[Hid, List[Comment]] = {} + cmap: dict[Hid, list[Comment]] = {} vals = list(comments) for v in vals: cid = v['id'].zoom() @@ -163,7 +168,7 @@ class Loader: h['rects'].ignore() # TODO make it more generic.. - htags: List[str] = [] + htags: list[str] = [] if 'tags' in h: ht = h['tags'].zoom() for _k, v in list(ht.items()): @@ -242,7 +247,7 @@ def iter_entries() -> Iterable[Result]: yield err -def get_entries() -> List[Result]: +def get_entries() -> list[Result]: # sorting by first annotation is reasonable I guess??? # todo perhaps worth making it a pattern? X() returns iterable, get_X returns reasonably sorted list? return list(sort_res_by(iter_entries(), key=lambda e: e.created)) diff --git a/my/reddit/__init__.py b/my/reddit/__init__.py index e81aaf9..f344eeb 100644 --- a/my/reddit/__init__.py +++ b/my/reddit/__init__.py @@ -20,6 +20,7 @@ REQUIRES = [ from my.core.hpi_compat import handle_legacy_import + is_legacy_import = handle_legacy_import( parent_module_name=__name__, legacy_submodule_name='rexport', diff --git a/my/reddit/all.py b/my/reddit/all.py index daedba1..27e22df 100644 --- a/my/reddit/all.py +++ b/my/reddit/all.py @@ -1,8 +1,9 @@ -from typing import Iterator -from my.core import stat, Stats +from collections.abc import Iterator + +from my.core import Stats, stat from my.core.source import import_source -from .common import Save, Upvote, Comment, Submission, _merge_comments +from .common import Comment, Save, Submission, Upvote, _merge_comments # Man... 
ideally an all.py file isn't this verbose, but
 # reddit just feels like that much of a complicated source and
diff --git a/my/reddit/common.py b/my/reddit/common.py
index c01258b..40f9f6e 100644
--- a/my/reddit/common.py
+++ b/my/reddit/common.py
@@ -2,12 +2,14 @@ This defines Protocol classes, which make sure that each different
 type of shared model has a standardized interface
 """
 
-from my.core import __NOT_HPI_MODULE__
-from typing import Set, Iterator, Protocol
+from my.core import __NOT_HPI_MODULE__  # isort: skip
+
+from collections.abc import Iterator
 from itertools import chain
+from typing import Protocol
 
-from my.core import datetime_aware, Json
+from my.core import Json, datetime_aware
 
 
 # common fields across all the Protocol classes, so generic code can be written
@@ -49,7 +51,7 @@ class Submission(RedditBase, Protocol):
 def _merge_comments(*sources: Iterator[Comment]) -> Iterator[Comment]:
     #from .rexport import logger
     #ignored = 0
-    emitted: Set[str] = set()
+    emitted: set[str] = set()
     for e in chain(*sources):
         uid = e.id
         if uid in emitted:
diff --git a/my/reddit/pushshift.py b/my/reddit/pushshift.py
index 9580005..1bfa048 100644
--- a/my/reddit/pushshift.py
+++ b/my/reddit/pushshift.py
@@ -10,13 +10,13 @@ REQUIRES = [
 
 from dataclasses import dataclass
 
+# note: keeping pushshift import before config import, so it's handled gracefully by import_source
+from pushshift_comment_export.dal import PComment, read_file
+
+from my.config import reddit as uconfig
 from my.core import Paths, Stats, stat
 from my.core.cfg import make_config
 
-# note: keeping pushshift import before config import, so it's handled gracefully by import_source
-from pushshift_comment_export.dal import read_file, PComment
-
-from my.config import reddit as uconfig
 
 @dataclass
 class pushshift_config(uconfig.pushshift):
@@ -29,10 +29,10 @@ class pushshift_config(uconfig.pushshift):
 
 config = make_config(pushshift_config)
 
-from my.core import get_files
-from typing import Sequence, Iterator
+from collections.abc import Iterator, Sequence
 from pathlib import Path
 
+from my.core import get_files
 
 def inputs() -> Sequence[Path]:
diff --git a/my/reddit/rexport.py b/my/reddit/rexport.py
index 5dcd7d9..cb6af01 100644
--- a/my/reddit/rexport.py
+++ b/my/reddit/rexport.py
@@ -7,23 +7,24 @@ REQUIRES = [
     'git+https://github.com/karlicoss/rexport',
 ]
 
-from dataclasses import dataclass
 import inspect
+from collections.abc import Iterator, Sequence
+from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Iterator, Sequence
+from typing import TYPE_CHECKING
 
 from my.core import (
-    get_files,
-    make_logger,
-    warnings,
-    stat,
     Paths,
     Stats,
+    get_files,
+    make_logger,
+    stat,
+    warnings,
 )
 from my.core.cachew import mcachew
-from my.core.cfg import make_config, Attrs
+from my.core.cfg import Attrs, make_config
 
-from my.config import reddit as uconfig
+from my.config import reddit as uconfig  # isort: skip
 
 logger = make_logger(__name__)
 
diff --git a/my/rescuetime.py b/my/rescuetime.py
index 76a0d4c..0c9fd28 100644
--- a/my/rescuetime.py
+++ b/my/rescuetime.py
@@ -5,16 +5,15 @@ REQUIRES = [
     'git+https://github.com/karlicoss/rescuexport',
 ]
 
-from pathlib import Path
+from collections.abc import Iterable, Sequence
 from datetime import timedelta
-from typing import Sequence, Iterable
+from pathlib import Path
 
-from my.core import get_files, make_logger, stat, Stats
+from my.core import Stats, get_files, make_logger, stat
 from my.core.cachew import mcachew
 from my.core.error import Res, split_errors
-from my.config import rescuetime as config - +from my.config import rescuetime as config # isort: skip logger = make_logger(__name__) @@ -24,6 +23,7 @@ def inputs() -> Sequence[Path]: import rescuexport.dal as dal + DAL = dal.DAL Entry = dal.Entry @@ -43,6 +43,8 @@ def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]: # todo automatic dataframe interface? from .core.pandas import DataFrameT, as_dataframe + + def dataframe() -> DataFrameT: return as_dataframe(entries()) @@ -56,16 +58,19 @@ def stats() -> Stats: # basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this? +from collections.abc import Iterator from contextlib import contextmanager -from typing import Iterator + + # todo take seed, or what? @contextmanager def fake_data(rows: int=1000) -> Iterator: # todo also disable cachew automatically for such things? - from my.core.cfg import tmp_config - from my.core.cachew import disabled_cachew - from tempfile import TemporaryDirectory import json + from tempfile import TemporaryDirectory + + from my.core.cachew import disabled_cachew + from my.core.cfg import tmp_config with disabled_cachew(), TemporaryDirectory() as td: tdir = Path(td) f = tdir / 'rescuetime.json' diff --git a/my/roamresearch.py b/my/roamresearch.py index 2fe06d4..7322774 100644 --- a/my/roamresearch.py +++ b/my/roamresearch.py @@ -1,16 +1,19 @@ """ [[https://roamresearch.com][Roam]] data """ -from datetime import datetime, timezone -from pathlib import Path -from itertools import chain -import re -from typing import NamedTuple, Iterator, List, Optional +from __future__ import annotations -from .core import get_files, LazyLogger, Json +import re +from collections.abc import Iterator +from datetime import datetime, timezone +from itertools import chain +from pathlib import Path +from typing import NamedTuple from my.config import roamresearch as config +from .core import Json, LazyLogger, get_files + logger = LazyLogger(__name__) @@ -57,15 +60,15 @@ class Node(NamedTuple): return datetime.fromtimestamp(rt / 1000, tz=timezone.utc) @property - def title(self) -> Optional[str]: + def title(self) -> str | None: return self.raw.get(Keys.TITLE) @property - def body(self) -> Optional[str]: + def body(self) -> str | None: return self.raw.get(Keys.STRING) @property - def children(self) -> List['Node']: + def children(self) -> list[Node]: # TODO cache? needs a key argument (because of Json) ch = self.raw.get(Keys.CHILDREN, []) return list(map(Node, ch)) @@ -95,7 +98,7 @@ class Node(NamedTuple): # - heading -- notes that haven't been created yet return len(self.body or '') == 0 and len(self.children) == 0 - def traverse(self) -> Iterator['Node']: + def traverse(self) -> Iterator[Node]: # not sure about __iter__, because might be a bit unintuitive that it's recursive.. yield self for c in self.children: @@ -120,7 +123,7 @@ class Node(NamedTuple): return f'Node(created={self.created}, title={self.title}, body={self.body})' @staticmethod - def make(raw: Json) -> Iterator['Node']: + def make(raw: Json) -> Iterator[Node]: is_empty = set(raw.keys()) == {Keys.EDITED, Keys.EDIT_EMAIL, Keys.TITLE} # not sure about that... 
but daily notes end up like that if is_empty: @@ -130,11 +133,11 @@ class Node(NamedTuple): class Roam: - def __init__(self, raw: List[Json]) -> None: + def __init__(self, raw: list[Json]) -> None: self.raw = raw @property - def notes(self) -> List[Node]: + def notes(self) -> list[Node]: return list(chain.from_iterable(map(Node.make, self.raw))) def traverse(self) -> Iterator[Node]: diff --git a/my/rss/all.py b/my/rss/all.py index b4dbdbd..e10e4d2 100644 --- a/my/rss/all.py +++ b/my/rss/all.py @@ -3,9 +3,9 @@ Unified RSS data, merged from different services I used historically ''' # NOTE: you can comment out the sources you're not using -from . import feedbin, feedly +from collections.abc import Iterable -from typing import Iterable +from . import feedbin, feedly from .common import Subscription, compute_subscriptions diff --git a/my/rss/common.py b/my/rss/common.py index bb75297..bf9506e 100644 --- a/my/rss/common.py +++ b/my/rss/common.py @@ -1,10 +1,12 @@ -from my.core import __NOT_HPI_MODULE__ +from __future__ import annotations +from my.core import __NOT_HPI_MODULE__ # isort: skip + +from collections.abc import Iterable, Sequence from dataclasses import dataclass, replace from itertools import chain -from typing import Optional, List, Dict, Iterable, Tuple, Sequence -from my.core import warn_if_empty, datetime_aware +from my.core import datetime_aware, warn_if_empty @dataclass @@ -13,16 +15,16 @@ class Subscription: url: str id: str # TODO not sure about it... # eh, not all of them got reasonable 'created' time - created_at: Optional[datetime_aware] + created_at: datetime_aware | None subscribed: bool = True # snapshot of subscriptions at time -SubscriptionState = Tuple[datetime_aware, Sequence[Subscription]] +SubscriptionState = tuple[datetime_aware, Sequence[Subscription]] @warn_if_empty -def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]: +def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> list[Subscription]: """ Keeps track of everything I ever subscribed to. In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed) @@ -30,7 +32,7 @@ def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscri states = list(chain.from_iterable(sources)) # TODO keep 'source'/'provider'/'service' attribute? - by_url: Dict[str, Subscription] = {} + by_url: dict[str, Subscription] = {} # ah. dates are used for sorting for _when, state in sorted(states): # TODO use 'when'? 
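[Editor's note: illustrative aside, not part of the patch. The hunks above show the typing migration applied throughout this change: with `from __future__ import annotations`, annotations are evaluated lazily (PEP 563), so PEP 604 unions (`datetime_aware | None`) and PEP 585 builtin generics (`tuple[...]`, `list[...]`) are safe inside annotations, while `collections.abc` replaces the `typing` aliases deprecated since 3.9. A minimal sketch, with hypothetical names:

    from __future__ import annotations  # annotations become lazy strings (PEP 563)

    from collections.abc import Iterable  # preferred over typing.Iterable since 3.9
    from dataclasses import dataclass
    from datetime import datetime

    @dataclass
    class Item:  # hypothetical type, for illustration only
        url: str
        created_at: datetime | None = None  # PEP 604 union, replaces Optional[datetime]

    def by_url(items: Iterable[Item]) -> dict[str, Item]:  # PEP 585, replaces Dict[str, Item]
        return {item.url: item for item in items}

Note the future import only covers annotations: runtime uses such as `cast(tuple[float, float], ...)` or type aliases evaluated at import time still require a 3.9+ interpreter, which is presumably why some modules here keep `typing.Optional`/`Union` for now.]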
diff --git a/my/rss/feedbin.py b/my/rss/feedbin.py index dc13a17..5f4da0a 100644 --- a/my/rss/feedbin.py +++ b/my/rss/feedbin.py @@ -3,15 +3,15 @@ Feedbin RSS reader """ import json +from collections.abc import Iterator, Sequence from pathlib import Path -from typing import Iterator, Sequence -from my.core import get_files, stat, Stats +from my.core import Stats, get_files, stat from my.core.compat import fromisoformat + from .common import Subscription, SubscriptionState -from my.config import feedbin as config - +from my.config import feedbin as config # isort: skip def inputs() -> Sequence[Path]: return get_files(config.export_path) diff --git a/my/rss/feedly.py b/my/rss/feedly.py index 127ef61..9bf5429 100644 --- a/my/rss/feedly.py +++ b/my/rss/feedly.py @@ -4,9 +4,10 @@ Feedly RSS reader import json from abc import abstractmethod +from collections.abc import Iterator, Sequence from datetime import datetime, timezone from pathlib import Path -from typing import Iterator, Protocol, Sequence +from typing import Protocol from my.core import Paths, get_files diff --git a/my/rtm.py b/my/rtm.py index b559ba4..217c969 100644 --- a/my/rtm.py +++ b/my/rtm.py @@ -6,21 +6,19 @@ REQUIRES = [ 'icalendar', ] +import re +from collections.abc import Iterator from datetime import datetime from functools import cached_property -import re -from typing import Dict, List, Iterator -from my.core import make_logger, get_files -from my.core.utils.itertools import make_dict - -from my.config import rtm as config - - -from more_itertools import bucket import icalendar # type: ignore from icalendar.cal import Todo # type: ignore +from more_itertools import bucket +from my.core import get_files, make_logger +from my.core.utils.itertools import make_dict + +from my.config import rtm as config # isort: skip logger = make_logger(__name__) @@ -32,14 +30,14 @@ class MyTodo: self.revision = revision @cached_property - def notes(self) -> List[str]: + def notes(self) -> list[str]: # TODO can there be multiple?? desc = self.todo['DESCRIPTION'] notes = re.findall(r'---\n\n(.*?)\n\nUpdated:', desc, flags=re.DOTALL) return notes @cached_property - def tags(self) -> List[str]: + def tags(self) -> list[str]: desc = self.todo['DESCRIPTION'] [tags_str] = re.findall(r'\nTags: (.*?)\n', desc, flags=re.DOTALL) if tags_str == 'none': @@ -92,11 +90,11 @@ class DAL: for t in self.cal.walk('VTODO'): yield MyTodo(t, self.revision) - def get_todos_by_uid(self) -> Dict[str, MyTodo]: + def get_todos_by_uid(self) -> dict[str, MyTodo]: todos = self.all_todos() return make_dict(todos, key=lambda t: t.uid) - def get_todos_by_title(self) -> Dict[str, List[MyTodo]]: + def get_todos_by_title(self) -> dict[str, list[MyTodo]]: todos = self.all_todos() bucketed = bucket(todos, lambda todo: todo.title) return {k: list(bucketed[k]) for k in bucketed} diff --git a/my/runnerup.py b/my/runnerup.py index a21075a..f5d7d1e 100644 --- a/my/runnerup.py +++ b/my/runnerup.py @@ -6,17 +6,15 @@ REQUIRES = [ 'python-tcxparser', ] +from collections.abc import Iterable from datetime import timedelta from pathlib import Path -from typing import Iterable - -from my.core import Res, get_files, Json -from my.core.compat import fromisoformat import tcxparser # type: ignore[import-untyped] from my.config import runnerup as config - +from my.core import Json, Res, get_files +from my.core.compat import fromisoformat # TODO later, use a proper namedtuple? 
Workout = Json @@ -70,6 +68,8 @@ def workouts() -> Iterable[Res[Workout]]: from .core.pandas import DataFrameT, check_dataframe, error_to_row + + @check_dataframe def dataframe() -> DataFrameT: def it(): @@ -85,6 +85,8 @@ def dataframe() -> DataFrameT: return df -from .core import stat, Stats +from .core import Stats, stat + + def stats() -> Stats: return stat(dataframe) diff --git a/my/simple.py b/my/simple.py index 7462291..b7f25cd 100644 --- a/my/simple.py +++ b/my/simple.py @@ -1,12 +1,11 @@ ''' Just a demo module for testing and documentation purposes ''' +from collections.abc import Iterator from dataclasses import dataclass -from typing import Iterator - -from my.core import make_config from my.config import simple as user_config +from my.core import make_config @dataclass diff --git a/my/smscalls.py b/my/smscalls.py index 78bf7ee..ccaac72 100644 --- a/my/smscalls.py +++ b/my/smscalls.py @@ -2,6 +2,7 @@ Phone calls and SMS messages Exported using https://play.google.com/store/apps/details?id=com.riteshsahu.SMSBackupRestore&hl=en_US """ +from __future__ import annotations # See: https://www.synctech.com.au/sms-backup-restore/fields-in-xml-backup-files/ for schema @@ -9,8 +10,9 @@ REQUIRES = ['lxml'] from dataclasses import dataclass -from my.core import get_files, stat, Paths, Stats from my.config import smscalls as user_config +from my.core import Paths, Stats, get_files, stat + @dataclass class smscalls(user_config): @@ -18,11 +20,13 @@ class smscalls(user_config): export_path: Paths from my.core.cfg import make_config + config = make_config(smscalls) +from collections.abc import Iterator from datetime import datetime, timezone from pathlib import Path -from typing import NamedTuple, Iterator, Set, Tuple, Optional, Any, Dict, List +from typing import Any, NamedTuple import lxml.etree as etree @@ -33,7 +37,7 @@ class Call(NamedTuple): dt: datetime dt_readable: str duration_s: int - who: Optional[str] + who: str | None # type - 1 = Incoming, 2 = Outgoing, 3 = Missed, 4 = Voicemail, 5 = Rejected, 6 = Refused List. call_type: int @@ -50,7 +54,7 @@ class Call(NamedTuple): # All the field values are read as-is from the underlying database and no conversion is done by the app in most cases. # # The '(Unknown)' is just what my android phone does, not sure if there are others -UNKNOWN: Set[str] = {'(Unknown)'} +UNKNOWN: set[str] = {'(Unknown)'} def _extract_calls(path: Path) -> Iterator[Res[Call]]: @@ -83,7 +87,7 @@ def calls() -> Iterator[Res[Call]]: files = get_files(config.export_path, glob='calls-*.xml') # TODO always replacing with the latter is good, we get better contact names?? 
-    emitted: Set[datetime] = set()
+    emitted: set[datetime] = set()
     for p in files:
         for c in _extract_calls(p):
             if isinstance(c, Exception):
@@ -98,7 +102,7 @@ def calls() -> Iterator[Res[Call]]:
 class Message(NamedTuple):
     dt: datetime
     dt_readable: str
-    who: Optional[str]
+    who: str | None
     message: str
     phone_number: str
     # type - 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox, 5 = Failed, 6 = Queued
@@ -112,7 +116,7 @@ class Message(NamedTuple):
 def messages() -> Iterator[Res[Message]]:
     files = get_files(config.export_path, glob='sms-*.xml')
 
-    emitted: Set[Tuple[datetime, Optional[str], bool]] = set()
+    emitted: set[tuple[datetime, str | None, bool]] = set()
     for p in files:
         for c in _extract_messages(p):
             if isinstance(c, Exception):
@@ -155,20 +159,20 @@ class MMSContentPart(NamedTuple):
     sequence_index: int
     content_type: str
     filename: str
-    text: Optional[str]
-    data: Optional[str]
+    text: str | None
+    data: str | None
 
 
 class MMS(NamedTuple):
     dt: datetime
     dt_readable: str
-    parts: List[MMSContentPart]
+    parts: list[MMSContentPart]
     # NOTE: this is often something like 'Name 1, Name 2', but might be different depending on your client
-    who: Optional[str]
+    who: str | None
     # NOTE: This can be a single phone number, or multiple, split by '~' or ','. It's better to think
     # of this as a 'key' or 'conversation ID', phone numbers are also present in 'addresses'
     phone_number: str
-    addresses: List[Tuple[str, int]]
+    addresses: list[tuple[str, int]]
     # 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox
     message_type: int
 
@@ -194,7 +198,7 @@ class MMS(NamedTuple):
 def mms() -> Iterator[Res[MMS]]:
     files = get_files(config.export_path, glob='sms-*.xml')
 
-    emitted: Set[Tuple[datetime, Optional[str], str]] = set()
+    emitted: set[tuple[datetime, str | None, str]] = set()
     for p in files:
         for c in _extract_mms(p):
             if isinstance(c, Exception):
@@ -207,7 +211,7 @@ def mms() -> Iterator[Res[MMS]]:
             yield c
 
 
-def _resolve_null_str(value: Optional[str]) -> Optional[str]:
+def _resolve_null_str(value: str | None) -> str | None:
     if value is None:
         return None
     # hmm.. there's some risk of the text actually being 'null', but there's
@@ -235,7 +239,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
             yield RuntimeError(f'Missing one or more required attributes [date, readable_date, msg_box, address] in {mxml_str}')
             continue
 
-        addresses: List[Tuple[str, int]] = []
+        addresses: list[tuple[str, int]] = []
         for addr_parent in mxml.findall('addrs'):
             for addr in addr_parent.findall('addr'):
                 addr_data = addr.attrib
@@ -250,7 +254,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
                     continue
                 addresses.append((user_address, int(user_type)))
 
-        content: List[MMSContentPart] = []
+        content: list[MMSContentPart] = []
 
         for part_root in mxml.findall('parts'):
 
@@ -267,8 +271,8 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
             #
             # man, attrib is some internal cpython ._Attrib type which can't
             # be typed by any sort of mappingproxy. maybe a protocol could work..?
-            part_data: Dict[str, Any] = part.attrib  # type: ignore
-            seq: Optional[str] = part_data.get('seq')
+            part_data: dict[str, Any] = part.attrib  # type: ignore
+            seq: str | None = part_data.get('seq')
             if seq == '-1':
                 continue
@@ -276,13 +280,13 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
                 yield RuntimeError(f'seq must be a number, was seq={seq} {type(seq)} in {part_data}')
                 continue
 
-            charset_type: Optional[str] = _resolve_null_str(part_data.get('ct'))
-            filename: Optional[str] = _resolve_null_str(part_data.get('name'))
+            charset_type: str | None = _resolve_null_str(part_data.get('ct'))
+            filename: str | None = _resolve_null_str(part_data.get('name'))
             # in some cases (images, cards), the filename is set in 'cl' instead
             if filename is None:
                 filename = _resolve_null_str(part_data.get('cl'))
-            text: Optional[str] = _resolve_null_str(part_data.get('text'))
-            data: Optional[str] = _resolve_null_str(part_data.get('data'))
+            text: str | None = _resolve_null_str(part_data.get('text'))
+            data: str | None = _resolve_null_str(part_data.get('data'))
 
             if charset_type is None or filename is None or (text is None and data is None):
                 yield RuntimeError(f'Missing one or more required attributes [ct, name, (text, data)] must be present in {part_data}')
diff --git a/my/stackexchange/gdpr.py b/my/stackexchange/gdpr.py
index 5292bef..78987be 100644
--- a/my/stackexchange/gdpr.py
+++ b/my/stackexchange/gdpr.py
@@ -6,8 +6,11 @@ Stackexchange data (uses [[https://stackoverflow.com/legal/gdpr/request][officia
 
 ### config
 from dataclasses import dataclass
+
 from my.config import stackexchange as user_config
-from my.core import PathIsh, make_config, get_files, Json
+from my.core import Json, PathIsh, get_files, make_config
+
+
 @dataclass
 class stackexchange(user_config):
     gdpr_path: PathIsh  # path to GDPR zip file
@@ -17,9 +20,13 @@ config = make_config(stackexchange)
 
 # TODO just merge all of them and then filter?.. not sure
 
-from my.core.compat import fromisoformat
-from typing import NamedTuple, Iterable
+from collections.abc import Iterable
 from datetime import datetime
+from typing import NamedTuple
+
+from my.core.compat import fromisoformat
+
+
 class Vote(NamedTuple):
     j: Json
     # todo ip?
@@ -62,7 +69,10 @@ class Vote(NamedTuple):
     # todo expose vote type?
 
 import json
+
 from ..core.error import Res
+
+
 def votes() -> Iterable[Res[Vote]]:
     # TODO there is also some site specific stuff in qa/ directory.. not sure if it's more detailed
     # todo should be defensive?
not sure if present when user has no votes @@ -74,6 +84,8 @@ def votes() -> Iterable[Res[Vote]]: yield Vote(r) -from ..core import stat, Stats +from ..core import Stats, stat + + def stats() -> Stats: return stat(votes) diff --git a/my/stackexchange/stexport.py b/my/stackexchange/stexport.py index 812a155..111ed28 100644 --- a/my/stackexchange/stexport.py +++ b/my/stackexchange/stexport.py @@ -16,7 +16,8 @@ from my.core import ( make_config, stat, ) -import my.config + +import my.config # isort: skip @dataclass diff --git a/my/taplog.py b/my/taplog.py index 51eeb72..5e64a72 100644 --- a/my/taplog.py +++ b/my/taplog.py @@ -1,24 +1,26 @@ ''' [[https://play.google.com/store/apps/details?id=com.waterbear.taglog][Taplog]] app data ''' -from datetime import datetime -from typing import NamedTuple, Dict, Optional, Iterable +from __future__ import annotations -from my.core import get_files, stat, Stats -from my.core.sqlite import sqlite_connection +from collections.abc import Iterable +from datetime import datetime +from typing import NamedTuple from my.config import taplog as user_config +from my.core import Stats, get_files, stat +from my.core.sqlite import sqlite_connection class Entry(NamedTuple): - row: Dict + row: dict @property def id(self) -> str: return str(self.row['_id']) @property - def number(self) -> Optional[float]: + def number(self) -> float | None: ns = self.row['number'] # TODO ?? if isinstance(ns, str): diff --git a/my/telegram/telegram_backup.py b/my/telegram/telegram_backup.py index ff4f904..eea7e50 100644 --- a/my/telegram/telegram_backup.py +++ b/my/telegram/telegram_backup.py @@ -1,17 +1,17 @@ """ Telegram data via [fabianonline/telegram_backup](https://github.com/fabianonline/telegram_backup) tool """ +from __future__ import annotations +import sqlite3 +from collections.abc import Iterator from dataclasses import dataclass from datetime import datetime, timezone -from struct import unpack_from, calcsize -import sqlite3 -from typing import Dict, Iterator, Optional - -from my.core import datetime_aware, PathIsh -from my.core.sqlite import sqlite_connection +from struct import calcsize, unpack_from from my.config import telegram as user_config +from my.core import PathIsh, datetime_aware +from my.core.sqlite import sqlite_connection @dataclass @@ -23,17 +23,17 @@ class config(user_config.telegram_backup): @dataclass class Chat: id: str - name: Optional[str] + name: str | None # not all users have short handle + groups don't have them either? # TODO hmm some groups have it -- it's just the tool doesn't dump them?? - handle: Optional[str] + handle: str | None # not sure if need type? 
@dataclass class User: id: str - name: Optional[str] + name: str | None @dataclass @@ -44,7 +44,7 @@ class Message: chat: Chat sender: User text: str - extra_media_info: Optional[str] = None + extra_media_info: str | None = None @property def permalink(self) -> str: @@ -61,7 +61,7 @@ class Message: -Chats = Dict[str, Chat] +Chats = dict[str, Chat] def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bool) -> Message: ts = r['time'] # desktop export uses UTC (checked by exporting in winter time vs summer time) @@ -70,7 +70,7 @@ def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bo chat = chats[r['source_id']] sender = chats[r['sender_id']] - extra_media_info: Optional[str] = None + extra_media_info: str | None = None if with_extra_media_info and r['has_media'] == 1: # also it's quite hacky, so at least for now it's just an optional attribute behind the flag # defensive because it's a bit tricky to correctly parse without a proper api parser.. @@ -90,7 +90,7 @@ def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bo ) -def messages(*, extra_where: Optional[str]=None, with_extra_media_info: bool=False) -> Iterator[Message]: +def messages(*, extra_where: str | None=None, with_extra_media_info: bool=False) -> Iterator[Message]: messages_query = 'SELECT * FROM messages WHERE message_type NOT IN ("service_message", "empty_message")' if extra_where is not None: messages_query += ' AND ' + extra_where @@ -106,7 +106,7 @@ def messages(*, extra_where: Optional[str]=None, with_extra_media_info: bool=Fal for r in db.execute('SELECT * FROM users ORDER BY id'): first = r["first_name"] last = r["last_name"] - name: Optional[str] + name: str | None if first is not None and last is not None: name = f'{first} {last}' else: @@ -121,7 +121,7 @@ def messages(*, extra_where: Optional[str]=None, with_extra_media_info: bool=Fal yield _message_from_row(r, chats=chats, with_extra_media_info=with_extra_media_info) -def _extract_extra_media_info(data: bytes) -> Optional[str]: +def _extract_extra_media_info(data: bytes) -> str | None: # ugh... 
very hacky, but it does manage to extract from 90% of messages that have media pos = 0 diff --git a/my/tests/bluemaestro.py b/my/tests/bluemaestro.py index 2d7c81e..d139a8f 100644 --- a/my/tests/bluemaestro.py +++ b/my/tests/bluemaestro.py @@ -1,4 +1,4 @@ -from typing import Iterator +from collections.abc import Iterator import pytest from more_itertools import one diff --git a/my/tests/body/weight.py b/my/tests/body/weight.py index 069e940..f26ccf2 100644 --- a/my/tests/body/weight.py +++ b/my/tests/body/weight.py @@ -1,8 +1,10 @@ from pathlib import Path -import pytz -from my.core.cfg import tmp_config + import pytest +import pytz + from my.body.weight import from_orgmode +from my.core.cfg import tmp_config def test_body_weight() -> None: diff --git a/my/tests/commits.py b/my/tests/commits.py index c967027..48e349f 100644 --- a/my/tests/commits.py +++ b/my/tests/commits.py @@ -1,14 +1,11 @@ import os from pathlib import Path -from more_itertools import bucket import pytest - - -from my.core.cfg import tmp_config +from more_itertools import bucket from my.coding.commits import commits - +from my.core.cfg import tmp_config pytestmark = pytest.mark.skipif( os.name == 'nt', diff --git a/my/tests/location/fallback.py b/my/tests/location/fallback.py index 10a4e5b..c09b902 100644 --- a/my/tests/location/fallback.py +++ b/my/tests/location/fallback.py @@ -2,8 +2,8 @@ To test my.location.fallback_location.all """ +from collections.abc import Iterator from datetime import datetime, timedelta, timezone -from typing import Iterator import pytest from more_itertools import ilen diff --git a/my/tests/reddit.py b/my/tests/reddit.py index 4f1ec51..4ddccf8 100644 --- a/my/tests/reddit.py +++ b/my/tests/reddit.py @@ -1,16 +1,14 @@ import pytest from more_itertools import consume -from my.core.cfg import tmp_config -from my.core.utils.itertools import ensure_unique - -from .common import testdata - - # deliberately use mixed style imports on the top level and inside the methods to test tmp_config stuff # todo won't really be necessary once we migrate to lazy user config import my.reddit.all as my_reddit_all import my.reddit.rexport as my_reddit_rexport +from my.core.cfg import tmp_config +from my.core.utils.itertools import ensure_unique + +from .common import testdata def test_basic_1() -> None: diff --git a/my/time/tz/common.py b/my/time/tz/common.py index 13c8ac0..c0dd262 100644 --- a/my/time/tz/common.py +++ b/my/time/tz/common.py @@ -3,7 +3,6 @@ from typing import Callable, Literal, cast from my.core import datetime_aware - ''' Depending on the specific data provider and your level of paranoia you might expect different behaviour.. E.g.: - if your objects already have tz info, you might not need to call localize() at all diff --git a/my/time/tz/main.py b/my/time/tz/main.py index fafc5fe..bdd36b1 100644 --- a/my/time/tz/main.py +++ b/my/time/tz/main.py @@ -6,6 +6,7 @@ from datetime import datetime from my.core import datetime_aware + # todo hmm, kwargs isn't mypy friendly.. but specifying types would require duplicating default args. uhoh def localize(dt: datetime, **kwargs) -> datetime_aware: # todo document patterns for combining multiple data sources diff --git a/my/time/tz/via_location.py b/my/time/tz/via_location.py index 4920333..58b5bf7 100644 --- a/my/time/tz/via_location.py +++ b/my/time/tz/via_location.py @@ -2,6 +2,8 @@ Timezone data provider, guesses timezone based on location data (e.g. 
GPS) ''' +from __future__ import annotations + REQUIRES = [ # for determining timezone by coordinate 'timezonefinder', @@ -10,6 +12,7 @@ REQUIRES = [ import heapq import os from collections import Counter +from collections.abc import Iterable, Iterator from dataclasses import dataclass from datetime import date, datetime from functools import lru_cache @@ -17,14 +20,7 @@ from itertools import groupby from typing import ( TYPE_CHECKING, Any, - Dict, - Iterable, - Iterator, - List, - Optional, Protocol, - Set, - Tuple, ) import pytz @@ -102,7 +98,7 @@ def _timezone_finder(*, fast: bool) -> Any: # for backwards compatibility -def _locations() -> Iterator[Tuple[LatLon, datetime_aware]]: +def _locations() -> Iterator[tuple[LatLon, datetime_aware]]: try: import my.location.all @@ -125,7 +121,7 @@ def _locations() -> Iterator[Tuple[LatLon, datetime_aware]]: # TODO: could use heapmerge or sort the underlying iterators somehow? # see https://github.com/karlicoss/HPI/pull/237#discussion_r858372934 -def _sorted_locations() -> List[Tuple[LatLon, datetime_aware]]: +def _sorted_locations() -> list[tuple[LatLon, datetime_aware]]: return sorted(_locations(), key=lambda x: x[1]) @@ -140,7 +136,7 @@ class DayWithZone: zone: Zone -def _find_tz_for_locs(finder: Any, locs: Iterable[Tuple[LatLon, datetime]]) -> Iterator[DayWithZone]: +def _find_tz_for_locs(finder: Any, locs: Iterable[tuple[LatLon, datetime]]) -> Iterator[DayWithZone]: for (lat, lon), dt in locs: # TODO right. its _very_ slow... zone = finder.timezone_at(lat=lat, lng=lon) @@ -172,7 +168,7 @@ def _iter_local_dates() -> Iterator[DayWithZone]: # TODO: warnings doesn't actually warn? # warnings = [] - locs: Iterable[Tuple[LatLon, datetime]] + locs: Iterable[tuple[LatLon, datetime]] locs = _sorted_locations() if cfg.sort_locations else _locations() yield from _find_tz_for_locs(finder, locs) @@ -187,7 +183,7 @@ def _iter_local_dates_fallback() -> Iterator[DayWithZone]: cfg = make_config() - def _fallback_locations() -> Iterator[Tuple[LatLon, datetime]]: + def _fallback_locations() -> Iterator[tuple[LatLon, datetime]]: for loc in sorted(flocs(), key=lambda x: x.dt): yield ((loc.lat, loc.lon), loc.dt) @@ -225,14 +221,14 @@ def _iter_tzs() -> Iterator[DayWithZone]: # we need to sort them first before we can do a groupby by_day = lambda p: p.day - local_dates: List[DayWithZone] = sorted(_iter_local_dates(), key=by_day) + local_dates: list[DayWithZone] = sorted(_iter_local_dates(), key=by_day) logger.debug(f"no. of items using exact locations: {len(local_dates)}") - local_dates_fallback: List[DayWithZone] = sorted(_iter_local_dates_fallback(), key=by_day) + local_dates_fallback: list[DayWithZone] = sorted(_iter_local_dates_fallback(), key=by_day) # find days that are in fallback but not in local_dates (i.e., missing days) - local_dates_set: Set[date] = {d.day for d in local_dates} - use_fallback_days: List[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set] + local_dates_set: set[date] = {d.day for d in local_dates} + use_fallback_days: list[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set] logger.debug(f"no. 
of items being used from fallback locations: {len(use_fallback_days)}") # combine local_dates and missing days from fallback into a sorted list @@ -246,20 +242,20 @@ def _iter_tzs() -> Iterator[DayWithZone]: @lru_cache(1) -def _day2zone() -> Dict[date, pytz.BaseTzInfo]: +def _day2zone() -> dict[date, pytz.BaseTzInfo]: # NOTE: kinda unfortunate that this will have to process all days before returning result for just one # however otherwise cachew cache might never be initialized properly # so we'll always end up recomputing everyting during subsequent runs return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()} -def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]: +def _get_day_tz(d: date) -> pytz.BaseTzInfo | None: return _day2zone().get(d) # ok to cache, there are only a few home locations? @lru_cache(None) -def _get_home_tz(loc: LatLon) -> Optional[pytz.BaseTzInfo]: +def _get_home_tz(loc: LatLon) -> pytz.BaseTzInfo | None: (lat, lng) = loc finder = _timezone_finder(fast=False) # ok to use slow here for better precision zone = finder.timezone_at(lat=lat, lng=lng) @@ -270,7 +266,7 @@ def _get_home_tz(loc: LatLon) -> Optional[pytz.BaseTzInfo]: return pytz.timezone(zone) -def get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]: +def get_tz(dt: datetime) -> pytz.BaseTzInfo | None: ''' Given a datetime, returns the timezone for that date. ''' diff --git a/my/tinder/android.py b/my/tinder/android.py index d9b256b..a09794f 100644 --- a/my/tinder/android.py +++ b/my/tinder/android.py @@ -3,20 +3,22 @@ Tinder data from Android app database (in =/data/data/com.tinder/databases/tinde """ from __future__ import annotations -from collections import defaultdict, Counter +import sqlite3 +from collections import Counter, defaultdict +from collections.abc import Iterator, Mapping, Sequence from dataclasses import dataclass from datetime import datetime, timezone from itertools import chain from pathlib import Path -import sqlite3 -from typing import Sequence, Iterator, Union, Dict, List, Mapping +from typing import Union -from my.core import Paths, get_files, Res, stat, Stats, datetime_aware, make_logger +from my.core import Paths, Res, Stats, datetime_aware, get_files, make_logger, stat from my.core.common import unique_everseen from my.core.compat import assert_never from my.core.error import echain from my.core.sqlite import sqlite_connection -import my.config + +import my.config # isort: skip logger = make_logger(__name__) @@ -164,8 +166,8 @@ def _parse_msg(row: sqlite3.Row) -> _Message: # todo maybe it's rich_entities method? def entities() -> Iterator[Res[Entity]]: - id2person: Dict[str, Person] = {} - id2match: Dict[str, Match] = {} + id2person: dict[str, Person] = {} + id2match: dict[str, Match] = {} for x in unique_everseen(_entities): if isinstance(x, Exception): yield x @@ -217,7 +219,7 @@ def messages() -> Iterator[Res[Message]]: # todo not sure, maybe it's not fundamental enough to keep here... 
def match2messages() -> Iterator[Res[Mapping[Match, Sequence[Message]]]]: - res: Dict[Match, List[Message]] = defaultdict(list) + res: dict[Match, list[Message]] = defaultdict(list) for x in entities(): if isinstance(x, Exception): yield x diff --git a/my/topcoder.py b/my/topcoder.py index 07f71be..56403e2 100644 --- a/my/topcoder.py +++ b/my/topcoder.py @@ -1,14 +1,14 @@ +import json +from collections.abc import Iterator, Sequence from dataclasses import dataclass from functools import cached_property -import json from pathlib import Path -from typing import Iterator, Sequence -from my.core import get_files, Res, datetime_aware +from my.core import Res, datetime_aware, get_files from my.core.compat import fromisoformat from my.experimental.destructive_parsing import Manager -from my.config import topcoder as config # type: ignore[attr-defined] +from my.config import topcoder as config # type: ignore[attr-defined] # isort: skip def inputs() -> Sequence[Path]: diff --git a/my/twitter/all.py b/my/twitter/all.py index 4714021..c2c471e 100644 --- a/my/twitter/all.py +++ b/my/twitter/all.py @@ -1,11 +1,11 @@ """ Unified Twitter data (merged from the archive and periodic updates) """ -from typing import Iterator +from collections.abc import Iterator + from ..core import Res from ..core.source import import_source -from .common import merge_tweets, Tweet - +from .common import Tweet, merge_tweets # NOTE: you can comment out the sources you don't need src_twint = import_source(module_name='my.twitter.twint') diff --git a/my/twitter/android.py b/my/twitter/android.py index ada04ae..88c9389 100644 --- a/my/twitter/android.py +++ b/my/twitter/android.py @@ -4,21 +4,21 @@ Twitter data from official app for Android from __future__ import annotations +import re +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -import re from struct import unpack_from -from typing import Iterator, Sequence, Set -from my.core import datetime_aware, get_files, LazyLogger, Paths, Res +from my.core import LazyLogger, Paths, Res, datetime_aware, get_files from my.core.common import unique_everseen from my.core.sqlite import sqlite_connect_immutable -import my.config - from .common import permalink +import my.config # isort: skip + logger = LazyLogger(__name__) @@ -155,7 +155,7 @@ _SELECT_OWN_TWEETS = '_SELECT_OWN_TWEETS' def get_own_user_id(conn) -> str: # unclear what's the reliable way to query it, so we use multiple different ones and arbitrate # NOTE: 'SELECT DISTINCT ev_owner_id FROM lists' doesn't work, might include lists from other people? - res: Set[str] = set() + res: set[str] = set() # need to cast as it's int by default for q in [ 'SELECT DISTINCT CAST(list_mapping_user_id AS TEXT) FROM list_mapping', @@ -239,7 +239,7 @@ def _process_one(f: Path, *, where: str) -> Iterator[Res[Tweet]]: NOT (statuses.in_r_user_id == -1 AND statuses.in_r_status_id == -1 AND statuses.conversation_id == 0) ''' - def _query_one(*, where: str, quoted: Set[int]) -> Iterator[Res[Tweet]]: + def _query_one(*, where: str, quoted: set[int]) -> Iterator[Res[Tweet]]: for ( tweet_id, user_username, @@ -263,7 +263,7 @@ def _process_one(f: Path, *, where: str) -> Iterator[Res[Tweet]]: text=content, ) - quoted: Set[int] = set() + quoted: set[int] = set() yield from _query_one(where=db_where, quoted=quoted) # get quoted tweets 'recursively' # TODO maybe do it for favs/bookmarks too? 
not sure diff --git a/my/twitter/archive.py b/my/twitter/archive.py index 1573754..c9d2dbc 100644 --- a/my/twitter/archive.py +++ b/my/twitter/archive.py @@ -7,6 +7,7 @@ from __future__ import annotations import html import json # hmm interesting enough, orjson didn't give much speedup here? from abc import abstractmethod +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime from functools import cached_property @@ -14,8 +15,6 @@ from itertools import chain from pathlib import Path from typing import ( TYPE_CHECKING, - Iterator, - Sequence, ) from more_itertools import unique_everseen diff --git a/my/twitter/common.py b/my/twitter/common.py index 258216f..8c346f6 100644 --- a/my/twitter/common.py +++ b/my/twitter/common.py @@ -1,17 +1,19 @@ -from my.core import __NOT_HPI_MODULE__ +from my.core import __NOT_HPI_MODULE__ # isort: skip +from collections.abc import Iterator from itertools import chain -from typing import Iterator, Any +from typing import Any from more_itertools import unique_everseen - # TODO add proper Protocol for Tweet Tweet = Any TweetId = str -from my.core import warn_if_empty, Res +from my.core import Res, warn_if_empty + + @warn_if_empty def merge_tweets(*sources: Iterator[Res[Tweet]]) -> Iterator[Res[Tweet]]: def key(r: Res[Tweet]): diff --git a/my/twitter/talon.py b/my/twitter/talon.py index 1b79727..dbf2e2e 100644 --- a/my/twitter/talon.py +++ b/my/twitter/talon.py @@ -7,10 +7,11 @@ from __future__ import annotations import re import sqlite3 from abc import abstractmethod +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import Iterator, Sequence, Union +from typing import Union from my.core import Paths, Res, datetime_aware, get_files from my.core.common import unique_everseen diff --git a/my/twitter/twint.py b/my/twitter/twint.py index ceb5406..5106923 100644 --- a/my/twitter/twint.py +++ b/my/twitter/twint.py @@ -1,17 +1,17 @@ """ Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export. """ +from collections.abc import Iterator from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import NamedTuple, Iterator, List +from typing import NamedTuple - -from my.core import Paths, Res, get_files, LazyLogger, Json, datetime_aware, stat, Stats +from my.core import Json, LazyLogger, Paths, Res, Stats, datetime_aware, get_files, stat from my.core.cfg import make_config from my.core.sqlite import sqlite_connection -from my.config import twint as user_config +from my.config import twint as user_config # isort: skip # TODO move to twitter.twint config structure @@ -76,7 +76,7 @@ class Tweet(NamedTuple): return text @property - def urls(self) -> List[str]: + def urls(self) -> list[str]: ustr = self.row['urls'] if len(ustr) == 0: return [] diff --git a/my/util/hpi_heartbeat.py b/my/util/hpi_heartbeat.py index 84790a4..6dcac7e 100644 --- a/my/util/hpi_heartbeat.py +++ b/my/util/hpi_heartbeat.py @@ -5,12 +5,13 @@ In particular the behaviour of import_original_module function The idea of testing is that overlays extend this module, and add their own items to items(), and the checker asserts all overlays have contributed. 
""" -from my.core import __NOT_HPI_MODULE__ +from my.core import __NOT_HPI_MODULE__ # isort: skip + +import sys +from collections.abc import Iterator from dataclasses import dataclass from datetime import datetime -import sys -from typing import Iterator, List NOW = datetime.now() @@ -19,10 +20,10 @@ NOW = datetime.now() class Item: dt: datetime message: str - path: List[str] + path: list[str] -def get_pkg_path() -> List[str]: +def get_pkg_path() -> list[str]: pkg = sys.modules[__package__] return list(pkg.__path__) diff --git a/my/vk/favorites.py b/my/vk/favorites.py index 9caae6d..5f278ff 100644 --- a/my/vk/favorites.py +++ b/my/vk/favorites.py @@ -1,20 +1,21 @@ # todo: uses my private export script?, timezone +from __future__ import annotations + +import json +from collections.abc import Iterable, Iterator from dataclasses import dataclass from datetime import datetime, timezone -import json -from typing import Iterator, Iterable, Optional - -from my.core import Json, datetime_aware, stat, Stats -from my.core.error import Res from my.config import vk as config # type: ignore[attr-defined] +from my.core import Json, Stats, datetime_aware, stat +from my.core.error import Res @dataclass class Favorite: dt: datetime_aware title: str - url: Optional[str] + url: str | None text: str diff --git a/my/vk/vk_messages_backup.py b/my/vk/vk_messages_backup.py index c73587f..4f593c8 100644 --- a/my/vk/vk_messages_backup.py +++ b/my/vk/vk_messages_backup.py @@ -2,18 +2,16 @@ VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]]) ''' # note: could reuse the original repo, but little point I guess since VK closed their API +import json +from collections.abc import Iterator from dataclasses import dataclass from datetime import datetime -import json -from typing import Dict, Iterator import pytz -from my.core import stat, Stats, Json, Res, datetime_aware, get_files -from my.core.common import unique_everseen - from my.config import vk_messages_backup as config - +from my.core import Json, Res, Stats, datetime_aware, get_files, stat +from my.core.common import unique_everseen # I think vk_messages_backup used this tz? # not sure if vk actually used to return this tz in api? 
@@ -45,7 +43,7 @@ class Message: body: str -Users = Dict[Uid, User] +Users = dict[Uid, User] def users() -> Users: diff --git a/my/whatsapp/android.py b/my/whatsapp/android.py index 3dfed3e..27ee743 100644 --- a/my/whatsapp/android.py +++ b/my/whatsapp/android.py @@ -3,18 +3,19 @@ Whatsapp data from Android app database (in =/data/data/com.whatsapp/databases/m """ from __future__ import annotations +import sqlite3 +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -import sqlite3 -from typing import Union, Sequence, Iterator, Optional +from typing import Union -from my.core import get_files, Paths, datetime_aware, Res, make_logger, make_config +from my.core import Paths, Res, datetime_aware, get_files, make_config, make_logger from my.core.common import unique_everseen from my.core.error import echain, notnone from my.core.sqlite import sqlite_connection -import my.config +import my.config # isort: skip logger = make_logger(__name__) @@ -23,7 +24,7 @@ logger = make_logger(__name__) class Config(my.config.whatsapp.android): # paths[s]/glob to the exported sqlite databases export_path: Paths - my_user_id: Optional[str] = None + my_user_id: str | None = None config = make_config(Config) @@ -38,13 +39,13 @@ class Chat: id: str # todo not sure how to support renames? # could change Chat object itself, but this won't work well with incremental processing.. - name: Optional[str] + name: str | None @dataclass(unsafe_hash=True) class Sender: id: str - name: Optional[str] + name: str | None @dataclass(unsafe_hash=True) @@ -53,7 +54,7 @@ class Message: id: str dt: datetime_aware sender: Sender - text: Optional[str] + text: str | None Entity = Union[Chat, Sender, Message] @@ -125,9 +126,9 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: ts: int = notnone(r['timestamp']) dt = datetime.fromtimestamp(ts / 1000, tz=timezone.utc) - text: Optional[str] = r['text_data'] - media_file_path: Optional[str] = r['file_path'] - media_file_size: Optional[int] = r['file_size'] + text: str | None = r['text_data'] + media_file_path: str | None = r['file_path'] + media_file_size: int | None = r['file_size'] message_type = r['message_type'] diff --git a/my/youtube/takeout.py b/my/youtube/takeout.py index f29b2e3..703715f 100644 --- a/my/youtube/takeout.py +++ b/my/youtube/takeout.py @@ -1,7 +1,8 @@ from __future__ import annotations +from collections.abc import Iterable, Iterator from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Iterable, Iterator +from typing import TYPE_CHECKING, Any from my.core import Res, Stats, datetime_aware, make_logger, stat, warnings from my.core.compat import deprecated diff --git a/my/zotero.py b/my/zotero.py index 4440aae..8eb34ba 100644 --- a/my/zotero.py +++ b/my/zotero.py @@ -1,14 +1,16 @@ +from __future__ import annotations as _annotations + +import json +import sqlite3 +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone -import json -from typing import Iterator, Optional, Dict, Any, Sequence from pathlib import Path -import sqlite3 +from typing import Any -from my.core import make_logger, Res, datetime_aware +from my.core import Res, datetime_aware, make_logger from my.core.sqlite import sqlite_copy_and_open - logger = make_logger(__name__) @@ -26,7 +28,7 @@ class Item: """Corresponds to 'Zotero item'""" file: Path title: str - url: Optional[Url] + url: Url | None tags: Sequence[str] 
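The `my.zotero` diff above imports `sqlite_copy_and_open`, which exists because Zotero keeps its database open (and potentially mid-write) while HPI reads it. A rough sketch of what a copy-then-open helper like that does, illustrative only and not the actual `my.core.sqlite` implementation:

```python
import shutil
import sqlite3
import tempfile
from pathlib import Path

def copy_and_open(db: Path) -> sqlite3.Connection:
    # copy into a temp dir first: the source db may be locked or mid-transaction
    tdir = Path(tempfile.mkdtemp())
    tdb = tdir / db.name
    shutil.copy(db, tdb)
    # WAL/journal sidecars hold recent writes, so copy them too if present
    for suffix in ('-wal', '-shm', '-journal'):
        side = db.parent / (db.name + suffix)
        if side.exists():
            shutil.copy(side, tdir / side.name)
    conn = sqlite3.connect(tdb)
    conn.row_factory = sqlite3.Row  # allows r['column'] access, as in _query_raw
    return conn
```

Copying the WAL sidecar matters: without it, anything not yet checkpointed into the main file would be invisible in the copy.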
@@ -39,8 +41,8 @@ class Annotation: page: int """0-indexed""" - text: Optional[str] - comment: Optional[str] + text: str | None + comment: str | None tags: Sequence[str] color_hex: str """Original hex-encoded color in zotero""" @@ -97,7 +99,7 @@ WHERE ID.fieldID = 13 AND IA.itemID = ? # TODO maybe exclude 'private' methods from detection? -def _query_raw() -> Iterator[Res[Dict[str, Any]]]: +def _query_raw() -> Iterator[Res[dict[str, Any]]]: [db] = inputs() with sqlite_copy_and_open(db) as conn: @@ -157,7 +159,7 @@ def _hex2human(color_hex: str) -> str: }.get(color_hex, color_hex) -def _parse_annotation(r: Dict) -> Annotation: +def _parse_annotation(r: dict) -> Annotation: text = r['text'] comment = r['comment'] # todo use json query for this? diff --git a/my/zulip/organization.py b/my/zulip/organization.py index 2e0df4b..d0cfcb7 100644 --- a/my/zulip/organization.py +++ b/my/zulip/organization.py @@ -6,11 +6,11 @@ from __future__ import annotations import json from abc import abstractmethod +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, timezone from itertools import count from pathlib import Path -from typing import Iterator, Sequence from my.core import ( Json, diff --git a/ruff.toml b/ruff.toml index 5fbd657..3d803e7 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,4 +1,4 @@ -target-version = "py38" # NOTE: inferred from pyproject.toml if present +target-version = "py39" # NOTE: inferred from pyproject.toml if present lint.extend-select = [ "F", # flakes rules -- default, but extend just in case @@ -26,8 +26,8 @@ lint.extend-select = [ "TID", # various imports suggestions "TRY", # various exception handling rules "UP", # detect deprecated python stdlib stuff - # "FA", # suggest using from __future__ import annotations TODO enable later after we make sure cachew works? - # "PTH", # pathlib migration -- TODO enable later + "FA", # suggest using from __future__ import annotations + "PTH", # pathlib migration "ARG", # unused argument checks # "A", # builtin shadowing -- TODO handle later # "EM", # TODO hmm could be helpful to prevent duplicate err msg in traceback.. but kinda annoying @@ -35,6 +35,11 @@ lint.extend-select = [ # "ALL", # uncomment this to check for new rules! ] +# Preserve types, even if a file imports `from __future__ import annotations` +# we need this for cachew to work with HPI types on 3.9 +# can probably remove after 3.10? +lint.pyupgrade.keep-runtime-typing = true + lint.ignore = [ "D", # annoying nags about docstrings "N", # pep naming @@ -68,11 +73,6 @@ lint.ignore = [ "F841", # Local variable `count` is assigned to but never used ### -### TODO should be fine to use these with from __future__ import annotations? -### there was some issue with cachew though... double check this? - "UP006", # use type instead of Type - "UP007", # use X | Y instead of Union -### "RUF100", # unused noqa -- handle later "RUF012", # mutable class attrs should be annotated with ClassVar... 
ugh pretty annoying for user configs From a2b397ec4a83e6fded7c758470c49f6f18f2ab81 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 22 Oct 2024 20:50:37 +0100 Subject: [PATCH 03/11] my.whatsapp.android: adapt to new db format --- my/books/kobo.py | 2 +- my/whatsapp/android.py | 33 ++++++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/my/books/kobo.py b/my/books/kobo.py index 899ef31..40b7ed7 100644 --- a/my/books/kobo.py +++ b/my/books/kobo.py @@ -3,4 +3,4 @@ from my.core import warnings warnings.high('my.books.kobo is deprecated! Please use my.kobo instead!') from my.core.util import __NOT_HPI_MODULE__ -from my.kobo import * # type: ignore[no-redef] +from my.kobo import * diff --git a/my/whatsapp/android.py b/my/whatsapp/android.py index 27ee743..3cd4436 100644 --- a/my/whatsapp/android.py +++ b/my/whatsapp/android.py @@ -1,6 +1,7 @@ """ Whatsapp data from Android app database (in =/data/data/com.whatsapp/databases/msgstore.db=) """ + from __future__ import annotations import sqlite3 @@ -63,11 +64,27 @@ Entity = Union[Chat, Sender, Message] def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: # TODO later, split out Chat/Sender objects separately to safe on object creation, similar to other android data sources + try: + db.execute('SELECT jid_row_id FROM chat_view') + except sqlite3.OperationalError as oe: + if 'jid_row_id' not in str(oe): + raise oe + new_version_202410 = False + else: + new_version_202410 = True + + if new_version_202410: + chat_id_col = 'jid.raw_string' + jid_join = 'JOIN jid ON jid._id == chat_view.jid_row_id' + else: + chat_id_col = 'chat_view.raw_string_jid' + jid_join = '' + chats = {} for r in db.execute( - ''' - SELECT raw_string_jid AS chat_id, subject - FROM chat_view + f''' + SELECT {chat_id_col} AS chat_id, subject + FROM chat_view {jid_join} WHERE chat_id IS NOT NULL /* seems that it might be null for chats that are 'recycled' (the db is more like an LRU cache) */ ''' ): @@ -89,6 +106,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: ): # TODO seems that msgstore.db doesn't have contact names # perhaps should extract from wa.db and match against wa_contacts.jid? + # TODO these can also be chats? not sure if need to include... s = Sender( id=r['raw_string'], name=None, @@ -100,9 +118,9 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: # so even if it seems as if it has a column (e.g. for attachment path), there is actually no such data # so makes more sense to just query message column directly for r in db.execute( - ''' + f''' SELECT - C.raw_string_jid AS chat_id, + {chat_id_col} AS chat_id, M.key_id, M.timestamp, sender_jid_row_id, M.from_me, @@ -111,8 +129,9 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: MM.file_size, M.message_type FROM message AS M - LEFT JOIN chat_view AS C ON M.chat_row_id = C._id - LEFT JOIN message_media AS MM ON M._id = MM.message_row_id + LEFT JOIN chat_view ON M.chat_row_id = chat_view._id + {jid_join} + left JOIN message_media AS MM ON M._id = MM.message_row_id WHERE M.key_id != -1 /* key_id -1 is some sort of fake message where everything is null */ /* type 7 seems to be some dummy system message. 
sometimes contain chat name, but usually null, so ignore them From 7ab6f0d5cbce2241ba8a7848ff1bf18e147d26cf Mon Sep 17 00:00:00 2001 From: purarue <7804791+purarue@users.noreply.github.com> Date: Fri, 25 Oct 2024 09:39:00 -0700 Subject: [PATCH 04/11] chore: update urls --- README.org | 4 ++-- doc/DENYLIST.md | 6 +++--- doc/MODULES.org | 12 ++++++------ doc/MODULE_DESIGN.org | 8 ++++---- doc/OVERLAYS.org | 2 +- doc/QUERY.md | 6 +++--- doc/SETUP.org | 2 +- misc/.flake8-karlicoss | 2 +- my/browser/active_browser.py | 2 +- my/browser/export.py | 2 +- my/google/takeout/parser.py | 10 +++++----- my/ip/all.py | 4 ++-- my/ip/common.py | 2 +- my/location/fallback/via_ip.py | 2 +- my/location/google_takeout.py | 2 +- my/location/google_takeout_semantic.py | 2 +- my/location/via_ip.py | 2 +- my/reddit/pushshift.py | 6 +++--- 18 files changed, 38 insertions(+), 38 deletions(-) diff --git a/README.org b/README.org index c065a0c..79621a5 100644 --- a/README.org +++ b/README.org @@ -723,10 +723,10 @@ If you want to write modules for personal use but don't want to merge them into Other HPI Repositories: -- [[https://github.com/seanbreckenridge/HPI][seanbreckenridge/HPI]] +- [[https://github.com/purarue/HPI][purarue/HPI]] - [[https://github.com/madelinecameron/hpi][madelinecameron/HPI]] -If you want to create your own to create your own modules/override something here, you can use the [[https://github.com/seanbreckenridge/HPI-template][template]]. +If you want to create your own to create your own modules/override something here, you can use the [[https://github.com/purarue/HPI-template][template]]. * Related links :PROPERTIES: diff --git a/doc/DENYLIST.md b/doc/DENYLIST.md index 440715c..3d8dea0 100644 --- a/doc/DENYLIST.md +++ b/doc/DENYLIST.md @@ -76,7 +76,7 @@ This would typically be used in an overridden `all.py` file, or in a one-off scr which you may want to filter out some items from a source, progressively adding more items to the denylist as you go. -A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/seanbreckenridge/HPI)): +A potential `my/ip/all.py` file might look like (Sidenote: `discord` module from [here](https://github.com/purarue/HPI)): ```python from typing import Iterator @@ -119,9 +119,9 @@ python3 -c 'from my.ip import all; all.deny.deny_cli(all.ips())' To edit the `all.py`, you could either: - install it as editable (`python3 -m pip install --user -e ./HPI`), and then edit the file directly -- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/seanbreckenridge/reorder_editable), and possibly the [`HPI-template`](https://github.com/seanbreckenridge/HPI-template) to create your own HPI namespace package to create your own `all.py` file. +- or, create a namespace package, which splits the package across multiple directories. For info on that see [`MODULE_DESIGN`](https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#namespace-packages), [`reorder_editable`](https://github.com/purarue/reorder_editable), and possibly the [`HPI-template`](https://github.com/purarue/HPI-template) to create your own HPI namespace package to create your own `all.py` file. 
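The `all.py` override this doc keeps referring to is small in practice. A sketch modelled on the DENYLIST doc's own example, where the `discord` source module and the denylist path are hypothetical placeholders, just as they are in the doc:

```python
from collections.abc import Iterator

from my.core.denylist import DenyList
from my.ip.common import IP

deny = DenyList("~/data/ip_denylist.json")

@deny.filter
def ips() -> Iterator[IP]:
    # 'discord' stands in for whatever extra provider(s) you actually use
    from my.ip import discord
    yield from discord.ips()
```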
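A related pattern worth pulling out of the `my.whatsapp.android` patch earlier in this series: it detects the database schema by probing for the new column and branching on the resulting `OperationalError`, instead of trying to track app version numbers. Condensed from that diff (same queries, simplified control flow):

```python
import sqlite3

def is_new_schema(db: sqlite3.Connection) -> bool:
    # probe for the column added in the 2024-10 schema
    try:
        db.execute('SELECT jid_row_id FROM chat_view')
    except sqlite3.OperationalError as oe:
        if 'jid_row_id' not in str(oe):
            raise  # unrelated failure -- don't silently treat it as 'old schema'
        return False  # old schema: chat_view still carries raw_string_jid
    return True  # new schema: chats reference the separate jid table

def chats_query(new_schema: bool) -> str:
    # the probe result only affects how the query text is assembled
    if new_schema:
        return (
            'SELECT jid.raw_string AS chat_id, subject '
            'FROM chat_view JOIN jid ON jid._id == chat_view.jid_row_id'
        )
    return 'SELECT chat_view.raw_string_jid AS chat_id, subject FROM chat_view'
```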
-For a real example of this see, [seanbreckenridge/HPI-personal](https://github.com/seanbreckenridge/HPI-personal/blob/master/my/ip/all.py) +For a real example of this see, [purarue/HPI-personal](https://github.com/purarue/HPI-personal/blob/master/my/ip/all.py) Sidenote: the reason why we want to specifically override the all.py and not just create a script that filters out the items you're diff --git a/doc/MODULES.org b/doc/MODULES.org index 9f48024..347d88d 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -76,7 +76,7 @@ The config snippets below are meant to be modified accordingly and *pasted into You don't have to set up all modules at once, it's recommended to do it gradually, to get the feel of how HPI works. -For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py][config]] +For an extensive/complex example, you can check out ~@purarue~'s [[https://github.com/purarue/dotfiles/blob/master/.config/my/my/config/__init__.py][config]] # Nested Configurations before the doc generation using the block below ** [[file:../my/reddit][my.reddit]] @@ -96,7 +96,7 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http class pushshift: ''' - Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments + Uses [[https://github.com/purarue/pushshift_comment_export][pushshift]] to get access to old comments ''' # path[s]/glob to the exported JSON data @@ -106,7 +106,7 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http ** [[file:../my/browser/][my.browser]] - Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]] + Parses browser history using [[http://github.com/purarue/browserexport][browserexport]] #+begin_src python class browser: @@ -132,7 +132,7 @@ For an extensive/complex example, you can check out ~@seanbreckenridge~'s [[http You might also be able to use [[file:../my/location/via_ip.py][my.location.via_ip]] which uses =my.ip.all= to provide geolocation data for an IPs (though no IPs are provided from any - of the sources here). For an example of usage, see [[https://github.com/seanbreckenridge/HPI/tree/master/my/ip][here]] + of the sources here). For an example of usage, see [[https://github.com/purarue/HPI/tree/master/my/ip][here]] #+begin_src python class location: @@ -256,9 +256,9 @@ for cls, p in modules: ** [[file:../my/google/takeout/parser.py][my.google.takeout.parser]] - Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] + Parses Google Takeout using [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] - See [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts + See [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts If the =DISABLE_TAKEOUT_CACHE= environment variable is set, this won't cache individual exports in =~/.cache/google_takeout_parser= diff --git a/doc/MODULE_DESIGN.org b/doc/MODULE_DESIGN.org index 7aedf2f..442dbf2 100644 --- a/doc/MODULE_DESIGN.org +++ b/doc/MODULE_DESIGN.org @@ -67,7 +67,7 @@ If you want to disable a source, you have a few options. ... 
that suppresses the warning message and lets you use ~my.location.all~ without having to change any lines of code

-Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/seanbreckenridge/HPI#partially-in-usewith-overrides][seanbreckenridge]]'s location and ip modules.
+Another benefit is that all the custom sources/data is localized to the ~all.py~ file, so a user can override the ~all.py~ (see the sections below on ~namespace packages~) file in their own HPI repository, adding additional sources without having to maintain a fork and patching in changes as things eventually change. For a 'real world' example of that, see [[https://github.com/purarue/HPI#partially-in-usewith-overrides][purarue]]'s location and ip modules.

This is of course not required for personal or single file modules, it's just the pattern that seems to have the least amount of friction for the user, while being extendable, and without using a bulky plugin system to let users add additional sources.

@@ -208,13 +208,13 @@ Where ~lastfm.py~ is your version of ~my.lastfm~, which you've copied from this

Then, running ~python3 -m pip install -e .~ in that directory would install that as part of the namespace package, and assuming (see below for possible issues) this appears on ~sys.path~ before the upstream repository, your ~lastfm.py~ file overrides the upstream. Adding more files, like ~my.some_new_module~ into that directory immediately updates the global ~my~ package -- allowing you to quickly add new modules without having to re-install.

-If you install both directories as editable packages (which has the benefit of any changes you make in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/seanbreckenridge/reorder_editable][reorder_editable]] repository.
+If you install both directories as editable packages (which has the benefit of any changes you make in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/purarue/reorder_editable][reorder_editable]] repository.

There is no limit to how many directories you could install into a single namespace package, which could be a possible way for people to install additional HPI modules, without worrying about the module count here becoming too large to manage.
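Since ~sys.path~ order decides which editable install wins, the quickest sanity check for the setup described above is to see where an overridden module actually resolves from. A sketch using the text's ~my.lastfm~ example, assuming both installs are in place:

```python
import importlib

mod = importlib.import_module('my.lastfm')
# points into whichever checkout won the sys.path race:
# your overlay if it comes first, the upstream repository otherwise
print(mod.__file__)
```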
-There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/seanbreckenridge/HPI-template][template]] to get started. +There are some other users [[https://github.com/hpi/hpi][who have begun publishing their own modules]] as namespace packages, which you could potentially install and use, in addition to this repository, if any of those interest you. If you want to create your own you can use the [[https://github.com/purarue/HPI-template][template]] to get started. -Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/seanbreckenridge/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example. +Though, enabling this many modules may make ~hpi doctor~ look pretty busy. You can explicitly choose to enable/disable modules with a list of modules/regexes in your [[https://github.com/karlicoss/HPI/blob/f559e7cb899107538e6c6bbcf7576780604697ef/my/core/core_config.py#L24-L55][core config]], see [[https://github.com/purarue/dotfiles/blob/a1a77c581de31bd55a6af3d11b8af588614a207e/.config/my/my/config/__init__.py#L42-L72][here]] for an example. You may use the other modules or [[https://github.com/karlicoss/hpi-personal-overlay][my overlay]] as reference, but python packaging is already a complicated issue, before adding complexities like namespace packages and editable installs on top of it... If you're having trouble extending HPI in this fashion, you can open an issue here, preferably with a link to your code/repository and/or ~setup.py~ you're trying to use. diff --git a/doc/OVERLAYS.org b/doc/OVERLAYS.org index 1e6cf8f..a573007 100644 --- a/doc/OVERLAYS.org +++ b/doc/OVERLAYS.org @@ -66,7 +66,7 @@ This basically means that modules will be searched in both paths, with overlay t ** Installing with =--use-pep517= -See here for discussion https://github.com/seanbreckenridge/reorder_editable/issues/2, but TLDR it should work similarly. +See here for discussion https://github.com/purarue/reorder_editable/issues/2, but TLDR it should work similarly. 
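One way to eyeball that ordering directly (a sketch; ~reorder_editable~, linked above, is the real tooling for fixing it):

```python
import sys
from pathlib import Path

# earlier entries win when module names collide, so the order printed below
# is the effective override order; note that pep517-style editable installs
# may go through import hooks rather than plain sys.path entries
for entry in sys.path:
    if entry and (Path(entry) / 'my').is_dir():
        print(entry)
```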
* Testing runtime behaviour (editable install) diff --git a/doc/QUERY.md b/doc/QUERY.md index b672dff..a85450a 100644 --- a/doc/QUERY.md +++ b/doc/QUERY.md @@ -99,7 +99,7 @@ Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.tim authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))), message='sources.smscalls: propogate errors if there are breaking ' 'schema changes', - repo='/home/sean/Repos/promnesia-fork', + repo='/home/username/Repos/promnesia-fork', sha='22a434fca9a28df9b0915ccf16368df129d2c9ce', ref='refs/heads/smscalls-handle-result') ``` @@ -195,7 +195,7 @@ To preview, you can use something like [`qgis`](https://qgis.org/en/site/) or fo chicago trip -(Sidenote: this is [`@seanbreckenridge`](https://github.com/seanbreckenridge/)s locations, on a trip to Chicago) +(Sidenote: this is [`@purarue`](https://github.com/purarue/)s locations, on a trip to Chicago) ## Python reference @@ -301,4 +301,4 @@ The `hpi query` command is a CLI wrapper around the code in [`query.py`](../my/c If you specify a range, drop_unsorted is forced to be True ``` -Those can be imported and accept any sort of iterator, `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/seanbreckenridge/HPI-personal/blob/master/scripts/listens) which just passes an generator (iterator) as the first argument to `query_range` +Those can be imported and accept any sort of iterator, `hpi query` just defaults to the output of functions here. As an example, see [`listens`](https://github.com/purarue/HPI-personal/blob/master/scripts/listens) which just passes an generator (iterator) as the first argument to `query_range` diff --git a/doc/SETUP.org b/doc/SETUP.org index 0fced62..ee9571c 100644 --- a/doc/SETUP.org +++ b/doc/SETUP.org @@ -387,7 +387,7 @@ But there is an extra caveat: rexport is already coming with nice [[https://gith Several other HPI modules are following a similar pattern: hypothesis, instapaper, pinboard, kobo, etc. 
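The rexport/pushshift merge described just below boils down to chaining both iterators and de-duplicating on a stable id. Roughly (a sketch; attribute and function names are illustrative, the real logic lives in =my.reddit.all=):

```python
from collections.abc import Iterator
from itertools import chain
from typing import Any

def merged_comments(*sources: Iterator[Any]) -> Iterator[Any]:
    emitted: set[str] = set()
    for comment in chain(*sources):
        key = comment.id  # assumes every provider exposes a stable comment id
        if key in emitted:
            continue  # first source wins, so pass rexport before pushshift
        emitted.add(key)
        yield comment
```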
-Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/seanbreckenridge/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments= +Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/purarue/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments= ** Twitter diff --git a/misc/.flake8-karlicoss b/misc/.flake8-karlicoss index 3c98b96..5933253 100644 --- a/misc/.flake8-karlicoss +++ b/misc/.flake8-karlicoss @@ -32,6 +32,6 @@ ignore = # # as a reference: -# https://github.com/seanbreckenridge/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg +# https://github.com/purarue/cookiecutter-template/blob/master/%7B%7Bcookiecutter.module_name%7D%7D/setup.cfg # and this https://github.com/karlicoss/HPI/pull/151 # find ./my | entr flake8 --ignore=E402,E501,E741,W503,E266,E302,E305,E203,E261,E252,E251,E221,W291,E225,E303,E702,E202,F841,E731,E306,E127 E722,E231 my | grep -v __NOT_HPI_MODULE__ diff --git a/my/browser/active_browser.py b/my/browser/active_browser.py index 8051f1b..1686fc5 100644 --- a/my/browser/active_browser.py +++ b/my/browser/active_browser.py @@ -1,5 +1,5 @@ """ -Parses active browser history by backing it up with [[http://github.com/seanbreckenridge/sqlite_backup][sqlite_backup]] +Parses active browser history by backing it up with [[http://github.com/purarue/sqlite_backup][sqlite_backup]] """ REQUIRES = ["browserexport", "sqlite_backup"] diff --git a/my/browser/export.py b/my/browser/export.py index 351cf6e..52ade0e 100644 --- a/my/browser/export.py +++ b/my/browser/export.py @@ -1,5 +1,5 @@ """ -Parses browser history using [[http://github.com/seanbreckenridge/browserexport][browserexport]] +Parses browser history using [[http://github.com/purarue/browserexport][browserexport]] """ REQUIRES = ["browserexport"] diff --git a/my/google/takeout/parser.py b/my/google/takeout/parser.py index 80c2be1..13fd04a 100644 --- a/my/google/takeout/parser.py +++ b/my/google/takeout/parser.py @@ -1,7 +1,7 @@ """ -Parses Google Takeout using [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] +Parses Google Takeout using [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] -See [[https://github.com/seanbreckenridge/google_takeout_parser][google_takeout_parser]] for more information +See [[https://github.com/purarue/google_takeout_parser][google_takeout_parser]] for more information about how to export and organize your takeouts If the DISABLE_TAKEOUT_CACHE environment variable is set, this won't cache individual @@ -12,7 +12,7 @@ zip files of the exports, which are temporarily unpacked while creating the cachew cache """ -REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] +REQUIRES = ["git+https://github.com/purarue/google_takeout_parser"] import os from collections.abc import Sequence @@ -36,7 +36,7 @@ from google_takeout_parser.merge import CacheResults, GoogleEventSet from google_takeout_parser.models import BaseEvent from google_takeout_parser.path_dispatch import TakeoutParser -# see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example +# see 
https://github.com/purarue/dotfiles/blob/master/.config/my/my/config/__init__.py for an example from my.config import google as user_config @@ -123,7 +123,7 @@ def events(disable_takeout_cache: bool = DISABLE_TAKEOUT_CACHE) -> CacheResults: else: results = exit_stack.enter_context(match_structure(path, expected=EXPECTED, partial=True)) for m in results: - # e.g. /home/sean/data/google_takeout/Takeout-1634932457.zip") -> 'Takeout-1634932457' + # e.g. /home/username/data/google_takeout/Takeout-1634932457.zip") -> 'Takeout-1634932457' # means that zipped takeouts have nice filenames from cachew cw_id, _, _ = path.name.rpartition(".") # each takeout result is cached as well, in individual databases per-type diff --git a/my/ip/all.py b/my/ip/all.py index e8277c1..c267383 100644 --- a/my/ip/all.py +++ b/my/ip/all.py @@ -3,10 +3,10 @@ An example all.py stub module that provides ip data To use this, you'd add IP providers that yield IPs to the 'ips' function -For an example of how this could be used, see https://github.com/seanbreckenridge/HPI/tree/master/my/ip +For an example of how this could be used, see https://github.com/purarue/HPI/tree/master/my/ip """ -REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"] +REQUIRES = ["git+https://github.com/purarue/ipgeocache"] from collections.abc import Iterator diff --git a/my/ip/common.py b/my/ip/common.py index ef54ee3..b551281 100644 --- a/my/ip/common.py +++ b/my/ip/common.py @@ -1,5 +1,5 @@ """ -Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]] +Provides location/timezone data from IP addresses, using [[https://github.com/purarue/ipgeocache][ipgeocache]] """ from my.core import __NOT_HPI_MODULE__ # isort: skip diff --git a/my/location/fallback/via_ip.py b/my/location/fallback/via_ip.py index 732af67..8b50878 100644 --- a/my/location/fallback/via_ip.py +++ b/my/location/fallback/via_ip.py @@ -2,7 +2,7 @@ Converts IP addresses provided by my.location.ip to estimated locations """ -REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"] +REQUIRES = ["git+https://github.com/purarue/ipgeocache"] from dataclasses import dataclass from datetime import timedelta diff --git a/my/location/google_takeout.py b/my/location/google_takeout.py index cb5bef3..8613257 100644 --- a/my/location/google_takeout.py +++ b/my/location/google_takeout.py @@ -2,7 +2,7 @@ Extracts locations using google_takeout_parser -- no shared code with the deprecated my.location.google """ -REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] +REQUIRES = ["git+https://github.com/purarue/google_takeout_parser"] from collections.abc import Iterator diff --git a/my/location/google_takeout_semantic.py b/my/location/google_takeout_semantic.py index 7bddfa8..e84a932 100644 --- a/my/location/google_takeout_semantic.py +++ b/my/location/google_takeout_semantic.py @@ -5,7 +5,7 @@ Extracts semantic location history using google_takeout_parser # This is a separate module to prevent ImportError and a new config block from breaking # previously functional my.location.google_takeout locations -REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] +REQUIRES = ["git+https://github.com/purarue/google_takeout_parser"] from collections.abc import Iterator from dataclasses import dataclass diff --git a/my/location/via_ip.py b/my/location/via_ip.py index d465ad0..240ec5f 100644 --- a/my/location/via_ip.py +++ b/my/location/via_ip.py @@ -1,4 +1,4 @@ -REQUIRES = 
["git+https://github.com/seanbreckenridge/ipgeocache"] +REQUIRES = ["git+https://github.com/purarue/ipgeocache"] from my.core.warnings import high diff --git a/my/reddit/pushshift.py b/my/reddit/pushshift.py index 1bfa048..12f592b 100644 --- a/my/reddit/pushshift.py +++ b/my/reddit/pushshift.py @@ -1,11 +1,11 @@ """ Gives you access to older comments possibly not accessible with rexport using pushshift -See https://github.com/seanbreckenridge/pushshift_comment_export +See https://github.com/purarue/pushshift_comment_export """ REQUIRES = [ - "git+https://github.com/seanbreckenridge/pushshift_comment_export", + "git+https://github.com/purarue/pushshift_comment_export", ] from dataclasses import dataclass @@ -21,7 +21,7 @@ from my.core.cfg import make_config @dataclass class pushshift_config(uconfig.pushshift): ''' - Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments + Uses [[https://github.com/purarue/pushshift_comment_export][pushshift]] to get access to old comments ''' # path[s]/glob to the exported JSON data From ad55c5c345888abaebf59ae85923339b7ceccbb4 Mon Sep 17 00:00:00 2001 From: Srajan Garg Date: Tue, 12 Nov 2024 19:05:27 -0500 Subject: [PATCH 05/11] fix typo in rexport DAL (#405) * fix typo in rexport DAL --- my/reddit/rexport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/my/reddit/rexport.py b/my/reddit/rexport.py index cb6af01..262635b 100644 --- a/my/reddit/rexport.py +++ b/my/reddit/rexport.py @@ -146,7 +146,7 @@ if not TYPE_CHECKING: # here we just check that types are available, we don't actually want to import them # fmt: off dal.Subreddit # noqa: B018 - dal.Profil # noqa: B018e + dal.Profile # noqa: B018 dal.Multireddit # noqa: B018 # fmt: on except AttributeError as ae: From a7f05c2cad0c500210f966e0f50e0b309490cc53 Mon Sep 17 00:00:00 2001 From: purarue <7804791+purarue@users.noreply.github.com> Date: Wed, 20 Nov 2024 00:03:40 -0800 Subject: [PATCH 06/11] doc: spelling fixes --- CHANGELOG.md | 2 +- doc/OVERLAYS.org | 6 +++--- doc/QUERY.md | 2 +- my/core/cachew.py | 2 +- my/core/konsume.py | 2 +- my/core/logging.py | 2 +- my/core/tests/test_tmp_config.py | 2 +- my/core/utils/itertools.py | 4 ++-- my/fbmessenger/__init__.py | 2 +- my/fbmessenger/android.py | 2 +- my/instagram/all.py | 2 +- my/instagram/gdpr.py | 4 ++-- my/reddit/__init__.py | 2 +- my/smscalls.py | 4 ++-- my/stackexchange/gdpr.py | 2 +- my/time/tz/via_location.py | 2 +- my/tinder/android.py | 2 +- my/topcoder.py | 2 +- my/twitter/android.py | 2 +- my/twitter/twint.py | 2 +- my/whatsapp/android.py | 2 +- my/youtube/takeout.py | 2 +- 22 files changed, 27 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dd19df..d60ef35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ General/my.core changes: - e81dddddf083ffd81aa7e2b715bd34f59949479c properly resolve class properties in make_config + add test Modules: -- some innitial work on filling **InfluxDB** with HPI data +- some initial work on filling **InfluxDB** with HPI data - pinboard - 42399f6250d9901d93dcedcfe05f7857babcf834: **breaking backwards compatibility**, use pinbexport module directly diff --git a/doc/OVERLAYS.org b/doc/OVERLAYS.org index a573007..7bafa48 100644 --- a/doc/OVERLAYS.org +++ b/doc/OVERLAYS.org @@ -10,7 +10,7 @@ Relevant discussion about overlays: https://github.com/karlicoss/HPI/issues/102 # You can see them TODO in overlays dir -Consider a toy package/module structure with minimal code, wihout any actual data 
parsing, just for demonstration purposes. +Consider a toy package/module structure with minimal code, without any actual data parsing, just for demonstration purposes. - =main= package structure # TODO do links @@ -19,7 +19,7 @@ Consider a toy package/module structure with minimal code, wihout any actual dat Extracts Twitter data from GDPR archive. - =my/twitter/all.py= Merges twitter data from multiple sources (only =gdpr= in this case), so data consumers are agnostic of specific data sources used. - This will be overriden by =overlay=. + This will be overridden by =overlay=. - =my/twitter/common.py= Contains helper function to merge data, so they can be reused by overlay's =all.py=. - =my/reddit.py= @@ -126,7 +126,7 @@ https://github.com/python/mypy/blob/1dd8e7fe654991b01bd80ef7f1f675d9e3910c3a/myp For now, I opened an issue in mypy repository https://github.com/python/mypy/issues/16683 -But ok, maybe mypy treats =main= as an external package somhow but still type checks it properly? +But ok, maybe mypy treats =main= as an external package somehow but still type checks it properly? Let's see what's going on with imports: : $ mypy --namespace-packages --strict -p my --follow-imports=error diff --git a/doc/QUERY.md b/doc/QUERY.md index a85450a..9a5d9d3 100644 --- a/doc/QUERY.md +++ b/doc/QUERY.md @@ -97,7 +97,7 @@ By default, this just returns the items in the order they were returned by the f hpi query my.coding.commits.commits --order-key committed_dt --limit 1 --reverse --output pprint --stream Commit(committed_dt=datetime.datetime(2023, 4, 14, 23, 9, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))), authored_dt=datetime.datetime(2023, 4, 14, 23, 4, 1, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))), - message='sources.smscalls: propogate errors if there are breaking ' + message='sources.smscalls: propagate errors if there are breaking ' 'schema changes', repo='/home/username/Repos/promnesia-fork', sha='22a434fca9a28df9b0915ccf16368df129d2c9ce', diff --git a/my/core/cachew.py b/my/core/cachew.py index 9ccee09..8ce2f2b 100644 --- a/my/core/cachew.py +++ b/my/core/cachew.py @@ -136,7 +136,7 @@ if TYPE_CHECKING: CC = Callable[P, R] # need to give it a name, if inlined into bound=, mypy runs in a bug PathProvider = Union[PathIsh, Callable[P, PathIsh]] # NOTE: in cachew, HashFunction type returns str - # however in practice, cachew alwasy calls str for its result + # however in practice, cachew always calls str for its result # so perhaps better to switch it to Any in cachew as well HashFunction = Callable[P, Any] diff --git a/my/core/konsume.py b/my/core/konsume.py index 6d24167..41b5a4e 100644 --- a/my/core/konsume.py +++ b/my/core/konsume.py @@ -236,7 +236,7 @@ def test_zoom() -> None: # - very flexible, easy to adjust behaviour # - cons: # - can forget to assert about extra entities etc, so error prone -# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes erro handling harder +# - if we do something like =assert j.pop('status') == 200, j=, by the time assert happens we already popped item -- makes error handling harder # - a bit verbose.. so probably requires some helper functions though (could be much leaner than current konsume though) # - if we assert, then terminates parsing too early, if we're defensive then inflates the code a lot with if statements # - TODO perhaps combine warnings somehow or at least only emit once per module? 
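The trade-offs that konsume.py comment block lists are easier to weigh with the pattern written out. A small sketch of the pop-and-assert style it discusses (function and field names invented for illustration):

```python
from typing import Any

def parse_response(j: dict[str, Any]) -> list[Any]:
    jj = dict(j)  # work on a copy so the assert messages can show the original
    status = jj.pop('status')
    assert status == 200, j
    items = jj.pop('items')
    jj.pop('warnings', None)  # a field we know about and deliberately ignore
    assert not jj, f'unexpected leftover keys: {jj}'  # catches schema changes early
    return items

assert parse_response({'status': 200, 'items': [1, 2]}) == [1, 2]
```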
diff --git a/my/core/logging.py b/my/core/logging.py index bdee9aa..167a167 100644 --- a/my/core/logging.py +++ b/my/core/logging.py @@ -250,7 +250,7 @@ if __name__ == '__main__': test() -## legacy/deprecated methods for backwards compatilibity +## legacy/deprecated methods for backwards compatibility if not TYPE_CHECKING: from .compat import deprecated diff --git a/my/core/tests/test_tmp_config.py b/my/core/tests/test_tmp_config.py index e5a24cc..d99621d 100644 --- a/my/core/tests/test_tmp_config.py +++ b/my/core/tests/test_tmp_config.py @@ -12,7 +12,7 @@ def _init_default_config() -> None: def test_tmp_config() -> None: ## ugh. ideally this would be on the top level (would be a better test) - ## but pytest imports eveything first, executes hooks, and some reset_modules() fictures mess stuff up + ## but pytest imports everything first, executes hooks, and some reset_modules() fictures mess stuff up ## later would be nice to be a bit more careful about them _init_default_config() from my.simple import items diff --git a/my/core/utils/itertools.py b/my/core/utils/itertools.py index 501ebbe..42b2b77 100644 --- a/my/core/utils/itertools.py +++ b/my/core/utils/itertools.py @@ -321,7 +321,7 @@ _UET = TypeVar('_UET') _UEU = TypeVar('_UEU') -# NOTE: for historic reasons, this function had to accept Callable that retuns iterator +# NOTE: for historic reasons, this function had to accept Callable that returns iterator # instead of just iterator # TODO maybe deprecated Callable support? not sure def unique_everseen( @@ -358,7 +358,7 @@ def test_unique_everseen() -> None: assert list(unique_everseen(fun_good)) == [123] with pytest.raises(Exception): - # since function retuns a list rather than iterator, check happens immediately + # since function returns a list rather than iterator, check happens immediately # , even without advancing the iterator unique_everseen(fun_bad) diff --git a/my/fbmessenger/__init__.py b/my/fbmessenger/__init__.py index f729de9..e5e417c 100644 --- a/my/fbmessenger/__init__.py +++ b/my/fbmessenger/__init__.py @@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info. """ -# prevent it from apprearing in modules list/doctor +# prevent it from appearing in modules list/doctor from ..core import __NOT_HPI_MODULE__ # kinda annoying to keep it, but it's so legacy 'hpi module install my.fbmessenger' works diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py index a16d924..db4cc54 100644 --- a/my/fbmessenger/android.py +++ b/my/fbmessenger/android.py @@ -174,7 +174,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]: However seems that when message is not sent yet it doesn't have this server id yet (happened only once, but could be just luck of course!) We exclude these messages to avoid duplication. - However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if mesage ids change or something + However poisitive filter (e.g. message_id LIKE 'mid%') feels a bit wrong, e.g. what if message ids change or something So instead this excludes only such unsent messages. 
*/ message_id != offline_threading_id diff --git a/my/instagram/all.py b/my/instagram/all.py index 214e6ac..ce78409 100644 --- a/my/instagram/all.py +++ b/my/instagram/all.py @@ -23,7 +23,7 @@ def messages() -> Iterator[Res[Message]]: # TODO in general best to prefer android, it has more data # - message ids # - usernames are correct for Android data - # - thread ids more meaninful? + # - thread ids more meaningful? # but for now prefer gdpr prefix since it makes a bit things a bit more consistent? # e.g. a new batch of android exports can throw off ids if we rely on it for mapping yield from _merge_messages( diff --git a/my/instagram/gdpr.py b/my/instagram/gdpr.py index 7454a04..d417fdb 100644 --- a/my/instagram/gdpr.py +++ b/my/instagram/gdpr.py @@ -76,7 +76,7 @@ def _entities() -> Iterator[Res[User | _Message]]: # NOTE: here there are basically two options # - process inputs as is (from oldest to newest) # this would be more stable wrt newer exports (e.g. existing thread ids won't change) - # the downside is that newer exports seem to have better thread ids, so might be preferrable to use them + # the downside is that newer exports seem to have better thread ids, so might be preferable to use them # - process inputs reversed (from newest to oldest) # the upside is that thread ids/usernames might be better # the downside is that if for example the user renames, thread ids will change _a lot_, might be undesirable.. @@ -137,7 +137,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]: j = json.loads(ffile.read_text()) id_len = 10 - # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole converstation + # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation # but I stared a bit at these ids vs database ids and can't see any way to find the correspondence :( # so basically the only way to merge is to actually try some magic and correlate timestamps/message texts? # another option is perhaps to query user id from username with some free API diff --git a/my/reddit/__init__.py b/my/reddit/__init__.py index f344eeb..982901a 100644 --- a/my/reddit/__init__.py +++ b/my/reddit/__init__.py @@ -9,7 +9,7 @@ since that allows for easier overriding using namespace packages See https://github.com/karlicoss/HPI/blob/master/doc/MODULE_DESIGN.org#allpy for more info. """ -# prevent it from apprearing in modules list/doctor +# prevent it from appearing in modules list/doctor from ..core import __NOT_HPI_MODULE__ # kinda annoying to keep it, but it's so legacy 'hpi module install my.reddit' works diff --git a/my/smscalls.py b/my/smscalls.py index ccaac72..324bc44 100644 --- a/my/smscalls.py +++ b/my/smscalls.py @@ -186,7 +186,7 @@ class MMS(NamedTuple): for (addr, _type) in self.addresses: if _type == 137: return addr - # hmm, maybe return instead? but this probably shouldnt happen, means + # hmm, maybe return instead? but this probably shouldn't happen, means # something is very broken raise RuntimeError(f'No from address matching 137 found in {self.addresses}') @@ -214,7 +214,7 @@ def mms() -> Iterator[Res[MMS]]: def _resolve_null_str(value: str | None) -> str | None: if value is None: return None - # hmm.. theres some risk of the text actually being 'null', but theres + # hmm.. 
there's some risk of the text actually being 'null', but there's # no way to distinguish that from XML values if value == 'null': return None diff --git a/my/stackexchange/gdpr.py b/my/stackexchange/gdpr.py index 78987be..8ed0d30 100644 --- a/my/stackexchange/gdpr.py +++ b/my/stackexchange/gdpr.py @@ -49,7 +49,7 @@ class Vote(NamedTuple): # hmm, this loads very raw comments without the rest of the page? # - https://meta.stackexchange.com/posts/27319/comments#comment-57475 # - # parentPostId is the original quesion + # parentPostId is the original question # TODO is not always present? fucking hell # seems like there is no way to get a hierarchical comment link.. guess this needs to be handled in Promnesia normalisation... # postId is the answer diff --git a/my/time/tz/via_location.py b/my/time/tz/via_location.py index 58b5bf7..1b2275b 100644 --- a/my/time/tz/via_location.py +++ b/my/time/tz/via_location.py @@ -245,7 +245,7 @@ def _iter_tzs() -> Iterator[DayWithZone]: def _day2zone() -> dict[date, pytz.BaseTzInfo]: # NOTE: kinda unfortunate that this will have to process all days before returning result for just one # however otherwise cachew cache might never be initialized properly - # so we'll always end up recomputing everyting during subsequent runs + # so we'll always end up recomputing everything during subsequent runs return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()} diff --git a/my/tinder/android.py b/my/tinder/android.py index a09794f..5a5d887 100644 --- a/my/tinder/android.py +++ b/my/tinder/android.py @@ -106,7 +106,7 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]: user_profile_rows = list(db.execute('SELECT * FROM profile_user_view')) if len(user_profile_rows) == 0: - # shit, sometime in 2023 profile_user_view stoppped containing user profile.. + # shit, sometime in 2023 profile_user_view stopped containing user profile.. # presumably the most common from_id/to_id would be our own username counter = Counter([id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')]) if len(counter) > 0: # this might happen if db is empty (e.g. user got logged out) diff --git a/my/topcoder.py b/my/topcoder.py index 56403e2..40df77c 100644 --- a/my/topcoder.py +++ b/my/topcoder.py @@ -81,7 +81,7 @@ def _parse_one(p: Path) -> Iterator[Res[Competition]]: # but also expects cooperation from .make method (e.g. popping items from the dict) # could also wrap in helper and pass to .make .. not sure # an argument could be made that .make isn't really a class methond.. - # it's pretty specific to this parser onl + # it's pretty specific to this parser only yield from Competition.make(j=c) yield from m.check() diff --git a/my/twitter/android.py b/my/twitter/android.py index 88c9389..8159ee7 100644 --- a/my/twitter/android.py +++ b/my/twitter/android.py @@ -192,7 +192,7 @@ def get_own_user_id(conn) -> str: # - timeline_data_type # 1 : the bulk of tweets, but also some notifications etc?? # 2 : who-to-follow/community-to-join. 
contains a couple of tweets, but their corresponding status_id is NULL -# 8 : who-to-follow/notfication +# 8 : who-to-follow/notification # 13: semantic-core/who-to-follow # 14: cursor # 17: trends diff --git a/my/twitter/twint.py b/my/twitter/twint.py index 5106923..9d36a93 100644 --- a/my/twitter/twint.py +++ b/my/twitter/twint.py @@ -54,7 +54,7 @@ class Tweet(NamedTuple): # https://github.com/thomasancheriyil/Red-Tide-Detection-based-on-Twitter/blob/beb200be60cc66dcbc394e670513715509837812/python/twitterGapParse.py#L61-L62 # # twint is also saving 'timezone', but this is local machine timezone at the time of scraping? - # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keepin lots on unnecessary stuff in the db) + # perhaps they thought date-time-ms was local time... or just kept it just in case (they are keeping lots of unnecessary stuff in the db) return datetime.fromtimestamp(seconds, tz=tz) @property diff --git a/my/whatsapp/android.py b/my/whatsapp/android.py index 3cd4436..a8dbe8d 100644 --- a/my/whatsapp/android.py +++ b/my/whatsapp/android.py @@ -199,7 +199,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: sender_row_id = r['sender_jid_row_id'] if sender_row_id == 0: # seems that it's always 0 for 1-1 chats - # for group chats our onw id is still 0, but other ids are properly set + # for group chats our own id is still 0, but other ids are properly set if from_me: myself_user_id = config.my_user_id or 'MYSELF_USER_ID' sender = Sender(id=myself_user_id, name=None) # TODO set my own name as well? diff --git a/my/youtube/takeout.py b/my/youtube/takeout.py index 703715f..8eca328 100644 --- a/my/youtube/takeout.py +++ b/my/youtube/takeout.py @@ -36,7 +36,7 @@ def watched() -> Iterator[Res[Watched]]: continue # older exports (e.g. html) didn't have microseconds - # wheras newer json ones do have them + # whereas newer json ones do have them # seconds resolution is enough to distinguish watched videos # also we're processing takeouts in HPI in reverse order, so first seen watch would contain microseconds, resulting in better data without_microsecond = w.when.replace(microsecond=0) From 95a16b956f8ab24bea3002d1428c0c10b30a3455 Mon Sep 17 00:00:00 2001 From: purarue <7804791+purarue@users.noreply.github.com> Date: Tue, 26 Nov 2024 13:53:10 -0800 Subject: [PATCH 07/11] doc: some performance notes for query_range (#409) * doc: some performance notes for query_range * add ruff_cache to gitignore --- .gitignore | 3 +++ my/core/__init__.py | 33 ++++++++++++++++++--------------- my/core/__main__.py | 3 +++ my/core/query_range.py | 4 +++- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 19c3380..65ba630 100644 --- a/.gitignore +++ b/.gitignore @@ -155,6 +155,9 @@ celerybeat-schedule .dmypy.json dmypy.json +# linters +.ruff_cache/ + # Pyre type checker .pyre/ diff --git a/my/core/__init__.py b/my/core/__init__.py index cc549d5..a8a41f4 100644 --- a/my/core/__init__.py +++ b/my/core/__init__.py @@ -29,22 +29,25 @@ if not TYPE_CHECKING: __all__ = [ - 'get_files', 'PathIsh', 'Paths', - 'Json', - 'make_logger', - 'LazyLogger', # legacy import - 'warn_if_empty', - 'stat', 'Stats', - 'datetime_aware', 'datetime_naive', - 'assert_never', # TODO maybe deprecate from use in my.core?
will be in stdlib soon - - 'make_config', - '__NOT_HPI_MODULE__', - - 'Res', 'unwrap', 'notnone', - - 'dataclass', 'Path', + 'Json', + 'LazyLogger', # legacy import + 'Path', + 'PathIsh', + 'Paths', + 'Res', + 'Stats', + 'assert_never', # TODO maybe deprecate from use in my.core? will be in stdlib soon + 'dataclass', + 'datetime_aware', + 'datetime_naive', + 'get_files', + 'make_config', + 'make_logger', + 'notnone', + 'stat', + 'unwrap', + 'warn_if_empty', ] diff --git a/my/core/__main__.py b/my/core/__main__.py index 00ac4ee..7e2d8f9 100644 --- a/my/core/__main__.py +++ b/my/core/__main__.py @@ -538,6 +538,9 @@ def query_hpi_functions( # chain list of functions from user, in the order they wrote them on the CLI input_src = chain(*(f() for f in _locate_functions_or_prompt(qualified_names))) + # NOTE: if passing just one function to this which returns a single namedtuple/dataclass, + # using both --order-key and --order-type will often be faster as it does not need to + # duplicate the iterator in memory, or try to find the --order-type type on each object before sorting res = select_range( input_src, order_key=order_key, diff --git a/my/core/query_range.py b/my/core/query_range.py index 2a8d7bd..83728bf 100644 --- a/my/core/query_range.py +++ b/my/core/query_range.py @@ -337,6 +337,8 @@ def select_range( # if the user supplied a order_key, and/or we've generated an order_value, create # the function that accesses that type on each value in the iterator if order_key is not None or order_value is not None: + # _generate_order_value_func internally here creates a copy of the iterator, which has to + # be consumed in case we're sorting by mixed types order_by_chosen, itr = _handle_generate_order_by(itr, order_key=order_key, order_value=order_value) # signifies that itr is empty -- can early return here if order_by_chosen is None: @@ -398,7 +400,7 @@ Specify a type or a key to order the value by""") return itr -# re-use items from query for testing +# reuse items from query for testing from .query import _A, _B, _Float, _mixed_iter_errors From d8c53bde34e2a5e68f2bac18941ae426ed468b02 Mon Sep 17 00:00:00 2001 From: purarue <7804791+purarue@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:31:22 -0800 Subject: [PATCH 08/11] smscalls: add phone number to model --- my/smscalls.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/my/smscalls.py b/my/smscalls.py index 324bc44..0ff2553 100644 --- a/my/smscalls.py +++ b/my/smscalls.py @@ -37,6 +37,7 @@ class Call(NamedTuple): dt: datetime dt_readable: str duration_s: int + phone_number: str who: str | None # type - 1 = Incoming, 2 = Outgoing, 3 = Missed, 4 = Voicemail, 5 = Rejected, 6 = Refused List.
call_type: int @@ -65,12 +66,13 @@ def _extract_calls(path: Path) -> Iterator[Res[Call]]: duration = cxml.get('duration') who = cxml.get('contact_name') call_type = cxml.get('type') + number = cxml.get('number') # if name is missing, its not None (its some string), depends on the phone/message app if who is not None and who in UNKNOWN: who = None - if dt is None or dt_readable is None or duration is None or call_type is None: + if dt is None or dt_readable is None or duration is None or call_type is None or number is None: call_str = etree.tostring(cxml).decode('utf-8') - yield RuntimeError(f"Missing one or more required attributes [date, readable_date, duration, type] in {call_str}") + yield RuntimeError(f"Missing one or more required attributes [date, readable_date, duration, type, number] in {call_str}") continue # TODO we've got local tz here, not sure if useful.. # ok, so readable date is local datetime, changing throughout the backup @@ -78,6 +80,7 @@ def _extract_calls(path: Path) -> Iterator[Res[Call]]: dt=_parse_dt_ms(dt), dt_readable=dt_readable, duration_s=int(duration), + phone_number=number, who=who, call_type=int(call_type), ) From f1d23c5e96d95819d383485f22b480d8d190fe98 Mon Sep 17 00:00:00 2001 From: purarue <7804791+purarue@users.noreply.github.com> Date: Sun, 22 Dec 2024 21:50:03 -0800 Subject: [PATCH 09/11] smscalls: allow large XML files as input once XML files increase past a certain size (was about 220MB for me), the parser just throws an error because the tree is too large (iirc for security reasons) could maybe look at using iterparse in the future to parse it without loading the whole file, but this seems to fix it fine for me --- my/smscalls.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/my/smscalls.py b/my/smscalls.py index 0ff2553..27d08be 100644 --- a/my/smscalls.py +++ b/my/smscalls.py @@ -57,9 +57,12 @@ class Call(NamedTuple): # The '(Unknown)' is just what my android phone does, not sure if there are others UNKNOWN: set[str] = {'(Unknown)'} +def _parse_xml(xml: Path) -> Any: + return etree.parse(str(xml), parser=etree.XMLParser(huge_tree=True)) + def _extract_calls(path: Path) -> Iterator[Res[Call]]: - tr = etree.parse(str(path)) + tr = _parse_xml(path) for cxml in tr.findall('call'): dt = cxml.get('date') dt_readable = cxml.get('readable_date') @@ -133,7 +136,7 @@ def messages() -> Iterator[Res[Message]]: def _extract_messages(path: Path) -> Iterator[Res[Message]]: - tr = etree.parse(str(path)) + tr = _parse_xml(path) for mxml in tr.findall('sms'): dt = mxml.get('date') dt_readable = mxml.get('readable_date') @@ -225,8 +228,7 @@ def _resolve_null_str(value: str | None) -> str | None: def _extract_mms(path: Path) -> Iterator[Res[MMS]]: - tr = etree.parse(str(path)) - + tr = _parse_xml(path) for mxml in tr.findall('mms'): dt = mxml.get('date') dt_readable = mxml.get('readable_date') @@ -271,10 +273,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]: # # This seems pretty useless, so we should try and skip it, and just return the # text/images/data - # - # man, attrib is some internal cpython ._Attrib type which can't - # be typed by any sort of mappingproxy. maybe a protocol could work..? 
- part_data: dict[str, Any] = part.attrib # type: ignore +        part_data: dict[str, Any] = part.attrib seq: str | None = part_data.get('seq') if seq == '-1': continue From 54df429f614a5e5d0617dcd196bf8566608e987c Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 29 Dec 2024 15:06:49 +0000 Subject: [PATCH 10/11] core.sqlite: add helper SqliteTool to get table schemas --- my/core/sqlite.py | 43 +++++++++++++++++++++++++++++++++++++++ my/fbmessenger/android.py | 4 ++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/my/core/sqlite.py b/my/core/sqlite.py index aa41ab3..6167d2e 100644 --- a/my/core/sqlite.py +++ b/my/core/sqlite.py @@ -134,3 +134,46 @@ def select(cols: tuple[str, str, str, str, str, str, str, str], rest: str, *, db def select(cols, rest, *, db): # db arg is last cause that results in nicer code formatting.. return db.execute('SELECT ' + ','.join(cols) + ' ' + rest) + + +class SqliteTool: + def __init__(self, connection: sqlite3.Connection) -> None: + self.connection = connection + + def _get_sqlite_master(self) -> dict[str, str]: + res = {} + for c in self.connection.execute('SELECT name, type FROM sqlite_master'): + [name, type_] = c + assert type_ in {'table', 'index', 'view', 'trigger'}, (name, type_) # just in case + res[name] = type_ + return res + + def get_table_names(self) -> list[str]: + master = self._get_sqlite_master() + res = [] + for name, type_ in master.items(): + if type_ != 'table': + continue + res.append(name) + return res + + def get_table_schema(self, name: str) -> dict[str, str]: + """ + Returns map from column name to column type + + NOTE: Sometimes this doesn't work if the db has some extensions (e.g. happens for facebook apps) + In this case you might still be able to use get_table_names + """ + schema: dict[str, str] = {} + for row in self.connection.execute(f'PRAGMA table_info(`{name}`)'): + col = row[1] + type_ = row[2] + # hmm, somewhere between 3.34.1 and 3.37.2, sqlite started normalising type names to uppercase + # let's do this just in case since Python versions < 3.10 are using the old version + # e.g.
it could have returned 'blob' and that would confuse blob check (see _check_allowed_blobs) + type_ = type_.upper() + schema[col] = type_ + return schema + + def get_table_schemas(self) -> dict[str, dict[str, str]]: + return {name: self.get_table_schema(name) for name in self.get_table_names()} diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py index db4cc54..f6fdb82 100644 --- a/my/fbmessenger/android.py +++ b/my/fbmessenger/android.py @@ -15,7 +15,7 @@ from my.core import LazyLogger, Paths, Res, datetime_aware, get_files, make_conf from my.core.common import unique_everseen from my.core.compat import assert_never from my.core.error import echain -from my.core.sqlite import sqlite_connection +from my.core.sqlite import sqlite_connection, SqliteTool from my.config import fbmessenger as user_config # isort: skip @@ -86,8 +86,8 @@ def _entities() -> Iterator[Res[Entity]]: for idx, path in enumerate(paths): logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}') with sqlite_connection(path, immutable=True, row_factory='row') as db: + use_msys = "logging_events_v2" in SqliteTool(db).get_table_names() try: - use_msys = len(list(db.execute('SELECT * FROM sqlite_master WHERE name = "logging_events_v2"'))) > 0 if use_msys: yield from _process_db_msys(db) else: From bb703c8c6a7ef80205030f640316c222bc48a6e1 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 29 Dec 2024 15:37:10 +0000 Subject: [PATCH 11/11] twitter.android: fix get_own_user_id for latest exports --- my/twitter/android.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/my/twitter/android.py b/my/twitter/android.py index 8159ee7..7e8f170 100644 --- a/my/twitter/android.py +++ b/my/twitter/android.py @@ -161,9 +161,22 @@ def get_own_user_id(conn) -> str: 'SELECT DISTINCT CAST(list_mapping_user_id AS TEXT) FROM list_mapping', 'SELECT DISTINCT CAST(owner_id AS TEXT) FROM cursors', 'SELECT DISTINCT CAST(user_id AS TEXT) FROM users WHERE _id == 1', + # ugh, sometimes all of the above are empty... + # for the rest it seems: + # - is_active_creator is NULL + # - is_graduated is NULL + # - profile_highlights_info is NULL + 'SELECT DISTINCT CAST(user_id AS TEXT) FROM users WHERE is_active_creator == 0 AND is_graduated == 1 AND profile_highlights_info IS NOT NULL', ]: - for (r,) in conn.execute(q): - res.add(r) + res |= {r for (r,) in conn.execute(q)} + + assert len(res) <= 1, res + if len(res) == 0: + # sometimes even all of the above doesn't help... + # last resort is trying to get from status_groups table + # however we can't always use it because it might contain multiple different owner_id? + # not sure, maybe it will break as well and we'll need to fall back on the most common or something.. + res |= {r for (r,) in conn.execute('SELECT DISTINCT CAST(owner_id AS TEXT) FROM status_groups')} assert len(res) == 1, res [r] = res return r
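For reference, a hypothetical usage sketch of the SqliteTool helper introduced in [PATCH 10/11] above. The database path and the printed contents are made up for illustration; the class and method names come straight from the patch.

    import sqlite3

    from my.core.sqlite import SqliteTool

    # 'messages.db' is a placeholder -- any sqlite database would do
    conn = sqlite3.connect('messages.db')
    tool = SqliteTool(conn)
    # plain tables only -- indices/views/triggers are filtered out
    print(tool.get_table_names())
    # maps each table name to {column name: uppercased column type}
    print(tool.get_table_schemas())
    conn.close()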