diff --git a/my/arbtt.py b/my/arbtt.py
index 2bcf291..5d4bf8e 100644
--- a/my/arbtt.py
+++ b/my/arbtt.py
@@ -2,20 +2,22 @@
 [[https://github.com/nomeata/arbtt#arbtt-the-automatic-rule-based-time-tracker][Arbtt]] time tracking
 '''
 
+from __future__ import annotations
+
 REQUIRES = ['ijson', 'cffi']  # NOTE likely also needs libyajl2 from apt or elsewhere?
 
+from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Sequence, Iterable, List, Optional
 
 
 def inputs() -> Sequence[Path]:
     try:
         from my.config import arbtt as user_config
     except ImportError:
-        from .core.warnings import low
+        from my.core.warnings import low
        low("Couldn't find 'arbtt' config section, falling back to the default capture.log (usually in HOME dir). Add 'arbtt' section with logfiles = '' to suppress this warning.")
         return []
     else:
@@ -55,7 +57,7 @@ class Entry:
         return fromisoformat(ds)
 
     @property
-    def active(self) -> Optional[str]:
+    def active(self) -> str | None:
         # NOTE: WIP, might change this in the future...
         ait = (w for w in self.json['windows'] if w['active'])
         a = next(ait, None)
@@ -74,17 +76,18 @@ class Entry:
 
 def entries() -> Iterable[Entry]:
     inps = list(inputs())
 
-    base: List[PathIsh] = ['arbtt-dump', '--format=json']
+    base: list[PathIsh] = ['arbtt-dump', '--format=json']
 
-    cmds: List[List[PathIsh]]
+    cmds: list[list[PathIsh]]
     if len(inps) == 0:
         cmds = [base]  # rely on default
     else:
         # otherwise, 'merge' them
         cmds = [[*base, '--logfile', f] for f in inps]
 
-    import ijson.backends.yajl2_cffi as ijson  # type: ignore
-    from subprocess import Popen, PIPE
+    from subprocess import PIPE, Popen
+
+    import ijson.backends.yajl2_cffi as ijson  # type: ignore
+
     for cmd in cmds:
         with Popen(cmd, stdout=PIPE) as p:
             out = p.stdout; assert out is not None
@@ -93,8 +96,8 @@ def entries() -> Iterable[Entry]:
 
 
 def fill_influxdb() -> None:
-    from .core.influxdb import magic_fill
     from .core.freezer import Freezer
+    from .core.influxdb import magic_fill
     freezer = Freezer(Entry)
     fit = (freezer.freeze(e) for e in entries())
     # TODO crap, influxdb doesn't like None https://github.com/influxdata/influxdb/issues/7722
@@ -106,6 +109,8 @@ def fill_influxdb() -> None:
     magic_fill(fit, name=f'{entries.__module__}:{entries.__name__}')
 
 
-from .core import stat, Stats
+from .core import Stats, stat
+
+
 def stats() -> Stats:
     return stat(entries)
diff --git a/my/bluemaestro.py b/my/bluemaestro.py
index 4c33fd1..8c739f0 100644
--- a/my/bluemaestro.py
+++ b/my/bluemaestro.py
@@ -2,14 +2,17 @@
 [[https://bluemaestro.com/products/product-details/bluetooth-environmental-monitor-and-logger][Bluemaestro]] temperature/humidity/pressure monitor
 """
 
+from __future__ import annotations
+
 # todo most of it belongs to DAL... but considering so few people use it I didn't bother for now
 import re
 import sqlite3
 from abc import abstractmethod
+from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Iterable, Optional, Protocol, Sequence, Set
+from typing import Protocol
 
 import pytz
 
@@ -87,17 +90,17 @@ def measurements() -> Iterable[Res[Measurement]]:
     total = len(paths)
     width = len(str(total))
 
-    last: Optional[datetime] = None
+    last: datetime | None = None
 
     # tables are immutable, so can save on processing..
-    processed_tables: Set[str] = set()
+    processed_tables: set[str] = set()
     for idx, path in enumerate(paths):
         logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
         tot = 0
         new = 0
         # todo assert increasing timestamp?
         with sqlite_connect_immutable(path) as db:
-            db_dt: Optional[datetime] = None
+            db_dt: datetime | None = None
             try:
                 datas = db.execute(
                     f'SELECT "{path.name}" as name, Time, Temperature, Humidity, Pressure, Dewpoint FROM data ORDER BY log_index'
diff --git a/my/body/blood.py b/my/body/blood.py
index fb035eb..867568c 100644
--- a/my/body/blood.py
+++ b/my/body/blood.py
@@ -2,41 +2,42 @@
 Blood tracking (manual org-mode entries)
 """
 
+from __future__ import annotations
+
+from collections.abc import Iterable
 from datetime import datetime
-from typing import Iterable, NamedTuple, Optional
+from typing import NamedTuple
 
-from ..core.error import Res
-from ..core.orgmode import parse_org_datetime, one_table
-
-
-import pandas as pd
 import orgparse
-
+import pandas as pd
 
 from my.config import blood as config  # type: ignore[attr-defined]
 
+from ..core.error import Res
+from ..core.orgmode import one_table, parse_org_datetime
+
 
 class Entry(NamedTuple):
     dt: datetime
 
-    ketones      : Optional[float]=None
-    glucose      : Optional[float]=None
+    ketones      : float | None=None
+    glucose      : float | None=None
 
-    vitamin_d    : Optional[float]=None
-    vitamin_b12  : Optional[float]=None
+    vitamin_d    : float | None=None
+    vitamin_b12  : float | None=None
 
-    hdl          : Optional[float]=None
-    ldl          : Optional[float]=None
-    triglycerides: Optional[float]=None
+    hdl          : float | None=None
+    ldl          : float | None=None
+    triglycerides: float | None=None
 
-    source       : Optional[str]=None
-    extra        : Optional[str]=None
+    source       : str | None=None
+    extra        : str | None=None
 
 
 Result = Res[Entry]
 
 
-def try_float(s: str) -> Optional[float]:
+def try_float(s: str) -> float | None:
     l = s.split()
     if len(l) == 0:
         return None
@@ -105,6 +106,7 @@ def blood_tests_data() -> Iterable[Result]:
 
 def data() -> Iterable[Result]:
     from itertools import chain
+
     from ..core.error import sort_res_by
     datas = chain(glucose_ketones_data(), blood_tests_data())
     return sort_res_by(datas, key=lambda e: e.dt)
diff --git a/my/body/exercise/all.py b/my/body/exercise/all.py
index e86a5af..d0df747 100644
--- a/my/body/exercise/all.py
+++ b/my/body/exercise/all.py
@@ -7,10 +7,10 @@ from ...core.pandas import DataFrameT, check_dataframe
 @check_dataframe
 def dataframe() -> DataFrameT:
     # this should be somehow more flexible...
-    from ...endomondo import dataframe as EDF
-    from ...runnerup import dataframe as RDF
-
     import pandas as pd
+
+    from ...endomondo import dataframe as EDF
+    from ...runnerup import dataframe as RDF
     return pd.concat([
         EDF(),
         RDF(),
diff --git a/my/body/exercise/cardio.py b/my/body/exercise/cardio.py
index 083b972..d8a6afd 100644
--- a/my/body/exercise/cardio.py
+++ b/my/body/exercise/cardio.py
@@ -3,7 +3,6 @@ Cardio data, filtered from various data sources
 '''
 from ...core.pandas import DataFrameT, check_dataframe
 
-
 CARDIO = {
     'Running',
     'Running, treadmill',
diff --git a/my/body/exercise/cross_trainer.py b/my/body/exercise/cross_trainer.py
index edbb557..30f96f9 100644
--- a/my/body/exercise/cross_trainer.py
+++ b/my/body/exercise/cross_trainer.py
@@ -5,16 +5,18 @@ This is probably too specific to my needs, so later I will move it away to a per
 For now it's worth keeping it here as an example and perhaps utility functions might be useful for other HPI modules.
 '''
-from datetime import datetime, timedelta
-from typing import Optional
+from __future__ import annotations
 
-from ...core.pandas import DataFrameT, check_dataframe as cdf
-from ...core.orgmode import collect, Table, parse_org_datetime, TypedTable
+from datetime import datetime, timedelta
+
+import pytz
 
 from my.config import exercise as config
 
+from ...core.orgmode import Table, TypedTable, collect, parse_org_datetime
+from ...core.pandas import DataFrameT
+from ...core.pandas import check_dataframe as cdf
 
-import pytz
 # FIXME how to attach it properly?
 tz = pytz.timezone('Europe/London')
@@ -114,7 +116,7 @@ def dataframe() -> DataFrameT:
             rows.append(rd)  # presumably has an error set
             continue
 
-        idx: Optional[int]
+        idx: int | None
         close = edf[edf['start_time'].apply(lambda t: pd_date_diff(t, mdate)).abs() < _DELTA]
         if len(close) == 0:
             idx = None
@@ -163,7 +165,9 @@ def dataframe() -> DataFrameT:
 
 # TODO wtf?? where is speed coming from??
 
 
-from ...core import stat, Stats
+from ...core import Stats, stat
+
+
 def stats() -> Stats:
     return stat(cross_trainer_data)
diff --git a/my/body/sleep/common.py b/my/body/sleep/common.py
index 1100814..fc288e5 100644
--- a/my/body/sleep/common.py
+++ b/my/body/sleep/common.py
@@ -1,5 +1,6 @@
-from ...core import stat, Stats
-from ...core.pandas import DataFrameT, check_dataframe as cdf
+from ...core import Stats, stat
+from ...core.pandas import DataFrameT
+from ...core.pandas import check_dataframe as cdf
 
 
 class Combine:
diff --git a/my/body/sleep/main.py b/my/body/sleep/main.py
index 29b12a7..2460e03 100644
--- a/my/body/sleep/main.py
+++ b/my/body/sleep/main.py
@@ -1,7 +1,6 @@
-from ... import jawbone
-from ... import emfit
-
+from ... import emfit, jawbone
 from .common import Combine
+
 _combined = Combine([
     jawbone,
     emfit,
diff --git a/my/body/weight.py b/my/body/weight.py
index 51e6513..d5478ef 100644
--- a/my/body/weight.py
+++ b/my/body/weight.py
@@ -2,14 +2,14 @@
 Weight data (manually logged)
 '''
 
+from collections.abc import Iterator
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Any, Iterator
-
-from my.core import make_logger
-from my.core.error import Res, extract_error_datetime, set_error_datetime
+from typing import Any
 
 from my import orgmode
+from my.core import make_logger
+from my.core.error import Res, extract_error_datetime, set_error_datetime
 
 config = Any
diff --git a/my/books/kobo.py b/my/books/kobo.py
index 2a469d0..899ef31 100644
--- a/my/books/kobo.py
+++ b/my/books/kobo.py
@@ -1,7 +1,6 @@
-from ..core import warnings
+from my.core import warnings
 
 warnings.high('my.books.kobo is deprecated! Please use my.kobo instead!')
 
-from ..core.util import __NOT_HPI_MODULE__
-
-from ..kobo import *  # type: ignore[no-redef]
+from my.core.util import __NOT_HPI_MODULE__
+from my.kobo import *  # type: ignore[no-redef]
diff --git a/my/browser/active_browser.py b/my/browser/active_browser.py
index 6f335bd..8051f1b 100644
--- a/my/browser/active_browser.py
+++ b/my/browser/active_browser.py
@@ -19,16 +19,18 @@ class config(user_config.active_browser):
     export_path: Paths
 
 
+from collections.abc import Iterator, Sequence
 from pathlib import Path
-from typing import Sequence, Iterator
 
-from my.core import get_files, Stats, make_logger
-from browserexport.merge import read_visits, Visit
+from browserexport.merge import Visit, read_visits
 from sqlite_backup import sqlite_backup
 
+from my.core import Stats, get_files, make_logger
+
 logger = make_logger(__name__)
 
 from .common import _patch_browserexport_logs
+
 _patch_browserexport_logs(logger.level)
 
diff --git a/my/browser/all.py b/my/browser/all.py
index a7d12b4..feb973a 100644
--- a/my/browser/all.py
+++ b/my/browser/all.py
@@ -1,9 +1,9 @@
-from typing import Iterator
+from collections.abc import Iterator
+
+from browserexport.merge import Visit, merge_visits
 
 from my.core import Stats
 from my.core.source import import_source
 
-from browserexport.merge import merge_visits, Visit
-
 src_export = import_source(module_name="my.browser.export")
 src_active = import_source(module_name="my.browser.active_browser")
diff --git a/my/browser/export.py b/my/browser/export.py
index 1b428b5..351cf6e 100644
--- a/my/browser/export.py
+++ b/my/browser/export.py
@@ -4,11 +4,12 @@ Parses browser history using [[http://github.com/seanbreckenridge/browserexport]
 
 REQUIRES = ["browserexport"]
 
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Iterator, Sequence
 
-import my.config
+from browserexport.merge import Visit, read_and_merge
+
 from my.core import (
     Paths,
     Stats,
@@ -18,10 +19,10 @@ from my.core import (
 )
 from my.core.cachew import mcachew
 
-from browserexport.merge import read_and_merge, Visit
-
 from .common import _patch_browserexport_logs
 
+import my.config  # isort: skip
+
 
 @dataclass
 class config(my.config.browser.export):
diff --git a/my/bumble/android.py b/my/bumble/android.py
index 54a0441..3f9fa13 100644
--- a/my/bumble/android.py
+++ b/my/bumble/android.py
@@ -3,24 +3,24 @@ Bumble data from Android app database (in =/data/data/com.bumble.app/databases/C
 """
 from __future__ import annotations
 
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Iterator, Sequence, Optional, Dict
+from pathlib import Path
 
 from more_itertools import unique_everseen
 
-from my.config import bumble as user_config
+from my.core import Paths, get_files
+
+from my.config import bumble as user_config  # isort: skip
 
 
-from ..core import Paths
 @dataclass
 class config(user_config.android):
     # paths[s]/glob to the exported sqlite databases
     export_path: Paths
 
-from ..core import get_files
-from pathlib import Path
 
 def inputs() -> Sequence[Path]:
     return get_files(config.export_path)
@@ -43,22 +43,24 @@ class _BaseMessage:
 
 @dataclass(unsafe_hash=True)
 class _Message(_BaseMessage):
     conversation_id: str
-    reply_to_id: Optional[str]
+    reply_to_id: str | None
 
 
 @dataclass(unsafe_hash=True)
 class Message(_BaseMessage):
     person: Person
-    reply_to: Optional[Message]
+    reply_to: Message | None
 
 
 import json
-from typing import Union
-from ..core import Res
 import sqlite3
-from ..core.sqlite import sqlite_connect_immutable, select
+from typing import Union
+
 from my.core.compat import assert_never
 
+from ..core import Res
+from ..core.sqlite import select, sqlite_connect_immutable
+
 EntitiesRes = Res[Union[Person, _Message]]
 
 def _entities() -> Iterator[EntitiesRes]:
@@ -120,8 +122,8 @@ _UNKNOWN_PERSON = "UNKNOWN_PERSON"
 
 
 def messages() -> Iterator[Res[Message]]:
-    id2person: Dict[str, Person] = {}
-    id2msg: Dict[str, Message] = {}
+    id2person: dict[str, Person] = {}
+    id2msg: dict[str, Message] = {}
     for x in unique_everseen(_entities(), key=_key):
         if isinstance(x, Exception):
             yield x
diff --git a/my/calendar/holidays.py b/my/calendar/holidays.py
index af51696..522672e 100644
--- a/my/calendar/holidays.py
+++ b/my/calendar/holidays.py
@@ -15,7 +15,8 @@ from my.core.time import zone_to_countrycode
 
 @lru_cache(1)
 def _calendar():
-    from workalendar.registry import registry # type: ignore
+    from workalendar.registry import registry  # type: ignore
+
     # todo switch to using time.tz.main once _get_tz stabilizes?
     from ..time.tz import via_location as LTZ
     # TODO would be nice to do it dynamically depending on the past timezones...
diff --git a/my/cfg.py b/my/cfg.py
index e4020b4..9331e8a 100644
--- a/my/cfg.py
+++ b/my/cfg.py
@@ -1,7 +1,6 @@
 import my.config as config
 
 from .core import __NOT_HPI_MODULE__
-
 from .core import warnings as W
 
 # still used in Promnesia, maybe in dashboard?
diff --git a/my/codeforces.py b/my/codeforces.py
index f2d150a..9c6b7c9 100644
--- a/my/codeforces.py
+++ b/my/codeforces.py
@@ -1,13 +1,12 @@
+import json
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from functools import cached_property
-import json
 from pathlib import Path
-from typing import Dict, Iterator, Sequence
-
-from my.core import get_files, Res, datetime_aware
 
 from my.config import codeforces as config  # type: ignore[attr-defined]
+from my.core import Res, datetime_aware, get_files
 
 
 def inputs() -> Sequence[Path]:
@@ -39,7 +38,7 @@ class Competition:
 class Parser:
     def __init__(self, *, inputs: Sequence[Path]) -> None:
         self.inputs = inputs
-        self.contests: Dict[ContestId, Contest] = {}
+        self.contests: dict[ContestId, Contest] = {}
 
     def _parse_allcontests(self, p: Path) -> Iterator[Contest]:
         j = json.loads(p.read_text())
diff --git a/my/coding/commits.py b/my/coding/commits.py
index 31c366e..fe17dee 100644
--- a/my/coding/commits.py
+++ b/my/coding/commits.py
@@ -1,29 +1,32 @@
 """
 Git commits data for repositories on your filesystem
 """
+
+from __future__ import annotations
+
 REQUIRES = [
     'gitpython',
 ]
 
-
 import shutil
-from pathlib import Path
-from datetime import datetime, timezone
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass, field
-from typing import List, Optional, Iterator, Set, Sequence, cast
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional, cast
 
-
-from my.core import PathIsh, LazyLogger, make_config
+from my.core import LazyLogger, PathIsh, make_config
 from my.core.cachew import cache_dir, mcachew
 from my.core.warnings import high
 
+from my.config import commits as user_config  # isort: skip
+
 
-from my.config import commits as user_config
 @dataclass
 class commits_cfg(user_config):
     roots: Sequence[PathIsh] = field(default_factory=list)
-    emails: Optional[Sequence[str]] = None
-    names: Optional[Sequence[str]] = None
+    emails: Sequence[str] | None = None
+    names: Sequence[str] | None = None
 
 
 # experiment to make it lazy?
@@ -40,7 +43,6 @@ def config() -> commits_cfg:
 import git
 from git.repo.fun import is_git_dir
 
-
 log = LazyLogger(__name__, level='info')
 
@@ -93,7 +95,7 @@ def _git_root(git_dir: PathIsh) -> Path:
         return gd # must be bare
 
 
-def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Commit]:
+def _repo_commits_aux(gr: git.Repo, rev: str, emitted: set[str]) -> Iterator[Commit]:
     # without path might not handle pull heads properly
     for c in gr.iter_commits(rev=rev):
         if not by_me(c):
@@ -120,7 +122,7 @@ def _repo_commits_aux(gr: git.Repo, rev: str, emitted: Set[str]) -> Iterator[Com
 def repo_commits(repo: PathIsh):
     gr = git.Repo(str(repo))
-    emitted: Set[str] = set()
+    emitted: set[str] = set()
     for r in gr.references:
         yield from _repo_commits_aux(gr=gr, rev=r.path, emitted=emitted)
 
@@ -141,14 +143,14 @@ def canonical_name(repo: Path) -> str:
 
 def _fd_path() -> str:
     # todo move it to core
-    fd_path: Optional[str] = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
+    fd_path: str | None = shutil.which("fdfind") or shutil.which("fd-find") or shutil.which("fd")
     if fd_path is None:
         high("my.coding.commits requires 'fd' to be installed, See https://github.com/sharkdp/fd#installation")
     assert fd_path is not None
     return fd_path
 
 
-def git_repos_in(roots: List[Path]) -> List[Path]:
+def git_repos_in(roots: list[Path]) -> list[Path]:
     from subprocess import check_output
     outputs = check_output([
         _fd_path(),
@@ -172,7 +174,7 @@ def git_repos_in(roots: List[Path]) -> List[Path]:
     return repos
 
 
-def repos() -> List[Path]:
+def repos() -> list[Path]:
     return git_repos_in(list(map(Path, config().roots)))
 
 
@@ -190,7 +192,7 @@ def _repo_depends_on(_repo: Path) -> int:
     raise RuntimeError(f"Could not find a FETCH_HEAD/HEAD file in {_repo}")
 
 
-def _commits(_repos: List[Path]) -> Iterator[Commit]:
+def _commits(_repos: list[Path]) -> Iterator[Commit]:
     for r in _repos:
         yield from _cached_commits(r)
diff --git a/my/common.py b/my/common.py
index 1b56fb5..22e9487 100644
--- a/my/common.py
+++ b/my/common.py
@@ -1,6 +1,6 @@
 from .core.warnings import high
+
 high("DEPRECATED! Please use my.core.common instead.")
 
 from .core import __NOT_HPI_MODULE__
-
 from .core.common import *
diff --git a/my/config.py b/my/config.py
index 2dd9cda..301bf49 100644
--- a/my/config.py
+++ b/my/config.py
@@ -9,17 +9,18 @@ This file is used for:
 - mypy: this file provides some type annotations
 - for loading the actual user config
 '''
+
+from __future__ import annotations
+
 #### NOTE: you won't need this line VVVV in your personal config
-from my.core import init  # noqa: F401
+from my.core import init  # noqa: F401  # isort: skip
 ###
 
 from datetime import tzinfo
 from pathlib import Path
-from typing import List
 
-from my.core import Paths, PathIsh
+from my.core import PathIsh, Paths
 
 
 class hypothesis:
@@ -75,14 +76,16 @@ class google:
     takeout_path: Paths = ''
 
 
-from typing import Sequence, Union, Tuple
-from datetime import datetime, date, timedelta
+from collections.abc import Sequence
+from datetime import date, datetime, timedelta
+from typing import Union
+
 DateIsh = Union[datetime, date, str]
-LatLon = Tuple[float, float]
+LatLon = tuple[float, float]
 class location:
     # todo ugh, need to think about it... mypy wants the type here to be general, otherwise it can't deduce
     # and we can't import the types from the module itself, otherwise would be circular. common module?
-    home: Union[LatLon, Sequence[Tuple[DateIsh, LatLon]]] = (1.0, -1.0)
+    home: LatLon | Sequence[tuple[DateIsh, LatLon]] = (1.0, -1.0)
     home_accuracy = 30_000.0
 
     class via_ip:
@@ -103,6 +106,8 @@ class location:
 
 
 from typing import Literal
+
+
 class time:
     class tz:
         policy: Literal['keep', 'convert', 'throw']
@@ -121,10 +126,9 @@ class arbtt:
     logfiles: Paths
 
 
-from typing import Optional
 class commits:
-    emails: Optional[Sequence[str]]
-    names: Optional[Sequence[str]]
+    emails: Sequence[str] | None
+    names: Sequence[str] | None
     roots: Sequence[PathIsh]
 
 
@@ -150,8 +154,8 @@ class tinder:
 class instagram:
     class android:
         export_path: Paths
-        username: Optional[str]
-        full_name: Optional[str]
+        username: str | None
+        full_name: str | None
 
     class gdpr:
         export_path: Paths
@@ -169,7 +173,7 @@ class materialistic:
 class fbmessenger:
     class fbmessengerexport:
         export_db: PathIsh
-        facebook_id: Optional[str]
+        facebook_id: str | None
     class android:
         export_path: Paths
 
@@ -247,7 +251,7 @@ class runnerup:
 class emfit:
     export_path: Path
     timezone: tzinfo
-    excluded_sids: List[str]
+    excluded_sids: list[str]
 
 
 class foursquare:
@@ -270,7 +274,7 @@ class roamresearch:
 class whatsapp:
     class android:
         export_path: Paths
-        my_user_id: Optional[str]
+        my_user_id: str | None
 
 
 class harmonic:
diff --git a/my/core/_deprecated/kompress.py b/my/core/_deprecated/kompress.py
index ce14fad..c3f333f 100644
--- a/my/core/_deprecated/kompress.py
+++ b/my/core/_deprecated/kompress.py
@@ -11,7 +11,7 @@ from collections.abc import Iterator, Sequence
 from datetime import datetime
 from functools import total_ordering
 from pathlib import Path
-from typing import IO, Any, Union
+from typing import IO, Union
 
 PathIsh = Union[Path, str]
diff --git a/my/core/common.py b/my/core/common.py
index 91fe9bd..aa994ea 100644
--- a/my/core/common.py
+++ b/my/core/common.py
@@ -63,7 +63,7 @@ def get_files(
         if '*' in gs:
             if glob != DEFAULT_GLOB:
                 warnings.medium(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
-            paths.extend(map(Path, do_glob(gs)))
+            paths.extend(map(Path, do_glob(gs)))  # noqa: PTH207
         elif os.path.isdir(str(src)):  # noqa: PTH112
             # NOTE: we're using os.path here on purpose instead of src.is_dir
             # the reason is is_dir for archives might return True and then
@@ -157,7 +157,7 @@ def get_valid_filename(s: str) -> str:
 
 # TODO deprecate and suggest to use one from my.core directly? not sure
-from .utils.itertools import unique_everseen
+from .utils.itertools import unique_everseen  # noqa: F401
 
 ### legacy imports, keeping them here for backwards compatibility
 ## hiding behind TYPE_CHECKING so it works in runtime
diff --git a/my/demo.py b/my/demo.py
index 0c54792..fa80b2a 100644
--- a/my/demo.py
+++ b/my/demo.py
@@ -1,12 +1,14 @@
 '''
 Just a demo module for testing and documentation purposes
 '''
+from __future__ import annotations
 
 import json
+from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone, tzinfo
 from pathlib import Path
-from typing import Iterable, Optional, Protocol, Sequence
+from typing import Protocol
 
 from my.core import Json, PathIsh, Paths, get_files
 
@@ -20,7 +22,7 @@ class config(Protocol):
     # this is to check optional attribute handling
     timezone: tzinfo = timezone.utc
 
-    external: Optional[PathIsh] = None
+    external: PathIsh | None = None
 
     @property
     def external_module(self):
diff --git a/my/emfit/__init__.py b/my/emfit/__init__.py
index 9934903..0d50b06 100644
--- a/my/emfit/__init__.py
+++ b/my/emfit/__init__.py
@@ -4,31 +4,34 @@
 Consumes data exported by https://github.com/karlicoss/emfitexport
 """
 
+from __future__ import annotations
+
 REQUIRES = [
     'git+https://github.com/karlicoss/emfitexport',
 ]
 
-from contextlib import contextmanager
 import dataclasses
-from datetime import datetime, time, timedelta
 import inspect
+from collections.abc import Iterable, Iterator
+from contextlib import contextmanager
+from datetime import datetime, time, timedelta
 from pathlib import Path
-from typing import Any, Dict, Iterable, Iterator, List, Optional
-
-from my.core import (
-    get_files,
-    stat,
-    Res,
-    Stats,
-)
-from my.core.cachew import cache_dir, mcachew
-from my.core.error import set_error_datetime, extract_error_datetime
-from my.core.pandas import DataFrameT
-
-from my.config import emfit as config
+from typing import Any
 
 import emfitexport.dal as dal
 
+from my.core import (
+    Res,
+    Stats,
+    get_files,
+    stat,
+)
+from my.core.cachew import cache_dir, mcachew
+from my.core.error import extract_error_datetime, set_error_datetime
+from my.core.pandas import DataFrameT
+
+from my.config import emfit as config  # isort: skip
+
 
 Emfit = dal.Emfit
 
@@ -85,7 +88,7 @@ def datas() -> Iterable[Res[Emfit]]:
 # TODO should be used for jawbone data as well?
 def pre_dataframe() -> Iterable[Res[Emfit]]:
     # TODO shit. I need some sort of interrupted sleep detection?
-    g: List[Emfit] = []
+    g: list[Emfit] = []
 
     def flush() -> Iterable[Res[Emfit]]:
         if len(g) == 0:
@@ -112,10 +115,10 @@ def pre_dataframe() -> Iterable[Res[Emfit]]:
 
 
 def dataframe() -> DataFrameT:
-    dicts: List[Dict[str, Any]] = []
-    last: Optional[Emfit] = None
+    dicts: list[dict[str, Any]] = []
+    last: Emfit | None = None
     for s in pre_dataframe():
-        d: Dict[str, Any]
+        d: dict[str, Any]
         if isinstance(s, Exception):
             edt = extract_error_datetime(s)
             d = {
@@ -166,11 +169,12 @@ def stats() -> Stats:
 
 @contextmanager
 def fake_data(nights: int = 500) -> Iterator:
-    from my.core.cfg import tmp_config
     from tempfile import TemporaryDirectory
 
     import pytz
 
+    from my.core.cfg import tmp_config
+
     with TemporaryDirectory() as td:
         tdir = Path(td)
         gen = dal.FakeData()
@@ -187,7 +191,7 @@ def fake_data(nights: int = 500) -> Iterator:
 
 
 # TODO remove/deprecate it? I think used by timeline
-def get_datas() -> List[Emfit]:
+def get_datas() -> list[Emfit]:
     # todo ugh. run lint properly
     return sorted(datas(), key=lambda e: e.start)  # type: ignore
diff --git a/my/endomondo.py b/my/endomondo.py
index 293a542..7732c00 100644
--- a/my/endomondo.py
+++ b/my/endomondo.py
@@ -7,13 +7,14 @@ REQUIRES = [
 ]
 # todo use ast in setup.py or doctor to extract the corresponding pip packages?
 
+from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Sequence, Iterable
+
+from my.config import endomondo as user_config
 
 from .core import Paths, get_files
 
-from my.config import endomondo as user_config
 
 @dataclass
 class endomondo(user_config):
@@ -33,15 +34,17 @@ def inputs() -> Sequence[Path]:
 import endoexport.dal as dal
 from endoexport.dal import Point, Workout  # noqa: F401
 
-
 from .core import Res
+
+
 # todo cachew?
 def workouts() -> Iterable[Res[Workout]]:
     _dal = dal.DAL(inputs())
     yield from _dal.workouts()
 
 
-from .core.pandas import check_dataframe, DataFrameT
+from .core.pandas import DataFrameT, check_dataframe
+
 
 @check_dataframe
 def dataframe(*, defensive: bool=True) -> DataFrameT:
@@ -75,7 +78,9 @@ def dataframe(*, defensive: bool=True) -> DataFrameT:
     return df
 
 
-from .core import stat, Stats
+from .core import Stats, stat
+
+
 def stats() -> Stats:
     return {
         # todo pretty print stats?
@@ -86,13 +91,16 @@ def stats() -> Stats:
 
 # TODO make sure it's possible to 'advise' functions and override stuff
 
+from collections.abc import Iterator
 from contextlib import contextmanager
-from typing import Iterator
+
+
 @contextmanager
 def fake_data(count: int=100) -> Iterator:
-    from my.core.cfg import tmp_config
-    from tempfile import TemporaryDirectory
     import json
+    from tempfile import TemporaryDirectory
+
+    from my.core.cfg import tmp_config
     with TemporaryDirectory() as td:
         tdir = Path(td)
         fd = dal.FakeData()
diff --git a/my/error.py b/my/error.py
index c0b734c..e3c1e11 100644
--- a/my/error.py
+++ b/my/error.py
@@ -1,6 +1,6 @@
 from .core.warnings import high
+
 high("DEPRECATED! Please use my.core.error instead.")
 
 from .core import __NOT_HPI_MODULE__
-
 from .core.error import *
diff --git a/my/experimental/destructive_parsing.py b/my/experimental/destructive_parsing.py
index b389f7e..0c4092a 100644
--- a/my/experimental/destructive_parsing.py
+++ b/my/experimental/destructive_parsing.py
@@ -1,5 +1,6 @@
+from collections.abc import Iterator
 from dataclasses import dataclass
-from typing import Any, Iterator, List, Tuple
+from typing import Any
 
 from my.core.compat import NoneType, assert_never
 
@@ -9,7 +10,7 @@ from my.core.compat import NoneType, assert_never
 class Helper:
     manager: 'Manager'
     item: Any  # todo realistically, list or dict? could at least type as indexable or something
-    path: Tuple[str, ...]
+    path: tuple[str, ...]
 
     def pop_if_primitive(self, *keys: str) -> None:
         """
@@ -40,9 +41,9 @@ def is_empty(x) -> bool:
 
 class Manager:
     def __init__(self) -> None:
-        self.helpers: List[Helper] = []
+        self.helpers: list[Helper] = []
 
-    def helper(self, item: Any, *, path: Tuple[str, ...] = ()) -> Helper:
+    def helper(self, item: Any, *, path: tuple[str, ...] = ()) -> Helper:
         res = Helper(manager=self, item=item, path=path)
         self.helpers.append(res)
         return res
diff --git a/my/fbmessenger/__init__.py b/my/fbmessenger/__init__.py
index 40fb235..f729de9 100644
--- a/my/fbmessenger/__init__.py
+++ b/my/fbmessenger/__init__.py
@@ -20,6 +20,7 @@ REQUIRES = [
 
 from my.core.hpi_compat import handle_legacy_import
 
+
 is_legacy_import = handle_legacy_import(
     parent_module_name=__name__,
     legacy_submodule_name='export',
diff --git a/my/fbmessenger/all.py b/my/fbmessenger/all.py
index 13689db..a057dca 100644
--- a/my/fbmessenger/all.py
+++ b/my/fbmessenger/all.py
@@ -1,10 +1,10 @@
-from typing import Iterator
-from my.core import Res, stat, Stats
+from collections.abc import Iterator
+
+from my.core import Res, Stats
 from my.core.source import import_source
 
 from .common import Message, _merge_messages
 
-
 src_export = import_source(module_name='my.fbmessenger.export')
 src_android = import_source(module_name='my.fbmessenger.android')
diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py
index effabab..a16d924 100644
--- a/my/fbmessenger/android.py
+++ b/my/fbmessenger/android.py
@@ -4,19 +4,20 @@ Messenger data from Android app database (in =/data/data/com.facebook.orca/datab
 
 from __future__ import annotations
 
+import sqlite3
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-import sqlite3
-from typing import Iterator, Sequence, Optional, Dict, Union, List
+from typing import Union
 
-from my.core import get_files, Paths, datetime_aware, Res, LazyLogger, make_config
+from my.core import LazyLogger, Paths, Res, datetime_aware, get_files, make_config
 from my.core.common import unique_everseen
 from my.core.compat import assert_never
 from my.core.error import echain
 from my.core.sqlite import sqlite_connection
 
-from my.config import fbmessenger as user_config
+from my.config import fbmessenger as user_config  # isort: skip
 
 
 logger = LazyLogger(__name__)
@@ -27,7 +28,7 @@ class Config(user_config.android):
     # paths[s]/glob to the exported sqlite databases
     export_path: Paths
 
-    facebook_id: Optional[str] = None
+    facebook_id: str | None = None
 
 
 # hmm. this is necessary for default value (= None) to work
@@ -42,13 +43,13 @@ def inputs() -> Sequence[Path]:
 @dataclass(unsafe_hash=True)
 class Sender:
     id: str
-    name: Optional[str]
+    name: str | None
 
 
 @dataclass(unsafe_hash=True)
 class Thread:
     id: str
-    name: Optional[str]  # isn't set for groups or one to one messages
+    name: str | None  # isn't set for groups or one to one messages
 
 # todo not sure about order of fields...
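
Editorial aside (not part of the patch): a recurring move in this diff is pinning the `from my.config import ... as user_config` line below the `my.core` imports with `# isort: skip`. The directive tells isort to leave that single line exactly where it is, so future re-sorts can't float the user-config import back up — presumably because `my.config` is user-supplied and is meant to be imported only after the `my.core` machinery. A minimal sketch of the pattern, reusing names from this diff (assumes an HPI checkout):

```python
from collections.abc import Sequence  # stdlib: sorted normally by isort

from my.core import Paths, get_files  # first-party: sorted normally

from my.config import fbmessenger as user_config  # isort: skip  # pinned last
```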
@@ -56,14 +57,14 @@ class _BaseMessage:
     id: str
     dt: datetime_aware
-    text: Optional[str]
+    text: str | None
 
 
 @dataclass(unsafe_hash=True)
 class _Message(_BaseMessage):
     thread_id: str
     sender_id: str
-    reply_to_id: Optional[str]
+    reply_to_id: str | None
 
 
 # todo hmm, on the one hand would be kinda nice to inherit common.Message protocol here
@@ -72,7 +73,7 @@ class _Message(_BaseMessage):
 class Message(_BaseMessage):
     thread: Thread
     sender: Sender
-    reply_to: Optional[Message]
+    reply_to: Message | None
 
 
 Entity = Union[Sender, Thread, _Message]
@@ -110,7 +111,7 @@ def _normalise_thread_id(key) -> str:
 # NOTE: this is sort of copy pasted from other _process_db method
 # maybe later could unify them
 def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
-    senders: Dict[str, Sender] = {}
+    senders: dict[str, Sender] = {}
     for r in db.execute('SELECT CAST(id AS TEXT) AS id, name FROM contacts'):
         s = Sender(
             id=r['id'],  # looks like it's server id? same used on facebook site
@@ -127,7 +128,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 
     # TODO can we get it from db? could infer as the most common id perhaps?
     self_id = config.facebook_id
-    thread_users: Dict[str, List[Sender]] = {}
+    thread_users: dict[str, list[Sender]] = {}
     for r in db.execute('SELECT CAST(thread_key AS TEXT) AS thread_key, CAST(contact_id AS TEXT) AS contact_id FROM participants'):
         thread_key = r['thread_key']
         user_key = r['contact_id']
@@ -193,7 +194,7 @@ def _process_db_msys(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
 
 
 def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
-    senders: Dict[str, Sender] = {}
+    senders: dict[str, Sender] = {}
     for r in db.execute('''SELECT * FROM thread_users'''):
         # for messaging_actor_type == 'REDUCED_MESSAGING_ACTOR', name is None
         # but they are still referenced, so need to keep
@@ -207,7 +208,7 @@ def _process_db_threads_db2(db: sqlite3.Connection) -> Iterator[Res[Entity]]:
         yield s
 
     self_id = config.facebook_id
-    thread_users: Dict[str, List[Sender]] = {}
+    thread_users: dict[str, list[Sender]] = {}
     for r in db.execute('SELECT * from thread_participants'):
         thread_key = r['thread_key']
         user_key = r['user_key']
@@ -267,9 +268,9 @@ def contacts() -> Iterator[Res[Sender]]:
 
 
 def messages() -> Iterator[Res[Message]]:
-    senders: Dict[str, Sender] = {}
-    msgs: Dict[str, Message] = {}
-    threads: Dict[str, Thread] = {}
+    senders: dict[str, Sender] = {}
+    msgs: dict[str, Message] = {}
+    threads: dict[str, Thread] = {}
     for x in unique_everseen(_entities):
         if isinstance(x, Exception):
             yield x
diff --git a/my/fbmessenger/common.py b/my/fbmessenger/common.py
index 33d1b20..0f5a374 100644
--- a/my/fbmessenger/common.py
+++ b/my/fbmessenger/common.py
@@ -1,6 +1,9 @@
-from my.core import __NOT_HPI_MODULE__
+from __future__ import annotations
 
-from typing import Iterator, Optional, Protocol
+from my.core import __NOT_HPI_MODULE__  # isort: skip
+
+from collections.abc import Iterator
+from typing import Protocol
 
 from my.core import datetime_aware
 
@@ -10,7 +13,7 @@ class Thread(Protocol):
     def id(self) -> str: ...
 
     @property
-    def name(self) -> Optional[str]: ...
+    def name(self) -> str | None: ...
 
 
 class Sender(Protocol):
@@ -18,7 +21,7 @@ class Sender(Protocol):
     def id(self) -> str: ...
 
     @property
-    def name(self) -> Optional[str]: ...
+    def name(self) -> str | None: ...
 
 
 class Message(Protocol):
@@ -29,7 +32,7 @@ class Message(Protocol):
     def dt(self) -> datetime_aware: ...
 
     @property
-    def text(self) -> Optional[str]: ...
+    def text(self) -> str | None: ...
 
     @property
     def thread(self) -> Thread: ...
@@ -39,8 +42,11 @@ class Message(Protocol):
 
 from itertools import chain
+
 from more_itertools import unique_everseen
-from my.core import warn_if_empty, Res
+
+from my.core import Res, warn_if_empty
+
 
 @warn_if_empty
 def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
diff --git a/my/fbmessenger/export.py b/my/fbmessenger/export.py
index 201fad8..3b06618 100644
--- a/my/fbmessenger/export.py
+++ b/my/fbmessenger/export.py
@@ -7,16 +7,15 @@ REQUIRES = [
     'git+https://github.com/karlicoss/fbmessengerexport',
 ]
 
+from collections.abc import Iterator
 from contextlib import ExitStack, contextmanager
 from dataclasses import dataclass
-from typing import Iterator
-
-from my.core import PathIsh, Res, stat, Stats
-from my.core.warnings import high
-from my.config import fbmessenger as user_config
 
 import fbmessengerexport.dal as messenger
 
+from my.config import fbmessenger as user_config
+from my.core import PathIsh, Res, Stats, stat
+from my.core.warnings import high
 
 ###
 # support old style config
diff --git a/my/foursquare.py b/my/foursquare.py
index 394fdf3..3b418aa 100644
--- a/my/foursquare.py
+++ b/my/foursquare.py
@@ -2,15 +2,14 @@
 Foursquare/Swarm checkins
 '''
 
-from datetime import datetime, timezone, timedelta
-from itertools import chain
 import json
+from datetime import datetime, timedelta, timezone
+from itertools import chain
 
-# TODO pytz for timezone???
-
-from my.core import get_files, make_logger
 from my.config import foursquare as config
 
+# TODO pytz for timezone???
+from my.core import get_files, make_logger
 
 logger = make_logger(__name__)
diff --git a/my/github/all.py b/my/github/all.py
index f885dde..f5e13cf 100644
--- a/my/github/all.py
+++ b/my/github/all.py
@@ -3,8 +3,7 @@ Unified Github data (merged from GDPR export and periodic API updates)
 """
 
 from . import gdpr, ghexport
-
-from .common import merge_events, Results
+from .common import Results, merge_events
 
 
 def events() -> Results:
diff --git a/my/github/common.py b/my/github/common.py
index e54bc4d..22ba47e 100644
--- a/my/github/common.py
+++ b/my/github/common.py
@@ -1,24 +1,27 @@
 """
 Github events and their metadata: comments/issues/pull requests
 """
-from ..core import __NOT_HPI_MODULE__
+
+from __future__ import annotations
+
+from my.core import __NOT_HPI_MODULE__  # isort: skip
 
+from collections.abc import Iterable
 from datetime import datetime, timezone
-from typing import Optional, NamedTuple, Iterable, Set, Tuple
+from typing import NamedTuple, Optional
 
-from ..core import warn_if_empty, LazyLogger
-from ..core.error import Res
+from my.core import make_logger, warn_if_empty
+from my.core.error import Res
 
-
-logger = LazyLogger(__name__)
+logger = make_logger(__name__)
 
 
 class Event(NamedTuple):
     dt: datetime
     summary: str
     eid: str
     link: Optional[str]
-    body: Optional[str]=None
+    body: Optional[str] = None
     is_bot: bool = False
 
 
@@ -27,7 +30,7 @@ Results = Iterable[Res[Event]]
 @warn_if_empty
 def merge_events(*sources: Results) -> Results:
     from itertools import chain
-    emitted: Set[Tuple[datetime, str]] = set()
+    emitted: set[tuple[datetime, str]] = set()
     for e in chain(*sources):
         if isinstance(e, Exception):
             yield e
@@ -52,7 +55,7 @@ def parse_dt(s: str) -> datetime:
 # experimental way of supportint event ids... not sure
 class EventIds:
     @staticmethod
-    def repo_created(*, dts: str, name: str, ref_type: str, ref: Optional[str]) -> str:
+    def repo_created(*, dts: str, name: str, ref_type: str, ref: str | None) -> str:
         return f'{dts}_repocreated_{name}_{ref_type}_{ref}'
 
     @staticmethod
diff --git a/my/github/gdpr.py b/my/github/gdpr.py
index a56ff46..be56454 100644
--- a/my/github/gdpr.py
+++ b/my/github/gdpr.py
@@ -6,8 +6,9 @@ from __future__ import annotations
 
 import json
 from abc import abstractmethod
+from collections.abc import Iterator, Sequence
 from pathlib import Path
-from typing import Any, Iterator, Sequence
+from typing import Any
 
 from my.core import Paths, Res, Stats, get_files, make_logger, stat, warnings
 from my.core.error import echain
diff --git a/my/github/ghexport.py b/my/github/ghexport.py
index 80106a5..3e17c10 100644
--- a/my/github/ghexport.py
+++ b/my/github/ghexport.py
@@ -1,13 +1,17 @@
 """
 Github data: events, comments, etc. (API data)
 """
+
+from __future__ import annotations
+
 REQUIRES = [
     'git+https://github.com/karlicoss/ghexport',
 ]
 
+
 from dataclasses import dataclass
 
-from my.core import Paths
 from my.config import github as user_config
+from my.core import Paths
 
 
 @dataclass
@@ -21,7 +25,9 @@ class github(user_config):
 
 ###
 
-from my.core.cfg import make_config, Attrs
+from my.core.cfg import Attrs, make_config
+
+
 def migration(attrs: Attrs) -> Attrs:
     export_dir = 'export_dir'
     if export_dir in attrs:  # legacy name
@@ -41,15 +47,14 @@ except ModuleNotFoundError as e:
 
 ############################
 
+from collections.abc import Sequence
 from functools import lru_cache
 from pathlib import Path
-from typing import Tuple, Dict, Sequence, Optional
 
-from my.core import get_files, LazyLogger
+from my.core import LazyLogger, get_files
 from my.core.cachew import mcachew
 
-from .common import Event, parse_dt, Results, EventIds
-
+from .common import Event, EventIds, Results, parse_dt
 
 logger = LazyLogger(__name__)
 
@@ -82,7 +87,9 @@ def _events() -> Results:
         yield e
 
 
-from my.core import stat, Stats
+from my.core import Stats, stat
+
+
 def stats() -> Stats:
     return {
         **stat(events),
@@ -99,7 +106,7 @@ def _log_if_unhandled(e) -> None:
 Link = str
 EventId = str
 Body = str
-def _get_summary(e) -> Tuple[str, Optional[Link], Optional[EventId], Optional[Body]]:
+def _get_summary(e) -> tuple[str, Link | None, EventId | None, Body | None]:
     # TODO would be nice to give access to raw event within timeline
     dts = e['created_at']
     eid = e['id']
@@ -195,7 +202,7 @@ def _get_summary(e) -> Tuple[str, Optional[Link], Optional[EventId], Optional[Bo
     return tp, None, None, None
 
 
-def _parse_event(d: Dict) -> Event:
+def _parse_event(d: dict) -> Event:
     summary, link, eid, body = _get_summary(d)
     if eid is None:
         eid = d['id']  # meh
diff --git a/my/goodreads.py b/my/goodreads.py
index 864bd64..559efda 100644
--- a/my/goodreads.py
+++ b/my/goodreads.py
@@ -7,15 +7,18 @@ REQUIRES = [
 
 from dataclasses import dataclass
 
-from my.core import datetime_aware, Paths
 from my.config import goodreads as user_config
+from my.core import Paths, datetime_aware
+
 
 @dataclass
 class goodreads(user_config):
     # paths[s]/glob to the exported JSON data
     export_path: Paths
 
-from my.core.cfg import make_config, Attrs
+from my.core.cfg import Attrs, make_config
+
 
 def _migration(attrs: Attrs) -> Attrs:
     export_dir = 'export_dir'
@@ -29,18 +32,19 @@ config = make_config(goodreads, migration=_migration)
 
 #############################3
 
-from my.core import get_files
-from typing import Sequence, Iterator
+from collections.abc import Iterator, Sequence
 from pathlib import Path
 
+from my.core import get_files
+
+
 def inputs() -> Sequence[Path]:
     return get_files(config.export_path)
 
 
 from datetime import datetime
+
 import pytz
-
-
 from goodrexport import dal
diff --git a/my/google/maps/_android_protobuf.py b/my/google/maps/_android_protobuf.py
index 1d43ae0..615623d 100644
--- a/my/google/maps/_android_protobuf.py
+++ b/my/google/maps/_android_protobuf.py
@@ -1,8 +1,8 @@
-from my.core import __NOT_HPI_MODULE__
+from my.core import __NOT_HPI_MODULE__  # isort: skip
 
 # NOTE: this tool was quite useful https://github.com/aj3423/aproto
 
-from google.protobuf import descriptor_pool, descriptor_pb2, message_factory
+from google.protobuf import descriptor_pb2, descriptor_pool, message_factory
 
 TYPE_STRING = descriptor_pb2.FieldDescriptorProto.TYPE_STRING
 TYPE_BYTES = descriptor_pb2.FieldDescriptorProto.TYPE_BYTES
diff --git a/my/google/maps/android.py b/my/google/maps/android.py
index 279231a..95ecacf 100644
--- a/my/google/maps/android.py
+++ b/my/google/maps/android.py
@@ -7,20 +7,20 @@ REQUIRES = [
     "protobuf",  # for parsing blobs from the database
 ]
 
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Iterator, Optional, Sequence
+from typing import Any
 from urllib.parse import quote
 
-from my.core import datetime_aware, get_files, LazyLogger, Paths, Res
+from my.core import LazyLogger, Paths, Res, datetime_aware, get_files
 from my.core.common import unique_everseen
 from my.core.sqlite import sqlite_connection
 
-import my.config
-
 from ._android_protobuf import parse_labeled, parse_list, parse_place
 
+import my.config  # isort: skip
 
 logger = LazyLogger(__name__)
 
@@ -59,8 +59,8 @@ class Place:
     updated_at: datetime_aware  # TODO double check it's utc?
     title: str
     location: Location
-    address: Optional[str]
-    note: Optional[str]
+    address: str | None
+    note: str | None
 
     @property
     def place_url(self) -> str:
diff --git a/my/google/takeout/html.py b/my/google/takeout/html.py
index 750beac..3f2b5db 100644
--- a/my/google/takeout/html.py
+++ b/my/google/takeout/html.py
@@ -2,18 +2,22 @@
 Google Takeout exports: browsing history, search/youtube/google play activity
 '''
 
-from enum import Enum
+from __future__ import annotations
+
+from my.core import __NOT_HPI_MODULE__  # isort: skip
+
 import re
-from pathlib import Path
+from collections.abc import Iterable
 from datetime import datetime
+from enum import Enum
 from html.parser import HTMLParser
-from typing import List, Optional, Any, Callable, Iterable, Tuple
+from pathlib import Path
+from typing import Any, Callable
 from urllib.parse import unquote
 
 import pytz
 
-from ...core.time import abbr_to_timezone
-
+from my.core.time import abbr_to_timezone
 
 # NOTE: https://bugs.python.org/issue22377 %Z doesn't work properly
 _TIME_FORMATS = [
@@ -36,7 +40,7 @@ def parse_dt(s: str) -> datetime:
         s, tzabbr = s.rsplit(maxsplit=1)
         tz = abbr_to_timezone(tzabbr)
 
-    dt: Optional[datetime] = None
+    dt: datetime | None = None
     for fmt in _TIME_FORMATS:
         try:
             dt = datetime.strptime(s, fmt)
@@ -73,7 +77,7 @@ class State(Enum):
 
 Url = str
 Title = str
-Parsed = Tuple[datetime, Url, Title]
+Parsed = tuple[datetime, Url, Title]
 Callback = Callable[[datetime, Url, Title], None]
 
 
@@ -83,9 +87,9 @@ class TakeoutHTMLParser(HTMLParser):
         super().__init__()
         self.state: State = State.OUTSIDE
 
-        self.title_parts: List[str] = []
-        self.title: Optional[str] = None
-        self.url: Optional[str] = None
+        self.title_parts: list[str] = []
+        self.title: str | None = None
+        self.url: str | None = None
 
         self.callback = callback
 
@@ -148,7 +152,7 @@ class TakeoutHTMLParser(HTMLParser):
 
 def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
-    results: List[Parsed] = []
+    results: list[Parsed] = []
     def cb(dt: datetime, url: Url, title: Title) -> None:
         results.append((dt, url, title))
     parser = TakeoutHTMLParser(callback=cb)
@@ -156,5 +160,3 @@ def read_html(tpath: Path, file: str) -> Iterable[Parsed]:
         data = fo.read()
     parser.feed(data)
     return results
-
-from ...core import __NOT_HPI_MODULE__
diff --git a/my/google/takeout/parser.py b/my/google/takeout/parser.py
index 170553a..80c2be1 100644
--- a/my/google/takeout/parser.py
+++ b/my/google/takeout/parser.py
@@ -14,24 +14,27 @@ the cachew cache
 
 REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"]
 
+import os
+from collections.abc import Sequence
 from contextlib import ExitStack
 from dataclasses import dataclass
-import os
-from typing import List, Sequence, cast
 from pathlib import Path
-from my.core import make_config, stat, Stats, get_files, Paths, make_logger
+from typing import cast
+
+from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
+
+from my.core import Paths, Stats, get_files, make_config, make_logger, stat
 from my.core.cachew import mcachew
 from my.core.error import ErrorPolicy
 from my.core.structure import match_structure
-
 from my.core.time import user_forced
-from google_takeout_parser.parse_html.html_time_utils import ABBR_TIMEZONES
+
 ABBR_TIMEZONES.extend(user_forced())
 
 import google_takeout_parser
-from google_takeout_parser.path_dispatch import TakeoutParser
-from google_takeout_parser.merge import GoogleEventSet, CacheResults
+from google_takeout_parser.merge import CacheResults, GoogleEventSet
 from google_takeout_parser.models import BaseEvent
+from google_takeout_parser.path_dispatch import TakeoutParser
 
 # see https://github.com/seanbreckenridge/dotfiles/blob/master/.config/my/my/config/__init__.py for an example
 from my.config import google as user_config
@@ -56,6 +59,7 @@ logger = make_logger(__name__, level="warning")
 
 # patch the takeout parser logger to match the computed loglevel
 from google_takeout_parser.log import setup as setup_takeout_logger
+
 setup_takeout_logger(logger.level)
 
 
@@ -83,7 +87,7 @@ except ImportError:
 google_takeout_version = str(getattr(google_takeout_parser, '__version__', 'unknown'))
 
 
-def _cachew_depends_on() -> List[str]:
+def _cachew_depends_on() -> list[str]:
     exports = sorted([str(p) for p in inputs()])
     # add google takeout parser pip version to hash, so this re-creates on breaking changes
     exports.insert(0, f"google_takeout_version: {google_takeout_version}")
diff --git a/my/google/takeout/paths.py b/my/google/takeout/paths.py
index 948cf2e..6a523e2 100644
--- a/my/google/takeout/paths.py
+++ b/my/google/takeout/paths.py
@@ -2,13 +2,17 @@
 Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
 '''
 
+from __future__ import annotations
+
+from my.core import __NOT_HPI_MODULE__  # isort: skip
+
 from abc import abstractmethod
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Iterable, Optional, Protocol
 
 from more_itertools import last
 
-from my.core import __NOT_HPI_MODULE__, Paths, get_files
+from my.core import Paths, get_files
 
 
 class config:
@@ -33,7 +37,7 @@ def make_config() -> config:
     return combined_config()
 
 
-def get_takeouts(*, path: Optional[str] = None) -> Iterable[Path]:
+def get_takeouts(*, path: str | None = None) -> Iterable[Path]:
     """
     Sometimes google splits takeout into multiple archives, so we need to detect the ones that contain the path we need
     """
@@ -45,7 +49,7 @@ def get_takeouts(*, path: Optional[str] = None) -> Iterable[Path]:
             yield takeout
 
 
-def get_last_takeout(*, path: Optional[str] = None) -> Optional[Path]:
+def get_last_takeout(*, path: str | None = None) -> Path | None:
     return last(get_takeouts(path=path), default=None)
diff --git a/my/hackernews/dogsheep.py b/my/hackernews/dogsheep.py
index de6c58d..8303284 100644
--- a/my/hackernews/dogsheep.py
+++ b/my/hackernews/dogsheep.py
@@ -3,14 +3,14 @@ Hackernews data via Dogsheep [[hacker-news-to-sqlite][https://github.com/dogshee
 """
 from __future__ import annotations
 
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Iterator, Sequence, Optional
 
-from my.core import get_files, Paths, Res, datetime_aware
-from my.core.sqlite import sqlite_connection
 import my.config
+from my.core import Paths, Res, datetime_aware, get_files
+from my.core.sqlite import sqlite_connection
 
 from .common import hackernews_link
 
@@ -33,9 +33,9 @@ class Item:
     id: str
     type: str
     created: datetime_aware  # checked and it's utc
-    title: Optional[str]  # only present for Story
-    text_html: Optional[str]  # should be present for Comment and might for Story
-    url: Optional[str]  # might be present for Story
+    title: str | None  # only present for Story
+    text_html: str | None  # should be present for Comment and might for Story
+    url: str | None  # might be present for Story
     # todo process 'deleted'? fields?
     # todo process 'parent'?
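
Editorial aside (not part of the patch): most per-file changes in this commit follow the same recipe visible in the dogsheep module above — add `from __future__ import annotations`, then rewrite `Optional[X]` as `X | None`. The future import (PEP 563) keeps annotations as unevaluated strings, which is what makes the PEP 604 `|` syntax safe in annotation position on Pythons older than 3.10. A self-contained sketch:

```python
from __future__ import annotations  # PEP 563: annotations are never evaluated

from dataclasses import dataclass


@dataclass
class Item:
    id: str
    title: str | None  # ok on 3.8/3.9 thanks to the future import
    url: str | None


print(Item(id='42', title=None, url=None))
```

Note that `str | None` as a *runtime* expression (e.g. in an `isinstance` check or a type alias) still requires Python 3.10+; the future import only covers annotations.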
diff --git a/my/hackernews/harmonic.py b/my/hackernews/harmonic.py
index 3b4ae61..08a82e6 100644
--- a/my/hackernews/harmonic.py
+++ b/my/hackernews/harmonic.py
@@ -1,17 +1,22 @@
 """
 [[https://play.google.com/store/apps/details?id=com.simon.harmonichackernews][Harmonic]] app for Hackernews
 """
+
+from __future__ import annotations
+
 REQUIRES = ['lxml', 'orjson']
 
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
-import orjson
 from pathlib import Path
-from typing import Any, Dict, Iterator, List, Optional, Sequence, TypedDict, cast
+from typing import Any, TypedDict, cast
 
+import orjson
 from lxml import etree
 from more_itertools import one
 
+import my.config
 from my.core import (
     Paths,
     Res,
@@ -22,8 +27,10 @@ from my.core import (
     stat,
 )
 from my.core.common import unique_everseen
-import my.config
-from .common import hackernews_link, SavedBase
+
+from .common import SavedBase, hackernews_link
+
+import my.config  # isort: skip
 
 logger = make_logger(__name__)
 
@@ -43,7 +50,7 @@ class Cached(TypedDict):
     created_at_i: int
     id: str
     points: int
-    test: Optional[str]
+    test: str | None
     title: str
     type: str  # TODO Literal['story', 'comment']? comments are only in 'children' field tho
     url: str
@@ -94,16 +101,16 @@ def _saved() -> Iterator[Res[Saved]]:
     # TODO defensive for each item!
     tr = etree.parse(path)
 
-    res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
+    res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORIES_STRINGS"]')))
     cached_ids = [x.text.split('-')[0] for x in res]
 
-    cached: Dict[str, Cached] = {}
+    cached: dict[str, Cached] = {}
     for sid in cached_ids:
-        res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORY{sid}"]')))
+        res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_CACHED_STORY{sid}"]')))
         j = orjson.loads(res.text)
         cached[sid] = j
 
-    res = one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}_BOOKMARKS"]')))
+    res = one(cast(list[Any], tr.xpath(f'//*[@name="{_PREFIX}_BOOKMARKS"]')))
    for x in res.text.split('-'):
         ids, item_timestamp = x.split('q')
         # not sure if timestamp is any useful?
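
Editorial aside (not part of the patch): the other mechanical rewrite, visible above in harmonic's `cast(list[Any], ...)` and `dict[str, Cached]` changes, is PEP 585. Since Python 3.9 the builtin containers are subscriptable directly and the abstract types (`Iterator`, `Sequence`, ...) live in `collections.abc`, so the `typing.List`/`typing.Dict` aliases can be dropped. A standalone sketch:

```python
from collections.abc import Iterator, Sequence


def pairs(xs: Sequence[str]) -> Iterator[tuple[int, str]]:
    seen: dict[str, int] = {}  # was: typing.Dict[str, int]
    for i, x in enumerate(xs):
        seen.setdefault(x, i)      # remember the first index of each value
        yield seen[x], x


print(list(pairs(['a', 'b', 'a'])))  # [(0, 'a'), (1, 'b'), (0, 'a')]
```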
diff --git a/my/hackernews/materialistic.py b/my/hackernews/materialistic.py index 4d5cd47..ccf285b 100644 --- a/my/hackernews/materialistic.py +++ b/my/hackernews/materialistic.py @@ -1,19 +1,20 @@ """ [[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews """ +from collections.abc import Iterator, Sequence from datetime import datetime, timezone from pathlib import Path -from typing import Any, Dict, Iterator, NamedTuple, Sequence +from typing import Any, NamedTuple from more_itertools import unique_everseen -from my.core import get_files, datetime_aware, make_logger +from my.core import datetime_aware, get_files, make_logger from my.core.sqlite import sqlite_connection -from my.config import materialistic as config # todo migrate config to my.hackernews.materialistic - from .common import hackernews_link +# todo migrate config to my.hackernews.materialistic +from my.config import materialistic as config # isort: skip logger = make_logger(__name__) @@ -22,7 +23,7 @@ def inputs() -> Sequence[Path]: return get_files(config.export_path) -Row = Dict[str, Any] +Row = dict[str, Any] class Saved(NamedTuple): diff --git a/my/hypothesis.py b/my/hypothesis.py index 82104cd..15e854b 100644 --- a/my/hypothesis.py +++ b/my/hypothesis.py @@ -4,20 +4,22 @@ REQUIRES = [ 'git+https://github.com/karlicoss/hypexport', ] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Iterator, Sequence, TYPE_CHECKING +from typing import TYPE_CHECKING from my.core import ( - get_files, - stat, Paths, Res, Stats, + get_files, + stat, ) from my.core.cfg import make_config from my.core.hpi_compat import always_supports_sequence -import my.config + +import my.config # isort: skip @dataclass diff --git a/my/instagram/all.py b/my/instagram/all.py index 8007399..214e6ac 100644 --- a/my/instagram/all.py +++ b/my/instagram/all.py @@ -1,11 +1,10 @@ -from typing import Iterator +from collections.abc import Iterator -from my.core import Res, stat, Stats +from my.core import Res, Stats, stat from my.core.source import import_source from .common import Message, _merge_messages - src_gdpr = import_source(module_name='my.instagram.gdpr') @src_gdpr def _messages_gdpr() -> Iterator[Res[Message]]: diff --git a/my/instagram/android.py b/my/instagram/android.py index 96b75d2..12c11d3 100644 --- a/my/instagram/android.py +++ b/my/instagram/android.py @@ -3,30 +3,29 @@ Bumble data from Android app database (in =/data/data/com.instagram.android/data """ from __future__ import annotations +import json +import sqlite3 +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime -import json from pathlib import Path -import sqlite3 -from typing import Iterator, Sequence, Optional, Dict, Union from my.core import ( - get_files, - Paths, - make_config, - make_logger, - datetime_naive, Json, + Paths, Res, assert_never, + datetime_naive, + get_files, + make_config, + make_logger, ) -from my.core.common import unique_everseen from my.core.cachew import mcachew +from my.core.common import unique_everseen from my.core.error import echain -from my.core.sqlite import sqlite_connect_immutable, select - -from my.config import instagram as user_config +from my.core.sqlite import select, sqlite_connect_immutable +from my.config import instagram as user_config # isort: skip logger = make_logger(__name__) @@ -38,8 +37,8 @@ class instagram_android_config(user_config.android): # 
sadly doesn't seem easy to extract user's own handle/name from the db... # todo maybe makes more sense to keep in parent class? not sure... - username: Optional[str] = None - full_name: Optional[str] = None + username: str | None = None + full_name: str | None = None config = make_config(instagram_android_config) @@ -101,13 +100,13 @@ class MessageError(RuntimeError): return self.rest == other.rest -def _parse_message(j: Json) -> Optional[_Message]: +def _parse_message(j: Json) -> _Message | None: id = j['item_id'] t = j['item_type'] tid = j['thread_key']['thread_id'] uid = j['user_id'] created = datetime.fromtimestamp(int(j['timestamp']) / 1_000_000) - text: Optional[str] = None + text: str | None = None if t == 'text': text = j['text'] elif t == 'reel_share': @@ -133,7 +132,7 @@ def _parse_message(j: Json) -> Optional[_Message]: ) -def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]: +def _process_db(db: sqlite3.Connection) -> Iterator[Res[User | _Message]]: # TODO ugh. seems like no way to extract username? # sometimes messages (e.g. media_share) contain it in message field # but generally it's not present. ugh @@ -175,7 +174,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]: yield e -def _entities() -> Iterator[Res[Union[User, _Message]]]: +def _entities() -> Iterator[Res[User | _Message]]: # NOTE: definitely need to merge multiple, app seems to recycle old messages # TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data... # todo use TypedDict? @@ -194,7 +193,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]: @mcachew(depends_on=inputs) def messages() -> Iterator[Res[Message]]: - id2user: Dict[str, User] = {} + id2user: dict[str, User] = {} for x in unique_everseen(_entities): if isinstance(x, Exception): yield x diff --git a/my/instagram/common.py b/my/instagram/common.py index 4df07a1..17d130f 100644 --- a/my/instagram/common.py +++ b/my/instagram/common.py @@ -1,9 +1,10 @@ +from collections.abc import Iterator from dataclasses import replace from datetime import datetime from itertools import chain -from typing import Iterator, Dict, Any, Protocol +from typing import Any, Protocol -from my.core import warn_if_empty, Res +from my.core import Res, warn_if_empty class User(Protocol): @@ -40,7 +41,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]: # ugh. 
seems that GDPR thread ids are completely uncorrelated to any android ids (tried searching over all sqlite dump) # so the only way to correlate is to try and match messages # we also can't use unique_everseen here, otherwise will never get a chance to unify threads - mmap: Dict[str, Message] = {} + mmap: dict[str, Message] = {} thread_map = {} user_map = {} @@ -60,7 +61,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]: user_map[m.user.id] = mm.user else: # not emitted yet, need to emit - repls: Dict[str, Any] = {} + repls: dict[str, Any] = {} tid = thread_map.get(m.thread_id) if tid is not None: repls['thread_id'] = tid diff --git a/my/instagram/gdpr.py b/my/instagram/gdpr.py index 1415d55..7454a04 100644 --- a/my/instagram/gdpr.py +++ b/my/instagram/gdpr.py @@ -2,26 +2,27 @@ Instagram data (uses [[https://www.instagram.com/download/request][official GDPR export]]) """ +from __future__ import annotations + +import json +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime -import json from pathlib import Path -from typing import Iterator, Sequence, Dict, Union from more_itertools import bucket from my.core import ( - get_files, Paths, - datetime_naive, Res, assert_never, + datetime_naive, + get_files, make_logger, ) from my.core.common import unique_everseen -from my.config import instagram as user_config - +from my.config import instagram as user_config # isort: skip logger = make_logger(__name__) @@ -70,7 +71,7 @@ def _decode(s: str) -> str: return s.encode('latin-1').decode('utf8') -def _entities() -> Iterator[Res[Union[User, _Message]]]: +def _entities() -> Iterator[Res[User | _Message]]: # it's worth processing all previous export -- sometimes instagram removes some metadata from newer ones # NOTE: here there are basically two options # - process inputs as is (from oldest to newest) @@ -84,7 +85,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]: yield from _entitites_from_path(path) -def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]: +def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]: # TODO make sure it works both with plain directory # ideally get_files should return the right thing, and we won't have to force ZipPath/match_structure here # e.g. possible options are: @@ -202,7 +203,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]: # TODO basically copy pasted from android.py...
hmm def messages() -> Iterator[Res[Message]]: - id2user: Dict[str, User] = {} + id2user: dict[str, User] = {} for x in unique_everseen(_entities): if isinstance(x, Exception): yield x diff --git a/my/instapaper.py b/my/instapaper.py index df1f70b..d79e7e4 100644 --- a/my/instapaper.py +++ b/my/instapaper.py @@ -7,10 +7,10 @@ REQUIRES = [ from dataclasses import dataclass -from .core import Paths - from my.config import instapaper as user_config +from .core import Paths + @dataclass class instapaper(user_config): @@ -22,6 +22,7 @@ class instapaper(user_config): from .core.cfg import make_config + config = make_config(instapaper) @@ -39,9 +40,12 @@ Bookmark = dal.Bookmark Page = dal.Page -from typing import Sequence, Iterable +from collections.abc import Iterable, Sequence from pathlib import Path + from .core import get_files + + def inputs() -> Sequence[Path]: return get_files(config.export_path) diff --git a/my/ip/all.py b/my/ip/all.py index 46c1fec..e8277c1 100644 --- a/my/ip/all.py +++ b/my/ip/all.py @@ -9,10 +9,9 @@ For an example of how this could be used, see https://github.com/seanbreckenridg REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"] -from typing import Iterator +from collections.abc import Iterator from my.core import Stats, warn_if_empty - from my.ip.common import IP diff --git a/my/ip/common.py b/my/ip/common.py index 244ddc5..ef54ee3 100644 --- a/my/ip/common.py +++ b/my/ip/common.py @@ -2,11 +2,12 @@ Provides location/timezone data from IP addresses, using [[https://github.com/seanbreckenridge/ipgeocache][ipgeocache]] """ -from my.core import __NOT_HPI_MODULE__ +from my.core import __NOT_HPI_MODULE__ # isort: skip import ipaddress -from typing import NamedTuple, Iterator, Tuple +from collections.abc import Iterator from datetime import datetime +from typing import NamedTuple import ipgeocache @@ -22,7 +23,7 @@ class IP(NamedTuple): return ipgeocache.get(self.addr) @property - def latlon(self) -> Tuple[float, float]: + def latlon(self) -> tuple[float, float]: loc: str = self.ipgeocache()["loc"] lat, _, lon = loc.partition(",") return float(lat), float(lon) diff --git a/my/jawbone/__init__.py b/my/jawbone/__init__.py index 35112ba..463d735 100644 --- a/my/jawbone/__init__.py +++ b/my/jawbone/__init__.py @@ -1,10 +1,11 @@ from __future__ import annotations -from typing import Dict, Any, List, Iterable import json +from collections.abc import Iterable +from datetime import date, datetime, time, timedelta from functools import lru_cache -from datetime import datetime, date, time, timedelta from pathlib import Path +from typing import Any import pytz @@ -14,7 +15,6 @@ logger = make_logger(__name__) from my.config import jawbone as config # type: ignore[attr-defined] - BDIR = config.export_dir PHASES_FILE = BDIR / 'phases.json' SLEEPS_FILE = BDIR / 'sleeps.json' @@ -24,7 +24,7 @@ GRAPHS_DIR = BDIR / 'graphs' XID = str # TODO how to share with backup thing? -Phases = Dict[XID, Any] +Phases = dict[XID, Any] @lru_cache(1) def get_phases() -> Phases: return json.loads(PHASES_FILE.read_text()) @@ -89,7 +89,7 @@ class SleepEntry: # TODO might be useful to cache these?? @property - def phases(self) -> List[datetime]: + def phases(self) -> list[datetime]: # TODO make sure they are consistent with emfit?
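An aside on the PEP 585 conversions here (the jawbone hunk above is typical): list[datetime] in a signature is only safe on pre-3.9 interpreters because the file keeps from __future__ import annotations at the top, which turns every annotation into a lazily-stored string. A self-contained sketch of the distinction (not HPI code):

from __future__ import annotations  # annotations become lazy strings

from datetime import datetime


def phases_demo() -> list[datetime]:  # parses even on 3.8: never evaluated
    return [datetime(2024, 1, 1)]


# runtime expressions are still evaluated eagerly, so an alias like the
# Phases = dict[XID, Any] above needs Python 3.9+ despite the future import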
return [self._fromts(i['time']) for i in get_phases()[self.xid]] @@ -100,12 +100,13 @@ class SleepEntry: return str(self) -def load_sleeps() -> List[SleepEntry]: +def load_sleeps() -> list[SleepEntry]: sleeps = json.loads(SLEEPS_FILE.read_text()) return [SleepEntry(js) for js in sleeps] -from ..core.error import Res, set_error_datetime, extract_error_datetime +from ..core.error import Res, extract_error_datetime, set_error_datetime + def pre_dataframe() -> Iterable[Res[SleepEntry]]: from more_itertools import bucket @@ -129,9 +130,9 @@ def pre_dataframe() -> Iterable[Res[SleepEntry]]: def dataframe(): - dicts: List[Dict[str, Any]] = [] + dicts: list[dict[str, Any]] = [] for s in pre_dataframe(): - d: Dict[str, Any] + d: dict[str, Any] if isinstance(s, Exception): dt = extract_error_datetime(s) d = { @@ -181,7 +182,7 @@ def plot_one(sleep: SleepEntry, fig, axes, xlims=None, *, showtext=True): print(f"{sleep.xid} span: {span}") # pip install imageio - from imageio import imread # type: ignore + from imageio import imread # type: ignore img = imread(sleep.graph) # all of them are 300x300 images apparently @@ -260,8 +261,8 @@ def predicate(sleep: SleepEntry): # TODO move to dashboard def plot() -> None: - from matplotlib.figure import Figure # type: ignore[import-not-found] import matplotlib.pyplot as plt # type: ignore[import-not-found] + from matplotlib.figure import Figure # type: ignore[import-not-found] # TODO FIXME melatonin data melatonin_data = {} # type: ignore[var-annotated] diff --git a/my/jawbone/plots.py b/my/jawbone/plots.py index d26d606..5968412 100755 --- a/my/jawbone/plots.py +++ b/my/jawbone/plots.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 # TODO this should be in dashboard -from pathlib import Path # from kython.plotting import * from csv import DictReader +from pathlib import Path +from typing import Any, NamedTuple -from typing import Dict, Any, NamedTuple +import matplotlib.pylab as pylab # type: ignore # sleep = [] # with open('2017.csv', 'r') as fo: @@ -12,16 +13,14 @@ from typing import Dict, Any, NamedTuple # for line in islice(reader, 0, 10): # sleep # print(line) - -import matplotlib.pyplot as plt # type: ignore +import matplotlib.pyplot as plt # type: ignore from numpy import genfromtxt -import matplotlib.pylab as pylab # type: ignore pylab.rcParams['figure.figsize'] = (32.0, 24.0) pylab.rcParams['font.size'] = 10 jawboneDataFeatures = Path(__file__).parent / 'features.csv' # Data File Path -featureDesc: Dict[str, str] = {} +featureDesc: dict[str, str] = {} for x in genfromtxt(jawboneDataFeatures, dtype='unicode', delimiter=','): featureDesc[x[0]] = x[1] @@ -52,7 +51,7 @@ class SleepData(NamedTuple): quality: float # ??? @classmethod - def from_jawbone_dict(cls, d: Dict[str, Any]): + def from_jawbone_dict(cls, d: dict[str, Any]): return cls( date=d['DATE'], asleep_time=_safe_mins(_safe_float(d['s_asleep_time'])), @@ -75,7 +74,7 @@ class SleepData(NamedTuple): def iter_useful(data_file: str): - with open(data_file) as fo: + with Path(data_file).open() as fo: reader = DictReader(fo) for d in reader: dt = SleepData.from_jawbone_dict(d) @@ -95,6 +94,7 @@ files = [ ] from kython import concat, parse_date # type: ignore + useful = concat(*(list(iter_useful(str(f))) for f in files)) # for u in useful: @@ -108,6 +108,7 @@ dates = [parse_date(u.date, yearfirst=True, dayfirst=False) for u in useful] # TODO don't need this anymore? 
it's gonna be in dashboards package from kython.plotting import plot_timestamped # type: ignore + for attr, lims, mavg, fig in [ ('light', (0, 400), 5, None), ('deep', (0, 600), 5, None), diff --git a/my/kobo.py b/my/kobo.py index 85bc50f..b4a1575 100644 --- a/my/kobo.py +++ b/my/kobo.py @@ -7,21 +7,22 @@ REQUIRES = [ 'kobuddy', ] +from collections.abc import Iterator from dataclasses import dataclass -from typing import Iterator - -from my.core import ( - get_files, - stat, - Paths, - Stats, -) -from my.core.cfg import make_config -import my.config import kobuddy -from kobuddy import Highlight, get_highlights from kobuddy import * +from kobuddy import Highlight, get_highlights + +from my.core import ( + Paths, + Stats, + get_files, + stat, +) +from my.core.cfg import make_config + +import my.config # isort: skip @dataclass @@ -51,7 +52,7 @@ def stats() -> Stats: ## TODO hmm. not sure if all this really belongs here?... perhaps orger? -from typing import Callable, Union, List +from typing import Callable, Union # TODO maybe type over T? _Predicate = Callable[[str], bool] @@ -69,17 +70,17 @@ def from_predicatish(p: Predicatish) -> _Predicate: return p -def by_annotation(predicatish: Predicatish, **kwargs) -> List[Highlight]: +def by_annotation(predicatish: Predicatish, **kwargs) -> list[Highlight]: pred = from_predicatish(predicatish) - res: List[Highlight] = [] + res: list[Highlight] = [] for h in get_highlights(**kwargs): if pred(h.annotation): res.append(h) return res -def get_todos() -> List[Highlight]: +def get_todos() -> list[Highlight]: def with_todo(ann): if ann is None: ann = '' diff --git a/my/kython/kompress.py b/my/kython/kompress.py index 01e24e4..a5d9c29 100644 --- a/my/kython/kompress.py +++ b/my/kython/kompress.py @@ -1,5 +1,4 @@ -from my.core import __NOT_HPI_MODULE__ -from my.core import warnings +from my.core import __NOT_HPI_MODULE__, warnings warnings.high('my.kython.kompress is deprecated, please use "kompress" library directly. 
See https://github.com/karlicoss/kompress') diff --git a/my/lastfm.py b/my/lastfm.py index d20ebf3..cd9fa8b 100644 --- a/my/lastfm.py +++ b/my/lastfm.py @@ -3,9 +3,9 @@ Last.fm scrobbles ''' from dataclasses import dataclass -from my.core import Paths, Json, make_logger, get_files -from my.config import lastfm as user_config +from my.config import lastfm as user_config +from my.core import Json, Paths, get_files, make_logger logger = make_logger(__name__) @@ -19,13 +19,15 @@ class lastfm(user_config): from my.core.cfg import make_config + config = make_config(lastfm) -from datetime import datetime, timezone import json +from collections.abc import Iterable, Sequence +from datetime import datetime, timezone from pathlib import Path -from typing import NamedTuple, Sequence, Iterable +from typing import NamedTuple from my.core.cachew import mcachew @@ -76,7 +78,9 @@ def scrobbles() -> Iterable[Scrobble]: yield Scrobble(raw=raw) -from my.core import stat, Stats +from my.core import Stats, stat + + def stats() -> Stats: return stat(scrobbles) diff --git a/my/location/all.py b/my/location/all.py index fd88721..c6e8cab 100644 --- a/my/location/all.py +++ b/my/location/all.py @@ -2,14 +2,13 @@ Merges location data from multiple sources """ -from typing import Iterator +from collections.abc import Iterator -from my.core import Stats, LazyLogger +from my.core import LazyLogger, Stats from my.core.source import import_source from .common import Location - logger = LazyLogger(__name__, level="warning") diff --git a/my/location/common.py b/my/location/common.py index f406370..4c47ef0 100644 --- a/my/location/common.py +++ b/my/location/common.py @@ -1,12 +1,13 @@ -from datetime import date, datetime -from typing import Union, Tuple, Optional, Iterable, TextIO, Iterator, Protocol -from dataclasses import dataclass +from my.core import __NOT_HPI_MODULE__ # isort: skip -from my.core import __NOT_HPI_MODULE__ +from collections.abc import Iterable, Iterator +from dataclasses import dataclass +from datetime import date, datetime +from typing import Optional, Protocol, TextIO, Union DateIsh = Union[datetime, date, str] -LatLon = Tuple[float, float] +LatLon = tuple[float, float] class LocationProtocol(Protocol): diff --git a/my/location/fallback/all.py b/my/location/fallback/all.py index a5daa05..d340148 100644 --- a/my/location/fallback/all.py +++ b/my/location/fallback/all.py @@ -1,14 +1,16 @@ # TODO: add config here which passes kwargs to estimate_from (under_accuracy) # overwritable by passing the kwarg name here to the top-level estimate_location -from typing import Iterator, Optional +from __future__ import annotations + +from collections.abc import Iterator from my.core.source import import_source from my.location.fallback.common import ( - estimate_from, - FallbackLocation, DateExact, + FallbackLocation, LocationEstimator, + estimate_from, ) @@ -24,7 +26,7 @@ def fallback_estimators() -> Iterator[LocationEstimator]: yield _home_estimate -def estimate_location(dt: DateExact, *, first_match: bool=False, under_accuracy: Optional[int] = None) -> FallbackLocation: +def estimate_location(dt: DateExact, *, first_match: bool=False, under_accuracy: int | None = None) -> FallbackLocation: loc = estimate_from(dt, estimators=list(fallback_estimators()), first_match=first_match, under_accuracy=under_accuracy) # should never happen if the user has home configured if loc is None: diff --git a/my/location/fallback/common.py b/my/location/fallback/common.py index 13bc603..622b2f5 100644 --- 
a/my/location/fallback/common.py +++ b/my/location/fallback/common.py @@ -1,9 +1,12 @@ from __future__ import annotations -from dataclasses import dataclass -from typing import Optional, Callable, Sequence, Iterator, List, Union -from datetime import datetime, timedelta, timezone -from ..common import LocationProtocol, Location +from collections.abc import Iterator, Sequence +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import Callable, Union + +from ..common import Location, LocationProtocol + DateExact = Union[datetime, float, int] # float/int as epoch timestamps Second = float @@ -13,10 +16,10 @@ class FallbackLocation(LocationProtocol): lat: float lon: float dt: datetime - duration: Optional[Second] = None - accuracy: Optional[float] = None - elevation: Optional[float] = None - datasource: Optional[str] = None # which module provided this, useful for debugging + duration: Second | None = None + accuracy: float | None = None + elevation: float | None = None + datasource: str | None = None # which module provided this, useful for debugging def to_location(self, *, end: bool = False) -> Location: ''' @@ -43,9 +46,9 @@ class FallbackLocation(LocationProtocol): lon: float, dt: datetime, end_dt: datetime, - accuracy: Optional[float] = None, - elevation: Optional[float] = None, - datasource: Optional[str] = None, + accuracy: float | None = None, + elevation: float | None = None, + datasource: str | None = None, ) -> FallbackLocation: ''' Create FallbackLocation from a start date and an end date @@ -93,13 +96,13 @@ def estimate_from( estimators: LocationEstimators, *, first_match: bool = False, - under_accuracy: Optional[int] = None, -) -> Optional[FallbackLocation]: + under_accuracy: int | None = None, +) -> FallbackLocation | None: ''' first_match: if True, return the first location found under_accuracy: if set, only return locations with accuracy under this value ''' - found: List[FallbackLocation] = [] + found: list[FallbackLocation] = [] for loc in _iter_estimate_from(dt, estimators): if under_accuracy is not None and loc.accuracy is not None and loc.accuracy > under_accuracy: continue diff --git a/my/location/fallback/via_home.py b/my/location/fallback/via_home.py index e44c59d..f88fee0 100644 --- a/my/location/fallback/via_home.py +++ b/my/location/fallback/via_home.py @@ -2,25 +2,22 @@ Simple location provider, serving as a fallback when more detailed data isn't available ''' +from __future__ import annotations + +from collections.abc import Iterator, Sequence from dataclasses import dataclass from datetime import datetime, time, timezone -from functools import lru_cache -from typing import Sequence, Tuple, Union, cast, List, Iterator +from functools import cache +from typing import cast from my.config import location as user_config +from my.location.common import DateIsh, LatLon +from my.location.fallback.common import DateExact, FallbackLocation -from my.location.common import LatLon, DateIsh -from my.location.fallback.common import FallbackLocation, DateExact @dataclass class Config(user_config): - home: Union[ - LatLon, # either single, 'current' location - Sequence[Tuple[ # or, a sequence of location history - DateIsh, # date when you moved to - LatLon, # the location - ]] - ] + home: LatLon | Sequence[tuple[DateIsh, LatLon]] # default ~30km accuracy # this is called 'home_accuracy' since it lives on the base location.config object, @@ -29,13 +26,13 @@ class Config(user_config): # TODO could make current Optional and somehow 
determine from system settings? @property - def _history(self) -> Sequence[Tuple[datetime, LatLon]]: + def _history(self) -> Sequence[tuple[datetime, LatLon]]: home1 = self.home # todo ugh, can't test for isinstance LatLon, it's a tuple itself - home2: Sequence[Tuple[DateIsh, LatLon]] + home2: Sequence[tuple[DateIsh, LatLon]] if isinstance(home1[0], tuple): # already a sequence - home2 = cast(Sequence[Tuple[DateIsh, LatLon]], home1) + home2 = cast(Sequence[tuple[DateIsh, LatLon]], home1) else: # must be a pair of coordinates. also doesn't really matter which date to pick? loc = cast(LatLon, home1) @@ -60,10 +57,11 @@ class Config(user_config): from ...core.cfg import make_config + config = make_config(Config) -@lru_cache(maxsize=None) +@cache def get_location(dt: datetime) -> LatLon: ''' Interpolates the location at dt @@ -74,8 +72,8 @@ def get_location(dt: datetime) -> LatLon: # TODO: in python3.8, use functools.cached_property instead? -@lru_cache(maxsize=None) -def homes_cached() -> List[Tuple[datetime, LatLon]]: +@cache +def homes_cached() -> list[tuple[datetime, LatLon]]: return list(config._history) diff --git a/my/location/fallback/via_ip.py b/my/location/fallback/via_ip.py index 79a452c..732af67 100644 --- a/my/location/fallback/via_ip.py +++ b/my/location/fallback/via_ip.py @@ -7,8 +7,8 @@ REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"] from dataclasses import dataclass from datetime import timedelta -from my.core import Stats, make_config from my.config import location +from my.core import Stats, make_config from my.core.warnings import medium @@ -24,13 +24,13 @@ class ip_config(location.via_ip): config = make_config(ip_config) +from collections.abc import Iterator from functools import lru_cache -from typing import Iterator, List from my.core import make_logger from my.core.compat import bisect_left from my.location.common import Location -from my.location.fallback.common import FallbackLocation, DateExact, _datetime_timestamp +from my.location.fallback.common import DateExact, FallbackLocation, _datetime_timestamp logger = make_logger(__name__, level="warning") @@ -60,7 +60,7 @@ def locations() -> Iterator[Location]: @lru_cache(1) -def _sorted_fallback_locations() -> List[FallbackLocation]: +def _sorted_fallback_locations() -> list[FallbackLocation]: fl = list(filter(lambda l: l.duration is not None, fallback_locations())) logger.debug(f"Fallback locations: {len(fl)}, sorting...:") fl.sort(key=lambda l: l.dt.timestamp()) diff --git a/my/location/google.py b/my/location/google.py index b966ec6..750c847 100644 --- a/my/location/google.py +++ b/my/location/google.py @@ -3,28 +3,27 @@ Location data from Google Takeout DEPRECATED: setup my.google.takeout.parser and use my.location.google_takeout instead """ +from __future__ import annotations REQUIRES = [ 'geopy', # checking that coordinates are valid 'ijson', ] +import re +from collections.abc import Iterable, Sequence from datetime import datetime, timezone from itertools import islice from pathlib import Path -from subprocess import Popen, PIPE -from typing import Iterable, NamedTuple, Optional, Sequence, IO, Tuple -import re +from subprocess import PIPE, Popen +from typing import IO, NamedTuple, Optional # pip3 install geopy -import geopy # type: ignore +import geopy # type: ignore -from my.core import stat, Stats, make_logger +from my.core import Stats, make_logger, stat, warnings from my.core.cachew import cache_dir, mcachew -from my.core import warnings - - warnings.high("Please set up
my.google.takeout.parser module for better takeout support") @@ -43,7 +42,7 @@ class Location(NamedTuple): alt: Optional[float] -TsLatLon = Tuple[int, int, int] +TsLatLon = tuple[int, int, int] def _iter_via_ijson(fo) -> Iterable[TsLatLon]: @@ -51,10 +50,10 @@ def _iter_via_ijson(fo) -> Iterable[TsLatLon]: # todo extract to common? try: # pip3 install ijson cffi - import ijson.backends.yajl2_cffi as ijson # type: ignore + import ijson.backends.yajl2_cffi as ijson # type: ignore except: warnings.medium("Falling back to default ijson because 'cffi' backend isn't found. It's up to 2x faster, you might want to check it out") - import ijson # type: ignore + import ijson # type: ignore for d in ijson.items(fo, 'locations.item'): yield ( diff --git a/my/location/google_takeout.py b/my/location/google_takeout.py index eb757ce..cb5bef3 100644 --- a/my/location/google_takeout.py +++ b/my/location/google_takeout.py @@ -4,13 +4,14 @@ Extracts locations using google_takeout_parser -- no shared code with the deprec REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] -from typing import Iterator +from collections.abc import Iterator -from my.google.takeout.parser import events, _cachew_depends_on from google_takeout_parser.models import Location as GoogleLocation -from my.core import stat, Stats, LazyLogger +from my.core import LazyLogger, Stats, stat from my.core.cachew import mcachew +from my.google.takeout.parser import _cachew_depends_on, events + from .common import Location logger = LazyLogger(__name__) diff --git a/my/location/google_takeout_semantic.py b/my/location/google_takeout_semantic.py index 5f2c055..7bddfa8 100644 --- a/my/location/google_takeout_semantic.py +++ b/my/location/google_takeout_semantic.py @@ -7,21 +7,24 @@ Extracts semantic location history using google_takeout_parser REQUIRES = ["git+https://github.com/seanbreckenridge/google_takeout_parser"] +from collections.abc import Iterator from dataclasses import dataclass -from typing import Iterator, List -from my.google.takeout.parser import events, _cachew_depends_on as _parser_cachew_depends_on from google_takeout_parser.models import PlaceVisit as SemanticLocation -from my.core import make_config, stat, LazyLogger, Stats +from my.core import LazyLogger, Stats, make_config, stat from my.core.cachew import mcachew from my.core.error import Res +from my.google.takeout.parser import _cachew_depends_on as _parser_cachew_depends_on +from my.google.takeout.parser import events + from .common import Location logger = LazyLogger(__name__) from my.config import location as user_config + @dataclass class semantic_locations_config(user_config.google_takeout_semantic): # a value between 0 and 100, 100 being the most confident @@ -36,7 +39,7 @@ config = make_config(semantic_locations_config) # add config to cachew dependency so it recomputes on config changes -def _cachew_depends_on() -> List[str]: +def _cachew_depends_on() -> list[str]: dep = _parser_cachew_depends_on() dep.insert(0, f"require_confidence={config.require_confidence} accuracy={config.accuracy}") return dep diff --git a/my/location/gpslogger.py b/my/location/gpslogger.py index 6d158a0..bbbf70e 100644 --- a/my/location/gpslogger.py +++ b/my/location/gpslogger.py @@ -20,20 +20,20 @@ class config(location.gpslogger): accuracy: float = 50.0 -from itertools import chain +from collections.abc import Iterator, Sequence from datetime import datetime, timezone +from itertools import chain from pathlib import Path -from typing import Iterator, Sequence, List 
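The gpslogger hunk here repeats the migration applied throughout this diff: container ABCs move from typing to collections.abc (the typing aliases are deprecated since Python 3.9, PEP 585), while typing.List simply becomes the builtin list. A minimal self-contained sketch of the target style, assuming Python 3.9+:

from collections.abc import Iterator, Sequence


def consecutive_pairs(xs: Sequence[str]) -> Iterator[tuple[str, str]]:
    # builtin generics (tuple[...], list[...]) replace typing.Tuple/List
    yield from zip(xs, xs[1:])


assert list(consecutive_pairs(['a', 'b', 'c'])) == [('a', 'b'), ('b', 'c')]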
import gpxpy from gpxpy.gpx import GPXXMLSyntaxException from more_itertools import unique_everseen -from my.core import Stats, LazyLogger +from my.core import LazyLogger, Stats from my.core.cachew import mcachew from my.core.common import get_files -from .common import Location +from .common import Location logger = LazyLogger(__name__, level="warning") @@ -49,7 +49,7 @@ def inputs() -> Sequence[Path]: return sorted(get_files(config.export_path, glob="*.gpx", sort=False), key=_input_sort_key) -def _cachew_depends_on() -> List[float]: +def _cachew_depends_on() -> list[float]: return [p.stat().st_mtime for p in inputs()] diff --git a/my/location/home.py b/my/location/home.py index f6e6978..c82dda7 100644 --- a/my/location/home.py +++ b/my/location/home.py @@ -1,7 +1,7 @@ -from .fallback.via_home import * - from my.core.warnings import high +from .fallback.via_home import * + high( "my.location.home is deprecated, use my.location.fallback.via_home instead, or estimate locations using the higher-level my.location.fallback.all.estimate_location" ) diff --git a/my/location/via_ip.py b/my/location/via_ip.py index df48f8b..d465ad0 100644 --- a/my/location/via_ip.py +++ b/my/location/via_ip.py @@ -1,7 +1,7 @@ REQUIRES = ["git+https://github.com/seanbreckenridge/ipgeocache"] -from .fallback.via_ip import * - from my.core.warnings import high +from .fallback.via_ip import * + high("my.location.via_ip is deprecated, use my.location.fallback.via_ip instead") diff --git a/my/materialistic.py b/my/materialistic.py index 8a6a997..45af3f9 100644 --- a/my/materialistic.py +++ b/my/materialistic.py @@ -1,4 +1,5 @@ from .core.warnings import high + high("DEPRECATED! Please use my.hackernews.materialistic instead.") from .hackernews.materialistic import * diff --git a/my/media/imdb.py b/my/media/imdb.py index df31032..131f6a7 100644 --- a/my/media/imdb.py +++ b/my/media/imdb.py @@ -1,10 +1,12 @@ import csv +from collections.abc import Iterator from datetime import datetime -from typing import Iterator, List, NamedTuple +from typing import NamedTuple -from ..core import get_files +from my.core import get_files + +from my.config import imdb as config # isort: skip -from my.config import imdb as config def _get_last(): return max(get_files(config.export_path)) @@ -31,7 +33,7 @@ def iter_movies() -> Iterator[Movie]: yield Movie(created=created, title=title, rating=rating) -def get_movies() -> List[Movie]: +def get_movies() -> list[Movie]: return sorted(iter_movies(), key=lambda m: m.created) diff --git a/my/media/youtube.py b/my/media/youtube.py index 3ddbc14..9a38c43 100644 --- a/my/media/youtube.py +++ b/my/media/youtube.py @@ -1,4 +1,4 @@ -from my.core import __NOT_HPI_MODULE__ +from my.core import __NOT_HPI_MODULE__ # isort: skip from typing import TYPE_CHECKING diff --git a/my/monzo/monzoexport.py b/my/monzo/monzoexport.py index 3aa0cf5..f5e1cd1 100644 --- a/my/monzo/monzoexport.py +++ b/my/monzo/monzoexport.py @@ -5,16 +5,17 @@ REQUIRES = [ 'git+https://github.com/karlicoss/monzoexport', ] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Sequence, Iterator from my.core import ( Paths, get_files, make_logger, ) -import my.config + +import my.config # isort: skip @dataclass diff --git a/my/orgmode.py b/my/orgmode.py index cf14e43..10f53c0 100644 --- a/my/orgmode.py +++ b/my/orgmode.py @@ -1,15 +1,17 @@ ''' Programmatic access and queries to org-mode files on the filesystem ''' +from __future__ import annotations REQUIRES = [ 
'orgparse', ] import re +from collections.abc import Iterable, Sequence from datetime import datetime from pathlib import Path -from typing import Iterable, List, NamedTuple, Optional, Sequence, Tuple +from typing import NamedTuple, Optional import orgparse @@ -34,7 +36,7 @@ def make_config() -> config: class OrgNote(NamedTuple): created: Optional[datetime] heading: str - tags: List[str] + tags: list[str] def inputs() -> Sequence[Path]: @@ -45,7 +47,7 @@ def inputs() -> Sequence[Path]: _rgx = re.compile(orgparse.date.gene_timestamp_regex(brtype='inactive'), re.VERBOSE) -def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]: +def _created(n: orgparse.OrgNode) -> tuple[datetime | None, str]: heading = n.heading # meh.. support in orgparse? pp = {} if n.is_root() else n.properties @@ -68,7 +70,7 @@ def _created(n: orgparse.OrgNode) -> Tuple[Optional[datetime], str]: def to_note(x: orgparse.OrgNode) -> OrgNote: # ugh. hack to merely make it cacheable heading = x.heading - created: Optional[datetime] + created: datetime | None try: c, heading = _created(x) if isinstance(c, datetime): diff --git a/my/pdfs.py b/my/pdfs.py index de9324d..eefd573 100644 --- a/my/pdfs.py +++ b/my/pdfs.py @@ -1,6 +1,7 @@ ''' PDF documents and annotations on your filesystem ''' +from __future__ import annotations as _annotations REQUIRES = [ 'git+https://github.com/0xabu/pdfannots', @@ -8,9 +9,10 @@ REQUIRES = [ ] import time +from collections.abc import Iterator, Sequence from datetime import datetime from pathlib import Path -from typing import Iterator, List, NamedTuple, Optional, Protocol, Sequence, TYPE_CHECKING +from typing import TYPE_CHECKING, NamedTuple, Optional, Protocol import pdfannots from more_itertools import bucket @@ -72,7 +74,7 @@ class Annotation(NamedTuple): created: Optional[datetime] # note: can be tz unaware in some bad pdfs... @property - def date(self) -> Optional[datetime]: + def date(self) -> datetime | None: # legacy name return self.created @@ -93,7 +95,7 @@ def _as_annotation(*, raw: pdfannots.Annotation, path: str) -> Annotation: ) -def get_annots(p: Path) -> List[Annotation]: +def get_annots(p: Path) -> list[Annotation]: b = time.time() with p.open('rb') as fo: doc = pdfannots.process_file(fo, emit_progress_to=None) @@ -150,17 +152,17 @@ class Pdf(NamedTuple): annotations: Sequence[Annotation] @property - def created(self) -> Optional[datetime]: + def created(self) -> datetime | None: annots = self.annotations return None if len(annots) == 0 else annots[-1].created @property - def date(self) -> Optional[datetime]: + def date(self) -> datetime | None: # legacy return self.created -def annotated_pdfs(*, filelist: Optional[Sequence[PathIsh]] = None) -> Iterator[Res[Pdf]]: +def annotated_pdfs(*, filelist: Sequence[PathIsh] | None = None) -> Iterator[Res[Pdf]]: if filelist is not None: # hacky... 
keeping it backwards compatible # https://github.com/karlicoss/HPI/pull/74 diff --git a/my/photos/main.py b/my/photos/main.py index bf912e4..f98cb15 100644 --- a/my/photos/main.py +++ b/my/photos/main.py @@ -1,27 +1,30 @@ """ Photos and videos on your filesystem, their GPS and timestamps """ + +from __future__ import annotations + REQUIRES = [ 'geopy', 'magic', ] # NOTE: also uses fdfind to search photos +import json +from collections.abc import Iterable, Iterator from concurrent.futures import ProcessPoolExecutor as Pool from datetime import datetime -import json from pathlib import Path -from typing import Optional, NamedTuple, Iterator, Iterable, List +from typing import NamedTuple, Optional from geopy.geocoders import Nominatim # type: ignore from my.core import LazyLogger -from my.core.error import Res, sort_res_by from my.core.cachew import cache_dir, mcachew +from my.core.error import Res, sort_res_by from my.core.mime import fastermime -from my.config import photos as config # type: ignore[attr-defined] - +from my.config import photos as config # type: ignore[attr-defined] # isort: skip logger = LazyLogger(__name__) @@ -55,15 +58,15 @@ class Photo(NamedTuple): return f'{config.base_url}{self._basename}' -from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref +from .utils import Exif, ExifTags, convert_ref, dt_from_path, get_exif_from_file Result = Res[Photo] -def _make_photo_aux(*args, **kwargs) -> List[Result]: +def _make_photo_aux(*args, **kwargs) -> list[Result]: # for the process pool.. return list(_make_photo(*args, **kwargs)) -def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Iterator[Result]: +def _make_photo(photo: Path, mtype: str, *, parent_geo: LatLon | None) -> Iterator[Result]: exif: Exif if any(x in mtype for x in ['image/png', 'image/x-ms-bmp', 'video']): # TODO don't remember why.. @@ -77,7 +80,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite yield e exif = {} - def _get_geo() -> Optional[LatLon]: + def _get_geo() -> LatLon | None: meta = exif.get(ExifTags.GPSINFO, {}) if ExifTags.LAT in meta and ExifTags.LON in meta: return LatLon( @@ -87,7 +90,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite return parent_geo # TODO aware on unaware? - def _get_dt() -> Optional[datetime]: + def _get_dt() -> datetime | None: edt = exif.get(ExifTags.DATETIME, None) if edt is not None: dtimes = edt.replace(' 24', ' 00') # jeez maybe log it? @@ -123,7 +126,7 @@ def _make_photo(photo: Path, mtype: str, *, parent_geo: Optional[LatLon]) -> Ite def _candidates() -> Iterable[Res[str]]: # TODO that could be a bit slow if there are too many extra files? - from subprocess import Popen, PIPE + from subprocess import PIPE, Popen # TODO could extract this to common? # TODO would be nice to reuse get_files (or even let it use find) # that way would be easier to exclude @@ -162,7 +165,7 @@ def _photos(candidates: Iterable[Res[str]]) -> Iterator[Result]: from functools import lru_cache @lru_cache(None) - def get_geo(d: Path) -> Optional[LatLon]: + def get_geo(d: Path) -> LatLon | None: geof = d / 'geo.json' if not geof.exists(): if d == d.parent: @@ -214,5 +217,7 @@ def print_all() -> None: # todo cachew -- invalidate if function code changed?
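A note on _make_photo_aux in the photos hunk above: the wrapper materializes the generator into a list because ProcessPoolExecutor pickles return values, and a list pickles while a generator object does not. A tiny standalone illustration with hypothetical helpers:

from concurrent.futures import ProcessPoolExecutor


def _gen(n: int):
    yield from range(n)


def _as_list(n: int) -> list[int]:
    # results cross the process boundary via pickle; lists survive, generators don't
    return list(_gen(n))


if __name__ == '__main__':
    with ProcessPoolExecutor() as pool:
        assert list(pool.map(_as_list, [2, 3])) == [[0, 1], [0, 1, 2]]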
from ..core import Stats, stat + + def stats() -> Stats: return stat(photos) diff --git a/my/photos/utils.py b/my/photos/utils.py index c614c4a..e88def2 100644 --- a/my/photos/utils.py +++ b/my/photos/utils.py @@ -1,11 +1,13 @@ +from __future__ import annotations + +from ..core import __NOT_HPI_MODULE__ # isort: skip + from pathlib import Path -from typing import Dict import PIL.Image -from PIL.ExifTags import TAGS, GPSTAGS +from PIL.ExifTags import GPSTAGS, TAGS - -Exif = Dict +Exif = dict # TODO PIL.ExifTags.TAGS @@ -62,18 +64,15 @@ def convert_ref(cstr, ref: str) -> float: import re from datetime import datetime -from typing import Optional # TODO surely there is a library that does it?? # TODO this belongs to a private overlay or something # basically have a function that patches up dates after the files were yielded.. _DT_REGEX = re.compile(r'\D(\d{8})\D*(\d{6})\D') -def dt_from_path(p: Path) -> Optional[datetime]: +def dt_from_path(p: Path) -> datetime | None: name = p.stem mm = _DT_REGEX.search(name) if mm is None: return None dates = mm.group(1) + mm.group(2) return datetime.strptime(dates, "%Y%m%d%H%M%S") - -from ..core import __NOT_HPI_MODULE__ diff --git a/my/pinboard.py b/my/pinboard.py index ef4ca36..e98dc78 100644 --- a/my/pinboard.py +++ b/my/pinboard.py @@ -5,15 +5,16 @@ REQUIRES = [ 'git+https://github.com/karlicoss/pinbexport', ] +from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Iterator, Sequence - -from my.core import get_files, Paths, Res -import my.config import pinbexport.dal as pinbexport +from my.core import Paths, Res, get_files + +import my.config # isort: skip + @dataclass class config(my.config.pinboard): # TODO rename to pinboard.pinbexport? diff --git a/my/pocket.py b/my/pocket.py index b638fba..ff9a788 100644 --- a/my/pocket.py +++ b/my/pocket.py @@ -7,10 +7,10 @@ REQUIRES = [ from dataclasses import dataclass from typing import TYPE_CHECKING -from .core import Paths - from my.config import pocket as user_config +from .core import Paths + @dataclass class pocket(user_config): @@ -23,6 +23,7 @@ class pocket(user_config): from .core.cfg import make_config + config = make_config(pocket) @@ -37,7 +38,7 @@ except ModuleNotFoundError as e: Article = dal.Article -from typing import Sequence, Iterable +from collections.abc import Iterable, Sequence # todo not sure if should be defensive against empty? @@ -51,9 +52,12 @@ def articles() -> Iterable[Article]: yield from _dal().articles() -from .core import stat, Stats +from .core import Stats, stat + + def stats() -> Stats: from itertools import chain + from more_itertools import ilen return { **stat(articles), diff --git a/my/polar.py b/my/polar.py index e52bb14..2172014 100644 --- a/my/polar.py +++ b/my/polar.py @@ -1,11 +1,12 @@ """ [[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights """ +from __future__ import annotations + from pathlib import Path -from typing import cast, TYPE_CHECKING +from typing import TYPE_CHECKING, cast - -import my.config +import my.config # isort: skip # todo use something similar to tz.via_location for config fallback if not TYPE_CHECKING: @@ -20,8 +21,11 @@ if user_config is None: pass -from .core import PathIsh from dataclasses import dataclass + +from .core import PathIsh + + @dataclass class polar(user_config): ''' @@ -32,20 +36,21 @@ class polar(user_config): from .core import make_config + config = make_config(polar) # todo not sure where it keeps stuff on Windows? 
# https://github.com/burtonator/polar-bookshelf/issues/296 -from datetime import datetime -from typing import List, Dict, Iterable, NamedTuple, Sequence, Optional import json +from collections.abc import Iterable, Sequence +from datetime import datetime +from typing import NamedTuple -from .core import LazyLogger, Json, Res +from .core import Json, LazyLogger, Res from .core.compat import fromisoformat from .core.error import echain, sort_res_by -from .core.konsume import wrap, Zoomable, Wdict - +from .core.konsume import Wdict, Zoomable, wrap logger = LazyLogger(__name__) @@ -65,7 +70,7 @@ class Highlight(NamedTuple): comments: Sequence[Comment] tags: Sequence[str] page: int # 1-indexed - color: Optional[str] = None + color: str | None = None Uid = str @@ -73,7 +78,7 @@ class Book(NamedTuple): created: datetime uid: Uid path: Path - title: Optional[str] + title: str | None # TODO hmmm. I think this needs to be defensive as well... # think about it later. items: Sequence[Highlight] @@ -129,7 +134,7 @@ class Loader: pi['dimensions'].consume_all() # TODO how to make it nicer? - cmap: Dict[Hid, List[Comment]] = {} + cmap: dict[Hid, list[Comment]] = {} vals = list(comments) for v in vals: cid = v['id'].zoom() @@ -163,7 +168,7 @@ class Loader: h['rects'].ignore() # TODO make it more generic.. - htags: List[str] = [] + htags: list[str] = [] if 'tags' in h: ht = h['tags'].zoom() for _k, v in list(ht.items()): @@ -242,7 +247,7 @@ def iter_entries() -> Iterable[Result]: yield err -def get_entries() -> List[Result]: +def get_entries() -> list[Result]: # sorting by first annotation is reasonable I guess??? # todo perhaps worth making it a pattern? X() returns iterable, get_X returns reasonably sorted list? return list(sort_res_by(iter_entries(), key=lambda e: e.created)) diff --git a/my/reddit/__init__.py b/my/reddit/__init__.py index e81aaf9..f344eeb 100644 --- a/my/reddit/__init__.py +++ b/my/reddit/__init__.py @@ -20,6 +20,7 @@ REQUIRES = [ from my.core.hpi_compat import handle_legacy_import + is_legacy_import = handle_legacy_import( parent_module_name=__name__, legacy_submodule_name='rexport', diff --git a/my/reddit/all.py b/my/reddit/all.py index daedba1..27e22df 100644 --- a/my/reddit/all.py +++ b/my/reddit/all.py @@ -1,8 +1,9 @@ -from typing import Iterator -from my.core import stat, Stats +from collections.abc import Iterator + +from my.core import Stats, stat from my.core.source import import_source -from .common import Save, Upvote, Comment, Submission, _merge_comments +from .common import Comment, Save, Submission, Upvote, _merge_comments # Man... 
ideally an all.py file isn't this verbose, but # reddit just feels like that much of a complicated source and diff --git a/my/reddit/common.py b/my/reddit/common.py index c01258b..40f9f6e 100644 --- a/my/reddit/common.py +++ b/my/reddit/common.py @@ -2,12 +2,14 @@ This defines Protocol classes, which make sure that each different type of shared models have a standardized interface """ -from my.core import __NOT_HPI_MODULE__ -from typing import Set, Iterator, Protocol +from my.core import __NOT_HPI_MODULE__ # isort: skip + +from collections.abc import Iterator from itertools import chain +from typing import Protocol -from my.core import datetime_aware, Json +from my.core import Json, datetime_aware # common fields across all the Protocol classes, so generic code can be written @@ -49,7 +51,7 @@ class Submission(RedditBase, Protocol): def _merge_comments(*sources: Iterator[Comment]) -> Iterator[Comment]: #from .rexport import logger #ignored = 0 - emitted: Set[str] = set() + emitted: set[str] = set() for e in chain(*sources): uid = e.id if uid in emitted: diff --git a/my/reddit/pushshift.py b/my/reddit/pushshift.py index 9580005..1bfa048 100644 --- a/my/reddit/pushshift.py +++ b/my/reddit/pushshift.py @@ -10,13 +10,13 @@ REQUIRES = [ from dataclasses import dataclass +# note: keeping pushshift import before config import, so it's handled gracefully by import_source +from pushshift_comment_export.dal import PComment, read_file + +from my.config import reddit as uconfig from my.core import Paths, Stats, stat from my.core.cfg import make_config -# note: keeping pushshift import before config import, so it's handled gracefully by import_source -from pushshift_comment_export.dal import read_file, PComment - -from my.config import reddit as uconfig @dataclass class pushshift_config(uconfig.pushshift): @@ -29,10 +29,10 @@ class pushshift_config(uconfig.pushshift): config = make_config(pushshift_config) -from my.core import get_files -from typing import Sequence, Iterator +from collections.abc import Iterator, Sequence from pathlib import Path +from my.core import get_files def inputs() -> Sequence[Path]: diff --git a/my/reddit/rexport.py b/my/reddit/rexport.py index 5dcd7d9..cb6af01 100644 --- a/my/reddit/rexport.py +++ b/my/reddit/rexport.py @@ -7,23 +7,24 @@ REQUIRES = [ 'git+https://github.com/karlicoss/rexport', ] -from dataclasses import dataclass import inspect +from collections.abc import Iterator, Sequence +from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Iterator, Sequence +from typing import TYPE_CHECKING from my.core import ( - get_files, - make_logger, - warnings, - stat, Paths, Stats, + get_files, + make_logger, + stat, + warnings, ) from my.core.cachew import mcachew -from my.core.cfg import make_config, Attrs +from my.core.cfg import Attrs, make_config -from my.config import reddit as uconfig +from my.config import reddit as uconfig # isort: skip logger = make_logger(__name__) diff --git a/my/rescuetime.py b/my/rescuetime.py index 76a0d4c..0c9fd28 100644 --- a/my/rescuetime.py +++ b/my/rescuetime.py @@ -5,16 +5,15 @@ REQUIRES = [ 'git+https://github.com/karlicoss/rescuexport', ] -from pathlib import Path +from collections.abc import Iterable, Sequence from datetime import timedelta -from typing import Sequence, Iterable +from pathlib import Path -from my.core import get_files, make_logger, stat, Stats +from my.core import Stats, get_files, make_logger, stat from my.core.cachew import mcachew from my.core.error import Res, split_errors 
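The # isort: skip marker on the my.config import just below (and in rexport, pushshift, and a dozen other files in this diff) pins the config import after everything else: isort would otherwise hoist it into the sorted first-party block, but HPI deliberately imports user config last so that failures in the user-supplied config surface after the module's own imports. The recurring shape, assuming an HPI setup where my.config resolves:

from my.core import make_logger

from my.config import rescuetime as config  # isort: skip

logger = make_logger(__name__)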
-from my.config import rescuetime as config - +from my.config import rescuetime as config # isort: skip logger = make_logger(__name__) @@ -24,6 +23,7 @@ def inputs() -> Sequence[Path]: import rescuexport.dal as dal + DAL = dal.DAL Entry = dal.Entry @@ -43,6 +43,8 @@ def groups(gap: timedelta=timedelta(hours=3)) -> Iterable[Res[Sequence[Entry]]]: # todo automatic dataframe interface? from .core.pandas import DataFrameT, as_dataframe + + def dataframe() -> DataFrameT: return as_dataframe(entries()) @@ -56,16 +58,19 @@ def stats() -> Stats: # basically, hack config and populate it with fake data? fake data generated by DAL, but the rest is handled by this? +from collections.abc import Iterator from contextlib import contextmanager -from typing import Iterator + + # todo take seed, or what? @contextmanager def fake_data(rows: int=1000) -> Iterator: # todo also disable cachew automatically for such things? - from my.core.cfg import tmp_config - from my.core.cachew import disabled_cachew - from tempfile import TemporaryDirectory import json + from tempfile import TemporaryDirectory + + from my.core.cachew import disabled_cachew + from my.core.cfg import tmp_config with disabled_cachew(), TemporaryDirectory() as td: tdir = Path(td) f = tdir / 'rescuetime.json' diff --git a/my/roamresearch.py b/my/roamresearch.py index 2fe06d4..7322774 100644 --- a/my/roamresearch.py +++ b/my/roamresearch.py @@ -1,16 +1,19 @@ """ [[https://roamresearch.com][Roam]] data """ -from datetime import datetime, timezone -from pathlib import Path -from itertools import chain -import re -from typing import NamedTuple, Iterator, List, Optional +from __future__ import annotations -from .core import get_files, LazyLogger, Json +import re +from collections.abc import Iterator +from datetime import datetime, timezone +from itertools import chain +from pathlib import Path +from typing import NamedTuple from my.config import roamresearch as config +from .core import Json, LazyLogger, get_files + logger = LazyLogger(__name__) @@ -57,15 +60,15 @@ class Node(NamedTuple): return datetime.fromtimestamp(rt / 1000, tz=timezone.utc) @property - def title(self) -> Optional[str]: + def title(self) -> str | None: return self.raw.get(Keys.TITLE) @property - def body(self) -> Optional[str]: + def body(self) -> str | None: return self.raw.get(Keys.STRING) @property - def children(self) -> List['Node']: + def children(self) -> list[Node]: # TODO cache? needs a key argument (because of Json) ch = self.raw.get(Keys.CHILDREN, []) return list(map(Node, ch)) @@ -95,7 +98,7 @@ class Node(NamedTuple): # - heading -- notes that haven't been created yet return len(self.body or '') == 0 and len(self.children) == 0 - def traverse(self) -> Iterator['Node']: + def traverse(self) -> Iterator[Node]: # not sure about __iter__, because might be a bit unintuitive that it's recursive.. yield self for c in self.children: @@ -120,7 +123,7 @@ class Node(NamedTuple): return f'Node(created={self.created}, title={self.title}, body={self.body})' @staticmethod - def make(raw: Json) -> Iterator['Node']: + def make(raw: Json) -> Iterator[Node]: is_empty = set(raw.keys()) == {Keys.EDITED, Keys.EDIT_EMAIL, Keys.TITLE} # not sure about that... 
but daily notes end up like that if is_empty: @@ -130,11 +133,11 @@ class Node(NamedTuple): class Roam: - def __init__(self, raw: List[Json]) -> None: + def __init__(self, raw: list[Json]) -> None: self.raw = raw @property - def notes(self) -> List[Node]: + def notes(self) -> list[Node]: return list(chain.from_iterable(map(Node.make, self.raw))) def traverse(self) -> Iterator[Node]: diff --git a/my/rss/all.py b/my/rss/all.py index b4dbdbd..e10e4d2 100644 --- a/my/rss/all.py +++ b/my/rss/all.py @@ -3,9 +3,9 @@ Unified RSS data, merged from different services I used historically ''' # NOTE: you can comment out the sources you're not using -from . import feedbin, feedly +from collections.abc import Iterable -from typing import Iterable +from . import feedbin, feedly from .common import Subscription, compute_subscriptions diff --git a/my/rss/common.py b/my/rss/common.py index bb75297..bf9506e 100644 --- a/my/rss/common.py +++ b/my/rss/common.py @@ -1,10 +1,12 @@ -from my.core import __NOT_HPI_MODULE__ +from __future__ import annotations +from my.core import __NOT_HPI_MODULE__ # isort: skip + +from collections.abc import Iterable, Sequence from dataclasses import dataclass, replace from itertools import chain -from typing import Optional, List, Dict, Iterable, Tuple, Sequence -from my.core import warn_if_empty, datetime_aware +from my.core import datetime_aware, warn_if_empty @dataclass @@ -13,16 +15,16 @@ class Subscription: url: str id: str # TODO not sure about it... # eh, not all of them got reasonable 'created' time - created_at: Optional[datetime_aware] + created_at: datetime_aware | None subscribed: bool = True # snapshot of subscriptions at time -SubscriptionState = Tuple[datetime_aware, Sequence[Subscription]] +SubscriptionState = tuple[datetime_aware, Sequence[Subscription]] @warn_if_empty -def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]: +def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> list[Subscription]: """ Keeps track of everything I ever subscribed to. In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed) @@ -30,7 +32,7 @@ def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscri states = list(chain.from_iterable(sources)) # TODO keep 'source'/'provider'/'service' attribute? - by_url: Dict[str, Subscription] = {} + by_url: dict[str, Subscription] = {} # ah. dates are used for sorting for _when, state in sorted(states): # TODO use 'when'? 
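compute_subscriptions above merges dated snapshots so that a feed seen in any snapshot is remembered forever, while one missing from the newest snapshot is kept but flipped to unsubscribed. A toy standalone version of that merge with made-up URLs (the real code tracks whole Subscription objects, not booleans; assumes Python 3.9+):

from datetime import datetime, timezone

snapshots = [
    (datetime(2020, 1, 1, tzinfo=timezone.utc), ['a.com/rss', 'b.com/rss']),
    (datetime(2021, 1, 1, tzinfo=timezone.utc), ['a.com/rss']),
]

by_url: dict[str, bool] = {}
for _when, urls in sorted(snapshots):  # oldest first, later snapshots win
    for known in by_url:
        by_url[known] = known in urls  # absent from this snapshot => unsubscribed
    for u in urls:
        by_url[u] = True

assert by_url == {'a.com/rss': True, 'b.com/rss': False}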
diff --git a/my/rss/feedbin.py b/my/rss/feedbin.py index dc13a17..5f4da0a 100644 --- a/my/rss/feedbin.py +++ b/my/rss/feedbin.py @@ -3,15 +3,15 @@ Feedbin RSS reader """ import json +from collections.abc import Iterator, Sequence from pathlib import Path -from typing import Iterator, Sequence -from my.core import get_files, stat, Stats +from my.core import Stats, get_files, stat from my.core.compat import fromisoformat + from .common import Subscription, SubscriptionState -from my.config import feedbin as config - +from my.config import feedbin as config # isort: skip def inputs() -> Sequence[Path]: return get_files(config.export_path) diff --git a/my/rss/feedly.py b/my/rss/feedly.py index 127ef61..9bf5429 100644 --- a/my/rss/feedly.py +++ b/my/rss/feedly.py @@ -4,9 +4,10 @@ Feedly RSS reader import json from abc import abstractmethod +from collections.abc import Iterator, Sequence from datetime import datetime, timezone from pathlib import Path -from typing import Iterator, Protocol, Sequence +from typing import Protocol from my.core import Paths, get_files diff --git a/my/rtm.py b/my/rtm.py index b559ba4..217c969 100644 --- a/my/rtm.py +++ b/my/rtm.py @@ -6,21 +6,19 @@ REQUIRES = [ 'icalendar', ] +import re +from collections.abc import Iterator from datetime import datetime from functools import cached_property -import re -from typing import Dict, List, Iterator -from my.core import make_logger, get_files -from my.core.utils.itertools import make_dict - -from my.config import rtm as config - - -from more_itertools import bucket import icalendar # type: ignore from icalendar.cal import Todo # type: ignore +from more_itertools import bucket +from my.core import get_files, make_logger +from my.core.utils.itertools import make_dict + +from my.config import rtm as config # isort: skip logger = make_logger(__name__) @@ -32,14 +30,14 @@ class MyTodo: self.revision = revision @cached_property - def notes(self) -> List[str]: + def notes(self) -> list[str]: # TODO can there be multiple?? desc = self.todo['DESCRIPTION'] notes = re.findall(r'---\n\n(.*?)\n\nUpdated:', desc, flags=re.DOTALL) return notes @cached_property - def tags(self) -> List[str]: + def tags(self) -> list[str]: desc = self.todo['DESCRIPTION'] [tags_str] = re.findall(r'\nTags: (.*?)\n', desc, flags=re.DOTALL) if tags_str == 'none': @@ -92,11 +90,11 @@ class DAL: for t in self.cal.walk('VTODO'): yield MyTodo(t, self.revision) - def get_todos_by_uid(self) -> Dict[str, MyTodo]: + def get_todos_by_uid(self) -> dict[str, MyTodo]: todos = self.all_todos() return make_dict(todos, key=lambda t: t.uid) - def get_todos_by_title(self) -> Dict[str, List[MyTodo]]: + def get_todos_by_title(self) -> dict[str, list[MyTodo]]: todos = self.all_todos() bucketed = bucket(todos, lambda todo: todo.title) return {k: list(bucketed[k]) for k in bucketed} diff --git a/my/runnerup.py b/my/runnerup.py index a21075a..f5d7d1e 100644 --- a/my/runnerup.py +++ b/my/runnerup.py @@ -6,17 +6,15 @@ REQUIRES = [ 'python-tcxparser', ] +from collections.abc import Iterable from datetime import timedelta from pathlib import Path -from typing import Iterable - -from my.core import Res, get_files, Json -from my.core.compat import fromisoformat import tcxparser # type: ignore[import-untyped] from my.config import runnerup as config - +from my.core import Json, Res, get_files +from my.core.compat import fromisoformat # TODO later, use a proper namedtuple? 
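On the rtm hunk above: notes and tags are regex-parsed out of the DESCRIPTION field behind functools.cached_property, so each field is computed at most once per MyTodo instance. A stripped-down sketch of the same pattern with a hypothetical Note class:

import re
from functools import cached_property


class Note:
    def __init__(self, raw: str) -> None:
        self.raw = raw

    @cached_property
    def tags(self) -> list[str]:
        # the regex runs on first access only; the result is memoized on the instance
        m = re.search(r'Tags: (.*)', self.raw)
        return [] if m is None else m.group(1).split(', ')


n = Note('Buy milk\nTags: errands, food')
assert n.tags == ['errands', 'food']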
Workout = Json @@ -70,6 +68,8 @@ def workouts() -> Iterable[Res[Workout]]: from .core.pandas import DataFrameT, check_dataframe, error_to_row + + @check_dataframe def dataframe() -> DataFrameT: def it(): @@ -85,6 +85,8 @@ def dataframe() -> DataFrameT: return df -from .core import stat, Stats +from .core import Stats, stat + + def stats() -> Stats: return stat(dataframe) diff --git a/my/simple.py b/my/simple.py index 7462291..b7f25cd 100644 --- a/my/simple.py +++ b/my/simple.py @@ -1,12 +1,11 @@ ''' Just a demo module for testing and documentation purposes ''' +from collections.abc import Iterator from dataclasses import dataclass -from typing import Iterator - -from my.core import make_config from my.config import simple as user_config +from my.core import make_config @dataclass diff --git a/my/smscalls.py b/my/smscalls.py index 78bf7ee..ccaac72 100644 --- a/my/smscalls.py +++ b/my/smscalls.py @@ -2,6 +2,7 @@ Phone calls and SMS messages Exported using https://play.google.com/store/apps/details?id=com.riteshsahu.SMSBackupRestore&hl=en_US """ +from __future__ import annotations # See: https://www.synctech.com.au/sms-backup-restore/fields-in-xml-backup-files/ for schema @@ -9,8 +10,9 @@ REQUIRES = ['lxml'] from dataclasses import dataclass -from my.core import get_files, stat, Paths, Stats from my.config import smscalls as user_config +from my.core import Paths, Stats, get_files, stat + @dataclass class smscalls(user_config): @@ -18,11 +20,13 @@ class smscalls(user_config): export_path: Paths from my.core.cfg import make_config + config = make_config(smscalls) +from collections.abc import Iterator from datetime import datetime, timezone from pathlib import Path -from typing import NamedTuple, Iterator, Set, Tuple, Optional, Any, Dict, List +from typing import Any, NamedTuple import lxml.etree as etree @@ -33,7 +37,7 @@ class Call(NamedTuple): dt: datetime dt_readable: str duration_s: int - who: Optional[str] + who: str | None # type - 1 = Incoming, 2 = Outgoing, 3 = Missed, 4 = Voicemail, 5 = Rejected, 6 = Refused List. call_type: int @@ -50,7 +54,7 @@ class Call(NamedTuple): # All the field values are read as-is from the underlying database and no conversion is done by the app in most cases. # # The '(Unknown)' is just what my android phone does, not sure if there are others -UNKNOWN: Set[str] = {'(Unknown)'} +UNKNOWN: set[str] = {'(Unknown)'} def _extract_calls(path: Path) -> Iterator[Res[Call]]: @@ -83,7 +87,7 @@ def calls() -> Iterator[Res[Call]]: files = get_files(config.export_path, glob='calls-*.xml') # TODO always replacing with the latter is good, we get better contact names?? 
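The calls(), messages() and mms() readers below all share one shape: each export file re-dumps older records, so every record is reduced to a key and emitted at most once across files. A minimal standalone sketch of that dedup, with bare ints standing in for the (datetime, who, ...) keys:

from collections.abc import Iterable, Iterator


def dedup(batches: Iterable[list[tuple[int, str]]]) -> Iterator[tuple[int, str]]:
    emitted: set[int] = set()
    for batch in batches:  # one batch per export file, oldest first
        for key, payload in batch:
            if key in emitted:
                continue  # already yielded from an earlier file
            emitted.add(key)
            yield (key, payload)


assert list(dedup([[(1, 'a')], [(1, 'a'), (2, 'b')]])) == [(1, 'a'), (2, 'b')]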
- emitted: Set[datetime] = set() + emitted: set[datetime] = set() for p in files: for c in _extract_calls(p): if isinstance(c, Exception): @@ -98,7 +102,7 @@ def calls() -> Iterator[Res[Call]]: class Message(NamedTuple): dt: datetime dt_readable: str - who: Optional[str] + who: str | None message: str phone_number: str # type - 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox, 5 = Failed, 6 = Queued @@ -112,7 +116,7 @@ class Message(NamedTuple): def messages() -> Iterator[Res[Message]]: files = get_files(config.export_path, glob='sms-*.xml') - emitted: Set[Tuple[datetime, Optional[str], bool]] = set() + emitted: set[tuple[datetime, str | None, bool]] = set() for p in files: for c in _extract_messages(p): if isinstance(c, Exception): @@ -155,20 +159,20 @@ class MMSContentPart(NamedTuple): sequence_index: int content_type: str filename: str - text: Optional[str] - data: Optional[str] + text: str | None + data: str | None class MMS(NamedTuple): dt: datetime dt_readable: str - parts: List[MMSContentPart] + parts: list[MMSContentPart] # NOTE: this is often something like 'Name 1, Name 2', but might be different depending on your client - who: Optional[str] + who: str | None # NOTE: This can be a single phone number, or multiple, split by '~' or ','. It's better to think # of this as a 'key' or 'conversation ID', phone numbers are also present in 'addresses' phone_number: str - addresses: List[Tuple[str, int]] + addresses: list[tuple[str, int]] # 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox message_type: int @@ -194,7 +198,7 @@ class MMS(NamedTuple): def mms() -> Iterator[Res[MMS]]: files = get_files(config.export_path, glob='sms-*.xml') - emitted: Set[Tuple[datetime, Optional[str], str]] = set() + emitted: set[tuple[datetime, str | None, str]] = set() for p in files: for c in _extract_mms(p): if isinstance(c, Exception): @@ -207,7 +211,7 @@ def mms() -> Iterator[Res[MMS]]: yield c -def _resolve_null_str(value: Optional[str]) -> Optional[str]: +def _resolve_null_str(value: str | None) -> str | None: if value is None: return None # hmm.. there's some risk of the text actually being 'null', but there's @@ -235,7 +239,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]: yield RuntimeError(f'Missing one or more required attributes [date, readable_date, msg_box, address] in {mxml_str}') continue - addresses: List[Tuple[str, int]] = [] + addresses: list[tuple[str, int]] = [] for addr_parent in mxml.findall('addrs'): for addr in addr_parent.findall('addr'): addr_data = addr.attrib @@ -250,7 +254,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]: continue addresses.append((user_address, int(user_type))) - content: List[MMSContentPart] = [] + content: list[MMSContentPart] = [] for part_root in mxml.findall('parts'): @@ -267,8 +271,8 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]: # # man, attrib is some internal lxml ._Attrib type which can't # be typed by any sort of mappingproxy. maybe a protocol could work..?
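Expanding on the comment above: .attrib on an lxml element is lxml's own _Attrib proxy, so the code below annotates it as a plain dict and silences the checker rather than fighting the proxy type. A small standalone demo of the same workaround (assumes lxml is installed, which smscalls already requires, and Python 3.9+):

from typing import Any

import lxml.etree as etree

part = etree.fromstring('<part seq="0" ct="text/plain" text="hi"/>')
part_data: dict[str, Any] = part.attrib  # type: ignore
assert part_data.get('seq') == '0'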
diff --git a/my/stackexchange/gdpr.py b/my/stackexchange/gdpr.py
index 5292bef..78987be 100644
--- a/my/stackexchange/gdpr.py
+++ b/my/stackexchange/gdpr.py
@@ -6,8 +6,11 @@ Stackexchange data (uses [[https://stackoverflow.com/legal/gdpr/request][officia

 ### config
 from dataclasses import dataclass
+
 from my.config import stackexchange as user_config
-from my.core import PathIsh, make_config, get_files, Json
+from my.core import Json, PathIsh, get_files, make_config
+
+
 @dataclass
 class stackexchange(user_config):
     gdpr_path: PathIsh  # path to GDPR zip file
@@ -17,9 +20,13 @@ config = make_config(stackexchange)

 # TODO just merge all of them and then filter?.. not sure

-from my.core.compat import fromisoformat
-from typing import NamedTuple, Iterable
+from collections.abc import Iterable
 from datetime import datetime
+from typing import NamedTuple
+
+from my.core.compat import fromisoformat
+
+
 class Vote(NamedTuple):
     j: Json
     # todo ip?
@@ -62,7 +69,10 @@ class Vote(NamedTuple):
     # todo expose vote type?

 import json
+
 from ..core.error import Res
+
+
 def votes() -> Iterable[Res[Vote]]:
     # TODO there is also some site specific stuff in qa/ directory.. not sure if it's more detailed
     # todo should be defensive? not sure if present when user has no votes
@@ -74,6 +84,8 @@ def votes() -> Iterable[Res[Vote]]:
         yield Vote(r)


-from ..core import stat, Stats
+from ..core import Stats, stat
+
+
 def stats() -> Stats:
     return stat(votes)
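The `user_config` / `make_config` dance above is the standard HPI configuration pattern: a dataclass declares the attributes a module expects and inherits the user's config class, and `make_config` materializes it. A rough self-contained sketch of the idea (simplified; this is not the real `my.core.cfg` implementation, and the path is made up):

```python
from dataclasses import dataclass, fields


class user_config:  # stand-in for the class coming from my.config
    gdpr_path = '/backups/stackexchange.zip'


@dataclass
class stackexchange(user_config):
    gdpr_path: str  # path to GDPR zip file


def make_config(cls):
    # construct the dataclass from attributes found on the user's class,
    # so missing/mistyped attributes fail early and visibly
    kwargs = {f.name: getattr(user_config, f.name) for f in fields(cls)}
    return cls(**kwargs)


config = make_config(stackexchange)
assert config.gdpr_path.endswith('.zip')
```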
diff --git a/my/stackexchange/stexport.py b/my/stackexchange/stexport.py
index 812a155..111ed28 100644
--- a/my/stackexchange/stexport.py
+++ b/my/stackexchange/stexport.py
@@ -16,7 +16,8 @@ from my.core import (
     make_config,
     stat,
 )
-import my.config
+
+import my.config  # isort: skip


 @dataclass
diff --git a/my/taplog.py b/my/taplog.py
index 51eeb72..5e64a72 100644
--- a/my/taplog.py
+++ b/my/taplog.py
@@ -1,24 +1,26 @@
 '''
 [[https://play.google.com/store/apps/details?id=com.waterbear.taglog][Taplog]] app data
 '''
-from datetime import datetime
-from typing import NamedTuple, Dict, Optional, Iterable
+from __future__ import annotations

-from my.core import get_files, stat, Stats
-from my.core.sqlite import sqlite_connection
+from collections.abc import Iterable
+from datetime import datetime
+from typing import NamedTuple

 from my.config import taplog as user_config
+from my.core import Stats, get_files, stat
+from my.core.sqlite import sqlite_connection


 class Entry(NamedTuple):
-    row: Dict
+    row: dict

     @property
     def id(self) -> str:
         return str(self.row['_id'])

     @property
-    def number(self) -> Optional[float]:
+    def number(self) -> float | None:
         ns = self.row['number']
         # TODO ??
         if isinstance(ns, str):
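The `# isort: skip` markers appearing here (and on several other `my.config` imports in this patch) exclude a single line from import sorting: alphabetically `my.config` sorts before `my.core`, so without the marker ruff's isort rules would move it up, and the author evidently wants the user-supplied config imported last. A small runnable illustration using only stdlib modules (the module choice is arbitrary):

```python
# the sorter would normally move `import array` above the collections
# imports; the trailing comment opts this one line out of sorting
from collections import Counter
from collections.abc import Iterator  # noqa: F401

import array  # isort: skip

counts: Counter[str] = Counter('isort')
buf = array.array('i', [1, 2, 3])
print(counts.most_common(1), list(buf))
```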
diff --git a/my/telegram/telegram_backup.py b/my/telegram/telegram_backup.py
index ff4f904..eea7e50 100644
--- a/my/telegram/telegram_backup.py
+++ b/my/telegram/telegram_backup.py
@@ -1,17 +1,17 @@
 """
 Telegram data via [fabianonline/telegram_backup](https://github.com/fabianonline/telegram_backup) tool
 """
+from __future__ import annotations

+import sqlite3
+from collections.abc import Iterator
 from dataclasses import dataclass
 from datetime import datetime, timezone
-from struct import unpack_from, calcsize
-import sqlite3
-from typing import Dict, Iterator, Optional
-
-from my.core import datetime_aware, PathIsh
-from my.core.sqlite import sqlite_connection
+from struct import calcsize, unpack_from

 from my.config import telegram as user_config
+from my.core import PathIsh, datetime_aware
+from my.core.sqlite import sqlite_connection


 @dataclass
@@ -23,17 +23,17 @@ class config(user_config.telegram_backup):
 @dataclass
 class Chat:
     id: str
-    name: Optional[str]
+    name: str | None
     # not all users have short handle + groups don't have them either?
     # TODO hmm some groups have it -- it's just the tool doesn't dump them??
-    handle: Optional[str]
+    handle: str | None
     # not sure if need type?


 @dataclass
 class User:
     id: str
-    name: Optional[str]
+    name: str | None


 @dataclass
@@ -44,7 +44,7 @@ class Message:
     chat: Chat
     sender: User
     text: str
-    extra_media_info: Optional[str] = None
+    extra_media_info: str | None = None

     @property
     def permalink(self) -> str:
@@ -61,7 +61,7 @@ class Message:



-Chats = Dict[str, Chat]
+Chats = dict[str, Chat]


 def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bool) -> Message:
     ts = r['time']
     # desktop export uses UTC (checked by exporting in winter time vs summer time)
@@ -70,7 +70,7 @@ def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bool) -> Message:
     chat = chats[r['source_id']]
     sender = chats[r['sender_id']]

-    extra_media_info: Optional[str] = None
+    extra_media_info: str | None = None
     if with_extra_media_info and r['has_media'] == 1:
         # also it's quite hacky, so at least for now it's just an optional attribute behind the flag
         # defensive because it's a bit tricky to correctly parse without a proper api parser..
@@ -90,7 +90,7 @@ def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bool) -> Message:
     )


-def messages(*, extra_where: Optional[str]=None, with_extra_media_info: bool=False) -> Iterator[Message]:
+def messages(*, extra_where: str | None=None, with_extra_media_info: bool=False) -> Iterator[Message]:
     messages_query = 'SELECT * FROM messages WHERE message_type NOT IN ("service_message", "empty_message")'
     if extra_where is not None:
         messages_query += ' AND ' + extra_where
@@ -106,7 +106,7 @@ def messages(*, extra_where: str | None=None, with_extra_media_info: bool=False) -> Iterator[Message]:
         for r in db.execute('SELECT * FROM users ORDER BY id'):
             first = r["first_name"]
             last = r["last_name"]
-            name: Optional[str]
+            name: str | None
             if first is not None and last is not None:
                 name = f'{first} {last}'
             else:
@@ -121,7 +121,7 @@ def messages(*, extra_where: str | None=None, with_extra_media_info: bool=False) -> Iterator[Message]:
             yield _message_from_row(r, chats=chats, with_extra_media_info=with_extra_media_info)


-def _extract_extra_media_info(data: bytes) -> Optional[str]:
+def _extract_extra_media_info(data: bytes) -> str | None:
     # ugh... very hacky, but it does manage to extract from 90% of messages that have media
     pos = 0
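The telegram module converts raw unix timestamps assuming UTC (per the comment, verified by exporting in winter vs summer time). The general recipe for producing a timezone-aware datetime from such a timestamp, using only the stdlib (the timestamp value here is made up):

```python
from datetime import datetime, timezone

ts = 1_600_000_000  # seconds since epoch, as stored in the sqlite export
dt = datetime.fromtimestamp(ts, tz=timezone.utc)  # aware, pinned to UTC

assert dt.tzinfo is not None  # aware: safe to compare/sort across sources
print(dt.isoformat())  # 2020-09-13T12:26:40+00:00
```

Passing `tz=` explicitly matters: `datetime.fromtimestamp(ts)` without it returns a naive datetime in the machine's local time, which silently varies between hosts.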
diff --git a/my/tests/bluemaestro.py b/my/tests/bluemaestro.py
index 2d7c81e..d139a8f 100644
--- a/my/tests/bluemaestro.py
+++ b/my/tests/bluemaestro.py
@@ -1,4 +1,4 @@
-from typing import Iterator
+from collections.abc import Iterator

 import pytest
 from more_itertools import one
diff --git a/my/tests/body/weight.py b/my/tests/body/weight.py
index 069e940..f26ccf2 100644
--- a/my/tests/body/weight.py
+++ b/my/tests/body/weight.py
@@ -1,8 +1,10 @@
 from pathlib import Path
-import pytz
-from my.core.cfg import tmp_config
+
 import pytest
+import pytz
+
 from my.body.weight import from_orgmode
+from my.core.cfg import tmp_config


 def test_body_weight() -> None:
diff --git a/my/tests/commits.py b/my/tests/commits.py
index c967027..48e349f 100644
--- a/my/tests/commits.py
+++ b/my/tests/commits.py
@@ -1,14 +1,11 @@
 import os
 from pathlib import Path

-from more_itertools import bucket
 import pytest
-
-
-from my.core.cfg import tmp_config
+from more_itertools import bucket

 from my.coding.commits import commits
-
+from my.core.cfg import tmp_config

 pytestmark = pytest.mark.skipif(
     os.name == 'nt',
diff --git a/my/tests/location/fallback.py b/my/tests/location/fallback.py
index 10a4e5b..c09b902 100644
--- a/my/tests/location/fallback.py
+++ b/my/tests/location/fallback.py
@@ -2,8 +2,8 @@
 To test my.location.fallback_location.all
 """

+from collections.abc import Iterator
 from datetime import datetime, timedelta, timezone
-from typing import Iterator

 import pytest
 from more_itertools import ilen
diff --git a/my/tests/reddit.py b/my/tests/reddit.py
index 4f1ec51..4ddccf8 100644
--- a/my/tests/reddit.py
+++ b/my/tests/reddit.py
@@ -1,16 +1,14 @@
 import pytest
 from more_itertools import consume

-from my.core.cfg import tmp_config
-from my.core.utils.itertools import ensure_unique
-
-from .common import testdata
-
-
 # deliberately use mixed style imports on the top level and inside the methods to test tmp_config stuff
 # todo won't really be necessary once we migrate to lazy user config
 import my.reddit.all as my_reddit_all
 import my.reddit.rexport as my_reddit_rexport
+from my.core.cfg import tmp_config
+from my.core.utils.itertools import ensure_unique
+
+from .common import testdata


 def test_basic_1() -> None:
diff --git a/my/time/tz/common.py b/my/time/tz/common.py
index 13c8ac0..c0dd262 100644
--- a/my/time/tz/common.py
+++ b/my/time/tz/common.py
@@ -3,7 +3,6 @@ from typing import Callable, Literal, cast

 from my.core import datetime_aware

-
 '''
 Depending on the specific data provider and your level of paranoia you might expect different behaviour.. E.g.:
 - if your objects already have tz info, you might not need to call localize() at all
diff --git a/my/time/tz/main.py b/my/time/tz/main.py
index fafc5fe..bdd36b1 100644
--- a/my/time/tz/main.py
+++ b/my/time/tz/main.py
@@ -6,6 +6,7 @@ from datetime import datetime

 from my.core import datetime_aware

+
 # todo hmm, kwargs isn't mypy friendly.. but specifying types would require duplicating default args. uhoh
 def localize(dt: datetime, **kwargs) -> datetime_aware:
     # todo document patterns for combining multiple data sources
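The "kwargs isn't mypy friendly" todo above refers to `localize(dt, **kwargs)` forwarding options untyped. One way out, if duplicating default arguments is unacceptable, is `TypedDict` + `Unpack` (stdlib `typing` on 3.11+, `typing_extensions` before that). This is only a sketch of the technique, not something the patch does, and the option name is invented:

```python
from datetime import datetime
from typing import TypedDict

from typing_extensions import Unpack  # stdlib typing.Unpack on 3.11+


class LocalizeOpts(TypedDict, total=False):
    policy: str  # illustrative option name, e.g. 'keep' / 'convert'


def localize(dt: datetime, **kwargs: Unpack[LocalizeOpts]) -> datetime:
    # defaults live in one place (the callee), yet mypy checks call sites
    policy = kwargs.get('policy', 'keep')
    return dt if policy == 'keep' else dt.astimezone()


print(localize(datetime.now(), policy='keep'))
```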
diff --git a/my/time/tz/via_location.py b/my/time/tz/via_location.py
index 4920333..58b5bf7 100644
--- a/my/time/tz/via_location.py
+++ b/my/time/tz/via_location.py
@@ -2,6 +2,8 @@
 Timezone data provider, guesses timezone based on location data (e.g. GPS)
 '''
+from __future__ import annotations
+
 REQUIRES = [
     # for determining timezone by coordinate
     'timezonefinder',
@@ -10,6 +12,7 @@ REQUIRES = [
 import heapq
 import os
 from collections import Counter
+from collections.abc import Iterable, Iterator
 from dataclasses import dataclass
 from datetime import date, datetime
 from functools import lru_cache
@@ -17,14 +20,7 @@ from itertools import groupby
 from typing import (
     TYPE_CHECKING,
     Any,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
     Protocol,
-    Set,
-    Tuple,
 )

 import pytz
@@ -102,7 +98,7 @@ def _timezone_finder(*, fast: bool) -> Any:


 # for backwards compatibility
-def _locations() -> Iterator[Tuple[LatLon, datetime_aware]]:
+def _locations() -> Iterator[tuple[LatLon, datetime_aware]]:
     try:
         import my.location.all
@@ -125,7 +121,7 @@ def _locations() -> Iterator[tuple[LatLon, datetime_aware]]:
 # TODO: could use heapmerge or sort the underlying iterators somehow?
 # see https://github.com/karlicoss/HPI/pull/237#discussion_r858372934
-def _sorted_locations() -> List[Tuple[LatLon, datetime_aware]]:
+def _sorted_locations() -> list[tuple[LatLon, datetime_aware]]:
     return sorted(_locations(), key=lambda x: x[1])
@@ -140,7 +136,7 @@ class DayWithZone:
     zone: Zone


-def _find_tz_for_locs(finder: Any, locs: Iterable[Tuple[LatLon, datetime]]) -> Iterator[DayWithZone]:
+def _find_tz_for_locs(finder: Any, locs: Iterable[tuple[LatLon, datetime]]) -> Iterator[DayWithZone]:
     for (lat, lon), dt in locs:
         # TODO right. it's _very_ slow...
         zone = finder.timezone_at(lat=lat, lng=lon)
@@ -172,7 +168,7 @@ def _iter_local_dates() -> Iterator[DayWithZone]:
     # TODO: warnings doesn't actually warn?
     # warnings = []

-    locs: Iterable[Tuple[LatLon, datetime]]
+    locs: Iterable[tuple[LatLon, datetime]]
     locs = _sorted_locations() if cfg.sort_locations else _locations()

     yield from _find_tz_for_locs(finder, locs)
@@ -187,7 +183,7 @@ def _iter_local_dates_fallback() -> Iterator[DayWithZone]:
     cfg = make_config()

-    def _fallback_locations() -> Iterator[Tuple[LatLon, datetime]]:
+    def _fallback_locations() -> Iterator[tuple[LatLon, datetime]]:
         for loc in sorted(flocs(), key=lambda x: x.dt):
             yield ((loc.lat, loc.lon), loc.dt)
@@ -225,14 +221,14 @@ def _iter_tzs() -> Iterator[DayWithZone]:
     # we need to sort them first before we can do a groupby
     by_day = lambda p: p.day

-    local_dates: List[DayWithZone] = sorted(_iter_local_dates(), key=by_day)
+    local_dates: list[DayWithZone] = sorted(_iter_local_dates(), key=by_day)
     logger.debug(f"no. of items using exact locations: {len(local_dates)}")

-    local_dates_fallback: List[DayWithZone] = sorted(_iter_local_dates_fallback(), key=by_day)
+    local_dates_fallback: list[DayWithZone] = sorted(_iter_local_dates_fallback(), key=by_day)

     # find days that are in fallback but not in local_dates (i.e., missing days)
-    local_dates_set: Set[date] = {d.day for d in local_dates}
-    use_fallback_days: List[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set]
+    local_dates_set: set[date] = {d.day for d in local_dates}
+    use_fallback_days: list[DayWithZone] = [d for d in local_dates_fallback if d.day not in local_dates_set]
     logger.debug(f"no. of items being used from fallback locations: {len(use_fallback_days)}")

     # combine local_dates and missing days from fallback into a sorted list
@@ -246,20 +242,20 @@ def _iter_tzs() -> Iterator[DayWithZone]:


 @lru_cache(1)
-def _day2zone() -> Dict[date, pytz.BaseTzInfo]:
+def _day2zone() -> dict[date, pytz.BaseTzInfo]:
     # NOTE: kinda unfortunate that this will have to process all days before returning result for just one
     # however otherwise cachew cache might never be initialized properly
     # so we'll always end up recomputing everything during subsequent runs
     return {dz.day: pytz.timezone(dz.zone) for dz in _iter_tzs()}


-def _get_day_tz(d: date) -> Optional[pytz.BaseTzInfo]:
+def _get_day_tz(d: date) -> pytz.BaseTzInfo | None:
     return _day2zone().get(d)


 # ok to cache, there are only a few home locations?
 @lru_cache(None)
-def _get_home_tz(loc: LatLon) -> Optional[pytz.BaseTzInfo]:
+def _get_home_tz(loc: LatLon) -> pytz.BaseTzInfo | None:
     (lat, lng) = loc
     finder = _timezone_finder(fast=False)  # ok to use slow here for better precision
     zone = finder.timezone_at(lat=lat, lng=lng)
@@ -270,7 +266,7 @@ def _get_home_tz(loc: LatLon) -> pytz.BaseTzInfo | None:
     return pytz.timezone(zone)


-def get_tz(dt: datetime) -> Optional[pytz.BaseTzInfo]:
+def get_tz(dt: datetime) -> pytz.BaseTzInfo | None:
     '''
     Given a datetime, returns the timezone for that date.
     '''
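The gap-filling logic in `_iter_tzs` above boils down to: days covered by exact GPS fixes win, fallback estimates are only used for days with no exact fix, and the result is re-sorted by day. A self-contained sketch of that core (names mirror the patch, the data is made up):

```python
from dataclasses import dataclass
from datetime import date


@dataclass(frozen=True)
class DayWithZone:
    day: date
    zone: str


exact = [DayWithZone(date(2024, 1, 1), 'Europe/London')]
fallback = [
    DayWithZone(date(2024, 1, 1), 'UTC'),            # shadowed by exact data
    DayWithZone(date(2024, 1, 2), 'Europe/Paris'),   # fills a missing day
]

exact_days: set[date] = {d.day for d in exact}
use_fallback = [d for d in fallback if d.day not in exact_days]
combined = sorted(exact + use_fallback, key=lambda d: d.day)

assert [d.zone for d in combined] == ['Europe/London', 'Europe/Paris']
```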
diff --git a/my/tinder/android.py b/my/tinder/android.py
index d9b256b..a09794f 100644
--- a/my/tinder/android.py
+++ b/my/tinder/android.py
@@ -3,20 +3,22 @@ Tinder data from Android app database (in =/data/data/com.tinder/databases/tinde
 """
 from __future__ import annotations

-from collections import defaultdict, Counter
+import sqlite3
+from collections import Counter, defaultdict
+from collections.abc import Iterator, Mapping, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from itertools import chain
 from pathlib import Path
-import sqlite3
-from typing import Sequence, Iterator, Union, Dict, List, Mapping
+from typing import Union

-from my.core import Paths, get_files, Res, stat, Stats, datetime_aware, make_logger
+from my.core import Paths, Res, Stats, datetime_aware, get_files, make_logger, stat
 from my.core.common import unique_everseen
 from my.core.compat import assert_never
 from my.core.error import echain
 from my.core.sqlite import sqlite_connection
-import my.config
+
+import my.config  # isort: skip


 logger = make_logger(__name__)
@@ -164,8 +166,8 @@ def _parse_msg(row: sqlite3.Row) -> _Message:

 # todo maybe it's rich_entities method?
 def entities() -> Iterator[Res[Entity]]:
-    id2person: Dict[str, Person] = {}
-    id2match: Dict[str, Match] = {}
+    id2person: dict[str, Person] = {}
+    id2match: dict[str, Match] = {}
     for x in unique_everseen(_entities):
         if isinstance(x, Exception):
             yield x
@@ -217,7 +219,7 @@ def messages() -> Iterator[Res[Message]]:

 # todo not sure, maybe it's not fundamental enough to keep here...
 def match2messages() -> Iterator[Res[Mapping[Match, Sequence[Message]]]]:
-    res: Dict[Match, List[Message]] = defaultdict(list)
+    res: dict[Match, list[Message]] = defaultdict(list)
     for x in entities():
         if isinstance(x, Exception):
             yield x
diff --git a/my/topcoder.py b/my/topcoder.py
index 07f71be..56403e2 100644
--- a/my/topcoder.py
+++ b/my/topcoder.py
@@ -1,14 +1,14 @@
+import json
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from functools import cached_property
-import json
 from pathlib import Path
-from typing import Iterator, Sequence

-from my.core import get_files, Res, datetime_aware
+from my.core import Res, datetime_aware, get_files
 from my.core.compat import fromisoformat
 from my.experimental.destructive_parsing import Manager

-from my.config import topcoder as config  # type: ignore[attr-defined]
+from my.config import topcoder as config  # type: ignore[attr-defined]  # isort: skip


 def inputs() -> Sequence[Path]:
diff --git a/my/twitter/all.py b/my/twitter/all.py
index 4714021..c2c471e 100644
--- a/my/twitter/all.py
+++ b/my/twitter/all.py
@@ -1,11 +1,11 @@
 """
 Unified Twitter data (merged from the archive and periodic updates)
 """
-from typing import Iterator
+from collections.abc import Iterator
+
 from ..core import Res
 from ..core.source import import_source
-from .common import merge_tweets, Tweet
-
+from .common import Tweet, merge_tweets

 # NOTE: you can comment out the sources you don't need
 src_twint = import_source(module_name='my.twitter.twint')
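`match2messages` in the tinder module above groups a flat entity stream into per-match message lists with a `defaultdict`. The same shape in miniature (plain strings stand in for the `Match` objects):

```python
from collections import defaultdict

messages = [
    ('match-1', 'hey'),
    ('match-2', 'hello'),
    ('match-1', 'how are you?'),
]

res: dict[str, list[str]] = defaultdict(list)
for match_id, text in messages:
    res[match_id].append(text)  # missing keys spring into existence as []

assert res['match-1'] == ['hey', 'how are you?']
```

Annotating `res` as `dict[...]` while constructing a `defaultdict` is fine for the type checker, since `defaultdict` is a subtype of `dict`.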
 not sure
diff --git a/my/twitter/archive.py b/my/twitter/archive.py
index 1573754..c9d2dbc 100644
--- a/my/twitter/archive.py
+++ b/my/twitter/archive.py
@@ -7,6 +7,7 @@ from __future__ import annotations
 import html
 import json  # hmm interesting enough, orjson didn't give much speedup here?
 from abc import abstractmethod
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime
 from functools import cached_property
@@ -14,8 +15,6 @@ from itertools import chain
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
-    Iterator,
-    Sequence,
 )

 from more_itertools import unique_everseen
diff --git a/my/twitter/common.py b/my/twitter/common.py
index 258216f..8c346f6 100644
--- a/my/twitter/common.py
+++ b/my/twitter/common.py
@@ -1,17 +1,19 @@
-from my.core import __NOT_HPI_MODULE__
+from my.core import __NOT_HPI_MODULE__  # isort: skip

+from collections.abc import Iterator
 from itertools import chain
-from typing import Iterator, Any
+from typing import Any

 from more_itertools import unique_everseen

-
 # TODO add proper Protocol for Tweet
 Tweet = Any
 TweetId = str

-from my.core import warn_if_empty, Res
+from my.core import Res, warn_if_empty
+
+
 @warn_if_empty
 def merge_tweets(*sources: Iterator[Res[Tweet]]) -> Iterator[Res[Tweet]]:
     def key(r: Res[Tweet]):
diff --git a/my/twitter/talon.py b/my/twitter/talon.py
index 1b79727..dbf2e2e 100644
--- a/my/twitter/talon.py
+++ b/my/twitter/talon.py
@@ -7,10 +7,11 @@ from __future__ import annotations
 import re
 import sqlite3
 from abc import abstractmethod
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Iterator, Sequence, Union
+from typing import Union

 from my.core import Paths, Res, datetime_aware, get_files
 from my.core.common import unique_everseen
diff --git a/my/twitter/twint.py b/my/twitter/twint.py
index ceb5406..5106923 100644
--- a/my/twitter/twint.py
+++ b/my/twitter/twint.py
@@ -1,17 +1,17 @@
 """
 Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export.
 """
+from collections.abc import Iterator
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import NamedTuple, Iterator, List
+from typing import NamedTuple

-
-from my.core import Paths, Res, get_files, LazyLogger, Json, datetime_aware, stat, Stats
+from my.core import Json, LazyLogger, Paths, Res, Stats, datetime_aware, get_files, stat
 from my.core.cfg import make_config
 from my.core.sqlite import sqlite_connection

-from my.config import twint as user_config
+from my.config import twint as user_config  # isort: skip


 # TODO move to twitter.twint config structure
@@ -76,7 +76,7 @@ class Tweet(NamedTuple):
         return text

     @property
-    def urls(self) -> List[str]:
+    def urls(self) -> list[str]:
         ustr = self.row['urls']
         if len(ustr) == 0:
             return []
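Earlier, in `my/twitter/android.py`, `_process_one` fetches quoted tweets "recursively": the first query records quoted tweet ids into a `quoted: set[int]`, then follows them up. A generic worklist sketch of that idea (a dict stands in for the sqlite database; the structure is illustrative, not the module's actual query logic):

```python
# toy store: tweet id -> (text, id of quoted tweet or None)
store: dict[int, tuple[str, int | None]] = {
    1: ('original', None),
    2: ('quotes 1', 1),
    3: ('quotes 2', 2),
}


def fetch_with_quoted(start: int) -> list[str]:
    texts: list[str] = []
    seen: set[int] = set()
    todo = [start]  # worklist of ids still to fetch
    while todo:
        tid = todo.pop()
        if tid in seen:
            continue
        seen.add(tid)
        text, quoted_id = store[tid]
        texts.append(text)
        if quoted_id is not None:
            todo.append(quoted_id)  # follow the quote chain
    return texts


assert fetch_with_quoted(3) == ['quotes 2', 'quotes 1', 'original']
```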
""" -from my.core import __NOT_HPI_MODULE__ +from my.core import __NOT_HPI_MODULE__ # isort: skip + +import sys +from collections.abc import Iterator from dataclasses import dataclass from datetime import datetime -import sys -from typing import Iterator, List NOW = datetime.now() @@ -19,10 +20,10 @@ NOW = datetime.now() class Item: dt: datetime message: str - path: List[str] + path: list[str] -def get_pkg_path() -> List[str]: +def get_pkg_path() -> list[str]: pkg = sys.modules[__package__] return list(pkg.__path__) diff --git a/my/vk/favorites.py b/my/vk/favorites.py index 9caae6d..5f278ff 100644 --- a/my/vk/favorites.py +++ b/my/vk/favorites.py @@ -1,20 +1,21 @@ # todo: uses my private export script?, timezone +from __future__ import annotations + +import json +from collections.abc import Iterable, Iterator from dataclasses import dataclass from datetime import datetime, timezone -import json -from typing import Iterator, Iterable, Optional - -from my.core import Json, datetime_aware, stat, Stats -from my.core.error import Res from my.config import vk as config # type: ignore[attr-defined] +from my.core import Json, Stats, datetime_aware, stat +from my.core.error import Res @dataclass class Favorite: dt: datetime_aware title: str - url: Optional[str] + url: str | None text: str diff --git a/my/vk/vk_messages_backup.py b/my/vk/vk_messages_backup.py index c73587f..4f593c8 100644 --- a/my/vk/vk_messages_backup.py +++ b/my/vk/vk_messages_backup.py @@ -2,18 +2,16 @@ VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]]) ''' # note: could reuse the original repo, but little point I guess since VK closed their API +import json +from collections.abc import Iterator from dataclasses import dataclass from datetime import datetime -import json -from typing import Dict, Iterator import pytz -from my.core import stat, Stats, Json, Res, datetime_aware, get_files -from my.core.common import unique_everseen - from my.config import vk_messages_backup as config - +from my.core import Json, Res, Stats, datetime_aware, get_files, stat +from my.core.common import unique_everseen # I think vk_messages_backup used this tz? # not sure if vk actually used to return this tz in api? 
@@ -45,7 +43,7 @@ class Message:
     body: str


-Users = Dict[Uid, User]
+Users = dict[Uid, User]


 def users() -> Users:
diff --git a/my/whatsapp/android.py b/my/whatsapp/android.py
index 3dfed3e..27ee743 100644
--- a/my/whatsapp/android.py
+++ b/my/whatsapp/android.py
@@ -3,18 +3,19 @@ Whatsapp data from Android app database (in =/data/data/com.whatsapp/databases/m
 """
 from __future__ import annotations

+import sqlite3
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-import sqlite3
-from typing import Union, Sequence, Iterator, Optional
+from typing import Union

-from my.core import get_files, Paths, datetime_aware, Res, make_logger, make_config
+from my.core import Paths, Res, datetime_aware, get_files, make_config, make_logger
 from my.core.common import unique_everseen
 from my.core.error import echain, notnone
 from my.core.sqlite import sqlite_connection

-import my.config
+import my.config  # isort: skip

 logger = make_logger(__name__)
@@ -23,7 +24,7 @@ logger = make_logger(__name__)
 class Config(my.config.whatsapp.android):
     # path[s]/glob to the exported sqlite databases
     export_path: Paths
-    my_user_id: Optional[str] = None
+    my_user_id: str | None = None

 config = make_config(Config)
@@ -38,13 +39,13 @@ class Chat:
     id: str
     # todo not sure how to support renames?
     # could change Chat object itself, but this won't work well with incremental processing..
-    name: Optional[str]
+    name: str | None


 @dataclass(unsafe_hash=True)
 class Sender:
     id: str
-    name: Optional[str]
+    name: str | None


 @dataclass(unsafe_hash=True)
@@ -53,7 +54,7 @@ class Message:
     id: str
     dt: datetime_aware
     sender: Sender
-    text: Optional[str]
+    text: str | None

 Entity = Union[Chat, Sender, Message]
@@ -125,9 +126,9 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]:
         ts: int = notnone(r['timestamp'])
         dt = datetime.fromtimestamp(ts / 1000, tz=timezone.utc)

-        text: Optional[str] = r['text_data']
-        media_file_path: Optional[str] = r['file_path']
-        media_file_size: Optional[int] = r['file_size']
+        text: str | None = r['text_data']
+        media_file_path: str | None = r['file_path']
+        media_file_size: int | None = r['file_size']

         message_type = r['message_type']
diff --git a/my/youtube/takeout.py b/my/youtube/takeout.py
index f29b2e3..703715f 100644
--- a/my/youtube/takeout.py
+++ b/my/youtube/takeout.py
@@ -1,7 +1,8 @@
 from __future__ import annotations

+from collections.abc import Iterable, Iterator
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Iterable, Iterator
+from typing import TYPE_CHECKING, Any

 from my.core import Res, Stats, datetime_aware, make_logger, stat, warnings
 from my.core.compat import deprecated
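Note that in `my/whatsapp/android.py` above, `Entity = Union[Chat, Sender, Message]` keeps `typing.Union` even though the field annotations moved to `X | None`. That is deliberate: `from __future__ import annotations` only defers *annotations*, while a module-level alias like this is executed at import time, and the `Chat | Sender` operator on classes requires Python 3.10. A self-contained illustration:

```python
from __future__ import annotations  # annotations become lazy strings

from typing import Union


class Chat: ...
class Sender: ...


# fine even on 3.9: the annotation is never evaluated at import time
def f(x: Chat | Sender) -> None: ...


# this line IS evaluated at import time, so on 3.9 it must stay Union;
# `Entity = Chat | Sender` would raise TypeError there
Entity = Union[Chat, Sender]
```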
diff --git a/my/zotero.py b/my/zotero.py
index 4440aae..8eb34ba 100644
--- a/my/zotero.py
+++ b/my/zotero.py
@@ -1,14 +1,16 @@
+from __future__ import annotations as _annotations
+
+import json
+import sqlite3
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
-import json
-from typing import Iterator, Optional, Dict, Any, Sequence
 from pathlib import Path
-import sqlite3
+from typing import Any

-from my.core import make_logger, Res, datetime_aware
+from my.core import Res, datetime_aware, make_logger
 from my.core.sqlite import sqlite_copy_and_open

-
 logger = make_logger(__name__)
@@ -26,7 +28,7 @@ class Item:
     """Corresponds to 'Zotero item'"""
     file: Path
     title: str
-    url: Optional[Url]
+    url: Url | None
     tags: Sequence[str]
@@ -39,8 +41,8 @@ class Annotation:
     page: int
     """0-indexed"""

-    text: Optional[str]
-    comment: Optional[str]
+    text: str | None
+    comment: str | None
     tags: Sequence[str]
     color_hex: str
     """Original hex-encoded color in zotero"""
@@ -97,7 +99,7 @@ WHERE ID.fieldID = 13 AND IA.itemID = ?


 # TODO maybe exclude 'private' methods from detection?
-def _query_raw() -> Iterator[Res[Dict[str, Any]]]:
+def _query_raw() -> Iterator[Res[dict[str, Any]]]:
     [db] = inputs()

     with sqlite_copy_and_open(db) as conn:
@@ -157,7 +159,7 @@ def _hex2human(color_hex: str) -> str:
     }.get(color_hex, color_hex)


-def _parse_annotation(r: Dict) -> Annotation:
+def _parse_annotation(r: dict) -> Annotation:
     text = r['text']
     comment = r['comment']
     # todo use json query for this?
diff --git a/my/zulip/organization.py b/my/zulip/organization.py
index 2e0df4b..d0cfcb7 100644
--- a/my/zulip/organization.py
+++ b/my/zulip/organization.py
@@ -6,11 +6,11 @@ from __future__ import annotations

 import json
 from abc import abstractmethod
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from itertools import count
 from pathlib import Path
-from typing import Iterator, Sequence

 from my.core import (
     Json,
diff --git a/ruff.toml b/ruff.toml
index 5fbd657..3d803e7 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -1,4 +1,4 @@
-target-version = "py38"  # NOTE: inferred from pyproject.toml if present
+target-version = "py39"  # NOTE: inferred from pyproject.toml if present

 lint.extend-select = [
     "F",    # flakes rules -- default, but extend just in case
@@ -26,8 +26,8 @@ lint.extend-select = [
     "TID",  # various imports suggestions
     "TRY",  # various exception handling rules
     "UP",   # detect deprecated python stdlib stuff
-    # "FA",   # suggest using from __future__ import annotations TODO enable later after we make sure cachew works?
-    # "PTH",  # pathlib migration -- TODO enable later
+    "FA",   # suggest using from __future__ import annotations
+    "PTH",  # pathlib migration
     "ARG",  # unused argument checks
     # "A",    # builtin shadowing -- TODO handle later
     # "EM",   # TODO hmm could be helpful to prevent duplicate err msg in traceback.. but kinda annoying
@@ -35,6 +35,11 @@
     # "ALL",  # uncomment this to check for new rules!
 ]

+# Preserve types, even if a file imports `from __future__ import annotations`
+# we need this for cachew to work with HPI types on 3.9
+# can probably remove after 3.10?
+lint.pyupgrade.keep-runtime-typing = true
+
 lint.ignore = [
     "D",  # annoying nags about docstrings
     "N",  # pep naming
@@ -68,11 +73,6 @@ lint.ignore = [
     "F841",  # Local variable `count` is assigned to but never used
     ###

-### TODO should be fine to use these with from __future__ import annotations?
-### there was some issue with cachew though... double check this?
-    "UP006",  # use type instead of Type
-    "UP007",  # use X | Y instead of Union
-###
     "RUF100",  # unused noqa -- handle later
     "RUF012",  # mutable class attrs should be annotated with ClassVar... ugh pretty annoying for user configs
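The new `lint.pyupgrade.keep-runtime-typing = true` setting is what makes it safe to enable FA and drop the UP006/UP007 ignores: cachew (and anything else that calls `typing.get_type_hints`) resolves annotations at runtime, and on Python 3.9 a postponed `str | None` blows up when the string is re-evaluated. A minimal demonstration of the failure mode this setting guards against (the try branch only fires on 3.9):

```python
from __future__ import annotations  # annotations stored as strings

import typing
from typing import Optional


def old_style() -> Optional[str]:
    return None


def new_style() -> str | None:
    return None


print(typing.get_type_hints(old_style))  # works on 3.9 and later

try:
    # resolving the string re-evaluates `str | None` at runtime;
    # on 3.9 that raises TypeError (the | operator on types is 3.10+)
    print(typing.get_type_hints(new_style))
except TypeError as e:
    print('3.9 failure:', e)
```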