diff --git a/my/bumble/android.py b/my/bumble/android.py index a2d2850..21ac74d 100644 --- a/my/bumble/android.py +++ b/my/bumble/android.py @@ -54,7 +54,7 @@ class Message(_BaseMessage): import json from typing import Union -from ..core import Res +from ..core import Res, assert_never import sqlite3 from ..core.sqlite import sqlite_connect_immutable @@ -66,7 +66,12 @@ def _entities() -> Iterator[EntitiesRes]: yield from _handle_db(db) -def _handle_db(db) -> Iterator[EntitiesRes]: +def _handle_db(db: sqlite3.Connection) -> Iterator[EntitiesRes]: + # todo hmm not sure + # on the one hand kinda nice to use dataset.. + # on the other, it's somewhat of a complication, and + # would be nice to have something type-directed for sql queries though + # e.g. with typeddict or something, so the number of parameter to the sql query matches? for row in db.execute(f'SELECT user_id, user_name FROM conversation_info'): (user_id, user_name) = row yield Person( @@ -136,4 +141,4 @@ def messages() -> Iterator[Res[Message]]: id2msg[m.id] = m yield m continue - assert False, type(x) # should be unreachable + assert_never(x) diff --git a/my/core/__init__.py b/my/core/__init__.py index f680f37..ee80d98 100644 --- a/my/core/__init__.py +++ b/my/core/__init__.py @@ -5,6 +5,7 @@ from .common import LazyLogger from .common import warn_if_empty from .common import stat, Stats from .common import datetime_naive, datetime_aware +from .common import assert_never from .cfg import make_config from .util import __NOT_HPI_MODULE__ diff --git a/my/core/common.py b/my/core/common.py index c72fc77..92c32f5 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -4,7 +4,7 @@ from datetime import datetime import functools from contextlib import contextmanager import types -from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple, TYPE_CHECKING +from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple, 
TYPE_CHECKING, NoReturn import warnings from . import warnings as core_warnings @@ -632,5 +632,12 @@ class DummyExecutor(Executor): def shutdown(self, wait: bool=True) -> None: # type: ignore[override] self._shutdown = True + +# see https://hakibenita.com/python-mypy-exhaustive-checking#exhaustiveness-checking +def assert_never(value: NoReturn) -> NoReturn: + """Raise AssertionError for a value that exhaustive type narrowing should have ruled out; raised explicitly (not via `assert`) so it still fires under `python -O`.""" + raise AssertionError(f'Unhandled value: {value} ({type(value).__name__})') + + # legacy deprecated import from .compat import cached_property as cproperty diff --git a/my/core/dataset.py b/my/core/dataset.py index c8591d4..070b9b3 100644 --- a/my/core/dataset.py +++ b/my/core/dataset.py @@ -1,11 +1,29 @@ +from __future__ import annotations from .common import assert_subpackage; assert_subpackage(__name__) from .common import PathIsh +from .compat import Protocol from .sqlite import sqlite_connect_immutable +## sadly dataset doesn't have any type definitions +from typing import Iterable, Iterator, Dict, Optional, Any +from contextlib import AbstractContextManager + + +# NOTE: may not be true in general, but will be in the vast majority of cases +row_type_T = Dict[str, Any] + + +class TableT(Iterable, Protocol): + def find(self, *, order_by: Optional[str]=None) -> Iterator[row_type_T]: ... + + +class DatabaseT(AbstractContextManager['DatabaseT'], Protocol): + def __getitem__(self, table: str) -> TableT: ... +## # TODO wonder if also need to open without WAL..
test this on read-only directory/db file -def connect_readonly(db: PathIsh): +def connect_readonly(db: PathIsh) -> DatabaseT: import dataset # type: ignore # see https://github.com/pudo/dataset/issues/136#issuecomment-128693122 # todo not sure if mode=ro has any benefit, but it doesn't work on read-only filesystems diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py index a7ed9d6..6d82002 100644 --- a/my/fbmessenger/android.py +++ b/my/fbmessenger/android.py @@ -3,6 +3,8 @@ Messenger data from Android app database (in =/data/data/com.facebook.orca/datab """ from __future__ import annotations +REQUIRES = ['dataset'] + from dataclasses import dataclass from datetime import datetime from typing import Iterator, Sequence, Optional, Dict @@ -61,8 +63,8 @@ class Message(_BaseMessage): import json from typing import Union -from ..core.error import Res -from ..core.dataset import connect_readonly +from ..core import Res, assert_never +from ..core.dataset import connect_readonly, DatabaseT Entity = Union[Sender, Thread, _Message] def _entities() -> Iterator[Res[Entity]]: for f in inputs(): @@ -70,11 +72,11 @@ def _entities() -> Iterator[Res[Entity]]: yield from _process_db(db) -def _process_db(db) -> Iterator[Res[Entity]]: +def _process_db(db: DatabaseT) -> Iterator[Res[Entity]]: # works both for GROUP:group_id and ONE_TO_ONE:other_user:your_user threadkey2id = lambda key: key.split(':')[1] - for r in db['threads']: + for r in db['threads'].find(): try: yield Thread( id=threadkey2id(r['thread_key']), @@ -84,8 +86,8 @@ def _process_db(db) -> Iterator[Res[Entity]]: yield e continue - for r in db['messages'].all(order_by='timestamp_ms'): - mtype = r['msg_type'] + for r in db['messages'].find(order_by='timestamp_ms'): + mtype: int = r['msg_type'] if mtype == -1: # likely immediately deleted or something? doesn't have any data at all continue @@ -94,7 +96,7 @@ def _process_db(db) -> Iterator[Res[Entity]]: try: # todo could use thread_users? 
sj = json.loads(r['sender']) - ukey = sj['user_key'] + ukey: str = sj['user_key'] prefix = 'FACEBOOK:' assert ukey.startswith(prefix), ukey user_id = ukey[len(prefix):] @@ -167,4 +169,6 @@ def messages() -> Iterator[Res[Message]]: msgs[m.id] = m yield m continue - assert False, type(x) # should be unreachable + # NOTE: for some reason mypy coverage highlights it as red? + # but it actually works as expected: i.e. if you omit one of the clauses above, mypy will complain + assert_never(x) diff --git a/my/instagram/android.py b/my/instagram/android.py index c7a86e7..d99b047 100644 --- a/my/instagram/android.py +++ b/my/instagram/android.py @@ -88,12 +88,13 @@ def _parse_message(j: Json) -> Optional[_Message]: import json from typing import Union -from ..core.error import Res +from ..core import Res, assert_never import sqlite3 from ..core.sqlite import sqlite_connect_immutable def _entities() -> Iterator[Res[Union[User, _Message]]]: # NOTE: definitely need to merge multiple, app seems to recycle old messages # TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data... + # todo use TypedDict? 
for f in inputs(): with sqlite_connect_immutable(f) as db: @@ -149,4 +150,4 @@ def messages() -> Iterator[Res[Message]]: user=user, ) continue - assert False, type(x) # should not happen + assert_never(x) diff --git a/my/instagram/gdpr.py b/my/instagram/gdpr.py index 59b4b07..754a2e9 100644 --- a/my/instagram/gdpr.py +++ b/my/instagram/gdpr.py @@ -56,7 +56,7 @@ def _decode(s: str) -> str: import json from typing import Union -from ..core.error import Res +from ..core import Res, assert_never def _entities() -> Iterator[Res[Union[User, _Message]]]: from ..core.kompress import ZipPath last = ZipPath(max(inputs())) @@ -165,4 +165,4 @@ def messages() -> Iterator[Res[Message]]: user=user, ) continue - assert False, type(x) # should not happen + assert_never(x) diff --git a/my/zulip/organization.py b/my/zulip/organization.py index 3cfe0df..7ab49a1 100644 --- a/my/zulip/organization.py +++ b/my/zulip/organization.py @@ -79,7 +79,7 @@ class Message: from typing import Union from itertools import count import json -from ..core import Res +from ..core import Res, assert_never # todo cache it def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]: # TODO hmm -- not sure if max lexicographically will actually be latest? @@ -169,4 +169,4 @@ def messages() -> Iterator[Res[Message]]: content=x.content, ) continue - assert False # should be unreachable + assert_never(x)