From f8e73134b3002ae5dd5e394e9bd066c559376514 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 5 Feb 2022 16:19:53 +0000 Subject: [PATCH] fbmessenger: add all.py, merge messages from different sources followup for https://github.com/karlicoss/HPI/pull/179 --- my/fbmessenger/all.py | 48 +++++++++++++++++++++++++++++++++++++++ my/fbmessenger/android.py | 4 ++-- my/fbmessenger/common.py | 46 +++++++++++++++++++++++++++++++++++++ my/fbmessenger/export.py | 4 ++-- 4 files changed, 98 insertions(+), 4 deletions(-) create mode 100644 my/fbmessenger/all.py create mode 100644 my/fbmessenger/common.py diff --git a/my/fbmessenger/all.py b/my/fbmessenger/all.py new file mode 100644 index 0000000..ca7f064 --- /dev/null +++ b/my/fbmessenger/all.py @@ -0,0 +1,48 @@ +from typing import Iterator +from my.core import Res +from my.core.common import Stats +from my.core.source import import_source + +from .common import Message, _merge_messages + + +src_export = import_source(module_name=f'my.fbmessenger.export') +src_android = import_source(module_name=f'my.fbmessenger.android') + + +@src_export +def _messages_export() -> Iterator[Res[Message]]: + from . import export + # ok, this one is a little tricky + # export.Message type is actually external (coming from fbmessengerexport module) + # so it's unclear how to make mypy believe/check that export.Message is a structural subtype of common.Message + # we could use runtime_checkable, but then it might also crash in runtime + # which feels somewhat mean if someone is only using fbmessenger.export module and needs its attributes only + # so perhaps it makes sense that the typecheck belongs here? 
+ for m in export.messages(): + # NOTE: just 'yield m' works and seems to type check properly + if isinstance(m, Exception): + yield m + else: + # however, this way it results in a nicer error (shows the missing Protocol attributes) + # https://github.com/python/mypy/issues/8235#issuecomment-570712356 + m2: Message = m + yield m2 + + +@src_android +def _messages_android() -> Iterator[Res[Message]]: + from . import android + yield from android.messages() + + +def messages() -> Iterator[Res[Message]]: + yield from _merge_messages( + _messages_export(), + _messages_android(), + ) + + +def stats() -> Stats: + from my.core import stat + return stat(messages) diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py index bdc4171..a7ed9d6 100644 --- a/my/fbmessenger/android.py +++ b/my/fbmessenger/android.py @@ -38,10 +38,8 @@ class Thread: # todo not sure about order of fields... @dataclass class _BaseMessage: - # todo nice, ids are same as in fbchat?? id: str dt: datetime - # is_incoming: bool text: Optional[str] @@ -52,6 +50,8 @@ class _Message(_BaseMessage): reply_to_id: Optional[str] +# todo hmm, on the one hand would be kinda nice to inherit common.Message protocol here +# on the other, because the properties there are read only we can't construct the object anymore?? @dataclass(unsafe_hash=True) class Message(_BaseMessage): thread: Thread diff --git a/my/fbmessenger/common.py b/my/fbmessenger/common.py new file mode 100644 index 0000000..0f3ec1b --- /dev/null +++ b/my/fbmessenger/common.py @@ -0,0 +1,46 @@ +from datetime import datetime +from typing import Iterator, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + try: + from typing import Protocol + except ImportError: + # requirement of mypy + from typing_extensions import Protocol # type: ignore[misc] +else: + Protocol = object + + +class Thread(Protocol): + @property + def id(self) -> str: ... + + # todo hmm it doesn't like it because one from .export is just str, not Optional... 
+ # name: Optional[str] + + +class Message(Protocol): + @property + def id(self) -> str: ... + + @property + def dt(self) -> datetime: ... + + @property + def text(self) -> Optional[str]: ... + + @property + def thread(self) -> Thread: ... + + +from itertools import chain +from more_itertools import unique_everseen +from my.core import Res +def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]: + # todo might be nice to dump some stats for debugging, e.g. how many were overlapping? + def key(r: Res[Message]): + if isinstance(r, Exception): + return str(r) + else: + return r.id + yield from unique_everseen(chain(*sources), key=key) diff --git a/my/fbmessenger/export.py b/my/fbmessenger/export.py index fa7b944..0edb571 100644 --- a/my/fbmessenger/export.py +++ b/my/fbmessenger/export.py @@ -45,8 +45,8 @@ def _dal() -> messenger.DAL: return messenger.DAL(config.export_db) -# TODO Result type? -def messages() -> Iterator[messenger.Message]: +from ..core import Res +def messages() -> Iterator[Res[messenger.Message]]: model = _dal() for t in model.iter_threads(): yield from t.iter_messages()