fbmessenger: add all.py, merge messages from different sources

followup for https://github.com/karlicoss/HPI/pull/179
This commit is contained in:
Dima Gerasimov 2022-02-05 16:19:53 +00:00 committed by karlicoss
parent 4626c1bba6
commit f8e73134b3
4 changed files with 98 additions and 4 deletions

48
my/fbmessenger/all.py Normal file
View file

@ -0,0 +1,48 @@
from typing import Iterator
from my.core import Res
from my.core.common import Stats
from my.core.source import import_source
from .common import Message, _merge_messages
# Decorators built by `import_source`, one per backend module; they are applied
# to the per-source generator functions defined in this file.
src_export = import_source(module_name='my.fbmessenger.export')
src_android = import_source(module_name='my.fbmessenger.android')
@src_export
def _messages_export() -> Iterator[Res[Message]]:
    """Yield the items (messages or errors) produced by ``export.messages()``."""
    from . import export

    # ok, this one is a little tricky:
    # export.Message is actually an external type (it comes from the fbmessengerexport module),
    # so it's unclear how to make mypy check that it structurally matches common.Message.
    # We could use runtime_checkable, but then it might also crash at runtime,
    # which feels somewhat mean if someone only uses the fbmessenger.export module
    # and only needs its own attributes.
    # So perhaps it makes sense that the typecheck belongs here?
    for item in export.messages():
        if isinstance(item, Exception):
            # errors are passed through as they are
            yield item
            continue
        # NOTE: a plain 'yield item' works too and seems to type check properly;
        # however, assigning to an annotated local results in a nicer error
        # (it shows the missing Protocol attributes), see
        # https://github.com/python/mypy/issues/8235#issuecomment-570712356
        checked: Message = item
        yield checked
@src_android
def _messages_android() -> Iterator[Res[Message]]:
    """Yield the items (messages or errors) produced by ``android.messages()``."""
    from . import android

    android_items = android.messages()
    yield from android_items
def messages() -> Iterator[Res[Message]]:
    """Yield the export and android items combined through ``_merge_messages``."""
    # the export source is listed first, the android source second
    sources = (
        _messages_export(),
        _messages_android(),
    )
    yield from _merge_messages(*sources)
def stats() -> Stats:
    """Return the result of ``my.core.stat`` applied to the ``messages`` function."""
    from my.core import stat

    summary = stat(messages)
    return summary

View file

@ -38,10 +38,8 @@ class Thread:
# todo not sure about order of fields...
@dataclass
class _BaseMessage:
# todo nice, ids are same as in fbchat??
id: str
dt: datetime
# is_incoming: bool
text: Optional[str]
@ -52,6 +50,8 @@ class _Message(_BaseMessage):
reply_to_id: Optional[str]
# todo hmm, on the one hand would be kinda nice to inherit common.Message protocol here
# on the other, because the properties there are read only we can't construct the object anymore??
@dataclass(unsafe_hash=True)
class Message(_BaseMessage):
thread: Thread

46
my/fbmessenger/common.py Normal file
View file

@ -0,0 +1,46 @@
from datetime import datetime
from typing import Iterator, Optional, TYPE_CHECKING
if not TYPE_CHECKING:
    # at runtime no real Protocol machinery is used: a plain base class is enough
    Protocol = object
else:
    # only type checkers take this branch
    try:
        from typing import Protocol
    except ImportError:
        # requirement of mypy
        from typing_extensions import Protocol  # type: ignore[misc]
class Thread(Protocol):
    """Minimal structural interface of a message thread: only ``id`` is required."""

    # declared as a read-only property
    @property
    def id(self) -> str: ...
    # todo hmm, the type checker doesn't like it because the one from .export is just str, not Optional...
    # name: Optional[str]
class Message(Protocol):
    """Structural interface of a message; every member is a read-only property."""

    # used as the deduplication key by _merge_messages
    @property
    def id(self) -> str: ...

    @property
    def dt(self) -> datetime: ...

    # may be None
    @property
    def text(self) -> Optional[str]: ...

    # the thread this message refers to, typed as the Thread protocol
    @property
    def thread(self) -> Thread: ...
from itertools import chain
from more_itertools import unique_everseen
from my.core import Res
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
    """
    Chain ``sources`` in the given order and yield each item the first time it is seen.

    Messages are deduplicated by their ``id`` and errors by their string
    representation. The first occurrence wins, so items from earlier sources
    take precedence over equal items from later ones.
    """
    # todo might be nice to dump some stats for debugging, e.g. how many were overlapping?
    def key(r: Res[Message]):
        # tag the key with its kind: otherwise an error whose text happens to be
        # equal to some message id would be treated as a duplicate of that message
        if isinstance(r, Exception):
            return ('error', str(r))
        else:
            return ('message', r.id)

    yield from unique_everseen(chain(*sources), key=key)

View file

@ -45,8 +45,8 @@ def _dal() -> messenger.DAL:
return messenger.DAL(config.export_db)
# TODO Result type?
def messages() -> Iterator[messenger.Message]:
from ..core import Res
def messages() -> Iterator[Res[messenger.Message]]:
model = _dal()
for t in model.iter_threads():
yield from t.iter_messages()