fbmessenger: add all.py, merge messages from different sources
followup for https://github.com/karlicoss/HPI/pull/179
This commit is contained in:
parent
4626c1bba6
commit
f8e73134b3
4 changed files with 98 additions and 4 deletions
48
my/fbmessenger/all.py
Normal file
48
my/fbmessenger/all.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
from typing import Iterator
|
||||||
|
from my.core import Res
|
||||||
|
from my.core.common import Stats
|
||||||
|
from my.core.source import import_source
|
||||||
|
|
||||||
|
from .common import Message, _merge_messages
|
||||||
|
|
||||||
|
|
||||||
|
# Decorators for the per-source provider functions below, built by my.core.source.import_source.
# NOTE(review): presumably they let the merged view keep working when one source module
# is missing or unconfigured -- confirm against import_source itself.
src_export = import_source(module_name='my.fbmessenger.export')
src_android = import_source(module_name='my.fbmessenger.android')
|
||||||
|
|
||||||
|
|
||||||
|
@src_export
def _messages_export() -> Iterator[Res[Message]]:
    """Yield items from ``export.messages()``, forwarding errors unchanged.

    Non-error items are routed through a local annotated as ``Message`` so
    that the type checker verifies them against the common protocol here.
    """
    from . import export

    # ok, this one is a little tricky
    # export.Message type is actually external (coming from fbmessengerexport module)
    # so it's unclear how to make mypy check that export.Message structurally conforms to common.Message
    # we could use runtime_checkable, but then it might also crash in runtime
    # which feels somewhat mean if someone is only using fbmessenger.export module and needs its attributes only
    # so perhaps it makes sense that the typecheck belongs here?
    for item in export.messages():
        if isinstance(item, Exception):
            # errors are passed through untouched
            yield item
            continue
        # NOTE: a bare 'yield item' works and seems to type check properly as well;
        # however, the annotated local results in a nicer error (shows the missing Protocol attributes)
        # https://github.com/python/mypy/issues/8235#issuecomment-570712356
        checked: Message = item
        yield checked
|
||||||
|
|
||||||
|
|
||||||
|
@src_android
def _messages_android() -> Iterator[Res[Message]]:
    """Yield every item produced by ``android.messages()``, unchanged."""
    from . import android

    android_items = android.messages()
    yield from android_items
|
||||||
|
|
||||||
|
|
||||||
|
def messages() -> Iterator[Res[Message]]:
    """Yield the export and android message streams combined by ``_merge_messages``."""
    # order matters: export items are fed to the merge before android ones
    sources = (
        _messages_export(),
        _messages_android(),
    )
    yield from _merge_messages(*sources)
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
    """Return whatever ``my.core.stat`` reports for the ``messages`` function."""
    from my.core import stat

    # the function itself is handed over, not its result
    summary = stat(messages)
    return summary
|
|
@ -38,10 +38,8 @@ class Thread:
|
||||||
# todo not sure about order of fields...
|
# todo not sure about order of fields...
|
||||||
@dataclass
|
@dataclass
|
||||||
class _BaseMessage:
|
class _BaseMessage:
|
||||||
# todo nice, ids are same as in fbchat??
|
|
||||||
id: str
|
id: str
|
||||||
dt: datetime
|
dt: datetime
|
||||||
# is_incoming: bool
|
|
||||||
text: Optional[str]
|
text: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,6 +50,8 @@ class _Message(_BaseMessage):
|
||||||
reply_to_id: Optional[str]
|
reply_to_id: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
# todo hmm, on the one hand would be kinda nice to inherit common.Message protocol here
|
||||||
|
# on the other, because the properties there are read only we can't construct the object anymore??
|
||||||
@dataclass(unsafe_hash=True)
|
@dataclass(unsafe_hash=True)
|
||||||
class Message(_BaseMessage):
|
class Message(_BaseMessage):
|
||||||
thread: Thread
|
thread: Thread
|
||||||
|
|
46
my/fbmessenger/common.py
Normal file
46
my/fbmessenger/common.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Iterator, Optional, TYPE_CHECKING
|
||||||
|
|
||||||
|
# Protocol is only imported for static analysis; at runtime the classes below
# simply derive from a plain `object` stand-in.
if not TYPE_CHECKING:
    Protocol = object
else:
    try:
        from typing import Protocol
    except ImportError:
        # requirement of mypy
        from typing_extensions import Protocol  # type: ignore[misc]
|
||||||
|
|
||||||
|
|
||||||
|
class Thread(Protocol):
    """Structural interface of a thread: anything exposing a read-only string ``id``."""

    @property
    def id(self) -> str:
        """Identifier of the thread."""
        ...

    # todo hmm it doesn't like it because one from .export is just str, not Optional...
    # name: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
class Message(Protocol):
    """Structural interface of a message: read-only ``id``, ``dt``, ``text`` and ``thread``."""

    @property
    def id(self) -> str:
        """Identifier of the message."""
        ...

    @property
    def dt(self) -> datetime:
        """Datetime associated with the message."""
        ...

    @property
    def text(self) -> Optional[str]:
        """Text of the message; may be None."""
        ...

    @property
    def thread(self) -> Thread:
        """Thread associated with the message."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
from itertools import chain
|
||||||
|
from more_itertools import unique_everseen
|
||||||
|
from my.core import Res
|
||||||
|
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
    """Chain ``sources`` in the given order and yield each distinct item once.

    Distinctness is decided by a key: the ``id`` attribute for messages,
    the string form for errors.
    """
    # todo might be nice to dump some stats for debugging, e.g. how many were overlapping?
    def dedup_key(item: Res[Message]):
        return str(item) if isinstance(item, Exception) else item.id

    combined = chain.from_iterable(sources)
    yield from unique_everseen(combined, key=dedup_key)
|
|
@ -45,8 +45,8 @@ def _dal() -> messenger.DAL:
|
||||||
return messenger.DAL(config.export_db)
|
return messenger.DAL(config.export_db)
|
||||||
|
|
||||||
|
|
||||||
# TODO Result type?
|
from ..core import Res
|
||||||
def messages() -> Iterator[messenger.Message]:
|
def messages() -> Iterator[Res[messenger.Message]]:
|
||||||
model = _dal()
|
model = _dal()
|
||||||
for t in model.iter_threads():
|
for t in model.iter_threads():
|
||||||
yield from t.iter_messages()
|
yield from t.iter_messages()
|
||||||
|
|
Loading…
Add table
Reference in a new issue