my.instagram: add initial all.py + some experiments on nicer errors
This commit is contained in:
parent
bf3dd6e931
commit
b5f266c2bd
4 changed files with 86 additions and 4 deletions
|
@ -1,6 +1,5 @@
|
|||
from typing import Iterator
|
||||
from my.core import Res
|
||||
from my.core.common import Stats
|
||||
from my.core import Res, stat, Stats
|
||||
from my.core.source import import_source
|
||||
|
||||
from .common import Message, _merge_messages
|
||||
|
|
33
my/instagram/all.py
Normal file
33
my/instagram/all.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
from typing import Iterator
|
||||
|
||||
from my.core import Res, stat, Stats
|
||||
from my.core.source import import_source
|
||||
|
||||
from .common import Message, _merge_messages
|
||||
|
||||
|
||||
src_gdpr = import_source(module_name='my.instagram.gdpr')


@src_gdpr
def _messages_gdpr() -> Iterator[Res[Message]]:
    """Messages from the GDPR (takeout) export, guarded by import_source."""
    # Deferred import: the gdpr module is only loaded if the source is actually available.
    from . import gdpr as gdpr_source
    yield from gdpr_source.messages()
|
||||
|
||||
|
||||
src_android = import_source(module_name='my.instagram.android')


@src_android
def _messages_android() -> Iterator[Res[Message]]:
    """Messages from the Android app database, guarded by import_source."""
    # Deferred import: the android module is only loaded if the source is actually available.
    from . import android as android_source
    yield from android_source.messages()
|
||||
|
||||
|
||||
def messages() -> Iterator[Res[Message]]:
    """Merged stream of messages from every available Instagram data source."""
    # TODO in general best to prefer android, it has more data
    # but for now prefer gdpr prefix until we figure out how to correlate conversation threads
    streams = (
        _messages_gdpr(),
        _messages_android(),
    )
    yield from _merge_messages(*streams)
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Summary stats over the merged message stream (for the HPI stats interface)."""
    # stat() takes the callable itself (not its result) so it can report per-function counts
    return stat(messages)
|
|
@ -55,6 +55,27 @@ class Message(_BaseMessage):
|
|||
# reply_to: Optional[Message]
|
||||
|
||||
|
||||
# this is kinda experimental
|
||||
# basically just using RuntimeError(msg_id, *rest) has an unfortunate consequence:
|
||||
# there are way too many 'similar' errors (on different msg_id)
|
||||
# however passing msg_id is nice as a means of supplying extra context
|
||||
# so this is a compromise, the 'duplicate' errors will be filtered out by unique_everseen
|
||||
|
||||
|
||||
class MessageError(RuntimeError):
    """
    Error tied to a specific message.

    Carries msg_id for context in the error message, but deliberately excludes it
    from equality/hashing so that 'the same' error occurring for many different
    message ids collapses into one when deduplicated (e.g. by unique_everseen).
    """
    def __init__(self, msg_id: str, *rest: str) -> None:
        super().__init__(msg_id, *rest)
        # only the non-id args participate in equality/hashing (see class docstring)
        self.rest = rest

    # BUG FIX: was `def __hash__(self, other)` -- __hash__ is called with no
    # arguments, so hash(err) raised TypeError and hash-based dedup never worked.
    def __hash__(self) -> int:
        return hash(self.rest)

    def __eq__(self, other) -> bool:
        if not isinstance(other, MessageError):
            return False
        return self.rest == other.rest
|
||||
|
||||
|
||||
from ..core import Json
|
||||
def _parse_message(j: Json) -> Optional[_Message]:
|
||||
id = j['item_id']
|
||||
|
@ -74,7 +95,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
|
|||
# something like "X liked message" -- hardly useful?
|
||||
return None
|
||||
else:
|
||||
raise RuntimeError(f"{id}: {t} isn't handled yet")
|
||||
raise MessageError(id, f"{t} isn't handled yet")
|
||||
|
||||
return _Message(
|
||||
id=id,
|
||||
|
@ -125,7 +146,6 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
|||
|
||||
|
||||
def messages() -> Iterator[Res[Message]]:
|
||||
# TODO would be nicer to use a decorator for unique_everseen?
|
||||
id2user: Dict[str, User] = {}
|
||||
for x in unique_everseen(_entities()):
|
||||
if isinstance(x, Exception):
|
||||
|
|
30
my/instagram/common.py
Normal file
30
my/instagram/common.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
from datetime import datetime
|
||||
from itertools import chain
|
||||
from typing import Iterator
|
||||
|
||||
from my.core import warn_if_empty, Res
|
||||
from my.core.compat import Protocol
|
||||
|
||||
from more_itertools import unique_everseen
|
||||
|
||||
|
||||
class Message(Protocol):
    """Structural (duck-typed) interface that a message from any instagram source must satisfy."""
    # timestamp of the message; GDPR-sourced values appear to have millisecond resolution (see _merge_messages)
    created: datetime
    # message body; currently also used as part of the dedup key for lack of better shared fields
    text: str
    # TODO add some sort of thread id
|
||||
|
||||
|
||||
@warn_if_empty
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
    """
    Concatenate message streams from multiple sources, emitting each distinct
    message (or error) only once, in first-seen order.
    """
    def _dedup_key(item: Res[Message]):
        if isinstance(item, Exception):
            # NOTE: using str() against Exception is nice so exceptions with same args are treated the same..
            return str(item)
        # seems that GDPR has millisecond resolution.. so best to strip them off when merging
        ts = item.created
        ts_ms = ts.replace(microsecond=ts.microsecond // 1000 * 1000)
        # using text as key is a bit crap.. but atm there are no better shared fields
        return (ts_ms, item.text)

    combined = chain.from_iterable(sources)
    return unique_everseen(combined, key=_dedup_key)
|
Loading…
Add table
Add a link
Reference in a new issue