my.instagram: add initial all.py + some experiments on nicer errors
This commit is contained in:
parent
03f5988303
commit
aae951c46b
4 changed files with 86 additions and 4 deletions
|
@ -1,6 +1,5 @@
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
from my.core import Res
|
from my.core import Res, stat, Stats
|
||||||
from my.core.common import Stats
|
|
||||||
from my.core.source import import_source
|
from my.core.source import import_source
|
||||||
|
|
||||||
from .common import Message, _merge_messages
|
from .common import Message, _merge_messages
|
||||||
|
|
33
my/instagram/all.py
Normal file
33
my/instagram/all.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from my.core import Res, stat, Stats
|
||||||
|
from my.core.source import import_source
|
||||||
|
|
||||||
|
from .common import Message, _merge_messages
|
||||||
|
|
||||||
|
|
||||||
|
src_gdpr = import_source(module_name='my.instagram.gdpr')
|
||||||
|
@src_gdpr
def _messages_gdpr() -> Iterator[Res[Message]]:
    """Yield messages from the GDPR export source.

    The import is deferred to call time (inside the body), wrapped by the
    src_gdpr decorator from my.core.source.
    """
    from . import gdpr as source
    yield from source.messages()
|
||||||
|
|
||||||
|
|
||||||
|
src_android = import_source(module_name='my.instagram.android')
|
||||||
|
@src_android
def _messages_android() -> Iterator[Res[Message]]:
    """Yield messages from the Android app database source.

    The import is deferred to call time (inside the body), wrapped by the
    src_android decorator from my.core.source.
    """
    from . import android as source
    yield from source.messages()
|
||||||
|
|
||||||
|
|
||||||
|
def messages() -> Iterator[Res[Message]]:
    """Merge and deduplicate messages across all instagram sources."""
    # TODO in general best to prefer android, it has more data
    # but for now prefer gdpr prefix until we figure out how to correlate conversation threads
    streams = (
        _messages_gdpr(),
        _messages_android(),
    )
    yield from _merge_messages(*streams)
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
    """Summary statistics over the merged message stream (via my.core.stat)."""
    return stat(messages)
|
|
@ -55,6 +55,27 @@ class Message(_BaseMessage):
|
||||||
# reply_to: Optional[Message]
|
# reply_to: Optional[Message]
|
||||||
|
|
||||||
|
|
||||||
|
# this is kinda experimental
|
||||||
|
# basically just using RuntimeError(msg_id, *rest) has an unfortunate consequence:
|
||||||
|
# there are way too many 'similar' errors (on different msg_id)
|
||||||
|
# however passing msg_id is nice as a means of supplying extra context
|
||||||
|
# so this is a compromise, the 'duplicate' errors will be filtered out by unique_everseen
|
||||||
|
|
||||||
|
|
||||||
|
class MessageError(RuntimeError):
    """Error associated with a specific message id.

    Equality and hashing deliberately ignore msg_id: errors that differ only
    in which message triggered them compare equal, so duplicates can be
    filtered out by unique_everseen, while msg_id still shows up in the
    rendered exception args for context.
    """

    def __init__(self, msg_id: str, *rest: str) -> None:
        super().__init__(msg_id, *rest)
        # kept separately so __eq__/__hash__ can exclude msg_id
        self.rest = rest

    # FIX: original declared __hash__(self, other); __hash__ takes only self,
    # so hash(err) raised TypeError and hash-based dedup could never work.
    def __hash__(self) -> int:
        return hash(self.rest)

    def __eq__(self, other) -> bool:
        if not isinstance(other, MessageError):
            return False
        return self.rest == other.rest
|
||||||
|
|
||||||
|
|
||||||
from ..core import Json
|
from ..core import Json
|
||||||
def _parse_message(j: Json) -> Optional[_Message]:
|
def _parse_message(j: Json) -> Optional[_Message]:
|
||||||
id = j['item_id']
|
id = j['item_id']
|
||||||
|
@ -74,7 +95,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
|
||||||
# something like "X liked message" -- hardly useful?
|
# something like "X liked message" -- hardly useful?
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(f"{id}: {t} isn't handled yet")
|
raise MessageError(id, f"{t} isn't handled yet")
|
||||||
|
|
||||||
return _Message(
|
return _Message(
|
||||||
id=id,
|
id=id,
|
||||||
|
@ -125,7 +146,6 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
||||||
|
|
||||||
|
|
||||||
def messages() -> Iterator[Res[Message]]:
|
def messages() -> Iterator[Res[Message]]:
|
||||||
# TODO would be nicer to use a decorator for unique_everseen?
|
|
||||||
id2user: Dict[str, User] = {}
|
id2user: Dict[str, User] = {}
|
||||||
for x in unique_everseen(_entities()):
|
for x in unique_everseen(_entities()):
|
||||||
if isinstance(x, Exception):
|
if isinstance(x, Exception):
|
||||||
|
|
30
my/instagram/common.py
Normal file
30
my/instagram/common.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from itertools import chain
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from my.core import warn_if_empty, Res
|
||||||
|
from my.core.compat import Protocol
|
||||||
|
|
||||||
|
from more_itertools import unique_everseen
|
||||||
|
|
||||||
|
|
||||||
|
class Message(Protocol):
    """Structural (duck-typed) interface shared by messages from all instagram sources."""
    # message timestamp -- NOTE(review): timezone awareness depends on the source; confirm
    created: datetime
    # message body text
    text: str
    # TODO add some sort of thread id
|
||||||
|
|
||||||
|
|
||||||
|
@warn_if_empty
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
    """Chain messages from all sources, emitting each distinct one once.

    Deduplication key: for exceptions, their string form (so exceptions with
    the same args collapse together); for messages, (timestamp truncated to
    millisecond, text).
    """
    def key(item: Res[Message]):
        if isinstance(item, Exception):
            # NOTE: str() against Exception is nice so exceptions with same args are treated the same..
            return str(item)
        ts = item.created
        # seems that GDPR has millisecond resolution.. so best to strip
        # sub-millisecond precision off before comparing across sources
        ts = ts.replace(microsecond=ts.microsecond // 1000 * 1000)
        # using text as key is a bit crap.. but atm there are no better shared fields
        return (ts, item.text)

    yield from unique_everseen(chain(*sources), key=key)
|
Loading…
Add table
Add a link
Reference in a new issue