From b5f266c2bd2cb463f97bc68c627d754d6e7b6377 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 3 Jun 2022 23:26:04 +0100 Subject: [PATCH] my.instagram: add initial all.py + some experiments on nicer errors --- my/fbmessenger/all.py | 3 +-- my/instagram/all.py | 33 +++++++++++++++++++++++++++++++++ my/instagram/android.py | 24 ++++++++++++++++++++++-- my/instagram/common.py | 30 ++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 my/instagram/all.py create mode 100644 my/instagram/common.py diff --git a/my/fbmessenger/all.py b/my/fbmessenger/all.py index ca7f064..f98b5f3 100644 --- a/my/fbmessenger/all.py +++ b/my/fbmessenger/all.py @@ -1,6 +1,5 @@ from typing import Iterator -from my.core import Res -from my.core.common import Stats +from my.core import Res, stat, Stats from my.core.source import import_source from .common import Message, _merge_messages diff --git a/my/instagram/all.py b/my/instagram/all.py new file mode 100644 index 0000000..4be2b5b --- /dev/null +++ b/my/instagram/all.py @@ -0,0 +1,33 @@ +from typing import Iterator + +from my.core import Res, stat, Stats +from my.core.source import import_source + +from .common import Message, _merge_messages + + +src_gdpr = import_source(module_name='my.instagram.gdpr') +@src_gdpr +def _messages_gdpr() -> Iterator[Res[Message]]: + from . import gdpr + yield from gdpr.messages() + + +src_android = import_source(module_name='my.instagram.android') +@src_android +def _messages_android() -> Iterator[Res[Message]]: + from . 
import android + yield from android.messages() + + +def messages() -> Iterator[Res[Message]]: + # TODO in general best to prefer android, it has more data + # but for now prefer gdpr prefix until we figure out how to correlate conversation threads + yield from _merge_messages( + _messages_gdpr(), + _messages_android(), + ) + + +def stats() -> Stats: + return stat(messages) diff --git a/my/instagram/android.py b/my/instagram/android.py index 21b9288..fc2ac38 100644 --- a/my/instagram/android.py +++ b/my/instagram/android.py @@ -55,6 +55,27 @@ class Message(_BaseMessage): # reply_to: Optional[Message] +# this is kinda experimental +# basically just using RuntimeError(msg_id, *rest) has an unfortunate consequence: +# there are way too many 'similar' errors (on different msg_id) +# however passing msg_id is nice as a means of supplying extra context +# so this is a compromise, the 'duplicate' errors will be filtered out by unique_everseen + + +class MessageError(RuntimeError): + def __init__(self, msg_id: str, *rest: str) -> None: + super().__init__(msg_id, *rest) + self.rest = rest + + def __hash__(self) -> int: + return hash(self.rest) + + def __eq__(self, other) -> bool: + if not isinstance(other, MessageError): + return False + return self.rest == other.rest + + from ..core import Json def _parse_message(j: Json) -> Optional[_Message]: id = j['item_id'] @@ -74,7 +95,7 @@ def _parse_message(j: Json) -> Optional[_Message]: # something like "X liked message" -- hardly useful? return None else: - raise RuntimeError(f"{id}: {t} isn't handled yet") + raise MessageError(id, f"{t} isn't handled yet") return _Message( id=id, @@ -125,7 +146,6 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]: def messages() -> Iterator[Res[Message]]: - # TODO would be nicer to use a decorator for unique_everseen? 
id2user: Dict[str, User] = {} for x in unique_everseen(_entities()): if isinstance(x, Exception): diff --git a/my/instagram/common.py b/my/instagram/common.py new file mode 100644 index 0000000..23cefe5 --- /dev/null +++ b/my/instagram/common.py @@ -0,0 +1,30 @@ +from datetime import datetime +from itertools import chain +from typing import Iterator + +from my.core import warn_if_empty, Res +from my.core.compat import Protocol + +from more_itertools import unique_everseen + + +class Message(Protocol): + created: datetime + text: str + # TODO add some sort of thread id + + +@warn_if_empty +def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]: + def key(r: Res[Message]): + if isinstance(r, Exception): + # NOTE: using str() against Exception is nice so exceptions with same args are treated the same.. + return str(r) + + dt = r.created + # seems that GDPR has millisecond resolution.. so best to truncate to whole milliseconds when merging + round_us = dt.microsecond // 1000 * 1000 + without_us = r.created.replace(microsecond=round_us) + # using text as key is a bit crap.. but atm there are no better shared fields + return (without_us, r.text) + return unique_everseen(chain(*sources), key=key)