my.instagram: add initial all.py + some experiments on nicer errors

This commit is contained in:
Dima Gerasimov 2022-06-03 23:26:04 +01:00 committed by karlicoss
parent bf3dd6e931
commit b5f266c2bd
4 changed files with 86 additions and 4 deletions

View file

@ -1,6 +1,5 @@
from typing import Iterator from typing import Iterator
from my.core import Res from my.core import Res, stat, Stats
from my.core.common import Stats
from my.core.source import import_source from my.core.source import import_source
from .common import Message, _merge_messages from .common import Message, _merge_messages

33
my/instagram/all.py Normal file
View file

@@ -0,0 +1,33 @@
from typing import Iterator
from my.core import Res, stat, Stats
from my.core.source import import_source
from .common import Message, _merge_messages
src_gdpr = import_source(module_name='my.instagram.gdpr')


@src_gdpr
def _messages_gdpr() -> Iterator[Res[Message]]:
    # guarded by import_source: degrades gracefully when the gdpr module's deps are missing
    from . import gdpr as source
    yield from source.messages()
src_android = import_source(module_name='my.instagram.android')


@src_android
def _messages_android() -> Iterator[Res[Message]]:
    # guarded by import_source: degrades gracefully when the android module's deps are missing
    from . import android as source
    yield from source.messages()
def messages() -> Iterator[Res[Message]]:
    """Merged message stream across all available instagram sources."""
    # TODO in general best to prefer android, it has more data
    # but for now prefer gdpr prefix until we figure out how to correlate conversation threads
    merged = _merge_messages(
        _messages_gdpr(),
        _messages_android(),
    )
    yield from merged


def stats() -> Stats:
    """Simple stats over the merged message stream."""
    return stat(messages)

View file

@@ -55,6 +55,27 @@ class Message(_BaseMessage):
# reply_to: Optional[Message] # reply_to: Optional[Message]
# this is kinda experimental
# basically just using RuntimeError(msg_id, *rest) has an unfortunate consequence:
# there are way too many 'similar' errors (on different msg_id)
# however passing msg_id is nice as a means of supplying extra context
# so this is a compromise, the 'duplicate' errors will be filtered out by unique_everseen
class MessageError(RuntimeError):
    """
    Parse error for a single message.

    Carries msg_id for context, but equality/hashing deliberately ignore it
    (only `rest` matters) so that identical errors on different messages
    deduplicate via unique_everseen.
    """
    def __init__(self, msg_id: str, *rest: str) -> None:
        super().__init__(msg_id, *rest)
        self.rest = rest

    # FIX: __hash__ takes only self; the original `def __hash__(self, other)`
    # made every hash(err) raise TypeError, breaking set/dict dedup entirely
    def __hash__(self) -> int:
        return hash(self.rest)

    def __eq__(self, other) -> bool:
        if not isinstance(other, MessageError):
            return False
        return self.rest == other.rest
from ..core import Json from ..core import Json
def _parse_message(j: Json) -> Optional[_Message]: def _parse_message(j: Json) -> Optional[_Message]:
id = j['item_id'] id = j['item_id']
@ -74,7 +95,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
# something like "X liked message" -- hardly useful? # something like "X liked message" -- hardly useful?
return None return None
else: else:
raise RuntimeError(f"{id}: {t} isn't handled yet") raise MessageError(id, f"{t} isn't handled yet")
return _Message( return _Message(
id=id, id=id,
@ -125,7 +146,6 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
def messages() -> Iterator[Res[Message]]: def messages() -> Iterator[Res[Message]]:
# TODO would be nicer to use a decorator for unique_everseen?
id2user: Dict[str, User] = {} id2user: Dict[str, User] = {}
for x in unique_everseen(_entities()): for x in unique_everseen(_entities()):
if isinstance(x, Exception): if isinstance(x, Exception):

30
my/instagram/common.py Normal file
View file

@@ -0,0 +1,30 @@
from datetime import datetime
from itertools import chain
from typing import Iterator
from my.core import warn_if_empty, Res
from my.core.compat import Protocol
from more_itertools import unique_everseen
class Message(Protocol):
    # Structural (duck-typed) interface that each source's message type must satisfy
    # so _merge_messages can combine them.
    # when the message was sent/received
    created: datetime
    # message body; also used as part of the dedup key in _merge_messages
    text: str
    # TODO add some sort of thread id
@warn_if_empty
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
    """Chain all sources together and drop duplicate messages/errors."""
    def _dedup_key(item: Res[Message]):
        if isinstance(item, Exception):
            # NOTE: using str() against Exception is nice so exceptions with same args are treated the same..
            return str(item)
        # seems that GDPR has millisecond resolution.. so best to strip them off when merging
        ts = item.created
        ts_ms = ts.replace(microsecond=(ts.microsecond // 1000) * 1000)
        # using text as key is a bit crap.. but atm there are no better shared fields
        return (ts_ms, item.text)
    return unique_everseen(chain(*sources), key=_dedup_key)