my.instagram: add initial all.py + some experiments on nicer errors
This commit is contained in:
parent
bf3dd6e931
commit
b5f266c2bd
4 changed files with 86 additions and 4 deletions
|
@ -1,6 +1,5 @@
|
|||
from typing import Iterator
|
||||
from my.core import Res
|
||||
from my.core.common import Stats
|
||||
from my.core import Res, stat, Stats
|
||||
from my.core.source import import_source
|
||||
|
||||
from .common import Message, _merge_messages
|
||||
|
|
33
my/instagram/all.py
Normal file
33
my/instagram/all.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
from typing import Iterator
|
||||
|
||||
from my.core import Res, stat, Stats
|
||||
from my.core.source import import_source
|
||||
|
||||
from .common import Message, _merge_messages
|
||||
|
||||
|
||||
src_gdpr = import_source(module_name='my.instagram.gdpr')


@src_gdpr
def _messages_gdpr() -> Iterator[Res[Message]]:
    """Messages from the GDPR (takeout) export, guarded by import_source."""
    # Deferred import: the gdpr module is only loaded if the source is actually available.
    from . import gdpr as gdpr_source
    yield from gdpr_source.messages()
|
||||
|
||||
|
||||
src_android = import_source(module_name='my.instagram.android')


@src_android
def _messages_android() -> Iterator[Res[Message]]:
    """Messages from the Android app database, guarded by import_source."""
    # Deferred import: the android module is only loaded if the source is actually available.
    from . import android as android_source
    yield from android_source.messages()
|
||||
|
||||
|
||||
def messages() -> Iterator[Res[Message]]:
    """Merged stream of messages from every available Instagram data source."""
    # TODO in general best to prefer android, it has more data
    # but for now prefer gdpr prefix until we figure out how to correlate conversation threads
    streams = (
        _messages_gdpr(),
        _messages_android(),
    )
    yield from _merge_messages(*streams)
|
||||
|
||||
|
||||
def stats() -> Stats:
    """Summary stats over the merged message stream (for the HPI stats interface)."""
    # stat() takes the callable itself (not its result) so it can report per-function counts
    return stat(messages)
|
|
@ -55,6 +55,27 @@ class Message(_BaseMessage):
|
|||
# reply_to: Optional[Message]
|
||||
|
||||
|
||||
# this is kinda experimental
|
||||
# basically just using RuntimeError(msg_id, *rest) has an unfortunate consequence:
|
||||
# there are way too many 'similar' errors (on different msg_id)
|
||||
# however passing msg_id is nice as a means of supplying extra context
|
||||
# so this is a compromise, the 'duplicate' errors will be filtered out by unique_everseen
|
||||
|
||||
|
||||
class MessageError(RuntimeError):
    """
    Error tied to a specific message.

    Carries msg_id for context in the error message, but deliberately excludes it
    from equality/hashing so that 'the same' error occurring for many different
    message ids collapses into one when deduplicated (e.g. by unique_everseen).
    """
    def __init__(self, msg_id: str, *rest: str) -> None:
        super().__init__(msg_id, *rest)
        # only the non-id args participate in equality/hashing (see class docstring)
        self.rest = rest

    # BUG FIX: was `def __hash__(self, other)` -- __hash__ is called with no
    # arguments, so hash(err) raised TypeError and hash-based dedup never worked.
    def __hash__(self) -> int:
        return hash(self.rest)

    def __eq__(self, other) -> bool:
        if not isinstance(other, MessageError):
            return False
        return self.rest == other.rest
|
||||
|
||||
|
||||
from ..core import Json
|
||||
def _parse_message(j: Json) -> Optional[_Message]:
|
||||
id = j['item_id']
|
||||
|
@ -74,7 +95,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
|
|||
# something like "X liked message" -- hardly useful?
|
||||
return None
|
||||
else:
|
||||
raise RuntimeError(f"{id}: {t} isn't handled yet")
|
||||
raise MessageError(id, f"{t} isn't handled yet")
|
||||
|
||||
return _Message(
|
||||
id=id,
|
||||
|
@ -125,7 +146,6 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
|||
|
||||
|
||||
def messages() -> Iterator[Res[Message]]:
|
||||
# TODO would be nicer to use a decorator for unique_everseen?
|
||||
id2user: Dict[str, User] = {}
|
||||
for x in unique_everseen(_entities()):
|
||||
if isinstance(x, Exception):
|
||||
|
|
30
my/instagram/common.py
Normal file
30
my/instagram/common.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
from datetime import datetime
|
||||
from itertools import chain
|
||||
from typing import Iterator
|
||||
|
||||
from my.core import warn_if_empty, Res
|
||||
from my.core.compat import Protocol
|
||||
|
||||
from more_itertools import unique_everseen
|
||||
|
||||
|
||||
class Message(Protocol):
    """Structural (duck-typed) interface that a message from any instagram source must satisfy."""
    # timestamp of the message; GDPR-sourced values appear to have millisecond resolution (see _merge_messages)
    created: datetime
    # message body; currently also used as part of the dedup key for lack of better shared fields
    text: str
    # TODO add some sort of thread id
|
||||
|
||||
|
||||
@warn_if_empty
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
    """
    Concatenate message streams from multiple sources, emitting each distinct
    message (or error) only once, in first-seen order.
    """
    def _dedup_key(item: Res[Message]):
        if isinstance(item, Exception):
            # NOTE: using str() against Exception is nice so exceptions with same args are treated the same..
            return str(item)
        # seems that GDPR has millisecond resolution.. so best to strip them off when merging
        ts = item.created
        ts_ms = ts.replace(microsecond=ts.microsecond // 1000 * 1000)
        # using text as key is a bit crap.. but atm there are no better shared fields
        return (ts_ms, item.text)

    combined = chain.from_iterable(sources)
    return unique_everseen(combined, key=_dedup_key)
|
Loading…
Add table
Add a link
Reference in a new issue