my.instagram: add initial all.py + some experiments on nicer errors
This commit is contained in:
parent
03f5988303
commit
aae951c46b
4 changed files with 86 additions and 4 deletions
|
@ -1,6 +1,5 @@
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
from my.core import Res
|
from my.core import Res, stat, Stats
|
||||||
from my.core.common import Stats
|
|
||||||
from my.core.source import import_source
|
from my.core.source import import_source
|
||||||
|
|
||||||
from .common import Message, _merge_messages
|
from .common import Message, _merge_messages
|
||||||
|
|
33
my/instagram/all.py
Normal file
33
my/instagram/all.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from my.core import Res, stat, Stats
|
||||||
|
from my.core.source import import_source
|
||||||
|
|
||||||
|
from .common import Message, _merge_messages
|
||||||
|
|
||||||
|
|
||||||
|
src_gdpr = import_source(module_name='my.instagram.gdpr')
|
||||||
|
@src_gdpr
def _messages_gdpr() -> Iterator[Res[Message]]:
    """Yield messages from the GDPR export source.

    The import is deferred to call time (inside the body), wrapped by the
    src_gdpr decorator from my.core.source.
    """
    from . import gdpr as source
    yield from source.messages()
|
||||||
|
|
||||||
|
|
||||||
|
src_android = import_source(module_name='my.instagram.android')
|
||||||
|
@src_android
def _messages_android() -> Iterator[Res[Message]]:
    """Yield messages from the Android app database source.

    The import is deferred to call time (inside the body), wrapped by the
    src_android decorator from my.core.source.
    """
    from . import android as source
    yield from source.messages()
|
||||||
|
|
||||||
|
|
||||||
|
def messages() -> Iterator[Res[Message]]:
    """Merge and deduplicate messages across all instagram sources."""
    # TODO in general best to prefer android, it has more data
    # but for now prefer gdpr prefix until we figure out how to correlate conversation threads
    streams = (
        _messages_gdpr(),
        _messages_android(),
    )
    yield from _merge_messages(*streams)
|
||||||
|
|
||||||
|
|
||||||
|
def stats() -> Stats:
    """Summary statistics over the merged message stream (via my.core.stat)."""
    return stat(messages)
|
|
@ -55,6 +55,27 @@ class Message(_BaseMessage):
|
||||||
# reply_to: Optional[Message]
|
# reply_to: Optional[Message]
|
||||||
|
|
||||||
|
|
||||||
|
# this is kinda experimental
|
||||||
|
# basically just using RuntimeError(msg_id, *rest) has an unfortunate consequence:
|
||||||
|
# there are way too many 'similar' errors (on different msg_id)
|
||||||
|
# however passing msg_id is nice as a means of supplying extra context
|
||||||
|
# so this is a compromise, the 'duplicate' errors will be filtered out by unique_everseen
|
||||||
|
|
||||||
|
|
||||||
|
class MessageError(RuntimeError):
    """Error associated with a specific message id.

    Equality and hashing deliberately ignore msg_id: errors that differ only
    in which message triggered them compare equal, so duplicates can be
    filtered out by unique_everseen, while msg_id still shows up in the
    rendered exception args for context.
    """

    def __init__(self, msg_id: str, *rest: str) -> None:
        super().__init__(msg_id, *rest)
        # kept separately so __eq__/__hash__ can exclude msg_id
        self.rest = rest

    # FIX: original declared __hash__(self, other); __hash__ takes only self,
    # so hash(err) raised TypeError and hash-based dedup could never work.
    def __hash__(self) -> int:
        return hash(self.rest)

    def __eq__(self, other) -> bool:
        if not isinstance(other, MessageError):
            return False
        return self.rest == other.rest
|
||||||
|
|
||||||
|
|
||||||
from ..core import Json
|
from ..core import Json
|
||||||
def _parse_message(j: Json) -> Optional[_Message]:
|
def _parse_message(j: Json) -> Optional[_Message]:
|
||||||
id = j['item_id']
|
id = j['item_id']
|
||||||
|
@ -74,7 +95,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
|
||||||
# something like "X liked message" -- hardly useful?
|
# something like "X liked message" -- hardly useful?
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(f"{id}: {t} isn't handled yet")
|
raise MessageError(id, f"{t} isn't handled yet")
|
||||||
|
|
||||||
return _Message(
|
return _Message(
|
||||||
id=id,
|
id=id,
|
||||||
|
@ -125,7 +146,6 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
||||||
|
|
||||||
|
|
||||||
def messages() -> Iterator[Res[Message]]:
|
def messages() -> Iterator[Res[Message]]:
|
||||||
# TODO would be nicer to use a decorator for unique_everseen?
|
|
||||||
id2user: Dict[str, User] = {}
|
id2user: Dict[str, User] = {}
|
||||||
for x in unique_everseen(_entities()):
|
for x in unique_everseen(_entities()):
|
||||||
if isinstance(x, Exception):
|
if isinstance(x, Exception):
|
||||||
|
|
30
my/instagram/common.py
Normal file
30
my/instagram/common.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from itertools import chain
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from my.core import warn_if_empty, Res
|
||||||
|
from my.core.compat import Protocol
|
||||||
|
|
||||||
|
from more_itertools import unique_everseen
|
||||||
|
|
||||||
|
|
||||||
|
class Message(Protocol):
    """Structural (duck-typed) interface shared by messages from all instagram sources."""
    # message timestamp -- NOTE(review): timezone awareness depends on the source; confirm
    created: datetime
    # message body text
    text: str
    # TODO add some sort of thread id
|
||||||
|
|
||||||
|
|
||||||
|
@warn_if_empty
def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
    """Chain messages from all sources, emitting each distinct one once.

    Deduplication key: for exceptions, their string form (so exceptions with
    the same args collapse together); for messages, (timestamp truncated to
    millisecond, text).
    """
    def key(item: Res[Message]):
        if isinstance(item, Exception):
            # NOTE: str() against Exception is nice so exceptions with same args are treated the same..
            return str(item)
        ts = item.created
        # seems that GDPR has millisecond resolution.. so best to strip
        # sub-millisecond precision off before comparing across sources
        ts = ts.replace(microsecond=ts.microsecond // 1000 * 1000)
        # using text as key is a bit crap.. but atm there are no better shared fields
        return (ts, item.text)

    yield from unique_everseen(chain(*sources), key=key)
|
Loading…
Add table
Add a link
Reference in a new issue