my.instagram.android: more defensive error handling

This commit is contained in:
Dima Gerasimov 2023-10-08 23:37:59 +01:00 committed by karlicoss
parent 86ea605aec
commit f9a1050ceb

View file

@ -7,6 +7,7 @@ from dataclasses import dataclass
from datetime import datetime from datetime import datetime
import json import json
from pathlib import Path from pathlib import Path
import sqlite3
from typing import Iterator, Sequence, Optional, Dict, Union from typing import Iterator, Sequence, Optional, Dict, Union
from more_itertools import unique_everseen from more_itertools import unique_everseen
@ -22,6 +23,7 @@ from my.core import (
assert_never, assert_never,
) )
from my.core.cachew import mcachew from my.core.cachew import mcachew
from my.core.error import echain
from my.core.sqlite import sqlite_connect_immutable, select from my.core.sqlite import sqlite_connect_immutable, select
from my.config import instagram as user_config from my.config import instagram as user_config
@ -132,14 +134,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
) )
def _entities() -> Iterator[Res[Union[User, _Message]]]: def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
# NOTE: definitely need to merge multiple, app seems to recycle old messages
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
# todo use TypedDict?
dbs = inputs()
for f in dbs:
logger.info(f'{f} : processing...')
with sqlite_connect_immutable(f) as db:
# TODO ugh. seems like no way to extract username? # TODO ugh. seems like no way to extract username?
# sometimes messages (e.g. media_share) contain it in message field # sometimes messages (e.g. media_share) contain it in message field
# but generally it's not present. ugh # but generally it's not present. ugh
@ -154,8 +149,13 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
j = json.loads(thread_json) j = json.loads(thread_json)
# todo in principle should leave the thread attached to the message? # todo in principle should leave the thread attached to the message?
# since thread is a group of users? # since thread is a group of users?
pre_users = []
# inviter usually contains our own user # inviter usually contains our own user
for r in [j['inviter'], *j['recipients']]: if 'inviter' in j:
# sometimes it's missing (e.g. in broadcast channels)
pre_users.append(j['inviter'])
pre_users.extend(j['recipients'])
for r in pre_users:
# id disappeared and seems that pk_id is in use now (around december 2022) # id disappeared and seems that pk_id is in use now (around december 2022)
uid = r.get('id') or r.get('pk_id') uid = r.get('id') or r.get('pk_id')
assert uid is not None assert uid is not None
@ -176,6 +176,21 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
yield e yield e
def _entities() -> Iterator[Res[Union[User, _Message]]]:
    """
    Yield the entities extracted from every input database, one database at a time.

    Each path returned by ``inputs()`` is opened via ``sqlite_connect_immutable``
    and handed to ``_process_db``; whatever that yields is passed through unchanged.

    If processing a database raises, the exception is not propagated: a
    ``RuntimeError`` naming the offending path is combined with the original
    exception through ``echain`` and yielded as an item, after which the loop
    moves on to the next database.
    """
    # NOTE: results from multiple databases definitely need merging downstream,
    #       the app seems to recycle old messages
    # TODO: hard to guarantee timestamp ordering when synthetic input data is used
    # todo: use TypedDict?
    for db_path in inputs():
        logger.info(f'{db_path} : processing...')
        # NOTE(review): the connection is opened outside of the try block, so an
        # error raised while opening the database is NOT turned into a yielded
        # error -- it propagates to the caller.
        with sqlite_connect_immutable(db_path) as conn:
            try:
                yield from _process_db(db=conn)
            except Exception as err:
                # todo: use error policy here
                wrapped = RuntimeError(f'While processing {db_path}')
                # presumably echain attaches `err` as the cause of `wrapped` -- confirm in my.core.error
                yield echain(wrapped, cause=err)
@mcachew(depends_on=inputs) @mcachew(depends_on=inputs)
def messages() -> Iterator[Res[Message]]: def messages() -> Iterator[Res[Message]]:
id2user: Dict[str, User] = {} id2user: Dict[str, User] = {}