my.instagram.android: properly extract our own user

This commit is contained in:
Dima Gerasimov 2023-03-24 21:48:13 +00:00 committed by karlicoss
parent 8f7d14e7c6
commit 9aadbb504b
2 changed files with 38 additions and 13 deletions

View file

@ -146,6 +146,9 @@ class tinder:
class instagram:
    # settings read by my.instagram.android (it subclasses `user_config.android`)
    class android:
        export_path: Paths
        # optional handle / display name of the account owner
        username: Optional[str]
        full_name: Optional[str]

    # presumably the settings for the GDPR-export based module -- confirm against my.instagram.gdpr
    class gdpr:
        export_path: Paths

View file

@ -5,22 +5,41 @@ from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from typing import Iterator, Sequence, Optional, Dict
import json
from pathlib import Path
from typing import Iterator, Sequence, Optional, Dict, Union
from more_itertools import unique_everseen
from my.core import (
get_files, Paths,
make_config,
LazyLogger,
datetime_naive,
Json,
Res, assert_never,
)
from my.core.sqlite import sqlite_connect_immutable, select
from my.config import instagram as user_config
from ..core import Paths
logger = LazyLogger(__name__, level='debug')
# Configuration for the instagram android module: extends the user's own
# `user_config.android` section with defaults for the optional fields.
# NOTE(review): this listing shows two class headers back to back; since `config`
# below is built from `instagram_android_config`, presumably `class config(...)` is
# the superseded header and only the renamed class remains -- confirm.
@dataclass
class config(user_config.android):
class instagram_android_config(user_config.android):
# path[s]/glob to the exported sqlite databases
export_path: Paths
# sadly doesn't seem easy to extract user's own handle/name from the db...
# todo maybe makes more sense to keep in parent class? not sure...
# both are optional and default to None when the user config does not set them
username: Optional[str] = None
full_name: Optional[str] = None
# module-level `config` object, produced by make_config from the dataclass above
config = make_config(instagram_android_config)
from ..core import get_files
from pathlib import Path
def inputs() -> Sequence[Path]:
    """Resolve ``config.export_path`` into the input files via ``get_files``."""
    export_path = config.export_path
    return get_files(export_path)
@ -32,7 +51,6 @@ class User:
full_name: str
from ..core import datetime_naive
# todo not sure about order of fields...
@dataclass
class _BaseMessage:
@ -78,7 +96,6 @@ class MessageError(RuntimeError):
return self.rest == other.rest
from ..core import Json
def _parse_message(j: Json) -> Optional[_Message]:
id = j['item_id']
t = j['item_type']
@ -108,18 +125,23 @@ def _parse_message(j: Json) -> Optional[_Message]:
)
import json
from typing import Union
from ..core import Res, assert_never
import sqlite3
from ..core.sqlite import sqlite_connect_immutable, select
def _entities() -> Iterator[Res[Union[User, _Message]]]:
# NOTE: definitely need to merge multiple, app seems to recycle old messages
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
# todo use TypedDict?
for f in inputs():
with sqlite_connect_immutable(f) as db:
for (self_uid, thread_json) in select(('user_id', 'thread_info'), 'FROM threads', db=db):
# TODO ugh. seems like no way to extract username?
# sometimes messages (e.g. media_share) contain it in message field
# but generally it's not present. ugh
for (self_uid,) in select(('user_id',), 'FROM session', db=db):
yield User(
id=str(self_uid),
full_name=config.full_name or 'USERS_OWN_FULL_NAME',
username=config.full_name or 'USERS_OWN_USERNAME',
)
for (thread_json,) in select(('thread_info',), 'FROM threads', db=db):
j = json.loads(thread_json)
# todo in principle should leave the thread attached to the message?
# since thread is a group of users?