my.instagram.android: properly extract our own user
This commit is contained in:
parent
8f7d14e7c6
commit
9aadbb504b
2 changed files with 38 additions and 13 deletions
|
@ -146,6 +146,9 @@ class tinder:
|
|||
class instagram:
|
||||
class android:
|
||||
export_path: Paths
|
||||
username: Optional[str]
|
||||
full_name: Optional[str]
|
||||
|
||||
class gdpr:
|
||||
export_path: Paths
|
||||
|
||||
|
|
|
@ -5,22 +5,41 @@ from __future__ import annotations
|
|||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Iterator, Sequence, Optional, Dict
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Sequence, Optional, Dict, Union
|
||||
|
||||
from more_itertools import unique_everseen
|
||||
|
||||
from my.core import (
|
||||
get_files, Paths,
|
||||
make_config,
|
||||
LazyLogger,
|
||||
datetime_naive,
|
||||
Json,
|
||||
Res, assert_never,
|
||||
)
|
||||
from my.core.sqlite import sqlite_connect_immutable, select
|
||||
|
||||
from my.config import instagram as user_config
|
||||
|
||||
|
||||
from ..core import Paths
|
||||
logger = LazyLogger(__name__, level='debug')
|
||||
|
||||
@dataclass
|
||||
class config(user_config.android):
|
||||
class instagram_android_config(user_config.android):
|
||||
# path[s]/glob to the exported sqlite databases
|
||||
export_path: Paths
|
||||
|
||||
# sadly doesn't seem easy to extract user's own handle/name from the db...
|
||||
# todo maybe makes more sense to keep in parent class? not sure...
|
||||
username: Optional[str] = None
|
||||
full_name: Optional[str] = None
|
||||
|
||||
|
||||
config = make_config(instagram_android_config)
|
||||
|
||||
|
||||
from ..core import get_files
|
||||
from pathlib import Path
|
||||
def inputs() -> Sequence[Path]:
    """Return the files matched by the configured ``export_path``."""
    export_glob = config.export_path
    return get_files(export_glob)
|
||||
|
||||
|
@ -32,7 +51,6 @@ class User:
|
|||
full_name: str
|
||||
|
||||
|
||||
from ..core import datetime_naive
|
||||
# todo not sure about order of fields...
|
||||
@dataclass
|
||||
class _BaseMessage:
|
||||
|
@ -78,7 +96,6 @@ class MessageError(RuntimeError):
|
|||
return self.rest == other.rest
|
||||
|
||||
|
||||
from ..core import Json
|
||||
def _parse_message(j: Json) -> Optional[_Message]:
|
||||
id = j['item_id']
|
||||
t = j['item_type']
|
||||
|
@ -108,18 +125,23 @@ def _parse_message(j: Json) -> Optional[_Message]:
|
|||
)
|
||||
|
||||
|
||||
import json
|
||||
from typing import Union
|
||||
from ..core import Res, assert_never
|
||||
import sqlite3
|
||||
from ..core.sqlite import sqlite_connect_immutable, select
|
||||
def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
||||
# NOTE: definitely need to merge multiple, app seems to recycle old messages
|
||||
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
|
||||
# todo use TypedDict?
|
||||
for f in inputs():
|
||||
with sqlite_connect_immutable(f) as db:
|
||||
for (self_uid, thread_json) in select(('user_id', 'thread_info'), 'FROM threads', db=db):
|
||||
# TODO ugh. seems like no way to extract username?
|
||||
# sometimes messages (e.g. media_share) contain it in message field
|
||||
# but generally it's not present. ugh
|
||||
for (self_uid,) in select(('user_id',), 'FROM session', db=db):
|
||||
yield User(
|
||||
id=str(self_uid),
|
||||
full_name=config.full_name or 'USERS_OWN_FULL_NAME',
|
||||
username=config.full_name or 'USERS_OWN_USERNAME',
|
||||
)
|
||||
|
||||
for (thread_json,) in select(('thread_info',), 'FROM threads', db=db):
|
||||
j = json.loads(thread_json)
|
||||
# todo in principle should leave the thread attached to the message?
|
||||
# since thread is a group of users?
|
||||
|
|
Loading…
Add table
Reference in a new issue