my.instagram.android: properly extract our own user
This commit is contained in:
parent
8f7d14e7c6
commit
9aadbb504b
2 changed files with 38 additions and 13 deletions
|
@ -146,6 +146,9 @@ class tinder:
|
||||||
class instagram:
|
class instagram:
|
||||||
class android:
|
class android:
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
username: Optional[str]
|
||||||
|
full_name: Optional[str]
|
||||||
|
|
||||||
class gdpr:
|
class gdpr:
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
|
|
|
@ -5,22 +5,41 @@ from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Iterator, Sequence, Optional, Dict
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterator, Sequence, Optional, Dict, Union
|
||||||
|
|
||||||
from more_itertools import unique_everseen
|
from more_itertools import unique_everseen
|
||||||
|
|
||||||
|
from my.core import (
|
||||||
|
get_files, Paths,
|
||||||
|
make_config,
|
||||||
|
LazyLogger,
|
||||||
|
datetime_naive,
|
||||||
|
Json,
|
||||||
|
Res, assert_never,
|
||||||
|
)
|
||||||
|
from my.core.sqlite import sqlite_connect_immutable, select
|
||||||
|
|
||||||
from my.config import instagram as user_config
|
from my.config import instagram as user_config
|
||||||
|
|
||||||
|
|
||||||
from ..core import Paths
|
logger = LazyLogger(__name__, level='debug')
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class config(user_config.android):
|
class instagram_android_config(user_config.android):
|
||||||
# paths[s]/glob to the exported sqlite databases
|
# paths[s]/glob to the exported sqlite databases
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
|
# sadly doesn't seem easy to extract user's own handle/name from the db...
|
||||||
|
# todo maybe makes more sense to keep in parent class? not sure...
|
||||||
|
username: Optional[str] = None
|
||||||
|
full_name: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
config = make_config(instagram_android_config)
|
||||||
|
|
||||||
|
|
||||||
from ..core import get_files
|
|
||||||
from pathlib import Path
|
|
||||||
def inputs() -> Sequence[Path]:
|
def inputs() -> Sequence[Path]:
|
||||||
return get_files(config.export_path)
|
return get_files(config.export_path)
|
||||||
|
|
||||||
|
@ -32,7 +51,6 @@ class User:
|
||||||
full_name: str
|
full_name: str
|
||||||
|
|
||||||
|
|
||||||
from ..core import datetime_naive
|
|
||||||
# todo not sure about order of fields...
|
# todo not sure about order of fields...
|
||||||
@dataclass
|
@dataclass
|
||||||
class _BaseMessage:
|
class _BaseMessage:
|
||||||
|
@ -78,7 +96,6 @@ class MessageError(RuntimeError):
|
||||||
return self.rest == other.rest
|
return self.rest == other.rest
|
||||||
|
|
||||||
|
|
||||||
from ..core import Json
|
|
||||||
def _parse_message(j: Json) -> Optional[_Message]:
|
def _parse_message(j: Json) -> Optional[_Message]:
|
||||||
id = j['item_id']
|
id = j['item_id']
|
||||||
t = j['item_type']
|
t = j['item_type']
|
||||||
|
@ -108,18 +125,23 @@ def _parse_message(j: Json) -> Optional[_Message]:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
import json
|
|
||||||
from typing import Union
|
|
||||||
from ..core import Res, assert_never
|
|
||||||
import sqlite3
|
|
||||||
from ..core.sqlite import sqlite_connect_immutable, select
|
|
||||||
def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
||||||
# NOTE: definitely need to merge multiple, app seems to recycle old messages
|
# NOTE: definitely need to merge multiple, app seems to recycle old messages
|
||||||
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
|
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
|
||||||
# todo use TypedDict?
|
# todo use TypedDict?
|
||||||
for f in inputs():
|
for f in inputs():
|
||||||
with sqlite_connect_immutable(f) as db:
|
with sqlite_connect_immutable(f) as db:
|
||||||
for (self_uid, thread_json) in select(('user_id', 'thread_info'), 'FROM threads', db=db):
|
# TODO ugh. seems like no way to extract username?
|
||||||
|
# sometimes messages (e.g. media_share) contain it in message field
|
||||||
|
# but generally it's not present. ugh
|
||||||
|
for (self_uid,) in select(('user_id',), 'FROM session', db=db):
|
||||||
|
yield User(
|
||||||
|
id=str(self_uid),
|
||||||
|
full_name=config.full_name or 'USERS_OWN_FULL_NAME',
|
||||||
|
username=config.username or 'USERS_OWN_USERNAME',
|
||||||
|
)
|
||||||
|
|
||||||
|
for (thread_json,) in select(('thread_info',), 'FROM threads', db=db):
|
||||||
j = json.loads(thread_json)
|
j = json.loads(thread_json)
|
||||||
# todo in principle should leave the thread attached to the message?
|
# todo in principle should leave the thread attached to the message?
|
||||||
# since thread is a group of users?
|
# since thread is a group of users?
|
||||||
|
|
Loading…
Add table
Reference in a new issue