general: migrate modules to use 3.9 features
This commit is contained in:
parent
d3f9a8e8b6
commit
8496d131e7
125 changed files with 889 additions and 739 deletions
|
@ -1,11 +1,10 @@
|
|||
from typing import Iterator
|
||||
from collections.abc import Iterator
|
||||
|
||||
from my.core import Res, stat, Stats
|
||||
from my.core import Res, Stats, stat
|
||||
from my.core.source import import_source
|
||||
|
||||
from .common import Message, _merge_messages
|
||||
|
||||
|
||||
src_gdpr = import_source(module_name='my.instagram.gdpr')
|
||||
@src_gdpr
|
||||
def _messages_gdpr() -> Iterator[Res[Message]]:
|
||||
|
|
|
@ -3,30 +3,29 @@ Instagram data from Android app database (in =/data/data/com.instagram.android/data
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from collections.abc import Iterator, Sequence
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
from typing import Iterator, Sequence, Optional, Dict, Union
|
||||
|
||||
from my.core import (
|
||||
get_files,
|
||||
Paths,
|
||||
make_config,
|
||||
make_logger,
|
||||
datetime_naive,
|
||||
Json,
|
||||
Paths,
|
||||
Res,
|
||||
assert_never,
|
||||
datetime_naive,
|
||||
get_files,
|
||||
make_config,
|
||||
make_logger,
|
||||
)
|
||||
from my.core.common import unique_everseen
|
||||
from my.core.cachew import mcachew
|
||||
from my.core.common import unique_everseen
|
||||
from my.core.error import echain
|
||||
from my.core.sqlite import sqlite_connect_immutable, select
|
||||
|
||||
from my.config import instagram as user_config
|
||||
from my.core.sqlite import select, sqlite_connect_immutable
|
||||
|
||||
from my.config import instagram as user_config # isort: skip
|
||||
|
||||
logger = make_logger(__name__)
|
||||
|
||||
|
@ -38,8 +37,8 @@ class instagram_android_config(user_config.android):
|
|||
|
||||
# sadly doesn't seem easy to extract user's own handle/name from the db...
|
||||
# todo maybe makes more sense to keep in parent class? not sure...
|
||||
username: Optional[str] = None
|
||||
full_name: Optional[str] = None
|
||||
username: str | None = None
|
||||
full_name: str | None = None
|
||||
|
||||
|
||||
config = make_config(instagram_android_config)
|
||||
|
@ -101,13 +100,13 @@ class MessageError(RuntimeError):
|
|||
return self.rest == other.rest
|
||||
|
||||
|
||||
def _parse_message(j: Json) -> Optional[_Message]:
|
||||
def _parse_message(j: Json) -> _Message | None:
|
||||
id = j['item_id']
|
||||
t = j['item_type']
|
||||
tid = j['thread_key']['thread_id']
|
||||
uid = j['user_id']
|
||||
created = datetime.fromtimestamp(int(j['timestamp']) / 1_000_000)
|
||||
text: Optional[str] = None
|
||||
text: str | None = None
|
||||
if t == 'text':
|
||||
text = j['text']
|
||||
elif t == 'reel_share':
|
||||
|
@ -133,7 +132,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
|
|||
)
|
||||
|
||||
|
||||
def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
|
||||
def _process_db(db: sqlite3.Connection) -> Iterator[Res[User | _Message]]:
|
||||
# TODO ugh. seems like no way to extract username?
|
||||
# sometimes messages (e.g. media_share) contain it in message field
|
||||
# but generally it's not present. ugh
|
||||
|
@ -175,7 +174,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
|
|||
yield e
|
||||
|
||||
|
||||
def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
||||
def _entities() -> Iterator[Res[User | _Message]]:
|
||||
# NOTE: definitely need to merge multiple, app seems to recycle old messages
|
||||
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
|
||||
# todo use TypedDict?
|
||||
|
@ -194,7 +193,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
|||
|
||||
@mcachew(depends_on=inputs)
|
||||
def messages() -> Iterator[Res[Message]]:
|
||||
id2user: Dict[str, User] = {}
|
||||
id2user: dict[str, User] = {}
|
||||
for x in unique_everseen(_entities):
|
||||
if isinstance(x, Exception):
|
||||
yield x
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
from collections.abc import Iterator
|
||||
from dataclasses import replace
|
||||
from datetime import datetime
|
||||
from itertools import chain
|
||||
from typing import Iterator, Dict, Any, Protocol
|
||||
from typing import Any, Protocol
|
||||
|
||||
from my.core import warn_if_empty, Res
|
||||
from my.core import Res, warn_if_empty
|
||||
|
||||
|
||||
class User(Protocol):
|
||||
|
@ -40,7 +41,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
|
|||
# ugh. seems that GDPR thread ids are completely uncorrelated to any android ids (tried searching over all sqlite dump)
|
||||
# so the only way to correlate is to try and match messages
|
||||
# we also can't use unique_everseen here, otherwise will never get a chance to unify threads
|
||||
mmap: Dict[str, Message] = {}
|
||||
mmap: dict[str, Message] = {}
|
||||
thread_map = {}
|
||||
user_map = {}
|
||||
|
||||
|
@ -60,7 +61,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
|
|||
user_map[m.user.id] = mm.user
|
||||
else:
|
||||
# not emitted yet, need to emit
|
||||
repls: Dict[str, Any] = {}
|
||||
repls: dict[str, Any] = {}
|
||||
tid = thread_map.get(m.thread_id)
|
||||
if tid is not None:
|
||||
repls['thread_id'] = tid
|
||||
|
|
|
@ -2,26 +2,27 @@
|
|||
Instagram data (uses [[https://www.instagram.com/download/request][official GDPR export]])
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections.abc import Iterator, Sequence
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Sequence, Dict, Union
|
||||
|
||||
from more_itertools import bucket
|
||||
|
||||
from my.core import (
|
||||
get_files,
|
||||
Paths,
|
||||
datetime_naive,
|
||||
Res,
|
||||
assert_never,
|
||||
datetime_naive,
|
||||
get_files,
|
||||
make_logger,
|
||||
)
|
||||
from my.core.common import unique_everseen
|
||||
|
||||
from my.config import instagram as user_config
|
||||
|
||||
from my.config import instagram as user_config # isort: skip
|
||||
|
||||
logger = make_logger(__name__)
|
||||
|
||||
|
@ -70,7 +71,7 @@ def _decode(s: str) -> str:
|
|||
return s.encode('latin-1').decode('utf8')
|
||||
|
||||
|
||||
def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
||||
def _entities() -> Iterator[Res[User | _Message]]:
|
||||
# it's worth processing all previous export -- sometimes instagram removes some metadata from newer ones
|
||||
# NOTE: here there are basically two options
|
||||
# - process inputs as is (from oldest to newest)
|
||||
|
@ -84,7 +85,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
|
|||
yield from _entitites_from_path(path)
|
||||
|
||||
|
||||
def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]:
|
||||
def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
|
||||
# TODO make sure it works both with plain directory
|
||||
# ideally get_files should return the right thing, and we won't have to force ZipPath/match_structure here
|
||||
# e.g. possible options are:
|
||||
|
@ -202,7 +203,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]:
|
|||
|
||||
# TODO basically copy pasted from android.py... hmm
|
||||
def messages() -> Iterator[Res[Message]]:
|
||||
id2user: Dict[str, User] = {}
|
||||
id2user: dict[str, User] = {}
|
||||
for x in unique_everseen(_entities):
|
||||
if isinstance(x, Exception):
|
||||
yield x
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue