general: migrate modules to use 3.9 features

This commit is contained in:
Dima Gerasimov 2024-10-19 22:10:40 +01:00
parent d3f9a8e8b6
commit d915c848e9
125 changed files with 889 additions and 739 deletions

View file

@ -1,11 +1,10 @@
from typing import Iterator
from collections.abc import Iterator
from my.core import Res, stat, Stats
from my.core import Res, Stats, stat
from my.core.source import import_source
from .common import Message, _merge_messages
src_gdpr = import_source(module_name='my.instagram.gdpr')
@src_gdpr
def _messages_gdpr() -> Iterator[Res[Message]]:

View file

@ -3,30 +3,29 @@ Instagram data from Android app database (in =/data/data/com.instagram.android/data
"""
from __future__ import annotations
import json
import sqlite3
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime
import json
from pathlib import Path
import sqlite3
from typing import Iterator, Sequence, Optional, Dict, Union
from my.core import (
get_files,
Paths,
make_config,
make_logger,
datetime_naive,
Json,
Paths,
Res,
assert_never,
datetime_naive,
get_files,
make_config,
make_logger,
)
from my.core.common import unique_everseen
from my.core.cachew import mcachew
from my.core.common import unique_everseen
from my.core.error import echain
from my.core.sqlite import sqlite_connect_immutable, select
from my.config import instagram as user_config
from my.core.sqlite import select, sqlite_connect_immutable
from my.config import instagram as user_config # isort: skip
logger = make_logger(__name__)
@ -38,8 +37,8 @@ class instagram_android_config(user_config.android):
# sadly doesn't seem easy to extract user's own handle/name from the db...
# todo maybe makes more sense to keep in parent class? not sure...
username: Optional[str] = None
full_name: Optional[str] = None
username: str | None = None
full_name: str | None = None
config = make_config(instagram_android_config)
@ -101,13 +100,13 @@ class MessageError(RuntimeError):
return self.rest == other.rest
def _parse_message(j: Json) -> Optional[_Message]:
def _parse_message(j: Json) -> _Message | None:
id = j['item_id']
t = j['item_type']
tid = j['thread_key']['thread_id']
uid = j['user_id']
created = datetime.fromtimestamp(int(j['timestamp']) / 1_000_000)
text: Optional[str] = None
text: str | None = None
if t == 'text':
text = j['text']
elif t == 'reel_share':
@ -133,7 +132,7 @@ def _parse_message(j: Json) -> Optional[_Message]:
)
def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
def _process_db(db: sqlite3.Connection) -> Iterator[Res[User | _Message]]:
# TODO ugh. seems like no way to extract username?
# sometimes messages (e.g. media_share) contain it in message field
# but generally it's not present. ugh
@ -175,7 +174,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Res[Union[User, _Message]]]:
yield e
def _entities() -> Iterator[Res[Union[User, _Message]]]:
def _entities() -> Iterator[Res[User | _Message]]:
# NOTE: definitely need to merge multiple, app seems to recycle old messages
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
# todo use TypedDict?
@ -194,7 +193,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
@mcachew(depends_on=inputs)
def messages() -> Iterator[Res[Message]]:
id2user: Dict[str, User] = {}
id2user: dict[str, User] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x

View file

@ -1,9 +1,10 @@
from collections.abc import Iterator
from dataclasses import replace
from datetime import datetime
from itertools import chain
from typing import Iterator, Dict, Any, Protocol
from typing import Any, Protocol
from my.core import warn_if_empty, Res
from my.core import Res, warn_if_empty
class User(Protocol):
@ -40,7 +41,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
# ugh. seems that GDPR thread ids are completely uncorrelated to any android ids (tried searching over all sqlite dump)
# so the only way to correlate is to try and match messages
# we also can't use unique_everseen here, otherwise will never get a chance to unify threads
mmap: Dict[str, Message] = {}
mmap: dict[str, Message] = {}
thread_map = {}
user_map = {}
@ -60,7 +61,7 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]:
user_map[m.user.id] = mm.user
else:
# not emitted yet, need to emit
repls: Dict[str, Any] = {}
repls: dict[str, Any] = {}
tid = thread_map.get(m.thread_id)
if tid is not None:
repls['thread_id'] = tid

View file

@ -2,26 +2,27 @@
Instagram data (uses [[https://www.instagram.com/download/request][official GDPR export]])
"""
from __future__ import annotations
import json
from collections.abc import Iterator, Sequence
from dataclasses import dataclass
from datetime import datetime
import json
from pathlib import Path
from typing import Iterator, Sequence, Dict, Union
from more_itertools import bucket
from my.core import (
get_files,
Paths,
datetime_naive,
Res,
assert_never,
datetime_naive,
get_files,
make_logger,
)
from my.core.common import unique_everseen
from my.config import instagram as user_config
from my.config import instagram as user_config # isort: skip
logger = make_logger(__name__)
@ -70,7 +71,7 @@ def _decode(s: str) -> str:
return s.encode('latin-1').decode('utf8')
def _entities() -> Iterator[Res[Union[User, _Message]]]:
def _entities() -> Iterator[Res[User | _Message]]:
# it's worth processing all previous exports -- sometimes instagram removes some metadata from newer ones
# NOTE: here there are basically two options
# - process inputs as is (from oldest to newest)
@ -84,7 +85,7 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
yield from _entitites_from_path(path)
def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]:
def _entitites_from_path(path: Path) -> Iterator[Res[User | _Message]]:
# TODO make sure it works both with plain directory
# ideally get_files should return the right thing, and we won't have to force ZipPath/match_structure here
# e.g. possible options are:
@ -202,7 +203,7 @@ def _entitites_from_path(path: Path) -> Iterator[Res[Union[User, _Message]]]:
# TODO basically copy pasted from android.py... hmm
def messages() -> Iterator[Res[Message]]:
id2user: Dict[str, User] = {}
id2user: dict[str, User] = {}
for x in unique_everseen(_entities):
if isinstance(x, Exception):
yield x