""" Tinder data from Android app database (in =/data/data/com.tinder/databases/tinder-3.db=) """ from __future__ import annotations from collections import defaultdict, Counter from dataclasses import dataclass from datetime import datetime, timezone from itertools import chain from pathlib import Path import sqlite3 from typing import Sequence, Iterator, Union, Dict, List, Mapping from my.core import Paths, get_files, Res, stat, Stats, datetime_aware, make_logger from my.core.common import unique_everseen from my.core.compat import assert_never from my.core.error import echain from my.core.sqlite import sqlite_connection import my.config logger = make_logger(__name__) @dataclass class config(my.config.tinder.android): # paths[s]/glob to the exported sqlite databases export_path: Paths @dataclass(unsafe_hash=True) class Person: id: str name: str # todo bio? it might change, not sure what do we want here @dataclass(unsafe_hash=True) class _BaseMatch: # for android, checked directly shortly after a match when: datetime_aware id: str @dataclass(unsafe_hash=True) class _Match(_BaseMatch): person_id: str @dataclass(unsafe_hash=True) class Match(_BaseMatch): person: Person # todo again, not sure what's the 'optimal' field order? perhaps the one which gives the most natural sort? # so either match id or datetime @dataclass class _BaseMessage: # looks like gdpr takeout does contain GMT (compared against google maps data) sent: datetime_aware id: str text: str @dataclass(unsafe_hash=True) class _Message(_BaseMessage): match_id: str from_id: str to_id: str @dataclass class Message(_BaseMessage): match: Match from_: Person to: Person # todo hmm I have a suspicion it might be cumulative? 
#  although still possible that the user might remove/install app back, so need to keep that in mind
def inputs() -> Sequence[Path]:
    """Return the database files selected by ``config.export_path``."""
    return get_files(config.export_path)


# "raw" entities refer to each other by id; the public ones hold resolved objects
_Entity = Union[Person, _Match, _Message]
Entity = Union[Person, Match, Message]


def _entities() -> Iterator[Res[_Entity]]:
    """
    Yield raw entities from every input database, one database after another.

    An exception raised while reading a database is yielded as a value
    instead of being raised, so the remaining databases still get processed.
    """
    paths = inputs()
    total = len(paths)
    width = len(str(total))
    for idx, path in enumerate(paths):
        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
        with sqlite_connection(path, immutable=True, row_factory='row') as db:
            try:
                yield from _handle_db(db)
            except Exception as e:
                yield e


def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]:
    """
    Yield persons, then matches, then messages from a single database.

    Rows that fail to parse are yielded as exceptions; the other rows are
    still processed.
    """
    # profile_user_view contains our own user id
    user_profile_rows = list(db.execute('SELECT * FROM profile_user_view'))

    if not user_profile_rows:
        # sometime in 2023 profile_user_view stopped containing user profile..
        # presumably the most common from_id/to_id would be our own username
        counter = Counter(
            id_ for (id_,) in db.execute('SELECT from_id FROM message UNION ALL SELECT to_id FROM message')
        )
        if counter:  # might be empty if db is empty (e.g. user got logged out)
            [(you_id, _)] = counter.most_common(1)
            yield Person(id=you_id, name='you')

    for row in chain(
        user_profile_rows,
        db.execute('SELECT * FROM match_person'),
    ):
        try:
            yield _parse_person(row)
        except Exception as e:
            # todo attach error context?
            yield e

    for row in db.execute('SELECT * FROM match'):
        try:
            yield _parse_match(row)
        except Exception as e:
            yield e

    for row in db.execute('SELECT * FROM message'):
        try:
            yield _parse_msg(row)
        except Exception as e:
            yield e


def _parse_person(row: sqlite3.Row) -> Person:
    """Build a ``Person`` from the ``id`` and ``name`` columns of a row."""
    return Person(
        id=row['id'],
        name=row['name'],
    )


def _parse_match(row: sqlite3.Row) -> _Match:
    """Build a raw ``_Match`` from a row of the ``match`` table."""
    return _Match(
        id=row['id'],
        person_id=row['person_id'],
        # divided by 1000 before conversion, i.e. the column is treated as milliseconds
        when=datetime.fromtimestamp(row['creation_date'] / 1000, tz=timezone.utc),
    )


def _parse_msg(row: sqlite3.Row) -> _Message:
    """Build a raw ``_Message`` from a row of the ``message`` table."""
    # note it also has raw_message_data -- not sure which is best to use..
    sent = row['sent_date']
    return _Message(
        # divided by 1000 before conversion, i.e. the column is treated as milliseconds
        sent=datetime.fromtimestamp(sent / 1000, tz=timezone.utc),
        id=row['id'],
        text=row['text'],
        match_id=row['match_id'],
        from_id=row['from_id'],
        to_id=row['to_id'],
    )


# todo maybe it's rich_entities method?
def entities() -> Iterator[Res[Entity]]:
    """
    Yield deduplicated entities with ids resolved to ``Person``/``Match`` objects.

    Resolution relies on persons arriving before the matches that mention
    them, and matches before their messages. A failed lookup is yielded as a
    ``RuntimeError`` naming the offending entity, with the original
    ``KeyError`` attached as its cause; that entity is then skipped.
    """
    id2person: Dict[str, Person] = {}
    id2match: Dict[str, Match] = {}

    # NOTE(review): the function itself is passed, not its result -- presumably
    # unique_everseen calls it; confirm against my.core.common
    for x in unique_everseen(_entities):
        if isinstance(x, Exception):
            yield x
            continue
        if isinstance(x, Person):
            id2person[x.id] = x
            yield x
            continue
        if isinstance(x, _Match):
            try:
                person = id2person[x.person_id]
            except KeyError as e:
                # same context wrapping as for messages below, so that the
                # error says which match could not be resolved
                yield echain(RuntimeError(f'while processing {x}'), e)
                continue
            m = Match(
                id=x.id,
                when=x.when,
                person=person,
            )
            id2match[x.id] = m
            yield m
            continue
        if isinstance(x, _Message):
            try:
                match = id2match[x.match_id]
                from_ = id2person[x.from_id]
                to = id2person[x.to_id]
            except KeyError as e:
                yield echain(RuntimeError(f'while processing {x}'), e)
                continue
            yield Message(
                sent=x.sent,
                match=match,
                id=x.id,
                text=x.text,
                from_=from_,
                to=to,
            )
            continue
        assert_never(x)


def messages() -> Iterator[Res[Message]]:
    """Yield only the messages (and any errors) out of ``entities()``."""
    for x in entities():
        if isinstance(x, (Exception, Message)):
            yield x


# todo not sure, maybe it's not fundamental enough to keep here...
def match2messages() -> Iterator[Res[Mapping[Match, Sequence[Message]]]]:
    """
    Group messages by the match they belong to.

    Errors coming out of ``entities()`` are yielded as they are encountered;
    the mapping itself is yielded once, as the last item.
    """
    grouped: Dict[Match, List[Message]] = defaultdict(list)
    for item in entities():
        if isinstance(item, Exception):
            yield item
        elif isinstance(item, Match):
            # match might happen without messages so makes sense to handle here
            grouped[item]  # just trigger creation
        elif isinstance(item, Message):
            try:
                bucket = grouped[item.match]
            except Exception as err:
                yield err
            else:
                bucket.append(item)
    yield grouped
    # TODO maybe a more natural return type is Iterator[Res[Tuple[Key, Value]]]
    # but this doesn't work straight away because the key might have no corresponding values


def stats() -> Stats:
    """Return ``stat`` applied to the ``messages`` function."""
    return stat(messages)