switch from using dataset to raw sqlite3 module

The `dataset` library is largely unmaintained and is currently broken by the SQLAlchemy 2.0 changes.

resolves https://github.com/karlicoss/HPI/issues/264
This commit is contained in:
Dima Gerasimov 2023-02-07 01:28:45 +00:00
parent 1dcb40f840
commit 7c7457ab08
8 changed files with 123 additions and 103 deletions

View file

@ -3,19 +3,18 @@ Tinder data from Android app database (in =/data/data/com.tinder/databases/tinde
"""
from __future__ import annotations
REQUIRES = ['dataset']
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from itertools import chain
from pathlib import Path
import sqlite3
from typing import Sequence, Iterator, Union, Dict, List, Mapping
from more_itertools import unique_everseen
from my.core import Paths, get_files, Res, assert_never, stat, Stats, datetime_aware
from my.core.dataset import connect_readonly, DatabaseT
from my.core.sqlite import sqlite_connection
from my.config import tinder as user_config
@ -73,6 +72,8 @@ class Message(_BaseMessage):
to: Person
# todo hmm I have a suspicion it might be cumulative?
# although still possible that the user might remove/install app back, so need to keep that in mind
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
@ -83,40 +84,43 @@ Entity = Union[Person, Match, Message]
def _entities() -> Iterator[Res[_Entity]]:
for db_file in inputs():
with connect_readonly(db_file) as db:
with sqlite_connection(db_file, immutable=True, row_factory='row') as db:
yield from _handle_db(db)
def _handle_db(db: DatabaseT) -> Iterator[Res[_Entity]]:
def _handle_db(db: sqlite3.Connection) -> Iterator[Res[_Entity]]:
# profile_user_view contains our own user id
for row in chain(db['profile_user_view'], db['match_person']):
for row in chain(
db.execute('SELECT * FROM profile_user_view'),
db.execute('SELECT * FROM match_person'),
):
try:
yield _parse_person(row)
except Exception as e:
# todo attach error context?
yield e
for row in db['match']:
for row in db.execute('SELECT * FROM match'):
try:
yield _parse_match(row)
except Exception as e:
yield e
for row in db['message']:
for row in db.execute('SELECT * FROM message'):
try:
yield _parse_msg(row)
except Exception as e:
yield e
def _parse_person(row) -> Person:
def _parse_person(row: sqlite3.Row) -> Person:
return Person(
id=row['id'],
name=row['name'],
)
def _parse_match(row) -> _Match:
def _parse_match(row: sqlite3.Row) -> _Match:
return _Match(
id=row['id'],
person_id=row['person_id'],
@ -124,7 +128,7 @@ def _parse_match(row) -> _Match:
)
def _parse_msg(row) -> _Message:
def _parse_msg(row: sqlite3.Row) -> _Message:
# note it also has raw_message_data -- not sure which is best to use..
sent = row['sent_date']
return _Message(