From a2b397ec4a83e6fded7c758470c49f6f18f2ab81 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 22 Oct 2024 20:50:37 +0100 Subject: [PATCH] my.whatsapp.android: adapt to new db format --- my/books/kobo.py | 2 +- my/whatsapp/android.py | 33 ++++++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/my/books/kobo.py b/my/books/kobo.py index 899ef31..40b7ed7 100644 --- a/my/books/kobo.py +++ b/my/books/kobo.py @@ -3,4 +3,4 @@ from my.core import warnings warnings.high('my.books.kobo is deprecated! Please use my.kobo instead!') from my.core.util import __NOT_HPI_MODULE__ -from my.kobo import * # type: ignore[no-redef] +from my.kobo import * diff --git a/my/whatsapp/android.py b/my/whatsapp/android.py index 27ee743..3cd4436 100644 --- a/my/whatsapp/android.py +++ b/my/whatsapp/android.py @@ -1,6 +1,7 @@ """ Whatsapp data from Android app database (in =/data/data/com.whatsapp/databases/msgstore.db=) """ + from __future__ import annotations import sqlite3 @@ -63,11 +64,27 @@ Entity = Union[Chat, Sender, Message] def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: # TODO later, split out Chat/Sender objects separately to safe on object creation, similar to other android data sources + try: + db.execute('SELECT jid_row_id FROM chat_view') + except sqlite3.OperationalError as oe: + if 'jid_row_id' not in str(oe): + raise oe + new_version_202410 = False + else: + new_version_202410 = True + + if new_version_202410: + chat_id_col = 'jid.raw_string' + jid_join = 'JOIN jid ON jid._id == chat_view.jid_row_id' + else: + chat_id_col = 'chat_view.raw_string_jid' + jid_join = '' + chats = {} for r in db.execute( - ''' - SELECT raw_string_jid AS chat_id, subject - FROM chat_view + f''' + SELECT {chat_id_col} AS chat_id, subject + FROM chat_view {jid_join} WHERE chat_id IS NOT NULL /* seems that it might be null for chats that are 'recycled' (the db is more like an LRU cache) */ ''' ): @@ -89,6 +106,7 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: ): # TODO seems that msgstore.db doesn't have contact names # perhaps should extract from wa.db and match against wa_contacts.jid? + # TODO these can also be chats? not sure if need to include... s = Sender( id=r['raw_string'], name=None, @@ -100,9 +118,9 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: # so even if it seems as if it has a column (e.g. for attachment path), there is actually no such data # so makes more sense to just query message column directly for r in db.execute( - ''' + f''' SELECT - C.raw_string_jid AS chat_id, + {chat_id_col} AS chat_id, M.key_id, M.timestamp, sender_jid_row_id, M.from_me, @@ -111,8 +129,9 @@ def _process_db(db: sqlite3.Connection) -> Iterator[Entity]: MM.file_size, M.message_type FROM message AS M - LEFT JOIN chat_view AS C ON M.chat_row_id = C._id - LEFT JOIN message_media AS MM ON M._id = MM.message_row_id + LEFT JOIN chat_view ON M.chat_row_id = chat_view._id + {jid_join} + left JOIN message_media AS MM ON M._id = MM.message_row_id WHERE M.key_id != -1 /* key_id -1 is some sort of fake message where everything is null */ /* type 7 seems to be some dummy system message. sometimes contain chat name, but usually null, so ignore them