my.whatsapp.android: exclude some dummy messages, minor cleanup
This commit is contained in:
parent
414b88178f
commit
72ab2603d5
1 changed files with 30 additions and 30 deletions
|
@ -11,19 +11,17 @@ from typing import Sequence, Iterator, Optional
|
||||||
|
|
||||||
from more_itertools import unique_everseen
|
from more_itertools import unique_everseen
|
||||||
|
|
||||||
from my.core import get_files, Paths, datetime_aware, Res, LazyLogger, make_config
|
from my.core import get_files, Paths, datetime_aware, Res, make_logger, make_config
|
||||||
from my.core.error import echain, notnone
|
from my.core.error import echain, notnone
|
||||||
from my.core.sqlite import sqlite_connection
|
from my.core.sqlite import sqlite_connection
|
||||||
|
import my.config
|
||||||
|
|
||||||
|
|
||||||
from my.config import whatsapp as user_config
|
logger = make_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
logger = LazyLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Config(user_config.android):
|
class Config(my.config.whatsapp.android):
|
||||||
# paths[s]/glob to the exported sqlite databases
|
# paths[s]/glob to the exported sqlite databases
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
my_user_id: Optional[str] = None
|
my_user_id: Optional[str] = None
|
||||||
|
@ -63,11 +61,13 @@ def _process_db(db: sqlite3.Connection):
|
||||||
# TODO later, split out Chat/Sender objects separately to safe on object creation, similar to other android data sources
|
# TODO later, split out Chat/Sender objects separately to safe on object creation, similar to other android data sources
|
||||||
|
|
||||||
chats = {}
|
chats = {}
|
||||||
for r in db.execute('''
|
for r in db.execute(
|
||||||
|
'''
|
||||||
SELECT raw_string_jid AS chat_id, subject
|
SELECT raw_string_jid AS chat_id, subject
|
||||||
FROM chat_view
|
FROM chat_view
|
||||||
WHERE chat_id IS NOT NULL /* seems that it might be null for chats that are 'recycled' (the db is more like an LRU cache) */
|
WHERE chat_id IS NOT NULL /* seems that it might be null for chats that are 'recycled' (the db is more like an LRU cache) */
|
||||||
'''):
|
'''
|
||||||
|
):
|
||||||
chat_id = r['chat_id']
|
chat_id = r['chat_id']
|
||||||
subject = r['subject']
|
subject = r['subject']
|
||||||
chat = Chat(
|
chat = Chat(
|
||||||
|
@ -76,12 +76,13 @@ def _process_db(db: sqlite3.Connection):
|
||||||
)
|
)
|
||||||
chats[chat.id] = chat
|
chats[chat.id] = chat
|
||||||
|
|
||||||
|
|
||||||
senders = {}
|
senders = {}
|
||||||
for r in db.execute('''
|
for r in db.execute(
|
||||||
|
'''
|
||||||
SELECT _id, raw_string
|
SELECT _id, raw_string
|
||||||
FROM jid
|
FROM jid
|
||||||
'''):
|
'''
|
||||||
|
):
|
||||||
# TODO seems that msgstore.db doesn't have contact names
|
# TODO seems that msgstore.db doesn't have contact names
|
||||||
# perhaps should extract from wa.db and match against wa_contacts.jid?
|
# perhaps should extract from wa.db and match against wa_contacts.jid?
|
||||||
s = Sender(
|
s = Sender(
|
||||||
|
@ -90,18 +91,25 @@ def _process_db(db: sqlite3.Connection):
|
||||||
)
|
)
|
||||||
senders[r['_id']] = s
|
senders[r['_id']] = s
|
||||||
|
|
||||||
|
# NOTE: hmm, seems that message_view or available_message_view use lots of NULL as ...
|
||||||
|
# so even if it seems as if it has a column (e.g. for attachment path), there is actually no such data
|
||||||
|
# so makes more sense to just query message column directly
|
||||||
# todo message_type? mostly 0, but seems all over, even for seemingly normal messages with text
|
# todo message_type? mostly 0, but seems all over, even for seemingly normal messages with text
|
||||||
for r in db.execute('''
|
for r in db.execute(
|
||||||
|
'''
|
||||||
SELECT C.raw_string_jid AS chat_id, M.key_id, M.timestamp, sender_jid_row_id, M.from_me, M.text_data, MM.file_path
|
SELECT C.raw_string_jid AS chat_id, M.key_id, M.timestamp, sender_jid_row_id, M.from_me, M.text_data, MM.file_path
|
||||||
FROM message AS M
|
FROM message AS M
|
||||||
LEFT JOIN chat_view AS C
|
LEFT JOIN chat_view AS C ON M.chat_row_id = C._id
|
||||||
ON M.chat_row_id = C._id
|
LEFT JOIN message_media AS MM ON M._id = MM.message_row_id
|
||||||
LEFT JOIN message_media AS MM
|
|
||||||
ON M._id = MM.message_row_id
|
|
||||||
WHERE M.key_id != -1 /* key_id -1 is some sort of fake message where everything is null */
|
WHERE M.key_id != -1 /* key_id -1 is some sort of fake message where everything is null */
|
||||||
|
/* type 7 seems to be some dummy system message.
|
||||||
|
sometimes contain chat name, but usually null, so ignore them
|
||||||
|
for normal messages it's 0
|
||||||
|
*/
|
||||||
|
AND M.message_type != 7
|
||||||
ORDER BY M.timestamp
|
ORDER BY M.timestamp
|
||||||
'''):
|
'''
|
||||||
|
):
|
||||||
msg_id: str = notnone(r['key_id'])
|
msg_id: str = notnone(r['key_id'])
|
||||||
ts: int = notnone(r['timestamp'])
|
ts: int = notnone(r['timestamp'])
|
||||||
dt = datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
|
dt = datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
|
||||||
|
@ -131,28 +139,20 @@ def _process_db(db: sqlite3.Connection):
|
||||||
# for group chats our onw id is still 0, but other ids are properly set
|
# for group chats our onw id is still 0, but other ids are properly set
|
||||||
if from_me:
|
if from_me:
|
||||||
myself_user_id = config.my_user_id or 'MYSELF_USER_ID'
|
myself_user_id = config.my_user_id or 'MYSELF_USER_ID'
|
||||||
sender = Sender(id=myself_user_id, name=None)
|
sender = Sender(id=myself_user_id, name=None) # TODO set my own name as well?
|
||||||
else:
|
else:
|
||||||
sender = Sender(id=chat.id, name=None)
|
sender = Sender(id=chat.id, name=None)
|
||||||
else:
|
else:
|
||||||
sender = senders[sender_row_id]
|
sender = senders[sender_row_id]
|
||||||
|
|
||||||
|
m = Message(chat=chat, id=msg_id, dt=dt, sender=sender, text=text)
|
||||||
|
|
||||||
m = Message(
|
|
||||||
chat=chat,
|
|
||||||
id=msg_id,
|
|
||||||
dt=dt,
|
|
||||||
sender=sender,
|
|
||||||
text=text
|
|
||||||
)
|
|
||||||
yield m
|
yield m
|
||||||
|
|
||||||
|
|
||||||
def _messages() -> Iterator[Res[Message]]:
|
def _messages() -> Iterator[Res[Message]]:
|
||||||
dbs = inputs()
|
dbs = inputs()
|
||||||
for i, f in enumerate(dbs):
|
for i, f in enumerate(dbs):
|
||||||
logger.debug(f'processing {f} {i}/{len(dbs)}')
|
logger.info(f'processing {f} {i}/{len(dbs)}')
|
||||||
with sqlite_connection(f, immutable=True, row_factory='row') as db:
|
with sqlite_connection(f, immutable=True, row_factory='row') as db:
|
||||||
try:
|
try:
|
||||||
yield from _process_db(db)
|
yield from _process_db(db)
|
||||||
|
|
Loading…
Add table
Reference in a new issue