From 160582b6cfa3d015405d5d94c3d58335d9d7dc16 Mon Sep 17 00:00:00 2001
From: Sean Breckenridge
Date: Wed, 30 Sep 2020 16:32:06 -0700
Subject: [PATCH] parse sms messages from xml files

---
Review notes (placed below the "---" marker, so they are not part of the
commit message):
* Adds Message, messages(), _extract_messages() and stats() to
  my/smscalls.py; the millisecond-epoch conversion is factored out into
  _parse_dt_ms() and reused by _extract_calls().
* messages() de-duplicates on (dt, who, from_me); the message body is not
  part of the key.
* The line structure of this patch was restored from a whitespace-collapsed
  copy. Indentation, blank context lines and spacing before inline comments
  were inferred from the hunk line counts; check them against the target
  tree before applying (assumed: tests/smscalls.py ends with a blank line).

 my/smscalls.py    | 57 +++++++++++++++++++++++++++++++++++++++++------
 tests/smscalls.py |  3 ++-
 2 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/my/smscalls.py b/my/smscalls.py
index 91d9af5..8994e77 100644
--- a/my/smscalls.py
+++ b/my/smscalls.py
@@ -1,15 +1,14 @@
 """
 Phone calls and SMS messages
 """
-# TODO extract SMS as well? I barely use them though..
 from datetime import datetime
 from pathlib import Path
-from typing import NamedTuple, Iterator, Set
+from typing import NamedTuple, Iterator, Set, Tuple
 
 import pytz
 from lxml import etree # type: ignore
 
-from .common import get_files
+from .core.common import get_files
 
 from my.config import smscalls as config
 
@@ -27,11 +26,10 @@ class Call(NamedTuple):
 def _extract_calls(path: Path) -> Iterator[Call]:
     tr = etree.parse(str(path))
     for cxml in tr.findall('call'):
-        # TODO we've got local tz herer, not sure if useful..
-        # ok, so readable date is local datetime, cahnging throughout the backup
-        dt = pytz.utc.localize(datetime.utcfromtimestamp(int(cxml.get('date')) / 1000))
+        # TODO we've got local tz here, not sure if useful..
+        # ok, so readable date is local datetime, changing throughout the backup
         yield Call(
-            dt=dt,
+            dt=_parse_dt_ms(cxml.get('date')),
             duration_s=int(cxml.get('duration')),
             who=cxml.get('contact_name') # TODO number if contact is unavail??
             # TODO type? must be missing/outgoing/incoming
@@ -49,3 +47,48 @@ def calls() -> Iterator[Call]:
                 continue
             emitted.add(c.dt)
             yield c
+
+
+class Message(NamedTuple):
+    dt: datetime
+    who: str
+    message: str
+    phone_number: str
+    from_me: bool
+
+
+def messages() -> Iterator[Message]:
+    files = get_files(config.export_path, glob='sms-*.xml')
+
+    emitted: Set[Tuple[datetime, str, bool]] = set()
+    for p in files:
+        for c in _extract_messages(p):
+            key = (c.dt, c.who, c.from_me)
+            if key in emitted:
+                continue
+            emitted.add(key)
+            yield c
+
+
+def _extract_messages(path: Path) -> Iterator[Message]:
+    tr = etree.parse(str(path))
+    for mxml in tr.findall('sms'):
+        yield Message(
+            dt=_parse_dt_ms(mxml.get('date')),
+            who=mxml.get('contact_name'),
+            message=mxml.get('body'),
+            phone_number=mxml.get('address'),
+            from_me=mxml.get('type') == '2', # 1 is received message, 2 is sent message
+        )
+
+def _parse_dt_ms(d: str) -> datetime:
+    return pytz.utc.localize(datetime.utcfromtimestamp(int(d) / 1000))
+
+
+def stats():
+    from .core import stat
+
+    return {
+        **stat(calls),
+        **stat(messages),
+    }
diff --git a/tests/smscalls.py b/tests/smscalls.py
index a431fc9..3303e1c 100644
--- a/tests/smscalls.py
+++ b/tests/smscalls.py
@@ -1,6 +1,7 @@
-from my.smscalls import calls
+from my.smscalls import calls, messages
 
 # TODO that's a pretty dumb test; perhaps can be generic..
 def test():
     assert len(list(calls())) > 10
+    assert len(list(messages())) > 10
 