""" Phone calls and SMS messages Exported using https://play.google.com/store/apps/details?id=com.riteshsahu.SMSBackupRestore&hl=en_US """ # See: https://www.synctech.com.au/sms-backup-restore/fields-in-xml-backup-files/ for schema REQUIRES = ['lxml'] from dataclasses import dataclass from my.core import get_files, stat, Paths, Stats from my.config import smscalls as user_config @dataclass class smscalls(user_config): # path[s] that SMSBackupRestore syncs XML files to export_path: Paths from my.core.cfg import make_config config = make_config(smscalls) from datetime import datetime, timezone from pathlib import Path from typing import NamedTuple, Iterator, Set, Tuple, Optional, Any, Dict, List from lxml import etree from my.core.error import Res class Call(NamedTuple): dt: datetime dt_readable: str duration_s: int who: Optional[str] # type - 1 = Incoming, 2 = Outgoing, 3 = Missed, 4 = Voicemail, 5 = Rejected, 6 = Refused List. call_type: int @property def summary(self) -> str: return f"talked with {self.who} for {self.duration_s} secs" @property def from_me(self) -> bool: return self.call_type == 2 # From docs: # All the field values are read as-is from the underlying database and no conversion is done by the app in most cases. # # The '(Unknown)' is just what my android phone does, not sure if there are others UNKNOWN: Set[str] = {'(Unknown)'} def _extract_calls(path: Path) -> Iterator[Res[Call]]: tr = etree.parse(str(path)) for cxml in tr.findall('call'): dt = cxml.get('date') dt_readable = cxml.get('readable_date') duration = cxml.get('duration') who = cxml.get('contact_name') call_type = cxml.get('type') # if name is missing, its not None (its some string), depends on the phone/message app if who is not None and who in UNKNOWN: who = None if dt is None or dt_readable is None or duration is None or call_type is None: call_str = etree.tostring(cxml).decode('utf-8') yield RuntimeError(f"Missing one or more required attributes [date, readable_date, duration, type] in {call_str}") continue # TODO we've got local tz here, not sure if useful.. # ok, so readable date is local datetime, changing throughout the backup yield Call( dt=_parse_dt_ms(dt), dt_readable=dt_readable, duration_s=int(duration), who=who, call_type=int(call_type), ) def calls() -> Iterator[Res[Call]]: files = get_files(config.export_path, glob='calls-*.xml') # TODO always replacing with the latter is good, we get better contact names?? emitted: Set[datetime] = set() for p in files: for c in _extract_calls(p): if isinstance(c, Exception): yield c continue if c.dt in emitted: continue emitted.add(c.dt) yield c class Message(NamedTuple): dt: datetime dt_readable: str who: Optional[str] message: str phone_number: str # type - 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox, 5 = Failed, 6 = Queued message_type: int @property def from_me(self) -> bool: return self.message_type == 2 def messages() -> Iterator[Res[Message]]: files = get_files(config.export_path, glob='sms-*.xml') emitted: Set[Tuple[datetime, Optional[str], bool]] = set() for p in files: for c in _extract_messages(p): if isinstance(c, Exception): yield c continue key = (c.dt, c.who, c.from_me) if key in emitted: continue emitted.add(key) yield c def _extract_messages(path: Path) -> Iterator[Res[Message]]: tr = etree.parse(str(path)) for mxml in tr.findall('sms'): dt = mxml.get('date') dt_readable = mxml.get('readable_date') who = mxml.get('contact_name') if who is not None and who in UNKNOWN: who = None message = mxml.get('body') phone_number = mxml.get('address') message_type = mxml.get('type') if dt is None or dt_readable is None or message is None or phone_number is None or message_type is None: msg_str = etree.tostring(mxml).decode('utf-8') yield RuntimeError(f"Missing one or more required attributes [date, readable_date, body, address, type] in {msg_str}") continue yield Message( dt=_parse_dt_ms(dt), dt_readable=dt_readable, who=who, message=message, phone_number=phone_number, message_type=int(message_type), ) class MMSContentPart(NamedTuple): sequence_index: int content_type: str filename: str text: Optional[str] data: Optional[str] class MMS(NamedTuple): dt: datetime dt_readable: str parts: List[MMSContentPart] # NOTE: these is often something like 'Name 1, Name 2', but might be different depending on your client who: Optional[str] # NOTE: This can be a single phone number, or multiple, split by '~' or ','. Its better to think # of this as a 'key' or 'conversation ID', phone numbers are also present in 'addresses' phone_number: str addresses: List[Tuple[str, int]] # 1 = Received, 2 = Sent, 3 = Draft, 4 = Outbox message_type: int @property def from_user(self) -> str: # since these can be group messages, we can't just check message_type, # we need to iterate through and find who sent it # who is CC/'To' is not obvious in many message clients # # 129 = BCC, 130 = CC, 151 = To, 137 = From for (addr, _type) in self.addresses: if _type == 137: return addr else: # hmm, maybe return instead? but this probably shouldnt happen, means # something is very broken raise RuntimeError(f'No from address matching 137 found in {self.addresses}') @property def from_me(self) -> bool: return self.message_type == 2 def mms() -> Iterator[Res[MMS]]: files = get_files(config.export_path, glob='sms-*.xml') emitted: Set[Tuple[datetime, Optional[str], str]] = set() for p in files: for c in _extract_mms(p): if isinstance(c, Exception): yield c continue key = (c.dt, c.phone_number, c.from_user) if key in emitted: continue emitted.add(key) yield c def _resolve_null_str(value: Optional[str]) -> Optional[str]: if value is None: return None # hmm.. theres some risk of the text actually being 'null', but theres # no way to distinguish that from XML values if value == 'null': return None return value def _extract_mms(path: Path) -> Iterator[Res[MMS]]: tr = etree.parse(str(path)) for mxml in tr.findall('mms'): dt = mxml.get('date') dt_readable = mxml.get('readable_date') message_type = mxml.get('msg_box') who = mxml.get('contact_name') if who is not None and who in UNKNOWN: who = None phone_number = mxml.get('address') if dt is None or dt_readable is None or message_type is None or phone_number is None: mxml_str = etree.tostring(mxml).decode('utf-8') yield RuntimeError(f'Missing one or more required attributes [date, readable_date, msg_box, address] in {mxml_str}') continue addresses: List[Tuple[str, int]] = [] for addr_parent in mxml.findall('addrs'): for addr in addr_parent.findall('addr'): addr_data = addr.attrib user_address = addr_data.get('address') user_type = addr_data.get('type') if user_address is None or user_type is None: addr_str = etree.tostring(addr_parent).decode() yield RuntimeError(f'Missing one or more required attributes [address, type] in {addr_str}') continue if not user_type.isdigit(): yield RuntimeError(f'Invalid address type {user_type} {type(user_type)}, cannot convert to number') continue addresses.append((user_address, int(user_type))) content: List[MMSContentPart] = [] for part_root in mxml.findall('parts'): for part in part_root.findall('part'): # the first item is an SMIL XML element encoded as a string which describes # how the rest of the parts are laid out # https://www.w3.org/TR/SMIL3/smil-timing.html#Timing-TimeContainerSyntax # An example: # # # This seems pretty useless, so we should try and skip it, and just return the # text/images/data # # man, attrib is some internal cpython ._Attrib type which can't # be typed by any sort of mappingproxy. maybe a protocol could work..? part_data: Dict[str, Any] = part.attrib # type: ignore seq: Optional[str] = part_data.get('seq') if seq == '-1': continue if seq is None or not seq.isdigit(): yield RuntimeError(f'seq must be a number, was seq={seq} {type(seq)} in {part_data}') continue charset_type: Optional[str] = _resolve_null_str(part_data.get('ct')) filename: Optional[str] = _resolve_null_str(part_data.get('name')) # in some cases (images, cards), the filename is set in 'cl' instead if filename is None: filename = _resolve_null_str(part_data.get('cl')) text: Optional[str] = _resolve_null_str(part_data.get('text')) data: Optional[str] = _resolve_null_str(part_data.get('data')) if charset_type is None or filename is None or (text is None and data is None): yield RuntimeError(f'Missing one or more required attributes [ct, name, (text, data)] must be present in {part_data}') continue content.append( MMSContentPart( sequence_index=int(seq), content_type=charset_type, filename=filename, text=text, data=data ) ) yield MMS( dt=_parse_dt_ms(dt), dt_readable=dt_readable, who=who, phone_number=phone_number, message_type=int(message_type), parts=content, addresses=addresses, ) # See https://github.com/karlicoss/HPI/pull/90#issuecomment-702422351 # for potentially parsing timezone from the readable_date def _parse_dt_ms(d: str) -> datetime: return datetime.fromtimestamp(int(d) / 1000, tz=timezone.utc) def stats() -> Stats: return { **stat(calls), **stat(messages), **stat(mms), }