From b9d788efd0e70d89ddbfd0ddc57a03e0f6c4f826 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 4 Jun 2022 10:29:50 +0100 Subject: [PATCH] some enhancements for facebook/instagram modules figured out that datetimes are naive better username handling + investigation of thread names --- my/fbmessenger/android.py | 4 +++- my/fbmessenger/common.py | 6 ++++++ my/instagram/android.py | 16 +++++++--------- my/instagram/gdpr.py | 16 +++++++++++++--- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/my/fbmessenger/android.py b/my/fbmessenger/android.py index 6d82002..a8078d6 100644 --- a/my/fbmessenger/android.py +++ b/my/fbmessenger/android.py @@ -38,10 +38,12 @@ class Thread: name: Optional[str] # todo not sure about order of fields... +from ..core import datetime_naive @dataclass class _BaseMessage: id: str - dt: datetime + # checked against a message sent on 4 may 2022, and it does look naive + dt: datetime_naive text: Optional[str] diff --git a/my/fbmessenger/common.py b/my/fbmessenger/common.py index a6549d5..1f82327 100644 --- a/my/fbmessenger/common.py +++ b/my/fbmessenger/common.py @@ -43,3 +43,9 @@ def _merge_messages(*sources: Iterator[Res[Message]]) -> Iterator[Res[Message]]: # similar to twitter, might make sense to generify/document as a pattern return (r.id, r.dt) yield from unique_everseen(chain(*sources), key=key) + + +# TODO some notes about gdpr export (since there is no module yet) +# ugh, messages seem to go from new to old in messages_N.json files as N increases :facepalm: +# seems like it's storing local timestamp :facepalm: +# checked against a message sent on 4 may 2022 diff --git a/my/instagram/android.py b/my/instagram/android.py index fc2ac38..a34660c 100644 --- a/my/instagram/android.py +++ b/my/instagram/android.py @@ -32,11 +32,13 @@ class User: full_name: str +from ..core import datetime_naive # todo not sure about order of fields... 
@dataclass class _BaseMessage: id: str - created: datetime + # NOTE: ffs, looks like they keep naive timestamps in the db (checked some random messages) + created: datetime_naive text: str thread_id: str @@ -82,7 +84,6 @@ def _parse_message(j: Json) -> Optional[_Message]: t = j['item_type'] tid = j['thread_key']['thread_id'] uid = j['user_id'] - # TODO not sure if utc?? created = datetime.fromtimestamp(int(j['timestamp']) / 1_000_000) text: str if t == 'text': @@ -120,14 +121,11 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]: with sqlite_connect_immutable(f) as db: for (self_uid, thread_json) in select(('user_id', 'thread_info'), 'FROM threads', db=db): - # ugh wtf?? no easier way to extract your own user id/name?? - yield User( - id=str(self_uid), - full_name='You', - username='you', - ) j = json.loads(thread_json) - for r in j['recipients']: + # todo in principle should leave the thread attached to the message? + # since thread is a group of users? + # inviter usually contains our own user + for r in [j['inviter'], *j['recipients']]: yield User( id=str(r['id']), # for some reason it's int in the db full_name=r['full_name'], diff --git a/my/instagram/gdpr.py b/my/instagram/gdpr.py index 754a2e9..3dfe352 100644 --- a/my/instagram/gdpr.py +++ b/my/instagram/gdpr.py @@ -31,12 +31,15 @@ class User: full_name: str +from ..core import datetime_naive @dataclass class _BaseMessage: - # TODO id is missing? - created: datetime + # ugh, this is insane, but does look like it's just keeping local device time??? + # checked against a message sent on 3 June, which should be UTC+1, but timestamp seems local + created: datetime_naive text: str thread_id: str + # NOTE: doesn't look like there are any meaningful message ids in the export @dataclass(unsafe_hash=True) @@ -100,7 +103,14 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]: j = json.loads(ffile.read_text()) id_len = 10 - # NOTE: no match in android db/api responses? 
+ # NOTE: I'm not actually sure it's other user's id.., since it corresponds to the whole conversation + # but I stared a bit at these ids vs database ids and can't see any way to find the correspondence :( + # so basically the only way to merge is to actually try some magic and correlate timestamps/message texts? + # another option is perhaps to query user id from username with some free API + # it's still fragile: e.g. if user deletes themselves there is no more username (it becomes "instagramuser") + # if we use older exports we might be able to figure it out though... so think about it? + # it also names grouped ones like instagramuserchrisfoodishblogand25others_einihreoog + # so I feel like there is just no guaranteed way to correlate :( other_id = fname[-id_len:] # NOTE: no match in android db? other_username = fname[:-id_len - 1]