HPI/my/vk/vk_messages_backup.py

'''
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
'''
# note: could reuse the original repo, but little point I guess since VK closed their API


from datetime import datetime
import json
from pathlib import Path
from typing import Dict, Iterable, NamedTuple

import pytz

from ..core import Json

from my.config import vk_messages_backup as config


Uid = str
Name = str


Users = Dict[Uid, Name]

def users() -> Users:
    # todo cache?
    files = list(sorted(config.storage_path.glob('user_*.json')))
    res = {}
    for f in files:
        j = json.loads(f.read_text())
        uid = j['id']
        uf  = j['first_name']
        ul  = j['last_name']
        res[uid] = f'{uf} {ul}'
    return res


class Message(NamedTuple):
    chat_id: str
    dt: datetime
    user: Name
    body: str


msk_tz = pytz.timezone('Europe/Moscow')
# todo hmm, vk_messages_backup used this tz? not sure if vk actually used to return this tz in api?

def _parse(x: Json, chat_id: str, udict: Users) -> Message:
    mid = x['id'] # todo not sure if useful?
    md  = x['date']

    dt = datetime.fromtimestamp(md, msk_tz)

    # todo attachments? e.g. url could be an attachment
    # todo might be forwarded?
    mb  = x.get('body')
    if mb is None:
        mb = x.get('text')
    assert mb is not None

    mu  = x.get('user_id') or x.get('peer_id')
    assert mu is not None
    out = x['out'] == 1
    # todo use name from the config?
    user = 'you' if out else udict[mu]

    # todo conversation id??

    return Message(
        chat_id=chat_id,
        dt=dt,
        user=user,
        body=mb,
    )


from ..core.error import Res
def messages() -> Iterable[Res[Message]]:
    udict = users()

    uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \
             list(sorted(config.storage_path.glob('groupchat_*.json')))
    for f in uchats:
        chat_id = f.stem.split('_')[-1]
        j = json.loads(f.read_text())
        for x in j:
            try:
                yield _parse(x, chat_id=chat_id, udict=udict)
            except Exception as e:
                yield e


def stats():
    from ..core import stat
    return {
        **stat(users),
        **stat(messages),
    }