''' VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]]) '''
# note: could reuse the original repo, but little point I guess since VK closed their API

from datetime import datetime
from dataclasses import dataclass
import json
from typing import Dict, Iterable, NamedTuple

import pytz

from my.core import stat, Stats, Json, Res, datetime_aware
from my.config import vk_messages_backup as config

# I think vk_messages_backup used this tz?
# not sure if vk actually used to return this tz in api?
TZ = pytz.timezone('Europe/Moscow')


Uid = int


# A single user record, built from one 'user_*.json' file.
@dataclass(frozen=True)
class User:
    id: Uid
    first_name: str
    last_name: str


# A conversation: either a group chat or a one-to-one dialogue.
@dataclass(frozen=True)
class Chat:
    chat_id: str
    title: str


# One parsed message together with the chat and the user it is attributed to.
@dataclass
class Message:
    dt: datetime_aware
    chat: Chat
    id: str  # todo not sure it's unique?
    user: User
    body: str


Users = Dict[Uid, User]


def users() -> Users:
    """Load every 'user_*.json' file under the configured storage path.

    Files are visited in sorted path order. Returns a mapping from the 'id'
    field of each file to the corresponding User.
    """
    by_id = {}
    for path in sorted(config.storage_path.glob('user_*.json')):
        raw = json.loads(path.read_text())
        uid = raw['id']
        by_id[uid] = User(
            id=uid,
            first_name=raw['first_name'],
            last_name=raw['last_name'],
        )
    return by_id


# USERCHAT_TITLE = " ... "
def _parse_chat(*, msg: Json, udict: Users) -> Chat:
    """Derive the Chat that a raw message belongs to.

    When the message has a non-None 'chat_id', that id and the message's
    'title' are used. Otherwise the chat is keyed by the message's 'user_id'
    (falling back to 'from_id') and titled with that user's full name, which
    is looked up in udict.
    """
    group_id = msg.get('chat_id')
    if group_id is not None:
        return Chat(
            chat_id=group_id,
            title=msg['title'],
        )

    # no 'chat_id' on the message: identify the chat by the other user instead
    peer_id = msg.get('user_id') or msg.get('from_id')
    assert peer_id is not None
    peer = udict[peer_id]
    return Chat(
        chat_id=peer_id,
        title=f'{peer.first_name} {peer.last_name}',
    )


def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message:
    """Convert one raw message dict into a Message attached to chat.

    The 'date' field is converted with datetime.fromtimestamp using TZ. The
    text is taken from 'body', or from 'text' when 'body' is absent. Messages
    whose 'out' field equals 1 are attributed to config.user_id; all others
    to the message's 'user_id' / 'from_id'.
    """
    msg_id = msg['id']
    when = datetime.fromtimestamp(msg['date'], tz=TZ)

    # todo attachments? e.g. url could be an attachment
    # todo might be forwarded?
    text = msg.get('body')
    if text is None:
        text = msg.get('text')
    assert text is not None, msg

    if msg['out'] == 1:
        author = udict[config.user_id]
    else:
        sender_id = msg.get('user_id') or msg.get('from_id')
        assert sender_id is not None, msg
        author = udict[sender_id]

    return Message(
        dt=when,
        chat=chat,
        id=msg_id,
        user=author,
        body=text,
    )


def messages() -> Iterable[Res[Message]]:
    """Yield every message from the 'userchat_*' and 'groupchat_*' JSON files.

    User chats are processed before group chats, each group in sorted path
    order. Failures while parsing a chat or an individual message are yielded
    as exception objects instead of being raised.
    """
    known_users = users()
    chat_files = [
        *sorted(config.storage_path.glob('userchat_*.json')),
        *sorted(config.storage_path.glob('groupchat_*.json')),
    ]
    for chat_file in chat_files:
        entries = json.loads(chat_file.read_text())
        # extract chat from last message
        try:
            chat = _parse_chat(msg=entries[-1], udict=known_users)
        except Exception as err:
            # the chat could not be determined, so skip the whole file
            yield err
            continue

        for entry in entries:
            try:
                yield _parse_msg(msg=entry, chat=chat, udict=known_users)
            except Exception as err:
                yield err


def stats() -> Stats:
    """Return the combined stat() results for users and messages."""
    combined = {}
    combined.update(stat(users))
    combined.update(stat(messages))
    return combined