diff --git a/my/config.py b/my/config.py index bfae86e..e9eafec 100644 --- a/my/config.py +++ b/my/config.py @@ -194,6 +194,7 @@ class simple: class vk_messages_backup: storage_path: Path + user_id: int class kobo: diff --git a/my/vk/vk_messages_backup.py b/my/vk/vk_messages_backup.py index 0e8dc45..df1d18e 100644 --- a/my/vk/vk_messages_backup.py +++ b/my/vk/vk_messages_backup.py @@ -2,95 +2,132 @@ VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]]) ''' # note: could reuse the original repo, but little point I guess since VK closed their API - - from datetime import datetime +from dataclasses import dataclass import json from typing import Dict, Iterable, NamedTuple import pytz -from ..core import Json +from my.core import stat, Stats, Json, Res, datetime_aware from my.config import vk_messages_backup as config -Uid = str -Name = str +# I think vk_messages_backup used this tz? +# not sure if vk actually used to return this tz in api? +TZ = pytz.timezone('Europe/Moscow') -Users = Dict[Uid, Name] +Uid = int +@dataclass(frozen=True) +class User: + id: Uid + first_name: str + last_name: str + + +@dataclass(frozen=True) +class Chat: + chat_id: str + title: str + + +@dataclass +class Message: + dt: datetime_aware + chat: Chat + id: str # todo not sure it's unique? + user: User + body: str + + +Users = Dict[Uid, User] def users() -> Users: - # todo cache? files = list(sorted(config.storage_path.glob('user_*.json'))) res = {} for f in files: j = json.loads(f.read_text()) uid = j['id'] - uf = j['first_name'] - ul = j['last_name'] - res[uid] = f'{uf} {ul}' + res[uid] = User( + id=uid, + first_name=j['first_name'], + last_name=j['last_name'], + ) return res -class Message(NamedTuple): - chat_id: str - dt: datetime - user: Name - body: str +# USERCHAT_TITLE = " ... " +def _parse_chat(*, msg: Json, udict: Users) -> Chat: + group_chat_id = msg.get('chat_id') + if group_chat_id is not None: + chat_id = group_chat_id + title = msg['title'] + else: + user_id = msg.get('user_id') or msg.get('from_id') + assert user_id is not None + user = udict[user_id] + chat_id = user_id + title = f'{user.first_name} {user.last_name}' + return Chat( + chat_id=chat_id, + title=title, + ) -msk_tz = pytz.timezone('Europe/Moscow') -# todo hmm, vk_messages_backup used this tz? not sure if vk actually used to return this tz in api? +def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message: + mid = msg['id'] + md = msg['date'] -def _parse(x: Json, chat_id: str, udict: Users) -> Message: - mid = x['id'] # todo not sure if useful? - md = x['date'] - - dt = datetime.fromtimestamp(md, msk_tz) + dt = datetime.fromtimestamp(md, tz=TZ) # todo attachments? e.g. url could be an attachment # todo might be forwarded? - mb = x.get('body') + mb = msg.get('body') if mb is None: - mb = x.get('text') - assert mb is not None - - mu = x.get('user_id') or x.get('peer_id') - assert mu is not None - out = x['out'] == 1 - # todo use name from the config? - user = 'you' if out else udict[mu] - - # todo conversation id?? + mb = msg.get('text') + assert mb is not None, msg + out = msg['out'] == 1 + if out: + user = udict[config.user_id] + else: + mu = msg.get('user_id') or msg.get('from_id') + assert mu is not None, msg + user = udict[mu] return Message( - chat_id=chat_id, dt=dt, + chat=chat, + id=mid, user=user, body=mb, ) -from ..core.error import Res def messages() -> Iterable[Res[Message]]: udict = users() uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \ list(sorted(config.storage_path.glob('groupchat_*.json'))) for f in uchats: - chat_id = f.stem.split('_')[-1] j = json.loads(f.read_text()) - for x in j: + # extract chat from last message + try: + last = j[-1] + chat = _parse_chat(msg=last, udict=udict) + except Exception as e: + yield e + continue + + for msg in j: try: - yield _parse(x, chat_id=chat_id, udict=udict) + yield _parse_msg(msg=msg, chat=chat, udict=udict) except Exception as e: yield e -def stats(): - from ..core import stat +def stats() -> Stats: return { **stat(users), **stat(messages),