'''
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
'''
# note: could reuse the original repo, but little point I guess since VK closed their API

from datetime import datetime
import json
from pathlib import Path
from typing import Dict, Iterable, NamedTuple

import pytz

from ..core import Json
from ..core.error import Res

from my.config import vk_messages_backup as config


Uid = str
Name = str

Users = Dict[Uid, Name]


def users() -> Users:
    """Build a mapping from user id to display name.

    Reads every ``user_*.json`` file under ``config.storage_path`` (in sorted
    order) and maps the record's ``id`` to ``"<first_name> <last_name>"``.
    """
    # todo cache?
    res: Users = {}
    for f in sorted(config.storage_path.glob('user_*.json')):
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        j = json.loads(f.read_text(encoding='utf-8'))
        uid = j['id']
        uf = j['first_name']
        ul = j['last_name']
        res[uid] = f'{uf} {ul}'
    return res


class Message(NamedTuple):
    # Identifier taken from the chat file name (the part after the last '_').
    chat_id: str
    # Timezone-aware timestamp, expressed in ``msk_tz``.
    dt: datetime
    # 'you' for outgoing messages, otherwise the sender's name from ``users()``.
    user: Name
    # Message text (the record's 'body' field, falling back to 'text').
    body: str


msk_tz = pytz.timezone('Europe/Moscow')
# todo hmm, vk_messages_backup used this tz? not sure if vk actually used to return this tz in api?


def _parse(x: Json, chat_id: str, udict: Users) -> Message:
    """Convert one raw message record into a ``Message``.

    Args:
        x: a single message record from a chat file.
        chat_id: identifier of the chat the record belongs to.
        udict: user id -> display name mapping, as returned by ``users()``.

    Raises:
        KeyError: if ``id``, ``date`` or ``out`` is missing, or the sender of
            an incoming message is not present in ``udict``.
        ValueError: if the record has no text or no sender/peer id.
    """
    # The value is not used yet, but the lookup is kept so that records
    # without an id are still rejected.
    mid = x['id']  # todo not sure if useful?
    md = x['date']

    dt = datetime.fromtimestamp(md, msk_tz)

    # todo attachments? e.g. url could be an attachment
    # todo might be forwarded?
    mb = x.get('body')
    if mb is None:
        mb = x.get('text')
    # Explicit raise rather than assert: asserts are removed under ``python -O``.
    if mb is None:
        raise ValueError(f"message has neither 'body' nor 'text': {x!r}")

    mu = x.get('user_id') or x.get('peer_id')
    if mu is None:
        raise ValueError(f"message has neither 'user_id' nor 'peer_id': {x!r}")

    out = x['out'] == 1
    # todo use name from the config?
    # The user lookup only happens for incoming messages.
    user = 'you' if out else udict[mu]

    # todo conversation id??
    return Message(
        chat_id=chat_id,
        dt=dt,
        user=user,
        body=mb,
    )


def messages() -> Iterable[Res[Message]]:
    """Yield every message from all ``userchat_*`` and ``groupchat_*`` files.

    User chats are processed first, then group chats, each group in sorted
    file order. A record that fails to parse is yielded as the exception
    object instead of aborting the iteration, so consumers must handle both
    ``Message`` and ``Exception`` items.
    """
    udict = users()

    uchats = (
        sorted(config.storage_path.glob('userchat_*.json'))
        + sorted(config.storage_path.glob('groupchat_*.json'))
    )
    for f in uchats:
        chat_id = f.stem.split('_')[-1]
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        j = json.loads(f.read_text(encoding='utf-8'))
        for x in j:
            try:
                yield _parse(x, chat_id=chat_id, udict=udict)
            except Exception as e:
                # Deliberate: errors are passed to the consumer as values.
                yield e


def stats():
    """Return the merged ``stat`` summaries for ``users`` and ``messages``."""
    from ..core import stat

    return {
        **stat(users),
        **stat(messages),
    }