diff --git a/my/vk/all.py b/my/vk/all.py
new file mode 100644
index 0000000..c290352
--- /dev/null
+++ b/my/vk/all.py
@@ -0,0 +1,5 @@
+def messages():
+    '''Yield everything produced by vk_messages_backup.messages().'''
+    # imported at call time: the sibling module reads my.config as soon as it is imported
+    from . import vk_messages_backup as VMB
+    yield from VMB.messages()
diff --git a/my/vk/vk_messages_backup.py b/my/vk/vk_messages_backup.py
new file mode 100644
index 0000000..63f1d8e
--- /dev/null
+++ b/my/vk/vk_messages_backup.py
@@ -0,0 +1,121 @@
+'''
+Handles VK data exported by https://github.com/Totktonada/vk_messages_backup
+'''
+# note: could reuse the original repo, but little point I guess since VK closed their API
+
+
+from datetime import datetime
+import json
+from pathlib import Path
+from typing import Dict, Iterable, NamedTuple
+
+import pytz
+
+from ..core import Json
+from ..core.error import Res
+
+from my.config import vk_messages_backup as config
+
+
+Uid = str
+Name = str
+
+
+Users = Dict[Uid, Name]
+
+
+def users() -> Users:
+    '''
+    Map each exported user's id to their "first_name last_name" display name.
+
+    Reads every user_*.json file under config.storage_path.
+    '''
+    # todo cache?
+    res: Users = {}
+    for f in sorted(config.storage_path.glob('user_*.json')):
+        # decode explicitly as UTF-8 rather than relying on the locale's default encoding
+        j = json.loads(f.read_text(encoding='utf-8'))
+        # NOTE(review): the id is stored exactly as it appears in the JSON, while Uid is declared as str -- confirm
+        uid = j['id']
+        uf = j['first_name']
+        ul = j['last_name']
+        res[uid] = f'{uf} {ul}'
+    return res
+
+
+class Message(NamedTuple):
+    '''A single parsed chat message.'''
+    chat_id: str  # part of the chat file's stem after the last '_'
+    dt: datetime  # timezone-aware, in Europe/Moscow
+    user: Name    # 'you' for outgoing messages, otherwise the sender's name from users()
+    body: str     # message text: 'body' if present, else 'text'
+
+
+msk_tz = pytz.timezone('Europe/Moscow')
+# todo hmm, vk_messages_backup used this tz? not sure if vk actually used to return this tz in api?
+
+
+def _parse(x: Json, chat_id: str, udict: Users) -> Message:
+    '''
+    Convert one raw message dict into a Message.
+
+    Raises ValueError if the message has no text or no sender id, and KeyError
+    if 'date' or 'out' is missing or an incoming sender isn't present in udict.
+    '''
+    mid = x.get('id')  # only used to identify the message in errors
+    dt = datetime.fromtimestamp(x['date'], msk_tz)
+
+    # todo attachments? e.g. url could be an attachment
+    # todo might be forwarded?
+    mb = x.get('body')
+    if mb is None:
+        mb = x.get('text')
+    # explicit raises instead of asserts, so the checks survive python -O
+    if mb is None:
+        raise ValueError(f'message {mid}: neither body nor text present')
+
+    # NOTE(review): peer_id might identify the conversation rather than the sender in group chats -- confirm
+    mu = x.get('user_id') or x.get('peer_id')
+    if mu is None:
+        raise ValueError(f'message {mid}: neither user_id nor peer_id present')
+    out = x['out'] == 1
+    # todo use name from the config?
+    user = 'you' if out else udict[mu]
+
+    # todo conversation id??
+    return Message(
+        chat_id=chat_id,
+        dt=dt,
+        user=user,
+        body=mb,
+    )
+
+
+def messages() -> Iterable[Res[Message]]:
+    '''
+    Yield the messages of all userchat_*.json files, then of all groupchat_*.json files.
+
+    A message that fails to parse is yielded as the exception itself, so one bad
+    entry doesn't abort the iteration.
+    '''
+    udict = users()
+
+    root = config.storage_path
+    chats = sorted(root.glob('userchat_*.json')) + sorted(root.glob('groupchat_*.json'))
+    for f in chats:
+        chat_id = f.stem.split('_')[-1]
+        j = json.loads(f.read_text(encoding='utf-8'))
+        for x in j:
+            try:
+                yield _parse(x, chat_id=chat_id, udict=udict)
+            except Exception as e:
+                yield e
+
+
+def stats():
+    '''Combine the ..core.stat summaries of users() and messages() into one dict.'''
+    from ..core import stat
+    return {
+        **stat(users),
+        **stat(messages),
+    }