HPI/my/vk/vk_messages_backup.py
2021-04-02 08:38:06 +01:00

97 lines
2.2 KiB
Python

'''
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
'''
# note: could reuse the original repo, but little point I guess since VK closed their API
from datetime import datetime
import json
from typing import Dict, Iterable, NamedTuple
import pytz
from ..core import Json
from my.config import vk_messages_backup as config
# Identifier of a VK user; used as the key type of the Users mapping.
Uid = str
# Display name of a user, in the "<first_name> <last_name>" form built by users().
Name = str
# Lookup table from user identifier to display name.
Users = Dict[Uid, Name]
def users() -> Users:
    '''
    Build the user id -> display name mapping from the exported user files.

    Every file matching ``user_*.json`` under ``config.storage_path`` is
    parsed, in sorted path order. The display name is the file's
    ``first_name`` and ``last_name`` fields joined with a single space.
    If the same id occurs in several files, the file that sorts last wins.

    Raises:
        KeyError: a file lacks ``id``, ``first_name`` or ``last_name``.
        json.JSONDecodeError: a file does not contain valid JSON.
    '''
    # todo cache?
    res: Users = {}
    for f in sorted(config.storage_path.glob('user_*.json')):
        # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
        # the locale default, so non-ASCII names load the same on every platform.
        j = json.loads(f.read_text(encoding='utf-8'))
        # NOTE(review): the id is stored exactly as decoded from JSON, so the
        # key type follows the export (may be int even though Uid is str) -- confirm.
        uid = j['id']
        first = j['first_name']
        last = j['last_name']
        res[uid] = f'{first} {last}'
    return res
class Message(NamedTuple):
    '''
    A single chat message from the export, as produced by _parse().
    '''
    # Chat identifier supplied by the caller; messages() takes it from the export file name.
    chat_id: str
    # Timezone-aware timestamp; _parse() builds it in the msk_tz (Europe/Moscow) timezone.
    dt: datetime
    # 'you' for outgoing messages, otherwise the sender's name from the users mapping.
    user: Name
    # Message text, read from the 'body' field or, when that is absent, from 'text'.
    body: str
# Timezone attached to message timestamps when _parse() converts them.
msk_tz = pytz.timezone('Europe/Moscow')
# todo: vk_messages_backup seemingly used this timezone; not sure whether the VK API itself ever returned times in it?
def _parse(x: Json, chat_id: str, udict: Users) -> Message:
    '''
    Convert one raw message object from a chat file into a Message.

    x:       decoded JSON object of a single message
    chat_id: identifier of the chat the message belongs to
    udict:   user id -> display name mapping used to resolve the sender

    The text comes from 'body', falling back to 'text'. The sender id comes
    from 'user_id', falling back to 'peer_id'. Outgoing messages ('out' == 1)
    are attributed to 'you' without consulting udict.

    Raises KeyError when 'id', 'date' or 'out' is missing or the sender is not
    in udict, and AssertionError when no text or no sender id is present.
    '''
    _message_id = x['id']  # todo not sure if useful? (the lookup still requires the key to exist)
    when = datetime.fromtimestamp(x['date'], msk_tz)

    # todo attachments? e.g. url could be an attachment
    # todo might be forwarded?
    text = x.get('body')
    if text is None:
        text = x.get('text')
    assert text is not None

    sender_id = x.get('user_id') or x.get('peer_id')
    assert sender_id is not None

    # todo use name from the config?
    if x['out'] == 1:
        author = 'you'
    else:
        author = udict[sender_id]

    # todo conversation id??
    return Message(chat_id=chat_id, dt=when, user=author, body=text)
from ..core.error import Res
def messages() -> Iterable[Res[Message]]:
    '''
    Yield every message found in the exported chat files.

    Files matching ``userchat_*.json`` are processed first, then those
    matching ``groupchat_*.json``, each group in sorted path order. A message
    that fails to parse is not raised: the exception object is yielded in its
    place so the caller can decide how to handle it.

    The chat id is the part of the file stem after the last underscore, so the
    ``userchat``/``groupchat`` prefix is not part of it.

    Raises:
        json.JSONDecodeError: a chat file does not contain valid JSON.
    '''
    udict = users()
    root = config.storage_path
    chat_files = sorted(root.glob('userchat_*.json')) + sorted(root.glob('groupchat_*.json'))
    for f in chat_files:
        chat_id = f.stem.split('_')[-1]
        # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
        # the locale default, so non-ASCII text loads the same on every platform.
        j = json.loads(f.read_text(encoding='utf-8'))
        for x in j:
            try:
                yield _parse(x, chat_id=chat_id, udict=udict)
            except Exception as e:
                # One malformed message must not abort the whole iteration.
                yield e
def stats():
    '''
    Return the combined statistics for users() and messages().

    Both results of the core ``stat`` helper are merged into one dict; on a
    key clash the entry for messages replaces the one for users.
    '''
    from ..core import stat

    combined = {}
    for provider in (users, messages):
        combined.update(stat(provider))
    return combined