98 lines
2.2 KiB
Python
98 lines
2.2 KiB
Python
'''
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
'''
|
|
# note: could reuse the original repo, but little point I guess since VK closed their API
|
|
|
|
|
|
from datetime import datetime
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, NamedTuple
|
|
|
|
import pytz
|
|
|
|
from ..core import Json
|
|
|
|
from my.config import vk_messages_backup as config
|
|
|
|
|
|
# Aliases used in the signatures below.
# NOTE(review): users() stores the 'id' field exactly as loaded from JSON -- confirm it is really a str
Uid = str   # user identifier (key of the Users mapping)
Name = str  # display name of a user

# user id -> display name
Users = Dict[Uid, Name]
|
|
|
|
def users() -> Users:
    '''
    Build the user id -> display name mapping from the user_*.json files
    found in config.storage_path.

    Files are processed in sorted path order; the display name is the
    'first_name' and 'last_name' fields joined by a single space.
    '''
    # todo cache?
    names = {}
    for path in sorted(config.storage_path.glob('user_*.json')):
        info = json.loads(path.read_text())
        # keys are looked up in this order: id, first_name, last_name
        uid, first, last = (info[key] for key in ('id', 'first_name', 'last_name'))
        names[uid] = f'{first} {last}'
    return names
|
|
|
|
|
|
class Message(NamedTuple):
    '''A single chat message read from the backup.'''
    # id of the chat the message belongs to
    chat_id: str
    # time the message was sent
    dt: datetime
    # display name of the sender
    user: Name
    # text of the message
    body: str
|
|
|
|
|
|
# Timezone attached to message timestamps when they are converted to datetime.
# todo hmm, vk_messages_backup used this tz? not sure if vk actually used to return this tz in api?
msk_tz = pytz.timezone('Europe/Moscow')
|
|
|
|
def _parse(x: Json, chat_id: str, udict: Users) -> Message:
    '''
    Convert one raw message object from a chat file into a Message.

    x       -- raw message as loaded from JSON
    chat_id -- id of the chat the message came from; stored on the result as is
    udict   -- user id -> display name mapping, used to name the sender of
               incoming messages

    Raises KeyError if 'id', 'date' or 'out' is missing, or if the sender of an
    incoming message is not present in udict.
    Raises ValueError if the message has neither 'body' nor 'text', or carries
    no usable 'user_id'/'peer_id'.
    '''
    mid = x['id']  # todo not sure if useful? for now only used in error messages
    md = x['date']

    dt = datetime.fromtimestamp(md, msk_tz)

    # todo attachments? e.g. url could be an attachment
    # todo might be forwarded?
    mb = x.get('body')
    if mb is None:
        # fall back to 'text' when 'body' is absent
        mb = x.get('text')
    if mb is None:
        # explicit raise instead of assert: asserts are stripped under `python -O`,
        # which would let a None body slip into the resulting Message
        raise ValueError(f"message {mid}: neither 'body' nor 'text' is present")

    mu = x.get('user_id') or x.get('peer_id')
    if mu is None:
        raise ValueError(f"message {mid}: no usable 'user_id' or 'peer_id'")
    out = x['out'] == 1
    # todo use name from the config?
    user = 'you' if out else udict[mu]

    # todo conversation id??

    return Message(
        chat_id=chat_id,
        dt=dt,
        user=user,
        body=mb,
    )
|
|
|
|
|
|
from ..core.error import Res
|
|
def messages() -> Iterable[Res[Message]]:
    '''
    Iterate over all messages in the backup.

    Reads userchat_*.json files first, then groupchat_*.json files (each group
    in sorted path order) from config.storage_path. Every entry of a file is
    passed to _parse; if parsing raises, the exception itself is yielded in
    place of a Message, so iteration continues with the next entry.
    '''
    names = users()

    chat_files = [
        path
        for pattern in ('userchat_*.json', 'groupchat_*.json')
        for path in sorted(config.storage_path.glob(pattern))
    ]
    for path in chat_files:
        # chat id is whatever follows the last underscore in the file stem
        chat_id = path.stem.rpartition('_')[2]
        for raw in json.loads(path.read_text()):
            try:
                yield _parse(raw, chat_id=chat_id, udict=names)
            except Exception as e:
                yield e
|
|
|
|
|
|
def stats():
    '''Return the merged results of the core stat helper applied to users and messages.'''
    from ..core import stat

    combined = {}
    # later entries override earlier ones on key collision, same as dict unpacking
    combined.update(stat(users))
    combined.update(stat(messages))
    return combined
|