vk: add messages processing
This commit is contained in:
parent
c54d85037c
commit
4920defe12
2 changed files with 101 additions and 0 deletions
3
my/vk/all.py
Normal file
3
my/vk/all.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
def messages():
|
||||
from . import vk_messages_backup as VMB
|
||||
yield from VMB.messages()
|
98
my/vk/vk_messages_backup.py
Normal file
98
my/vk/vk_messages_backup.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
'''
|
||||
Handles VK data exported by https://github.com/Totktonada/vk_messages_backup
|
||||
'''
|
||||
# note: could reuse the original repo, but little point I guess since VK closed their API
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, NamedTuple
|
||||
|
||||
import pytz
|
||||
|
||||
from ..core import Json
|
||||
|
||||
from my.config import vk_messages_backup as config
|
||||
|
||||
|
||||
Uid = str
|
||||
Name = str
|
||||
|
||||
|
||||
Users = Dict[Uid, Name]
|
||||
|
||||
def users() -> Users:
|
||||
# todo cache?
|
||||
files = list(sorted(config.storage_path.glob('user_*.json')))
|
||||
res = {}
|
||||
for f in files:
|
||||
j = json.loads(f.read_text())
|
||||
uid = j['id']
|
||||
uf = j['first_name']
|
||||
ul = j['last_name']
|
||||
res[uid] = f'{uf} {ul}'
|
||||
return res
|
||||
|
||||
|
||||
class Message(NamedTuple):
|
||||
chat_id: str
|
||||
dt: datetime
|
||||
user: Name
|
||||
body: str
|
||||
|
||||
|
||||
msk_tz = pytz.timezone('Europe/Moscow')
|
||||
# todo hmm, vk_messages_backup used this tz? not sure if vk actually used to return this tz in api?
|
||||
|
||||
def _parse(x: Json, chat_id: str, udict: Users) -> Message:
|
||||
mid = x['id'] # todo not sure if useful?
|
||||
md = x['date']
|
||||
|
||||
dt = datetime.fromtimestamp(md, msk_tz)
|
||||
|
||||
# todo attachments? e.g. url could be an attachment
|
||||
# todo might be forwarded?
|
||||
mb = x.get('body')
|
||||
if mb is None:
|
||||
mb = x.get('text')
|
||||
assert mb is not None
|
||||
|
||||
mu = x.get('user_id') or x.get('peer_id')
|
||||
assert mu is not None
|
||||
out = x['out'] == 1
|
||||
# todo use name from the config?
|
||||
user = 'you' if out else udict[mu]
|
||||
|
||||
# todo conversation id??
|
||||
|
||||
return Message(
|
||||
chat_id=chat_id,
|
||||
dt=dt,
|
||||
user=user,
|
||||
body=mb,
|
||||
)
|
||||
|
||||
|
||||
from ..core.error import Res
|
||||
def messages() -> Iterable[Res[Message]]:
|
||||
udict = users()
|
||||
|
||||
uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \
|
||||
list(sorted(config.storage_path.glob('groupchat_*.json')))
|
||||
for f in uchats:
|
||||
chat_id = f.stem.split('_')[-1]
|
||||
j = json.loads(f.read_text())
|
||||
for x in j:
|
||||
try:
|
||||
yield _parse(x, chat_id=chat_id, udict=udict)
|
||||
except Exception as e:
|
||||
yield e
|
||||
|
||||
|
||||
def stats():
|
||||
from ..core import stat
|
||||
return {
|
||||
**stat(users),
|
||||
**stat(messages),
|
||||
}
|
Loading…
Add table
Reference in a new issue