vk: add messages processing
This commit is contained in:
parent
c54d85037c
commit
4920defe12
2 changed files with 101 additions and 0 deletions
3
my/vk/all.py
Normal file
3
my/vk/all.py
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
def messages():
    '''
    Yield every item produced by ``vk_messages_backup.messages()``.
    '''
    # The import sits inside the generator body, so it only runs once iteration starts.
    from .vk_messages_backup import messages as backup_messages

    yield from backup_messages()
|
98
my/vk/vk_messages_backup.py
Normal file
98
my/vk/vk_messages_backup.py
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
'''
|
||||||
|
Handles VK data exported by https://github.com/Totktonada/vk_messages_backup
|
||||||
|
'''
|
||||||
|
# note: could reuse the original repo, but little point I guess since VK closed their API
|
||||||
|
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Iterable, NamedTuple
|
||||||
|
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
from ..core import Json
|
||||||
|
|
||||||
|
from my.config import vk_messages_backup as config
|
||||||
|
|
||||||
|
|
||||||
|
# Plain-string aliases used in signatures below: a user identifier and a user's name.
Uid = str
Name = str

# Lookup table from user identifier to that user's name.
Users = Dict[Uid, Name]
|
||||||
|
|
||||||
|
def users() -> Users:
    '''
    Build a user id -> full name mapping from the exported ``user_*.json`` files.

    Every file matching ``user_*.json`` directly under ``config.storage_path`` is
    read in sorted path order and contributes one entry: the value of its ``id``
    field mapped to ``'<first_name> <last_name>'``. When several files carry the
    same ``id``, the last one in sorted order wins.

    Raises:
        KeyError: a file lacks ``id``, ``first_name`` or ``last_name``.
        json.JSONDecodeError: a file does not contain valid JSON.
        UnicodeDecodeError: a file is not valid UTF-8.
    '''
    # todo cache?
    res: Users = {}
    for path in sorted(config.storage_path.glob('user_*.json')):
        # Decode explicitly as UTF-8: a bare read_text() falls back to the locale
        # encoding, which can fail on or garble non-ASCII names on non-UTF-8 systems.
        record = json.loads(path.read_text(encoding='utf-8'))
        # NOTE(review): the key is stored exactly as it appears in the JSON; if the
        # export holds numeric ids these are ints despite ``Uid = str`` -- confirm.
        uid = record['id']
        first = record['first_name']
        last = record['last_name']
        res[uid] = f'{first} {last}'
    return res
|
||||||
|
|
||||||
|
|
||||||
|
class Message(NamedTuple):
    '''
    One message taken from an exported chat, reduced to four fields.
    '''

    # identifier of the chat the message belongs to
    chat_id: str
    # timestamp of the message
    dt: datetime
    # name of the message's author
    user: Name
    # text of the message
    body: str
|
||||||
|
|
||||||
|
|
||||||
|
# todo: vk_messages_backup apparently used this timezone; not sure whether the VK API
# itself actually used to return timestamps in it
msk_tz = pytz.timezone('Europe/Moscow')
|
||||||
|
|
||||||
|
def _parse(x: Json, chat_id: str, udict: Users) -> Message:
    '''
    Convert one raw message entry from a chat file into a ``Message``.

    Args:
        x: a single message entry as loaded from a chat JSON file.
        chat_id: identifier of the chat the entry came from; copied into the result.
        udict: user id -> name mapping used to resolve the author of messages
            that are not marked as outgoing.

    Returns:
        The parsed ``Message``. ``user`` is ``'you'`` when ``x['out'] == 1``,
        otherwise the name ``udict`` holds for the author id.

    Raises:
        KeyError: ``id``, ``date`` or ``out`` is missing, or the author id is
            not present in ``udict``.
        AssertionError: neither ``body`` nor ``text`` is present, or neither
            ``user_id`` nor ``peer_id`` yields an author id.
    '''
    mid = x['id']  # todo not sure if useful? (for now only used in error messages)
    md = x['date']

    # ``date`` is interpreted as a POSIX timestamp and expressed in msk_tz.
    dt = datetime.fromtimestamp(md, msk_tz)

    # todo attachments? e.g. url could be an attachment
    # todo might be forwarded?
    # The text is looked up under ``body`` first, then under ``text``.
    mb = x.get('body')
    if mb is None:
        mb = x.get('text')
    if mb is None:
        # Raised explicitly instead of via ``assert`` so the check is not stripped
        # under ``python -O``; the exception type is unchanged for callers.
        raise AssertionError(f'message {mid}: neither "body" nor "text" is present')

    mu = x.get('user_id') or x.get('peer_id')
    if mu is None:
        raise AssertionError(f'message {mid}: neither "user_id" nor "peer_id" is present')

    out = x['out'] == 1
    # todo use name from the config?
    user = 'you' if out else udict[mu]

    # todo conversation id??

    return Message(
        chat_id=chat_id,
        dt=dt,
        user=user,
        body=mb,
    )
|
||||||
|
|
||||||
|
|
||||||
|
from ..core.error import Res
|
||||||
|
def messages() -> Iterable[Res[Message]]:
    '''
    Yield every message found in the exported chat files.

    All ``userchat_*.json`` files (sorted) are processed first, followed by all
    ``groupchat_*.json`` files (sorted), both taken from ``config.storage_path``.
    The chat id is the part of the file stem after the last underscore. Each
    entry of a file is parsed individually: a successfully parsed entry is
    yielded as a ``Message``, and an entry that fails to parse is yielded as the
    exception it raised, so one bad entry does not stop the iteration.

    Raises:
        json.JSONDecodeError: a chat file does not contain valid JSON.
        UnicodeDecodeError: a chat file is not valid UTF-8.
    '''
    udict = users()

    root = config.storage_path
    chat_files = sorted(root.glob('userchat_*.json')) + sorted(root.glob('groupchat_*.json'))
    for path in chat_files:
        chat_id = path.stem.split('_')[-1]
        # Decode explicitly as UTF-8: a bare read_text() falls back to the locale
        # encoding, which can fail on or garble non-ASCII text on non-UTF-8 systems.
        entries = json.loads(path.read_text(encoding='utf-8'))
        for entry in entries:
            # Only the parsing is guarded; yielding happens outside the ``try`` so
            # that an exception thrown into the generator by the consumer is not
            # mistaken for a parse failure and yielded back.
            try:
                parsed = _parse(entry, chat_id=chat_id, udict=udict)
            except Exception as e:
                yield e
            else:
                yield parsed
|
||||||
|
|
||||||
|
|
||||||
|
def stats():
    '''
    Return one dict combining ``stat(users)`` and ``stat(messages)``.

    Keys produced for ``messages`` override equal keys produced for ``users``.
    '''
    from ..core import stat

    combined = {}
    for provider in (users, messages):
        combined.update(stat(provider))
    return combined
|
Loading…
Add table
Reference in a new issue