vk_messages_backup: better structure & extract richer information
This commit is contained in:
parent
130c273513
commit
02c98143d5
2 changed files with 78 additions and 40 deletions
|
@ -194,6 +194,7 @@ class simple:
|
||||||
|
|
||||||
class vk_messages_backup:
|
class vk_messages_backup:
|
||||||
storage_path: Path
|
storage_path: Path
|
||||||
|
user_id: int
|
||||||
|
|
||||||
|
|
||||||
class kobo:
|
class kobo:
|
||||||
|
|
|
@ -2,95 +2,132 @@
|
||||||
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
|
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
|
||||||
'''
|
'''
|
||||||
# note: could reuse the original repo, but little point I guess since VK closed their API
|
# note: could reuse the original repo, but little point I guess since VK closed their API
|
||||||
|
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from dataclasses import dataclass
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Iterable, NamedTuple
|
from typing import Dict, Iterable, NamedTuple
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
from ..core import Json
|
from my.core import stat, Stats, Json, Res, datetime_aware
|
||||||
|
|
||||||
from my.config import vk_messages_backup as config
|
from my.config import vk_messages_backup as config
|
||||||
|
|
||||||
|
|
||||||
Uid = str
|
# I think vk_messages_backup used this tz?
|
||||||
Name = str
|
# not sure if vk actually used to return this tz in api?
|
||||||
|
TZ = pytz.timezone('Europe/Moscow')
|
||||||
|
|
||||||
|
|
||||||
Users = Dict[Uid, Name]
|
Uid = int
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class User:
|
||||||
|
id: Uid
|
||||||
|
first_name: str
|
||||||
|
last_name: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class Chat:
|
||||||
|
chat_id: str
|
||||||
|
title: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Message:
|
||||||
|
dt: datetime_aware
|
||||||
|
chat: Chat
|
||||||
|
id: str # todo not sure it's unique?
|
||||||
|
user: User
|
||||||
|
body: str
|
||||||
|
|
||||||
|
|
||||||
|
Users = Dict[Uid, User]
|
||||||
def users() -> Users:
|
def users() -> Users:
|
||||||
# todo cache?
|
|
||||||
files = list(sorted(config.storage_path.glob('user_*.json')))
|
files = list(sorted(config.storage_path.glob('user_*.json')))
|
||||||
res = {}
|
res = {}
|
||||||
for f in files:
|
for f in files:
|
||||||
j = json.loads(f.read_text())
|
j = json.loads(f.read_text())
|
||||||
uid = j['id']
|
uid = j['id']
|
||||||
uf = j['first_name']
|
res[uid] = User(
|
||||||
ul = j['last_name']
|
id=uid,
|
||||||
res[uid] = f'{uf} {ul}'
|
first_name=j['first_name'],
|
||||||
|
last_name=j['last_name'],
|
||||||
|
)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
class Message(NamedTuple):
|
# USERCHAT_TITLE = " ... "
|
||||||
chat_id: str
|
def _parse_chat(*, msg: Json, udict: Users) -> Chat:
|
||||||
dt: datetime
|
group_chat_id = msg.get('chat_id')
|
||||||
user: Name
|
if group_chat_id is not None:
|
||||||
body: str
|
chat_id = group_chat_id
|
||||||
|
title = msg['title']
|
||||||
|
else:
|
||||||
|
user_id = msg.get('user_id') or msg.get('from_id')
|
||||||
|
assert user_id is not None
|
||||||
|
user = udict[user_id]
|
||||||
|
chat_id = user_id
|
||||||
|
title = f'{user.first_name} {user.last_name}'
|
||||||
|
return Chat(
|
||||||
|
chat_id=chat_id,
|
||||||
|
title=title,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
msk_tz = pytz.timezone('Europe/Moscow')
|
def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message:
|
||||||
# todo hmm, vk_messages_backup used this tz? not sure if vk actually used to return this tz in api?
|
mid = msg['id']
|
||||||
|
md = msg['date']
|
||||||
|
|
||||||
def _parse(x: Json, chat_id: str, udict: Users) -> Message:
|
dt = datetime.fromtimestamp(md, tz=TZ)
|
||||||
mid = x['id'] # todo not sure if useful?
|
|
||||||
md = x['date']
|
|
||||||
|
|
||||||
dt = datetime.fromtimestamp(md, msk_tz)
|
|
||||||
|
|
||||||
# todo attachments? e.g. url could be an attachment
|
# todo attachments? e.g. url could be an attachment
|
||||||
# todo might be forwarded?
|
# todo might be forwarded?
|
||||||
mb = x.get('body')
|
mb = msg.get('body')
|
||||||
if mb is None:
|
if mb is None:
|
||||||
mb = x.get('text')
|
mb = msg.get('text')
|
||||||
assert mb is not None
|
assert mb is not None, msg
|
||||||
|
|
||||||
mu = x.get('user_id') or x.get('peer_id')
|
|
||||||
assert mu is not None
|
|
||||||
out = x['out'] == 1
|
|
||||||
# todo use name from the config?
|
|
||||||
user = 'you' if out else udict[mu]
|
|
||||||
|
|
||||||
# todo conversation id??
|
|
||||||
|
|
||||||
|
out = msg['out'] == 1
|
||||||
|
if out:
|
||||||
|
user = udict[config.user_id]
|
||||||
|
else:
|
||||||
|
mu = msg.get('user_id') or msg.get('from_id')
|
||||||
|
assert mu is not None, msg
|
||||||
|
user = udict[mu]
|
||||||
return Message(
|
return Message(
|
||||||
chat_id=chat_id,
|
|
||||||
dt=dt,
|
dt=dt,
|
||||||
|
chat=chat,
|
||||||
|
id=mid,
|
||||||
user=user,
|
user=user,
|
||||||
body=mb,
|
body=mb,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
from ..core.error import Res
|
|
||||||
def messages() -> Iterable[Res[Message]]:
|
def messages() -> Iterable[Res[Message]]:
|
||||||
udict = users()
|
udict = users()
|
||||||
|
|
||||||
uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \
|
uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \
|
||||||
list(sorted(config.storage_path.glob('groupchat_*.json')))
|
list(sorted(config.storage_path.glob('groupchat_*.json')))
|
||||||
for f in uchats:
|
for f in uchats:
|
||||||
chat_id = f.stem.split('_')[-1]
|
|
||||||
j = json.loads(f.read_text())
|
j = json.loads(f.read_text())
|
||||||
for x in j:
|
# extract chat from last message
|
||||||
|
try:
|
||||||
|
last = j[-1]
|
||||||
|
chat = _parse_chat(msg=last, udict=udict)
|
||||||
|
except Exception as e:
|
||||||
|
yield e
|
||||||
|
continue
|
||||||
|
|
||||||
|
for msg in j:
|
||||||
try:
|
try:
|
||||||
yield _parse(x, chat_id=chat_id, udict=udict)
|
yield _parse_msg(msg=msg, chat=chat, udict=udict)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield e
|
yield e
|
||||||
|
|
||||||
|
|
||||||
def stats():
|
def stats() -> Stats:
|
||||||
from ..core import stat
|
|
||||||
return {
|
return {
|
||||||
**stat(users),
|
**stat(users),
|
||||||
**stat(messages),
|
**stat(messages),
|
||||||
|
|
Loading…
Add table
Reference in a new issue