vk_messages_backup: some cleanup + switch to get_files
This commit is contained in:
parent
24da04f142
commit
105928238f
1 changed file with 11 additions and 7 deletions
|
@@ -2,8 +2,8 @@
|
||||||
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
|
VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totktonada/vk_messages_backup]])
|
||||||
'''
|
'''
|
||||||
# note: could reuse the original repo, but little point I guess since VK closed their API
|
# note: could reuse the original repo, but little point I guess since VK closed their API
|
||||||
from datetime import datetime
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Iterator
|
from typing import Dict, Iterator
|
||||||
|
|
||||||
|
@@ -22,6 +22,7 @@ TZ = pytz.timezone('Europe/Moscow')
|
||||||
|
|
||||||
Uid = int
|
Uid = int
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class User:
|
class User:
|
||||||
id: Uid
|
id: Uid
|
||||||
|
@@ -45,8 +46,10 @@ class Message:
|
||||||
|
|
||||||
|
|
||||||
Users = Dict[Uid, User]
|
Users = Dict[Uid, User]
|
||||||
|
|
||||||
|
|
||||||
def users() -> Users:
|
def users() -> Users:
|
||||||
files = list(sorted(config.storage_path.glob('user_*.json')))
|
files = get_files(config.storage_path, glob='user_*.json')
|
||||||
res = {}
|
res = {}
|
||||||
for f in files:
|
for f in files:
|
||||||
j = json.loads(f.read_text())
|
j = json.loads(f.read_text())
|
||||||
|
@@ -60,6 +63,8 @@ def users() -> Users:
|
||||||
|
|
||||||
|
|
||||||
GROUP_CHAT_MIN_ID = 2000000000
|
GROUP_CHAT_MIN_ID = 2000000000
|
||||||
|
|
||||||
|
|
||||||
def _parse_chat(*, msg: Json, udict: Users) -> Chat:
|
def _parse_chat(*, msg: Json, udict: Users) -> Chat:
|
||||||
# exported with newer api, peer_id is a proper identifier both for users and chats
|
# exported with newer api, peer_id is a proper identifier both for users and chats
|
||||||
peer_id = msg.get('peer_id')
|
peer_id = msg.get('peer_id')
|
||||||
|
@@ -88,13 +93,13 @@ def _parse_chat(*, msg: Json, udict: Users) -> Chat:
|
||||||
|
|
||||||
def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message:
|
def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message:
|
||||||
mid = msg['id']
|
mid = msg['id']
|
||||||
md = msg['date']
|
md = msg['date']
|
||||||
|
|
||||||
dt = datetime.fromtimestamp(md, tz=TZ)
|
dt = datetime.fromtimestamp(md, tz=TZ)
|
||||||
|
|
||||||
# todo attachments? e.g. url could be an attachment
|
# todo attachments? e.g. url could be an attachment
|
||||||
# todo might be forwarded?
|
# todo might be forwarded?
|
||||||
mb = msg.get('body')
|
mb = msg.get('body')
|
||||||
if mb is None:
|
if mb is None:
|
||||||
mb = msg.get('text')
|
mb = msg.get('text')
|
||||||
assert mb is not None, msg
|
assert mb is not None, msg
|
||||||
|
@@ -103,7 +108,7 @@ def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message:
|
||||||
if out:
|
if out:
|
||||||
user = udict[config.user_id]
|
user = udict[config.user_id]
|
||||||
else:
|
else:
|
||||||
mu = msg.get('user_id') or msg.get('from_id')
|
mu = msg.get('user_id') or msg.get('from_id')
|
||||||
assert mu is not None, msg
|
assert mu is not None, msg
|
||||||
user = udict[mu]
|
user = udict[mu]
|
||||||
return Message(
|
return Message(
|
||||||
|
@@ -118,8 +123,7 @@ def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message:
|
||||||
def _messages() -> Iterator[Res[Message]]:
|
def _messages() -> Iterator[Res[Message]]:
|
||||||
udict = users()
|
udict = users()
|
||||||
|
|
||||||
uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \
|
uchats = get_files(config.storage_path, glob='userchat_*.json') + get_files(config.storage_path, glob='groupchat_*.json')
|
||||||
list(sorted(config.storage_path.glob('groupchat_*.json')))
|
|
||||||
for f in uchats:
|
for f in uchats:
|
||||||
j = json.loads(f.read_text())
|
j = json.loads(f.read_text())
|
||||||
# ugh. very annoying, sometimes not possible to extract title from last message
|
# ugh. very annoying, sometimes not possible to extract title from last message
|
||||||
|
|
Loading…
Add table
Reference in a new issue