vk_messages_backup: add unique_everseen to prevent duplicate messages

This commit is contained in:
Dima Gerasimov 2023-02-28 03:44:10 +00:00 committed by karlicoss
parent a7099e2efc
commit 6dc5e7575f

View file

@ -5,8 +5,9 @@ VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totkton
from datetime import datetime from datetime import datetime
from dataclasses import dataclass from dataclasses import dataclass
import json import json
from typing import Dict, Iterable, NamedTuple from typing import Dict, Iterator, NamedTuple
from more_itertools import unique_everseen
import pytz import pytz
from my.core import stat, Stats, Json, Res, datetime_aware from my.core import stat, Stats, Json, Res, datetime_aware
@ -34,7 +35,7 @@ class Chat:
title: str title: str
@dataclass @dataclass(frozen=True)
class Message: class Message:
dt: datetime_aware dt: datetime_aware
chat: Chat chat: Chat
@ -114,7 +115,7 @@ def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message:
) )
def messages() -> Iterable[Res[Message]]: def _messages() -> Iterator[Res[Message]]:
udict = users() udict = users()
uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \ uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \
@ -144,6 +145,11 @@ def messages() -> Iterable[Res[Message]]:
yield e yield e
def messages() -> Iterator[Res[Message]]:
    """Yield the items produced by `_messages()` with repeats filtered out.

    It seems that messages were sometimes duplicated during backup, so the
    raw stream is passed through `unique_everseen` before being yielded.
    """
    deduplicated = unique_everseen(_messages())
    yield from deduplicated
def stats() -> Stats: def stats() -> Stats:
return { return {
**stat(users), **stat(users),