From 6dc5e7575ffa8ffee3c4aa3cedcb70e99ad6a7dd Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 28 Feb 2023 03:44:10 +0000 Subject: [PATCH] vk_messages_backup: add unique_everseen to prevent duplicate messages --- my/vk/vk_messages_backup.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/my/vk/vk_messages_backup.py b/my/vk/vk_messages_backup.py index 78b595e..089605b 100644 --- a/my/vk/vk_messages_backup.py +++ b/my/vk/vk_messages_backup.py @@ -5,8 +5,9 @@ VK data (exported by [[https://github.com/Totktonada/vk_messages_backup][Totkton from datetime import datetime from dataclasses import dataclass import json -from typing import Dict, Iterable, NamedTuple +from typing import Dict, Iterator, NamedTuple +from more_itertools import unique_everseen import pytz from my.core import stat, Stats, Json, Res, datetime_aware @@ -34,7 +35,7 @@ class Chat: title: str -@dataclass +@dataclass(frozen=True) class Message: dt: datetime_aware chat: Chat @@ -114,7 +115,7 @@ def _parse_msg(*, msg: Json, chat: Chat, udict: Users) -> Message: ) -def messages() -> Iterable[Res[Message]]: +def _messages() -> Iterator[Res[Message]]: udict = users() uchats = list(sorted(config.storage_path.glob('userchat_*.json' ))) + \ @@ -144,6 +145,11 @@ def messages() -> Iterable[Res[Message]]: yield e +def messages() -> Iterator[Res[Message]]: + # seems that during backup messages were sometimes duplicated.. + yield from unique_everseen(_messages()) + + def stats() -> Stats: return { **stat(users),