From 1e2fc3bec78edef1e2985726c74e3062735d5c60 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Mon, 30 May 2022 20:24:24 +0100 Subject: [PATCH] twitter.archive: unescape stuff like &lt;/&gt; --- my/twitter/archive.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/my/twitter/archive.py b/my/twitter/archive.py index 6533f60..342e05b 100644 --- a/my/twitter/archive.py +++ b/my/twitter/archive.py @@ -18,6 +18,7 @@ except ImportError as e: from dataclasses import dataclass +import html from ..core.common import Paths, datetime_aware from ..core.error import Res @@ -72,7 +73,10 @@ class Tweet(NamedTuple): @property def text(self) -> str: - return self.raw['full_text'] + res = self.raw['full_text'] + # replace stuff like &lt;/&gt; + res = html.unescape(res) + return res @property def urls(self) -> List[str]: @@ -116,7 +120,11 @@ class Like(NamedTuple): @property def text(self) -> Optional[str]: # ugh. I think none means that tweet was deleted? - return self.raw.get('fullText') + res = self.raw.get('fullText') + if res is None: + return None + res = html.unescape(res) + return res # TODO deprecate? @property