From 69a1624f8f407670039f01bf9ba1ebe608a77fd2 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 14 Apr 2020 22:15:35 +0100 Subject: [PATCH] use more-itertools; merge tweets --- my/twitter/all.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/my/twitter/all.py b/my/twitter/all.py index 29196e4..f1e39a7 100644 --- a/my/twitter/all.py +++ b/my/twitter/all.py @@ -1,17 +1,29 @@ """ Unified Twitter data (merged from the archive and periodic updates) """ +from itertools import chain from . import twint from . import archive +from more_itertools import unique_everseen + + +def merge_tweets(*sources): + yield from unique_everseen( + chain(*sources), + key=lambda t: t.id_str, + ) + + def tweets(): - yield from archive.tweets() - yield from twint.tweets() + # NOTE order matters.. twint seems to contain better data + # todo probably, worthy an investigation.. + yield from merge_tweets(twint.tweets(), archive.tweets()) # TODO not sure, likes vs favoites?? def likes(): - yield from archive.likes() + yield from merge_tweets(archive.likes()) # yield from twint