From 03773a7b2c33557be0ff610eb9c6c122a3190901 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 22 May 2020 19:00:02 +0100 Subject: [PATCH] twitter module: prettify top level twitter.all --- doc/MODULES.org | 4 +++- my/reading/polar.py | 2 +- my/twitter/all.py | 23 ++++++++++----------- my/twitter/archive.py | 42 +------------------------------------- my/twitter/common.py | 10 +++++++++ my/twitter/twint.py | 2 ++ tests/tweets.py | 47 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 75 insertions(+), 55 deletions(-) create mode 100644 my/twitter/common.py create mode 100644 tests/tweets.py diff --git a/doc/MODULES.org b/doc/MODULES.org index 7f83e9f..1a05624 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -146,6 +146,8 @@ for cls, p in modules: Uses [[https://github.com/twintproject/twint][Twint]] data export. + Requirements: =pip3 install --user dataset= + #+begin_src python class twint: export_path: Paths # path[s]/glob to the twint Sqlite database @@ -171,7 +173,7 @@ for cls, p in modules: #+end_src ** [[file:../my/reading/polar.py][my.reading.polar]] - [[https://github.com/burtonator/polar-books][Polar]] articles and highlights + [[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights #+begin_src python class polar: diff --git a/my/reading/polar.py b/my/reading/polar.py index 2db5e4d..cec7a46 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -1,5 +1,5 @@ """ -[[https://github.com/burtonator/polar-books][Polar]] articles and highlights +[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights """ from pathlib import Path from typing import Type, Any, cast, TYPE_CHECKING diff --git a/my/twitter/all.py b/my/twitter/all.py index be4bdbf..acb59a2 100644 --- a/my/twitter/all.py +++ b/my/twitter/all.py @@ -1,24 +1,23 @@ """ Unified Twitter data (merged from the archive and periodic updates) """ -from itertools import chain -from . import twint -from . import archive +# NOTE: you can comment out the sources you don't need -# TODO move to .common? -def merge_tweets(*sources): - from more_itertools import unique_everseen - yield from unique_everseen( - chain(*sources), - key=lambda t: t.id_str, - ) +from . import twint, archive +from .common import merge_tweets def tweets(): - yield from merge_tweets(twint.tweets(), archive.tweets()) + yield from merge_tweets( + twint .tweets(), + archive.tweets(), + ) def likes(): - yield from merge_tweets(twint.likes(), archive.likes()) + yield from merge_tweets( + twint .likes(), + archive.likes(), + ) diff --git a/my/twitter/archive.py b/my/twitter/archive.py index e545cd6..ad006af 100755 --- a/my/twitter/archive.py +++ b/my/twitter/archive.py @@ -160,50 +160,10 @@ class ZipExport: yield Like(r, screen_name=self.screen_name()) +# todo not sure about list and sorting? although can't hurt considering json is not iterative? def tweets() -> List[Tweet]: return list(sorted(ZipExport().tweets(), key=lambda t: t.dt)) def likes() -> List[Like]: return list(ZipExport().likes()) - - -def test_tweet(): - raw = """ - { - "retweeted" : false, - "entities" : { - "hashtags" : [ ], - "symbols" : [ ], - "user_mentions" : [ ], - "urls" : [ { - "url" : "https://t.co/vUg4W6nxwU", - "expanded_url" : "https://intelligence.org/2013/12/13/aaronson/", - "display_url" : "intelligence.org/2013/12/13/aar…", - "indices" : [ "120", "143" ] - } - ] - }, - "display_text_range" : [ "0", "90" ], - "favorite_count" : "0", - "in_reply_to_status_id_str" : "24123424", - "id_str" : "2328934829084", - "in_reply_to_user_id" : "23423424", - "truncated" : false, - "retweet_count" : "0", - "id" : "23492349032940", - "in_reply_to_status_id" : "23482984932084", - "created_at" : "Thu Aug 30 07:12:48 +0000 2012", - "favorited" : false, - "full_text" : "this is a test tweet", - "lang" : "ru", - "in_reply_to_screen_name" : "whatever", - "in_reply_to_user_id_str" : "3748274" -} - """ - t = Tweet(json.loads(raw), screen_name='whatever') - assert t.permalink is not None - assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc) - assert t.text == 'this is a test tweet' - assert t.tid == '2328934829084' - assert t.entities is not None diff --git a/my/twitter/common.py b/my/twitter/common.py new file mode 100644 index 0000000..1bf36f0 --- /dev/null +++ b/my/twitter/common.py @@ -0,0 +1,10 @@ +from itertools import chain + +from more_itertools import unique_everseen + + +def merge_tweets(*sources): + yield from unique_everseen( + chain(*sources), + key=lambda t: t.id_str, + ) diff --git a/my/twitter/twint.py b/my/twitter/twint.py index 99b858e..0c45a0d 100644 --- a/my/twitter/twint.py +++ b/my/twitter/twint.py @@ -2,6 +2,8 @@ Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twint][Twint]] data export. + +Requirements: =pip3 install --user dataset= """ from ..core.common import Paths diff --git a/tests/tweets.py b/tests/tweets.py new file mode 100644 index 0000000..3c5aa4e --- /dev/null +++ b/tests/tweets.py @@ -0,0 +1,47 @@ +from datetime import datetime +import json + +import pytz + +from my.twitter.archive import Tweet + + +def test_tweet(): + raw = """ + { + "retweeted" : false, + "entities" : { + "hashtags" : [ ], + "symbols" : [ ], + "user_mentions" : [ ], + "urls" : [ { + "url" : "https://t.co/vUg4W6nxwU", + "expanded_url" : "https://intelligence.org/2013/12/13/aaronson/", + "display_url" : "intelligence.org/2013/12/13/aar…", + "indices" : [ "120", "143" ] + } + ] + }, + "display_text_range" : [ "0", "90" ], + "favorite_count" : "0", + "in_reply_to_status_id_str" : "24123424", + "id_str" : "2328934829084", + "in_reply_to_user_id" : "23423424", + "truncated" : false, + "retweet_count" : "0", + "id" : "23492349032940", + "in_reply_to_status_id" : "23482984932084", + "created_at" : "Thu Aug 30 07:12:48 +0000 2012", + "favorited" : false, + "full_text" : "this is a test tweet", + "lang" : "ru", + "in_reply_to_screen_name" : "whatever", + "in_reply_to_user_id_str" : "3748274" +} + """ + t = Tweet(json.loads(raw), screen_name='whatever') + assert t.permalink is not None + assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc) + assert t.text == 'this is a test tweet' + assert t.tid == '2328934829084' + assert t.entities is not None