twitter module: prettify top level twitter.all

Dima Gerasimov 2020-05-22 19:00:02 +01:00
parent c410daa484
commit 03773a7b2c
7 changed files with 75 additions and 55 deletions


@@ -146,6 +146,8 @@ for cls, p in modules:
Uses [[https://github.com/twintproject/twint][Twint]] data export.
Requirements: =pip3 install --user dataset=
#+begin_src python
class twint:
    export_path: Paths # path[s]/glob to the twint Sqlite database
@@ -171,7 +173,7 @@ for cls, p in modules:
#+end_src
** [[file:../my/reading/polar.py][my.reading.polar]]
-[[https://github.com/burtonator/polar-books][Polar]] articles and highlights
+[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
#+begin_src python
class polar:

my/reading/polar.py

@@ -1,5 +1,5 @@
"""
-[[https://github.com/burtonator/polar-books][Polar]] articles and highlights
+[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
"""
from pathlib import Path
from typing import Type, Any, cast, TYPE_CHECKING

my/twitter/all.py

@@ -1,24 +1,23 @@
"""
Unified Twitter data (merged from the archive and periodic updates)
"""
-from itertools import chain
-from . import twint
-from . import archive
# NOTE: you can comment out the sources you don't need
-# TODO move to .common?
-def merge_tweets(*sources):
-    from more_itertools import unique_everseen
-    yield from unique_everseen(
-        chain(*sources),
-        key=lambda t: t.id_str,
-    )
+from . import twint, archive
+from .common import merge_tweets
def tweets():
-    yield from merge_tweets(twint.tweets(), archive.tweets())
+    yield from merge_tweets(
+        twint  .tweets(),
+        archive.tweets(),
+    )
def likes():
-    yield from merge_tweets(twint.likes(), archive.likes())
+    yield from merge_tweets(
+        twint  .likes(),
+        archive.likes(),
+    )
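Not part of the diff, but for orientation: the public surface of my.twitter.all is unchanged by this refactor: tweets() and likes() still yield the merged, deduplicated stream. A minimal consumer sketch (assumes both the twint and archive sources are configured):

    import my.twitter.all as twitter

    # merged from twint + archive, deduplicated by id_str (see my/twitter/common.py below)
    for t in twitter.tweets():
        print(t.id_str)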

my/twitter/archive.py

@@ -160,50 +160,10 @@ class ZipExport:
            yield Like(r, screen_name=self.screen_name())

# todo not sure about list and sorting? although can't hurt considering json is not iterative?
def tweets() -> List[Tweet]:
    return list(sorted(ZipExport().tweets(), key=lambda t: t.dt))


def likes() -> List[Like]:
    return list(ZipExport().likes())

-def test_tweet():
-    raw = """
-    {
-        "retweeted" : false,
-        "entities" : {
-            "hashtags" : [ ],
-            "symbols" : [ ],
-            "user_mentions" : [ ],
-            "urls" : [ {
-                "url" : "https://t.co/vUg4W6nxwU",
-                "expanded_url" : "https://intelligence.org/2013/12/13/aaronson/",
-                "display_url" : "intelligence.org/2013/12/13/aar…",
-                "indices" : [ "120", "143" ]
-            }
-            ]
-        },
-        "display_text_range" : [ "0", "90" ],
-        "favorite_count" : "0",
-        "in_reply_to_status_id_str" : "24123424",
-        "id_str" : "2328934829084",
-        "in_reply_to_user_id" : "23423424",
-        "truncated" : false,
-        "retweet_count" : "0",
-        "id" : "23492349032940",
-        "in_reply_to_status_id" : "23482984932084",
-        "created_at" : "Thu Aug 30 07:12:48 +0000 2012",
-        "favorited" : false,
-        "full_text" : "this is a test tweet",
-        "lang" : "ru",
-        "in_reply_to_screen_name" : "whatever",
-        "in_reply_to_user_id_str" : "3748274"
-    }
-    """
-    t = Tweet(json.loads(raw), screen_name='whatever')
-    assert t.permalink is not None
-    assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
-    assert t.text == 'this is a test tweet'
-    assert t.tid == '2328934829084'
-    assert t.entities is not None

my/twitter/common.py (new file, +10)

@@ -0,0 +1,10 @@
+from itertools import chain
+
+from more_itertools import unique_everseen
+
+
+def merge_tweets(*sources):
+    yield from unique_everseen(
+        chain(*sources),
+        key=lambda t: t.id_str,
+    )
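A quick illustration of the merge semantics (a sketch with stand-in objects, not part of the commit): duplicates are dropped by id_str and the first occurrence wins, so the order in which sources are passed decides which copy is kept.

    from types import SimpleNamespace as T  # stand-in for real tweet objects
    from my.twitter.common import merge_tweets

    twint_src   = [T(id_str='1', text='from twint'), T(id_str='2', text='twint only')]
    archive_src = [T(id_str='1', text='from archive'), T(id_str='3', text='archive only')]

    merged = list(merge_tweets(twint_src, archive_src))
    assert [t.id_str for t in merged] == ['1', '2', '3']
    assert merged[0].text == 'from twint'  # earlier source wins for duplicates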

my/twitter/twint.py

@@ -2,6 +2,8 @@
Twitter data (tweets and favorites).
Uses [[https://github.com/twintproject/twint][Twint]] data export.
Requirements: =pip3 install --user dataset=
"""
from ..core.common import Paths
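As the documentation hunk above notes, the module expects the location of the twint Sqlite database in the user config. A hypothetical config block matching that documented shape (the path is made up; this would typically live in your my.config):

    class twint:
        export_path = '/path/to/twint/twitter.db'  # path[s]/glob to the twint Sqlite database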

tests/tweets.py (new file, +47)

@@ -0,0 +1,47 @@
+from datetime import datetime
+import json
+
+import pytz
+
+from my.twitter.archive import Tweet
+
+
+def test_tweet():
+    raw = """
+    {
+        "retweeted" : false,
+        "entities" : {
+            "hashtags" : [ ],
+            "symbols" : [ ],
+            "user_mentions" : [ ],
+            "urls" : [ {
+                "url" : "https://t.co/vUg4W6nxwU",
+                "expanded_url" : "https://intelligence.org/2013/12/13/aaronson/",
+                "display_url" : "intelligence.org/2013/12/13/aar…",
+                "indices" : [ "120", "143" ]
+            }
+            ]
+        },
+        "display_text_range" : [ "0", "90" ],
+        "favorite_count" : "0",
+        "in_reply_to_status_id_str" : "24123424",
+        "id_str" : "2328934829084",
+        "in_reply_to_user_id" : "23423424",
+        "truncated" : false,
+        "retweet_count" : "0",
+        "id" : "23492349032940",
+        "in_reply_to_status_id" : "23482984932084",
+        "created_at" : "Thu Aug 30 07:12:48 +0000 2012",
+        "favorited" : false,
+        "full_text" : "this is a test tweet",
+        "lang" : "ru",
+        "in_reply_to_screen_name" : "whatever",
+        "in_reply_to_user_id_str" : "3748274"
+    }
+    """
+    t = Tweet(json.loads(raw), screen_name='whatever')
+    assert t.permalink is not None
+    assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
+    assert t.text == 'this is a test tweet'
+    assert t.tid == '2328934829084'
+    assert t.entities is not None
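The attributes exercised by these assertions (dt, text, tid, permalink, entities) are the ones a consumer of the archive module would typically touch. A usage sketch (not part of the commit; assumes the Twitter archive export is configured):

    from my.twitter import archive

    for t in archive.tweets():      # a list sorted by t.dt, per my/twitter/archive.py above
        print(t.dt, t.tid, t.text)  # timestamp, tweet id, full text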