twitter module: prettify top level twitter.all

2020-05-22 19:00:02 +01:00 · 2020-05-22 19:00:02 +01:00 · 03773a7b2c
commit 03773a7b2c
parent c410daa484
7 changed files with 75 additions and 55 deletions
--- a/doc/MODULES.org
+++ b/doc/MODULES.org
@ -146,6 +146,8 @@ for cls, p in modules:
    Uses [[https://github.com/twintproject/twint][Twint]] data export.
    Requirements: =pip3 install --user dataset=
    #+begin_src python
    class twint:
        export_path: Paths # path[s]/glob to the twint Sqlite database
@ -171,7 +173,7 @@ for cls, p in modules:
    #+end_src
 ** [[file:../my/reading/polar.py][my.reading.polar]]
-    [[https://github.com/burtonator/polar-books][Polar]] articles and highlights
+    [[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
    #+begin_src python
    class polar:
--- a/my/reading/polar.py
+++ b/my/reading/polar.py
@ -1,5 +1,5 @@
 """
-[[https://github.com/burtonator/polar-books][Polar]] articles and highlights
+[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
 """
 from pathlib import Path
 from typing import Type, Any, cast, TYPE_CHECKING
--- a/my/twitter/all.py
+++ b/my/twitter/all.py
@ -1,24 +1,23 @@
 """
 Unified Twitter data (merged from the archive and periodic updates)
 """
 from itertools import chain
-from . import twint
+# NOTE: you can comment out the sources you don't need
 from . import archive
-# TODO move to .common?
+from . import twint, archive
-def merge_tweets(*sources):
+from .common import merge_tweets
    from more_itertools import unique_everseen
    yield from unique_everseen(
        chain(*sources),
        key=lambda t: t.id_str,
    )
 def tweets():
-    yield from merge_tweets(twint.tweets(), archive.tweets())
+    yield from merge_tweets(
        twint  .tweets(),
        archive.tweets(),
    )
 def likes():
-    yield from merge_tweets(twint.likes(), archive.likes())
+    yield from merge_tweets(
        twint  .likes(),
        archive.likes(),
    )
--- a/my/twitter/archive.py
+++ b/my/twitter/archive.py
@ -160,50 +160,10 @@ class ZipExport:
            yield Like(r, screen_name=self.screen_name())
 # todo not sure about list and sorting? although can't hurt considering json is not iterative?
 def tweets() -> List[Tweet]:
    """Return the tweets produced by ``ZipExport().tweets()`` as a list sorted by their ``dt`` attribute."""
    # sorted() already returns a new list, so wrapping it in list() was a redundant copy
    return sorted(ZipExport().tweets(), key=lambda t: t.dt)
 def likes() -> List[Like]:
    """Collect everything ``ZipExport().likes()`` yields into a list, in iteration order."""
    export = ZipExport()
    return [*export.likes()]
 def test_tweet():
    """Parse a sample tweet JSON record into ``Tweet`` and check its permalink, dt, text, tid and entities."""
    sample = """
 {
  "retweeted" : false,
  "entities" : {
    "hashtags" : [ ],
    "symbols" : [ ],
    "user_mentions" : [ ],
    "urls" : [ {
      "url" : "https://t.co/vUg4W6nxwU",
      "expanded_url" : "https://intelligence.org/2013/12/13/aaronson/",
      "display_url" : "intelligence.org/2013/12/13/aar…",
      "indices" : [ "120", "143" ]
    }
    ]
  },
  "display_text_range" : [ "0", "90" ],
  "favorite_count" : "0",
  "in_reply_to_status_id_str" : "24123424",
  "id_str" : "2328934829084",
  "in_reply_to_user_id" : "23423424",
  "truncated" : false,
  "retweet_count" : "0",
  "id" : "23492349032940",
  "in_reply_to_status_id" : "23482984932084",
  "created_at" : "Thu Aug 30 07:12:48 +0000 2012",
  "favorited" : false,
  "full_text" : "this is a test tweet",
  "lang" : "ru",
  "in_reply_to_screen_name" : "whatever",
  "in_reply_to_user_id_str" : "3748274"
 }
    """
    parsed = json.loads(sample)
    tweet = Tweet(parsed, screen_name='whatever')

    # the permalink must be derivable from the record
    assert tweet.permalink is not None

    # "created_at" above is given in +0000, so the expected datetime is UTC-aware
    actual_dt = tweet.dt
    expected_dt = datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
    assert actual_dt == expected_dt

    assert tweet.text == 'this is a test tweet'
    # tid is expected to match "id_str", not the numeric "id" field
    assert tweet.tid  == '2328934829084'
    assert tweet.entities is not None
--- a/my/twitter/common.py
+++ b/my/twitter/common.py
@ -0,0 +1,10 @@
 from itertools import chain
 from more_itertools import unique_everseen
 def merge_tweets(*sources):
    """
    Lazily yield the items of all ``sources`` one source after another,
    skipping any item whose ``id_str`` was already seen (first occurrence wins).
    """
    def tweet_id(tweet):
        return tweet.id_str

    combined = chain.from_iterable(sources)
    yield from unique_everseen(combined, key=tweet_id)
--- a/my/twitter/twint.py
+++ b/my/twitter/twint.py
@ -2,6 +2,8 @@
 Twitter data (tweets and favorites).
 Uses [[https://github.com/twintproject/twint][Twint]] data export.
 Requirements: =pip3 install --user dataset=
 """
 from ..core.common import Paths
--- a/tests/tweets.py
+++ b/tests/tweets.py
@ -0,0 +1,47 @@
 from datetime import datetime
 import json
 import pytz
 from my.twitter.archive import Tweet
 def test_tweet():
    """Parse a sample tweet JSON record into ``Tweet`` and check its permalink, dt, text, tid and entities."""
    sample = """
 {
  "retweeted" : false,
  "entities" : {
    "hashtags" : [ ],
    "symbols" : [ ],
    "user_mentions" : [ ],
    "urls" : [ {
      "url" : "https://t.co/vUg4W6nxwU",
      "expanded_url" : "https://intelligence.org/2013/12/13/aaronson/",
      "display_url" : "intelligence.org/2013/12/13/aar…",
      "indices" : [ "120", "143" ]
    }
    ]
  },
  "display_text_range" : [ "0", "90" ],
  "favorite_count" : "0",
  "in_reply_to_status_id_str" : "24123424",
  "id_str" : "2328934829084",
  "in_reply_to_user_id" : "23423424",
  "truncated" : false,
  "retweet_count" : "0",
  "id" : "23492349032940",
  "in_reply_to_status_id" : "23482984932084",
  "created_at" : "Thu Aug 30 07:12:48 +0000 2012",
  "favorited" : false,
  "full_text" : "this is a test tweet",
  "lang" : "ru",
  "in_reply_to_screen_name" : "whatever",
  "in_reply_to_user_id_str" : "3748274"
 }
    """
    parsed = json.loads(sample)
    tweet = Tweet(parsed, screen_name='whatever')

    # the permalink must be derivable from the record
    assert tweet.permalink is not None

    # "created_at" above is given in +0000, so the expected datetime is UTC-aware
    actual_dt = tweet.dt
    expected_dt = datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
    assert actual_dt == expected_dt

    assert tweet.text == 'this is a test tweet'
    # tid is expected to match "id_str", not the numeric "id" field
    assert tweet.tid  == '2328934829084'
    assert tweet.entities is not None