twitter module: prettify top level twitter.all

Dima Gerasimov 2020-05-22 19:00:02 +01:00
parent c410daa484
commit 03773a7b2c
7 changed files with 75 additions and 55 deletions


@@ -146,6 +146,8 @@ for cls, p in modules:
Uses [[https://github.com/twintproject/twint][Twint]] data export.
Requirements: =pip3 install --user dataset=
#+begin_src python
class twint:
    export_path: Paths # path[s]/glob to the twint Sqlite database
@@ -171,7 +173,7 @@ for cls, p in modules:
#+end_src
** [[file:../my/reading/polar.py][my.reading.polar]]
-[[https://github.com/burtonator/polar-books][Polar]] articles and highlights
+[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
#+begin_src python
class polar:

my/reading/polar.py

@@ -1,5 +1,5 @@
"""
-[[https://github.com/burtonator/polar-books][Polar]] articles and highlights
+[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
"""
from pathlib import Path
from typing import Type, Any, cast, TYPE_CHECKING

my/twitter/all.py

@@ -1,24 +1,23 @@
"""
Unified Twitter data (merged from the archive and periodic updates)
"""
-from itertools import chain
-from . import twint
-from . import archive
# NOTE: you can comment out the sources you don't need
-# TODO move to .common?
-def merge_tweets(*sources):
-    from more_itertools import unique_everseen
-    yield from unique_everseen(
-        chain(*sources),
-        key=lambda t: t.id_str,
-    )
+from . import twint, archive
+from .common import merge_tweets
def tweets():
-    yield from merge_tweets(twint.tweets(), archive.tweets())
+    yield from merge_tweets(
+        twint  .tweets(),
+        archive.tweets(),
+    )
def likes():
-    yield from merge_tweets(twint.likes(), archive.likes())
+    yield from merge_tweets(
+        twint  .likes(),
+        archive.likes(),
+    )
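Not part of the diff, but for orientation: the public surface of my.twitter.all is unchanged by this refactor: tweets() and likes() still yield the merged, deduplicated stream. A minimal consumer sketch (assumes both the twint and archive sources are configured):

    import my.twitter.all as twitter

    # merged from twint + archive, deduplicated by id_str (see my/twitter/common.py below)
    for t in twitter.tweets():
        print(t.id_str)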

my/twitter/archive.py

@@ -160,50 +160,10 @@ class ZipExport:
            yield Like(r, screen_name=self.screen_name())

# todo not sure about list and sorting? although can't hurt considering json is not iterative?
def tweets() -> List[Tweet]:
    return list(sorted(ZipExport().tweets(), key=lambda t: t.dt))


def likes() -> List[Like]:
    return list(ZipExport().likes())

-def test_tweet():
-    raw = """
-    {
-        "retweeted" : false,
-        "entities" : {
-            "hashtags" : [ ],
-            "symbols" : [ ],
-            "user_mentions" : [ ],
-            "urls" : [ {
-                "url" : "https://t.co/vUg4W6nxwU",
-                "expanded_url" : "https://intelligence.org/2013/12/13/aaronson/",
-                "display_url" : "intelligence.org/2013/12/13/aar…",
-                "indices" : [ "120", "143" ]
-            }
-            ]
-        },
-        "display_text_range" : [ "0", "90" ],
-        "favorite_count" : "0",
-        "in_reply_to_status_id_str" : "24123424",
-        "id_str" : "2328934829084",
-        "in_reply_to_user_id" : "23423424",
-        "truncated" : false,
-        "retweet_count" : "0",
-        "id" : "23492349032940",
-        "in_reply_to_status_id" : "23482984932084",
-        "created_at" : "Thu Aug 30 07:12:48 +0000 2012",
-        "favorited" : false,
-        "full_text" : "this is a test tweet",
-        "lang" : "ru",
-        "in_reply_to_screen_name" : "whatever",
-        "in_reply_to_user_id_str" : "3748274"
-    }
-    """
-    t = Tweet(json.loads(raw), screen_name='whatever')
-    assert t.permalink is not None
-    assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
-    assert t.text == 'this is a test tweet'
-    assert t.tid == '2328934829084'
-    assert t.entities is not None

my/twitter/common.py (new file, +10)

@@ -0,0 +1,10 @@
+from itertools import chain
+
+from more_itertools import unique_everseen
+
+
+def merge_tweets(*sources):
+    yield from unique_everseen(
+        chain(*sources),
+        key=lambda t: t.id_str,
+    )
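A quick illustration of the merge semantics (a sketch with stand-in objects, not part of the commit): duplicates are dropped by id_str and the first occurrence wins, so the order in which sources are passed decides which copy is kept.

    from types import SimpleNamespace as T  # stand-in for real tweet objects
    from my.twitter.common import merge_tweets

    twint_src   = [T(id_str='1', text='from twint'), T(id_str='2', text='twint only')]
    archive_src = [T(id_str='1', text='from archive'), T(id_str='3', text='archive only')]

    merged = list(merge_tweets(twint_src, archive_src))
    assert [t.id_str for t in merged] == ['1', '2', '3']
    assert merged[0].text == 'from twint'  # earlier source wins for duplicates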

my/twitter/twint.py

@@ -2,6 +2,8 @@
Twitter data (tweets and favorites).
Uses [[https://github.com/twintproject/twint][Twint]] data export.
Requirements: =pip3 install --user dataset=
"""
from ..core.common import Paths
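As the documentation hunk above notes, the module expects the location of the twint Sqlite database in the user config. A hypothetical config block matching that documented shape (the path is made up; this would typically live in your my.config):

    class twint:
        export_path = '/path/to/twint/twitter.db'  # path[s]/glob to the twint Sqlite database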

tests/tweets.py (new file, +47)

@@ -0,0 +1,47 @@
+from datetime import datetime
+import json
+
+import pytz
+
+from my.twitter.archive import Tweet
+
+
+def test_tweet():
+    raw = """
+    {
+        "retweeted" : false,
+        "entities" : {
+            "hashtags" : [ ],
+            "symbols" : [ ],
+            "user_mentions" : [ ],
+            "urls" : [ {
+                "url" : "https://t.co/vUg4W6nxwU",
+                "expanded_url" : "https://intelligence.org/2013/12/13/aaronson/",
+                "display_url" : "intelligence.org/2013/12/13/aar…",
+                "indices" : [ "120", "143" ]
+            }
+            ]
+        },
+        "display_text_range" : [ "0", "90" ],
+        "favorite_count" : "0",
+        "in_reply_to_status_id_str" : "24123424",
+        "id_str" : "2328934829084",
+        "in_reply_to_user_id" : "23423424",
+        "truncated" : false,
+        "retweet_count" : "0",
+        "id" : "23492349032940",
+        "in_reply_to_status_id" : "23482984932084",
+        "created_at" : "Thu Aug 30 07:12:48 +0000 2012",
+        "favorited" : false,
+        "full_text" : "this is a test tweet",
+        "lang" : "ru",
+        "in_reply_to_screen_name" : "whatever",
+        "in_reply_to_user_id_str" : "3748274"
+    }
+    """
+    t = Tweet(json.loads(raw), screen_name='whatever')
+    assert t.permalink is not None
+    assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
+    assert t.text == 'this is a test tweet'
+    assert t.tid == '2328934829084'
+    assert t.entities is not None
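The attributes exercised by these assertions (dt, text, tid, permalink, entities) are the ones a consumer of the archive module would typically touch. A usage sketch (not part of the commit; assumes the Twitter archive export is configured):

    from my.twitter import archive

    for t in archive.tweets():      # a list sorted by t.dt, per my/twitter/archive.py above
        print(t.dt, t.tid, t.text)  # timestamp, tweet id, full text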