support likes from twint

This commit is contained in:
Dima Gerasimov 2020-04-14 23:01:44 +01:00
parent 69a1624f8f
commit 81986b0624
3 changed files with 37 additions and 8 deletions

View file

@ -20,10 +20,8 @@ def merge_tweets(*sources):
def tweets():
    """Yield tweets merged from the twint and archive sources."""
    # NOTE: order matters -- twint seems to contain better data, so it is
    # passed first (presumably merge_tweets prefers earlier sources; verify).
    # TODO: probably worth an investigation.
    # Only tweet sources belong here; likes are exposed separately via likes().
    yield from merge_tweets(twint.tweets(), archive.tweets())
# TODO not sure, likes vs favorites??
def likes():
    """Yield liked tweets merged from the twint and archive sources."""
    # A single merge_tweets call covers both sources; additionally yielding
    # archive.likes() on its own would emit every archive like twice.
    yield from merge_tweets(twint.likes(), archive.likes())

View file

@ -87,7 +87,7 @@ class Like(NamedTuple):
return f'https://twitter.com/{self.screen_name}/status/{self.tid}'
@property
def id_str(self) -> Tid:
    """Return the tweet id stored under the ``tweetId`` key of ``self.raw``."""
    return self.raw['tweetId']
@property
@ -95,6 +95,11 @@ class Like(NamedTuple):
# ugh. I think none means that tweet was deleted?
return self.raw.get('fullText')
# TODO deprecate?
@property
def tid(self) -> Tid:
    """Return the same value as ``id_str``; kept as an alternative name."""
    tweet_id = self.id_str
    return tweet_id
class ZipExport:
def __init__(self) -> None:

View file

@ -61,11 +61,37 @@ class Tweet(NamedTuple):
def __repr__(self):
    """Return a short representation showing the id, creation time and text."""
    summary = f'Tweet(id_str={self.id_str}, created_at={self.created_at}, text={self.text})'
    return summary
# See https://github.com/twintproject/twint/issues/196
# Ugh. It dumps everything into the tweets table, and there is no good way to tell a favorited tweet from an original one.
# This might result in some tweets missing from the timeline if you happened to like them...
# Not sure what to do about it.
# Alternatively, could ask the user to keep separate databases for tweets and favs?
# TODO think about it
# SQL template: selects every column of the ``tweets`` table, left-joined
# against ``favorites`` on the tweet id and ordered by creation time.
# ``{where}`` is a placeholder filled in with ``str.format`` by the callers,
# which pass a condition on ``F.tweet_id``.
_QUERY = '''
SELECT T.*
FROM tweets as T
LEFT JOIN favorites as F
ON T.id_str = F.tweet_id
WHERE {where}
ORDER BY T.created_at
'''
def _get_db():
    """Open the SQLite database at ``get_db_path()`` and return the connection.

    Returns:
        The object produced by ``dataset.connect`` for that path.
    """
    # Function-scope import, as in the original code (presumably so that
    # ``dataset`` is only required when the database is actually used).
    import dataset  # type: ignore
    db_path = get_db_path()
    # TODO check that exists?
    db = dataset.connect(f'sqlite:///{db_path}')
    # No ``yield`` may appear in this function: it must stay a plain function
    # so that callers receive the connection itself rather than a generator.
    return db
def tweets() -> Iterable[Tweet]:
    """Yield a ``Tweet`` for every queried row that has no match in favorites.

    Runs ``_QUERY`` with the condition ``F.tweet_id IS NULL``.
    """
    connection = _get_db()
    rows = connection.query(_QUERY.format(where='F.tweet_id IS NULL'))
    for row in rows:
        yield Tweet(row)
def likes() -> Iterable[Tweet]:
    """Yield a ``Tweet`` for every queried row that has a match in favorites.

    Runs ``_QUERY`` with the condition ``F.tweet_id IS NOT NULL``.
    """
    connection = _get_db()
    favorited = connection.query(_QUERY.format(where='F.tweet_id IS NOT NULL'))
    yield from (Tweet(row) for row in favorited)