twitter.talon: make retweets more compatible with twitter archive

This commit is contained in:
Dima Gerasimov 2022-05-30 23:45:59 +01:00 committed by karlicoss
parent ef120bc643
commit d092608002
2 changed files with 16 additions and 4 deletions

View file

@ -89,4 +89,9 @@ check 'so there is clearly a pattern'
check '2022-02-02 Wed 18:28.*You are in luck!.*https://deepmind.com/blog/article/Competitive-programming-with-AlphaCode'
# https://twitter.com/karlicoss/status/349168455964033024
# check link which is only in twidump
check '2013-06-24 Mon 14:13.*RT @gorod095: Нашел недавно в букинист'
# TODO check likes as well

View file

@ -90,9 +90,6 @@ def _process_favorite_tweets(db) -> Iterator[Res[Entity]]:
yield e
def _parse_tweet(row) -> Tweet:
# TODO row['retweeter'], if not empty, would be the user's name and would mean this is a retweet?
# screen_name would then be the actual tweet's author
# ok so looks like it's tz aware..
# https://github.com/klinker24/talon-for-twitter-android/blob/c3b0612717ba3ea93c0cae6d907d7d86d640069e/app/src/main/java/com/klinker/android/twitter_l/data/sq_lite/FavoriteTweetsDataSource.java#L95
# uses https://docs.oracle.com/javase/7/docs/api/java/util/Date.html#getTime()
@ -115,10 +112,20 @@ def _parse_tweet(row) -> Tweet:
break
#
screen_name = row['screen_name']
# since id_str refers to the retweeter's tweet (rather than the original tweet),
# it makes sense for the permalink to contain the retweeter as well
# this also makes it more compatible with the twitter archive
# a bit sad to lose structured information about the RT, but then again we could always just parse it..
retweeter = row['retweeter']
if len(retweeter) > 0:
text = f'RT @{screen_name}: {text}'
screen_name = retweeter
return Tweet(
id_str=str(row['tweet_id']),
created_at=created_at,
screen_name=row['screen_name'],
screen_name=screen_name,
text=text,
# todo hmm text sometimes is trimmed with ellipsis? at least urls
urls=tuple(u for u in row['other_url'].split(' ') if len(u.strip()) > 0),