diff --git a/misc/check-twitter.sh b/misc/check-twitter.sh
new file mode 100755
index 0000000..f5f26ce
--- /dev/null
+++ b/misc/check-twitter.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Hacky script to check the twitter module's behaviour w.r.t. merging and normalising data.
+# It checks against orger output for @karlicoss data; each check below greps for one known tweet.
+
+set -eu  # abort on a failing command (-e) or on use of an unset variable (-u)
+
+FILE="$1"  # file that every check searches; first command-line argument
+
+function check() {  # print "FAILED! <pattern>" unless rg reports a count of exactly 1 for pattern $1 in $FILE
+    x="$1"
+    if [[ $(rg --count "$x" "$FILE") != "1" ]]; then
+        echo "FAILED! $x"
+    fi
+}
+
+# tweet that is only present in the old twitter archive data; also tests mentions
+check '2010-03-24 Wed 10:02.*@GDRussia подлагивает'
+
+# check that the old twitter archive data replaces </>
+check '2011-05-12 Thu 17:51.*set ><'
+# this one would probably be from twint or something? (origin not confirmed)
+check '2013-06-01 Sat 18:48.* Iterator[Res[Tweet]]:
 def tweets() -> Iterator[Res[Tweet]]:
+    # for tweets, archive data is higher quality
     yield from merge_tweets(
-        _tweets_twint(),
         _tweets_archive(),
+        _tweets_twint(),
     )
 
 
 def likes() -> Iterator[Res[Tweet]]:
+    # for likes, archive data barely has anything so twint is preferred
     yield from merge_tweets(
         _likes_twint(),
         _likes_archive(),
diff --git a/my/twitter/talon.py b/my/twitter/talon.py
index 3ff9ddf..f540d14 100644
--- a/my/twitter/talon.py
+++ b/my/twitter/talon.py
@@ -124,3 +124,5 @@ def likes() -> Iterator[Res[Tweet]]:
         elif isinstance(x, _IsFavorire):
             yield x.tweet
 
+
+# TODO maybe should combine all public iterators into a stats()