From 473b8d476cd8e21254318e5d179c200a2e106c7c Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 9 Sep 2018 23:41:51 +0100 Subject: [PATCH 01/10] initial --- tweets/__init__.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tweets/__init__.py diff --git a/tweets/__init__.py b/tweets/__init__.py new file mode 100644 index 0000000..5fc693f --- /dev/null +++ b/tweets/__init__.py @@ -0,0 +1,58 @@ +from datetime import date, datetime +from typing import Union, List + +KARLICOSS_ID = '119756204' + +# TODO how to discover configs? ... I guess symlinking... +def tweets_all(): + import sys + sys.path.append("/L/coding/twidump") + import twidump + # add current package to path to discover config?... nah, twidump should be capable of that. + from twidump.data_manipulation.timelines import TimelineLoader + from twidump.component import get_app_injector + tl_loader = get_app_injector().get(TimelineLoader) # type: TimelineLoader + tl = tl_loader.load_timeline(KARLICOSS_ID) + return tl + + +class Tweet: + def __init__(self, tw): + self.tw = tw + + def __getattr__(self, attr): + return getattr(self.tw, attr) + + @property + def url(self) -> str: + from twidump.render.tools import make_tweet_permalink + return make_tweet_permalink(self.tw.id_str) + + @property + def time(self) -> str: + return self.tw.created_at + + @property + def datetime(self) -> datetime: + return self.tw.get_utc_datetime() + + def __str__(self) -> str: + return str(self.tw) + + def __repr__(self) -> str: + return repr(self.tw) + +def predicate(p) -> List[Tweet]: + return [Tweet(t) for t in tweets_all() if p(t)] + +def predicate_date(p) -> List[Tweet]: + return predicate(lambda t: p(t.get_utc_datetime().date())) + +Datish = Union[date, str] +def tweets_on(*dts: Datish) -> List[Tweet]: + from kython import parse_date_new + # TODO how to make sure we don't miss on 29 feb? + dates = {parse_date_new(d) for d in dts} + return predicate_date(lambda d: d in dates) + +on = tweets_on From fc41c78d2072ebe1fbbe6397a87dbfedf9828940 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 2 Oct 2018 20:25:23 +0100 Subject: [PATCH 02/10] Handle twidump config properly, nicer wrapping --- tweets/__init__.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/tweets/__init__.py b/tweets/__init__.py index 5fc693f..a44028b 100644 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -3,19 +3,6 @@ from typing import Union, List KARLICOSS_ID = '119756204' -# TODO how to discover configs? ... I guess symlinking... -def tweets_all(): - import sys - sys.path.append("/L/coding/twidump") - import twidump - # add current package to path to discover config?... nah, twidump should be capable of that. - from twidump.data_manipulation.timelines import TimelineLoader - from twidump.component import get_app_injector - tl_loader = get_app_injector().get(TimelineLoader) # type: TimelineLoader - tl = tl_loader.load_timeline(KARLICOSS_ID) - return tl - - class Tweet: def __init__(self, tw): self.tw = tw @@ -33,20 +20,34 @@ class Tweet: return self.tw.created_at @property - def datetime(self) -> datetime: + def dt(self) -> datetime: return self.tw.get_utc_datetime() + @property + def text(self) -> str: + return self.tw.text + def __str__(self) -> str: return str(self.tw) def __repr__(self) -> str: return repr(self.tw) +def tweets_all(): + import twidump + # add current package to path to discover config?... nah, twidump should be capable of that. + from twidump.data_manipulation.timelines import TimelineLoader + from twidump.component import get_app_injector + tl_loader = get_app_injector().get(TimelineLoader) # type: TimelineLoader + tl = tl_loader.load_timeline(KARLICOSS_ID) + return [Tweet(x) for x in tl] + + def predicate(p) -> List[Tweet]: - return [Tweet(t) for t in tweets_all() if p(t)] + return [t for t in tweets_all() if p(t)] def predicate_date(p) -> List[Tweet]: - return predicate(lambda t: p(t.get_utc_datetime().date())) + return predicate(lambda t: p(t.dt.date())) Datish = Union[date, str] def tweets_on(*dts: Datish) -> List[Tweet]: From 60ed4d09384ef46c2afeda3e3e5304d0ba037224 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 30 Oct 2018 06:39:30 +0000 Subject: [PATCH 03/10] add tid --- tweets/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tweets/__init__.py b/tweets/__init__.py index a44028b..f00f6f4 100644 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -27,6 +27,10 @@ class Tweet: def text(self) -> str: return self.tw.text + @property + def tid(self) -> str: + return self.tw.id_str + def __str__(self) -> str: return str(self.tw) From 67e85c8597558efa7b47d3757aec7b03cb7d6ae7 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 3 Feb 2019 20:50:07 +0000 Subject: [PATCH 04/10] Use real db location --- tweets/__init__.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tweets/__init__.py b/tweets/__init__.py index f00f6f4..9a12232 100644 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -1,7 +1,15 @@ from datetime import date, datetime from typing import Union, List +from pathlib import Path KARLICOSS_ID = '119756204' +DB_PATH = Path('/L/zzz_syncthing/data/tweets') + + +import sys +sys.path.append('/L/Dropbox/coding/twidump') +import twidump # type: ignore +sys.path.pop() # TODO not sure if necessary? class Tweet: def __init__(self, tw): @@ -39,10 +47,10 @@ class Tweet: def tweets_all(): import twidump - # add current package to path to discover config?... nah, twidump should be capable of that. + # add current package to path to discover config?... nah, twidump should be capable of that. from twidump.data_manipulation.timelines import TimelineLoader from twidump.component import get_app_injector - tl_loader = get_app_injector().get(TimelineLoader) # type: TimelineLoader + tl_loader = get_app_injector(db_path=DB_PATH).get(TimelineLoader) # type: TimelineLoader tl = tl_loader.load_timeline(KARLICOSS_ID) return [Tweet(x) for x in tl] From 3444565a22b95eeb81685f4e5aa00c5d52f38ee1 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Mon, 11 Mar 2019 23:23:03 +0000 Subject: [PATCH 05/10] Fix ruci --- tweets/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tweets/__init__.py b/tweets/__init__.py index 9a12232..0e3cf6b 100644 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -20,7 +20,7 @@ class Tweet: @property def url(self) -> str: - from twidump.render.tools import make_tweet_permalink + from twidump.render.tools import make_tweet_permalink # type: ignore return make_tweet_permalink(self.tw.id_str) @property @@ -48,8 +48,8 @@ class Tweet: def tweets_all(): import twidump # add current package to path to discover config?... nah, twidump should be capable of that. - from twidump.data_manipulation.timelines import TimelineLoader - from twidump.component import get_app_injector + from twidump.data_manipulation.timelines import TimelineLoader # type: ignore + from twidump.component import get_app_injector # type: ignore tl_loader = get_app_injector(db_path=DB_PATH).get(TimelineLoader) # type: TimelineLoader tl = tl_loader.load_timeline(KARLICOSS_ID) return [Tweet(x) for x in tl] From 2d478b767b74a6f18b9c10da938a3b9f236d00d8 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 12 Mar 2019 12:18:10 +0000 Subject: [PATCH 06/10] move main to init --- tweets/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tweets/__init__.py b/tweets/__init__.py index 0e3cf6b..89eef63 100644 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -69,3 +69,7 @@ def tweets_on(*dts: Datish) -> List[Tweet]: return predicate_date(lambda d: d in dates) on = tweets_on + +if __name__ == '__main__': + for t in tweets_all(): + print(t) From 99eb79e23056b1c9979871596d6ff8bf8bbf0007 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 20 Mar 2019 00:36:48 +0000 Subject: [PATCH 07/10] Handle json twitter export for more timeline data --- tweets/__init__.py | 49 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/tweets/__init__.py b/tweets/__init__.py index 89eef63..3022ec5 100644 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -1,9 +1,15 @@ from datetime import date, datetime -from typing import Union, List +from typing import Union, List, Dict, Set from pathlib import Path +import json + +import zipfile + +from kython import make_dict KARLICOSS_ID = '119756204' DB_PATH = Path('/L/zzz_syncthing/data/tweets') +EXPORTS_PATH = Path('/L/backups/twitter-exports') import sys @@ -11,6 +17,9 @@ sys.path.append('/L/Dropbox/coding/twidump') import twidump # type: ignore sys.path.pop() # TODO not sure if necessary? +Tid = str + +# TODO make sure it's not used anywhere else and simplify interface class Tweet: def __init__(self, tw): self.tw = tw @@ -36,7 +45,7 @@ class Tweet: return self.tw.text @property - def tid(self) -> str: + def tid(self) -> Tid: return self.tw.id_str def __str__(self) -> str: @@ -45,7 +54,8 @@ class Tweet: def __repr__(self) -> str: return repr(self.tw) -def tweets_all(): + +def _twidump() -> List[Tweet]: import twidump # add current package to path to discover config?... nah, twidump should be capable of that. from twidump.data_manipulation.timelines import TimelineLoader # type: ignore @@ -55,6 +65,38 @@ def tweets_all(): return [Tweet(x) for x in tl] +def _json() -> List[Tweet]: + from twidump.data.tweet import Tweet as TDTweet # type: ignore + + zips = EXPORTS_PATH.glob('*.zip') + last = list(sorted(zips, key=lambda p: p.stat().st_mtime))[-1] + ddd = zipfile.ZipFile(last).read('tweet.js').decode('utf8') + start = ddd.index('[') + ddd = ddd[start:] + tws = [] + for j in json.loads(ddd): + j['user'] = {} # TODO is it ok? + tw = Tweet(TDTweet.from_api_dict(j)) + tws.append(tw) + return tws + + +def tweets_all() -> List[Tweet]: + tjson: Dict[Tid, Tweet] = make_dict(_json(), key=lambda t: t.tid) + tdump: Dict[Tid, Tweet] = make_dict(_twidump(), key=lambda t: t.tid) + keys: Set[Tid] = set(tdump.keys()).union(set(tjson.keys())) + + # TODO hmm. looks like json generally got longer tweets? + res: List[Tweet] = [] + for tid in keys: + if tid in tjson: + res.append(tjson[tid]) + else: + res.append(tdump[tid]) + res.sort(key=lambda t: t.dt) + return res + + def predicate(p) -> List[Tweet]: return [t for t in tweets_all() if p(t)] @@ -70,6 +112,7 @@ def tweets_on(*dts: Datish) -> List[Tweet]: on = tweets_on + if __name__ == '__main__': for t in tweets_all(): print(t) From 8c746be358602fa56dba9f17c4fde3d6fa971781 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sun, 2 Jun 2019 07:57:18 +0000 Subject: [PATCH 08/10] fix path --- tweets/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) mode change 100644 => 100755 tweets/__init__.py diff --git a/tweets/__init__.py b/tweets/__init__.py old mode 100644 new mode 100755 index 3022ec5..4f36536 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 from datetime import date, datetime from typing import Union, List, Dict, Set from pathlib import Path @@ -13,7 +14,7 @@ EXPORTS_PATH = Path('/L/backups/twitter-exports') import sys -sys.path.append('/L/Dropbox/coding/twidump') +sys.path.append('/L/coding/twidump') import twidump # type: ignore sys.path.pop() # TODO not sure if necessary? @@ -112,6 +113,13 @@ def tweets_on(*dts: Datish) -> List[Tweet]: on = tweets_on +def test_on(): + tww = tweets_on('2019-05-11') + assert len(tww) == 2 + +def test_all(): + tall = tweets_all() + assert len(tall) > 100 if __name__ == '__main__': for t in tweets_all(): From d83552e7db3ce57e321faa9fc219edbe3608e7a8 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Sat, 13 Jul 2019 21:55:48 +0100 Subject: [PATCH 09/10] minor --- tweets/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tweets/__init__.py b/tweets/__init__.py index 4f36536..17dc56e 100755 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -91,9 +91,10 @@ def tweets_all() -> List[Tweet]: res: List[Tweet] = [] for tid in keys: if tid in tjson: - res.append(tjson[tid]) + tw = tjson[tid] else: - res.append(tdump[tid]) + tw = tdump[tid] + res.append(tw) res.sort(key=lambda t: t.dt) return res From c3c2f0ddebfc74f2540501af6c5d60be7f8a3927 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 6 Aug 2019 07:40:31 +0100 Subject: [PATCH 10/10] fix permalinks --- tweets/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tweets/__init__.py b/tweets/__init__.py index 17dc56e..5501b81 100755 --- a/tweets/__init__.py +++ b/tweets/__init__.py @@ -30,8 +30,7 @@ class Tweet: @property def url(self) -> str: - from twidump.render.tools import make_tweet_permalink # type: ignore - return make_tweet_permalink(self.tw.id_str) + return self.tw.permalink(username='karlicoss') @property def time(self) -> str: