adapt twitter provider for public consumption
This commit is contained in:
parent
1075c9fdae
commit
a6d16b9c55
2 changed files with 85 additions and 76 deletions
|
@ -92,6 +92,7 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
|
||||||
bdt = _get_bdate(bfile)
|
bdt = _get_bdate(bfile)
|
||||||
|
|
||||||
saves = [SaveWithDt(save, bdt) for save in rexport.Model([bfile]).saved()]
|
saves = [SaveWithDt(save, bdt) for save in rexport.Model([bfile]).saved()]
|
||||||
|
# TODO FIXME remove kython?
|
||||||
from kython import make_dict
|
from kython import make_dict
|
||||||
return make_dict(
|
return make_dict(
|
||||||
sorted(saves, key=lambda p: p.save.created),
|
sorted(saves, key=lambda p: p.save.created),
|
||||||
|
|
160
my/tweets.py
160
my/tweets.py
|
@ -1,52 +1,60 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
from typing import Union, List, Dict, Set
|
from typing import Union, List, Dict, Set, Optional, Iterator, Any
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
from kython import make_dict
|
import pytz
|
||||||
|
|
||||||
KARLICOSS_ID = '119756204'
|
from .common import PathIsh
|
||||||
DB_PATH = Path('/L/zzz_syncthing/data/tweets')
|
|
||||||
EXPORTS_PATH = Path('/L/backups/twitter-exports')
|
|
||||||
|
_export_path: Optional[Path] = None
|
||||||
|
|
||||||
|
def configure(*, export_path: Optional[PathIsh]=None) -> None:
|
||||||
|
if export_path is not None:
|
||||||
|
global _export_path
|
||||||
|
_export_path = Path(export_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_export() -> Path:
|
||||||
|
export_path = _export_path
|
||||||
|
if export_path is None:
|
||||||
|
# fallback to my_configuration
|
||||||
|
from . import paths
|
||||||
|
export_path = paths.twitter.export_path
|
||||||
|
return Path(export_path)
|
||||||
|
|
||||||
|
|
||||||
import sys
|
|
||||||
sys.path.append('/L/coding/twidump')
|
|
||||||
import twidump # type: ignore
|
|
||||||
sys.path.pop() # TODO not sure if necessary?
|
|
||||||
|
|
||||||
Tid = str
|
Tid = str
|
||||||
|
|
||||||
# TODO make sure it's not used anywhere else and simplify interface
|
# TODO make sure it's not used anywhere else and simplify interface
|
||||||
class Tweet:
|
class Tweet:
|
||||||
def __init__(self, tw):
|
def __init__(self, tw: Dict[str, Any]) -> None:
|
||||||
self.tw = tw
|
self.tw = tw
|
||||||
|
|
||||||
def __getattr__(self, attr):
|
|
||||||
return getattr(self.tw, attr)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def url(self) -> str:
|
|
||||||
return self.tw.permalink(username='karlicoss')
|
|
||||||
|
|
||||||
@property
|
|
||||||
def time(self) -> str:
|
|
||||||
return self.tw.created_at
|
|
||||||
|
|
||||||
@property
|
|
||||||
def dt(self) -> datetime:
|
|
||||||
return self.tw.get_utc_datetime()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def text(self) -> str:
|
|
||||||
return self.tw.text
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tid(self) -> Tid:
|
def tid(self) -> Tid:
|
||||||
return self.tw.id_str
|
return self.tw['id_str']
|
||||||
|
|
||||||
|
@property
|
||||||
|
def permalink(self) -> str:
|
||||||
|
return f'https://twitter.com/i/web/status/{self.tid}'
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> datetime:
|
||||||
|
dts = self.tw['created_at']
|
||||||
|
return datetime.strptime(dts, '%a %b %d %H:%M:%S %z %Y')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def text(self) -> str:
|
||||||
|
return self.tw['full_text']
|
||||||
|
|
||||||
|
@property
|
||||||
|
def entities(self):
|
||||||
|
return self.tw['entities']
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return str(self.tw)
|
return str(self.tw)
|
||||||
|
@ -55,47 +63,17 @@ class Tweet:
|
||||||
return repr(self.tw)
|
return repr(self.tw)
|
||||||
|
|
||||||
|
|
||||||
def _twidump() -> List[Tweet]:
|
def _from_json_export() -> Iterator[Tweet]:
|
||||||
import twidump
|
epath = _get_export()
|
||||||
# add current package to path to discover config?... nah, twidump should be capable of that.
|
ddd = zipfile.ZipFile(epath).read('tweet.js').decode('utf8')
|
||||||
from twidump.data_manipulation.timelines import TimelineLoader # type: ignore
|
|
||||||
from twidump.component import get_app_injector # type: ignore
|
|
||||||
tl_loader = get_app_injector(db_path=DB_PATH).get(TimelineLoader) # type: TimelineLoader
|
|
||||||
tl = tl_loader.load_timeline(KARLICOSS_ID)
|
|
||||||
return [Tweet(x) for x in tl]
|
|
||||||
|
|
||||||
|
|
||||||
def _json() -> List[Tweet]:
|
|
||||||
from twidump.data.tweet import Tweet as TDTweet # type: ignore
|
|
||||||
|
|
||||||
zips = EXPORTS_PATH.glob('*.zip')
|
|
||||||
last = list(sorted(zips, key=lambda p: p.stat().st_mtime))[-1]
|
|
||||||
ddd = zipfile.ZipFile(last).read('tweet.js').decode('utf8')
|
|
||||||
start = ddd.index('[')
|
start = ddd.index('[')
|
||||||
ddd = ddd[start:]
|
ddd = ddd[start:]
|
||||||
tws = []
|
|
||||||
for j in json.loads(ddd):
|
for j in json.loads(ddd):
|
||||||
j['user'] = {} # TODO is it ok?
|
yield Tweet(j)
|
||||||
tw = Tweet(TDTweet.from_api_dict(j))
|
|
||||||
tws.append(tw)
|
|
||||||
return tws
|
|
||||||
|
|
||||||
|
|
||||||
def tweets_all() -> List[Tweet]:
|
def tweets_all() -> List[Tweet]:
|
||||||
tjson: Dict[Tid, Tweet] = make_dict(_json(), key=lambda t: t.tid)
|
return list(sorted(_from_json_export(), key=lambda t: t.dt))
|
||||||
tdump: Dict[Tid, Tweet] = make_dict(_twidump(), key=lambda t: t.tid)
|
|
||||||
keys: Set[Tid] = set(tdump.keys()).union(set(tjson.keys()))
|
|
||||||
|
|
||||||
# TODO hmm. looks like json generally got longer tweets?
|
|
||||||
res: List[Tweet] = []
|
|
||||||
for tid in keys:
|
|
||||||
if tid in tjson:
|
|
||||||
tw = tjson[tid]
|
|
||||||
else:
|
|
||||||
tw = tdump[tid]
|
|
||||||
res.append(tw)
|
|
||||||
res.sort(key=lambda t: t.dt)
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
def predicate(p) -> List[Tweet]:
|
def predicate(p) -> List[Tweet]:
|
||||||
|
@ -104,6 +82,7 @@ def predicate(p) -> List[Tweet]:
|
||||||
def predicate_date(p) -> List[Tweet]:
|
def predicate_date(p) -> List[Tweet]:
|
||||||
return predicate(lambda t: p(t.dt.date()))
|
return predicate(lambda t: p(t.dt.date()))
|
||||||
|
|
||||||
|
# TODO move these to private tests?
|
||||||
Datish = Union[date, str]
|
Datish = Union[date, str]
|
||||||
def tweets_on(*dts: Datish) -> List[Tweet]:
|
def tweets_on(*dts: Datish) -> List[Tweet]:
|
||||||
from kython import parse_date_new
|
from kython import parse_date_new
|
||||||
|
@ -113,14 +92,43 @@ def tweets_on(*dts: Datish) -> List[Tweet]:
|
||||||
|
|
||||||
on = tweets_on
|
on = tweets_on
|
||||||
|
|
||||||
def test_on():
|
|
||||||
tww = tweets_on('2019-05-11')
|
|
||||||
assert len(tww) == 2
|
|
||||||
|
|
||||||
def test_all():
|
def test_tweet():
|
||||||
tall = tweets_all()
|
raw = """
|
||||||
assert len(tall) > 100
|
{
|
||||||
|
"retweeted" : false,
|
||||||
if __name__ == '__main__':
|
"entities" : {
|
||||||
for t in tweets_all():
|
"hashtags" : [ ],
|
||||||
print(t)
|
"symbols" : [ ],
|
||||||
|
"user_mentions" : [ ],
|
||||||
|
"urls" : [ {
|
||||||
|
"url" : "https://t.co/vUg4W6nxwU",
|
||||||
|
"expanded_url" : "https://intelligence.org/2013/12/13/aaronson/",
|
||||||
|
"display_url" : "intelligence.org/2013/12/13/aar…",
|
||||||
|
"indices" : [ "120", "143" ]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"display_text_range" : [ "0", "90" ],
|
||||||
|
"favorite_count" : "0",
|
||||||
|
"in_reply_to_status_id_str" : "24123424",
|
||||||
|
"id_str" : "2328934829084",
|
||||||
|
"in_reply_to_user_id" : "23423424",
|
||||||
|
"truncated" : false,
|
||||||
|
"retweet_count" : "0",
|
||||||
|
"id" : "23492349032940",
|
||||||
|
"in_reply_to_status_id" : "23482984932084",
|
||||||
|
"created_at" : "Thu Aug 30 07:12:48 +0000 2012",
|
||||||
|
"favorited" : false,
|
||||||
|
"full_text" : "this is a test tweet",
|
||||||
|
"lang" : "ru",
|
||||||
|
"in_reply_to_screen_name" : "whatever",
|
||||||
|
"in_reply_to_user_id_str" : "3748274"
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
t = Tweet(json.loads(raw))
|
||||||
|
assert t.permalink is not None
|
||||||
|
assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
|
||||||
|
assert t.text == 'this is a test tweet'
|
||||||
|
assert t.tid == '2328934829084'
|
||||||
|
assert t.entities is not None
|
||||||
|
|
Loading…
Add table
Reference in a new issue