unified view for twitter data
This commit is contained in:
parent
56b6ab9aaf
commit
30b6918a8d
3 changed files with 63 additions and 24 deletions
17
my/twitter/all.py
Normal file
17
my/twitter/all.py
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
"""
|
||||||
|
Unified Twitter data (merged from the archive and periodic updates)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from . import twint
|
||||||
|
from . import archive
|
||||||
|
|
||||||
|
|
||||||
|
def tweets():
|
||||||
|
yield from archive.tweets()
|
||||||
|
yield from twint.tweets()
|
||||||
|
|
||||||
|
|
||||||
|
# TODO not sure, likes vs favoites??
|
||||||
|
def likes():
|
||||||
|
yield from archive.likes()
|
||||||
|
# yield from twint
|
|
@ -3,27 +3,25 @@ Twitter data (uses official twitter archive export)
|
||||||
|
|
||||||
See https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive
|
See https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from . import init
|
|
||||||
|
|
||||||
|
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple
|
from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from functools import lru_cache
|
||||||
import json
|
import json
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
from .common import PathIsh, get_files, LazyLogger, Json
|
from ..common import PathIsh, get_files, LazyLogger, Json
|
||||||
from .kython import kompress
|
from ..kython import kompress
|
||||||
|
|
||||||
|
from my.config import twitter as config
|
||||||
|
|
||||||
|
|
||||||
logger = LazyLogger(__name__)
|
logger = LazyLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def _get_export() -> Path:
|
def _get_export() -> Path:
|
||||||
from my.config import twitter as config
|
|
||||||
return max(get_files(config.export_path, '*.zip'))
|
return max(get_files(config.export_path, '*.zip'))
|
||||||
|
|
||||||
|
|
||||||
|
@ -33,29 +31,33 @@ Tid = str
|
||||||
# TODO make sure it's not used anywhere else and simplify interface
|
# TODO make sure it's not used anywhere else and simplify interface
|
||||||
class Tweet(NamedTuple):
|
class Tweet(NamedTuple):
|
||||||
raw: Json
|
raw: Json
|
||||||
|
screen_name: str
|
||||||
|
|
||||||
# TODO deprecate tid?
|
|
||||||
@property
|
@property
|
||||||
def tid(self) -> Tid:
|
def id_str(self) -> str:
|
||||||
return self.raw['id_str']
|
return self.raw['id_str']
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def permalink(self) -> str:
|
def created_at(self) -> datetime:
|
||||||
return f'https://twitter.com/i/web/status/{self.tid}'
|
|
||||||
|
|
||||||
# TODO deprecate dt?
|
|
||||||
@property
|
|
||||||
def dt(self) -> datetime:
|
|
||||||
dts = self.raw['created_at']
|
dts = self.raw['created_at']
|
||||||
return datetime.strptime(dts, '%a %b %d %H:%M:%S %z %Y')
|
return datetime.strptime(dts, '%a %b %d %H:%M:%S %z %Y')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def permalink(self) -> str:
|
||||||
|
return f'https://twitter.com/{self.screen_name}/status/{self.tid}'
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def text(self) -> str:
|
def text(self) -> str:
|
||||||
return self.raw['full_text']
|
return self.raw['full_text']
|
||||||
|
|
||||||
# TODO not sure if I need them...
|
|
||||||
@property
|
@property
|
||||||
def entities(self):
|
def urls(self) -> List[str]:
|
||||||
|
ents = self.entities
|
||||||
|
us = ents['urls']
|
||||||
|
return [u['expanded_url'] for u in us]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def entities(self) -> Json:
|
||||||
return self.raw['entities']
|
return self.raw['entities']
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
|
@ -64,15 +66,25 @@ class Tweet(NamedTuple):
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return repr(self.raw)
|
return repr(self.raw)
|
||||||
|
|
||||||
|
# TODO deprecate tid?
|
||||||
|
@property
|
||||||
|
def tid(self) -> Tid:
|
||||||
|
return self.id_str
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> datetime:
|
||||||
|
return self.created_at
|
||||||
|
|
||||||
|
|
||||||
class Like(NamedTuple):
|
class Like(NamedTuple):
|
||||||
raw: Json
|
raw: Json
|
||||||
|
screen_name: str
|
||||||
|
|
||||||
# TODO need to make permalink/link/url consistent across my stuff..
|
# TODO need to make permalink/link/url consistent across my stuff..
|
||||||
@property
|
@property
|
||||||
def permalink(self) -> str:
|
def permalink(self) -> str:
|
||||||
# doesn'tseem like link it export is more specific...
|
# doesn'tseem like link it export is more specific...
|
||||||
return f'https://twitter.com/i/web/status/{self.tid}'
|
return f'https://twitter.com/{self.screen_name}/status/{self.tid}'
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tid(self) -> Tid:
|
def tid(self) -> Tid:
|
||||||
|
@ -113,17 +125,21 @@ class ZipExport:
|
||||||
# older format
|
# older format
|
||||||
yield j
|
yield j
|
||||||
|
|
||||||
|
@lru_cache(1)
|
||||||
|
def screen_name(self) -> str:
|
||||||
|
[acc] = self.raw('account')
|
||||||
|
return acc['username']
|
||||||
|
|
||||||
def tweets(self) -> Iterator[Tweet]:
|
def tweets(self) -> Iterator[Tweet]:
|
||||||
for r in self.raw('tweet'):
|
for r in self.raw('tweet'):
|
||||||
yield Tweet(r)
|
yield Tweet(r, screen_name=self.screen_name())
|
||||||
|
|
||||||
|
|
||||||
def likes(self) -> Iterator[Like]:
|
def likes(self) -> Iterator[Like]:
|
||||||
# TODO ugh. would be nice to unify Tweet/Like interface
|
# TODO ugh. would be nice to unify Tweet/Like interface
|
||||||
# however, akeout only got tweetId, full text and url
|
# however, akeout only got tweetId, full text and url
|
||||||
for r in self.raw('like'):
|
for r in self.raw('like'):
|
||||||
yield Like(r)
|
yield Like(r, screen_name=self.screen_name())
|
||||||
|
|
||||||
|
|
||||||
def tweets() -> List[Tweet]:
|
def tweets() -> List[Tweet]:
|
||||||
|
@ -185,7 +201,7 @@ def test_tweet():
|
||||||
"in_reply_to_user_id_str" : "3748274"
|
"in_reply_to_user_id_str" : "3748274"
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
t = Tweet(json.loads(raw))
|
t = Tweet(json.loads(raw), screen_name='whatever')
|
||||||
assert t.permalink is not None
|
assert t.permalink is not None
|
||||||
assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
|
assert t.dt == datetime(year=2012, month=8, day=30, hour=7, minute=12, second=48, tzinfo=pytz.utc)
|
||||||
assert t.text == 'this is a test tweet'
|
assert t.text == 'this is a test tweet'
|
||||||
|
|
|
@ -3,11 +3,11 @@ Twitter data (tweets and favorites). Uses [[https://github.com/twintproject/twin
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import NamedTuple, Iterable
|
from typing import NamedTuple, Iterable, List
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from .common import PathIsh, get_files, LazyLogger, Json
|
from ..common import PathIsh, get_files, LazyLogger, Json
|
||||||
from .core.time import abbr_to_timezone
|
from ..core.time import abbr_to_timezone
|
||||||
|
|
||||||
from my.config import twint as config
|
from my.config import twint as config
|
||||||
|
|
||||||
|
@ -45,6 +45,12 @@ class Tweet(NamedTuple):
|
||||||
def text(self) -> str:
|
def text(self) -> str:
|
||||||
return self.row['tweet']
|
return self.row['tweet']
|
||||||
|
|
||||||
|
@property
|
||||||
|
def urls(self) -> List[str]:
|
||||||
|
ustr = self.row['urls']
|
||||||
|
if len(ustr) == 0:
|
||||||
|
return []
|
||||||
|
return ustr.split(',')
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def permalink(self) -> str:
|
def permalink(self) -> str:
|
||||||
|
|
Loading…
Add table
Reference in a new issue