add like processing
This commit is contained in:
parent
a1f65754f9
commit
14a5a91685
1 changed file with 41 additions and 15 deletions
|
@ -10,7 +10,7 @@ Expects path to be set
|
||||||
|
|
||||||
|
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
from typing import Union, List, Dict, Set, Optional, Iterator, Any
|
from typing import Union, List, Dict, Set, Optional, Iterator, Any, NamedTuple
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import json
|
import json
|
||||||
import zipfile
|
import zipfile
|
||||||
|
@ -41,14 +41,18 @@ def _get_export() -> Path:
|
||||||
|
|
||||||
Tid = str
|
Tid = str
|
||||||
|
|
||||||
|
|
||||||
|
# TODO a bit messy... perhaps we do need DAL for twitter exports
|
||||||
|
Json = Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
# TODO make sure it's not used anywhere else and simplify interface
|
# TODO make sure it's not used anywhere else and simplify interface
|
||||||
class Tweet:
|
class Tweet(NamedTuple):
|
||||||
def __init__(self, tw: Dict[str, Any]) -> None:
|
raw: Json
|
||||||
self.tw = tw
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tid(self) -> Tid:
|
def tid(self) -> Tid:
|
||||||
return self.tw['id_str']
|
return self.raw['id_str']
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def permalink(self) -> str:
|
def permalink(self) -> str:
|
||||||
|
@ -56,33 +60,44 @@ class Tweet:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dt(self) -> datetime:
|
def dt(self) -> datetime:
|
||||||
dts = self.tw['created_at']
|
dts = self.raw['created_at']
|
||||||
return datetime.strptime(dts, '%a %b %d %H:%M:%S %z %Y')
|
return datetime.strptime(dts, '%a %b %d %H:%M:%S %z %Y')
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def text(self) -> str:
|
def text(self) -> str:
|
||||||
return self.tw['full_text']
|
return self.raw['full_text']
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def entities(self):
|
def entities(self):
|
||||||
return self.tw['entities']
|
return self.raw['entities']
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return str(self.tw)
|
return str(self.raw)
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return repr(self.tw)
|
return repr(self.raw)
|
||||||
|
|
||||||
|
|
||||||
|
class Like(NamedTuple):
|
||||||
|
raw: Json
|
||||||
|
|
||||||
|
@property
|
||||||
|
def tid(self) -> Tid:
|
||||||
|
return self.raw['tweetId']
|
||||||
|
|
||||||
|
@property
|
||||||
|
def text(self) -> str:
|
||||||
|
return self.raw['fullText']
|
||||||
|
|
||||||
# TODO a bit messy... perhaps we do need DAL for twitter exports
|
|
||||||
|
|
||||||
class ZipExport:
|
class ZipExport:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def raw(self): # TODO Json in common?
|
def raw(self, what: str): # TODO Json in common?
|
||||||
epath = _get_export()
|
epath = _get_export()
|
||||||
logger.info('processing: %s', epath)
|
logger.info('processing: %s %s', epath, what)
|
||||||
ddd = zipfile.ZipFile(epath).read('tweet.js').decode('utf8')
|
ddd = zipfile.ZipFile(epath).read(what).decode('utf8')
|
||||||
start = ddd.index('[')
|
start = ddd.index('[')
|
||||||
ddd = ddd[start:]
|
ddd = ddd[start:]
|
||||||
for j in json.loads(ddd):
|
for j in json.loads(ddd):
|
||||||
|
@ -90,14 +105,25 @@ class ZipExport:
|
||||||
|
|
||||||
|
|
||||||
def tweets(self) -> Iterator[Tweet]:
|
def tweets(self) -> Iterator[Tweet]:
|
||||||
for r in self.raw():
|
for r in self.raw('tweet.js'):
|
||||||
yield Tweet(r)
|
yield Tweet(r)
|
||||||
|
|
||||||
|
|
||||||
|
def likes(self) -> Iterator[Like]:
|
||||||
|
# TODO ugh. would be nice to unify Tweet/Like interface
|
||||||
|
# however, takeout only got tweetId, full text and url
|
||||||
|
for r in self.raw('like.js'):
|
||||||
|
yield Like(r)
|
||||||
|
|
||||||
|
|
||||||
def tweets_all() -> List[Tweet]:
|
def tweets_all() -> List[Tweet]:
|
||||||
return list(sorted(ZipExport().tweets(), key=lambda t: t.dt))
|
return list(sorted(ZipExport().tweets(), key=lambda t: t.dt))
|
||||||
|
|
||||||
|
|
||||||
|
def likes_all() -> List[Like]:
|
||||||
|
return list(ZipExport().likes())
|
||||||
|
|
||||||
|
|
||||||
def predicate(p) -> List[Tweet]:
|
def predicate(p) -> List[Tweet]:
|
||||||
return [t for t in tweets_all() if p(t)]
|
return [t for t in tweets_all() if p(t)]
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue