handle updated twitter archive
This commit is contained in:
parent
66790cb9f4
commit
6c5d713a17
2 changed files with 34 additions and 9 deletions
|
@ -45,3 +45,12 @@ class CPath(PosixPath):
|
||||||
|
|
||||||
|
|
||||||
open = kopen # TODO FIXME remove?
|
open = kopen # TODO FIXME remove?
|
||||||
|
|
||||||
|
|
||||||
|
# meh
|
||||||
|
def kexists(path: PathIsh, subpath: str) -> bool:
    """Return True if ``subpath`` can be opened inside ``path`` via ``kopen``.

    The check is done by actually opening the entry. Any exception raised
    while opening is treated as "does not exist" (deliberately best-effort),
    so this never raises for a missing or unreadable entry.

    Args:
        path: location handed to ``kopen`` as its first argument.
        subpath: name of the entry to look for, passed through to ``kopen``.

    Returns:
        True when ``kopen(path, subpath)`` succeeds, False otherwise.
    """
    try:
        # Close the probe handle straight away: only the success of the open
        # matters, and leaving it open would leak a file handle per call.
        with kopen(path, subpath):
            return True
    except Exception:
        return False
|
||||||
|
|
|
@ -21,6 +21,7 @@ import zipfile
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
from .common import PathIsh, get_files, LazyLogger
|
from .common import PathIsh, get_files, LazyLogger
|
||||||
|
from .kython import kompress
|
||||||
|
|
||||||
|
|
||||||
logger = LazyLogger('my.twitter')
|
logger = LazyLogger('my.twitter')
|
||||||
|
@ -102,29 +103,44 @@ class Like(NamedTuple):
|
||||||
|
|
||||||
class ZipExport:
|
class ZipExport:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
pass
|
self.epath = _get_export()
|
||||||
|
|
||||||
|
self.old_format = False # changed somewhere around 2020.03
|
||||||
|
if not kompress.kexists(self.epath, 'Your archive.html'):
|
||||||
|
self.old_format = True
|
||||||
|
|
||||||
|
|
||||||
def raw(self, what: str): # TODO Json in common?
|
def raw(self, what: str): # TODO Json in common?
|
||||||
epath = _get_export()
|
logger.info('processing: %s %s', self.epath, what)
|
||||||
logger.info('processing: %s %s', epath, what)
|
|
||||||
ddd = zipfile.ZipFile(epath).read(what).decode('utf8')
|
path = what
|
||||||
|
if not self.old_format:
|
||||||
|
path = 'data/' + path
|
||||||
|
path += '.js'
|
||||||
|
|
||||||
|
with kompress.kopen(self.epath, path) as fo:
|
||||||
|
ddd = fo.read().decode('utf8')
|
||||||
start = ddd.index('[')
|
start = ddd.index('[')
|
||||||
ddd = ddd[start:]
|
ddd = ddd[start:]
|
||||||
for j in json.loads(ddd):
|
for j in json.loads(ddd):
|
||||||
|
if set(j.keys()) == {what}:
|
||||||
|
# newer format
|
||||||
|
yield j[what]
|
||||||
|
else:
|
||||||
|
# older format
|
||||||
yield j
|
yield j
|
||||||
|
|
||||||
|
|
||||||
def tweets(self) -> Iterator[Tweet]:
|
def tweets(self) -> Iterator[Tweet]:
|
||||||
for r in self.raw('tweet.js'):
|
for r in self.raw('tweet'):
|
||||||
yield Tweet(r)
|
yield Tweet(r)
|
||||||
|
|
||||||
|
|
||||||
def likes(self) -> Iterator[Like]:
|
def likes(self) -> Iterator[Like]:
|
||||||
# TODO ugh. would be nice to unify Tweet/Like interface
|
# TODO ugh. would be nice to unify Tweet/Like interface
|
||||||
# however, takeout only got tweetId, full text and url
|
# however, takeout only got tweetId, full text and url
|
||||||
for r in self.raw('like.js'):
|
for r in self.raw('like'):
|
||||||
assert set(r.keys()) == {'like'}
|
yield Like(r)
|
||||||
yield Like(r['like'])
|
|
||||||
|
|
||||||
|
|
||||||
def tweets() -> List[Tweet]:
|
def tweets() -> List[Tweet]:
|
||||||
|
|
Loading…
Add table
Reference in a new issue