my.zulip.organization: use tarfile instead of kopen/kompress

Potentially, a common interface (similar to ZipPath) will be extracted from this later.

relevant to https://github.com/karlicoss/HPI/issues/20
This commit is contained in:
Dima Gerasimov 2022-05-31 13:54:28 +01:00 committed by karlicoss
parent 4e59a65f9a
commit 5799c062a5

View file

@ -79,17 +79,22 @@ class Message:
from typing import Union from typing import Union
from itertools import count from itertools import count
import json import json
from ..core.error import Res from ..core import Res
from ..core.kompress import kopen, kexists # todo cache it
# TODO cache it
def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]: def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]:
# TODO hmm -- not sure if max lexicographically will actually be latest? # TODO hmm -- not sure if max lexicographically will actually be latest?
last = max(inputs()) last = max(inputs())
no_suffix = last.name.split('.')[0]
# TODO check that it also works with unpacked dirs??? subdir = last.with_suffix('').stem # there is a directory inside tar.gz
with kopen(last, f'{no_suffix}/realm.json') as f:
rj = json.load(f) # todo would be nice to switch it to unpacked dirs as well, similar to ZipPath
# I guess makes sense to have a special implementation for .tar.gz considering how common are they
import tarfile
from ..core.error import notnone
tfile = tarfile.open(last)
with notnone(tfile.extractfile(f'{subdir}/realm.json')) as fo:
rj = json.load(fo)
[sj] = rj['zerver_realm'] [sj] = rj['zerver_realm']
server = Server( server = Server(
@ -126,11 +131,12 @@ def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]:
for idx in count(start=1, step=1): for idx in count(start=1, step=1):
fname = f'messages-{idx:06}.json' fname = f'messages-{idx:06}.json'
fpath = f'{no_suffix}/{fname}' fpath = f'{subdir}/{fname}'
if not kexists(last, fpath): if fpath not in tfile.getnames():
# tarfile doesn't have .exists?
break break
with kopen(last, fpath) as f: with notnone(tfile.extractfile(fpath)) as fo:
mj = json.load(f) mj = json.load(fo)
# TODO handle zerver_usermessage # TODO handle zerver_usermessage
for j in mj['zerver_message']: for j in mj['zerver_message']:
try: try: