From 5799c062a5e901c2aea1ee9241d7efcfb3dac0d0 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Tue, 31 May 2022 13:54:28 +0100 Subject: [PATCH] my.zulip.organization: use tarfile instead of kopen/kompress potentially will extract some common interface here like ZipPath relevant to https://github.com/karlicoss/HPI/issues/20 --- my/zulip/organization.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/my/zulip/organization.py b/my/zulip/organization.py index b9bd190..3cfe0df 100644 --- a/my/zulip/organization.py +++ b/my/zulip/organization.py @@ -79,17 +79,22 @@ class Message: from typing import Union from itertools import count import json -from ..core.error import Res -from ..core.kompress import kopen, kexists -# TODO cache it +from ..core import Res +# todo cache it def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]: # TODO hmm -- not sure if max lexicographically will actually be latest? last = max(inputs()) - no_suffix = last.name.split('.')[0] - # TODO check that it also works with unpacked dirs??? - with kopen(last, f'{no_suffix}/realm.json') as f: - rj = json.load(f) + subdir = last.with_suffix('').stem # there is a directory inside tar.gz + + # todo would be nice to switch it to unpacked dirs as well, similar to ZipPath + # I guess makes sense to have a special implementation for .tar.gz considering how common are they + import tarfile + from ..core.error import notnone + + tfile = tarfile.open(last) + with notnone(tfile.extractfile(f'{subdir}/realm.json')) as fo: + rj = json.load(fo) [sj] = rj['zerver_realm'] server = Server( @@ -126,11 +131,12 @@ def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]: for idx in count(start=1, step=1): fname = f'messages-{idx:06}.json' - fpath = f'{no_suffix}/{fname}' - if not kexists(last, fpath): + fpath = f'{subdir}/{fname}' + if fpath not in tfile.getnames(): + # tarfile doesn't have .exists? break - with kopen(last, fpath) as f: - mj = json.load(f) + with notnone(tfile.extractfile(fpath)) as fo: + mj = json.load(fo) # TODO handle zerver_usermessage for j in mj['zerver_message']: try: