From a39b5605ae7e59527bab17a565ab3bbe86cf06cf Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 28 Jan 2022 00:46:03 +0000 Subject: [PATCH] my.zulip: extract Server/Sender objects, experiment with normalised and denormalised objects --- my/zulip/organization.py | 117 ++++++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 20 deletions(-) diff --git a/my/zulip/organization.py b/my/zulip/organization.py index 3be721f..37f13ce 100644 --- a/my/zulip/organization.py +++ b/my/zulip/organization.py @@ -20,49 +20,99 @@ def inputs() -> Sequence[Path]: from datetime import datetime + + @dataclass(frozen=True) -class Message: +class Server: + id: int + string_id: str + name: str + + +@dataclass(frozen=True) +class Sender: + id: int + # todo make optional? + full_name: str + email: str + + +# from the data, seems that subjects are completely implicit and determined by name? +# streams have ids (can extract from realm/zerver_stream), but unclear how to correlate messages/topics to streams? + +@dataclass(frozen=True) +class _Message: + # todo hmm not sure what would be a good field order.. id: int sent: datetime + # TODO hmm kinda unclear whether it uses UTC or not?? + # https://github.com/zulip/zulip/blob/0c2e4eec200d986a9a020f3e9a651d27216e0e85/zerver/models.py#L3071-L3076 + # it keeps it tz aware.. but not sure what happens after? + # https://github.com/zulip/zulip/blob/1dfddffc8dac744fd6a6fbfd937018074c8bb166/zproject/computed_settings.py#L151 subject: str - sender: str + sender_id: int + server_id: int content: str # TODO hmm, it keeps markdown, not sure how/whether it's worth to prettify at all? # TODO recipient?? # todo keep raw item instead? not sure -# TODO hmm kinda unclear whether it uses UTC or not?? -# https://github.com/zulip/zulip/blob/0c2e4eec200d986a9a020f3e9a651d27216e0e85/zerver/models.py#L3071-L3076 -# it keeps it tz aware.. but not sure what happens after? -# https://github.com/zulip/zulip/blob/1dfddffc8dac744fd6a6fbfd937018074c8bb166/zproject/computed_settings.py#L151 +@dataclass(frozen=True) +class Message: + id: int + sent: datetime + subject: str + sender: Sender + server: Server + content: str +from typing import Union from itertools import count import json from ..core.error import Res from ..core.kompress import kopen, kexists -# TODO check that it also works with unpacked dirs??? -def messages() -> Iterator[Res[Message]]: +# TODO cache it +def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]: # TODO hmm -- not sure if max lexicographically will actually be latest? last = max(inputs()) no_suffix = last.name.split('.')[0] + # TODO check that it also works with unpacked dirs??? with kopen(last, f'{no_suffix}/realm.json') as f: rj = json.load(f) - id2user: Dict[int, str] = {} - for j in rj['zerver_userprofile']: - id2user[j['id']] = j['full_name'] - for j in rj['zerver_userprofile_crossrealm']: # e.g. zulip bot - id2user[j['id']] = j['email'] - def _parse_message(j: Json) -> Message: + [sj] = rj['zerver_realm'] + server = Server( + id=sj['id'], + string_id=sj['string_id'], + name=sj['name'], + ) + yield server + + for j in rj['zerver_userprofile']: + yield Sender( + id=j['id'], + full_name=j['full_name'], + email=j['email'], + ) + + for j in rj['zerver_userprofile_crossrealm']: # e.g. zulip bot + yield Sender( + id=j['id'], + full_name=j['email'], # doesn't seem to have anything + email=j['email'], + ) + + def _parse_message(j: Json) -> _Message: ds = j['date_sent'] - return Message( - id = j['id'], - sent = datetime.fromtimestamp(ds), - subject = j['subject'], - sender = id2user[j['sender']], - content = j['content'], + return _Message( + id = j['id'], + sent = datetime.fromtimestamp(ds), + subject = j['subject'], + sender_id = j['sender'], + server_id = server.id, + content = j['content'], ) for idx in count(start=1, step=1): @@ -78,3 +128,30 @@ def messages() -> Iterator[Res[Message]]: yield _parse_message(j) except Exception as e: yield e + + +def messages() -> Iterator[Res[Message]]: + id2sender: Dict[int, Sender] = {} + id2server: Dict[int, Server] = {} + for x in _entities(): + if isinstance(x, Exception): + yield x + continue + if isinstance(x, Server): + id2server[x.id] = x + continue + if isinstance(x, Sender): + id2sender[x.id] = x + continue + if isinstance(x, _Message): + # TODO a bit copypasty... wonder if possible to mixin or something instead + yield Message( + id=x.id, + sent=x.sent, + subject=x.subject, + sender=id2sender[x.sender_id], + server=id2server[x.server_id], + content=x.content, + ) + continue + assert False # should be unreachable