my.zulip: extract Server/Sender objects, experiment with normalised and denormalised objects

This commit is contained in:
Dima Gerasimov 2022-01-28 00:46:03 +00:00 committed by karlicoss
parent a1f03f9c02
commit a39b5605ae

View file

@ -20,48 +20,98 @@ def inputs() -> Sequence[Path]:
from datetime import datetime from datetime import datetime
@dataclass(frozen=True)
class Server:
    # One Zulip server (realm) of the export.
    # The fields mirror the 'id', 'string_id' and 'name' keys of the
    # 'zerver_realm' entry in realm.json.
    id: int
    string_id: str
    name: str
@dataclass(frozen=True)
class Sender:
    # A message author, built from the 'zerver_userprofile' and
    # 'zerver_userprofile_crossrealm' entries of realm.json.
    id: int
    # todo make optional?
    # (cross-realm users, e.g. the zulip bot, get their email here as well)
    full_name: str
    email: str
# from the data, seems that subjects are completely implicit and determined by name?
# streams have ids (can extract from realm/zerver_stream), but unclear how to correlate messages/topics to streams?
@dataclass(frozen=True)
class _Message:
    # Normalised message: the sender and the server are referenced by id only.
    # todo hmm not sure what would be a good field order..
    id: int
    sent: datetime
    # TODO hmm kinda unclear whether it uses UTC or not??
    # https://github.com/zulip/zulip/blob/0c2e4eec200d986a9a020f3e9a651d27216e0e85/zerver/models.py#L3071-L3076
    # it keeps it tz aware.. but not sure what happens after?
    # https://github.com/zulip/zulip/blob/1dfddffc8dac744fd6a6fbfd937018074c8bb166/zproject/computed_settings.py#L151
    subject: str
    sender_id: int
    server_id: int
    content: str  # TODO hmm, it keeps markdown, not sure how/whether it's worth to prettify at all?
    # TODO recipient??
    # todo keep raw item instead? not sure
@dataclass(frozen=True)
class Message:
    # Denormalised message: carries the Sender and Server objects themselves
    # rather than their ids.
    id: int
    sent: datetime
    subject: str
    sender: Sender
    server: Server
    content: str
from typing import Union
from itertools import count from itertools import count
import json import json
from ..core.error import Res from ..core.error import Res
from ..core.kompress import kopen, kexists from ..core.kompress import kopen, kexists
# TODO check that it also works with unpacked dirs??? # TODO cache it
def messages() -> Iterator[Res[Message]]: def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]:
# TODO hmm -- not sure if max lexicographically will actually be latest? # TODO hmm -- not sure if max lexicographically will actually be latest?
last = max(inputs()) last = max(inputs())
no_suffix = last.name.split('.')[0] no_suffix = last.name.split('.')[0]
# TODO check that it also works with unpacked dirs???
with kopen(last, f'{no_suffix}/realm.json') as f: with kopen(last, f'{no_suffix}/realm.json') as f:
rj = json.load(f) rj = json.load(f)
id2user: Dict[int, str] = {}
for j in rj['zerver_userprofile']:
id2user[j['id']] = j['full_name']
for j in rj['zerver_userprofile_crossrealm']: # e.g. zulip bot
id2user[j['id']] = j['email']
def _parse_message(j: Json) -> Message: [sj] = rj['zerver_realm']
server = Server(
id=sj['id'],
string_id=sj['string_id'],
name=sj['name'],
)
yield server
for j in rj['zerver_userprofile']:
yield Sender(
id=j['id'],
full_name=j['full_name'],
email=j['email'],
)
for j in rj['zerver_userprofile_crossrealm']: # e.g. zulip bot
yield Sender(
id=j['id'],
full_name=j['email'], # doesn't seem to have anything
email=j['email'],
)
def _parse_message(j: Json) -> _Message:
ds = j['date_sent'] ds = j['date_sent']
return Message( return _Message(
id = j['id'], id = j['id'],
sent = datetime.fromtimestamp(ds), sent = datetime.fromtimestamp(ds),
subject = j['subject'], subject = j['subject'],
sender = id2user[j['sender']], sender_id = j['sender'],
server_id = server.id,
content = j['content'], content = j['content'],
) )
@ -78,3 +128,30 @@ def messages() -> Iterator[Res[Message]]:
yield _parse_message(j) yield _parse_message(j)
except Exception as e: except Exception as e:
yield e yield e
def messages() -> Iterator[Res[Message]]:
    """
    Yield denormalised messages, with Sender and Server objects attached.

    Consumes _entities() and remembers every Server and Sender seen so far,
    keyed by id. Each _Message is resolved against those lookups and yielded
    as a Message.

    Errors coming out of _entities() are passed through unchanged. A message
    that refers to a sender or server id which has not been seen is yielded
    as an error too, so one bad message does not abort the whole stream.
    """
    id2sender: Dict[int, Sender] = {}
    id2server: Dict[int, Server] = {}
    for x in _entities():
        if isinstance(x, Exception):
            yield x
            continue
        if isinstance(x, Server):
            id2server[x.id] = x
            continue
        if isinstance(x, Sender):
            id2sender[x.id] = x
            continue
        if isinstance(x, _Message):
            try:
                sender = id2sender[x.sender_id]
                server = id2server[x.server_id]
            except KeyError as e:
                # keep the Res contract: report the problem and carry on
                err = RuntimeError(f'message {x.id}: unknown sender/server id {e}')
                err.__cause__ = e
                yield err
                continue
            # TODO a bit copypasty... wonder if possible to mixin or something instead
            yield Message(
                id=x.id,
                sent=x.sent,
                subject=x.subject,
                sender=sender,
                server=server,
                content=x.content,
            )
            continue
        # should be unreachable
        # explicit raise, since a bare assert is stripped under `python -O`
        raise AssertionError(f'unexpected entity: {x!r}')