my.zulip: extract Server/Sender objects, experiment with normalised and denormalised objects
This commit is contained in:
parent
a1f03f9c02
commit
a39b5605ae
1 changed files with 97 additions and 20 deletions
|
@ -20,49 +20,99 @@ def inputs() -> Sequence[Path]:
|
||||||
|
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class Server:
    """A Zulip server (realm) record.

    Built from the single entry of ``zerver_realm`` in the export's
    ``realm.json``.
    """

    id: int
    string_id: str
    name: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Sender:
    """A message author, read from the user profile tables of ``realm.json``."""

    id: int
    # todo make optional?
    # for cross-realm profiles (e.g. zulip bot) this is filled with the email,
    # since those entries don't seem to carry a usable name
    full_name: str
    email: str
|
||||||
|
|
||||||
|
|
||||||
|
# from the data, seems that subjects are completely implicit and determined by name?
|
||||||
|
# streams have ids (can extract from realm/zerver_stream), but unclear how to correlate messages/topics to streams?
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class _Message:
    """Normalised message: refers to its sender and server by id only.

    ``messages()`` resolves the ids into full objects to build ``Message``.
    """

    # todo hmm not sure what would be a good field order..
    id: int
    sent: datetime
    # TODO hmm kinda unclear whether it uses UTC or not??
    # https://github.com/zulip/zulip/blob/0c2e4eec200d986a9a020f3e9a651d27216e0e85/zerver/models.py#L3071-L3076
    # it keeps it tz aware.. but not sure what happens after?
    # https://github.com/zulip/zulip/blob/1dfddffc8dac744fd6a6fbfd937018074c8bb166/zproject/computed_settings.py#L151
    subject: str
    sender_id: int
    server_id: int
    content: str  # TODO hmm, it keeps markdown, not sure how/whether it's worth to prettify at all?
    # TODO recipient??
    # todo keep raw item instead? not sure
|
||||||
|
|
||||||
|
|
||||||
# TODO hmm kinda unclear whether it uses UTC or not??
|
@dataclass(frozen=True)
class Message:
    """Denormalised message: carries the full Sender and Server objects.

    Same fields as ``_Message``, except that ``sender_id``/``server_id`` are
    replaced by the objects they refer to.
    """

    id: int
    sent: datetime
    subject: str
    sender: Sender
    server: Server
    content: str
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Union
|
||||||
from itertools import count
|
from itertools import count
|
||||||
import json
|
import json
|
||||||
from ..core.error import Res
|
from ..core.error import Res
|
||||||
from ..core.kompress import kopen, kexists
|
from ..core.kompress import kopen, kexists
|
||||||
# TODO check that it also works with unpacked dirs???
|
# TODO cache it
|
||||||
def messages() -> Iterator[Res[Message]]:
|
def _entities() -> Iterator[Res[Union[Server, Sender, _Message]]]:
|
||||||
# TODO hmm -- not sure if max lexicographically will actually be latest?
|
# TODO hmm -- not sure if max lexicographically will actually be latest?
|
||||||
last = max(inputs())
|
last = max(inputs())
|
||||||
no_suffix = last.name.split('.')[0]
|
no_suffix = last.name.split('.')[0]
|
||||||
|
|
||||||
|
# TODO check that it also works with unpacked dirs???
|
||||||
with kopen(last, f'{no_suffix}/realm.json') as f:
|
with kopen(last, f'{no_suffix}/realm.json') as f:
|
||||||
rj = json.load(f)
|
rj = json.load(f)
|
||||||
id2user: Dict[int, str] = {}
|
|
||||||
for j in rj['zerver_userprofile']:
|
|
||||||
id2user[j['id']] = j['full_name']
|
|
||||||
for j in rj['zerver_userprofile_crossrealm']: # e.g. zulip bot
|
|
||||||
id2user[j['id']] = j['email']
|
|
||||||
|
|
||||||
def _parse_message(j: Json) -> Message:
|
[sj] = rj['zerver_realm']
|
||||||
|
server = Server(
|
||||||
|
id=sj['id'],
|
||||||
|
string_id=sj['string_id'],
|
||||||
|
name=sj['name'],
|
||||||
|
)
|
||||||
|
yield server
|
||||||
|
|
||||||
|
for j in rj['zerver_userprofile']:
|
||||||
|
yield Sender(
|
||||||
|
id=j['id'],
|
||||||
|
full_name=j['full_name'],
|
||||||
|
email=j['email'],
|
||||||
|
)
|
||||||
|
|
||||||
|
for j in rj['zerver_userprofile_crossrealm']: # e.g. zulip bot
|
||||||
|
yield Sender(
|
||||||
|
id=j['id'],
|
||||||
|
full_name=j['email'], # doesn't seem to have anything
|
||||||
|
email=j['email'],
|
||||||
|
)
|
||||||
|
|
||||||
|
def _parse_message(j: Json) -> _Message:
|
||||||
ds = j['date_sent']
|
ds = j['date_sent']
|
||||||
return Message(
|
return _Message(
|
||||||
id = j['id'],
|
id = j['id'],
|
||||||
sent = datetime.fromtimestamp(ds),
|
sent = datetime.fromtimestamp(ds),
|
||||||
subject = j['subject'],
|
subject = j['subject'],
|
||||||
sender = id2user[j['sender']],
|
sender_id = j['sender'],
|
||||||
content = j['content'],
|
server_id = server.id,
|
||||||
|
content = j['content'],
|
||||||
)
|
)
|
||||||
|
|
||||||
for idx in count(start=1, step=1):
|
for idx in count(start=1, step=1):
|
||||||
|
@ -78,3 +128,30 @@ def messages() -> Iterator[Res[Message]]:
|
||||||
yield _parse_message(j)
|
yield _parse_message(j)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield e
|
yield e
|
||||||
|
|
||||||
|
|
||||||
|
def messages() -> Iterator[Res[Message]]:
    """Yield denormalised messages, passing errors through as values.

    Consumes ``_entities()``: Server and Sender items are indexed by id as
    they arrive, and each ``_Message`` is expanded into a ``Message`` holding
    the objects it refers to. This relies on a server/sender being emitted
    before any message that references it.

    Exceptions coming out of ``_entities()`` are re-yielded unchanged. A
    message whose sender or server id has not been seen is yielded as a
    RuntimeError instead of aborting the whole iteration.

    Raises:
        RuntimeError: if ``_entities()`` produces an object of an unexpected
            type (should be unreachable).
    """
    id2sender: Dict[int, Sender] = {}
    id2server: Dict[int, Server] = {}
    for x in _entities():
        if isinstance(x, Exception):
            yield x
        elif isinstance(x, Server):
            id2server[x.id] = x
        elif isinstance(x, Sender):
            id2sender[x.id] = x
        elif isinstance(x, _Message):
            try:
                sender = id2sender[x.sender_id]
                server = id2server[x.server_id]
            except KeyError as e:
                # keep the stream alive: one dangling reference shouldn't
                # prevent the remaining messages from being processed
                err = RuntimeError(f'message {x.id}: unknown sender/server id {e}')
                err.__cause__ = e
                yield err
                continue
            # TODO a bit copypasty... wonder if possible to mixin or something instead
            yield Message(
                id=x.id,
                sent=x.sent,
                subject=x.subject,
                sender=sender,
                server=server,
                content=x.content,
            )
        else:
            # explicit raise rather than `assert False`, which is stripped under -O
            raise RuntimeError(f'unexpected entity: {x!r}')  # should be unreachable
|
||||||
|
|
Loading…
Add table
Reference in a new issue