core/sqlite: experiment with typing SELECT queries (to some extent)

Ideally it would be cool to use TypedDict here somehow, but perhaps that will only be possible after variadic generics (PEP 646): https://peps.python.org/pep-0646
This commit is contained in:
Dima Gerasimov 2022-06-03 21:44:27 +01:00 committed by karlicoss
parent 7a1b7b1554
commit bf3dd6e931
3 changed files with 57 additions and 17 deletions

View file

@ -56,7 +56,7 @@ import json
from typing import Union
from ..core import Res, assert_never
import sqlite3
from ..core.sqlite import sqlite_connect_immutable
from ..core.sqlite import sqlite_connect_immutable, select
EntitiesRes = Res[Union[Person, _Message]]
@ -72,20 +72,22 @@ def _handle_db(db: sqlite3.Connection) -> Iterator[EntitiesRes]:
# on the other, it's somewhat of a complication, and
# would be nice to have something type-directed for sql queries though
# e.g. with typeddict or something, so the number of parameters to the sql query matches?
for row in db.execute(f'SELECT user_id, user_name FROM conversation_info'):
(user_id, user_name) = row
for (user_id, user_name) in select(
('user_id', 'user_name'),
'FROM conversation_info',
db=db,
):
yield Person(
user_id=user_id,
user_name=user_name,
)
# has sender_name, but it's always None
for row in db.execute(f'''
SELECT id, conversation_id, created_timestamp, is_incoming, payload_type, payload, reply_to_id
FROM message
ORDER BY created_timestamp
'''):
(id, conversation_id, created, is_incoming, payload_type, payload, reply_to_id) = row
# note: has sender_name, but it's always None
for ( id, conversation_id , created , is_incoming , payload_type , payload , reply_to_id) in select(
('id', 'conversation_id', 'created_timestamp', 'is_incoming', 'payload_type', 'payload', 'reply_to_id'),
'FROM message ORDER BY created_timestamp',
db=db
):
try:
key = {'TEXT': 'text', 'QUESTION_GAME': 'text', 'IMAGE': 'url', 'GIF': 'url'}[payload_type]
text = json.loads(payload)[key]

View file

@ -50,3 +50,43 @@ def sqlite_copy_and_open(db: PathIsh) -> sqlite3.Connection:
sqlite_backup(source=conn, dest=dest)
conn.close()
return dest
from typing import Tuple, Any, Iterator
# NOTE hmm, so this kinda works
# V = TypeVar('V', bound=Tuple[Any, ...])
# def select(cols: V, rest: str, *, db: sqlite3.Connection) -> Iterator[V]:
# but sadly when we pass columns (Tuple[str, ...]), it seems to bind this type to V?
# and then the return type ends up as Iterator[Tuple[str, ...]], which isn't desirable :(
# a bit annoying to have this copy-pasting, but hopefully not a big issue
from typing import overload
@overload
def select(cols: Tuple[str], rest: str, *, db: sqlite3.Connection) -> Iterator[Tuple[Any]]: ...
@overload
def select(cols: Tuple[str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[Tuple[Any, Any]]: ...
@overload
def select(cols: Tuple[str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[Tuple[Any, Any, Any]]: ...
@overload
def select(cols: Tuple[str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[Tuple[Any, Any, Any, Any]]: ...
@overload
def select(cols: Tuple[str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[Tuple[Any, Any, Any, Any, Any]]: ...
@overload
def select(cols: Tuple[str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[Tuple[Any, Any, Any, Any, Any, Any]]: ...
@overload
def select(cols: Tuple[str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any]]: ...
@overload
def select(cols: Tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[Tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
def select(cols, rest, *, db):
    """
    Run ``SELECT <cols> <rest>`` against ``db`` and return the result of ``db.execute``.

    The overloads above tie the arity of ``cols`` (1 to 8 column names) to the
    arity of the row tuples in the declared return type, so a type checker can
    verify that callers unpack as many values as they asked for.

    ``cols`` is joined with commas and ``rest`` (e.g. ``'FROM t ORDER BY x'``) is
    appended after a single space; both are inserted into the query verbatim,
    without any quoting or escaping.
    """
    # NOTE: db is deliberately the trailing (keyword-only) argument -- call sites format nicer that way
    column_list = ','.join(cols)
    query = ' '.join(('SELECT', column_list, rest))
    return db.execute(query)

View file

@ -90,7 +90,7 @@ import json
from typing import Union
from ..core import Res, assert_never
import sqlite3
from ..core.sqlite import sqlite_connect_immutable
from ..core.sqlite import sqlite_connect_immutable, select
def _entities() -> Iterator[Res[Union[User, _Message]]]:
# NOTE: definitely need to merge multiple, app seems to recycle old messages
# TODO: hmm hard to guarantee timestamp ordering when we use synthetic input data...
@ -98,15 +98,14 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
for f in inputs():
with sqlite_connect_immutable(f) as db:
for row in db.execute(f'SELECT user_id, thread_info FROM threads'):
(self_uid, js,) = row
for (self_uid, thread_json) in select(('user_id', 'thread_info'), 'FROM threads', db=db):
# ugh wtf?? no easier way to extract your own user id/name??
yield User(
id=str(self_uid),
full_name='You',
username='you',
)
j = json.loads(js)
j = json.loads(thread_json)
for r in j['recipients']:
yield User(
id=str(r['id']), # for some reason it's int in the db
@ -114,10 +113,9 @@ def _entities() -> Iterator[Res[Union[User, _Message]]]:
username=r['username'],
)
for row in db.execute(f'SELECT message FROM messages ORDER BY timestamp'):
for (msg_json,) in select(('message',), 'FROM messages ORDER BY timestamp', db=db):
# eh, seems to contain everything in json?
(js,) = row
j = json.loads(js)
j = json.loads(msg_json)
try:
m = _parse_message(j)
if m is not None: