# HPI/my/core/sqlite.py (179 lines, 6.7 KiB, Python)

from __future__ import annotations
from .internal import assert_subpackage # noqa: I001
assert_subpackage(__name__)
import shutil
import sqlite3
from collections.abc import Iterator
from contextlib import contextmanager
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Callable, Literal, Union, overload
from .common import PathIsh
from .compat import assert_never
def sqlite_connect_immutable(db: PathIsh) -> sqlite3.Connection:
    """
    Open ``db`` via an SQLite ``file:`` URI with the ``immutable=1`` query parameter.

    The connection is returned as is; closing it is up to the caller.
    """
    uri = 'file:' + str(db) + '?immutable=1'
    return sqlite3.connect(uri, uri=True)
def test_sqlite_connect_immutable(tmp_path: Path) -> None:
    """Dropping a table must fail through an immutable connection and succeed through a regular one."""
    db = str(tmp_path / 'db.sqlite')
    with sqlite3.connect(db) as setup:
        setup.execute('CREATE TABLE testtable (col)')

    import pytest

    expect_readonly = pytest.raises(sqlite3.OperationalError, match='readonly database')
    with expect_readonly, sqlite_connect_immutable(db) as ro:
        ro.execute('DROP TABLE testtable')

    # succeeds without immutable
    with sqlite3.connect(db) as rw:
        rw.execute('DROP TABLE testtable')
# Type of a custom row factory: a callable taking (cursor, row) and returning any row representation.
SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]
def dict_factory(cursor, row):
    """Row factory that maps each column name (first item of every ``cursor.description`` entry) to the row's value."""
    return {column[0]: value for column, value in zip(cursor.description, row)}
# A row factory specification: either a custom callable or one of the string shortcuts 'row' / 'dict'.
Factory = Union[SqliteRowFactory, Literal['row', 'dict']]
@contextmanager
def sqlite_connection(db: PathIsh, *, immutable: bool = False, row_factory: Factory | None = None) -> Iterator[sqlite3.Connection]:
dbp = f'file:{db}'
# https://www.sqlite.org/draft/uri.html#uriimmutable
if immutable:
# assert results in nicer error than sqlite3.OperationalError
assert Path(db).exists(), db
dbp = f'{dbp}?immutable=1'
row_factory_: Any = None
if row_factory is not None:
if callable(row_factory):
row_factory_ = row_factory
elif row_factory == 'row':
row_factory_ = sqlite3.Row
elif row_factory == 'dict':
row_factory_ = dict_factory
else:
assert_never()
conn = sqlite3.connect(dbp, uri=True)
try:
conn.row_factory = row_factory_
with conn:
yield conn
finally:
# Connection context manager isn't actually closing the connection, only keeps transaction
conn.close()
# TODO come up with a better name?
# NOTE: this is tested by tests/sqlite.py::test_sqlite_read_with_wal
def sqlite_copy_and_open(db: PathIsh) -> sqlite3.Connection:
    """
    'Snapshots' database and opens by making a deep copy of it including journal/WAL files

    The database file and its ``<name>-*`` siblings (except ``-shm``) are copied into a temporary directory,
    the copy is loaded into an in-memory database via ``Connection.backup``, and the in-memory connection is returned.
    The temporary copy is gone by the time this returns; closing the returned connection is up to the caller.
    """
    dp = Path(db)
    # TODO make atomic/check mtimes or something
    dest = sqlite3.connect(':memory:')
    try:
        with TemporaryDirectory() as td:
            tdir = Path(td)
            # match siblings by literal prefix rather than a glob pattern:
            # glob metacharacters in the file name (e.g. '[') would otherwise make us silently miss the WAL/journal
            prefix = dp.name + '-'
            # shm should be recreated from scratch -- safer not to copy perhaps
            siblings = [p for p in dp.parent.iterdir() if p.name.startswith(prefix) and not p.name.endswith('-shm')]
            for p in [dp, *siblings]:
                shutil.copy(p, tdir / p.name)
            conn = sqlite3.connect(str(tdir / dp.name))
            try:
                conn.backup(target=dest)
            finally:
                # close even if backup fails, and before the temporary directory is removed
                conn.close()
    except BaseException:
        # don't leak the in-memory connection if the snapshot couldn't be taken
        dest.close()
        raise
    return dest
# NOTE hmm, so this kinda works
# V = TypeVar('V', bound=Tuple[Any, ...])
# def select(cols: V, rest: str, *, db: sqlite3.Connection) -> Iterator[V]:
# but sadly when we pass columns (Tuple[str, ...]), it seems to bind this type to V?
# and then the return type ends up as Iterator[Tuple[str, ...]], which isn't desirable :(
# a bit annoying to have this copy-pasting, but hopefully not a big issue
# fmt: off
@overload
def select(cols: tuple[str], rest: str, *, db: sqlite3.Connection) -> Iterator[tuple[Any]]: ...
@overload
def select(cols: tuple[str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[tuple[Any, Any]]: ...
@overload
def select(cols: tuple[str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[tuple[Any, Any, Any]]: ...
@overload
def select(cols: tuple[str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[tuple[Any, Any, Any, Any]]: ...
@overload
def select(cols: tuple[str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[tuple[Any, Any, Any, Any, Any]]: ...
@overload
def select(cols: tuple[str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[tuple[Any, Any, Any, Any, Any, Any]]: ...
@overload
def select(cols: tuple[str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[tuple[Any, Any, Any, Any, Any, Any, Any]]: ...
@overload
def select(cols: tuple[str, str, str, str, str, str, str, str], rest: str, *, db: sqlite3.Connection) -> Iterator[tuple[Any, Any, Any, Any, Any, Any, Any, Any]]: ...
# fmt: on
def select(cols, rest, *, db):
    """
    Run ``SELECT <cols joined by commas> <rest>`` on ``db`` and return whatever ``db.execute`` returns.

    The overloads above only exist so the row tuple arity follows the arity of ``cols``.
    """
    # db arg is last cause that results in nicer code formatting..
    columns = ','.join(cols)
    query = ' '.join(('SELECT', columns, rest))
    return db.execute(query)
class SqliteTool:
    """Helper for inspecting the schema (tables and their columns) of a database via an sqlite3 connection."""

    def __init__(self, connection: sqlite3.Connection) -> None:
        self.connection = connection

    def _get_sqlite_master(self) -> dict[str, str]:
        """
        Returns map from entry name to entry type, as listed in sqlite_master
        """
        res = {}
        for c in self.connection.execute('SELECT name, type FROM sqlite_master'):
            [name, type_] = c
            assert type_ in {'table', 'index', 'view', 'trigger'}, (name, type_)  # just in case
            res[name] = type_
        return res

    def get_table_names(self) -> list[str]:
        """
        Returns names of sqlite_master entries that are tables (indexes, views and triggers are skipped)
        """
        return [name for name, type_ in self._get_sqlite_master().items() if type_ == 'table']

    def get_table_schema(self, name: str) -> dict[str, str]:
        """
        Returns map from column name to column type

        NOTE: Sometimes this doesn't work if the db has some extensions (e.g. happens for facebook apps)
        In this case you might still be able to use get_table_names
        """
        # double embedded backticks, otherwise a table name containing one would break out of the quoted identifier
        quoted = name.replace('`', '``')
        schema: dict[str, str] = {}
        for row in self.connection.execute(f'PRAGMA table_info(`{quoted}`)'):
            col = row[1]
            type_ = row[2]
            # hmm, somewhere between 3.34.1 and 3.37.2, sqlite started normalising type names to uppercase
            # let's do this just in case since python < 3.10 are using the old version
            # e.g. it could have returned 'blob' and that would confuse blob check (see _check_allowed_blobs)
            type_ = type_.upper()
            schema[col] = type_
        return schema

    def get_table_schemas(self) -> dict[str, dict[str, str]]:
        """
        Returns map from table name to its schema (see get_table_schema), for every table in the database
        """
        return {name: self.get_table_schema(name) for name in self.get_table_names()}