some WIP on trying sqlalchemy to do what I want
parent 510d771e6e
commit d05ba9e5a3
1 changed file with 113 additions and 13 deletions
sql.py
@@ -16,7 +16,8 @@ from kython.py37 import fromisoformat

# TODO move to some common thing?
class IsoDateTime(sqlalchemy.TypeDecorator):
    # TODO can we use something more efficient? e.g. blob for encoded datetime and tz? not sure if worth it
    # in theory could use something more efficient? e.g. blob for encoded datetime and tz?
    # but practically, the difference seems to be pretty small, so perhaps fine for now
    impl = sqlalchemy.types.String

    # TODO optional?
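A side note, not part of the commit: for an ISO-string-backed TypeDecorator like this one, the elided methods usually end up as an isoformat()/fromisoformat pair (fromisoformat is the import visible in the hunk header), along these lines:

import sqlalchemy
from kython.py37 import fromisoformat

class IsoDateTimeSketch(sqlalchemy.TypeDecorator):
    # sketch of the likely shape; the actual methods are outside this hunk
    impl = sqlalchemy.types.String

    def process_bind_param(self, value, dialect):
        # datetime -> ISO string on the way into the database
        return None if value is None else value.isoformat()

    def process_result_value(self, value, dialect):
        # ISO string -> datetime (with tz) on the way back out
        return None if value is None else fromisoformat(value)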
@@ -102,25 +103,124 @@ def test(tmp_path):
    assert len(cached_locs) == test_limit
    assert real_locs == cached_locs


def main():

from kython.ktyping import PathIsh
def make_dbcache(p: PathIsh, hashf):
    raise NotImplementedError

# TODO what if we want dynamic path??
# dbcache = make_dbcache('/L/tmp/test.db', hashf=lambda p: p) # TODO FIXME?
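make_dbcache is still a stub; judging from the commented-out call above and from dbcache_worker further down, it is presumably heading towards a small factory that binds the path and hash function, roughly (a guess, not the commit's code):

def make_dbcache_sketch(p: PathIsh, hashf):
    # bind db path and hash function now; supply the cached type and
    # the wrapped function later, matching dbcache_worker below
    def bind(type_, wrapped):
        return dbcache_worker(db_path=p, hashf=hashf, type_=type_, wrapped=wrapped)
    return bind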
Hash = str

# TODO hash is a bit misleading
# TODO perhaps another table is the way to go...

# TODO careful about concurrent access?
def read_hash(db_path: Path) -> Optional[Hash]:
    hash_file = db_path.with_suffix('.hash')
    if not hash_file.exists():
        return None
    return hash_file.read_text()

# TODO not sure if there is any way to guarantee atomic reading....
# unless it happens automatically due to unlink logic?
# TODO need to know entry type?
# TODO or, we can just encode that in names. that way no need for atomic stuff
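On the atomicity TODOs: a common trick is to make the writer atomic rather than the reader, by writing the sidecar to a temporary file and renaming it into place; os.replace is atomic on POSIX, so read_hash would only ever see the old or the new content. A sketch of a hypothetical write_hash counterpart:

import os

def write_hash(db_path: Path, h: Hash) -> None:
    hash_file = db_path.with_suffix('.hash')
    tmp = hash_file.with_name(hash_file.name + '.tmp')
    tmp.write_text(h)
    os.replace(tmp, hash_file)  # atomic rename: readers never observe a partial write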

# TODO give a better name
class Alala:
    def __init__(self, db_path: Path, type_) -> None:
        self.db = sa.create_engine(f'sqlite:///{db_path}')
        self.engine = self.db.connect()  # TODO do I need to tear anything down??
        self.meta = sa.MetaData(self.engine)
        self.table_hash = sa.Table('hash', self.meta, sa.Column('value', sa.types.String))

        schema = make_schema(type_)
        self.table_data = sa.Table('table', self.meta, *schema)
        # for t in [self.table_data, self.table_hash]:
        #     # TODO shit. how to reuse these properly????
        #     cols = [c for c in t._columns]
        #     sa.Table(t.name, self.meta, *cols)

        self.meta.create_all()

    # @property
    # def table_hash(self):
    #     # TODO single entry constraint?
    #     return sa.table('hash', sa.Column('value', sa.types.String))
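On the "how to reuse these properly" question in the commented-out block: a Column object can only be attached to one Table, which is why copying t._columns across fails. In SQLAlchemy 1.x the supported way to copy a table definition into another MetaData is Table.tometadata (a pointer, not the commit's code):

import sqlalchemy as sa

meta_a = sa.MetaData()
hash_table = sa.Table('hash', meta_a, sa.Column('value', sa.types.String))

meta_b = sa.MetaData()
hash_table_copy = hash_table.tometadata(meta_b)  # clones the table and its columns onto meta_b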

def get_dbcache_logger():
    return logging.getLogger('dbcache')


def dbcache_worker(db_path: PathIsh, hashf, type_, wrapped):
    logger = get_dbcache_logger()

    db_path = Path(db_path)
    # TODO FIXME make sure we have exclusive write lock
    # TODO FIXME ok, use transactions, then we'd be fine

    alala = Alala(db_path, type_)
    engine = alala.engine
    # table_hash = sa.table('hash', sa.Column('value', sa.types.String))
    prev_hashes = engine.execute(alala.table_hash.select()).fetchall()
    if len(prev_hashes) > 1:
        raise RuntimeError(f'Multiple hashes! {prev_hashes}')

    prev_hash: Optional[Hash]
    if len(prev_hashes) == 0:
        prev_hash = None
    else:
        prev_hash = prev_hashes[0][0]  # fetchall returns rows; unwrap the single column to get a plain string
    logger.debug('previous hash: %s', prev_hash)

    def wrapper(key):
        h = hashf(key)
        logger.debug('current hash: %s', h)
        assert h is not None  # just in case

        with engine.begin() as transaction:
            rows = engine.execute(alala.table_data.select()).fetchall()
            if h == prev_hash:
                # rows fetched above hold the cached data
                # return type_()
                raise NotImplementedError("TODO return data")
            else:
                datas = wrapped(key)
                engine.execute(alala.table_data.insert().values(datas))  # TODO chunks??

                # TODO FIXME insert and replace instead
                # alala.table_hash.drop(engine)
                engine.execute(alala.table_hash.delete())
                engine.execute(alala.table_hash.insert().values([{'value': h}]))
                return datas
    # TODO engine is leaking??
    return wrapper
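About the "insert and replace instead" FIXME: the delete-then-insert pair is only safe because it runs inside engine.begin(). On sqlite a single-statement alternative is INSERT OR REPLACE, which SQLAlchemy can emit via prefix_with; it needs a uniqueness constraint to replace against, so this sketch uses a hypothetical fixed-primary-key variant of the hash table:

import sqlalchemy as sa

meta = sa.MetaData()
hash_table = sa.Table(
    'hash', meta,
    sa.Column('id', sa.Integer, primary_key=True),  # fixed key forces a single row
    sa.Column('value', sa.String),
)

def save_hash(engine, h: str) -> None:
    # renders as INSERT OR REPLACE INTO hash ... on sqlite
    engine.execute(hash_table.insert().prefix_with('OR REPLACE').values(id=0, value=h))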

def wrapped(path: Path):
    return []  # TODO


def hashf(path: Path):
    return str(path)  # TODO mtime
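An mtime-aware hashf, as the TODO suggests, could be as simple as (a sketch):

def hashf_mtime(path: Path) -> Hash:
    # include modification time so that touching the file invalidates the cache
    return f'{path}|{path.stat().st_mtime}'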

def main():
    from kython import setup_logzero
    setup_logzero(get_logger(), level=logging.DEBUG)
    setup_logzero(get_dbcache_logger(), level=logging.DEBUG)

    db_path = Path('test3.sqlite')
    # if db_path.exists():
    #     db_path.unlink()
    src_path = Path('hi')

    locs = iter_db_locs(db_path)
    print(len(list(locs)))
    db_path = Path('test.sqlite')
    if db_path.exists():
        db_path.unlink()

    new_wrapped = dbcache_worker(db_path=db_path, hashf=hashf, type_=Location, wrapped=wrapped)
    res = new_wrapped(src_path)
    print(res)

    # TODO is it quicker to insert anyway? needs unique policy

    # ok, very nice. the whole db is just 20mb now
    # nice, and loads in seconds basically
    # TODO FIXME just need to check timezone
    # cache_locs(source=Path('/L/tmp/LocationHistory.json'), db_path=db_path)
    # locs = iter_db_locs(db_path)
    # print(len(list(locs)))


if __name__ == '__main__':
    main()