switch from using dataset to raw sqlite3 module

dataset is kinda unmaintained and currently broken due to sqlalchemy 2.0 changes

resolves https://github.com/karlicoss/HPI/issues/264
This commit is contained in:
Dima Gerasimov 2023-02-07 01:28:45 +00:00 committed by karlicoss
parent 9c432027b5
commit 5c82d0faa9
8 changed files with 123 additions and 103 deletions

View file

@ -5,13 +5,15 @@ from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from typing import Iterator, Sequence, Optional, Dict
from pathlib import Path
from typing import Iterator, Sequence, Optional
from my.core import get_files, Paths, Res
from my.core.sqlite import sqlite_connection
from my.config import hackernews as user_config
from ..core import Paths
@dataclass
class config(user_config.dogsheep):
# path[s]/glob to the dogsheep database
@ -20,8 +22,6 @@ class config(user_config.dogsheep):
# todo so much boilerplate... really need some common wildcard imports?...
# at least for stuff which realistically is used in each module like get_files/Sequence/Paths/dataclass/Iterator/Optional
from ..core import get_files
from pathlib import Path
def inputs() -> Sequence[Path]:
    """Return the result of ``get_files`` applied to ``config.export_path``."""
    export_files = get_files(config.export_path)
    return export_files
@ -44,15 +44,15 @@ class Item:
@property
def permalink(self) -> str:
    """Link for this item, built by ``hackernews_link`` from ``self.id``."""
    link = hackernews_link(self.id)
    return link
# TODO hmm kinda annoying that permalink isn't getting serialized
# maybe won't be such a big problem if we used hpi query directly on objects, without jsons?
# so we could just take .permalink thing
from ..core.error import Res
from ..core.dataset import connect_readonly
def items() -> Iterator[Res[Item]]:
f = max(inputs())
with connect_readonly(f) as db:
items = db['items']
for r in items.all(order_by='time'):
with sqlite_connection(f, immutable=True, row_factory='row') as conn:
for r in conn.execute('SELECT * FROM items ORDER BY time'):
yield Item(
id=r['id'],
type=r['type'],

View file

@ -1,20 +1,17 @@
"""
[[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews
"""
REQUIRES = ['dataset']
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterator, NamedTuple, Sequence
import pytz
from my.core import get_files
from my.core.sqlite import sqlite_connection
from my.config import materialistic as config
# todo migrate config to my.hackernews.materialistic
from ..core import get_files
from pathlib import Path
def inputs() -> Sequence[Path]:
    """Return the result of ``get_files`` applied to ``config.export_path``."""
    export_files = get_files(config.export_path)
    return export_files
@ -28,7 +25,7 @@ class Saved(NamedTuple):
@property
def when(self) -> datetime:
ts = int(self.row['time']) / 1000
return datetime.fromtimestamp(ts, tz=pytz.utc)
return datetime.fromtimestamp(ts, tz=timezone.utc)
@property
def uid(self) -> str:
@ -47,13 +44,11 @@ class Saved(NamedTuple):
return hackernews_link(self.uid)
from ..core.dataset import connect_readonly
def raw() -> Iterator[Row]:
last = max(inputs())
with connect_readonly(last) as db:
saved = db['saved']
with sqlite_connection(last, immutable=True, row_factory='dict') as conn:
yield from conn.execute('SELECT * FROM saved ORDER BY time')
# TODO wonder if it's 'save time' or creation time?
yield from saved.all(order_by='time')
def saves() -> Iterator[Saved]: