switch from using dataset to raw sqlite3 module

dataset is kinda unmaintained and currently broken due to sqlalchemy 2.0 changes; resolves https://github.com/karlicoss/HPI/issues/264
2023-02-07 01:28:45 +00:00 · 2023-02-07 01:28:45 +00:00 · 5c82d0faa9
commit 5c82d0faa9
parent 9c432027b5
8 changed files with 123 additions and 103 deletions
--- a/my/hackernews/dogsheep.py
+++ b/my/hackernews/dogsheep.py
@ -5,13 +5,15 @@ from __future__ import annotations

 from dataclasses import dataclass
 from datetime import datetime
-from typing import Iterator, Sequence, Optional, Dict
+from pathlib import Path
+from typing import Iterator, Sequence, Optional

+from my.core import get_files, Paths, Res
+from my.core.sqlite import sqlite_connection

 from my.config import hackernews as user_config


-from ..core import Paths
@dataclass
 class config(user_config.dogsheep):
    # paths[s]/glob to the dogsheep database
@ -20,8 +22,6 @@ class config(user_config.dogsheep):

 # todo so much boilerplate... really need some common wildcard imports?...
 # at least for stuff which realistically is used in each module like get_files/Sequence/Paths/dataclass/Iterator/Optional
-from ..core import get_files
-from pathlib import Path
 def inputs() -> Sequence[Path]:
    return get_files(config.export_path)

@ -44,15 +44,15 @@ class Item:
    @property
    def permalink(self) -> str:
        return hackernews_link(self.id)
+# TODO hmm kinda annoying that permalink isn't getting serialized
+# maybe won't be such a big problem if we used hpi query directly on objects, without jsons?
+# so we could just take .permalink thing


-from ..core.error import Res
-from ..core.dataset import connect_readonly
 def items() -> Iterator[Res[Item]]:
    f = max(inputs())
-    with connect_readonly(f) as db:
-        items = db['items']
-        for r in items.all(order_by='time'):
+    with sqlite_connection(f, immutable=True, row_factory='row') as conn:
+        for r in conn.execute('SELECT * FROM items ORDER BY time'):
            yield Item(
                id=r['id'],
                type=r['type'],
--- a/my/hackernews/materialistic.py
+++ b/my/hackernews/materialistic.py
@ -1,20 +1,17 @@
 """
 [[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews
 """
-
-REQUIRES = ['dataset']
-
-from datetime import datetime
+from datetime import datetime, timezone
+from pathlib import Path
 from typing import Any, Dict, Iterator, NamedTuple, Sequence

-import pytz
+from my.core import get_files
+from my.core.sqlite import sqlite_connection

 from my.config import materialistic as config
 # todo migrate config to my.hackernews.materialistic


-from ..core import get_files
-from pathlib import Path
 def inputs() -> Sequence[Path]:
    return get_files(config.export_path)

@ -28,7 +25,7 @@ class Saved(NamedTuple):
    @property
    def when(self) -> datetime:
        ts = int(self.row['time']) / 1000
-        return datetime.fromtimestamp(ts, tz=pytz.utc)
+        return datetime.fromtimestamp(ts, tz=timezone.utc)

    @property
    def uid(self) -> str:
@ -47,13 +44,11 @@ class Saved(NamedTuple):
        return hackernews_link(self.uid)


-from ..core.dataset import connect_readonly
 def raw() -> Iterator[Row]:
    last = max(inputs())
-    with connect_readonly(last) as db:
-        saved = db['saved']
+    with sqlite_connection(last, immutable=True, row_factory='dict') as conn:
+        yield from conn.execute('SELECT * FROM saved ORDER BY time')
        # TODO wonder if it's 'save time' or creation time?
-        yield from saved.all(order_by='time')


 def saves() -> Iterator[Saved]: