From 6f6be5c78e1984cdbffd9c833362a9e1b56c61e5 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Wed, 21 Jun 2023 19:44:26 +0100
Subject: [PATCH] my.hackernews.materialistic: process and merge all db exports + minor cleanup

---
Note (ignored by git am): line breaks and indentation were restored from a
whitespace-collapsed copy; blank lines were reconciled against the hunk headers.

 my/hackernews/materialistic.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/my/hackernews/materialistic.py b/my/hackernews/materialistic.py
index e0d634a..eddf053 100644
--- a/my/hackernews/materialistic.py
+++ b/my/hackernews/materialistic.py
@@ -5,11 +5,14 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, Iterator, NamedTuple, Sequence
 
-from my.core import get_files
+from more_itertools import unique_everseen
+
+from my.core import get_files, datetime_aware
 from my.core.sqlite import sqlite_connection
 
-from my.config import materialistic as config
-# todo migrate config to my.hackernews.materialistic
+from my.config import materialistic as config  # todo migrate config to my.hackernews.materialistic
+
+from .common import hackernews_link
 
 
 def inputs() -> Sequence[Path]:
@@ -17,13 +20,16 @@ def inputs() -> Sequence[Path]:
 
 
 Row = Dict[str, Any]
-from .common import hackernews_link
+
 
 class Saved(NamedTuple):
     row: Row
 
+    # NOTE: seems like it's the time item was saved (not created originally??)
+    # https://github.com/hidroh/materialistic/blob/b631d5111b7487d2328f463bd95e8507c74c3566/app/src/main/java/io/github/hidroh/materialistic/data/MaterialisticDatabase.java#L224
+    # but not 100% sure.
     @property
-    def when(self) -> datetime:
+    def when(self) -> datetime_aware:
         ts = int(self.row['time']) / 1000
         return datetime.fromtimestamp(ts, tz=timezone.utc)
 
@@ -44,11 +50,14 @@ class Saved(NamedTuple):
         return hackernews_link(self.uid)
 
 
+def _all_raw() -> Iterator[Row]:
+    for db in inputs():
+        with sqlite_connection(db, immutable=True, row_factory='dict') as conn:
+            yield from conn.execute('SELECT * FROM saved ORDER BY time')
+
+
 def raw() -> Iterator[Row]:
-    last = max(inputs())
-    with sqlite_connection(last, immutable=True, row_factory='dict') as conn:
-        yield from conn.execute('SELECT * FROM saved ORDER BY time')
-        # TODO wonder if it's 'save time' or creation time?
+    yield from unique_everseen(_all_raw(), key=lambda r: r['itemid'])
 
 
 def saves() -> Iterator[Saved]: