my.hackernews.dogsheep: use utc datetime + minor cleanup

2023-10-27 02:27:04 +01:00 · 2023-10-27 02:27:04 +01:00 · 3a25c9042c
commit 3a25c9042c
parent bef0423b4f
1 changed files with 11 additions and 11 deletions
--- a/my/hackernews/dogsheep.py
+++ b/my/hackernews/dogsheep.py
@@ -4,18 +4,19 @@ Hackernews data via Dogsheep [[hacker-news-to-sqlite][https://github.com/dogshee
 from __future__ import annotations
 from dataclasses import dataclass
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Iterator, Sequence, Optional
-from my.core import get_files, Paths, Res
+from my.core import get_files, Paths, Res, datetime_aware
 from my.core.sqlite import sqlite_connection
 import my.config
-from my.config import hackernews as user_config
+from .common import hackernews_link
@dataclass
-class config(user_config.dogsheep):
+class config(my.config.hackernews.dogsheep):
    # paths[s]/glob to the dogsheep database
    export_path: Paths
@@ -26,24 +27,23 @@ def inputs() -> Sequence[Path]:
    return get_files(config.export_path)
-from .common import hackernews_link
 # TODO not sure if worth splitting into Comment and Story?
@dataclass(unsafe_hash=True)
 class Item:
    id: str
    type: str
-    # TODO is it urc??
+    created: datetime_aware  # checked and it's utc
-    created: datetime
    title: Optional[str]  # only present for Story
-    text_html: Optional[str] # should be present for Comment and might for Story
+    text_html: Optional[str]  # should be present for Comment and might for Story
-    url: Optional[str] # might be present for Story
+    url: Optional[str]  # might be present for Story
    # todo process 'deleted'? fields?
    # todo process 'parent'?
    @property
    def permalink(self) -> str:
        return hackernews_link(self.id)
 # TODO hmm kinda annoying that permalink isn't getting serialized
 # maybe won't be such a big problem if we used hpi query directly on objects, without jsons?
 # so we could just take .permalink thing
@@ -56,7 +56,7 @@ def items() -> Iterator[Res[Item]]:
            yield Item(
                id=r['id'],
                type=r['type'],
-                created=datetime.fromtimestamp(r['time']),
+                created=datetime.fromtimestamp(r['time'], tz=timezone.utc),
                title=r['title'],
                # todo hmm maybe a method to strip off html tags would be nice
                text_html=r['text'],