my.hackernews.dogsheep: use utc datetime + minor cleanup
This commit is contained in:
parent
bef0423b4f
commit
3a25c9042c
1 changed files with 11 additions and 11 deletions
|
@ -4,18 +4,19 @@ Hackernews data via Dogsheep [[hacker-news-to-sqlite][https://github.com/dogshee
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator, Sequence, Optional
|
from typing import Iterator, Sequence, Optional
|
||||||
|
|
||||||
from my.core import get_files, Paths, Res
|
from my.core import get_files, Paths, Res, datetime_aware
|
||||||
from my.core.sqlite import sqlite_connection
|
from my.core.sqlite import sqlite_connection
|
||||||
|
import my.config
|
||||||
|
|
||||||
from my.config import hackernews as user_config
|
from .common import hackernews_link
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class config(user_config.dogsheep):
|
class config(my.config.hackernews.dogsheep):
|
||||||
# path[s]/glob to the dogsheep database
|
# path[s]/glob to the dogsheep database
|
||||||
export_path: Paths
|
export_path: Paths
|
||||||
|
|
||||||
|
@ -26,24 +27,23 @@ def inputs() -> Sequence[Path]:
|
||||||
return get_files(config.export_path)
|
return get_files(config.export_path)
|
||||||
|
|
||||||
|
|
||||||
from .common import hackernews_link
|
|
||||||
|
|
||||||
# TODO not sure if worth splitting into Comment and Story?
|
# TODO not sure if worth splitting into Comment and Story?
|
||||||
@dataclass(unsafe_hash=True)
|
@dataclass(unsafe_hash=True)
|
||||||
class Item:
|
class Item:
|
||||||
id: str
|
id: str
|
||||||
type: str
|
type: str
|
||||||
# TODO is it utc??
|
created: datetime_aware # checked and it's utc
|
||||||
created: datetime
|
|
||||||
title: Optional[str] # only present for Story
|
title: Optional[str] # only present for Story
|
||||||
text_html: Optional[str] # should be present for Comment and might for Story
|
text_html: Optional[str] # should be present for Comment and might for Story
|
||||||
url: Optional[str] # might be present for Story
|
url: Optional[str] # might be present for Story
|
||||||
# todo process 'deleted'? fields?
|
# todo process 'deleted'? fields?
|
||||||
# todo process 'parent'?
|
# todo process 'parent'?
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def permalink(self) -> str:
|
def permalink(self) -> str:
|
||||||
return hackernews_link(self.id)
|
return hackernews_link(self.id)
|
||||||
|
|
||||||
|
|
||||||
# TODO hmm kinda annoying that permalink isn't getting serialized
|
# TODO hmm kinda annoying that permalink isn't getting serialized
|
||||||
# maybe won't be such a big problem if we used hpi query directly on objects, without jsons?
|
# maybe won't be such a big problem if we used hpi query directly on objects, without jsons?
|
||||||
# so we could just take .permalink thing
|
# so we could just take .permalink thing
|
||||||
|
@ -56,7 +56,7 @@ def items() -> Iterator[Res[Item]]:
|
||||||
yield Item(
|
yield Item(
|
||||||
id=r['id'],
|
id=r['id'],
|
||||||
type=r['type'],
|
type=r['type'],
|
||||||
created=datetime.fromtimestamp(r['time']),
|
created=datetime.fromtimestamp(r['time'], tz=timezone.utc),
|
||||||
title=r['title'],
|
title=r['title'],
|
||||||
# todo hmm maybe a method to strip off html tags would be nice
|
# todo hmm maybe a method to strip off html tags would be nice
|
||||||
text_html=r['text'],
|
text_html=r['text'],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue