new module: Harmonic app for Hackernews

This commit is contained in:
Dima Gerasimov 2023-09-25 10:41:36 +01:00 committed by karlicoss
parent 01480ec8eb
commit 8addd2d58a
4 changed files with 138 additions and 0 deletions

View file

@ -265,3 +265,7 @@ class whatsapp:
class android:
    # where the exported data lives (type comes from the project's Paths alias)
    export_path: Paths
    # optional user id; may be None
    my_user_id: Optional[str]
class harmonic:
    # where the Harmonic app export(s) live (type comes from the project's Paths alias)
    export_path: Paths

View file

@ -1,2 +1,20 @@
from typing import Protocol
from my.core import datetime_aware, Json
def hackernews_link(id: str) -> str:
    """Return the news.ycombinator.com item URL for the given item id."""
    item_page = 'https://news.ycombinator.com/item'
    return f'{item_page}?id={id}'
class SavedBase(Protocol):
    """Structural interface for a saved Hackernews item.

    Implementations expose the read-only properties below.
    """

    @property
    def when(self) -> datetime_aware:
        """Timestamp associated with the item."""
        ...

    @property
    def uid(self) -> str:
        """Identifier of the item."""
        ...

    @property
    def url(self) -> str:
        """URL of the item."""
        ...

    @property
    def title(self) -> str:
        """Title of the item."""
        ...

    @property
    def hackernews_link(self) -> str:
        """Link to the item on Hackernews."""
        ...

115
my/hackernews/harmonic.py Normal file
View file

@ -0,0 +1,115 @@
"""
[[https://play.google.com/store/apps/details?id=com.simon.harmonichackernews][Harmonic]] app for Hackernews
"""
# Third-party packages this module needs (lxml is imported below).
# NOTE(review): presumably consumed by the project's dependency tooling -- confirm.
REQUIRES = ['lxml']
from dataclasses import dataclass
from datetime import datetime, timezone
import json
import html
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Sequence, TypedDict, cast
from lxml import etree
from more_itertools import unique_everseen, one
from my.core import (
Paths,
Res,
Stats,
datetime_aware,
get_files,
stat,
)
from .common import hackernews_link, SavedBase
from my.config import harmonic as user_config
@dataclass
class harmonic(user_config):
    # Module configuration: extends the user's `my.config.harmonic` section.
    # `export_path` is what `inputs()` hands to `get_files`.
    export_path: Paths
def inputs() -> Sequence[Path]:
    """Return whatever ``get_files`` resolves the configured ``export_path`` to."""
    configured = harmonic.export_path
    return get_files(configured)
class Cached(TypedDict):
    """Shape of the JSON record the app keeps for a cached story.

    NOTE(review): the field names follow the HN Algolia item schema; that
    schema has a ``text`` field and no ``test`` field, so the previous
    ``test`` key was a typo.
    """
    author: str
    created_at_i: int  # unix timestamp (seconds)
    id: str  # NOTE(review): may actually be an int in the raw JSON -- confirm
    points: int
    text: Optional[str]
    title: str
    type: str  # TODO Literal['story', 'comment']? comments are only in 'children' field tho
    url: str
    # TODO also has children with comments, but not sure I need it?
# TODO reuse savedbase in materialistic?
@dataclass
class Saved(SavedBase):
    """A saved story, exposing fields of its cached JSON record as properties."""

    # the cached JSON record for the story
    raw: Cached

    @property
    def when(self) -> datetime_aware:
        """Creation time of the story as a UTC-aware datetime."""
        created_ts = self.raw['created_at_i']
        return datetime.fromtimestamp(created_ts, timezone.utc)

    @property
    def uid(self) -> str:
        """The ``id`` field of the cached record."""
        record = self.raw
        return record['id']

    @property
    def url(self) -> str:
        """The ``url`` field of the cached record."""
        record = self.raw
        return record['url']

    @property
    def title(self) -> str:
        """The ``title`` field of the cached record."""
        record = self.raw
        return record['title']

    @property
    def hackernews_link(self) -> str:
        """Hackernews item page for this story, built from ``uid``."""
        # inside a method the bare name resolves to the module-level helper,
        # not to this property
        item_id = self.uid
        return hackernews_link(item_id)
_PREFIX = 'com.simon.harmonichackernews.KEY_SHARED_PREFERENCES'


def _find_pref(tree: Any, suffix: str) -> Any:
    """Return the single element whose ``name`` attribute is ``_PREFIX + suffix``.

    ``one`` raises if there is not exactly one matching element.
    """
    return one(cast(List[Any], tree.xpath(f'//*[@name="{_PREFIX}{suffix}"]')))


def _saved() -> Iterator[Res[Saved]]:
    """Yield a ``Saved`` for every bookmarked story that has a cached record.

    For each input file:

    1. read the ids listed under ``..._CACHED_STORIES_STRINGS`` (the part of
       each child's text before the first ``-``);
    2. load the JSON stored under ``..._CACHED_STORY<id>`` for each of them;
    3. walk the ``..._BOOKMARKS`` value -- ``-``-separated entries of the form
       ``<id>q<timestamp>`` -- and yield the cached record for each bookmarked
       id. Bookmarks without a cached record are skipped.

    An empty bookmarks value yields nothing for that file instead of raising.
    """
    for p in inputs():
        # TODO defensive for each item!
        tr = etree.parse(p)

        index = _find_pref(tr, '_CACHED_STORIES_STRINGS')
        cached_ids = [x.text.split('-')[0] for x in index]

        cached: Dict[str, Cached] = {}
        for sid in cached_ids:
            story = _find_pref(tr, f'_CACHED_STORY{sid}')
            cached[sid] = json.loads(html.unescape(story.text))

        bookmarks = _find_pref(tr, '_BOOKMARKS')
        # lxml reports .text as None for an element without text content
        bookmarks_text = bookmarks.text or ''
        for entry in bookmarks_text.split('-'):
            if not entry:
                # ''.split('-') gives [''], and a stray separator gives an
                # empty entry; there is nothing to look up in either case
                continue
            # not sure if timestamp is any useful?
            ids, _item_timestamp = entry.split('q')

            cc = cached.get(ids)
            if cc is None:
                # TODO warn or error?
                continue

            yield Saved(cc)
def saved() -> Iterator[Res[Saved]]:
    """Yield the items from ``_saved()`` with repeats dropped, first occurrence kept."""
    deduplicated = unique_everseen(_saved())
    yield from deduplicated
def stats() -> Stats:
    """Merge the ``stat`` results for ``inputs`` and ``saved`` into one mapping."""
    combined = {}
    # same order as before: on a key clash the entry from `saved` wins
    for source in (inputs, saved):
        combined.update(stat(source))
    return combined

View file

@ -133,6 +133,7 @@ commands =
my.github.ghexport \
my.goodreads \
my.google.takeout.parser \
my.hackernews.harmonic \
my.hypothesis \
my.instapaper \
my.ip.all \