From 8addd2d58ab16bc13e2d822c2297181e20aafe78 Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Mon, 25 Sep 2023 10:41:36 +0100
Subject: [PATCH] new module: Harmonic app for Hackernews

---
Review notes (ignored by git am):
- line structure and indentation were restored from a whitespace-collapsed
  copy of this patch; if context lines don't match the tree exactly, apply
  with `git apply --ignore-whitespace`
- blob ids on the `index` lines are the ones from the original revision
- harmonic: parsing is now defensive (errors are yielded instead of raised),
  empty bookmarks no longer crash

 my/config.py              |   4 ++
 my/hackernews/common.py   |  21 +++++++
 my/hackernews/harmonic.py | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++
 tox.ini                   |   1 +
 4 files changed, 171 insertions(+)
 create mode 100644 my/hackernews/harmonic.py

diff --git a/my/config.py b/my/config.py
index a59eadd..9cc9c11 100644
--- a/my/config.py
+++ b/my/config.py
@@ -265,3 +265,7 @@ class whatsapp:
     class android:
         export_path: Paths
         my_user_id: Optional[str]
+
+
+class harmonic:
+    export_path: Paths
diff --git a/my/hackernews/common.py b/my/hackernews/common.py
index 8c7dd1e..0c5ff9b 100644
--- a/my/hackernews/common.py
+++ b/my/hackernews/common.py
@@ -1,2 +1,23 @@
+from typing import Protocol
+
+from my.core import datetime_aware, Json
+
+
 def hackernews_link(id: str) -> str:
     return f'https://news.ycombinator.com/item?id={id}'
+
+
+class SavedBase(Protocol):
+    """
+    Interface shared by saved Hackernews items (implemented by my.hackernews.harmonic.Saved)
+    """
+    @property
+    def when(self) -> datetime_aware: ...  # timezone-aware timestamp of the item
+    @property
+    def uid(self) -> str: ...  # item id, the one hackernews_link() expects
+    @property
+    def url(self) -> str: ...  # url stored for the item
+    @property
+    def title(self) -> str: ...
+    @property
+    def hackernews_link(self) -> str: ...  # link to the item on news.ycombinator.com
diff --git a/my/hackernews/harmonic.py b/my/hackernews/harmonic.py
new file mode 100644
index 0000000..a4eb28e
--- /dev/null
+++ b/my/hackernews/harmonic.py
@@ -0,0 +1,145 @@
+"""
+[[https://play.google.com/store/apps/details?id=com.simon.harmonichackernews][Harmonic]] app for Hackernews
+"""
+REQUIRES = ['lxml']
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+import json
+import html
+from pathlib import Path
+from typing import Any, Dict, Iterator, List, Optional, Sequence, TypedDict, cast
+
+from lxml import etree
+from more_itertools import unique_everseen, one
+
+from my.core import (
+    Paths,
+    Res,
+    Stats,
+    datetime_aware,
+    get_files,
+    stat,
+)
+from .common import hackernews_link, SavedBase
+
+from my.config import harmonic as user_config
+
+
+@dataclass
+class harmonic(user_config):
+    """Module config, layered on top of the user's ``my.config.harmonic``."""
+    export_path: Paths  # where to find the exported files, resolved by ``get_files``
+
+
+def inputs() -> Sequence[Path]:
+    """Return the export files found under ``harmonic.export_path``."""
+    return get_files(harmonic.export_path)
+
+
+class Cached(TypedDict):
+    """Shape of the JSON that the app caches for each story."""
+    author: str
+    created_at_i: int
+    id: str
+    points: int
+    # NOTE(review): possibly a typo for 'text' -- confirm against a real export
+    test: Optional[str]
+    title: str
+    type: str  # TODO Literal['story', 'comment']? comments are only in 'children' field tho
+    url: str
+    # TODO also has children with comments, but not sure I need it?
+
+
+# TODO reuse savedbase in materialistic?
+@dataclass
+class Saved(SavedBase):
+    """A bookmarked story, backed by the JSON the app cached for it."""
+    raw: Cached
+
+    @property
+    def when(self) -> datetime_aware:
+        # 'created_at_i' is read as a unix timestamp; the result is pinned to UTC
+        ts = self.raw['created_at_i']
+        return datetime.fromtimestamp(ts, tz=timezone.utc)
+
+    @property
+    def uid(self) -> str:
+        return self.raw['id']
+
+    @property
+    def url(self) -> str:
+        return self.raw['url']
+
+    @property
+    def title(self) -> str:
+        return self.raw['title']
+
+    @property
+    def hackernews_link(self) -> str:
+        return hackernews_link(self.uid)
+
+
+_PREFIX = 'com.simon.harmonichackernews.KEY_SHARED_PREFERENCES'
+
+
+def _pref(tr: Any, key: str) -> Any:
+    """Return the only element named ``_PREFIX + key`` (``one`` raises unless there is exactly one)."""
+    return one(cast(List[Any], tr.xpath(f'//*[@name="{_PREFIX}{key}"]')))
+
+
+def _saved_in(p: Path) -> Iterator[Res[Saved]]:
+    """Yield the bookmarked stories of a single export file; broken entries are yielded as errors."""
+    tr = etree.parse(p)
+
+    cached: Dict[str, Cached] = {}
+    for x in _pref(tr, '_CACHED_STORIES_STRINGS'):
+        try:
+            # the story id is whatever precedes the first '-'
+            sid = x.text.split('-')[0]
+            cached[sid] = json.loads(html.unescape(_pref(tr, f'_CACHED_STORY{sid}').text))
+        except Exception as e:
+            # one broken cache entry shouldn't take down the rest of the file
+            yield e
+
+    # an element without text (text is None) means nothing is bookmarked
+    bookmarks = _pref(tr, '_BOOKMARKS').text or ''
+    for entry in bookmarks.split('-'):
+        if not entry:
+            # ''.split('-') == [''], this also covers a stray separator
+            continue
+        parts = entry.split('q')
+        if len(parts) != 2:
+            yield RuntimeError(f'{p}: unexpected bookmark entry {entry!r}')
+            continue
+        sid, _item_timestamp = parts
+        # not sure if timestamp is any useful?
+
+        cc = cached.get(sid)
+        if cc is None:
+            # TODO warn or error?
+            continue
+
+        yield Saved(cc)
+
+
+def _saved() -> Iterator[Res[Saved]]:
+    for p in inputs():
+        try:
+            yield from _saved_in(p)
+        except Exception as e:
+            # unparseable file or missing preference key: report it and move on to the next file
+            yield e
+
+
+def saved() -> Iterator[Res[Saved]]:
+    """Bookmarked stories across all exports, with repeated items dropped."""
+    yield from unique_everseen(_saved())
+
+
+def stats() -> Stats:
+    """Summary of ``inputs`` and ``saved``, as computed by ``stat``."""
+    return {
+        **stat(inputs),
+        **stat(saved),
+    }
diff --git a/tox.ini b/tox.ini
index 9ec80f1..9487ae3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -133,6 +133,7 @@ commands =
     my.github.ghexport \
     my.goodreads \
     my.google.takeout.parser \
+    my.hackernews.harmonic \
     my.hypothesis \
     my.instapaper \
     my.ip.all \