127 lines
3.1 KiB
Python
127 lines
3.1 KiB
Python
"""
[[https://play.google.com/store/apps/details?id=com.simon.harmonichackernews][Harmonic]] app for Hackernews
"""
|
|
# Third-party packages imported by this module: lxml (XML parsing) and orjson (JSON).
REQUIRES = ['lxml', 'orjson']
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
import orjson
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Iterator, List, Optional, Sequence, TypedDict, cast
|
|
|
|
from lxml import etree
|
|
from more_itertools import one
|
|
|
|
from my.core import (
|
|
Paths,
|
|
Res,
|
|
Stats,
|
|
datetime_aware,
|
|
get_files,
|
|
make_logger,
|
|
stat,
|
|
)
|
|
from my.core.common import unique_everseen
|
|
import my.config
|
|
from .common import hackernews_link, SavedBase
|
|
|
|
|
|
# Module-level logger; _saved() uses it to report per-file progress.
logger = make_logger(__name__)
|
|
|
|
|
|
@dataclass
class harmonic(my.config.harmonic):
    """Configuration for this module, layered on top of ``my.config.harmonic``."""

    # Where the export files live; inputs() resolves this through get_files().
    export_path: Paths
|
|
|
|
|
|
def inputs() -> Sequence[Path]:
    """Return the export files that ``harmonic.export_path`` resolves to."""
    export_location = harmonic.export_path
    return get_files(export_location)
|
|
|
|
|
|
class Cached(TypedDict):
    """Shape of the JSON payload that is stored for one cached story."""

    author: str
    created_at_i: int  # read by Saved.when and interpreted as a timestamp
    id: str
    points: int
    # NOTE(review): 'test' looks like a typo for 'text' (cf. the `.text` TODO
    # next to Saved) -- confirm against a real export before renaming.
    test: Optional[str]
    title: str
    type: str  # TODO Literal['story', 'comment']? comments are only in 'children' field tho
    url: str
    # TODO also has children with comments, but not sure I need it?
|
|
|
|
|
|
# TODO if we ever add/use a .text property, need to html.unescape it first
# TODO reuse SavedBase in materialistic?
@dataclass
class Saved(SavedBase):
    """A bookmarked item, backed by the JSON payload that was cached for it.

    Every accessor reads directly from ``raw``; nothing is copied or converted
    except ``when``, which turns the stored timestamp into a datetime.
    """

    # Decoded JSON payload of the cached story.
    raw: Cached

    @property
    def when(self) -> datetime_aware:
        """Creation time of the item as a timezone-aware (UTC) datetime."""
        # created_at_i is interpreted as seconds since the epoch.
        return datetime.fromtimestamp(self.raw['created_at_i'], tz=timezone.utc)

    @property
    def uid(self) -> str:
        """Identifier of the item (the payload's ``id`` field)."""
        return self.raw['id']

    @property
    def url(self) -> str:
        """URL stored in the payload's ``url`` field."""
        return self.raw['url']

    @property
    def title(self) -> str:
        """Title stored in the payload's ``title`` field."""
        return self.raw['title']

    @property
    def hackernews_link(self) -> str:
        """Link built from ``uid`` by the shared ``hackernews_link`` helper."""
        return hackernews_link(self.uid)

    def __hash__(self) -> int:
        """Hash the payload through its key-sorted JSON serialisation.

        The ``__eq__`` generated by ``@dataclass`` compares ``raw`` as a dict,
        which ignores key order. Serialising with sorted keys keeps the hash
        consistent with that: equal payloads always hash equally, even if
        their keys were inserted in a different order.
        """
        # meh. but seems like the easiest and fastest way to hash a dict?
        return hash(orjson.dumps(self.raw, option=orjson.OPT_SORT_KEYS))
|
|
|
|
|
|
# Common prefix of the `name` attributes looked up in the export XML
# (suffixes used: _CACHED_STORIES_STRINGS, _CACHED_STORY<id>, _BOOKMARKS).
_PREFIX = 'com.simon.harmonichackernews.KEY_SHARED_PREFERENCES'
|
|
|
|
|
|
def _find_pref(tree: Any, suffix: str) -> Any:
    """Return the single element whose ``name`` attribute is ``_PREFIX + suffix``.

    ``one`` raises if the element is missing or present more than once.
    """
    return one(cast(List[Any], tree.xpath(f'//*[@name="{_PREFIX}{suffix}"]')))


def _saved() -> Iterator[Res[Saved]]:
    """Yield the bookmarked stories found in each export file, in file order.

    For every file, the ids listed under ``_CACHED_STORIES_STRINGS`` are
    resolved to their ``_CACHED_STORY<id>`` JSON payloads. Each entry of
    ``_BOOKMARKS`` that has such a payload is then yielded as a ``Saved``;
    bookmarks without a cached payload are skipped. Nothing is de-duplicated
    here, so a story present in several exports is yielded once per export.

    An export without any bookmarks (empty or missing element text) yields
    nothing for that file instead of failing.

    Raises:
        ValueError: if a non-empty bookmark entry is not of the form
            ``<id>q<timestamp>``.
    """
    paths = inputs()
    total = len(paths)
    width = len(str(total))
    for idx, path in enumerate(paths):
        logger.info(f'processing [{idx:>{width}}/{total:>{width}}] {path}')
        # TODO defensive for each item!
        tr = etree.parse(path)

        stories_el = _find_pref(tr, '_CACHED_STORIES_STRINGS')
        # Each child's text starts with the story id, followed by '-'.
        cached_ids = [x.text.split('-')[0] for x in stories_el]

        cached: Dict[str, Cached] = {
            sid: orjson.loads(_find_pref(tr, f'_CACHED_STORY{sid}').text)
            for sid in cached_ids
        }

        bookmarks_el = _find_pref(tr, '_BOOKMARKS')
        # .text is None for an element without content, and ''.split('-')
        # gives ['']; both mean "no bookmarks", not a malformed export.
        for entry in (bookmarks_el.text or '').split('-'):
            if not entry:
                continue

            ids, _item_timestamp = entry.split('q')
            # not sure if timestamp is any useful?

            cc = cached.get(ids)
            if cc is None:
                # TODO warn or error?
                continue

            yield Saved(cc)
|
|
|
|
|
|
def saved() -> Iterator[Res[Saved]]:
    """Yield the items of ``_saved`` as filtered by ``unique_everseen``."""
    # The generator function itself (not a call to it) is handed over.
    filtered = unique_everseen(_saved)
    for item in filtered:
        yield item
|
|
|
|
|
|
def stats() -> Stats:
    """Merge the ``stat`` summaries of ``inputs`` and ``saved`` into one mapping."""
    combined: Stats = {}
    # Same order as a {**a, **b} merge: on a key clash the `saved` entry wins.
    combined.update(stat(inputs))
    combined.update(stat(saved))
    return combined
|