From c289fbb872445d5494bd7b79c32498340cc11c35 Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 13 May 2020 21:29:16 +0100 Subject: [PATCH] rss: minor enhancements --- my/_rss.py | 10 ++++++---- my/core/common.py | 26 ++++++++++++++++++++++++++ my/feedbin.py | 25 +++++++++++++++++-------- my/feedly.py | 24 ++++++++++++++++-------- my/reading/polar.py | 1 + 5 files changed, 66 insertions(+), 20 deletions(-) diff --git a/my/_rss.py b/my/_rss.py index cabf53a..63e2f77 100644 --- a/my/_rss.py +++ b/my/_rss.py @@ -1,10 +1,12 @@ # shared Rss stuff -from typing import NamedTuple +from datetime import datetime +from typing import NamedTuple, Optional + class Subscription(NamedTuple): - # TODO date? title: str url: str - id: str + id: str # TODO not sure about it... + # eh, not all of them got reasonable 'created' time + created_at: Optional[datetime] subscribed: bool=True - diff --git a/my/core/common.py b/my/core/common.py index 83c77d7..918f4b2 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -1,5 +1,6 @@ from glob import glob as do_glob from pathlib import Path +from datetime import datetime import functools import types from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple @@ -219,3 +220,28 @@ class classproperty(Generic[_R]): # # def __get__(self) -> _R: # return self.f() + +# TODO maybe use opaque mypy alias? +tzdatetime = datetime + + +fromisoformat: Callable[[str], datetime] +import sys +if sys.version_info.minor >= 7: + # prevent mypy on py3.6 from complaining... + fromisoformat_real = datetime.fromisoformat # type: ignore[attr-defined] + fromisoformat = fromisoformat_real +else: + from .py37 import fromisoformat + + +# TODO doctests? +def isoparse(s: str) -> tzdatetime: + """ + Parses timestamps formatted like 2020-05-01T10:32:02.925961Z + """ + # TODO could use dateutil? but it's quite slow as far as I remember.. + # TODO support non-utc.. somehow? + assert s.endswith('Z'), s + s = s[:-1] + '+00:00' + return fromisoformat(s) diff --git a/my/feedbin.py b/my/feedbin.py index 3492afb..542c7d5 100644 --- a/my/feedbin.py +++ b/my/feedbin.py @@ -2,16 +2,22 @@ Feedbin RSS reader """ -from .common import listify -from ._rss import Subscription - from my.config import feedbin as config -import json from pathlib import Path +from typing import Sequence + +from .core.common import listify, get_files, isoparse +from ._rss import Subscription + + +def inputs() -> Sequence[Path]: + return get_files(config.export_path) + + +import json from typing import Dict, List from datetime import datetime -from dateutil.parser import isoparse @listify @@ -19,16 +25,19 @@ def parse_file(f: Path): raw = json.loads(f.read_text()) for r in raw: yield Subscription( - # TODO created_at? + created_at=isoparse(r['created_at']), title=r['title'], url=r['site_url'], id=r['id'], ) + def get_states() -> Dict[datetime, List[Subscription]]: + # meh + from dateutil.parser import isoparse # type: ignore res = {} - # TODO use get_files - for f in sorted(Path(config.export_dir).glob('*.json')): + for f in inputs(): + # TODO ugh. depends on my naming. not sure if useful? dts = f.stem.split('_')[-1] dt = isoparse(dts) subs = parse_file(f) diff --git a/my/feedly.py b/my/feedly.py index 93f8823..d43facf 100644 --- a/my/feedly.py +++ b/my/feedly.py @@ -2,16 +2,22 @@ Feedly RSS reader """ -from .common import listify -from ._rss import Subscription - from my.config import feedly as config -import json from pathlib import Path +from typing import Sequence + +from .core.common import listify, get_files, isoparse +from ._rss import Subscription + + +def inputs() -> Sequence[Path]: + return get_files(config.export_path) + + +import json from typing import Dict, List from datetime import datetime -import pytz @listify @@ -22,19 +28,21 @@ def parse_file(f: Path): rid = r['id'] website = r.get('website', rid) # meh yield Subscription( - # TODO created_at? + created_at=None, title=r['title'], url=website, id=rid, ) + def get_states() -> Dict[datetime, List[Subscription]]: + import pytz res = {} - # TODO use get_files - for f in sorted(Path(config.export_dir).glob('*.json')): + for f in inputs(): dts = f.stem.split('_')[-1] dt = datetime.strptime(dts, '%Y%m%d%H%M%S') dt = pytz.utc.localize(dt) subs = parse_file(f) res[dt] = subs + # TODO get rid of these dts... return res diff --git a/my/reading/polar.py b/my/reading/polar.py index d2b2d60..7ba4fc2 100755 --- a/my/reading/polar.py +++ b/my/reading/polar.py @@ -21,6 +21,7 @@ _POLAR_DIR = Path('~').expanduser() / '.polar' logger = LazyLogger(__name__) +# TODO use core.isoparse def parse_dt(s: str) -> datetime: return pytz.utc.localize(datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ'))