rss: minor enhancements

This commit is contained in:
Dima Gerasimov 2020-05-13 21:29:16 +01:00
parent eba2d26b31
commit c289fbb872
5 changed files with 66 additions and 20 deletions

View file

@ -1,10 +1,12 @@
# shared Rss stuff
from typing import NamedTuple
from datetime import datetime
from typing import NamedTuple, Optional
class Subscription(NamedTuple):
    """
    A single feed subscription, shared between the RSS reader modules.
    """
    title: str
    url: str
    id: str  # TODO not sure about it...
    # eh, not all of them got a reasonable 'created' time, hence Optional
    created_at: Optional[datetime]
    subscribed: bool = True

View file

@ -1,5 +1,6 @@
from glob import glob as do_glob
from pathlib import Path
from datetime import datetime
import functools
import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
@ -219,3 +220,28 @@ class classproperty(Generic[_R]):
#
# def __get__(self) -> _R:
# return self.f()
# Plain alias of datetime at runtime; the name suggests it is meant to mark
# timezone-aware values, but nothing enforces that.
# TODO maybe use opaque mypy alias?
tzdatetime = datetime


# Bound below to the stdlib parser when it exists, otherwise to the local backport.
fromisoformat: Callable[[str], datetime]
import sys
# Compare the whole version tuple rather than only `.minor`: a minor-only check
# would wrongly select the backport on any future major release with minor < 7.
if sys.version_info >= (3, 7):
    # prevent mypy on py3.6 from complaining...
    fromisoformat_real = datetime.fromisoformat  # type: ignore[attr-defined]
    fromisoformat = fromisoformat_real
else:
    from .py37 import fromisoformat
# TODO doctests?
def isoparse(s: str) -> tzdatetime:
    """
    Parse a UTC timestamp carrying a trailing 'Z', e.g. 2020-05-01T10:32:02.925961Z

    The 'Z' suffix is swapped for an explicit '+00:00' offset and the result is
    handed to fromisoformat. Input that doesn't end with 'Z' fails an assertion
    (the offending string is used as the assertion message).
    """
    # TODO could use dateutil? but it's quite slow as far as I remember..
    # TODO support non-utc.. somehow?
    assert s.endswith('Z'), s
    with_utc_offset = s[:-1] + '+00:00'
    return fromisoformat(with_utc_offset)

View file

@ -2,16 +2,22 @@
Feedbin RSS reader
"""
from .common import listify
from ._rss import Subscription
from my.config import feedbin as config
import json
from pathlib import Path
from typing import Sequence
from .core.common import listify, get_files, isoparse
from ._rss import Subscription
def inputs() -> Sequence[Path]:
    """
    Feedbin export files, as resolved by get_files from config.export_path
    """
    export_path = config.export_path
    return get_files(export_path)
import json
from typing import Dict, List
from datetime import datetime
from dateutil.parser import isoparse
@listify
@ -19,16 +25,19 @@ def parse_file(f: Path):
raw = json.loads(f.read_text())
for r in raw:
yield Subscription(
# TODO created_at?
created_at=isoparse(r['created_at']),
title=r['title'],
url=r['site_url'],
id=r['id'],
)
def get_states() -> Dict[datetime, List[Subscription]]:
# meh
from dateutil.parser import isoparse # type: ignore
res = {}
# TODO use get_files
for f in sorted(Path(config.export_dir).glob('*.json')):
for f in inputs():
# TODO ugh. depends on my naming. not sure if useful?
dts = f.stem.split('_')[-1]
dt = isoparse(dts)
subs = parse_file(f)

View file

@ -2,16 +2,22 @@
Feedly RSS reader
"""
from .common import listify
from ._rss import Subscription
from my.config import feedly as config
import json
from pathlib import Path
from typing import Sequence
from .core.common import listify, get_files, isoparse
from ._rss import Subscription
def inputs() -> Sequence[Path]:
    """
    Feedly export files, as resolved by get_files from config.export_path
    """
    export_path = config.export_path
    return get_files(export_path)
import json
from typing import Dict, List
from datetime import datetime
import pytz
@listify
@ -22,19 +28,21 @@ def parse_file(f: Path):
rid = r['id']
website = r.get('website', rid) # meh
yield Subscription(
# TODO created_at?
created_at=None,
title=r['title'],
url=website,
id=rid,
)
def get_states() -> Dict[datetime, List[Subscription]]:
    """
    Map each export's timestamp to the subscriptions parsed from that export.

    The timestamp is taken from the file name: the last '_'-separated chunk of
    the stem, formatted as %Y%m%d%H%M%S, localized to UTC.
    Raises ValueError (from strptime) if a file name doesn't follow that format.
    """
    res: Dict[datetime, List[Subscription]] = {}
    # single pass over the configured exports; relies on the module-level pytz import
    for f in inputs():
        dts = f.stem.split('_')[-1]
        dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
        dt = pytz.utc.localize(dt)
        subs = parse_file(f)
        res[dt] = subs
    # TODO get rid of these dts...
    return res

View file

@ -21,6 +21,7 @@ _POLAR_DIR = Path('~').expanduser() / '.polar'
logger = LazyLogger(__name__)
# TODO use core.isoparse
def parse_dt(s: str) -> datetime:
    """
    Parse a timestamp formatted like 2020-05-01T10:32:02.925961Z and localize it to UTC
    """
    naive = datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ')
    return pytz.utc.localize(naive)