rss: minor enhancements

This commit is contained in:
Dima Gerasimov 2020-05-13 21:29:16 +01:00
parent eba2d26b31
commit c289fbb872
5 changed files with 66 additions and 20 deletions

View file

@@ -1,10 +1,12 @@
# shared Rss stuff # shared Rss stuff
from typing import NamedTuple from datetime import datetime
from typing import NamedTuple, Optional
class Subscription(NamedTuple): class Subscription(NamedTuple):
# TODO date?
title: str title: str
url: str url: str
id: str id: str # TODO not sure about it...
# eh, not all of them got reasonable 'created' time
created_at: Optional[datetime]
subscribed: bool=True subscribed: bool=True

View file

@@ -1,5 +1,6 @@
from glob import glob as do_glob from glob import glob as do_glob
from pathlib import Path from pathlib import Path
from datetime import datetime
import functools import functools
import types import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
@@ -219,3 +220,28 @@ class classproperty(Generic[_R]):
# #
# def __get__(self) -> _R: # def __get__(self) -> _R:
# return self.f() # return self.f()
# TODO maybe use opaque mypy alias?
# Alias so signatures can hint "this datetime is tz-aware" without a real newtype.
tzdatetime = datetime

fromisoformat: Callable[[str], datetime]
import sys
# Compare the full version tuple: checking only `.minor` would misfire on any
# future major-version bump (e.g. a hypothetical 4.0 has minor == 0).
if sys.version_info >= (3, 7):
    # prevent mypy on py3.6 from complaining...
    fromisoformat_real = datetime.fromisoformat  # type: ignore[attr-defined]
    fromisoformat = fromisoformat_real
else:
    # backport shim for python 3.6
    from .py37 import fromisoformat


# TODO doctests?
def isoparse(s: str) -> tzdatetime:
    """
    Parse a UTC timestamp formatted like 2020-05-01T10:32:02.925961Z
    into a timezone-aware datetime.

    Raises ValueError if the input does not end with the 'Z' (Zulu/UTC)
    suffix -- only UTC timestamps are supported.
    """
    # TODO could use dateutil? but it's quite slow as far as I remember..
    # TODO support non-utc.. somehow?
    # Raise instead of assert: asserts are stripped under `python -O`, which
    # would silently let malformed input through to fromisoformat.
    if not s.endswith('Z'):
        raise ValueError(f'expected UTC timestamp ending in Z: {s}')
    # fromisoformat does not understand the 'Z' suffix (until py3.11),
    # so rewrite it as an explicit +00:00 offset.
    return fromisoformat(s[:-1] + '+00:00')

View file

@@ -2,16 +2,22 @@
Feedbin RSS reader Feedbin RSS reader
""" """
from .common import listify
from ._rss import Subscription
from my.config import feedbin as config from my.config import feedbin as config
import json
from pathlib import Path from pathlib import Path
from typing import Sequence
from .core.common import listify, get_files, isoparse
from ._rss import Subscription
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
import json
from typing import Dict, List from typing import Dict, List
from datetime import datetime from datetime import datetime
from dateutil.parser import isoparse
@listify @listify
@@ -19,16 +25,19 @@ def parse_file(f: Path):
raw = json.loads(f.read_text()) raw = json.loads(f.read_text())
for r in raw: for r in raw:
yield Subscription( yield Subscription(
# TODO created_at? created_at=isoparse(r['created_at']),
title=r['title'], title=r['title'],
url=r['site_url'], url=r['site_url'],
id=r['id'], id=r['id'],
) )
def get_states() -> Dict[datetime, List[Subscription]]: def get_states() -> Dict[datetime, List[Subscription]]:
# meh
from dateutil.parser import isoparse # type: ignore
res = {} res = {}
# TODO use get_files for f in inputs():
for f in sorted(Path(config.export_dir).glob('*.json')): # TODO ugh. depends on my naming. not sure if useful?
dts = f.stem.split('_')[-1] dts = f.stem.split('_')[-1]
dt = isoparse(dts) dt = isoparse(dts)
subs = parse_file(f) subs = parse_file(f)

View file

@@ -2,16 +2,22 @@
Feedly RSS reader Feedly RSS reader
""" """
from .common import listify
from ._rss import Subscription
from my.config import feedly as config from my.config import feedly as config
import json
from pathlib import Path from pathlib import Path
from typing import Sequence
from .core.common import listify, get_files, isoparse
from ._rss import Subscription
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
import json
from typing import Dict, List from typing import Dict, List
from datetime import datetime from datetime import datetime
import pytz
@listify @listify
@@ -22,19 +28,21 @@ def parse_file(f: Path):
rid = r['id'] rid = r['id']
website = r.get('website', rid) # meh website = r.get('website', rid) # meh
yield Subscription( yield Subscription(
# TODO created_at? created_at=None,
title=r['title'], title=r['title'],
url=website, url=website,
id=rid, id=rid,
) )
def get_states() -> Dict[datetime, List[Subscription]]: def get_states() -> Dict[datetime, List[Subscription]]:
import pytz
res = {} res = {}
# TODO use get_files for f in inputs():
for f in sorted(Path(config.export_dir).glob('*.json')):
dts = f.stem.split('_')[-1] dts = f.stem.split('_')[-1]
dt = datetime.strptime(dts, '%Y%m%d%H%M%S') dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
dt = pytz.utc.localize(dt) dt = pytz.utc.localize(dt)
subs = parse_file(f) subs = parse_file(f)
res[dt] = subs res[dt] = subs
# TODO get rid of these dts...
return res return res

View file

@@ -21,6 +21,7 @@ _POLAR_DIR = Path('~').expanduser() / '.polar'
logger = LazyLogger(__name__) logger = LazyLogger(__name__)
# TODO use core.isoparse
def parse_dt(s: str) -> datetime: def parse_dt(s: str) -> datetime:
return pytz.utc.localize(datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ')) return pytz.utc.localize(datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ'))