move rss stuff in a separate subpackage

Dima Gerasimov 2020-05-13 21:36:19 +01:00
parent c289fbb872
commit 92cf375480
5 changed files with 4 additions and 4 deletions

my/rss/_rss.py Normal file

@@ -0,0 +1,12 @@
# shared Rss stuff
from datetime import datetime
from typing import NamedTuple, Optional


class Subscription(NamedTuple):
title: str
url: str
id: str # TODO not sure about it...
# eh, not all of them got reasonable 'created' time
created_at: Optional[datetime]
subscribed: bool=True

my/rss/all.py Normal file

@@ -0,0 +1,29 @@
from itertools import chain
from typing import List, Dict

from ._rss import Subscription

from . import feedbin
from . import feedly
# TODO google reader?


def get_all_subscriptions() -> List[Subscription]:
"""
    Keeps track of everything I ever subscribed to. It's useful to keep track of unsubscribed feeds too,
    so you don't try to subscribe again (or at least can take into account why you unsubscribed before)
"""
states = {}
states.update(feedly.get_states())
states.update(feedbin.get_states())
by_url: Dict[str, Subscription] = {}
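    # snapshots are iterated in chronological order, so the first occurrence of each feed url wins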
for _, feeds in sorted(states.items()):
for f in feeds:
if f.url not in by_url:
by_url[f.url] = f
res = []
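    # states.items() yields (timestamp, feeds) pairs, so max() picks the most recent snapshot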
last = {x.url: x for x in max(states.items())[1]}
for u, x in sorted(by_url.items()):
present = u in last
res.append(x._replace(subscribed=present))
return res

my/rss/feedbin.py Normal file

@@ -0,0 +1,45 @@
"""
Feedbin RSS reader
"""
from my.config import feedbin as config

from pathlib import Path
from typing import Sequence

from ..core.common import listify, get_files, isoparse
from ._rss import Subscription


def inputs() -> Sequence[Path]:
    return get_files(config.export_path)


import json
from typing import Dict, List
from datetime import datetime
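# listify (from ..core.common) wraps the generator below so parse_file returns a plain list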
@listify
def parse_file(f: Path):
raw = json.loads(f.read_text())
for r in raw:
yield Subscription(
created_at=isoparse(r['created_at']),
title=r['title'],
url=r['site_url'],
id=r['id'],
        )


def get_states() -> Dict[datetime, List[Subscription]]:
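    # one list of subscriptions per export file, keyed by the export timestamp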
# meh
from dateutil.parser import isoparse # type: ignore
res = {}
for f in inputs():
# TODO ugh. depends on my naming. not sure if useful?
dts = f.stem.split('_')[-1]
dt = isoparse(dts)
subs = parse_file(f)
res[dt] = subs
return res

my/rss/feedly.py Normal file

@@ -0,0 +1,48 @@
"""
Feedly RSS reader
"""
from my.config import feedly as config

from pathlib import Path
from typing import Sequence

from ..core.common import listify, get_files, isoparse
from ._rss import Subscription


def inputs() -> Sequence[Path]:
    return get_files(config.export_path)


import json
from typing import Dict, List
from datetime import datetime
@listify
def parse_file(f: Path):
raw = json.loads(f.read_text())
for r in raw:
# err, some even don't have website..
rid = r['id']
website = r.get('website', rid) # meh
yield Subscription(
created_at=None,
title=r['title'],
url=website,
id=rid,
        )


def get_states() -> Dict[datetime, List[Subscription]]:
import pytz
res = {}
for f in inputs():
dts = f.stem.split('_')[-1]
dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
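        # the parsed timestamp is naive; attach UTC tzinfo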
dt = pytz.utc.localize(dt)
subs = parse_file(f)
res[dt] = subs
# TODO get rid of these dts...
return res