From 63d4198fd92ff722f423c1f7fef2f4817e392cfa Mon Sep 17 00:00:00 2001
From: Dima Gerasimov <karlicoss@gmail.com>
Date: Wed, 13 May 2020 22:04:23 +0100
Subject: [PATCH] rss module: prettify & reorganize to allow for easily adding
 extra modules

---
 my/rss/_rss.py    | 12 ------------
 my/rss/all.py     | 36 +++++++++---------------------------
 my/rss/common.py  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 my/rss/feedbin.py | 13 +++++--------
 my/rss/feedly.py  | 14 ++++++--------
 my/twitter/all.py |  5 ++---
 6 files changed, 66 insertions(+), 58 deletions(-)
 delete mode 100644 my/rss/_rss.py
 create mode 100644 my/rss/common.py

diff --git a/my/rss/_rss.py b/my/rss/_rss.py
deleted file mode 100644
index 63e2f77..0000000
--- a/my/rss/_rss.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# shared Rss stuff
-from datetime import datetime
-from typing import NamedTuple, Optional
-
-
-class Subscription(NamedTuple):
-    title: str
-    url: str
-    id: str # TODO not sure about it...
-    # eh, not all of them got reasonable 'created' time
-    created_at: Optional[datetime]
-    subscribed: bool=True
diff --git a/my/rss/all.py b/my/rss/all.py
index a4a27fe..90f5efa 100644
--- a/my/rss/all.py
+++ b/my/rss/all.py
@@ -1,29 +1,11 @@
-from itertools import chain
-from typing import List, Dict
-
-from ._rss import Subscription
-
-from . import feedbin
-from . import feedly
-# TODO google reader?
+'''
+Unified RSS data, merged from different services I used historically
+'''
+from typing import Iterable
+from .common import Subscription, compute_subscriptions
 
 
-def get_all_subscriptions() -> List[Subscription]:
-    """
-    Keeps track of everything I ever subscribed to. It's useful to keep track of unsubscribed too
-    so you don't try to subscribe again (or at least take into account why you unsubscribed before)
-    """
-    states = {}
-    states.update(feedly.get_states())
-    states.update(feedbin.get_states())
-    by_url: Dict[str, Subscription] = {}
-    for _, feeds in sorted(states.items()):
-        for f in feeds:
-            if f.url not in by_url:
-                by_url[f.url] = f
-    res = []
-    last = {x.url: x for x in max(states.items())[1]}
-    for u, x in sorted(by_url.items()):
-        present = u in last
-        res.append(x._replace(subscribed=present))
-    return res
+def subscriptions() -> Iterable[Subscription]:  # merged subscriptions from all the providers imported below
+    from . import feedbin, feedly  # function-level import: runs when the generator is first iterated
+    # TODO: support Google Reader as one more provider?
+    yield from compute_subscriptions(feedbin.states(), feedly.states())
diff --git a/my/rss/common.py b/my/rss/common.py
new file mode 100644
index 0000000..3dc761c
--- /dev/null
+++ b/my/rss/common.py
@@ -0,0 +1,44 @@
+# shared Rss stuff
+from datetime import datetime
+from typing import NamedTuple, Optional, List, Dict
+
+
+class Subscription(NamedTuple):  # one feed subscription; feedbin.py and feedly.py import it from here
+    title: str
+    url: str  # compute_subscriptions() treats feeds with the same url as the same subscription
+    id: str # TODO not sure about it...
+    # may be None: not all of them got a reasonable 'created' time
+    created_at: Optional[datetime]
+    subscribed: bool=True  # compute_subscriptions() resets this to whether the url is in the latest snapshot
+
+from typing import Iterable, Tuple, Sequence
+
+# a snapshot: the time it was taken at, and the subscriptions present at that time
+SubscriptionState = Tuple[datetime, Sequence[Subscription]]
+
+
+def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
+    """
+    Merges the (timestamp, feeds) snapshots of all sources into one list of every feed ever seen, sorted by url.
+    'subscribed' is set to whether the url is present in the most recent snapshot, so unsubscribed feeds are kept too.
+    """
+    from itertools import chain
+    states = list(chain.from_iterable(sources))
+    # TODO keep 'source'/'provider'/'service' attribute?
+
+    by_url: Dict[str, Subscription] = {}
+    # oldest snapshot first, so the earliest record of each url is the one kept
+    for when, state in sorted(states):  # NOTE(review): ties on the timestamp compare the feed sequences themselves
+        # TODO use 'when'?
+        for feed in state:
+            if feed.url not in by_url:
+                by_url[feed.url] = feed
+
+    _, last_state = max(states, key=lambda x: x[0])  # NOTE(review): max() raises ValueError when there are no snapshots
+    last_urls = {f.url for f in last_state}
+
+    res = []
+    for u, x in sorted(by_url.items()):
+        present = u in last_urls
+        res.append(x._replace(subscribed=present))
+    return res
diff --git a/my/rss/feedbin.py b/my/rss/feedbin.py
index ebffb91..5a2f117 100644
--- a/my/rss/feedbin.py
+++ b/my/rss/feedbin.py
@@ -8,7 +8,7 @@ from pathlib import Path
 from typing import Sequence
 
 from ..core.common import listify, get_files, isoparse
-from ._rss import Subscription
+from .common import Subscription
 
 
 def inputs() -> Sequence[Path]:
@@ -16,9 +16,6 @@ def inputs() -> Sequence[Path]:
 
 
 import json
-from typing import Dict, List
-from datetime import datetime
-
 
 @listify
 def parse_file(f: Path):
@@ -32,14 +29,14 @@ def parse_file(f: Path):
         )
 
 
-def get_states() -> Dict[datetime, List[Subscription]]:
+from typing import Iterable
+from .common import SubscriptionState
+def states() -> Iterable[SubscriptionState]:
     # meh
     from dateutil.parser import isoparse # type: ignore
-    res = {}
     for f in inputs():
         # TODO ugh. depends on my naming. not sure if useful?
         dts = f.stem.split('_')[-1]
         dt = isoparse(dts)
         subs = parse_file(f)
-        res[dt] = subs
-    return res
+        yield dt, subs
diff --git a/my/rss/feedly.py b/my/rss/feedly.py
index 3133656..cc9331f 100644
--- a/my/rss/feedly.py
+++ b/my/rss/feedly.py
@@ -8,7 +8,7 @@ from pathlib import Path
 from typing import Sequence
 
 from ..core.common import listify, get_files, isoparse
-from ._rss import Subscription
+from .common import Subscription
 
 
 def inputs() -> Sequence[Path]:
@@ -16,8 +16,6 @@ def inputs() -> Sequence[Path]:
 
 
 import json
-from typing import Dict, List
-from datetime import datetime
 
 
 @listify
@@ -35,14 +33,14 @@ def parse_file(f: Path):
         )
 
 
-def get_states() -> Dict[datetime, List[Subscription]]:
+from datetime import datetime
+from typing import Iterable
+from .common import SubscriptionState
+def states() -> Iterable[SubscriptionState]:
     import pytz
-    res = {}
     for f in inputs():
         dts = f.stem.split('_')[-1]
         dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
         dt = pytz.utc.localize(dt)
         subs = parse_file(f)
-        res[dt] = subs
-        # TODO get rid of these dts...
-    return res
+        yield dt, subs
diff --git a/my/twitter/all.py b/my/twitter/all.py
index f2e0469..be4bdbf 100644
--- a/my/twitter/all.py
+++ b/my/twitter/all.py
@@ -7,10 +7,9 @@ from . import twint
 from . import archive
 
 
-from more_itertools import unique_everseen
-
-
+# TODO move to .common?
 def merge_tweets(*sources):
+    from more_itertools import unique_everseen
     yield from unique_everseen(
         chain(*sources),
         key=lambda t: t.id_str,