rss module: prettify & reorganize to allow for easily adding extra modules

2020-05-13 22:04:23 +01:00 · 2020-05-13 22:04:23 +01:00 · 63d4198fd9
commit 63d4198fd9
parent 92cf375480
6 changed files with 66 additions and 58 deletions
--- a/my/rss/_rss.py
+++ b/my/rss/_rss.py
@ -1,12 +0,0 @@
 # shared Rss stuff
 from datetime import datetime
 from typing import NamedTuple, Optional
 class Subscription(NamedTuple):
    """
    A single RSS feed subscription record, shared by the rss provider modules.
    """
    # title and url of the feed
    title: str
    url: str
    id: str # TODO not sure about it...
    # eh, not all of them got reasonable 'created' time
    created_at: Optional[datetime]
    # defaults to True; the aggregation code replaces it (via _replace) with
    # whether the feed's url is present in the most recent snapshot
    subscribed: bool=True
--- a/my/rss/all.py
+++ b/my/rss/all.py
@ -1,29 +1,11 @@
-from itertools import chain
+'''
-from typing import List, Dict
+Unified RSS data, merged from different services I used historically
-
+'''
-from ._rss import Subscription
+from typing import Iterable
-
+from .common import Subscription, compute_subscriptions
 from . import feedbin
 from . import feedly
 # TODO google reader?
-def get_all_subscriptions() -> List[Subscription]:
+def subscriptions() -> Iterable[Subscription]:
-    """
+    from . import feedbin, feedly
-    Keeps track of everything I ever subscribed to. It's useful to keep track of unsubscribed too
+    # TODO google reader?
-    so you don't try to subscribe again (or at least take into account why you unsubscribed before)
+    yield from compute_subscriptions(feedbin.states(), feedly.states())
    """
    states = {}
    states.update(feedly.get_states())
    states.update(feedbin.get_states())
    by_url: Dict[str, Subscription] = {}
    for _, feeds in sorted(states.items()):
        for f in feeds:
            if f.url not in by_url:
                by_url[f.url] = f
    res = []
    last = {x.url: x for x in max(states.items())[1]}
    for u, x in sorted(by_url.items()):
        present = u in last
        res.append(x._replace(subscribed=present))
    return res
--- a/my/rss/common.py
+++ b/my/rss/common.py
@ -0,0 +1,44 @@
 # shared Rss stuff
 from datetime import datetime
 from typing import NamedTuple, Optional, List, Dict
 class Subscription(NamedTuple):
    """
    A single RSS feed subscription record, shared by the rss provider modules
    (feedbin and feedly both import it from here).
    """
    # title and url of the feed
    title: str
    url: str
    id: str # TODO not sure about it...
    # eh, not all of them got reasonable 'created' time
    created_at: Optional[datetime]
    # defaults to True; compute_subscriptions() replaces it (via _replace) with
    # whether the feed's url is present in the most recent snapshot
    subscribed: bool=True
 from typing import Iterable, Tuple, Sequence
 # snapshot of the subscriptions at a given point in time: (timestamp, feeds present at that moment)
 SubscriptionState = Tuple[datetime, Sequence[Subscription]]
 def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
    """
    Keeps track of everything I ever subscribed to.
    In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed)

    Each source is an iterable of (timestamp, feeds) snapshots; snapshots from
    all sources are merged and processed in chronological order.

    For every distinct feed url, the entry from the earliest snapshot that
    mentions it is kept. Its 'subscribed' flag is set to whether the url is
    still present in the most recent snapshot.

    Returns the entries sorted by url; an empty list if there are no snapshots at all.
    """
    from itertools import chain
    from operator import itemgetter

    states = list(chain.from_iterable(sources))
    if not states:
        # nothing exported yet -- max() below would raise ValueError on empty input
        return []

    # Order snapshots by timestamp only. Comparing whole (timestamp, feeds)
    # tuples falls through to comparing the feed sequences when two timestamps
    # are equal, which raises TypeError (e.g. None vs datetime in created_at).
    snapshot_time = itemgetter(0)

    # TODO keep 'source'/'provider'/'service' attribute?
    by_url: Dict[str, Subscription] = {}
    for _when, state in sorted(states, key=snapshot_time):
        # TODO use 'when'?
        for feed in state:
            # first (i.e. earliest) occurrence of the url wins
            by_url.setdefault(feed.url, feed)

    _, last_state = max(states, key=snapshot_time)
    last_urls = {f.url for f in last_state}

    # urls are unique dict keys, so sorting the items never compares the feeds themselves
    return [
        feed._replace(subscribed=url in last_urls)
        for url, feed in sorted(by_url.items())
    ]
--- a/my/rss/feedbin.py
+++ b/my/rss/feedbin.py
@ -8,7 +8,7 @@ from pathlib import Path
 from typing import Sequence
 from ..core.common import listify, get_files, isoparse
-from ._rss import Subscription
+from .common import Subscription
 def inputs() -> Sequence[Path]:
@ -16,9 +16,6 @@ def inputs() -> Sequence[Path]:
 import json
 from typing import Dict, List
 from datetime import datetime
@listify
 def parse_file(f: Path):
@ -32,14 +29,14 @@ def parse_file(f: Path):
        )
-def get_states() -> Dict[datetime, List[Subscription]]:
+from typing import Iterable
 from .common import SubscriptionState
 def states() -> Iterable[SubscriptionState]:
    # meh
    from dateutil.parser import isoparse # type: ignore
    res = {}
    for f in inputs():
        # TODO ugh. depends on my naming. not sure if useful?
        dts = f.stem.split('_')[-1]
        dt = isoparse(dts)
        subs = parse_file(f)
-        res[dt] = subs
+        yield dt, subs
    return res
--- a/my/rss/feedly.py
+++ b/my/rss/feedly.py
@ -8,7 +8,7 @@ from pathlib import Path
 from typing import Sequence
 from ..core.common import listify, get_files, isoparse
-from ._rss import Subscription
+from .common import Subscription
 def inputs() -> Sequence[Path]:
@ -16,8 +16,6 @@ def inputs() -> Sequence[Path]:
 import json
 from typing import Dict, List
 from datetime import datetime
@listify
@ -35,14 +33,14 @@ def parse_file(f: Path):
        )
-def get_states() -> Dict[datetime, List[Subscription]]:
+from datetime import datetime
 from typing import Iterable
 from .common import SubscriptionState
 def states() -> Iterable[SubscriptionState]:
    import pytz
    res = {}
    for f in inputs():
        dts = f.stem.split('_')[-1]
        dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
        dt = pytz.utc.localize(dt)
        subs = parse_file(f)
-        res[dt] = subs
+        yield dt, subs
        # TODO get rid of these dts...
    return res
--- a/my/twitter/all.py
+++ b/my/twitter/all.py
@ -7,10 +7,9 @@ from . import twint
 from . import archive
-from more_itertools import unique_everseen
+# TODO move to .common?
 def merge_tweets(*sources):
    from more_itertools import unique_everseen
    yield from unique_everseen(
        chain(*sources),
        key=lambda t: t.id_str,