From 63d4198fd92ff722f423c1f7fef2f4817e392cfa Mon Sep 17 00:00:00 2001
From: Dima Gerasimov
Date: Wed, 13 May 2020 22:04:23 +0100
Subject: [PATCH] rss module: prettify & reorganize to allow for easily adding extra modules

---
 my/rss/_rss.py    | 12 ------------
 my/rss/all.py     | 36 +++++++++---------------------------
 my/rss/common.py  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 my/rss/feedbin.py | 13 +++++--------
 my/rss/feedly.py  | 14 ++++++--------
 my/twitter/all.py |  5 ++---
 6 files changed, 66 insertions(+), 58 deletions(-)
 delete mode 100644 my/rss/_rss.py
 create mode 100644 my/rss/common.py

diff --git a/my/rss/_rss.py b/my/rss/_rss.py
deleted file mode 100644
index 63e2f77..0000000
--- a/my/rss/_rss.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# shared Rss stuff
-from datetime import datetime
-from typing import NamedTuple, Optional
-
-
-class Subscription(NamedTuple):
-    title: str
-    url: str
-    id: str # TODO not sure about it...
-    # eh, not all of them got reasonable 'created' time
-    created_at: Optional[datetime]
-    subscribed: bool=True
diff --git a/my/rss/all.py b/my/rss/all.py
index a4a27fe..90f5efa 100644
--- a/my/rss/all.py
+++ b/my/rss/all.py
@@ -1,29 +1,11 @@
-from itertools import chain
-from typing import List, Dict
-
-from ._rss import Subscription
-
-from . import feedbin
-from . import feedly
-# TODO google reader?
+'''
+Unified RSS data, merged from different services I used historically
+'''
+from typing import Iterable
+from .common import Subscription, compute_subscriptions
 
 
-def get_all_subscriptions() -> List[Subscription]:
-    """
-    Keeps track of everything I ever subscribed to. It's useful to keep track of unsubscribed too
-    so you don't try to subscribe again (or at least take into account why you unsubscribed before)
-    """
-    states = {}
-    states.update(feedly.get_states())
-    states.update(feedbin.get_states())
-    by_url: Dict[str, Subscription] = {}
-    for _, feeds in sorted(states.items()):
-        for f in feeds:
-            if f.url not in by_url:
-                by_url[f.url] = f
-    res = []
-    last = {x.url: x for x in max(states.items())[1]}
-    for u, x in sorted(by_url.items()):
-        present = u in last
-        res.append(x._replace(subscribed=present))
-    return res
+def subscriptions() -> Iterable[Subscription]:
+    from . import feedbin, feedly
+    # TODO google reader?
+    yield from compute_subscriptions(feedbin.states(), feedly.states())
diff --git a/my/rss/common.py b/my/rss/common.py
new file mode 100644
index 0000000..3dc761c
--- /dev/null
+++ b/my/rss/common.py
@@ -0,0 +1,44 @@
+# shared Rss stuff
+from datetime import datetime
+from typing import NamedTuple, Optional, List, Dict
+
+
+class Subscription(NamedTuple):
+    title: str
+    url: str
+    id: str # TODO not sure about it...
+    # eh, not all of them got reasonable 'created' time
+    created_at: Optional[datetime]
+    subscribed: bool=True
+
+from typing import Iterable, Tuple, Sequence
+
+# snapshot of subscriptions at time
+SubscriptionState = Tuple[datetime, Sequence[Subscription]]
+
+
+def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
+    """
+    Keeps track of everything I ever subscribed to.
+    In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed)
+    """
+    from itertools import chain
+    states = list(chain.from_iterable(sources))
+    # TODO keep 'source'/'provider'/'service' attribute?
+
+    by_url: Dict[str, Subscription] = {}
+    # ah. dates are used for sorting
+    for when, state in sorted(states):
+        # TODO use 'when'?
+        for feed in state:
+            if feed.url not in by_url:
+                by_url[feed.url] = feed
+
+    _, last_state = max(states, key=lambda x: x[0])
+    last_urls = {f.url for f in last_state}
+
+    res = []
+    for u, x in sorted(by_url.items()):
+        present = u in last_urls
+        res.append(x._replace(subscribed=present))
+    return res
diff --git a/my/rss/feedbin.py b/my/rss/feedbin.py
index ebffb91..5a2f117 100644
--- a/my/rss/feedbin.py
+++ b/my/rss/feedbin.py
@@ -8,7 +8,7 @@ from pathlib import Path
 from typing import Sequence
 
 from ..core.common import listify, get_files, isoparse
-from ._rss import Subscription
+from .common import Subscription
 
 
 def inputs() -> Sequence[Path]:
@@ -16,9 +16,6 @@ def inputs() -> Sequence[Path]:
 
 
 import json
-from typing import Dict, List
-from datetime import datetime
-
 
 @listify
 def parse_file(f: Path):
@@ -32,14 +29,14 @@ def parse_file(f: Path):
         )
 
 
-def get_states() -> Dict[datetime, List[Subscription]]:
+from typing import Iterable
+from .common import SubscriptionState
+def states() -> Iterable[SubscriptionState]:
     # meh
     from dateutil.parser import isoparse # type: ignore
-    res = {}
     for f in inputs():
         # TODO ugh. depends on my naming. not sure if useful?
         dts = f.stem.split('_')[-1]
         dt = isoparse(dts)
         subs = parse_file(f)
-        res[dt] = subs
-    return res
+        yield dt, subs
diff --git a/my/rss/feedly.py b/my/rss/feedly.py
index 3133656..cc9331f 100644
--- a/my/rss/feedly.py
+++ b/my/rss/feedly.py
@@ -8,7 +8,7 @@ from pathlib import Path
 from typing import Sequence
 
 from ..core.common import listify, get_files, isoparse
-from ._rss import Subscription
+from .common import Subscription
 
 
 def inputs() -> Sequence[Path]:
@@ -16,8 +16,6 @@ def inputs() -> Sequence[Path]:
 
 
 import json
-from typing import Dict, List
-from datetime import datetime
 
 
 @listify
@@ -35,14 +33,14 @@ def parse_file(f: Path):
         )
 
 
-def get_states() -> Dict[datetime, List[Subscription]]:
+from datetime import datetime
+from typing import Iterable
+from .common import SubscriptionState
+def states() -> Iterable[SubscriptionState]:
     import pytz
-    res = {}
     for f in inputs():
         dts = f.stem.split('_')[-1]
         dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
         dt = pytz.utc.localize(dt)
         subs = parse_file(f)
-        res[dt] = subs
-    # TODO get rid of these dts...
-    return res
+        yield dt, subs
diff --git a/my/twitter/all.py b/my/twitter/all.py
index f2e0469..be4bdbf 100644
--- a/my/twitter/all.py
+++ b/my/twitter/all.py
@@ -7,10 +7,9 @@ from . import twint
 from . import archive
 
 
-from more_itertools import unique_everseen
-
-
+# TODO move to .common?
 def merge_tweets(*sources):
+    from more_itertools import unique_everseen
     yield from unique_everseen(
         chain(*sources),
         key=lambda t: t.id_str,