rss module: prettify & reorganize to allow for easily adding extra modules

This commit is contained in:
Dima Gerasimov 2020-05-13 22:04:23 +01:00
parent 92cf375480
commit 63d4198fd9
6 changed files with 66 additions and 58 deletions

View file

@ -1,12 +0,0 @@
# shared Rss stuff
from datetime import datetime
from typing import NamedTuple, Optional
class Subscription(NamedTuple):
    """A single RSS feed subscription record (shared Rss data type)."""

    title: str
    url: str
    # TODO not sure about it...
    id: str
    # eh, not all of them got reasonable 'created' time
    created_at: Optional[datetime]
    # defaults to True; can be overridden via _replace(subscribed=...)
    subscribed: bool = True

View file

@ -1,29 +1,11 @@
from itertools import chain
from typing import List, Dict
'''
Unified RSS data, merged from different services I used historically
'''
from typing import Iterable
from .common import Subscription, compute_subscriptions
from ._rss import Subscription
from . import feedbin
from . import feedly
def subscriptions() -> Iterable[Subscription]:
from . import feedbin, feedly
# TODO google reader?
def get_all_subscriptions() -> List[Subscription]:
"""
Keeps track of everything I ever subscribed to. It's useful to keep track of unsubscribed too
so you don't try to subscribe again (or at least take into account why you unsubscribed before)
"""
states = {}
states.update(feedly.get_states())
states.update(feedbin.get_states())
by_url: Dict[str, Subscription] = {}
for _, feeds in sorted(states.items()):
for f in feeds:
if f.url not in by_url:
by_url[f.url] = f
res = []
last = {x.url: x for x in max(states.items())[1]}
for u, x in sorted(by_url.items()):
present = u in last
res.append(x._replace(subscribed=present))
return res
yield from compute_subscriptions(feedbin.states(), feedly.states())

44
my/rss/common.py Normal file
View file

@ -0,0 +1,44 @@
# shared Rss stuff
from datetime import datetime
from typing import NamedTuple, Optional, List, Dict
class Subscription(NamedTuple):
    """A single RSS feed subscription record (shared Rss data type)."""

    title: str
    url: str
    # TODO not sure about it...
    id: str
    # eh, not all of them got reasonable 'created' time
    created_at: Optional[datetime]
    # defaults to True; compute_subscriptions overrides it via _replace
    subscribed: bool = True
from typing import Iterable, Tuple, Sequence
# snapshot of subscriptions at a point in time:
# (timestamp of the snapshot, subscriptions present in that snapshot)
SubscriptionState = Tuple[datetime, Sequence[Subscription]]
def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
    """
    Merge subscription snapshots from several sources into a single list.

    Keeps track of everything I ever subscribed to.
    In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed)

    Each source is an iterable of ``(timestamp, feeds)`` snapshots.

    Returns one entry per distinct feed url, ordered by url. The entry is taken
    from the earliest snapshot mentioning that url, and its ``subscribed`` flag
    is set to whether the url is present in the most recent snapshot.
    Returns an empty list when the sources contain no snapshots at all.
    """
    from itertools import chain
    from operator import itemgetter

    snapshot_time = itemgetter(0)

    # ah. dates are used for sorting
    # Sort on the timestamp only: comparing whole tuples would fall through to
    # comparing the feeds themselves when two snapshots share a timestamp, and
    # that can raise TypeError (e.g. created_at being None vs a datetime).
    states = sorted(chain.from_iterable(sources), key=snapshot_time)
    if not states:
        # nothing to merge; max() below would raise ValueError on empty input
        return []

    # TODO keep 'source'/'provider'/'service' attribute?
    by_url: Dict[str, Subscription] = {}
    for _when, state in states:
        # TODO use 'when'?
        for feed in state:
            # first (i.e. earliest) occurrence of a url wins
            by_url.setdefault(feed.url, feed)

    # urls present in the most recent snapshot are the ones still subscribed to
    _, last_state = max(states, key=snapshot_time)
    last_urls = {feed.url for feed in last_state}

    return [
        by_url[url]._replace(subscribed=url in last_urls)
        for url in sorted(by_url)
    ]

View file

@ -8,7 +8,7 @@ from pathlib import Path
from typing import Sequence
from ..core.common import listify, get_files, isoparse
from ._rss import Subscription
from .common import Subscription
def inputs() -> Sequence[Path]:
@ -16,9 +16,6 @@ def inputs() -> Sequence[Path]:
import json
from typing import Dict, List
from datetime import datetime
@listify
def parse_file(f: Path):
@ -32,14 +29,14 @@ def parse_file(f: Path):
)
def get_states() -> Dict[datetime, List[Subscription]]:
from typing import Iterable
from .common import SubscriptionState
def states() -> Iterable[SubscriptionState]:
# meh
from dateutil.parser import isoparse # type: ignore
res = {}
for f in inputs():
# TODO ugh. depends on my naming. not sure if useful?
dts = f.stem.split('_')[-1]
dt = isoparse(dts)
subs = parse_file(f)
res[dt] = subs
return res
yield dt, subs

View file

@ -8,7 +8,7 @@ from pathlib import Path
from typing import Sequence
from ..core.common import listify, get_files, isoparse
from ._rss import Subscription
from .common import Subscription
def inputs() -> Sequence[Path]:
@ -16,8 +16,6 @@ def inputs() -> Sequence[Path]:
import json
from typing import Dict, List
from datetime import datetime
@listify
@ -35,14 +33,14 @@ def parse_file(f: Path):
)
def get_states() -> Dict[datetime, List[Subscription]]:
from datetime import datetime
from typing import Iterable
from .common import SubscriptionState
def states() -> Iterable[SubscriptionState]:
import pytz
res = {}
for f in inputs():
dts = f.stem.split('_')[-1]
dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
dt = pytz.utc.localize(dt)
subs = parse_file(f)
res[dt] = subs
# TODO get rid of these dts...
return res
yield dt, subs

View file

@ -7,10 +7,9 @@ from . import twint
from . import archive
from more_itertools import unique_everseen
# TODO move to .common?
def merge_tweets(*sources):
from more_itertools import unique_everseen
yield from unique_everseen(
chain(*sources),
key=lambda t: t.id_str,