rss module: prettify & reorganize to allow for easily adding extra modules
This commit is contained in:
parent
92cf375480
commit
63d4198fd9
6 changed files with 66 additions and 58 deletions
|
@ -1,12 +0,0 @@
|
||||||
# shared Rss stuff
|
|
||||||
from datetime import datetime
|
|
||||||
from typing import NamedTuple, Optional
|
|
||||||
|
|
||||||
|
|
||||||
class Subscription(NamedTuple):
    """One RSS subscription record.

    Fields are positional in the order declared below; only ``subscribed``
    has a default (``True``).
    """

    title: str
    url: str
    id: str  # TODO not sure about it...
    # eh, not all of them got reasonable 'created' time
    created_at: Optional[datetime]
    subscribed: bool = True
|
|
|
@ -1,29 +1,11 @@
|
||||||
from itertools import chain
|
'''
|
||||||
from typing import List, Dict
|
Unified RSS data, merged from different services I used historically
|
||||||
|
'''
|
||||||
from ._rss import Subscription
|
from typing import Iterable
|
||||||
|
from .common import Subscription, compute_subscriptions
|
||||||
from . import feedbin
|
|
||||||
from . import feedly
|
|
||||||
# TODO google reader?
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_subscriptions() -> List[Subscription]:
|
def subscriptions() -> Iterable[Subscription]:
|
||||||
"""
|
from . import feedbin, feedly
|
||||||
Keeps track of everything I ever subscribed to. It's useful to keep track of unsubscribed too
|
# TODO google reader?
|
||||||
so you don't try to subscribe again (or at least take into account why you unsubscribed before)
|
yield from compute_subscriptions(feedbin.states(), feedly.states())
|
||||||
"""
|
|
||||||
states = {}
|
|
||||||
states.update(feedly.get_states())
|
|
||||||
states.update(feedbin.get_states())
|
|
||||||
by_url: Dict[str, Subscription] = {}
|
|
||||||
for _, feeds in sorted(states.items()):
|
|
||||||
for f in feeds:
|
|
||||||
if f.url not in by_url:
|
|
||||||
by_url[f.url] = f
|
|
||||||
res = []
|
|
||||||
last = {x.url: x for x in max(states.items())[1]}
|
|
||||||
for u, x in sorted(by_url.items()):
|
|
||||||
present = u in last
|
|
||||||
res.append(x._replace(subscribed=present))
|
|
||||||
return res
|
|
||||||
|
|
44
my/rss/common.py
Normal file
44
my/rss/common.py
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
# shared Rss stuff
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import NamedTuple, Optional, List, Dict
|
||||||
|
|
||||||
|
|
||||||
|
class Subscription(NamedTuple):
    """One RSS subscription record.

    Fields are positional in the order declared below; only ``subscribed``
    has a default (``True``).
    """

    title: str
    url: str
    id: str  # TODO not sure about it...
    # eh, not all of them got reasonable 'created' time
    created_at: Optional[datetime]
    subscribed: bool = True
|
||||||
|
|
||||||
|
from typing import Iterable, Tuple, Sequence
|
||||||
|
|
||||||
|
# snapshot of subscriptions at a given point in time
|
||||||
|
SubscriptionState = Tuple[datetime, Sequence[Subscription]]
|
||||||
|
|
||||||
|
|
||||||
|
def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
    """
    Keeps track of everything I ever subscribed to.
    In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed)

    Each source is an iterable of ``(timestamp, feeds)`` snapshots. All
    snapshots from all sources are pooled together, then:

    - for every feed url, the record from the earliest snapshot containing it
      is kept;
    - ``subscribed`` is set to whether that url is present in the single
      latest snapshot across *all* sources.

    Returns the records sorted by url, or an empty list when the sources
    yield no snapshots at all.

    NOTE: timestamps from different sources are compared with each other, so
    they must be mutually comparable (e.g. all timezone-aware).
    """
    from itertools import chain
    from operator import itemgetter

    states = list(chain.from_iterable(sources))
    # TODO keep 'source'/'provider'/'service' attribute?

    if not states:
        # nothing exported yet; max() below would raise ValueError on empty input
        return []

    snapshot_time = itemgetter(0)

    by_url: Dict[str, Subscription] = {}
    # ah. dates are used for sorting
    # Sort on the timestamp only: comparing whole (timestamp, feeds) tuples
    # falls through to comparing the feed sequences when timestamps tie,
    # which can raise TypeError (e.g. created_at None vs datetime).
    # sorted() is stable, so tied snapshots keep their input order.
    for _when, state in sorted(states, key=snapshot_time):
        # TODO use 'when'?
        for feed in state:
            # first (earliest) occurrence of a url wins
            by_url.setdefault(feed.url, feed)

    _, last_state = max(states, key=snapshot_time)
    last_urls = {f.url for f in last_state}

    # urls are unique dict keys, so sorting the items never compares the feeds
    return [
        feed._replace(subscribed=url in last_urls)
        for url, feed in sorted(by_url.items())
    ]
|
|
@ -8,7 +8,7 @@ from pathlib import Path
|
||||||
from typing import Sequence
|
from typing import Sequence
|
||||||
|
|
||||||
from ..core.common import listify, get_files, isoparse
|
from ..core.common import listify, get_files, isoparse
|
||||||
from ._rss import Subscription
|
from .common import Subscription
|
||||||
|
|
||||||
|
|
||||||
def inputs() -> Sequence[Path]:
|
def inputs() -> Sequence[Path]:
|
||||||
|
@ -16,9 +16,6 @@ def inputs() -> Sequence[Path]:
|
||||||
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from typing import Dict, List
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
|
|
||||||
@listify
|
@listify
|
||||||
def parse_file(f: Path):
|
def parse_file(f: Path):
|
||||||
|
@ -32,14 +29,14 @@ def parse_file(f: Path):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_states() -> Dict[datetime, List[Subscription]]:
|
from typing import Iterable
|
||||||
|
from .common import SubscriptionState
|
||||||
|
def states() -> Iterable[SubscriptionState]:
|
||||||
# meh
|
# meh
|
||||||
from dateutil.parser import isoparse # type: ignore
|
from dateutil.parser import isoparse # type: ignore
|
||||||
res = {}
|
|
||||||
for f in inputs():
|
for f in inputs():
|
||||||
# TODO ugh. depends on my naming. not sure if useful?
|
# TODO ugh. depends on my naming. not sure if useful?
|
||||||
dts = f.stem.split('_')[-1]
|
dts = f.stem.split('_')[-1]
|
||||||
dt = isoparse(dts)
|
dt = isoparse(dts)
|
||||||
subs = parse_file(f)
|
subs = parse_file(f)
|
||||||
res[dt] = subs
|
yield dt, subs
|
||||||
return res
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ from pathlib import Path
|
||||||
from typing import Sequence
|
from typing import Sequence
|
||||||
|
|
||||||
from ..core.common import listify, get_files, isoparse
|
from ..core.common import listify, get_files, isoparse
|
||||||
from ._rss import Subscription
|
from .common import Subscription
|
||||||
|
|
||||||
|
|
||||||
def inputs() -> Sequence[Path]:
|
def inputs() -> Sequence[Path]:
|
||||||
|
@ -16,8 +16,6 @@ def inputs() -> Sequence[Path]:
|
||||||
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from typing import Dict, List
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
|
|
||||||
@listify
|
@listify
|
||||||
|
@ -35,14 +33,14 @@ def parse_file(f: Path):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_states() -> Dict[datetime, List[Subscription]]:
|
from datetime import datetime
|
||||||
|
from typing import Iterable
|
||||||
|
from .common import SubscriptionState
|
||||||
|
def states() -> Iterable[SubscriptionState]:
|
||||||
import pytz
|
import pytz
|
||||||
res = {}
|
|
||||||
for f in inputs():
|
for f in inputs():
|
||||||
dts = f.stem.split('_')[-1]
|
dts = f.stem.split('_')[-1]
|
||||||
dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
|
dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
|
||||||
dt = pytz.utc.localize(dt)
|
dt = pytz.utc.localize(dt)
|
||||||
subs = parse_file(f)
|
subs = parse_file(f)
|
||||||
res[dt] = subs
|
yield dt, subs
|
||||||
# TODO get rid of these dts...
|
|
||||||
return res
|
|
||||||
|
|
|
@ -7,10 +7,9 @@ from . import twint
|
||||||
from . import archive
|
from . import archive
|
||||||
|
|
||||||
|
|
||||||
from more_itertools import unique_everseen
|
# TODO move to .common?
|
||||||
|
|
||||||
|
|
||||||
def merge_tweets(*sources):
|
def merge_tweets(*sources):
|
||||||
|
from more_itertools import unique_everseen
|
||||||
yield from unique_everseen(
|
yield from unique_everseen(
|
||||||
chain(*sources),
|
chain(*sources),
|
||||||
key=lambda t: t.id_str,
|
key=lambda t: t.id_str,
|
||||||
|
|
Loading…
Add table
Reference in a new issue