core.common: move listify to core.utils.itertools, use better typing annotations for it

also some minor refactoring of my.rss
This commit is contained in:
Dima Gerasimov 2024-08-14 10:59:47 +03:00
parent 029fa3ae84
commit f4214807a3
6 changed files with 81 additions and 65 deletions

View file

@ -1,6 +1,7 @@
'''
Unified RSS data, merged from different services I used historically
'''
# NOTE: you can comment out the sources you're not using
from . import feedbin, feedly
@ -12,5 +13,5 @@ def subscriptions() -> Iterable[Subscription]:
# TODO google reader?
yield from compute_subscriptions(
feedbin.states(),
feedly .states(),
feedly.states(),
)

View file

@ -1,30 +1,32 @@
# shared Rss stuff
from datetime import datetime
from typing import NamedTuple, Optional, List, Dict
from my.core import __NOT_HPI_MODULE__
from dataclasses import dataclass, replace
from itertools import chain
from typing import Optional, List, Dict, Iterable, Tuple, Sequence
from my.core import warn_if_empty, datetime_aware
# Record describing one RSS subscription (title, url, id, optional creation time, subscribed flag).
# NOTE(review): this span comes from a diff rendering with the +/- markers stripped; the
# NamedTuple header vs. the @dataclass header, and the duplicated field lines below, look like
# the before/after versions of the same declaration — confirm against the real file.
class Subscription(NamedTuple):
@dataclass
class Subscription:
title: str
url: str
id: str # TODO not sure about it...
id: str # TODO not sure about it...
# eh, not all of them got reasonable 'created' time
created_at: Optional[datetime]
subscribed: bool=True
created_at: Optional[datetime_aware]
subscribed: bool = True
from typing import Iterable, Tuple, Sequence
# snapshot of subscriptions at time
SubscriptionState = Tuple[datetime, Sequence[Subscription]]
SubscriptionState = Tuple[datetime_aware, Sequence[Subscription]]
from ..core import warn_if_empty
@warn_if_empty
def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
"""
Keeps track of everything I ever subscribed to.
In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed)
"""
from itertools import chain
states = list(chain.from_iterable(sources))
# TODO keep 'source'/'provider'/'service' attribute?
@ -45,7 +47,5 @@ def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscri
res = []
for u, x in sorted(by_url.items()):
present = u in last_urls
res.append(x._replace(subscribed=present))
res.append(replace(x, subscribed=present))
return res
from ..core import __NOT_HPI_MODULE__

View file

@ -2,24 +2,22 @@
Feedbin RSS reader
"""
from my.config import feedbin as config
import json
from pathlib import Path
from typing import Sequence
from typing import Iterator, Sequence
from my.core.common import listify, get_files
from my.core import get_files, stat, Stats
from my.core.compat import fromisoformat
from .common import Subscription
from .common import Subscription, SubscriptionState
from my.config import feedbin as config
# Returns whatever get_files() resolves for the export path set in the feedbin config.
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
import json
@listify
def parse_file(f: Path):
def parse_file(f: Path) -> Iterator[Subscription]:
raw = json.loads(f.read_text())
for r in raw:
yield Subscription(
@ -30,19 +28,14 @@ def parse_file(f: Path):
)
from typing import Iterable
from .common import SubscriptionState
# Yields one (dt, subs) pair per file returned by inputs().
# NOTE(review): diff rendering without +/- markers — the two `def` lines and the two
# `subs = ...` lines appear to be the before/after versions of the same statement; confirm.
def states() -> Iterable[SubscriptionState]:
def states() -> Iterator[SubscriptionState]:
for f in inputs():
# TODO ugh. depends on my naming. not sure if useful?
# the timestamp is the part of the file stem after the last '_', parsed with fromisoformat
dts = f.stem.split('_')[-1]
dt = fromisoformat(dts)
subs = parse_file(f)
# list(...) materialises the parse_file result before it is yielded
subs = list(parse_file(f))
yield dt, subs
# Reports, under the 'subscriptions' key, the number of items in the second element
# of the last state yielded by states().
def stats():
from more_itertools import ilen, last
return {
'subscriptions': ilen(last(states())[1])
}
# Delegates to stat(), passing the states function itself rather than its result.
def stats() -> Stats:
return stat(states)

View file

@ -1,14 +1,15 @@
"""
Feedly RSS reader
"""
from my.config import feedly as config
from datetime import datetime, timezone
import json
from pathlib import Path
from typing import Iterable, Sequence
from typing import Iterator, Sequence
from ..core.common import listify, get_files
from my.core import get_files
from .common import Subscription, SubscriptionState
@ -16,13 +17,12 @@ def inputs() -> Sequence[Path]:
return get_files(config.export_path)
@listify
def parse_file(f: Path):
def parse_file(f: Path) -> Iterator[Subscription]:
raw = json.loads(f.read_text())
for r in raw:
# err, some even don't have website..
rid = r['id']
website = r.get('website', rid) # meh
website = r.get('website', rid) # meh
yield Subscription(
created_at=None,
title=r['title'],
@ -31,9 +31,9 @@ def parse_file(f: Path):
)
# Yields one (dt, subs) pair per file returned by inputs().
# NOTE(review): diff rendering without +/- markers — the two `def` lines and the two
# `subs = ...` lines appear to be the before/after versions of the same statement; confirm.
def states() -> Iterable[SubscriptionState]:
def states() -> Iterator[SubscriptionState]:
for f in inputs():
# the part of the file stem after the last '_' is parsed as YYYYmmddHHMMSS and tagged as UTC
dts = f.stem.split('_')[-1]
dt = datetime.strptime(dts, '%Y%m%d%H%M%S').replace(tzinfo=timezone.utc)
subs = parse_file(f)
# list(...) materialises the parse_file result before it is yielded
subs = list(parse_file(f))
yield dt, subs