Merge pull request #48 from karlicoss/configuration
lastfm updates: docs, lastfm, rss module
commit d0427855e8
18 changed files with 358 additions and 115 deletions
@@ -224,7 +224,7 @@ My conclusion was using a *combined approach*:

 Inheritance is a standard mechanism, which doesn't require any extra frameworks and plays well with other Python concepts. As a specific example:

-,#+begin_src python
+#+begin_src python
 from my.config import bluemaestro as user_config

 @dataclass
@@ -256,24 +256,27 @@ I claim this solves pretty much everything:
 - *(6)*: the dataclass header is easily readable, and it's possible to generate the docs automatically

 Downsides:
-- inheriting from ~user_config~ means early import of =my.config=
+- inheriting from ~user_config~ means an early import of =my.config=

 Generally it's better to keep everything as lazy as possible and defer loading to the first time the config is used.
 This might be annoying at times, e.g. if you have a top-level import of your module, but no config.

 But considering that in 99% of cases config is going to be on the disk
-and it's possible to do something dynamic like =del sys.modules['my.bluemaestro']= to reload the config, I think it's a minor issue.
-# TODO demonstrate in a test?
+and it's [[https://github.com/karlicoss/HPI/blob/1e6e0bd381d20437343473878c7f63b1f9d6362b/tests/demo.py#L22-L25][possible]] to do something dynamic like =del sys.modules['my.bluemaestro']= to reload the config, I think it's a minor issue.

 - =make_config= allows for some mypy false negatives in the user config

 E.g. if you forgot =export_path= attribute, mypy would miss it. But you'd have a runtime failure, and the downstream code using config is still correctly type checked.

-Perhaps it will be better when [[https://github.com/python/mypy/issues/5374][this]] is fixed.
+Perhaps it will be better when [[https://github.com/python/mypy/issues/5374][this mypy issue]] is fixed.
 - the =make_config= bit is a little scary and manual

 However, it's extracted in a generic helper, and [[https://github.com/karlicoss/HPI/blob/d6f071e3b12ba1cd5a86ad80e3821bec004e6a6d/my/twitter/archive.py#L17][ends up pretty simple]]

+# In addition, it's not even necessary if you don't have optional attributes, you can simply use the class variables (i.e. ~bluemaestro.export_path~)
+# upd. ugh, you can't, it doesn't handle default attributes overriding correctly (see tests/demo.py)
+# eh. basically all I need is class level dataclass??

 - inheriting from ~user_config~ requires it to be a =class= rather than an =object=

 A practical downside is you can't use something like ~SimpleNamespace~.
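The reload trick mentioned in the downsides above is essentially what the linked test does. As a rough sketch (not part of the diff; the =bluemaestro= export location is made up), overriding and reloading a module's config looks like this:

#+begin_src python
import sys
import my.config

class user_config:
    export_path = '/data/bluemaestro/*.db'  # hypothetical export location

# point the config attribute that my.bluemaestro inherits from at the override
my.config.bluemaestro = user_config

# my.bluemaestro may already be imported with the old config; dropping it from
# the module cache makes the next import pick up the new one
sys.modules.pop('my.bluemaestro', None)

import my.bluemaestro  # re-imported with the overridden config
#+end_src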
@@ -32,6 +32,7 @@ modules = [
     ('reddit' , 'my.reddit'         ),
     ('twint'  , 'my.twitter.twint'  ),
     ('twitter', 'my.twitter.archive'),
+    ('lastfm' , 'my.lastfm'         ),
 ]

 def indent(s, spaces=4):
@@ -105,4 +106,15 @@ for cls, p in modules:
     class twitter:
         export_path: Paths # path[s]/glob to the twitter archive takeout
     #+end_src
+- [[file:../my/lastfm][my.lastfm]]
+
+    Last.fm scrobbles
+
+    #+begin_src python
+    class lastfm:
+        """
+        Uses [[https://github.com/karlicoss/lastfm-backup][lastfm-backup]] outputs
+        """
+        export_path: Paths
+    #+end_src
 :end:
10  my/_rss.py
@@ -1,10 +0,0 @@
-# shared Rss stuff
-from typing import NamedTuple
-
-
-class Subscription(NamedTuple):
-    # TODO date?
-    title: str
-    url: str
-    id: str
-    subscribed: bool=True
@@ -1,5 +1,6 @@
 from glob import glob as do_glob
 from pathlib import Path
+from datetime import datetime
 import functools
 import types
 from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
@@ -219,3 +220,28 @@ class classproperty(Generic[_R]):
 #
 #    def __get__(self) -> _R:
 #        return self.f()
+
+# TODO maybe use opaque mypy alias?
+tzdatetime = datetime
+
+
+fromisoformat: Callable[[str], datetime]
+import sys
+if sys.version_info.minor >= 7:
+    # prevent mypy on py3.6 from complaining...
+    fromisoformat_real = datetime.fromisoformat  # type: ignore[attr-defined]
+    fromisoformat = fromisoformat_real
+else:
+    from .py37 import fromisoformat
+
+
+# TODO doctests?
+def isoparse(s: str) -> tzdatetime:
+    """
+    Parses timestamps formatted like 2020-05-01T10:32:02.925961Z
+    """
+    # TODO could use dateutil? but it's quite slow as far as I remember..
+    # TODO support non-utc.. somehow?
+    assert s.endswith('Z'), s
+    s = s[:-1] + '+00:00'
+    return fromisoformat(s)
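For reference, a quick sketch of calling the new helper (assuming this hunk lives in =my.core.common=, which is how the modules below import it; the timestamp is just an example value):

#+begin_src python
from my.core.common import isoparse

dt = isoparse('2020-05-01T10:32:02.925961Z')
print(dt)         # 2020-05-01 10:32:02.925961+00:00
print(dt.tzinfo)  # UTC
#+end_src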
122  my/core/py37.py (new file)
@@ -0,0 +1,122 @@
+# borrowed from /usr/lib/python3.7/datetime.py
+from datetime import datetime, timezone, timedelta
+
+
+def _parse_isoformat_date(dtstr):
+    # It is assumed that this function will only be called with a
+    # string of length exactly 10, and (though this is not used) ASCII-only
+    year = int(dtstr[0:4])
+    if dtstr[4] != '-':
+        raise ValueError('Invalid date separator: %s' % dtstr[4])
+
+    month = int(dtstr[5:7])
+
+    if dtstr[7] != '-':
+        raise ValueError('Invalid date separator')
+
+    day = int(dtstr[8:10])
+
+    return [year, month, day]
+
+
+def _parse_hh_mm_ss_ff(tstr):
+    # Parses things of the form HH[:MM[:SS[.fff[fff]]]]
+    len_str = len(tstr)
+
+    time_comps = [0, 0, 0, 0]
+    pos = 0
+    for comp in range(0, 3):
+        if (len_str - pos) < 2:
+            raise ValueError('Incomplete time component')
+
+        time_comps[comp] = int(tstr[pos:pos+2])
+
+        pos += 2
+        next_char = tstr[pos:pos+1]
+
+        if not next_char or comp >= 2:
+            break
+
+        if next_char != ':':
+            raise ValueError('Invalid time separator: %c' % next_char)
+
+        pos += 1
+
+    if pos < len_str:
+        if tstr[pos] != '.':
+            raise ValueError('Invalid microsecond component')
+        else:
+            pos += 1
+
+            len_remainder = len_str - pos
+            if len_remainder not in (3, 6):
+                raise ValueError('Invalid microsecond component')
+
+            time_comps[3] = int(tstr[pos:])
+            if len_remainder == 3:
+                time_comps[3] *= 1000
+
+    return time_comps
+
+
+def _parse_isoformat_time(tstr):
+    # Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]
+    len_str = len(tstr)
+    if len_str < 2:
+        raise ValueError('Isoformat time too short')
+
+    # This is equivalent to re.search('[+-]', tstr), but faster
+    tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1)
+    timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr
+
+    time_comps = _parse_hh_mm_ss_ff(timestr)
+
+    tzi = None
+    if tz_pos > 0:
+        tzstr = tstr[tz_pos:]
+
+        # Valid time zone strings are:
+        # HH:MM               len: 5
+        # HH:MM:SS            len: 8
+        # HH:MM:SS.ffffff     len: 15
+
+        if len(tzstr) not in (5, 8, 15):
+            raise ValueError('Malformed time zone string')
+
+        tz_comps = _parse_hh_mm_ss_ff(tzstr)
+        if all(x == 0 for x in tz_comps):
+            tzi = timezone.utc
+        else:
+            tzsign = -1 if tstr[tz_pos - 1] == '-' else 1
+
+            td = timedelta(hours=tz_comps[0], minutes=tz_comps[1],
+                           seconds=tz_comps[2], microseconds=tz_comps[3])
+
+            tzi = timezone(tzsign * td)
+
+    time_comps.append(tzi)
+
+    return time_comps
+
+
+def fromisoformat(date_string, cls=datetime):
+    """Construct a datetime from the output of datetime.isoformat()."""
+    if not isinstance(date_string, str):
+        raise TypeError('fromisoformat: argument must be str')
+
+    # Split this at the separator
+    dstr = date_string[0:10]
+    tstr = date_string[11:]
+
+    try:
+        date_components = _parse_isoformat_date(dstr)
+    except ValueError:
+        raise ValueError('Invalid isoformat string: %s' % date_string)
+
+    if tstr:
+        try:
+            time_components = _parse_isoformat_time(tstr)
+        except ValueError:
+            raise ValueError('Invalid isoformat string: %s' % date_string)
+    else:
+        time_components = [0, 0, 0, 0, None]
+
+    return cls(*(date_components + time_components))
15  my/demo.py
@@ -16,13 +16,8 @@ class demo(user_config):
     username: str
     timezone: tzinfo = pytz.utc

-def config() -> demo:
-    from .core.cfg import make_config
-    config = make_config(demo)
-    return config
-
-
-
+from .core.cfg import make_config
+config = make_config(demo)

 from pathlib import Path
 from typing import Sequence, Iterable
@@ -40,17 +35,17 @@ class Item:


 def inputs() -> Sequence[Path]:
-    return get_files(config().data_path)
+    return get_files(config.data_path)


 import json
 def items() -> Iterable[Item]:
     for f in inputs():
-        dt = datetime.fromtimestamp(f.stat().st_mtime, tz=config().timezone)
+        dt = datetime.fromtimestamp(f.stat().st_mtime, tz=config.timezone)
         j = json.loads(f.read_text())
         for raw in j:
             yield Item(
-                username=config().username,
+                username=config.username,
                 raw=raw,
                 dt=dt,
             )
@@ -1,36 +0,0 @@
-"""
-Feedbin RSS reader
-"""
-
-from .common import listify
-from ._rss import Subscription
-
-from my.config import feedbin as config
-
-import json
-from pathlib import Path
-from typing import Dict, List
-from datetime import datetime
-from dateutil.parser import isoparse
-
-
-@listify
-def parse_file(f: Path):
-    raw = json.loads(f.read_text())
-    for r in raw:
-        yield Subscription(
-            # TODO created_at?
-            title=r['title'],
-            url=r['site_url'],
-            id=r['id'],
-        )
-
-def get_states() -> Dict[datetime, List[Subscription]]:
-    res = {}
-    # TODO use get_files
-    for f in sorted(Path(config.export_dir).glob('*.json')):
-        dts = f.stem.split('_')[-1]
-        dt = isoparse(dts)
-        subs = parse_file(f)
-        res[dt] = subs
-    return res
@@ -2,8 +2,21 @@
 Last.fm scrobbles
 '''

+from ..core.common import Paths
+from dataclasses import dataclass
+from my.config import lastfm as user_config
+
+@dataclass
+class lastfm(user_config):
+    """
+    Uses [[https://github.com/karlicoss/lastfm-backup][lastfm-backup]] outputs
+    """
+    export_path: Paths
+
+
+from ..core.cfg import make_config
+config = make_config(lastfm)

-from ..common import get_files, mcachew, Json

 from datetime import datetime
 import json
@@ -12,16 +25,17 @@ from typing import NamedTuple, Any, Sequence, Iterable

 import pytz

-from my.config import lastfm as config
+from ..core.common import mcachew, Json, get_files

+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)
+
+
 # TODO memoised properties?
 # TODO lazy mode and eager mode?
 # lazy is a bit nicer in terms of more flexibility and less processing?
 # eager is a bit more explicit for error handling

-def inputs() -> Sequence[Path]:
-    return get_files(config.export_path)


 class Scrobble(NamedTuple):
     raw: Json
@@ -54,5 +68,5 @@ def scrobbles() -> Iterable[Scrobble]:
     last = max(inputs())
     j = json.loads(last.read_text())

-    for raw in j:
+    for raw in reversed(j):
         yield Scrobble(raw=raw)
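For context, a minimal sketch of the user-side configuration this module now expects (the glob is a made-up location of [[https://github.com/karlicoss/lastfm-backup][lastfm-backup]] outputs):

#+begin_src python
# in the user's my/config.py
class lastfm:
    export_path = '/backups/lastfm/*.json'  # hypothetical path/glob to the JSON exports

# with that in place, downstream usage is unchanged:
# from my.lastfm import scrobbles
#+end_src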
@@ -21,6 +21,7 @@ _POLAR_DIR = Path('~').expanduser() / '.polar'
 logger = LazyLogger(__name__)


+# TODO use core.isoparse
 def parse_dt(s: str) -> datetime:
     return pytz.utc.localize(datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%fZ'))

29  my/rss.py
@@ -1,29 +0,0 @@
-from itertools import chain
-from typing import List, Dict
-
-from ._rss import Subscription
-
-from . import feedbin
-from . import feedly
-# TODO google reader?
-
-
-def get_all_subscriptions() -> List[Subscription]:
-    """
-    Keeps track of everything I ever subscribed to. It's useful to keep track of unsubscribed too
-    so you don't try to subscribe again (or at least take into account why you unsubscribed before)
-    """
-    states = {}
-    states.update(feedly.get_states())
-    states.update(feedbin.get_states())
-    by_url: Dict[str, Subscription] = {}
-    for d, feeds in sorted(states.items()):
-        for f in feeds:
-            if f.url not in by_url:
-                by_url[f.url] = f
-    res = []
-    last = {x.url: x for x in max(states.items())[1]}
-    for u, x in sorted(by_url.items()):
-        present = u in last
-        res.append(x._replace(subscribed=present))
-    return res
11  my/rss/all.py (new file)
@@ -0,0 +1,11 @@
+'''
+Unified RSS data, merged from different services I used historically
+'''
+from typing import Iterable
+from .common import Subscription, compute_subscriptions
+
+
+def subscriptions() -> Iterable[Subscription]:
+    from . import feedbin, feedly
+    # TODO google reader?
+    yield from compute_subscriptions(feedbin.states(), feedly.states())
44  my/rss/common.py (new file)
@@ -0,0 +1,44 @@
+# shared Rss stuff
+from datetime import datetime
+from typing import NamedTuple, Optional, List, Dict
+
+
+class Subscription(NamedTuple):
+    title: str
+    url: str
+    id: str  # TODO not sure about it...
+    # eh, not all of them got reasonable 'created' time
+    created_at: Optional[datetime]
+    subscribed: bool=True
+
+from typing import Iterable, Tuple, Sequence
+
+# snapshot of subscriptions at time
+SubscriptionState = Tuple[datetime, Sequence[Subscription]]
+
+
+def compute_subscriptions(*sources: Iterable[SubscriptionState]) -> List[Subscription]:
+    """
+    Keeps track of everything I ever subscribed to.
+    In addition, keeps track of unsubscribed as well (so you'd remember when and why you unsubscribed)
+    """
+    from itertools import chain
+    states = list(chain.from_iterable(sources))
+    # TODO keep 'source'/'provider'/'service' attribute?
+
+    by_url: Dict[str, Subscription] = {}
+    # ah. dates are used for sorting
+    for when, state in sorted(states):
+        # TODO use 'when'?
+        for feed in state:
+            if feed.url not in by_url:
+                by_url[feed.url] = feed
+
+    _, last_state = max(states, key=lambda x: x[0])
+    last_urls = {f.url for f in last_state}
+
+    res = []
+    for u, x in sorted(by_url.items()):
+        present = u in last_urls
+        res.append(x._replace(subscribed=present))
+    return res
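To illustrate what =compute_subscriptions= does with these snapshots, here is a small sketch (the feeds and dates are made up):

#+begin_src python
from datetime import datetime, timezone
from my.rss.common import Subscription, compute_subscriptions

foo = Subscription(title='Foo', url='https://foo.example/rss', id='foo', created_at=None)
bar = Subscription(title='Bar', url='https://bar.example/rss', id='bar', created_at=None)

# two snapshots from one hypothetical source; 'Bar' is gone in the later one
states = [
    (datetime(2020, 1, 1, tzinfo=timezone.utc), [foo, bar]),
    (datetime(2020, 5, 1, tzinfo=timezone.utc), [foo]),
]

subs = compute_subscriptions(states)
# every feed ever seen is returned; 'Bar' is kept but marked as unsubscribed
assert {s.url: s.subscribed for s in subs} == {
    'https://foo.example/rss': True,
    'https://bar.example/rss': False,
}
#+end_src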
42  my/rss/feedbin.py (new file)
@@ -0,0 +1,42 @@
+"""
+Feedbin RSS reader
+"""
+
+from my.config import feedbin as config
+
+from pathlib import Path
+from typing import Sequence
+
+from ..core.common import listify, get_files, isoparse
+from .common import Subscription
+
+
+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)
+
+
+import json
+
+@listify
+def parse_file(f: Path):
+    raw = json.loads(f.read_text())
+    for r in raw:
+        yield Subscription(
+            created_at=isoparse(r['created_at']),
+            title=r['title'],
+            url=r['site_url'],
+            id=r['id'],
+        )
+
+
+from typing import Iterable
+from .common import SubscriptionState
+def states() -> Iterable[SubscriptionState]:
+    # meh
+    from dateutil.parser import isoparse  # type: ignore
+    for f in inputs():
+        # TODO ugh. depends on my naming. not sure if useful?
+        dts = f.stem.split('_')[-1]
+        dt = isoparse(dts)
+        subs = parse_file(f)
+        yield dt, subs
@@ -2,16 +2,20 @@
 Feedly RSS reader
 """

-from .common import listify
-from ._rss import Subscription

 from my.config import feedly as config

-import json
 from pathlib import Path
-from typing import Dict, List
-from datetime import datetime
-import pytz
+from typing import Sequence
+
+from ..core.common import listify, get_files, isoparse
+from .common import Subscription
+
+
+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)
+
+
+import json


 @listify
@@ -22,19 +26,21 @@ def parse_file(f: Path):
         rid = r['id']
         website = r.get('website', rid)  # meh
         yield Subscription(
-            # TODO created_at?
+            created_at=None,
             title=r['title'],
             url=website,
             id=rid,
         )

-def get_states() -> Dict[datetime, List[Subscription]]:
-    res = {}
-    # TODO use get_files
-    for f in sorted(Path(config.export_dir).glob('*.json')):
+from datetime import datetime
+from typing import Iterable
+from .common import SubscriptionState
+def states() -> Iterable[SubscriptionState]:
+    import pytz
+    for f in inputs():
         dts = f.stem.split('_')[-1]
         dt = datetime.strptime(dts, '%Y%m%d%H%M%S')
         dt = pytz.utc.localize(dt)
         subs = parse_file(f)
-        res[dt] = subs
-    return res
+        yield dt, subs
@@ -7,10 +7,9 @@ from . import twint
 from . import archive


-from more_itertools import unique_everseen
+# TODO move to .common?

 def merge_tweets(*sources):
+    from more_itertools import unique_everseen
     yield from unique_everseen(
         chain(*sources),
         key=lambda t: t.id_str,
@@ -76,7 +76,7 @@ class feedly:
     os.environ['MY_CONFIG'] = str(tmp_path)

     # should not raise at least
-    import my.feedly
+    import my.rss.feedly


 @pytest.fixture
@@ -54,7 +54,40 @@ def test_dynamic_config_simplenamespace(tmp_path: Path) -> None:
     my.config.demo = user_config  # type: ignore[misc, assignment]

     from my.demo import config
-    assert config().username == 'user3'
+    assert config.username == 'user3'


+# make sure our config handling pattern does it as expected
+def test_attribute_handling(tmp_path: Path) -> None:
+    # doesn't work without it!
+    # because the config from test_dynamic_config_1 is cached in my.demo.demo
+    del sys.modules['my.demo']
+
+    import pytz
+    nytz = pytz.timezone('America/New_York')
+
+    import my.config
+    class user_config:
+        # check that override is taken into the account
+        timezone = nytz
+
+        irrelevant = 'hello'
+
+        username = 'UUU'
+        data_path = f'{tmp_path}/*.json'
+
+    my.config.demo = user_config  # type: ignore[misc, assignment]
+
+    from my.demo import config
+
+    assert config.username == 'UUU'
+
+    # mypy doesn't know about it, but the attribute is there
+    assert getattr(config, 'irrelevant') == 'hello'
+
+    # check that overridden default attribute is actually getting overridden
+    assert config.timezone == nytz
@@ -1,3 +1,7 @@
+from my.core.cachew import disable_cachew
+# TODO need something nicer and integrated inside cachew..
+disable_cachew()  # meh
+
 from more_itertools import ilen

 from my.lastfm import scrobbles
@@ -5,3 +9,9 @@ from my.lastfm import scrobbles

 def test():
     assert ilen(scrobbles()) > 1000
+
+
+def test_datetime_ascending():
+    from more_itertools import pairwise
+    for a, b in pairwise(scrobbles()):
+        assert a.dt <= b.dt