diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index df89d38..2e2b10f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,9 +19,9 @@ jobs: strategy: matrix: platform: [ubuntu-latest, macos-latest] # TODO windows-latest?? - python-version: ['3.6', '3.7', '3.8'] + python-version: [3.6, 3.7, 3.8] # seems like 3.6 isn't available on their osx image anymore - exclude: [{platform: macos-latest, python-version: '3.6'}] + exclude: [{platform: macos-latest, python-version: 3.6}] runs-on: ${{ matrix.platform }} diff --git a/README.org b/README.org index 00a4509..332b3c1 100644 --- a/README.org +++ b/README.org @@ -35,9 +35,9 @@ You simply 'import' your data and get to work with familiar Python types and dat - Here's a short example to give you an idea: "which subreddits I find the most interesting?" #+begin_src python - import my.reddit + import my.reddit.all from collections import Counter - return Counter(s.subreddit for s in my.reddit.saved()).most_common(4) + return Counter(s.subreddit for s in my.reddit.all.saved()).most_common(4) #+end_src | orgmode | 62 | diff --git a/doc/MODULES.org b/doc/MODULES.org index 4090e32..4e36d84 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -74,7 +74,6 @@ import importlib modules = [ ('google' , 'my.google.takeout.paths'), ('hypothesis' , 'my.hypothesis' ), - ('reddit' , 'my.reddit' ), ('pocket' , 'my.pocket' ), ('twint' , 'my.twitter.twint' ), ('twitter_archive', 'my.twitter.archive' ), @@ -144,14 +143,25 @@ for cls, p in modules: Reddit data: saved items/comments/upvotes/etc. + # Note: can't be generated as easily since this is a nested configuration object #+begin_src python class reddit: - ''' - Uses [[https://github.com/karlicoss/rexport][rexport]] output. - ''' + class rexport: + ''' + Uses [[https://github.com/karlicoss/rexport][rexport]] output. 
+ ''' + + # path[s]/glob to the exported JSON data + export_path: Paths + + class pushshift: + ''' + Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments + ''' + + # path[s]/glob to the exported JSON data + export_path: Paths - # path[s]/glob to the exported JSON data - export_path: Paths #+end_src ** [[file:../my/pocket.py][my.pocket]] diff --git a/doc/MODULE_DESIGN.org b/doc/MODULE_DESIGN.org index b6f31f0..9019dfa 100644 --- a/doc/MODULE_DESIGN.org +++ b/doc/MODULE_DESIGN.org @@ -76,11 +76,11 @@ A related concern is how to structure namespace packages to allow users to easil - In addition, you can *override* the builtin HPI modules too: - : custom_reddit_overlay + : custom_lastfm_overlay : └── my - : └──reddit.py + : └──lastfm.py - Now if you add =custom_reddit_overlay= *in front* of ~PYTHONPATH~, all the downstream scripts using =my.reddit= will load it from =custom_reddit_overlay= instead. + Now if you add =custom_lastfm_overlay= [[https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH][*in front* of ~PYTHONPATH~]], all the downstream scripts using =my.lastfm= will load it from =custom_lastfm_overlay= instead. This could be useful to monkey patch some behaviours, or dynamically add some extra data sources -- anything that comes to your mind. You can check [[https://github.com/karlicoss/hpi-personal-overlay/blob/7fca8b1b6031bf418078da2d8be70fd81d2d8fa0/src/my/calendar/holidays.py#L1-L14][my.calendar.holidays]] in my personal overlay as a reference. @@ -99,15 +99,15 @@ In order to do that, like stated above, you could edit the ~PYTHONPATH~ variable In the context of HPI, it being a namespace package means you can have a local clone of this repository, and your own 'HPI' modules in a separate folder, which then get combined into the ~my~ package. -As an example, say you were trying to override the ~my.reddit~ file, to include some new feature. 
You could create a new file hierarchy like: +As an example, say you were trying to override the ~my.lastfm~ file, to include some new feature. You could create a new file hierarchy like: : . : ├── my -: │   ├── reddit.py +: │   ├── lastfm.py : │   └── some_new_module.py : └── setup.py -Where ~reddit.py~ is your version of ~my.reddit~, which you've copied from this repository and applied your changes to. The ~setup.py~ would be something like: +Where ~lastfm.py~ is your version of ~my.lastfm~, which you've copied from this repository and applied your changes to. The ~setup.py~ would be something like: #+begin_src python from setuptools import setup, find_namespace_packages @@ -121,9 +121,9 @@ Where ~reddit.py~ is your version of ~my.reddit~, which you've copied from this ) #+end_src -Then, running ~pip3 install -e .~ in that directory would install that as part of the namespace package, and assuming (see below for possible issues) this appears on ~sys.path~ before the upstream repository, your ~reddit.py~ file overrides the upstream. Adding more files, like ~my.some_new_module~ into that directory immediately updates the global ~my~ package -- allowing you to quickly add new modules without having to re-install. +Then, running ~python3 -m pip install -e .~ in that directory would install that as part of the namespace package, and assuming (see below for possible issues) this appears on ~sys.path~ before the upstream repository, your ~lastfm.py~ file overrides the upstream. Adding more files, like ~my.some_new_module~ into that directory immediately updates the global ~my~ package -- allowing you to quickly add new modules without having to re-install. -If you install both directories as editable packages (which has the benefit of any changes you making in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. 
If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_reddit_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/seanbreckenridge/reorder_editable][reorder_editable]] repository. +If you install both directories as editable packages (which has the benefit of any changes you making in either repository immediately updating the globally installed ~my~ package), there are some concerns with which editable install appears on your ~sys.path~ first. If you wanted your modules to override the upstream modules, yours would have to appear on the ~sys.path~ first (this is the same reason that =custom_lastfm_overlay= must be at the front of your ~PYTHONPATH~). For more details and examples on dealing with editable namespace packages in the context of HPI, see the [[https://github.com/seanbreckenridge/reorder_editable][reorder_editable]] repository. There is no limit to how many directories you could install into a single namespace package, which could be a possible way for people to install additional HPI modules, without worrying about the module count here becoming too large to manage. diff --git a/doc/SETUP.org b/doc/SETUP.org index 74aae02..3364653 100644 --- a/doc/SETUP.org +++ b/doc/SETUP.org @@ -355,7 +355,7 @@ The only thing you need to do is to tell it where to find the files on your disk Reddit has a proper API, so in theory HPI could talk directly to Reddit and retrieve the latest data. But that's not what it doing! 
- first, there are excellent programmatic APIs for Reddit out there already, for example, [[https://github.com/praw-dev/praw][praw]] -- more importantly, this is the [[https://beepb00p.xyz/exports.html#design][design decision]] of HP +- more importantly, this is the [[https://beepb00p.xyz/exports.html#design][design decision]] of HPI It doesn't deal with all with the complexities of API interactions. Instead, it relies on other tools to put *intermediate, raw data*, on your disk and then transforms this data into something nice. @@ -368,19 +368,18 @@ As an example, for [[file:../my/reddit.py][Reddit]], HPI is relying on data fetc : ⇓⇓⇓ : |💾 /backups/reddit/*.json | : ⇓⇓⇓ -: HPI (my.reddit) +: HPI (my.reddit.rexport) : ⇓⇓⇓ : < python interface > So, in your [[file:MODULES.org::#myreddit][reddit config]], similarly to Takeout, you need =export_path=, so HPI knows how to find your Reddit data on the disk. But there is an extra caveat: rexport is already coming with nice [[https://github.com/karlicoss/rexport/blob/master/dal.py][data bindings]] to parse its outputs. -Another *design decision* of HPI is to use existing code and libraries as much as possible, so we also specify a path to =rexport= repository in the config. - -(note: in the future it's possible that rexport will be installed via PIP, I just haven't had time for it so far). Several other HPI modules are following a similar pattern: hypothesis, instapaper, pinboard, kobo, etc. +Since the [[https://github.com/karlicoss/rexport#api-limitations][reddit API has limited results]], you can use [[https://github.com/seanbreckenridge/pushshift_comment_export][my.reddit.pushshift]] to access older reddit comments, which both then get merged into =my.reddit.all.comments= + ** Twitter Twitter is interesting, because it's an example of an HPI module that *arbitrates* between several data sources from the same service. 
diff --git a/my/config.py b/my/config.py index 63d962c..4add9e9 100644 --- a/my/config.py +++ b/my/config.py @@ -34,7 +34,12 @@ class github: export_path: Paths = '' class reddit: - export_path: Paths = '' + class rexport: + export_path: Paths = '' + class pushshift: + export_path: Paths = '' + class gdpr: + export_path: Paths = '' class endomondo: export_path: Paths = '' diff --git a/my/core/cfg.py b/my/core/cfg.py index b23fa86..4b5cbed 100644 --- a/my/core/cfg.py +++ b/my/core/cfg.py @@ -10,7 +10,7 @@ C = TypeVar('C') def make_config(cls: Type[C], migration: Callable[[Attrs], Attrs]=lambda x: x) -> C: user_config = cls.__base__ old_props = { - # NOTE: deliberately use gettatr to 'force' lcass properties here + # NOTE: deliberately use getattr to 'force' class properties here k: getattr(user_config, k) for k in vars(user_config) } new_props = migration(old_props) diff --git a/my/core/source.py b/my/core/source.py new file mode 100644 index 0000000..e2529eb --- /dev/null +++ b/my/core/source.py @@ -0,0 +1,63 @@ +""" +Decorator to gracefully handle importing a data source, or warning +and yielding nothing (or a default) when it's not available +""" + +from typing import Iterator, TypeVar, Callable, Optional, Iterable, Any +from my.core.warnings import warn +from functools import wraps + +# The factory function may produce something that has data +# similar to the shared model, but not exactly, so not +# making this a TypeVar, is just to make reading the +# type signature below a bit easier... +T = Any + +# https://mypy.readthedocs.io/en/latest/generics.html?highlight=decorators#decorator-factories +FactoryF = TypeVar("FactoryF", bound=Callable[..., Iterator[T]]) + +_DEFUALT_ITR = () + + +# tried to use decorator module but it really doesn't work well +# with types and kw-arguments...
:/ +def import_source( + default: Iterable[T] = _DEFUALT_ITR, + module_name: Optional[str] = None, +) -> Callable[..., Callable[..., Iterator[T]]]: + """ + doesn't really play well with types, but is used to catch + ModuleNotFoundError's for when modules aren't installed in + all.py files, so the types don't particularly matter + + this is meant to be used to wrap some function which imports + and then yields an iterator of objects + + If the user doesn't have that module installed, it returns + nothing and warns instead + """ + + def decorator(factory_func: FactoryF) -> Callable[..., Iterator[T]]: + @wraps(factory_func) + def wrapper(*args, **kwargs) -> Iterator[T]: + try: + res = factory_func(*args, **kwargs) + yield from res + except ModuleNotFoundError: + from . import core_config as CC + suppressed_in_conf = False + if module_name is not None and CC.config._is_module_active(module_name) is False: + suppressed_in_conf = True + if not suppressed_in_conf: + if module_name is None: + warn(f"Module {factory_func.__qualname__} could not be imported, or isn't configured properly") + else: + warn(f"""Module {module_name} ({factory_func.__qualname__}) could not be imported, or isn't configured properly\nTo hide this message, add {module_name} to your core config disabled_modules, like: + +class core: + disabled_modules = [{repr(module_name)}] +""") + yield from default + return wrapper + return decorator + diff --git a/my/reddit/__init__.py b/my/reddit/__init__.py new file mode 100644 index 0000000..f2b60ca --- /dev/null +++ b/my/reddit/__init__.py @@ -0,0 +1,41 @@ +""" +This is here temporarily, for backwards compatibility purposes +It should be removed in the future, and you should replace any imports +like: +from my.reddit import ... +to: +from my.reddit.all import ...
+since that allows for easier overriding using namespace packages +https://github.com/karlicoss/HPI/issues/102 +""" + +# For now, including this here, since importing the module +# causes .rexport to be imported, which requires rexport +REQUIRES = [ + 'git+https://github.com/karlicoss/rexport', +] + +import re +import traceback + +# some hacky traceback to inspect the current stack +# to see if the user is using the old style of importing +warn = False +for f in traceback.extract_stack(): + line = f.line or '' # just in case it's None, who knows.. + + # cover the most common ways of previously interacting with the module + if 'import my.reddit ' in (line + ' '): + warn = True + elif 'from my import reddit' in line: + warn = True + elif re.match(r"from my\.reddit\simport\s(comments|saved|submissions|upvoted)", line): + warn = True + +# TODO: add link to instructions to migrate +if warn: + from my.core import warnings as W + W.high("DEPRECATED! Instead of my.reddit, import from my.reddit.all instead.") + + +from .rexport import * diff --git a/my/reddit/all.py b/my/reddit/all.py new file mode 100644 index 0000000..a668081 --- /dev/null +++ b/my/reddit/all.py @@ -0,0 +1,68 @@ +from typing import Iterator +from my.core.common import Stats +from my.core.source import import_source + +from .common import Save, Upvote, Comment, Submission, _merge_comments + +# Man... ideally an all.py file isn't this verbose, but +# reddit just feels like that much of a complicated source and +# data acquired by different methods isn't the same + +### 'safe importers' -- falls back to empty data if the module couldn't be found +rexport_src = import_source(module_name="my.reddit.rexport") +pushshift_src = import_source(module_name="my.reddit.pushshift") + +@rexport_src +def _rexport_comments() -> Iterator[Comment]: + from . import rexport + yield from rexport.comments() + +@rexport_src +def _rexport_submissions() -> Iterator[Submission]: + from . 
import rexport + yield from rexport.submissions() + +@rexport_src +def _rexport_saved() -> Iterator[Save]: + from . import rexport + yield from rexport.saved() + +@rexport_src +def _rexport_upvoted() -> Iterator[Upvote]: + from . import rexport + yield from rexport.upvoted() + +@pushshift_src +def _pushshift_comments() -> Iterator[Comment]: + from .pushshift import comments as pcomments + yield from pcomments() + +# Merged functions + +def comments() -> Iterator[Comment]: + # TODO: merge gdpr here + yield from _merge_comments(_rexport_comments(), _pushshift_comments()) + +def submissions() -> Iterator[Submission]: + # TODO: merge gdpr here + yield from _rexport_submissions() + +@rexport_src +def saved() -> Iterator[Save]: + from .rexport import saved + yield from saved() + +@rexport_src +def upvoted() -> Iterator[Upvote]: + from .rexport import upvoted + yield from upvoted() + +def stats() -> Stats: + from my.core import stat + return { + **stat(saved), + **stat(comments), + **stat(submissions), + **stat(upvoted), + } + diff --git a/my/reddit/common.py b/my/reddit/common.py new file mode 100644 index 0000000..c4a1f81 --- /dev/null +++ b/my/reddit/common.py @@ -0,0 +1,72 @@ +""" +This defines Protocol classes, which make sure that each different +type of shared models have a standardized interface +""" + +from typing import Dict, Any, Set, Iterator, TYPE_CHECKING +from itertools import chain + +from my.core.common import datetime_aware + +Json = Dict[str, Any] + +if TYPE_CHECKING: + try: + from typing import Protocol + except ImportError: + # requirement of mypy + from typing_extensions import Protocol # type: ignore[misc] +else: + Protocol = object + + +# common fields across all the Protocol classes, so generic code can be written +class RedditBase(Protocol): + @property + def raw(self) -> Json: ... + @property + def created(self) -> datetime_aware: ... + @property + def id(self) -> str: ... + @property + def url(self) -> str: ... 
+ @property + def text(self) -> str: ... + + +# Note: doesn't include GDPR Save's since they don't have the same metadata +class Save(RedditBase, Protocol): + @property + def subreddit(self) -> str: ... + +# Note: doesn't include GDPR Upvote's since they don't have the same metadata +class Upvote(RedditBase, Protocol): + @property + def title(self) -> str: ... + + +# From rexport, pushshift and the reddit GDPR export +class Comment(RedditBase, Protocol): + pass + + +# From rexport and the GDPR export +class Submission(RedditBase, Protocol): + @property + def title(self) -> str: ... + + +def _merge_comments(*sources: Iterator[Comment]) -> Iterator[Comment]: + #from .rexport import logger + #ignored = 0 + emitted: Set[str] = set() + for e in chain(*sources): + uid = e.id + if uid in emitted: + #ignored += 1 + #logger.info('ignoring %s: %s', uid, e) + continue + yield e + emitted.add(uid) + #logger.info(f"Ignored {ignored} comments...") + diff --git a/my/reddit/pushshift.py b/my/reddit/pushshift.py new file mode 100644 index 0000000..e67db84 --- /dev/null +++ b/my/reddit/pushshift.py @@ -0,0 +1,48 @@ +""" +Gives you access to older comments possibly not accessible with rexport +using pushshift +See https://github.com/seanbreckenridge/pushshift_comment_export +""" + +REQUIRES = [ + "git+https://github.com/seanbreckenridge/pushshift_comment_export", +] + +from my.core.common import Paths, Stats +from dataclasses import dataclass +from my.core.cfg import make_config + +from my.config import reddit as uconfig + +@dataclass +class pushshift_config(uconfig.pushshift): + ''' + Uses [[https://github.com/seanbreckenridge/pushshift_comment_export][pushshift]] to get access to old comments + ''' + + # path[s]/glob to the exported JSON data + export_path: Paths + +config = make_config(pushshift_config) + +from my.core import get_files +from typing import Sequence, Iterator +from pathlib import Path + +from pushshift_comment_export.dal import read_file, PComment + + +def inputs()
-> Sequence[Path]: + return get_files(config.export_path) + + +def comments() -> Iterator[PComment]: + for f in inputs(): + yield from read_file(f) + +def stats() -> Stats: + from my.core import stat + return { + **stat(comments) + } + diff --git a/my/reddit.py b/my/reddit/rexport.py similarity index 82% rename from my/reddit.py rename to my/reddit/rexport.py index bbafe92..ca2059d 100755 --- a/my/reddit.py +++ b/my/reddit/rexport.py @@ -5,10 +5,12 @@ REQUIRES = [ 'git+https://github.com/karlicoss/rexport', ] -from .core.common import Paths +from my.core.common import Paths +from dataclasses import dataclass +from typing import Any from my.config import reddit as uconfig -from dataclasses import dataclass + @dataclass class reddit(uconfig): @@ -20,15 +22,27 @@ class reddit(uconfig): export_path: Paths -from .core.cfg import make_config, Attrs +from my.core.cfg import make_config, Attrs # hmm, also nice thing about this is that migration is possible to test without the rest of the config? def migration(attrs: Attrs) -> Attrs: - export_dir = 'export_dir' - if export_dir in attrs: # legacy name - attrs['export_path'] = attrs[export_dir] - from .core.warnings import high - high(f'"{export_dir}" is deprecated! Please use "export_path" instead."') + # new structure, take top-level config and extract 'rexport' class + if 'rexport' in attrs: + ex: uconfig.rexport = attrs['rexport'] + attrs['export_path'] = ex.export_path + else: + from my.core.warnings import high + high("""DEPRECATED! Please modify your reddit config to look like: + +class reddit: + class rexport: + export_path: Paths = '/path/to/rexport/data' + """) + export_dir = 'export_dir' + if export_dir in attrs: # legacy name + attrs['export_path'] = attrs[export_dir] + high(f'"{export_dir}" is deprecated! 
Please use "export_path" instead."') return attrs + config = make_config(reddit, migration=migration) ### @@ -37,7 +51,7 @@ config = make_config(reddit, migration=migration) try: from rexport import dal except ModuleNotFoundError as e: - from .core.compat import pre_pip_dal_handler + from my.core.compat import pre_pip_dal_handler dal = pre_pip_dal_handler('rexport', e, config, requires=REQUIRES) # TODO ugh. this would import too early # but on the other hand we do want to bring the objects into the scope for easier imports, etc. ugh! @@ -47,8 +61,8 @@ except ModuleNotFoundError as e: ############################ -from typing import List, Sequence, Mapping, Iterator -from .core.common import mcachew, get_files, LazyLogger, make_dict +from typing import List, Sequence, Mapping, Iterator, Any +from my.core.common import mcachew, get_files, LazyLogger, make_dict, Stats logger = LazyLogger(__name__, level='debug') @@ -59,7 +73,7 @@ def inputs() -> Sequence[Path]: return get_files(config.export_path) -Sid = dal.Sid +Uid = dal.Sid # str Save = dal.Save Comment = dal.Comment Submission = dal.Submission @@ -69,7 +83,7 @@ Upvote = dal.Upvote def _dal() -> dal.DAL: inp = list(inputs()) return dal.DAL(inp) -cache = mcachew(hashf=inputs) # depends on inputs only +cache = mcachew(depends_on=inputs) # depends on inputs only @cache @@ -139,7 +153,7 @@ def _get_bdate(bfile: Path) -> datetime: return bdt -def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]: +def _get_state(bfile: Path) -> Dict[Uid, SaveWithDt]: logger.debug('handling %s', bfile) bdt = _get_bdate(bfile) @@ -156,11 +170,11 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]: def _get_events(backups: Sequence[Path], parallel: bool=True) -> Iterator[Event]: # todo cachew: let it transform return type? so you don't have to write a wrapper for lists? - prev_saves: Mapping[Sid, SaveWithDt] = {} + prev_saves: Mapping[Uid, SaveWithDt] = {} # TODO suppress first batch?? 
# TODO for initial batch, treat event time as creation time - states: Iterable[Mapping[Sid, SaveWithDt]] + states: Iterable[Mapping[Uid, SaveWithDt]] if parallel: with Pool() as p: states = p.map(_get_state, backups) @@ -213,8 +227,8 @@ def events(*args, **kwargs) -> List[Event]: return list(sorted(evit, key=lambda e: e.cmp_key)) # type: ignore[attr-defined,arg-type] -def stats(): - from .core import stat +def stats() -> Stats: + from my.core import stat return { **stat(saved ), **stat(comments ), @@ -223,9 +237,6 @@ def stats(): } -## - - def main() -> None: for e in events(parallel=False): print(e) @@ -234,7 +245,3 @@ def main() -> None: if __name__ == '__main__': main() -# TODO deprecate... - -get_sources = inputs -get_events = events diff --git a/tests/reddit.py b/tests/reddit.py index 964d0f5..b0dd47a 100644 --- a/tests/reddit.py +++ b/tests/reddit.py @@ -7,13 +7,13 @@ from my.common import make_dict def test() -> None: - from my.reddit import events, inputs, saved + from my.reddit.rexport import events, inputs, saved list(events()) list(saved()) def test_unfav() -> None: - from my.reddit import events, inputs, saved + from my.reddit.rexport import events, inputs, saved ev = events() url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/' uev = [e for e in ev if e.url == url] @@ -26,7 +26,7 @@ def test_unfav() -> None: def test_saves() -> None: - from my.reddit import events, inputs, saved + from my.reddit.rexport import events, inputs, saved # TODO not sure if this is necesasry anymore? saves = list(saved()) # just check that they are unique.. @@ -34,7 +34,7 @@ def test_saves() -> None: def test_disappearing() -> None: - from my.reddit import events, inputs, saved + from my.reddit.rexport import events, inputs, saved # eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason # but I guess it was just a short glitch... 
so whatever saves = events() @@ -44,7 +44,7 @@ def test_disappearing() -> None: def test_unfavorite() -> None: - from my.reddit import events, inputs, saved + from my.reddit.rexport import events, inputs, saved evs = events() unfavs = [s for s in evs if s.text == 'unfavorited'] [xxx] = [u for u in unfavs if u.eid == 'unf-19ifop'] @@ -52,7 +52,7 @@ def test_unfavorite() -> None: def test_extra_attr() -> None: - from my.reddit import config + from my.reddit.rexport import config assert isinstance(getattr(config, 'passthrough'), str) @@ -61,7 +61,9 @@ import pytest # type: ignore def prepare(): from my.common import get_files from my.config import reddit as config - files = get_files(config.export_path) + # since these are only tested locally, the config should be fine + # just need to make sure local config matches that in my.config properly + files = get_files(config.rexport.export_path) # use less files for the test to make it faster # first bit is for 'test_unfavorite, the second is for test_disappearing files = files[300:330] + files[500:520] diff --git a/tox.ini b/tox.ini index 770198c..5f0f6f1 100644 --- a/tox.ini +++ b/tox.ini @@ -88,7 +88,8 @@ commands = hpi module install my.hypothesis hpi module install my.instapaper hpi module install my.pocket - hpi module install my.reddit + hpi module install my.reddit.rexport + hpi module install my.reddit.pushshift hpi module install my.stackexchange.stexport hpi module install my.pinboard hpi module install my.arbtt