Merge pull request #37 from karlicoss/updates

various updates: implicit globs for get-files, mcachew type checking, modules cleanup
karlicoss, 2020-05-03 17:19:55 +01:00, committed by GitHub
commit 5aecc037e9
16 changed files with 285 additions and 170 deletions


@@ -20,8 +20,7 @@ from my.config import github as config
 import my.config.repos.ghexport.dal as ghexport
 
-logger = LazyLogger('my.github')
-# TODO __package__???
+logger = LazyLogger(__name__)
 
 class Event(NamedTuple):
@@ -32,56 +31,75 @@ class Event(NamedTuple):
     body: Optional[str]=None
 
+# TODO hmm. need some sort of abstract syntax for this...
 # TODO split further, title too
 def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:
+    # TODO would be nice to give access to raw event within timeline
+    eid = e['id']
     tp = e['type']
     pl = e['payload']
     rname = e['repo']['name']
+
+    mapping = {
+        'CreateEvent': 'created',
+        'DeleteEvent': 'deleted',
+    }
+
     if tp == 'ForkEvent':
         url = e['payload']['forkee']['html_url']
-        return f"forked {rname}", url, None
+        return f"{rname}: forked", url, None
     elif tp == 'PushEvent':
-        return f"pushed to {rname}", None, None
+        commits = pl['commits']
+        messages = [c['message'] for c in commits]
+        body = '\n'.join(messages)
+        return f"{rname}: pushed\n{body}", None, None
     elif tp == 'WatchEvent':
-        return f"watching {rname}", None, None
-    elif tp == 'CreateEvent':
-        # TODO eh, only weird API link?
-        return f"created {rname}", None, f'created_{rname}'
+        return f"{rname}: watching", None, None
+    elif tp in mapping:
+        what = mapping[tp]
+        rt = pl['ref_type']
+        ref = pl['ref']
+        # TODO link to branch? only contains weird API link though
+        # TODO hmm. include timestamp instead?
+        # breakpoint()
+        # TODO combine automatically instead
+        return f"{rname}: {what} {rt} {ref}", None, f'{rname}_{what}_{rt}_{ref}_{eid}'
    elif tp == 'PullRequestEvent':
         pr = pl['pull_request']
         action = pl['action']
         link = pr['html_url']
         title = pr['title']
-        return f"{action} PR {title}", link, f'pull_request_{link}'
+        return f"{rname}: {action} PR {title}", link, f'{rname}_{action}_pr_{link}'
     elif tp == "IssuesEvent":
         action = pl['action']
         iss = pl['issue']
         link = iss['html_url']
         title = iss['title']
-        return f"{action} issue {title}", link, None
+        return f"{rname}: {action} issue {title}", link, None
     elif tp == "IssueCommentEvent":
         com = pl['comment']
         link = com['html_url']
         iss = pl['issue']
         title = iss['title']
-        return f"commented on issue {title}", link, f'issue_comment_' + link
+        return f"{rname}: commented on issue {title}", link, f'issue_comment_' + link
     elif tp == "ReleaseEvent":
         action = pl['action']
         rel = pl['release']
         tag = rel['tag_name']
         link = rel['html_url']
-        return f"{action} {rname} [{tag}]", link, None
-    elif tp in (
-            "DeleteEvent",
-            "PublicEvent",
-    ):
-        return tp, None, None # TODO ???
+        return f"{rname}: {action} [{tag}]", link, None
+    elif tp in 'PublicEvent':
+        return f'{tp} {e}', None, None # TODO ???
     else:
         return tp, None, None
 
-def get_dal():
-    sources = get_files(config.export_dir, glob='*.json*')
+def inputs():
+    return get_files(config.export_dir, glob='*.json*')
+
+def _dal():
+    sources = inputs()
     sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
     return ghexport.DAL(sources)

@@ -218,7 +236,7 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:
 
 # TODO hmm. not good, need to be lazier?...
 @mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
-def iter_backup_events(dal=get_dal()) -> Iterator[Event]:
+def iter_backup_events(dal=_dal()) -> Iterator[Event]:
     for d in dal.events():
         yield _parse_event(d)
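For orientation — the new per-repo summary format, traced on a hypothetical event dict (fields limited to what _get_summary actually reads; the shape follows GitHub's events API as consumed above):

    event = {
        'id': '123',
        'type': 'PushEvent',
        'repo': {'name': 'karlicoss/HPI'},
        'payload': {'commits': [{'message': 'fix tests'}]},
    }
    summary, link, eid = _get_summary(event)
    # summary == 'karlicoss/HPI: pushed\nfix tests'; link and eid are None

Prefixing every summary with the repo name makes the resulting timeline entries group naturally by repository.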


@@ -1,7 +1,9 @@
+from glob import glob as do_glob
 from pathlib import Path
 import functools
 import types
-from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast
+from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
+import warnings
 
 from . import init
@@ -46,6 +48,7 @@ def the(l: Iterable[T]) -> T:
     return first
 
+# TODO more_itertools.bucket?
 def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
     res: Dict[K, List[T]] = {}
     for i in l:
@@ -106,9 +109,12 @@ from .kython.klogging import setup_logger, LazyLogger
 Paths = Union[Sequence[PathIsh], PathIsh]
 
-def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]:
+DEFAULT_GLOB = '*'
+def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
     """
     Helper function to avoid boilerplate.
+
+    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
     """
     # TODO FIXME mm, some wrapper to assert iterator isn't empty?
     sources: List[Path] = []
@@ -122,17 +128,38 @@ def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]:
         if src.is_dir():
             gp: Iterable[Path] = src.glob(glob)
             paths.extend(gp)
+        else:
+            ss = str(src)
+            if '*' in ss:
+                if glob != DEFAULT_GLOB:
+                    warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
+                paths.extend(map(Path, do_glob(ss)))
         else:
-            assert src.is_file(), src
-            # TODO FIXME assert matches glob??
-            paths.append(src)
+            else:
+                assert src.is_file(), src
+                # todo assert matches glob??
+                paths.append(src)
     if sort:
         paths = list(sorted(paths))
-    return paths
+    return tuple(paths)
 
 
-def mcachew(*args, **kwargs):
+# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from typing import Callable, TypeVar
+    from typing_extensions import Protocol
+    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
+    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
+    # ok, that's actually a super nice 'pattern'
+    F = TypeVar('F')
+    class McachewType(Protocol):
+        def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
+            ...
+
+    mcachew: McachewType
+
+def mcachew(*args, **kwargs): # type: ignore[no-redef]
     """
     Stands for 'Maybe cachew'.
     Defensive wrapper around @cachew to make it an optional dependency.
@@ -140,7 +167,6 @@ def mcachew(*args, **kwargs):
     try:
         import cachew
     except ModuleNotFoundError:
-        import warnings
         warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
         return lambda orig_func: orig_func
     else:
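Taken together, get_files now accepts three input shapes and always returns a sorted tuple; a usage sketch (paths are hypothetical):

    from my.common import get_files

    get_files('/exports/data.json')       # single file -> (Path('/exports/data.json'),)
    get_files('/exports', glob='*.json')  # directory + glob -> matching files, sorted
    get_files('/exports/*/*.json')        # '*' in the path itself triggers the implicit glob;
                                          # a non-default explicit glob= is ignored with a warning

The TYPE_CHECKING dance above is what keeps mypy happy: type checkers only see the `mcachew: McachewType` declaration with its precise Protocol signature, while at runtime the permissive *args/**kwargs wrapper (hence the `type: ignore[no-redef]`) keeps cachew an optional dependency.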


@@ -5,26 +5,21 @@
 Consumes data exported by https://github.com/karlicoss/backup-emfit
 """
 import json
-import logging
-from collections import OrderedDict as odict
 from dataclasses import dataclass
 from datetime import date, datetime, time, timedelta
-from itertools import groupby
 from pathlib import Path
 from typing import Dict, Iterator, List, NamedTuple, Any, cast
 
 import pytz
+from more_itertools import bucket
 
-from ..common import get_files, LazyLogger, cproperty, group_by_key, mcachew
+from ..common import get_files, LazyLogger, cproperty, mcachew
 
 from my.config import emfit as config
 
-logger = LazyLogger('my.emfit', level='info')
+logger = LazyLogger(__name__, level='info')
 
-# TODO FIXME remove?
-import kython
-timed = lambda f: kython.timed(f, logger=logger)
 
 def hhmm(minutes):
@@ -35,13 +30,10 @@ AWAKE = 4
 
 Sid = str
 
-# TODO use tz provider for that?
-_TZ = pytz.timezone(config.tz)
-
 # TODO use common tz thing?
 def fromts(ts) -> datetime:
-    dt = datetime.fromtimestamp(ts)
-    return _TZ.localize(dt)
+    dt = datetime.fromtimestamp(ts, tz=pytz.utc)
+    return dt
 
 
 class Mixin:
@@ -295,14 +287,14 @@ class Emfit(Mixin):
 
 # TODO move to common?
 def dir_hash(path: Path):
-    mtimes = tuple(p.stat().st_mtime for p in sorted(path.glob('*.json')))
+    mtimes = tuple(p.stat().st_mtime for p in get_files(path, glob='*.json'))
     return mtimes
 
+# TODO take __file__ into account somehow?
 @mcachew(cache_path=config.cache_path, hashf=dir_hash, logger=logger)
-def iter_datas_cached(path: Path) -> Iterator[Emfit]:
-    # TODO use get_files?
-    for f in sorted(path.glob('*.json')):
+def iter_datas(path: Path=config.export_path) -> Iterator[Emfit]:
+    for f in get_files(path, glob='*.json'):
         sid = f.stem
         if sid in config.excluded_sids:
             continue
@@ -311,20 +303,17 @@ def iter_datas_cached(path: Path) -> Iterator[Emfit]:
 
         yield from Emfit.make(em)
 
-def iter_datas(path=config.export_path) -> Iterator[Emfit]:
-    yield from iter_datas_cached(path)
-
 def get_datas() -> List[Emfit]:
     return list(sorted(iter_datas(), key=lambda e: e.start))
 # TODO move away old entries if there is a diff??
 
-@timed
 def by_night() -> Dict[date, Emfit]:
-    res: Dict[date, Emfit] = odict()
+    res: Dict[date, Emfit] = {}
     # TODO shit. I need some sort of interrupted sleep detection?
-    for dd, sleeps in group_by_key(get_datas(), key=lambda s: s.date).items():
+    grouped = bucket(get_datas(), key=lambda s: s.date)
+    for dd in grouped:
+        sleeps = list(grouped[dd])
         if len(sleeps) > 1:
             logger.warning("multiple sleeps per night, not handled yet: %s", sleeps)
             continue
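more_itertools.bucket is the lazy stand-in for the removed group_by_key helper: iterating the bucket yields the distinct keys, and indexing it yields that key's items. A minimal sketch with toy data:

    from more_itertools import bucket

    sleeps = ['2020-05-01/a', '2020-05-01/b', '2020-05-02/c']
    grouped = bucket(sleeps, key=lambda s: s.split('/')[0])
    for dd in grouped:             # yields the distinct keys: '2020-05-01', '2020-05-02'
        items = list(grouped[dd])  # that night's entries, consumed lazily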


@@ -15,10 +15,10 @@ from .common import get_files, LazyLogger
 
 from my.config import foursquare as config
 
-logger = LazyLogger(__package__)
+logger = LazyLogger(__name__)
 
-def _get_exports() -> List[Path]:
+def inputs():
     return get_files(config.export_path, '*.json')

@@ -62,7 +62,7 @@ class Place:
 
 def get_raw(fname=None):
     if fname is None:
-        fname = max(_get_exports())
+        fname = max(inputs())
     j = json.loads(Path(fname).read_text())
     assert isinstance(j, list)


@@ -3,50 +3,41 @@
 """
 from . import init
 
-from .common import PathIsh
+from .common import get_files
+from .error import Res, sort_res_by
 
-import my.config.repos.hypexport as hypexport
-from my.config.repos.hypexport import dal
+import my.config.repos.hypexport.dal as hypexport
 
 from my.config import hypothesis as config
 
-export_path: PathIsh = config.export_path
-
 ###
 
 from typing import List
 
-from .common import get_files, cproperty, group_by_key
-from .error import Res, sort_res_by
-
 # TODO weird. not sure why e.g. from dal import Highlight doesn't work..
-Highlight = dal.Highlight
-DAL = dal.DAL
-Page = dal.Page
+Highlight = hypexport.Highlight
+Page = hypexport.Page
 
-# TODO eh. not sure if I should rename everything to dao/DAO or not...
-def dao() -> DAL:
-    sources = get_files(export_path, '*.json')
-    model = DAL(sources)
-    return model
+def _dal() -> hypexport.DAL:
+    sources = get_files(config.export_path, '*.json')
+    return hypexport.DAL(sources)
 
-def get_highlights() -> List[Res[Highlight]]:
-    return sort_res_by(dao().highlights(), key=lambda h: h.created)
+def highlights() -> List[Res[Highlight]]:
+    return sort_res_by(_dal().highlights(), key=lambda h: h.created)
 
 # TODO eh. always provide iterators? although sort_res_by could be neat too...
-def get_pages() -> List[Res[Page]]:
-    return sort_res_by(dao().pages(), key=lambda h: h.created)
+def pages() -> List[Res[Page]]:
+    return sort_res_by(_dal().pages(), key=lambda h: h.created)
 
 # TODO move to side tests?
 def test():
-    get_pages()
-    get_highlights()
+    list(pages())
+    list(highlights())
 
 def _main():

@@ -55,3 +46,6 @@ def _main():
 
 if __name__ == '__main__':
     _main()
+
+get_highlights = highlights # TODO deprecate
+get_pages = pages # TODO deprecate
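The trailing aliases keep old call sites importable while they migrate to the new names:

    from my.hypothesis import get_highlights, highlights
    assert get_highlights is highlights  # the old name is just another binding to the same function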


@@ -1,55 +1,32 @@
 """
 Instapaper bookmarks, highlights and annotations
 """
-from pathlib import Path
-from typing import NamedTuple, Optional, List, Iterator
-
-from .common import group_by_key, PathIsh, get_files
+from .common import get_files
 
 from my.config import instapaper as config
 
 import my.config.repos.instapexport.dal as dal
 
-def _get_files():
-    return get_files(config.export_path, glob='*.json')
+Highlight = dal.Highlight
+Bookmark = dal.Bookmark
 
-def get_dal() -> dal.DAL:
-    return dal.DAL(_get_files())
+def inputs():
+    return get_files(config.export_path)
 
-# TODO meh, come up with better name...
-class HighlightWithBm(NamedTuple):
-    highlight: dal.Highlight
-    bookmark: dal.Bookmark
+def _dal() -> dal.DAL:
+    return dal.DAL(inputs())
 
-def iter_highlights(**kwargs) -> Iterator[HighlightWithBm]:
-    # meh...
-    dl = get_dal()
-    hls = dl.highlights()
-    bms = dl.bookmarks()
-    for _, h in hls.items():
-        yield HighlightWithBm(highlight=h, bookmark=bms[h.bid])
+def pages():
+    return _dal().pages()
+get_pages = pages # todo also deprecate..
 
-# def get_highlights(**kwargs) -> List[Highlight]:
-#     return list(iter_highlights(**kwargs))
-
-def get_pages():
-    return get_dal().pages()
-
-def get_todos() -> Iterator[HighlightWithBm]:
-    def is_todo(hl: HighlightWithBm):
-        h = hl.highlight
-        note = h.note or ''
-        note = note.lstrip().lower()
-        return note.startswith('todo')
-    return filter(is_todo, iter_highlights())
-
-def main():
-    for h in get_todos():
-        print(h)
+# TODO dunno, move this to private?
+def is_todo(hl: Highlight) -> bool:
+    note = hl.note or ''
+    note = note.lstrip().lower()
+    return note.startswith('todo')


@@ -2,27 +2,31 @@
 Last.fm scrobbles
 '''
-from .. import init
-
-from functools import lru_cache
-from typing import NamedTuple, Dict, Any
+from ..common import get_files, mcachew, Json
 
 from datetime import datetime
-from pathlib import Path
 import json
+from pathlib import Path
+from typing import NamedTuple, Any, Sequence, Iterable
 
 import pytz
 
 from my.config import lastfm as config
 
-# TODO Json type?
 # TODO memoised properties?
 # TODO lazy mode and eager mode?
 # lazy is a bit nicer in terms of more flexibility and less processing?
 # eager is a bit more explicit for error handling
 
-class Scrobble(NamedTuple):
-    raw: Dict[str, Any]
+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)
+
+class Scrobble(NamedTuple):
+    raw: Json
 
+    # TODO mm, no timezone? hopefully it's UTC
     @property
     def dt(self) -> datetime:
         ts = int(self.raw['date'])
@@ -45,22 +49,10 @@ class Scrobble(NamedTuple):
 
 # TODO could also be nice to make generic? maybe even depending on eagerness
-# TODO memoise...?
-# TODO watch out, if we keep the app running it might expire
-def _iter_scrobbles():
-    # TODO use get_files
-    last = max(Path(config.export_path).glob('*.json'))
-    # TODO mm, no timezone? hopefuly it's UTC
+@mcachew(hashf=lambda: inputs())
+def scrobbles() -> Iterable[Scrobble]:
+    last = max(inputs())
     j = json.loads(last.read_text())
     for raw in j:
         yield Scrobble(raw=raw)
-
-@lru_cache(1)
-def get_scrobbles():
-    return list(sorted(_iter_scrobbles(), key=lambda s: s.dt))
-
-def test():
-    assert len(get_scrobbles()) > 1000
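Here hashf=lambda: inputs() ties the cache to the current set of export files, which is where get_files returning a (hashable, deterministic) tuple pays off. A toy sketch of the assumed invalidation contract — cachew's real logic lives in the library:

    def maybe_cached(hashf):
        # recompute the wrapped function whenever hashf() changes
        def deco(fn):
            state = {}
            def wrapper():
                key = hashf()
                if state.get('key') != key:  # inputs changed -> refresh
                    state['key'], state['value'] = key, list(fn())
                return state['value']
            return wrapper
        return deco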


@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 # pip install influxdb
 from influxdb import InfluxDBClient # type: ignore
 
-from my.lastfm import get_scrobbles
+from my.lastfm import scrobbles
 
-def main():
-    scrobbles = get_scrobbles()
+def main() -> None:
+    scrobbles = scrobbles()
     client = InfluxDBClient()
     # TODO client.create_database('lastfm')


@@ -17,7 +17,7 @@ from ..error import Res
 
 from my.config import photos as config
 
-log = LazyLogger('my.photos')
+log = LazyLogger(__name__)

@@ -46,13 +46,12 @@ class Photo(NamedTuple):
         raise RuntimeError(f'Weird path {self.path}, cant match against anything')
 
     @property
-    def linkname(self) -> str:
+    def name(self) -> str:
         return self._basename.strip('/')
 
     @property
     def url(self) -> str:
-        PHOTOS_URL = 'TODO FIXME'
-        return PHOTOS_URL + self._basename
+        return f'{config.base_url}{self._basename}'
 
 from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref


@@ -8,7 +8,6 @@ from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional
 import json
 
 import pytz
-# TODO declare DEPENDS = [pytz??]
 
 from ..common import LazyLogger, get_files


@@ -15,13 +15,14 @@ import my.config.repos.rexport.dal as rexport
 
 def get_sources() -> Sequence[Path]:
     # TODO use zstd?
-    # TODO maybe add assert to get_files? (and allow to suppress it)
-    files = get_files(config.export_dir, glob='*.json.xz')
+    # TODO rename to export_path?
+    files = get_files(config.export_dir)
     res = list(map(CPath, files)); assert len(res) > 0
+    # todo move the assert to get_files?
     return tuple(res)
 
-logger = LazyLogger(__package__, level='debug')
+logger = LazyLogger(__name__, level='debug')
 
 Sid = rexport.Sid

@@ -31,7 +32,7 @@ Submission = rexport.Submission
 Upvote = rexport.Upvote
 
-def dal():
+def dal() -> rexport.DAL:
     # TODO lru cache? but be careful when it runs continuously
     return rexport.DAL(get_sources())

@@ -173,12 +174,12 @@ def get_events(*args, **kwargs) -> List[Event]:
     return list(sorted(evit, key=lambda e: e.cmp_key))
 
-def test():
+def test() -> None:
     get_events(backups=get_sources()[-1:])
     list(saved())
 
-def test_unfav():
+def test_unfav() -> None:
     events = get_events()
     url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
     uevents = [e for e in events if e.url == url]

@@ -188,15 +189,15 @@ def test_unfav():
 
     uf = uevents[1]
     assert uf.text == 'unfavorited'
 
-# TODO move out..
-def test_get_all_saves():
+def test_get_all_saves() -> None:
     # TODO not sure if this is necessary anymore?
     saves = list(saved())
     # just check that they are unique..
     make_dict(saves, key=lambda s: s.sid)
 
-def test_disappearing():
+def test_disappearing() -> None:
     # eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
     # but I guess it was just a short glitch... so whatever
     saves = get_events()

@@ -205,14 +206,14 @@ def test_disappearing():
 
     assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
 
-def test_unfavorite():
+def test_unfavorite() -> None:
     events = get_events()
     unfavs = [s for s in events if s.text == 'unfavorited']
     [xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
     assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)
 
-def main():
+def main() -> None:
     # TODO eh. not sure why but parallel on seems to mess glumov up and cause OOM...
     events = get_events(parallel=False)
     print(len(events))


@@ -18,7 +18,7 @@ from my.config import rescuetime as config
 
 log = LazyLogger(__package__, level='info')
 
-def _get_exports() -> List[Path]:
+def inputs():
     return get_files(config.export_path, '*.json')

@@ -28,7 +28,7 @@ Model = rescuexport.Model
 
 # TODO cache?
 def get_model(last=0) -> Model:
-    return Model(_get_exports()[-last:])
+    return Model(inputs()[-last:])
 
 def _without_errors():


@@ -4,8 +4,9 @@
 from setuptools import setup, find_namespace_packages # type: ignore
 
 INSTALL_REQUIRES = [
-    'appdirs',
     'pytz', # even though it's not needed by the core, it's so common anyway...
+    'appdirs', # very common, and makes it portable
+    'more-itertools', # it's just too useful and very common anyway
 ]

tests/common.py (new file, 113 lines)

@@ -0,0 +1,113 @@
+from pathlib import Path
+
+from my.common import get_files
+
+import pytest # type: ignore
+
+
+def test_single_file():
+    '''
+    Regular file path is just returned as is.
+    '''
+
+    "Exception if it doesn't exist"
+    with pytest.raises(Exception):
+        get_files('/tmp/hpi_test/file.ext')
+
+    create('/tmp/hpi_test/file.ext')
+
+    '''
+    Couple of things:
+    1. Return type is a tuple, it's friendlier for hashing/caching
+    2. It always returns pathlib.Path instead of plain strings
+    '''
+    assert get_files('/tmp/hpi_test/file.ext') == (
+        Path('/tmp/hpi_test/file.ext'),
+    )
+
+
+def test_multiple_files():
+    '''
+    If you pass a directory/multiple directories, it flattens the contents
+    '''
+    create('/tmp/hpi_test/dir1/')
+    create('/tmp/hpi_test/dir1/zzz')
+    create('/tmp/hpi_test/dir1/yyy')
+    # create('/tmp/hpi_test/dir1/whatever/') # TODO not sure about this... should really allow extra dirs
+    create('/tmp/hpi_test/dir2/')
+    create('/tmp/hpi_test/dir2/mmm')
+    create('/tmp/hpi_test/dir2/nnn')
+    create('/tmp/hpi_test/dir3/')
+    create('/tmp/hpi_test/dir3/ttt')
+
+    assert get_files([
+        Path('/tmp/hpi_test/dir3'), # it takes in Path as well as str
+        '/tmp/hpi_test/dir1',
+    ]) == (
+        # the paths are always returned in sorted order (unless you pass sort=False)
+        Path('/tmp/hpi_test/dir1/yyy'),
+        Path('/tmp/hpi_test/dir1/zzz'),
+        Path('/tmp/hpi_test/dir3/ttt'),
+    )
+
+
+def test_explicit_glob():
+    '''
+    You can pass a glob to restrict the extensions
+    '''
+    create('/tmp/hpi_test/file_3.zip')
+    create('/tmp/hpi_test/file_2.zip')
+    create('/tmp/hpi_test/ignoreme')
+    create('/tmp/hpi_test/file.zip')
+
+    # todo walrus operator would be great here...
+    expected = (
+        Path('/tmp/hpi_test/file_2.zip'),
+        Path('/tmp/hpi_test/file_3.zip'),
+    )
+    assert get_files('/tmp/hpi_test', 'file_*.zip') == expected
+
+    "named argument should work too"
+    assert get_files('/tmp/hpi_test', glob='file_*.zip') == expected
+
+
+def test_implicit_glob():
+    '''
+    An asterisk in the path results in globbing too.
+    '''
+    # todo hopefully that makes sense? dunno why would anyone actually rely on asterisks in names..
+    # this is very convenient in configs, so people don't have to use some special types
+
+    create('/tmp/hpi_test/123/')
+    create('/tmp/hpi_test/123/dummy')
+    create('/tmp/hpi_test/123/file.zip')
+    create('/tmp/hpi_test/456/')
+    create('/tmp/hpi_test/456/dummy')
+    create('/tmp/hpi_test/456/file.zip')
+
+    assert get_files(['/tmp/hpi_test/*/*.zip']) == (
+        Path('/tmp/hpi_test/123/file.zip'),
+        Path('/tmp/hpi_test/456/file.zip'),
+    )
+
+# TODO not sure if should uniquify if the filenames end up same?
+# TODO not sure about the symlinks? and hidden files?
+
+test_path = Path('/tmp/hpi_test')
+def setup():
+    teardown()
+    test_path.mkdir()
+
+def teardown():
+    import shutil
+    if test_path.is_dir():
+        shutil.rmtree(test_path)
+
+def create(f: str) -> None:
+    if f.endswith('/'):
+        Path(f).mkdir()
+    else:
+        Path(f).touch()


@@ -1,6 +1,5 @@
-from my.instapaper import get_todos
+from my.instapaper import pages
 
-def test_get_todos():
-    for t in get_todos():
-        print(t)
+def test_pages():
+    assert len(list(pages())) > 3

tests/lastfm.py (new file, 7 lines)

@@ -0,0 +1,7 @@
+from more_itertools import ilen
+
+from my.lastfm import scrobbles
+
+
+def test():
+    assert ilen(scrobbles()) > 1000