Merge pull request #37 from karlicoss/updates

various updates: implicit globs for get-files, mcachew type checking, modules cleanup
karlicoss, 2020-05-03 17:19:55 +01:00 (committed via GitHub)
commit 5aecc037e9
16 changed files with 285 additions and 170 deletions
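The headline change is to get_files: glob now defaults to '*', a '*' inside the path itself is expanded, and the result is a tuple. A quick sketch of the call patterns this enables (paths hypothetical):

from my.common import get_files

get_files('/data/exports')                 # directory: every file inside, sorted, as a tuple
get_files('/data/exports', glob='*.json')  # directory + explicit glob, as before
get_files('/data/exports/*.json.xz')       # new: '*' in the path triggers globbing implicitly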


@ -20,8 +20,7 @@ from my.config import github as config
import my.config.repos.ghexport.dal as ghexport
logger = LazyLogger('my.github')
# TODO __package__???
logger = LazyLogger(__name__)
class Event(NamedTuple):
@ -32,56 +31,75 @@ class Event(NamedTuple):
body: Optional[str]=None
# TODO hmm. need some sort of abstract syntax for this...
# TODO split further, title too
def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:
# TODO would be nice to give access to raw event within timeline
eid = e['id']
tp = e['type']
pl = e['payload']
rname = e['repo']['name']
mapping = {
'CreateEvent': 'created',
'DeleteEvent': 'deleted',
}
if tp == 'ForkEvent':
url = e['payload']['forkee']['html_url']
return f"forked {rname}", url, None
return f"{rname}: forked", url, None
elif tp == 'PushEvent':
return f"pushed to {rname}", None, None
commits = pl['commits']
messages = [c['message'] for c in commits]
body = '\n'.join(messages)
return f"{rname}: pushed\n{body}", None, None
elif tp == 'WatchEvent':
return f"watching {rname}", None, None
elif tp == 'CreateEvent':
# TODO eh, only weird API link?
return f"created {rname}", None, f'created_{rname}'
return f"{rname}: watching", None, None
elif tp in mapping:
what = mapping[tp]
rt = pl['ref_type']
ref = pl['ref']
# TODO link to branch? only contains weird API link though
# TODO hmm. include timestamp instead?
# breakpoint()
# TODO combine automatically instead
return f"{rname}: {what} {rt} {ref}", None, f'{rname}_{what}_{rt}_{ref}_{eid}'
elif tp == 'PullRequestEvent':
pr = pl['pull_request']
action = pl['action']
link = pr['html_url']
title = pr['title']
return f"{action} PR {title}", link, f'pull_request_{link}'
return f"{rname}: {action} PR {title}", link, f'{rname}_{action}_pr_{link}'
elif tp == "IssuesEvent":
action = pl['action']
iss = pl['issue']
link = iss['html_url']
title = iss['title']
return f"{action} issue {title}", link, None
return f"{rname}: {action} issue {title}", link, None
elif tp == "IssueCommentEvent":
com = pl['comment']
link = com['html_url']
iss = pl['issue']
title = iss['title']
return f"commented on issue {title}", link, f'issue_comment_' + link
return f"{rname}: commented on issue {title}", link, f'issue_comment_{link}'
elif tp == "ReleaseEvent":
action = pl['action']
rel = pl['release']
tag = rel['tag_name']
link = rel['html_url']
return f"{action} {rname} [{tag}]", link, None
elif tp in (
"DeleteEvent",
"PublicEvent",
):
return tp, None, None # TODO ???
return f"{rname}: {action} [{tag}]", link, None
elif tp == 'PublicEvent':
return f'{tp} {e}', None, None # TODO ???
else:
return tp, None, None
def get_dal():
sources = get_files(config.export_dir, glob='*.json*')
def inputs():
return get_files(config.export_dir, glob='*.json*')
def _dal():
sources = inputs()
sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
return ghexport.DAL(sources)
@ -218,7 +236,7 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:
# TODO hmm. not good, need to be lazier?...
@mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
def iter_backup_events(dal=get_dal()) -> Iterator[Event]:
def iter_backup_events(dal=_dal()) -> Iterator[Event]:
for d in dal.events():
yield _parse_event(d)


@ -1,7 +1,9 @@
from glob import glob as do_glob
from pathlib import Path
import functools
import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
import warnings
from . import init
@ -46,6 +48,7 @@ def the(l: Iterable[T]) -> T:
return first
# TODO more_itertools.bucket?
def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
res: Dict[K, List[T]] = {}
for i in l:
@ -106,9 +109,12 @@ from .kython.klogging import setup_logger, LazyLogger
Paths = Union[Sequence[PathIsh], PathIsh]
def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]:
DEFAULT_GLOB = '*'
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
"""
Helper function to avoid boilerplate.
Tuple as return type is a bit friendlier for hashing/caching, so hopefully it makes sense
"""
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
sources: List[Path] = []
@ -122,17 +128,38 @@ def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]:
if src.is_dir():
gp: Iterable[Path] = src.glob(glob)
paths.extend(gp)
else:
ss = str(src)
if '*' in ss:
if glob != DEFAULT_GLOB:
warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
paths.extend(map(Path, do_glob(ss)))
else:
assert src.is_file(), src
# TODO FIXME assert matches glob??
# todo assert matches glob??
paths.append(src)
if sort:
paths = list(sorted(paths))
return paths
return tuple(paths)
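Illustrating the branch above: a path containing '*' is handed to glob.glob directly, and an explicit glob= passed alongside it is ignored with a warning (paths hypothetical):

from my.common import get_files

files = get_files('/data/exports/*.json', glob='*.xz')
# UserWarning: Treating /data/exports/*.json as glob path. Explicit glob=*.xz argument is ignored!
# files is a sorted tuple of pathlib.Path -- tuples being hashable is what
# makes the result friendlier for caching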
def mcachew(*args, **kwargs):
# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Callable, TypeVar
from typing_extensions import Protocol
# TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
# I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
# ok, that's actually a super nice 'pattern'
F = TypeVar('F')
class McachewType(Protocol):
def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
...
mcachew: McachewType
def mcachew(*args, **kwargs): # type: ignore[no-redef]
"""
Stands for 'Maybe cachew'.
Defensive wrapper around @cachew to make it an optional dependency.
@ -140,7 +167,6 @@ def mcachew(*args, **kwargs):
try:
import cachew
except ModuleNotFoundError:
import warnings
warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
return lambda orig_func: orig_func
else:
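The effect of the TYPE_CHECKING block: at runtime mcachew stays a plain *args/**kwargs wrapper, but mypy sees it through the McachewType protocol, so decorated functions keep their signatures. A minimal sketch (names hypothetical; cachew itself expects functions returning iterators of supported types such as NamedTuples):

from typing import Iterator, NamedTuple
from my.common import mcachew

class Item(NamedTuple):
    value: int

@mcachew(cache_path='/tmp/hpi_demo_cache')
def items() -> Iterator[Item]:
    yield Item(1)

# mypy still types items as Callable[[], Iterator[Item]], because
# McachewType.__call__ is declared to return Callable[[F], F]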


@ -5,26 +5,21 @@
Consumes data exported by https://github.com/karlicoss/backup-emfit
"""
import json
import logging
from collections import OrderedDict as odict
from dataclasses import dataclass
from datetime import date, datetime, time, timedelta
from itertools import groupby
from pathlib import Path
from typing import Dict, Iterator, List, NamedTuple, Any, cast
import pytz
from more_itertools import bucket
from ..common import get_files, LazyLogger, cproperty, group_by_key, mcachew
from ..common import get_files, LazyLogger, cproperty, mcachew
from my.config import emfit as config
logger = LazyLogger('my.emfit', level='info')
# TODO FIXME remove?
import kython
timed = lambda f: kython.timed(f, logger=logger)
logger = LazyLogger(__name__, level='info')
def hhmm(minutes):
@ -35,13 +30,10 @@ AWAKE = 4
Sid = str
# TODO use tz provider for that?
_TZ = pytz.timezone(config.tz)
# TODO use common tz thing?
def fromts(ts) -> datetime:
dt = datetime.fromtimestamp(ts)
return _TZ.localize(dt)
dt = datetime.fromtimestamp(ts, tz=pytz.utc)
return dt
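The behaviour change is subtle: the old code converted the epoch to local wall time and then stamped config.tz onto it, which is only right when the machine's zone happens to match config.tz; the new code produces an aware UTC datetime directly. For example:

from datetime import datetime
import pytz

ts = 1588521595  # arbitrary epoch seconds
dt = datetime.fromtimestamp(ts, tz=pytz.utc)
assert dt.tzinfo is not None  # aware, unambiguous; callers convert zones as needed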
class Mixin:
@ -295,14 +287,14 @@ class Emfit(Mixin):
# TODO move to common?
def dir_hash(path: Path):
mtimes = tuple(p.stat().st_mtime for p in sorted(path.glob('*.json')))
mtimes = tuple(p.stat().st_mtime for p in get_files(path, glob='*.json'))
return mtimes
# TODO take __file__ into account somehow?
@mcachew(cache_path=config.cache_path, hashf=dir_hash, logger=logger)
def iter_datas_cached(path: Path) -> Iterator[Emfit]:
# TODO use get_files?
for f in sorted(path.glob('*.json')):
def iter_datas(path: Path=config.export_path) -> Iterator[Emfit]:
for f in get_files(path, glob='*.json'):
sid = f.stem
if sid in config.excluded_sids:
continue
@ -311,20 +303,17 @@ def iter_datas_cached(path: Path) -> Iterator[Emfit]:
yield from Emfit.make(em)
def iter_datas(path=config.export_path) -> Iterator[Emfit]:
yield from iter_datas_cached(path)
def get_datas() -> List[Emfit]:
return list(sorted(iter_datas(), key=lambda e: e.start))
# TODO move away old entries if there is a diff??
@timed
def by_night() -> Dict[date, Emfit]:
res: Dict[date, Emfit] = odict()
res: Dict[date, Emfit] = {}
# TODO shit. I need some sort of interrupted sleep detection?
for dd, sleeps in group_by_key(get_datas(), key=lambda s: s.date).items():
grouped = bucket(get_datas(), key=lambda s: s.date)
for dd in grouped:
sleeps = list(grouped[dd])
if len(sleeps) > 1:
logger.warning("multiple sleeps per night, not handled yet: %s", sleeps)
continue
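For reference, more_itertools.bucket (replacing group_by_key here) partitions an iterable lazily: iterating the bucket yields the distinct keys, indexing yields that key's items. A tiny sketch (data hypothetical):

from more_itertools import bucket

records = ['2020-05-01/a', '2020-05-01/b', '2020-05-02/c']
grouped = bucket(records, key=lambda s: s.split('/')[0])
for day in grouped:             # '2020-05-01', '2020-05-02'
    items = list(grouped[day])  # e.g. ['2020-05-01/a', '2020-05-01/b']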


@ -15,10 +15,10 @@ from .common import get_files, LazyLogger
from my.config import foursquare as config
logger = LazyLogger(__package__)
logger = LazyLogger(__name__)
def _get_exports() -> List[Path]:
def inputs():
return get_files(config.export_path, '*.json')
@ -62,7 +62,7 @@ class Place:
def get_raw(fname=None):
if fname is None:
fname = max(_get_exports())
fname = max(inputs())
j = json.loads(Path(fname).read_text())
assert isinstance(j, list)


@ -3,50 +3,41 @@
"""
from . import init
from .common import PathIsh
import my.config.repos.hypexport as hypexport
from my.config.repos.hypexport import dal
from .common import get_files
from .error import Res, sort_res_by
import my.config.repos.hypexport.dal as hypexport
from my.config import hypothesis as config
export_path: PathIsh = config.export_path
###
from typing import List
from .common import get_files, cproperty, group_by_key
from .error import Res, sort_res_by
# TODO weird. not sure why e.g. from dal import Highlight doesn't work..
Highlight = dal.Highlight
DAL = dal.DAL
Page = dal.Page
Highlight = hypexport.Highlight
Page = hypexport.Page
# TODO eh. not sure if I should rename everything to dao/DAO or not...
def dao() -> DAL:
sources = get_files(export_path, '*.json')
model = DAL(sources)
return model
def _dal() -> hypexport.DAL:
sources = get_files(config.export_path, '*.json')
return hypexport.DAL(sources)
def get_highlights() -> List[Res[Highlight]]:
return sort_res_by(dao().highlights(), key=lambda h: h.created)
def highlights() -> List[Res[Highlight]]:
return sort_res_by(_dal().highlights(), key=lambda h: h.created)
# TODO eh. always provide iterators? although sort_res_by could be neat too...
def get_pages() -> List[Res[Page]]:
return sort_res_by(dao().pages(), key=lambda h: h.created)
def pages() -> List[Res[Page]]:
return sort_res_by(_dal().pages(), key=lambda h: h.created)
# TODO move to side tests?
def test():
get_pages()
get_highlights()
list(pages())
list(highlights())
def _main():
@ -55,3 +46,6 @@ def _main():
if __name__ == '__main__':
_main()
get_highlights = highlights # TODO deprecate
get_pages = pages # TODO deprecate
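The bare aliases keep old call sites working silently; if the TODO'd deprecation were made loud, a shim along these lines would do it (hypothetical, not part of the commit):

import warnings

def get_highlights():
    warnings.warn('get_highlights() is deprecated, use highlights()', DeprecationWarning)
    return highlights()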


@ -1,55 +1,32 @@
"""
Instapaper bookmarks, highlights and annotations
"""
from pathlib import Path
from typing import NamedTuple, Optional, List, Iterator
from .common import group_by_key, PathIsh, get_files
from .common import get_files
from my.config import instapaper as config
import my.config.repos.instapexport.dal as dal
def _get_files():
return get_files(config.export_path, glob='*.json')
Highlight = dal.Highlight
Bookmark = dal.Bookmark
def get_dal() -> dal.DAL:
return dal.DAL(_get_files())
def inputs():
return get_files(config.export_path)
# TODO meh, come up with better name...
class HighlightWithBm(NamedTuple):
highlight: dal.Highlight
bookmark: dal.Bookmark
def _dal() -> dal.DAL:
return dal.DAL(inputs())
def iter_highlights(**kwargs) -> Iterator[HighlightWithBm]:
# meh...
dl = get_dal()
hls = dl.highlights()
bms = dl.bookmarks()
for _, h in hls.items():
yield HighlightWithBm(highlight=h, bookmark=bms[h.bid])
def pages():
return _dal().pages()
get_pages = pages # todo also deprecate..
# def get_highlights(**kwargs) -> List[Highlight]:
# return list(iter_highlights(**kwargs))
def get_pages():
return get_dal().pages()
def get_todos() -> Iterator[HighlightWithBm]:
def is_todo(hl: HighlightWithBm):
h = hl.highlight
note = h.note or ''
# TODO dunno, move this to private?
def is_todo(hl: Highlight) -> bool:
note = hl.note or ''
note = note.lstrip().lower()
return note.startswith('todo')
return filter(is_todo, iter_highlights())
def main():
for h in get_todos():
print(h)


@ -2,27 +2,31 @@
Last.fm scrobbles
'''
from .. import init
from functools import lru_cache
from typing import NamedTuple, Dict, Any
from ..common import get_files, mcachew, Json
from datetime import datetime
from pathlib import Path
import json
from pathlib import Path
from typing import NamedTuple, Any, Sequence, Iterable
import pytz
from my.config import lastfm as config
# TODO Json type?
# TODO memoised properties?
# TODO lazy mode and eager mode?
# lazy is a bit nicer in terms of more flexibility and less processing?
# eager is a bit more explicit for error handling
class Scrobble(NamedTuple):
raw: Dict[str, Any]
def inputs() -> Sequence[Path]:
return get_files(config.export_path)
class Scrobble(NamedTuple):
raw: Json
# TODO mm, no timezone? hopefully it's UTC
@property
def dt(self) -> datetime:
ts = int(self.raw['date'])
@ -45,22 +49,10 @@ class Scrobble(NamedTuple):
# TODO could also be nice to make generic? maybe even depending on eagerness
# TODO memoise...?
# TODO watch out, if we keep the app running it might expire
def _iter_scrobbles():
# TODO use get_files
last = max(Path(config.export_path).glob('*.json'))
# TODO mm, no timezone? hopefully it's UTC
@mcachew(hashf=lambda: inputs())
def scrobbles() -> Iterable[Scrobble]:
last = max(inputs())
j = json.loads(last.read_text())
for raw in j:
yield Scrobble(raw=raw)
@lru_cache(1)
def get_scrobbles():
return list(sorted(_iter_scrobbles(), key=lambda s: s.dt))
def test():
assert len(get_scrobbles()) > 1000


@ -1,11 +1,11 @@
#!/usr/bin/env python3
# pip install influxdb
from influxdb import InfluxDBClient # type: ignore
from my.lastfm import get_scrobbles
from my.lastfm import scrobbles
def main():
scrobbles = get_scrobbles()
def main() -> None:
scrs = scrobbles()  # note: 'scrobbles = scrobbles()' would shadow the import and raise UnboundLocalError
client = InfluxDBClient()
# TODO client.create_database('lastfm')
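Presumably the script goes on to write the scrobbles out; a hedged sketch of what that could look like with the influxdb client (measurement and field names are assumptions, not taken from the script):

points = [{
    'measurement': 'scrobble',
    'time': s.dt.isoformat(),
    'fields': {'artist': s.raw.get('artist', '')},
} for s in scrs]
client.write_points(points, database='lastfm')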


@ -17,7 +17,7 @@ from ..error import Res
from my.config import photos as config
log = LazyLogger('my.photos')
log = LazyLogger(__name__)
@ -46,13 +46,12 @@ class Photo(NamedTuple):
raise RuntimeError(f"Weird path {self.path}, can't match against anything")
@property
def linkname(self) -> str:
def name(self) -> str:
return self._basename.strip('/')
@property
def url(self) -> str:
PHOTOS_URL = 'TODO FIXME'
return PHOTOS_URL + self._basename
return f'{config.base_url}{self._basename}'
from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref


@ -8,7 +8,6 @@ from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional
import json
import pytz
# TODO declare DEPENDS = [pytz??]
from ..common import LazyLogger, get_files


@ -15,13 +15,14 @@ import my.config.repos.rexport.dal as rexport
def get_sources() -> Sequence[Path]:
# TODO use zstd?
# TODO maybe add assert to get_files? (and allow to suppress it)
files = get_files(config.export_dir, glob='*.json.xz')
# TODO rename to export_path?
files = get_files(config.export_dir)
res = list(map(CPath, files)); assert len(res) > 0
# todo move the assert to get_files?
return tuple(res)
logger = LazyLogger(__package__, level='debug')
logger = LazyLogger(__name__, level='debug')
Sid = rexport.Sid
@ -31,7 +32,7 @@ Submission = rexport.Submission
Upvote = rexport.Upvote
def dal():
def dal() -> rexport.DAL:
# TODO lru cache? but be careful when it runs continuously
return rexport.DAL(get_sources())
@ -173,12 +174,12 @@ def get_events(*args, **kwargs) -> List[Event]:
return list(sorted(evit, key=lambda e: e.cmp_key))
def test():
def test() -> None:
get_events(backups=get_sources()[-1:])
list(saved())
def test_unfav():
def test_unfav() -> None:
events = get_events()
url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
uevents = [e for e in events if e.url == url]
@ -188,15 +189,15 @@ def test_unfav():
uf = uevents[1]
assert uf.text == 'unfavorited'
def test_get_all_saves():
# TODO move out..
def test_get_all_saves() -> None:
# TODO not sure if this is necessary anymore?
saves = list(saved())
# just check that they are unique..
make_dict(saves, key=lambda s: s.sid)
def test_disappearing():
def test_disappearing() -> None:
# eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
# but I guess it was just a short glitch... so whatever
saves = get_events()
@ -205,14 +206,14 @@ def test_disappearing():
assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
def test_unfavorite():
def test_unfavorite() -> None:
events = get_events()
unfavs = [s for s in events if s.text == 'unfavorited']
[xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)
def main():
def main() -> None:
# TODO eh. not sure why but parallel on seems to mess glumov up and cause OOM...
events = get_events(parallel=False)
print(len(events))


@ -18,7 +18,7 @@ from my.config import rescuetime as config
log = LazyLogger(__package__, level='info')
def _get_exports() -> List[Path]:
def inputs():
return get_files(config.export_path, '*.json')
@ -28,7 +28,7 @@ Model = rescuexport.Model
# TODO cache?
def get_model(last=0) -> Model:
return Model(_get_exports()[-last:])
return Model(inputs()[-last:])
def _without_errors():


@ -4,8 +4,9 @@
from setuptools import setup, find_namespace_packages # type: ignore
INSTALL_REQUIRES = [
'appdirs',
'pytz', # even though it's not needed by the core, it's so common anyway...
'appdirs', # very common, and makes it portable
'more-itertools', # it's just too useful and very common anyway
]

tests/common.py (new file, 113 additions)

@ -0,0 +1,113 @@
from pathlib import Path
from my.common import get_files
import pytest # type: ignore
def test_single_file():
'''
Regular file path is just returned as is.
'''
"Exception if it doesn't exist"
with pytest.raises(Exception):
get_files('/tmp/hpi_test/file.ext')
create('/tmp/hpi_test/file.ext')
'''
Couple of things:
1. Return type is a tuple, it's friendlier for hashing/caching
2. It always returns pathlib.Path instead of plain strings
'''
assert get_files('/tmp/hpi_test/file.ext') == (
Path('/tmp/hpi_test/file.ext'),
)
def test_multiple_files():
'''
If you pass a directory/multiple directories, it flattens the contents
'''
create('/tmp/hpi_test/dir1/')
create('/tmp/hpi_test/dir1/zzz')
create('/tmp/hpi_test/dir1/yyy')
# create('/tmp/hpi_test/dir1/whatever/') # TODO not sure about this... should really allow extra dirs
create('/tmp/hpi_test/dir2/')
create('/tmp/hpi_test/dir2/mmm')
create('/tmp/hpi_test/dir2/nnn')
create('/tmp/hpi_test/dir3/')
create('/tmp/hpi_test/dir3/ttt')
assert get_files([
Path('/tmp/hpi_test/dir3'), # it takes in Path as well as str
'/tmp/hpi_test/dir1',
]) == (
# the paths are always returned in sorted order (unless you pass sort=False)
Path('/tmp/hpi_test/dir1/yyy'),
Path('/tmp/hpi_test/dir1/zzz'),
Path('/tmp/hpi_test/dir3/ttt'),
)
def test_explicit_glob():
'''
You can pass a glob to restrict the extensions
'''
create('/tmp/hpi_test/file_3.zip')
create('/tmp/hpi_test/file_2.zip')
create('/tmp/hpi_test/ignoreme')
create('/tmp/hpi_test/file.zip')
# todo walrus operator would be great here...
expected = (
Path('/tmp/hpi_test/file_2.zip'),
Path('/tmp/hpi_test/file_3.zip'),
)
assert get_files('/tmp/hpi_test', 'file_*.zip') == expected
"named argument should work too"
assert get_files('/tmp/hpi_test', glob='file_*.zip') == expected
def test_implicit_glob():
'''
Asterisk in the path results in globbing too.
'''
# todo hopefully that makes sense? dunno why anyone would actually rely on asterisks in names..
# this is very convenient in configs, so people don't have to use some special types
create('/tmp/hpi_test/123/')
create('/tmp/hpi_test/123/dummy')
create('/tmp/hpi_test/123/file.zip')
create('/tmp/hpi_test/456/')
create('/tmp/hpi_test/456/dummy')
create('/tmp/hpi_test/456/file.zip')
assert get_files(['/tmp/hpi_test/*/*.zip']) == (
Path('/tmp/hpi_test/123/file.zip'),
Path('/tmp/hpi_test/456/file.zip'),
)
# TODO not sure if should uniquify if the filenames end up same?
# TODO not sure about the symlinks? and hidden files?
test_path = Path('/tmp/hpi_test')
def setup():
teardown()
test_path.mkdir()
def teardown():
import shutil
if test_path.is_dir():
shutil.rmtree(test_path)
def create(f: str) -> None:
if f.endswith('/'):
Path(f).mkdir()
else:
Path(f).touch()


@ -1,6 +1,5 @@
from my.instapaper import get_todos
from my.instapaper import pages
def test_get_todos():
for t in get_todos():
print(t)
def test_pages():
assert len(list(pages())) > 3

tests/lastfm.py (new file, 7 additions)

@ -0,0 +1,7 @@
from more_itertools import ilen
from my.lastfm import scrobbles
def test():
assert ilen(scrobbles()) > 1000