Merge pull request #37 from karlicoss/updates

various updates: implicit globs for get-files, mcachew type checking, modules cleanup
karlicoss, 2020-05-03 17:19:55 +01:00, committed by GitHub
commit 5aecc037e9
16 changed files with 285 additions and 170 deletions


@@ -20,8 +20,7 @@ from my.config import github as config
 import my.config.repos.ghexport.dal as ghexport
 
-logger = LazyLogger('my.github')
-# TODO __package__???
+logger = LazyLogger(__name__)
 
 class Event(NamedTuple):
@@ -32,56 +31,75 @@ class Event(NamedTuple):
     body: Optional[str]=None
 
+# TODO hmm. need some sort of abstract syntax for this...
 # TODO split further, title too
 def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:
+    # TODO would be nice to give access to raw event within timeline
+    eid = e['id']
     tp = e['type']
     pl = e['payload']
     rname = e['repo']['name']
+
+    mapping = {
+        'CreateEvent': 'created',
+        'DeleteEvent': 'deleted',
+    }
+
     if tp == 'ForkEvent':
         url = e['payload']['forkee']['html_url']
-        return f"forked {rname}", url, None
+        return f"{rname}: forked", url, None
     elif tp == 'PushEvent':
-        return f"pushed to {rname}", None, None
+        commits = pl['commits']
+        messages = [c['message'] for c in commits]
+        body = '\n'.join(messages)
+        return f"{rname}: pushed\n{body}", None, None
     elif tp == 'WatchEvent':
-        return f"watching {rname}", None, None
-    elif tp == 'CreateEvent':
-        # TODO eh, only weird API link?
-        return f"created {rname}", None, f'created_{rname}'
+        return f"{rname}: watching", None, None
+    elif tp in mapping:
+        what = mapping[tp]
+        rt = pl['ref_type']
+        ref = pl['ref']
+        # TODO link to branch? only contains weird API link though
+        # TODO hmm. include timestamp instead?
+        # breakpoint()
+        # TODO combine automatically instead
+        return f"{rname}: {what} {rt} {ref}", None, f'{rname}_{what}_{rt}_{ref}_{eid}'
    elif tp == 'PullRequestEvent':
         pr = pl['pull_request']
         action = pl['action']
         link = pr['html_url']
         title = pr['title']
-        return f"{action} PR {title}", link, f'pull_request_{link}'
+        return f"{rname}: {action} PR {title}", link, f'{rname}_{action}_pr_{link}'
     elif tp == "IssuesEvent":
         action = pl['action']
         iss = pl['issue']
         link = iss['html_url']
         title = iss['title']
-        return f"{action} issue {title}", link, None
+        return f"{rname}: {action} issue {title}", link, None
     elif tp == "IssueCommentEvent":
         com = pl['comment']
         link = com['html_url']
         iss = pl['issue']
         title = iss['title']
-        return f"commented on issue {title}", link, f'issue_comment_' + link
+        return f"{rname}: commented on issue {title}", link, f'issue_comment_' + link
     elif tp == "ReleaseEvent":
         action = pl['action']
         rel = pl['release']
         tag = rel['tag_name']
         link = rel['html_url']
-        return f"{action} {rname} [{tag}]", link, None
-    elif tp in (
-            "DeleteEvent",
-            "PublicEvent",
-    ):
-        return tp, None, None # TODO ???
+        return f"{rname}: {action} [{tag}]", link, None
+    elif tp in 'PublicEvent':
+        return f'{tp} {e}', None, None # TODO ???
     else:
         return tp, None, None
 
-def get_dal():
-    sources = get_files(config.export_dir, glob='*.json*')
+def inputs():
+    return get_files(config.export_dir, glob='*.json*')
+
+def _dal():
+    sources = inputs()
     sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
     return ghexport.DAL(sources)

@@ -218,7 +236,7 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:
 
 # TODO hmm. not good, need to be lazier?...
 @mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
-def iter_backup_events(dal=get_dal()) -> Iterator[Event]:
+def iter_backup_events(dal=_dal()) -> Iterator[Event]:
     for d in dal.events():
         yield _parse_event(d)
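For orientation — the new per-repo summary format, traced on a hypothetical event dict (fields limited to what _get_summary actually reads; the shape follows GitHub's events API as consumed above):

    event = {
        'id': '123',
        'type': 'PushEvent',
        'repo': {'name': 'karlicoss/HPI'},
        'payload': {'commits': [{'message': 'fix tests'}]},
    }
    summary, link, eid = _get_summary(event)
    # summary == 'karlicoss/HPI: pushed\nfix tests'; link and eid are None

Prefixing every summary with the repo name makes the resulting timeline entries group naturally by repository.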


@@ -1,7 +1,9 @@
+from glob import glob as do_glob
 from pathlib import Path
 import functools
 import types
-from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast
+from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
+import warnings
 
 from . import init
@@ -46,6 +48,7 @@ def the(l: Iterable[T]) -> T:
     return first
 
+# TODO more_itertools.bucket?
 def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
     res: Dict[K, List[T]] = {}
     for i in l:
@@ -106,9 +109,12 @@ from .kython.klogging import setup_logger, LazyLogger
 Paths = Union[Sequence[PathIsh], PathIsh]
 
-def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]:
+DEFAULT_GLOB = '*'
+def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
     """
     Helper function to avoid boilerplate.
+
+    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
     """
     # TODO FIXME mm, some wrapper to assert iterator isn't empty?
     sources: List[Path] = []
@@ -122,17 +128,38 @@ def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]:
         if src.is_dir():
             gp: Iterable[Path] = src.glob(glob)
             paths.extend(gp)
+        else:
+            ss = str(src)
+            if '*' in ss:
+                if glob != DEFAULT_GLOB:
+                    warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
+                paths.extend(map(Path, do_glob(ss)))
         else:
-            assert src.is_file(), src
-            # TODO FIXME assert matches glob??
-            paths.append(src)
+            else:
+                assert src.is_file(), src
+                # todo assert matches glob??
+                paths.append(src)
     if sort:
         paths = list(sorted(paths))
-    return paths
+    return tuple(paths)
 
 
-def mcachew(*args, **kwargs):
+# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from typing import Callable, TypeVar
+    from typing_extensions import Protocol
+    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
+    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
+    # ok, that's actually a super nice 'pattern'
+    F = TypeVar('F')
+    class McachewType(Protocol):
+        def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
+            ...
+
+    mcachew: McachewType
+
+def mcachew(*args, **kwargs): # type: ignore[no-redef]
     """
     Stands for 'Maybe cachew'.
     Defensive wrapper around @cachew to make it an optional dependency.
@@ -140,7 +167,6 @@ def mcachew(*args, **kwargs):
     try:
         import cachew
     except ModuleNotFoundError:
-        import warnings
         warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
         return lambda orig_func: orig_func
     else:
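Taken together, get_files now accepts three input shapes and always returns a sorted tuple; a usage sketch (paths are hypothetical):

    from my.common import get_files

    get_files('/exports/data.json')       # single file -> (Path('/exports/data.json'),)
    get_files('/exports', glob='*.json')  # directory + glob -> matching files, sorted
    get_files('/exports/*/*.json')        # '*' in the path itself triggers the implicit glob;
                                          # a non-default explicit glob= is ignored with a warning

The TYPE_CHECKING dance above is what keeps mypy happy: type checkers only see the `mcachew: McachewType` declaration with its precise Protocol signature, while at runtime the permissive *args/**kwargs wrapper (hence the `type: ignore[no-redef]`) keeps cachew an optional dependency.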


@@ -5,26 +5,21 @@
 Consumes data exported by https://github.com/karlicoss/backup-emfit
 """
 import json
-import logging
-from collections import OrderedDict as odict
 from dataclasses import dataclass
 from datetime import date, datetime, time, timedelta
-from itertools import groupby
 from pathlib import Path
 from typing import Dict, Iterator, List, NamedTuple, Any, cast
 
 import pytz
+from more_itertools import bucket
 
-from ..common import get_files, LazyLogger, cproperty, group_by_key, mcachew
+from ..common import get_files, LazyLogger, cproperty, mcachew
 
 from my.config import emfit as config
 
-logger = LazyLogger('my.emfit', level='info')
+logger = LazyLogger(__name__, level='info')
 
-# TODO FIXME remove?
-import kython
-timed = lambda f: kython.timed(f, logger=logger)
 
 def hhmm(minutes):
@@ -35,13 +30,10 @@ AWAKE = 4
 
 Sid = str
 
-# TODO use tz provider for that?
-_TZ = pytz.timezone(config.tz)
-
 # TODO use common tz thing?
 def fromts(ts) -> datetime:
-    dt = datetime.fromtimestamp(ts)
-    return _TZ.localize(dt)
+    dt = datetime.fromtimestamp(ts, tz=pytz.utc)
+    return dt
 
 
 class Mixin:
@@ -295,14 +287,14 @@ class Emfit(Mixin):
 
 # TODO move to common?
 def dir_hash(path: Path):
-    mtimes = tuple(p.stat().st_mtime for p in sorted(path.glob('*.json')))
+    mtimes = tuple(p.stat().st_mtime for p in get_files(path, glob='*.json'))
     return mtimes
 
+# TODO take __file__ into account somehow?
 @mcachew(cache_path=config.cache_path, hashf=dir_hash, logger=logger)
-def iter_datas_cached(path: Path) -> Iterator[Emfit]:
-    # TODO use get_files?
-    for f in sorted(path.glob('*.json')):
+def iter_datas(path: Path=config.export_path) -> Iterator[Emfit]:
+    for f in get_files(path, glob='*.json'):
         sid = f.stem
         if sid in config.excluded_sids:
             continue
@@ -311,20 +303,17 @@ def iter_datas_cached(path: Path) -> Iterator[Emfit]:
 
         yield from Emfit.make(em)
 
-def iter_datas(path=config.export_path) -> Iterator[Emfit]:
-    yield from iter_datas_cached(path)
-
 def get_datas() -> List[Emfit]:
     return list(sorted(iter_datas(), key=lambda e: e.start))
 # TODO move away old entries if there is a diff??
 
-@timed
 def by_night() -> Dict[date, Emfit]:
-    res: Dict[date, Emfit] = odict()
+    res: Dict[date, Emfit] = {}
     # TODO shit. I need some sort of interrupted sleep detection?
-    for dd, sleeps in group_by_key(get_datas(), key=lambda s: s.date).items():
+    grouped = bucket(get_datas(), key=lambda s: s.date)
+    for dd in grouped:
+        sleeps = list(grouped[dd])
         if len(sleeps) > 1:
             logger.warning("multiple sleeps per night, not handled yet: %s", sleeps)
             continue
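more_itertools.bucket is the lazy stand-in for the removed group_by_key helper: iterating the bucket yields the distinct keys, and indexing it yields that key's items. A minimal sketch with toy data:

    from more_itertools import bucket

    sleeps = ['2020-05-01/a', '2020-05-01/b', '2020-05-02/c']
    grouped = bucket(sleeps, key=lambda s: s.split('/')[0])
    for dd in grouped:             # yields the distinct keys: '2020-05-01', '2020-05-02'
        items = list(grouped[dd])  # that night's entries, consumed lazily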


@@ -15,10 +15,10 @@ from .common import get_files, LazyLogger
 
 from my.config import foursquare as config
 
-logger = LazyLogger(__package__)
+logger = LazyLogger(__name__)
 
-def _get_exports() -> List[Path]:
+def inputs():
     return get_files(config.export_path, '*.json')

@@ -62,7 +62,7 @@ class Place:
 
 def get_raw(fname=None):
     if fname is None:
-        fname = max(_get_exports())
+        fname = max(inputs())
     j = json.loads(Path(fname).read_text())
     assert isinstance(j, list)


@@ -3,50 +3,41 @@
 """
 from . import init
 
-from .common import PathIsh
+from .common import get_files
+from .error import Res, sort_res_by
 
-import my.config.repos.hypexport as hypexport
-from my.config.repos.hypexport import dal
+import my.config.repos.hypexport.dal as hypexport
 
 from my.config import hypothesis as config
 
-export_path: PathIsh = config.export_path
-
 ###
 
 from typing import List
 
-from .common import get_files, cproperty, group_by_key
-from .error import Res, sort_res_by
-
 # TODO weird. not sure why e.g. from dal import Highlight doesn't work..
-Highlight = dal.Highlight
-DAL = dal.DAL
-Page = dal.Page
+Highlight = hypexport.Highlight
+Page = hypexport.Page
 
-# TODO eh. not sure if I should rename everything to dao/DAO or not...
-def dao() -> DAL:
-    sources = get_files(export_path, '*.json')
-    model = DAL(sources)
-    return model
+def _dal() -> hypexport.DAL:
+    sources = get_files(config.export_path, '*.json')
+    return hypexport.DAL(sources)
 
-def get_highlights() -> List[Res[Highlight]]:
-    return sort_res_by(dao().highlights(), key=lambda h: h.created)
+def highlights() -> List[Res[Highlight]]:
+    return sort_res_by(_dal().highlights(), key=lambda h: h.created)
 
 # TODO eh. always provide iterators? although sort_res_by could be neat too...
-def get_pages() -> List[Res[Page]]:
-    return sort_res_by(dao().pages(), key=lambda h: h.created)
+def pages() -> List[Res[Page]]:
+    return sort_res_by(_dal().pages(), key=lambda h: h.created)
 
 # TODO move to side tests?
 def test():
-    get_pages()
-    get_highlights()
+    list(pages())
+    list(highlights())
 
 def _main():

@@ -55,3 +46,6 @@ def _main():
 
 if __name__ == '__main__':
     _main()
+
+get_highlights = highlights # TODO deprecate
+get_pages = pages # TODO deprecate
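The trailing aliases keep old call sites importable while they migrate to the new names:

    from my.hypothesis import get_highlights, highlights
    assert get_highlights is highlights  # the old name is just another binding to the same function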


@@ -1,55 +1,32 @@
 """
 Instapaper bookmarks, highlights and annotations
 """
-from pathlib import Path
-from typing import NamedTuple, Optional, List, Iterator
-
-from .common import group_by_key, PathIsh, get_files
+from .common import get_files
 
 from my.config import instapaper as config
 
 import my.config.repos.instapexport.dal as dal
 
-def _get_files():
-    return get_files(config.export_path, glob='*.json')
+Highlight = dal.Highlight
+Bookmark = dal.Bookmark
 
-def get_dal() -> dal.DAL:
-    return dal.DAL(_get_files())
+def inputs():
+    return get_files(config.export_path)
 
-# TODO meh, come up with better name...
-class HighlightWithBm(NamedTuple):
-    highlight: dal.Highlight
-    bookmark: dal.Bookmark
+def _dal() -> dal.DAL:
+    return dal.DAL(inputs())
 
-def iter_highlights(**kwargs) -> Iterator[HighlightWithBm]:
-    # meh...
-    dl = get_dal()
-    hls = dl.highlights()
-    bms = dl.bookmarks()
-    for _, h in hls.items():
-        yield HighlightWithBm(highlight=h, bookmark=bms[h.bid])
+def pages():
+    return _dal().pages()
+get_pages = pages # todo also deprecate..
 
-# def get_highlights(**kwargs) -> List[Highlight]:
-#     return list(iter_highlights(**kwargs))
-
-def get_pages():
-    return get_dal().pages()
-
-def get_todos() -> Iterator[HighlightWithBm]:
-    def is_todo(hl: HighlightWithBm):
-        h = hl.highlight
-        note = h.note or ''
-        note = note.lstrip().lower()
-        return note.startswith('todo')
-    return filter(is_todo, iter_highlights())
-
-def main():
-    for h in get_todos():
-        print(h)
+# TODO dunno, move this to private?
+def is_todo(hl: Highlight) -> bool:
+    note = hl.note or ''
+    note = note.lstrip().lower()
+    return note.startswith('todo')


@@ -2,27 +2,31 @@
 Last.fm scrobbles
 '''
-from .. import init
-
-from functools import lru_cache
-from typing import NamedTuple, Dict, Any
+from ..common import get_files, mcachew, Json
 
 from datetime import datetime
-from pathlib import Path
 import json
+from pathlib import Path
+from typing import NamedTuple, Any, Sequence, Iterable
 
 import pytz
 
 from my.config import lastfm as config
 
-# TODO Json type?
 # TODO memoised properties?
 # TODO lazy mode and eager mode?
 # lazy is a bit nicer in terms of more flexibility and less processing?
 # eager is a bit more explicit for error handling
 
-class Scrobble(NamedTuple):
-    raw: Dict[str, Any]
+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)
+
+class Scrobble(NamedTuple):
+    raw: Json
 
+    # TODO mm, no timezone? hopefully it's UTC
     @property
     def dt(self) -> datetime:
         ts = int(self.raw['date'])
@@ -45,22 +49,10 @@ class Scrobble(NamedTuple):
 
 # TODO could also be nice to make generic? maybe even depending on eagerness
-# TODO memoise...?
-# TODO watch out, if we keep the app running it might expire
-def _iter_scrobbles():
-    # TODO use get_files
-    last = max(Path(config.export_path).glob('*.json'))
-    # TODO mm, no timezone? hopefuly it's UTC
+@mcachew(hashf=lambda: inputs())
+def scrobbles() -> Iterable[Scrobble]:
+    last = max(inputs())
     j = json.loads(last.read_text())
     for raw in j:
         yield Scrobble(raw=raw)
-
-@lru_cache(1)
-def get_scrobbles():
-    return list(sorted(_iter_scrobbles(), key=lambda s: s.dt))
-
-def test():
-    assert len(get_scrobbles()) > 1000
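Here hashf=lambda: inputs() ties the cache to the current set of export files, which is where get_files returning a (hashable, deterministic) tuple pays off. A toy sketch of the assumed invalidation contract — cachew's real logic lives in the library:

    def maybe_cached(hashf):
        # recompute the wrapped function whenever hashf() changes
        def deco(fn):
            state = {}
            def wrapper():
                key = hashf()
                if state.get('key') != key:  # inputs changed -> refresh
                    state['key'], state['value'] = key, list(fn())
                return state['value']
            return wrapper
        return deco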


@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 # pip install influxdb
 from influxdb import InfluxDBClient # type: ignore
 
-from my.lastfm import get_scrobbles
+from my.lastfm import scrobbles
 
-def main():
-    scrobbles = get_scrobbles()
+def main() -> None:
+    scrobbles = scrobbles()
     client = InfluxDBClient()
     # TODO client.create_database('lastfm')


@@ -17,7 +17,7 @@ from ..error import Res
 
 from my.config import photos as config
 
-log = LazyLogger('my.photos')
+log = LazyLogger(__name__)

@@ -46,13 +46,12 @@ class Photo(NamedTuple):
         raise RuntimeError(f'Weird path {self.path}, cant match against anything')
 
     @property
-    def linkname(self) -> str:
+    def name(self) -> str:
         return self._basename.strip('/')
 
     @property
     def url(self) -> str:
-        PHOTOS_URL = 'TODO FIXME'
-        return PHOTOS_URL + self._basename
+        return f'{config.base_url}{self._basename}'
 
 from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref


@@ -8,7 +8,6 @@ from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional
 import json
 
 import pytz
-# TODO declare DEPENDS = [pytz??]
 
 from ..common import LazyLogger, get_files


@@ -15,13 +15,14 @@ import my.config.repos.rexport.dal as rexport
 
 def get_sources() -> Sequence[Path]:
     # TODO use zstd?
-    # TODO maybe add assert to get_files? (and allow to suppress it)
-    files = get_files(config.export_dir, glob='*.json.xz')
+    # TODO rename to export_path?
+    files = get_files(config.export_dir)
     res = list(map(CPath, files)); assert len(res) > 0
+    # todo move the assert to get_files?
     return tuple(res)
 
-logger = LazyLogger(__package__, level='debug')
+logger = LazyLogger(__name__, level='debug')
 
 Sid = rexport.Sid

@@ -31,7 +32,7 @@ Submission = rexport.Submission
 Upvote = rexport.Upvote
 
-def dal():
+def dal() -> rexport.DAL:
     # TODO lru cache? but be careful when it runs continuously
     return rexport.DAL(get_sources())

@@ -173,12 +174,12 @@ def get_events(*args, **kwargs) -> List[Event]:
     return list(sorted(evit, key=lambda e: e.cmp_key))
 
-def test():
+def test() -> None:
     get_events(backups=get_sources()[-1:])
     list(saved())
 
-def test_unfav():
+def test_unfav() -> None:
     events = get_events()
     url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
     uevents = [e for e in events if e.url == url]

@@ -188,15 +189,15 @@ def test_unfav():
 
     uf = uevents[1]
     assert uf.text == 'unfavorited'
 
-# TODO move out..
-def test_get_all_saves():
+def test_get_all_saves() -> None:
     # TODO not sure if this is necessary anymore?
     saves = list(saved())
     # just check that they are unique..
     make_dict(saves, key=lambda s: s.sid)
 
-def test_disappearing():
+def test_disappearing() -> None:
     # eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
     # but I guess it was just a short glitch... so whatever
     saves = get_events()

@@ -205,14 +206,14 @@ def test_disappearing():
 
     assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
 
-def test_unfavorite():
+def test_unfavorite() -> None:
     events = get_events()
     unfavs = [s for s in events if s.text == 'unfavorited']
     [xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
     assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)
 
-def main():
+def main() -> None:
     # TODO eh. not sure why but parallel on seems to mess glumov up and cause OOM...
     events = get_events(parallel=False)
     print(len(events))


@@ -18,7 +18,7 @@ from my.config import rescuetime as config
 
 log = LazyLogger(__package__, level='info')
 
-def _get_exports() -> List[Path]:
+def inputs():
     return get_files(config.export_path, '*.json')

@@ -28,7 +28,7 @@ Model = rescuexport.Model
 
 # TODO cache?
 def get_model(last=0) -> Model:
-    return Model(_get_exports()[-last:])
+    return Model(inputs()[-last:])
 
 def _without_errors():


@@ -4,8 +4,9 @@
 from setuptools import setup, find_namespace_packages # type: ignore
 
 INSTALL_REQUIRES = [
-    'appdirs',
     'pytz', # even though it's not needed by the core, it's so common anyway...
+    'appdirs', # very common, and makes it portable
+    'more-itertools', # it's just too useful and very common anyway
 ]

tests/common.py (new file, 113 lines)

@@ -0,0 +1,113 @@
+from pathlib import Path
+
+from my.common import get_files
+
+import pytest # type: ignore
+
+
+def test_single_file():
+    '''
+    Regular file path is just returned as is.
+    '''
+
+    "Exception if it doesn't exist"
+    with pytest.raises(Exception):
+        get_files('/tmp/hpi_test/file.ext')
+
+    create('/tmp/hpi_test/file.ext')
+
+    '''
+    Couple of things:
+    1. Return type is a tuple, it's friendlier for hashing/caching
+    2. It always returns pathlib.Path instead of plain strings
+    '''
+    assert get_files('/tmp/hpi_test/file.ext') == (
+        Path('/tmp/hpi_test/file.ext'),
+    )
+
+
+def test_multiple_files():
+    '''
+    If you pass a directory/multiple directories, it flattens the contents
+    '''
+    create('/tmp/hpi_test/dir1/')
+    create('/tmp/hpi_test/dir1/zzz')
+    create('/tmp/hpi_test/dir1/yyy')
+    # create('/tmp/hpi_test/dir1/whatever/') # TODO not sure about this... should really allow extra dirs
+    create('/tmp/hpi_test/dir2/')
+    create('/tmp/hpi_test/dir2/mmm')
+    create('/tmp/hpi_test/dir2/nnn')
+    create('/tmp/hpi_test/dir3/')
+    create('/tmp/hpi_test/dir3/ttt')
+
+    assert get_files([
+        Path('/tmp/hpi_test/dir3'), # it takes in Path as well as str
+        '/tmp/hpi_test/dir1',
+    ]) == (
+        # the paths are always returned in sorted order (unless you pass sort=False)
+        Path('/tmp/hpi_test/dir1/yyy'),
+        Path('/tmp/hpi_test/dir1/zzz'),
+        Path('/tmp/hpi_test/dir3/ttt'),
+    )
+
+
+def test_explicit_glob():
+    '''
+    You can pass a glob to restrict the extensions
+    '''
+    create('/tmp/hpi_test/file_3.zip')
+    create('/tmp/hpi_test/file_2.zip')
+    create('/tmp/hpi_test/ignoreme')
+    create('/tmp/hpi_test/file.zip')
+
+    # todo walrus operator would be great here...
+    expected = (
+        Path('/tmp/hpi_test/file_2.zip'),
+        Path('/tmp/hpi_test/file_3.zip'),
+    )
+    assert get_files('/tmp/hpi_test', 'file_*.zip') == expected
+
+    "named argument should work too"
+    assert get_files('/tmp/hpi_test', glob='file_*.zip') == expected
+
+
+def test_implicit_glob():
+    '''
+    An asterisk in the path results in globbing too.
+    '''
+    # todo hopefully that makes sense? dunno why would anyone actually rely on asterisks in names..
+    # this is very convenient in configs, so people don't have to use some special types
+
+    create('/tmp/hpi_test/123/')
+    create('/tmp/hpi_test/123/dummy')
+    create('/tmp/hpi_test/123/file.zip')
+    create('/tmp/hpi_test/456/')
+    create('/tmp/hpi_test/456/dummy')
+    create('/tmp/hpi_test/456/file.zip')
+
+    assert get_files(['/tmp/hpi_test/*/*.zip']) == (
+        Path('/tmp/hpi_test/123/file.zip'),
+        Path('/tmp/hpi_test/456/file.zip'),
+    )
+
+# TODO not sure if should uniquify if the filenames end up same?
+# TODO not sure about the symlinks? and hidden files?
+
+test_path = Path('/tmp/hpi_test')
+def setup():
+    teardown()
+    test_path.mkdir()
+
+def teardown():
+    import shutil
+    if test_path.is_dir():
+        shutil.rmtree(test_path)
+
+def create(f: str) -> None:
+    if f.endswith('/'):
+        Path(f).mkdir()
+    else:
+        Path(f).touch()


@@ -1,6 +1,5 @@
-from my.instapaper import get_todos
+from my.instapaper import pages
 
-def test_get_todos():
-    for t in get_todos():
-        print(t)
+def test_pages():
+    assert len(list(pages())) > 3

tests/lastfm.py (new file, 7 lines)

@@ -0,0 +1,7 @@
+from more_itertools import ilen
+
+from my.lastfm import scrobbles
+
+
+def test():
+    assert ilen(scrobbles()) > 1000