Merge pull request #37 from karlicoss/updates
various updates: implicit globs for get-files, mcachew type checking, modules cleanup
Commit 5aecc037e9
16 changed files with 285 additions and 170 deletions
@@ -20,8 +20,7 @@ from my.config import github as config
 import my.config.repos.ghexport.dal as ghexport


-logger = LazyLogger('my.github')
-# TODO __package__???
+logger = LazyLogger(__name__)


 class Event(NamedTuple):
@@ -32,56 +31,75 @@ class Event(NamedTuple):
     body: Optional[str]=None


+# TODO hmm. need some sort of abstract syntax for this...
 # TODO split further, title too
 def _get_summary(e) -> Tuple[str, Optional[str], Optional[str]]:
+    # TODO would be nice to give access to raw event withing timeline
+    eid = e['id']
     tp = e['type']
     pl = e['payload']
     rname = e['repo']['name']

+    mapping = {
+        'CreateEvent': 'created',
+        'DeleteEvent': 'deleted',
+    }

     if tp == 'ForkEvent':
         url = e['payload']['forkee']['html_url']
-        return f"forked {rname}", url, None
+        return f"{rname}: forked", url, None
     elif tp == 'PushEvent':
-        return f"pushed to {rname}", None, None
+        commits = pl['commits']
+        messages = [c['message'] for c in commits]
+        body = '\n'.join(messages)
+        return f"{rname}: pushed\n{body}", None, None
     elif tp == 'WatchEvent':
-        return f"watching {rname}", None, None
-    elif tp == 'CreateEvent':
-        # TODO eh, only weird API link?
-        return f"created {rname}", None, f'created_{rname}'
+        return f"{rname}: watching", None, None
+    elif tp in mapping:
+        what = mapping[tp]
+        rt = pl['ref_type']
+        ref = pl['ref']
+        # TODO link to branch? only contains weird API link though
+        # TODO hmm. include timestamp instead?
+        # breakpoint()
+        # TODO combine automatically instead
+        return f"{rname}: {what} {rt} {ref}", None, f'{rname}_{what}_{rt}_{ref}_{eid}'
     elif tp == 'PullRequestEvent':
         pr = pl['pull_request']
         action = pl['action']
         link = pr['html_url']
         title = pr['title']
-        return f"{action} PR {title}", link, f'pull_request_{link}'
+        return f"{rname}: {action} PR {title}", link, f'{rname}_{action}_pr_{link}'
     elif tp == "IssuesEvent":
         action = pl['action']
         iss = pl['issue']
         link = iss['html_url']
         title = iss['title']
-        return f"{action} issue {title}", link, None
+        return f"{rname}: {action} issue {title}", link, None
     elif tp == "IssueCommentEvent":
         com = pl['comment']
         link = com['html_url']
         iss = pl['issue']
         title = iss['title']
-        return f"commented on issue {title}", link, f'issue_comment_' + link
+        return f"{rname}: commented on issue {title}", link, f'issue_comment_' + link
     elif tp == "ReleaseEvent":
         action = pl['action']
         rel = pl['release']
         tag = rel['tag_name']
         link = rel['html_url']
-        return f"{action} {rname} [{tag}]", link, None
-    elif tp in (
-        "DeleteEvent",
-        "PublicEvent",
-    ):
-        return tp, None, None # TODO ???
+        return f"{rname}: {action} [{tag}]", link, None
+    elif tp in 'PublicEvent':
+        return f'{tp} {e}', None, None # TODO ???
     else:
         return tp, None, None


-def get_dal():
-    sources = get_files(config.export_dir, glob='*.json*')
+def inputs():
+    return get_files(config.export_dir, glob='*.json*')


+def _dal():
+    sources = inputs()
     sources = list(map(CPath, sources)) # TODO maybe move it to get_files? e.g. compressed=True arg?
     return ghexport.DAL(sources)

@@ -218,7 +236,7 @@ def iter_gdpr_events() -> Iterator[Res[Event]]:

 # TODO hmm. not good, need to be lazier?...
 @mcachew(config.cache_dir, hashf=lambda dal: dal.sources)
-def iter_backup_events(dal=get_dal()) -> Iterator[Event]:
+def iter_backup_events(dal=_dal()) -> Iterator[Event]:
     for d in dal.events():
         yield _parse_event(d)

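Several modules touched by this PR converge on the same layout: a small inputs() that locates the raw export files, and a private _dal() that builds the data access layer from them. A minimal sketch of that convention (the annotations and the list() call are illustrative; the real github module above additionally wraps sources in CPath):

from pathlib import Path
from typing import Sequence

from my.common import get_files
from my.config import github as config
import my.config.repos.ghexport.dal as ghexport

def inputs() -> Sequence[Path]:
    # get_files accepts a file, a directory, or a glob-ish path from the config
    return get_files(config.export_dir, glob='*.json*')

def _dal() -> ghexport.DAL:
    # build the DAL lazily from whatever inputs() found
    return ghexport.DAL(list(inputs()))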
my/common.py (38 changed lines)

@@ -1,7 +1,9 @@
+from glob import glob as do_glob
 from pathlib import Path
 import functools
 import types
-from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast
+from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
+import warnings

 from . import init

@@ -46,6 +48,7 @@ def the(l: Iterable[T]) -> T:
     return first


+# TODO more_itertools.bucket?
 def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
     res: Dict[K, List[T]] = {}
     for i in l:
@@ -106,9 +109,12 @@ from .kython.klogging import setup_logger, LazyLogger

 Paths = Union[Sequence[PathIsh], PathIsh]

-def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]:
+DEFAULT_GLOB = '*'
+def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
     """
     Helper function to avoid boilerplate.

+    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
     """
     # TODO FIXME mm, some wrapper to assert iterator isn't empty?
     sources: List[Path] = []
@@ -122,17 +128,38 @@ def get_files(pp: Paths, glob: str, sort: bool=True) -> List[Path]:
         if src.is_dir():
             gp: Iterable[Path] = src.glob(glob)
             paths.extend(gp)
+        else:
+            ss = str(src)
+            if '*' in ss:
+                if glob != DEFAULT_GLOB:
+                    warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
+                paths.extend(map(Path, do_glob(ss)))
             else:
                 assert src.is_file(), src
-                # TODO FIXME assert matches glob??
+                # todo assert matches glob??
                 paths.append(src)

     if sort:
         paths = list(sorted(paths))
-    return paths
+    return tuple(paths)


-def mcachew(*args, **kwargs):
+# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from typing import Callable, TypeVar
+    from typing_extensions import Protocol
+    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
+    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
+    # ok, that's actually a super nice 'pattern'
+    F = TypeVar('F')
+    class McachewType(Protocol):
+        def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
+            ...
+
+    mcachew: McachewType
+
+def mcachew(*args, **kwargs): # type: ignore[no-redef]
     """
     Stands for 'Maybe cachew'.
     Defensive wrapper around @cachew to make it an optional dependency.
@@ -140,7 +167,6 @@ def mcachew(*args, **kwargs):
     try:
         import cachew
     except ModuleNotFoundError:
-        import warnings
         warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
         return lambda orig_func: orig_func
     else:

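The mcachew typing trick above is a general pattern: at runtime the decorator stays a flexible *args/**kwargs wrapper, but under TYPE_CHECKING it is declared with a precise Protocol signature so mypy checks the call sites. A minimal self-contained sketch of the same idea, with made-up names (not the repo's code):

from typing import TYPE_CHECKING, Any, Callable, TypeVar

if TYPE_CHECKING:
    from typing_extensions import Protocol

    F = TypeVar('F')

    class CachedType(Protocol):
        # the signature mypy enforces at call sites
        def __call__(self, cache_path: Any = None, *, logger: Any = None) -> Callable[[F], F]: ...

    cached: CachedType

def cached(*args, **kwargs):  # type: ignore[no-redef]
    # runtime implementation: a pass-through stand-in for the real wrapper
    def wrapper(func):
        return func
    return wrapper

Call sites like @cached(cache_path='...', logger=log) get checked against the Protocol, while at runtime nothing beyond the standard library is imported.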
@@ -5,26 +5,21 @@
 Consumes data exported by https://github.com/karlicoss/backup-emfit
 """
 import json
-import logging
-from collections import OrderedDict as odict
 from dataclasses import dataclass
 from datetime import date, datetime, time, timedelta
+from itertools import groupby
 from pathlib import Path
 from typing import Dict, Iterator, List, NamedTuple, Any, cast

 import pytz
+from more_itertools import bucket

-from ..common import get_files, LazyLogger, cproperty, group_by_key, mcachew
+from ..common import get_files, LazyLogger, cproperty, mcachew

 from my.config import emfit as config


-logger = LazyLogger('my.emfit', level='info')
+logger = LazyLogger(__name__, level='info')


-# TODO FIXME remove?
-import kython
-timed = lambda f: kython.timed(f, logger=logger)


 def hhmm(minutes):
@@ -35,13 +30,10 @@ AWAKE = 4

 Sid = str

-# TODO use tz provider for that?
-_TZ = pytz.timezone(config.tz)

 # TODO use common tz thing?
 def fromts(ts) -> datetime:
-    dt = datetime.fromtimestamp(ts)
-    return _TZ.localize(dt)
+    dt = datetime.fromtimestamp(ts, tz=pytz.utc)
+    return dt


 class Mixin:
@@ -295,14 +287,14 @@ class Emfit(Mixin):

 # TODO move to common?
 def dir_hash(path: Path):
-    mtimes = tuple(p.stat().st_mtime for p in sorted(path.glob('*.json')))
+    mtimes = tuple(p.stat().st_mtime for p in get_files(path, glob='*.json'))
     return mtimes


+# TODO take __file__ into account somehow?
 @mcachew(cache_path=config.cache_path, hashf=dir_hash, logger=logger)
-def iter_datas_cached(path: Path) -> Iterator[Emfit]:
-    # TODO use get_files?
-    for f in sorted(path.glob('*.json')):
+def iter_datas(path: Path=config.export_path) -> Iterator[Emfit]:
+    for f in get_files(path, glob='*.json'):
         sid = f.stem
         if sid in config.excluded_sids:
             continue
@@ -311,20 +303,17 @@ def iter_datas_cached(path: Path) -> Iterator[Emfit]:
         yield from Emfit.make(em)


-def iter_datas(path=config.export_path) -> Iterator[Emfit]:
-    yield from iter_datas_cached(path)
-
-
 def get_datas() -> List[Emfit]:
     return list(sorted(iter_datas(), key=lambda e: e.start))
 # TODO move away old entries if there is a diff??


-@timed
 def by_night() -> Dict[date, Emfit]:
-    res: Dict[date, Emfit] = odict()
+    res: Dict[date, Emfit] = {}
     # TODO shit. I need some sort of interrupted sleep detection?
-    for dd, sleeps in group_by_key(get_datas(), key=lambda s: s.date).items():
+    grouped = bucket(get_datas(), key=lambda s: s.date)
+    for dd in grouped:
+        sleeps = list(grouped[dd])
         if len(sleeps) > 1:
             logger.warning("multiple sleeps per night, not handled yet: %s", sleeps)
             continue

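by_night now groups with more_itertools.bucket instead of the local group_by_key helper. A small illustration of the bucket API as it is used above, on made-up toy data:

from more_itertools import bucket

words = ['apple', 'avocado', 'banana', 'blueberry', 'cherry']
grouped = bucket(words, key=lambda w: w[0])  # lazy: nothing is consumed yet

for k in grouped:                 # iterating the bucket yields the distinct keys
    print(k, list(grouped[k]))    # grouped[k] iterates the items sharing that key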
@@ -15,10 +15,10 @@ from .common import get_files, LazyLogger
 from my.config import foursquare as config


-logger = LazyLogger(__package__)
+logger = LazyLogger(__name__)


-def _get_exports() -> List[Path]:
+def inputs():
     return get_files(config.export_path, '*.json')


@@ -62,7 +62,7 @@ class Place:

 def get_raw(fname=None):
     if fname is None:
-        fname = max(_get_exports())
+        fname = max(inputs())
     j = json.loads(Path(fname).read_text())
     assert isinstance(j, list)

@@ -3,50 +3,41 @@
 """
 from . import init

-from .common import PathIsh
+from .common import get_files
+from .error import Res, sort_res_by

-import my.config.repos.hypexport as hypexport
-from my.config.repos.hypexport import dal
-
+import my.config.repos.hypexport.dal as hypexport
 from my.config import hypothesis as config
-export_path: PathIsh = config.export_path

 ###

 from typing import List

-from .common import get_files, cproperty, group_by_key
-from .error import Res, sort_res_by


 # TODO weird. not sure why e.g. from dal import Highlight doesn't work..
-Highlight = dal.Highlight
-DAL = dal.DAL
-Page = dal.Page
+Highlight = hypexport.Highlight
+Page = hypexport.Page


 # TODO eh. not sure if I should rename everything to dao/DAO or not...
-def dao() -> DAL:
-    sources = get_files(export_path, '*.json')
-    model = DAL(sources)
-    return model
+def _dal() -> hypexport.DAL:
+    sources = get_files(config.export_path, '*.json')
+    return hypexport.DAL(sources)


-def get_highlights() -> List[Res[Highlight]]:
-    return sort_res_by(dao().highlights(), key=lambda h: h.created)
+def highlights() -> List[Res[Highlight]]:
+    return sort_res_by(_dal().highlights(), key=lambda h: h.created)


 # TODO eh. always provide iterators? although sort_res_by could be neat too...
-def get_pages() -> List[Res[Page]]:
-    return sort_res_by(dao().pages(), key=lambda h: h.created)
+def pages() -> List[Res[Page]]:
+    return sort_res_by(_dal().pages(), key=lambda h: h.created)


 # TODO move to side tests?
 def test():
-    get_pages()
-    get_highlights()
+    list(pages())
+    list(highlights())


 def _main():
@@ -55,3 +46,6 @@ def _main():

 if __name__ == '__main__':
     _main()
+
+get_highlights = highlights # TODO deprecate
+get_pages = pages # TODO deprecate

@@ -1,55 +1,32 @@
 """
 Instapaper bookmarks, highlights and annotations
 """
-from pathlib import Path
-from typing import NamedTuple, Optional, List, Iterator
-
-from .common import group_by_key, PathIsh, get_files
+from .common import get_files

 from my.config import instapaper as config
 import my.config.repos.instapexport.dal as dal


-def _get_files():
-    return get_files(config.export_path, glob='*.json')
+Highlight = dal.Highlight
+Bookmark = dal.Bookmark


-def get_dal() -> dal.DAL:
-    return dal.DAL(_get_files())
+def inputs():
+    return get_files(config.export_path)


-# TODO meh, come up with better name...
-class HighlightWithBm(NamedTuple):
-    highlight: dal.Highlight
-    bookmark: dal.Bookmark
+def _dal() -> dal.DAL:
+    return dal.DAL(inputs())


-def iter_highlights(**kwargs) -> Iterator[HighlightWithBm]:
-    # meh...
-    dl = get_dal()
-    hls = dl.highlights()
-    bms = dl.bookmarks()
-    for _, h in hls.items():
-        yield HighlightWithBm(highlight=h, bookmark=bms[h.bid])
+def pages():
+    return _dal().pages()
+get_pages = pages # todo also deprecate..


-# def get_highlights(**kwargs) -> List[Highlight]:
-#     return list(iter_highlights(**kwargs))
-def get_pages():
-    return get_dal().pages()
-
-
-def get_todos() -> Iterator[HighlightWithBm]:
-    def is_todo(hl: HighlightWithBm):
-        h = hl.highlight
-        note = h.note or ''
+# TODO dunno, move this to private?
+def is_todo(hl: Highlight) -> bool:
+    note = hl.note or ''
     note = note.lstrip().lower()
     return note.startswith('todo')
-    return filter(is_todo, iter_highlights())
-
-
-def main():
-    for h in get_todos():
-        print(h)

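The removed get_todos machinery is not replaced directly; something equivalent can be assembled from the new helpers. A rough sketch, assuming dal.DAL.highlights() still returns a mapping keyed by id as the removed iter_highlights code did (this snippet is illustrative, not part of the diff):

from my.instapaper import _dal, is_todo

def todos():
    # assumes highlights() returns {id: Highlight}, as the old code implied
    return [h for h in _dal().highlights().values() if is_todo(h)]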
@@ -2,27 +2,31 @@
 Last.fm scrobbles
 '''

-from .. import init
-
-from functools import lru_cache
-from typing import NamedTuple, Dict, Any
+from ..common import get_files, mcachew, Json
 from datetime import datetime
-from pathlib import Path
 import json
+from pathlib import Path
+from typing import NamedTuple, Any, Sequence, Iterable

 import pytz

 from my.config import lastfm as config

-# TODO Json type?
 # TODO memoised properties?
 # TODO lazy mode and eager mode?
 # lazy is a bit nicer in terms of more flexibility and less processing?
 # eager is a bit more explicit for error handling

-class Scrobble(NamedTuple):
-    raw: Dict[str, Any]
+def inputs() -> Sequence[Path]:
+    return get_files(config.export_path)


+class Scrobble(NamedTuple):
+    raw: Json

+    # TODO mm, no timezone? hopefuly it's UTC
     @property
     def dt(self) -> datetime:
         ts = int(self.raw['date'])
@@ -45,22 +49,10 @@ class Scrobble(NamedTuple):
 # TODO could also be nice to make generic? maybe even depending on eagerness


-# TODO memoise...?
-# TODO watch out, if we keep the app running it might expire
-def _iter_scrobbles():
-    # TODO use get_files
-    last = max(Path(config.export_path).glob('*.json'))
-    # TODO mm, no timezone? hopefuly it's UTC
+@mcachew(hashf=lambda: inputs())
+def scrobbles() -> Iterable[Scrobble]:
+    last = max(inputs())
     j = json.loads(last.read_text())

     for raw in j:
         yield Scrobble(raw=raw)
-
-
-@lru_cache(1)
-def get_scrobbles():
-    return list(sorted(_iter_scrobbles(), key=lambda s: s.dt))
-
-
-def test():
-    assert len(get_scrobbles()) > 1000

@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 # pip install influxdb
 from influxdb import InfluxDBClient # type: ignore
-from my.lastfm import get_scrobbles
+from my.lastfm import scrobbles


-def main():
-    scrobbles = get_scrobbles()
+def main() -> None:
+    scrobbles = scrobbles()
     client = InfluxDBClient()
     # TODO client.create_database('lastfm')

@@ -17,7 +17,7 @@ from ..error import Res
 from my.config import photos as config


-log = LazyLogger('my.photos')
+log = LazyLogger(__name__)



@@ -46,13 +46,12 @@ class Photo(NamedTuple):
         raise RuntimeError(f'Weird path {self.path}, cant match against anything')

     @property
-    def linkname(self) -> str:
+    def name(self) -> str:
         return self._basename.strip('/')

     @property
     def url(self) -> str:
-        PHOTOS_URL = 'TODO FIXME'
-        return PHOTOS_URL + self._basename
+        return f'{config.base_url}{self._basename}'


 from .utils import get_exif_from_file, ExifTags, Exif, dt_from_path, convert_ref

@@ -8,7 +8,6 @@ from typing import List, Dict, Iterator, NamedTuple, Sequence, Optional
 import json

 import pytz
-# TODO declare DEPENDS = [pytz??]

 from ..common import LazyLogger, get_files

my/reddit.py (23 changed lines)

@@ -15,13 +15,14 @@ import my.config.repos.rexport.dal as rexport

 def get_sources() -> Sequence[Path]:
     # TODO use zstd?
-    # TODO maybe add assert to get_files? (and allow to suppress it)
-    files = get_files(config.export_dir, glob='*.json.xz')
+    # TODO rename to export_path?
+    files = get_files(config.export_dir)
     res = list(map(CPath, files)); assert len(res) > 0
+    # todo move the assert to get_files?
     return tuple(res)


-logger = LazyLogger(__package__, level='debug')
+logger = LazyLogger(__name__, level='debug')


 Sid = rexport.Sid
@@ -31,7 +32,7 @@ Submission = rexport.Submission
 Upvote = rexport.Upvote


-def dal():
+def dal() -> rexport.DAL:
     # TODO lru cache? but be careful when it runs continuously
     return rexport.DAL(get_sources())

@@ -173,12 +174,12 @@ def get_events(*args, **kwargs) -> List[Event]:
     return list(sorted(evit, key=lambda e: e.cmp_key))


-def test():
+def test() -> None:
     get_events(backups=get_sources()[-1:])
     list(saved())


-def test_unfav():
+def test_unfav() -> None:
     events = get_events()
     url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
     uevents = [e for e in events if e.url == url]
@@ -188,15 +189,15 @@ def test_unfav():
     uf = uevents[1]
     assert uf.text == 'unfavorited'

-def test_get_all_saves():
+# TODO move out..
+def test_get_all_saves() -> None:
     # TODO not sure if this is necesasry anymore?
     saves = list(saved())
     # just check that they are unique..
     make_dict(saves, key=lambda s: s.sid)


-def test_disappearing():
+def test_disappearing() -> None:
     # eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
     # but I guess it was just a short glitch... so whatever
     saves = get_events()
@@ -205,14 +206,14 @@ def test_disappearing():
     assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)


-def test_unfavorite():
+def test_unfavorite() -> None:
     events = get_events()
     unfavs = [s for s in events if s.text == 'unfavorited']
     [xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
     assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)


-def main():
+def main() -> None:
     # TODO eh. not sure why but parallel on seems to mess glumov up and cause OOM...
     events = get_events(parallel=False)
     print(len(events))

@@ -18,7 +18,7 @@ from my.config import rescuetime as config
 log = LazyLogger(__package__, level='info')


-def _get_exports() -> List[Path]:
+def inputs():
     return get_files(config.export_path, '*.json')


@@ -28,7 +28,7 @@ Model = rescuexport.Model

 # TODO cache?
 def get_model(last=0) -> Model:
-    return Model(_get_exports()[-last:])
+    return Model(inputs()[-last:])


 def _without_errors():

setup.py (3 changed lines)

@@ -4,8 +4,9 @@
 from setuptools import setup, find_namespace_packages # type: ignore

 INSTALL_REQUIRES = [
-    'appdirs',
     'pytz', # even though it's not needed by the core, it's so common anyway...
+    'appdirs', # very common, and makes it portable
+    'more-itertools', # it's just too useful and very common anyway
 ]

tests/common.py (new file, 113 lines)

@@ -0,0 +1,113 @@
+from pathlib import Path
+from my.common import get_files
+
+import pytest # type: ignore
+
+
+def test_single_file():
+    '''
+    Regular file path is just returned as is.
+    '''
+
+    "Exception if it doesn't exist"
+    with pytest.raises(Exception):
+        get_files('/tmp/hpi_test/file.ext')
+
+
+    create('/tmp/hpi_test/file.ext')
+
+    '''
+    Couple of things:
+    1. Return type is a tuple, it's friendlier for hashing/caching
+    2. It always return pathlib.Path instead of plain strings
+    '''
+    assert get_files('/tmp/hpi_test/file.ext') == (
+        Path('/tmp/hpi_test/file.ext'),
+    )
+
+
+def test_multiple_files():
+    '''
+    If you pass a directory/multiple directories, it flattens the contents
+    '''
+    create('/tmp/hpi_test/dir1/')
+    create('/tmp/hpi_test/dir1/zzz')
+    create('/tmp/hpi_test/dir1/yyy')
+    # create('/tmp/hpi_test/dir1/whatever/') # TODO not sure about this... should really allow extra dirs
+    create('/tmp/hpi_test/dir2/')
+    create('/tmp/hpi_test/dir2/mmm')
+    create('/tmp/hpi_test/dir2/nnn')
+    create('/tmp/hpi_test/dir3/')
+    create('/tmp/hpi_test/dir3/ttt')
+
+    assert get_files([
+        Path('/tmp/hpi_test/dir3'), # it takes in Path as well as str
+        '/tmp/hpi_test/dir1',
+    ]) == (
+        # the paths are always returned in sorted order (unless you pass sort=False)
+        Path('/tmp/hpi_test/dir1/yyy'),
+        Path('/tmp/hpi_test/dir1/zzz'),
+        Path('/tmp/hpi_test/dir3/ttt'),
+    )
+
+
+def test_explicit_glob():
+    '''
+    You can pass a glob to restrict the extensions
+    '''
+
+    create('/tmp/hpi_test/file_3.zip')
+    create('/tmp/hpi_test/file_2.zip')
+    create('/tmp/hpi_test/ignoreme')
+    create('/tmp/hpi_test/file.zip')
+
+    # todo walrus operator would be great here...
+    expected = (
+        Path('/tmp/hpi_test/file_2.zip'),
+        Path('/tmp/hpi_test/file_3.zip'),
+    )
+    assert get_files('/tmp/hpi_test', 'file_*.zip') == expected
+
+    "named argument should work too"
+    assert get_files('/tmp/hpi_test', glob='file_*.zip') == expected
+
+
+def test_implicit_blog():
+    '''
+    Asterisc in the path results in globing too.
+    '''
+    # todo hopefully that makes sense? dunno why would anyone actually rely on asteriscs in names..
+    # this is very convenient in configs, so people don't have to use some special types
+
+    create('/tmp/hpi_test/123/')
+    create('/tmp/hpi_test/123/dummy')
+    create('/tmp/hpi_test/123/file.zip')
+    create('/tmp/hpi_test/456/')
+    create('/tmp/hpi_test/456/dummy')
+    create('/tmp/hpi_test/456/file.zip')
+
+    assert get_files(['/tmp/hpi_test/*/*.zip']) == (
+        Path('/tmp/hpi_test/123/file.zip'),
+        Path('/tmp/hpi_test/456/file.zip'),
+    )
+
+
+# TODO not sure if should uniquify if the filenames end up same?
+# TODO not sure about the symlinks? and hidden files?
+
+test_path = Path('/tmp/hpi_test')
+def setup():
+    teardown()
+    test_path.mkdir()
+
+
+def teardown():
+    import shutil
+    if test_path.is_dir():
+        shutil.rmtree(test_path)
+
+
+def create(f: str) -> None:
+    if f.endswith('/'):
+        Path(f).mkdir()
+    else:
+        Path(f).touch()
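The implicit glob test above documents the motivation: a config value can simply contain a glob and get_files will expand it. A hypothetical config entry relying on that (module name and paths are made up for illustration):

# in my/config.py (hypothetical values)
class lastfm:
    # the asterisk makes get_files() glob for matching files,
    # so the config can stay a plain string
    export_path = '/backups/lastfm/*.json'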
@@ -1,6 +1,5 @@
-from my.instapaper import get_todos
+from my.instapaper import pages


-def test_get_todos():
-    for t in get_todos():
-        print(t)
+def test_pages():
+    assert len(list(pages())) > 3

tests/lastfm.py (new file, 7 lines)

@@ -0,0 +1,7 @@
+from more_itertools import ilen
+
+from my.lastfm import scrobbles
+
+
+def test():
+    assert ilen(scrobbles()) > 1000