diff --git a/README.org b/README.org index 1d62c84..5df3383 100644 --- a/README.org +++ b/README.org @@ -5,6 +5,11 @@ #+macro: map @@html:@@$1@@html:@@ +If you're in a hurry, feel free to jump straight to the [[#usecases][demos]]. + +- see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP]] for the *installation/configuration guide* +- see [[https://github.com/karlicoss/HPI/tree/master/doc/DEVELOPMENT.org][DEVELOPMENT]] for the *development guide* + *TLDR*: I'm using [[https://github.com/karlicoss/HPI][HPI]] (Human Programming Interface) package as a means of unifying, accessing and interacting with all of my personal data. It's a Python library (named ~my~), a collection of modules for: @@ -48,11 +53,6 @@ and that's why I'm sharing this. Imagine if all your life was reflected digitally and available at your fingertips. This library is my attempt to achieve this vision. -If you're in a hurry, feel free to jump straight to the [[#usecases][demos]]. - -For *installation/configuration/development guide*, see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP.org]]. - - #+toc: headlines 2 @@ -593,4 +593,4 @@ In some near future I will write more about: - challenges I had so solve - more use-cases and demos -- it's impossible to fit everything in one post! -, but happy to answer any questions on these topics now! \ No newline at end of file +, but happy to answer any questions on these topics now! diff --git a/doc/DEVELOPMENT.org b/doc/DEVELOPMENT.org index dd78c57..f338760 100644 --- a/doc/DEVELOPMENT.org +++ b/doc/DEVELOPMENT.org @@ -1,13 +1,45 @@ +* Running tests +I'm using =tox= to run test/lint. You can check out [[file:../.github/workflows/main.yml][Github Actions]] config +and [[file:../scripts/ci/run]] for the up to date info on the specifics. + * IDE setup: make sure my.config is in your package search path In runtime, ~my.config~ is imported from the user config directory dynamically. 
However, Pycharm/Emacs/whatever you use won't be able to figure that out, so you'd need to adjust your IDE configuration. -- Pycharm: basically, follow the instruction [[https://stackoverflow.com/a/55278260/706389][here]] +- Pycharm: basically, follow the instructions [[https://stackoverflow.com/a/55278260/706389][here]] i.e. create a new interpreter configuration (e.g. name it "Python 3.7 (for HPI)"), and add =~/.config/my=. * Linting -You should be able to use ~./lint~ script to run mypy checks. +You should be able to use [[file:../lint]] script to run mypy checks. -~mypy.ini~ file points at =~/.config/my= by default. +[[file:../mypy.ini]] points at =~/.config/my= by default. + + +* Modifying/adding modules + +The easiest is just to run HPI via [[file:SETUP.org::#use-without-installing][with_my]] wrapper or with an editable PIP install. +That way your changes will be reflected immediately, and you will be able to quickly iterate/fix bugs/add new methods. + +The "proper way" (unless you want to contribute to the upstream) is to create a separate hierarchy and add your module to =PYTHONPATH=. + +For example, if you want to add an =awesomedatasource=, it could be: + +: custom_module +: └── my +: └──awesomedatasource.py + +You can use all existing HPI modules in =awesomedatasource.py=, for example, =my.config=, or everything from =my.core=. + +But also, you can use all the previously defined HPI modules too. This could be useful to *shadow/override* an existing HPI module: + +: custom_reddit_overlay +: └── my +: └──reddit.py + +Now if you add =custom_reddit_overlay= *in the front* of ~PYTHONPATH~, all the downstream scripts using =my.reddit= will load it from =custom_reddit_overlay= instead. + +This could be useful to monkey patch some behaviours, or dynamically add some extra data sources -- anything that comes to your mind. 
+ +I'll put up a better guide on this, in the meantime see [[https://packaging.python.org/guides/packaging-namespace-packages]["namespace packages"]] for more info. diff --git a/lint b/lint index 5d91b6e..bb2f097 100755 --- a/lint +++ b/lint @@ -31,25 +31,29 @@ def package_name(p: Path) -> str: else: return mname(p) +def subpackages(package: str) -> Iterable[str]: + ppath = package.replace('.', '/') + yield from sorted({ + package_name(p.relative_to(DIR)) for p in (DIR / ppath).rglob('*.py') + }) + + # TODO meh.. think how to check _everything_ on CI def core_modules() -> Iterable[str]: return [ - 'my.common', + *subpackages('my.core'), + *subpackages('my.kython'), 'my.config', - 'my.core', 'my.cfg', - 'my.error', - 'my.init', 'tests/misc.py', 'tests/get_files.py', # 'tests/config.py', TODO hmm. unclear how to type check this module ] + def all_modules() -> Iterable[str]: - yield from sorted(set( - package_name(p.relative_to(DIR)) for p in (DIR / 'my').rglob('*.py') - )) + yield from subpackages('my') yield from sorted( str(f.relative_to(DIR)) for f in (DIR / 'tests').rglob('*.py') ) @@ -63,11 +67,13 @@ def pylint(): def mypy(thing: str): is_package = Path(thing).suffix != '.py' - return run([ + cmd = [ 'mypy', '--color-output', # TODO eh? doesn't work.. *(['-p'] if is_package else []), thing, - ], stdout=PIPE, stderr=PIPE) + ] + print(' '.join(cmd), file=sys.stderr) + return run(cmd, stdout=PIPE, stderr=PIPE) def mypy_all() -> Iterable[Exception]: diff --git a/my/books/kobo.py b/my/books/kobo.py index 09fa8c9..e5603a0 100644 --- a/my/books/kobo.py +++ b/my/books/kobo.py @@ -1,8 +1,6 @@ """ [[https://uk.kobobooks.com/products/kobo-aura-one][Kobo]] e-ink reader: annotations and reading stats """ -from .. 
import init - from typing import Callable, Union, List from my.config import kobo as config diff --git a/my/calendar/holidays.py b/my/calendar/holidays.py index 5759ec8..4f45a93 100644 --- a/my/calendar/holidays.py +++ b/my/calendar/holidays.py @@ -13,7 +13,7 @@ from my.config.holidays_data import HOLIDAYS_DATA # pip3 install workalendar from workalendar.europe import UnitedKingdom # type: ignore -cal = UnitedKingdom() # TODO FIXME specify in config +cal = UnitedKingdom() # TODO # TODO that should depend on country/'location' of residence I suppose? diff --git a/my/cfg.py b/my/cfg.py index ddc102f..97268da 100644 --- a/my/cfg.py +++ b/my/cfg.py @@ -12,15 +12,12 @@ After that, you can set config attributes: export_path='/path/to/twitter/exports', ) """ -# TODO later, If I have config stubs that might be unnecessary too.. - -from . import init - +# todo why do we bring this into scope? don't remember.. import my.config as config def set_repo(name: str, repo): - from .init import assign_module + from .core.init import assign_module from . common import import_from module = import_from(repo, name) diff --git a/my/coding/codeforces.py b/my/coding/codeforces.py index fbbf586..138cc73 100644 --- a/my/coding/codeforces.py +++ b/my/coding/codeforces.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from .. import init - from my.config import codeforces as config from datetime import datetime diff --git a/my/coding/github.py b/my/coding/github.py index 0126e47..508801f 100644 --- a/my/coding/github.py +++ b/my/coding/github.py @@ -1,9 +1,6 @@ """ Github events and their metadata: comments/issues/pull requests """ - -from .. import init - from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar, Set from datetime import datetime import json diff --git a/my/coding/topcoder.py b/my/coding/topcoder.py index de98114..c370b5d 100644 --- a/my/coding/topcoder.py +++ b/my/coding/topcoder.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from .. 
import init - from my.config import topcoder as config from datetime import datetime diff --git a/my/common.py b/my/common.py index 063f555..bbda576 100644 --- a/my/common.py +++ b/my/common.py @@ -1,197 +1,2 @@ -from glob import glob as do_glob -from pathlib import Path -import functools -import types -from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple -import warnings - -# some helper functions -PathIsh = Union[Path, str] - -# TODO port annotations to kython?.. -def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType: - p = Path(p) - if name is None: - name = p.stem - import importlib.util - spec = importlib.util.spec_from_file_location(name, p) - foo = importlib.util.module_from_spec(spec) - loader = spec.loader; assert loader is not None - loader.exec_module(foo) # type: ignore[attr-defined] - return foo - - -def import_from(path: PathIsh, name: str) -> types.ModuleType: - path = str(path) - import sys - try: - sys.path.append(path) - import importlib - return importlib.import_module(name) - finally: - sys.path.remove(path) - - -T = TypeVar('T') -K = TypeVar('K') -V = TypeVar('V') - -def the(l: Iterable[T]) -> T: - it = iter(l) - try: - first = next(it) - except StopIteration as ee: - raise RuntimeError('Empty iterator?') - assert all(e == first for e in it) - return first - - -# TODO more_itertools.bucket? -def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]: - res: Dict[K, List[T]] = {} - for i in l: - kk = key(i) - lst = res.get(kk, []) - lst.append(i) - res[kk] = lst - return res - - -def _identity(v: T) -> V: - return cast(V, v) - -def make_dict(l: Iterable[T], key: Callable[[T], K], value: Callable[[T], V]=_identity) -> Dict[K, V]: - res: Dict[K, V] = {} - for i in l: - k = key(i) - v = value(i) - pv = res.get(k, None) # type: ignore - if pv is not None: - raise RuntimeError(f"Duplicate key: {k}. 
Previous value: {pv}, new value: {v}") - res[k] = v - return res - - -Cl = TypeVar('Cl') -R = TypeVar('R') - -def cproperty(f: Callable[[Cl], R]) -> R: - return property(functools.lru_cache(maxsize=1)(f)) # type: ignore - - -# https://stackoverflow.com/a/12377059/706389 -def listify(fn=None, wrapper=list): - """ - Wraps a function's return value in wrapper (e.g. list) - Useful when an algorithm can be expressed more cleanly as a generator - """ - def listify_return(fn): - @functools.wraps(fn) - def listify_helper(*args, **kw): - return wrapper(fn(*args, **kw)) - return listify_helper - if fn is None: - return listify_return - return listify_return(fn) - - -# TODO FIXME use in bluemaestro -# def dictify(fn=None, key=None, value=None): -# def md(it): -# return make_dict(it, key=key, value=value) -# return listify(fn=fn, wrapper=md) - - -from .kython.klogging import setup_logger, LazyLogger - - -Paths = Union[Sequence[PathIsh], PathIsh] - -DEFAULT_GLOB = '*' -def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]: - """ - Helper function to avoid boilerplate. - - Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense - """ - # TODO FIXME mm, some wrapper to assert iterator isn't empty? - sources: List[Path] = [] - if isinstance(pp, (str, Path)): - sources.append(Path(pp)) - else: - sources.extend(map(Path, pp)) - - paths: List[Path] = [] - for src in sources: - if src.is_dir(): - gp: Iterable[Path] = src.glob(glob) - paths.extend(gp) - else: - ss = str(src) - if '*' in ss: - if glob != DEFAULT_GLOB: - warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!") - paths.extend(map(Path, do_glob(ss))) - else: - assert src.is_file(), src - # todo assert matches glob?? 
- paths.append(src) - - if sort: - paths = list(sorted(paths)) - return tuple(paths) - - -# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff) -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from typing import Callable, TypeVar - from typing_extensions import Protocol - # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time.. - # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270 - # ok, that's actually a super nice 'pattern' - F = TypeVar('F') - class McachewType(Protocol): - def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]: - ... - - mcachew: McachewType - -def mcachew(*args, **kwargs): # type: ignore[no-redef] - """ - Stands for 'Maybe cachew'. - Defensive wrapper around @cachew to make it an optional dependency. - """ - try: - import cachew - except ModuleNotFoundError: - warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew') - return lambda orig_func: orig_func - else: - import cachew.experimental - cachew.experimental.enable_exceptions() # TODO do it only once? - return cachew.cachew(*args, **kwargs) - - -@functools.lru_cache(1) -def _magic(): - import magic # type: ignore - return magic.Magic(mime=True) - - -# TODO could reuse in pdf module? -import mimetypes # todo do I need init()? -# todo wtf? fastermime thinks it's mime is application/json even if the extension is xz?? -# whereas magic detects correctly: application/x-zstd and application/x-xz -def fastermime(path: PathIsh) -> str: - paths = str(path) - # mimetypes is faster - (mime, _) = mimetypes.guess_type(paths) - if mime is not None: - return mime - # magic is slower but returns more stuff - # TODO Result type?; it's kinda racey, but perhaps better to let the caller decide? 
- return _magic().from_file(paths) - - -Json = Dict[str, Any] +# will be deprecated. please add stuff to my.core +from .core.common import * diff --git a/my/config/__init__.py b/my/config/__init__.py index da9d781..333ae6e 100644 --- a/my/config/__init__.py +++ b/my/config/__init__.py @@ -1,5 +1,5 @@ # TODO ok, this thing should trigger .cfg import presumably?? -from .. import init +from ..core import init # TODO maybe, reuse mycfg_template here? diff --git a/my/core/common.py b/my/core/common.py new file mode 100644 index 0000000..1557654 --- /dev/null +++ b/my/core/common.py @@ -0,0 +1,197 @@ +from glob import glob as do_glob +from pathlib import Path +import functools +import types +from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple +import warnings + +# some helper functions +PathIsh = Union[Path, str] + +# TODO port annotations to kython?.. +def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType: + p = Path(p) + if name is None: + name = p.stem + import importlib.util + spec = importlib.util.spec_from_file_location(name, p) + foo = importlib.util.module_from_spec(spec) + loader = spec.loader; assert loader is not None + loader.exec_module(foo) # type: ignore[attr-defined] + return foo + + +def import_from(path: PathIsh, name: str) -> types.ModuleType: + path = str(path) + import sys + try: + sys.path.append(path) + import importlib + return importlib.import_module(name) + finally: + sys.path.remove(path) + + +T = TypeVar('T') +K = TypeVar('K') +V = TypeVar('V') + +def the(l: Iterable[T]) -> T: + it = iter(l) + try: + first = next(it) + except StopIteration as ee: + raise RuntimeError('Empty iterator?') + assert all(e == first for e in it) + return first + + +# TODO more_itertools.bucket? 
+def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]: + res: Dict[K, List[T]] = {} + for i in l: + kk = key(i) + lst = res.get(kk, []) + lst.append(i) + res[kk] = lst + return res + + +def _identity(v: T) -> V: + return cast(V, v) + +def make_dict(l: Iterable[T], key: Callable[[T], K], value: Callable[[T], V]=_identity) -> Dict[K, V]: + res: Dict[K, V] = {} + for i in l: + k = key(i) + v = value(i) + pv = res.get(k, None) # type: ignore + if pv is not None: + raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}") + res[k] = v + return res + + +Cl = TypeVar('Cl') +R = TypeVar('R') + +def cproperty(f: Callable[[Cl], R]) -> R: + return property(functools.lru_cache(maxsize=1)(f)) # type: ignore + + +# https://stackoverflow.com/a/12377059/706389 +def listify(fn=None, wrapper=list): + """ + Wraps a function's return value in wrapper (e.g. list) + Useful when an algorithm can be expressed more cleanly as a generator + """ + def listify_return(fn): + @functools.wraps(fn) + def listify_helper(*args, **kw): + return wrapper(fn(*args, **kw)) + return listify_helper + if fn is None: + return listify_return + return listify_return(fn) + + +# todo use in bluemaestro +# def dictify(fn=None, key=None, value=None): +# def md(it): +# return make_dict(it, key=key, value=value) +# return listify(fn=fn, wrapper=md) + + +from ..kython.klogging import setup_logger, LazyLogger + + +Paths = Union[Sequence[PathIsh], PathIsh] + +DEFAULT_GLOB = '*' +def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]: + """ + Helper function to avoid boilerplate. + + Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense + """ + # TODO FIXME mm, some wrapper to assert iterator isn't empty? 
+ sources: List[Path] = [] + if isinstance(pp, (str, Path)): + sources.append(Path(pp)) + else: + sources.extend(map(Path, pp)) + + paths: List[Path] = [] + for src in sources: + if src.is_dir(): + gp: Iterable[Path] = src.glob(glob) + paths.extend(gp) + else: + ss = str(src) + if '*' in ss: + if glob != DEFAULT_GLOB: + warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!") + paths.extend(map(Path, do_glob(ss))) + else: + assert src.is_file(), src + # todo assert matches glob?? + paths.append(src) + + if sort: + paths = list(sorted(paths)) + return tuple(paths) + + +# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff) +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from typing import Callable, TypeVar + from typing_extensions import Protocol + # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time.. + # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270 + # ok, that's actually a super nice 'pattern' + F = TypeVar('F') + class McachewType(Protocol): + def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]: + ... + + mcachew: McachewType + +def mcachew(*args, **kwargs): # type: ignore[no-redef] + """ + Stands for 'Maybe cachew'. + Defensive wrapper around @cachew to make it an optional dependency. + """ + try: + import cachew + except ModuleNotFoundError: + warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew') + return lambda orig_func: orig_func + else: + import cachew.experimental + cachew.experimental.enable_exceptions() # TODO do it only once? + return cachew.cachew(*args, **kwargs) + + +@functools.lru_cache(1) +def _magic(): + import magic # type: ignore + return magic.Magic(mime=True) + + +# TODO could reuse in pdf module? +import mimetypes # todo do I need init()? 
+# todo wtf? fastermime thinks it's mime is application/json even if the extension is xz?? +# whereas magic detects correctly: application/x-zstd and application/x-xz +def fastermime(path: PathIsh) -> str: + paths = str(path) + # mimetypes is faster + (mime, _) = mimetypes.guess_type(paths) + if mime is not None: + return mime + # magic is slower but returns more stuff + # TODO Result type?; it's kinda racey, but perhaps better to let the caller decide? + return _magic().from_file(paths) + + +Json = Dict[str, Any] diff --git a/my/core/error.py b/my/core/error.py new file mode 100644 index 0000000..4423940 --- /dev/null +++ b/my/core/error.py @@ -0,0 +1,99 @@ +""" +Various error handling helpers +See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail +""" + +from itertools import tee +from typing import Union, TypeVar, Iterable, List, Tuple, Type + + +T = TypeVar('T') +E = TypeVar('E', bound=Exception) # TODO make covariant? + +ResT = Union[T, E] + +Res = ResT[T, Exception] + + +def unwrap(res: Res[T]) -> T: + if isinstance(res, Exception): + raise res + else: + return res + + +def echain(ex: E, cause: Exception) -> E: + ex.__cause__ = cause + return ex + + +def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]: + # TODO would be nice to have ET=Exception default? + vit, eit = tee(l) + # TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type? 
+ values: Iterable[T] = ( + r # type: ignore[misc] + for r in vit + if not isinstance(r, ET)) + errors: Iterable[E] = ( + r + for r in eit + if isinstance(r, ET)) + # TODO would be interesting to be able to have yield statement anywehere in code + # so there are multiple 'entry points' to the return value + return (values, errors) + + +def sort_res_by(items: Iterable[ResT], key) -> List[ResT]: + """ + The general idea is: just alaways carry errors with the entry that precedes them + """ + # TODO ResT object should hold exception class?... + group = [] + groups = [] + for i in items: + if isinstance(i, Exception): + group.append(i) + else: + groups.append((i, group)) + group = [] + + results = [] + for v, errs in sorted(groups, key=lambda p: key(p[0])): + results.extend(errs) + results.append(v) + results.extend(group) + + return results + + +def test_sort_res_by() -> None: + class Exc(Exception): + def __eq__(self, other): + return self.args == other.args + + ress = [ + Exc('first'), + Exc('second'), + 5, + 3, + Exc('xxx'), + 2, + 1, + Exc('last'), + ] + results = sort_res_by(ress, lambda x: x) # type: ignore + assert results == [ + 1, + Exc('xxx'), + 2, + 3, + Exc('first'), + Exc('second'), + 5, + Exc('last'), + ] + + results2 = sort_res_by(ress + [0], lambda x: x) # type: ignore + assert results2 == [Exc('last'), 0] + results[:-1] + diff --git a/my/init.py b/my/core/init.py similarity index 93% rename from my/init.py rename to my/core/init.py index 54686c3..e3a5e7a 100644 --- a/my/init.py +++ b/my/core/init.py @@ -8,9 +8,10 @@ A hook to insert user's config directory into Python's search path. Please let me know if you are aware of a better way of dealing with this! 
''' +from types import ModuleType # TODO not ideal to keep it here, but this should really be a leaf in the import tree -def assign_module(parent: str, name: str, module): +def assign_module(parent: str, name: str, module: ModuleType) -> None: import sys import importlib parent_module = importlib.import_module(parent) @@ -20,13 +21,15 @@ def assign_module(parent: str, name: str, module): # TODO that crap should be tested... I guess will get it for free when I run rest of tests in the matrix setattr(parent_module, name, module) +del ModuleType # separate function to present namespace pollution -def setup_config(): +def setup_config() -> None: from pathlib import Path import sys import os import warnings + from typing import Optional # not sure if that's necessary, i.e. could rely on PYTHONPATH instead # on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path? diff --git a/my/error.py b/my/error.py index 721cb63..596c90e 100644 --- a/my/error.py +++ b/my/error.py @@ -1,99 +1,2 @@ -""" -Various error handling helpers -See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail -""" - -from itertools import tee -from typing import Union, TypeVar, Iterable, List, Tuple, Type - - -T = TypeVar('T') -E = TypeVar('E', bound=Exception) # TODO make covariant? - -ResT = Union[T, E] - -Res = ResT[T, Exception] - - -def unwrap(res: Res[T]) -> T: - if isinstance(res, Exception): - raise res - else: - return res - - -def echain(ex: E, cause: Exception) -> E: - ex.__cause__ = cause - return ex - - -def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]: - # TODO would be nice to have ET=Exception default? - vit, eit = tee(l) - # TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type? 
- values: Iterable[T] = ( - r # type: ignore[misc] - for r in vit - if not isinstance(r, ET)) - errors: Iterable[E] = ( - r - for r in eit - if isinstance(r, ET)) - # TODO would be interesting to be able to have yield statement anywehere in code - # so there are multiple 'entry points' to the return value - return (values, errors) - - -def sort_res_by(items: Iterable[ResT], key) -> List[ResT]: - """ - The general idea is: just alaways carry errors with the entry that precedes them - """ - # TODO ResT object should hold exception class?... - group = [] - groups = [] - for i in items: - if isinstance(i, Exception): - group.append(i) - else: - groups.append((i, group)) - group = [] - - results = [] - for v, errs in sorted(groups, key=lambda p: key(p[0])): - results.extend(errs) - results.append(v) - results.extend(group) - - return results - - -def test_sort_res_by(): - class Exc(Exception): - def __eq__(self, other): - return self.args == other.args - - ress = [ - Exc('first'), - Exc('second'), - 5, - 3, - Exc('xxx'), - 2, - 1, - Exc('last'), - ] - results = sort_res_by(ress, lambda x: x) # type: ignore - assert results == [ - 1, - Exc('xxx'), - 2, - 3, - Exc('first'), - Exc('second'), - 5, - Exc('last'), - ] - - results2 = sort_res_by(ress + [0], lambda x: x) # type: ignore - assert results2 == [Exc('last'), 0] + results[:-1] - +# will be deprecated. please add stuff to my.core +from .core.error import * diff --git a/my/hypothesis.py b/my/hypothesis.py index 46e00bc..94d4edf 100644 --- a/my/hypothesis.py +++ b/my/hypothesis.py @@ -1,8 +1,6 @@ """ [[https://hypothes.is][Hypothes.is]] highlights and annotations """ -from . 
import init - from .common import get_files from .error import Res, sort_res_by diff --git a/my/kython/__init__.py b/my/kython/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/my/materialistic.py b/my/materialistic.py index 79cd448..36ff1dc 100644 --- a/my/materialistic.py +++ b/my/materialistic.py @@ -1,8 +1,6 @@ """ [[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews """ -from . import init - from datetime import datetime from typing import Any, Dict, Iterator, NamedTuple diff --git a/my/media/imdb.py b/my/media/imdb.py index 42a1bc0..23bccd7 100644 --- a/my/media/imdb.py +++ b/my/media/imdb.py @@ -1,7 +1,4 @@ #!/usr/bin/env python3 - -from .. import init - import csv import json from datetime import datetime diff --git a/my/pdfs.py b/my/pdfs.py index 5e7a36f..3c04196 100755 --- a/my/pdfs.py +++ b/my/pdfs.py @@ -2,9 +2,6 @@ ''' PDF documents and annotations on your filesystem ''' - -from . import init - from concurrent.futures import ProcessPoolExecutor from datetime import datetime import re diff --git a/my/pinboard.py b/my/pinboard.py index 8685d50..d37ba07 100644 --- a/my/pinboard.py +++ b/my/pinboard.py @@ -1,8 +1,6 @@ """ [[https://pinboard.in][Pinboard]] bookmarks """ -from . import init - from .common import get_files from my.config.repos.pinbexport import dal as pinbexport diff --git a/my/reddit.py b/my/reddit.py index 2a341f7..b5293ed 100755 --- a/my/reddit.py +++ b/my/reddit.py @@ -1,8 +1,6 @@ """ Reddit data: saved items/comments/upvotes/etc. """ -from . import init - from pathlib import Path from typing import List, Sequence, Mapping, Iterator @@ -13,14 +11,14 @@ from my.config import reddit as config import my.config.repos.rexport.dal as rexport -def get_sources() -> Sequence[Path]: +def inputs() -> Sequence[Path]: # TODO rename to export_path? files = get_files(config.export_dir) + # TODO Cpath better be automatic by get_files... 
res = list(map(CPath, files)); assert len(res) > 0 # todo move the assert to get_files? return tuple(res) - logger = LazyLogger(__name__, level='debug') @@ -32,30 +30,30 @@ Upvote = rexport.Upvote def dal() -> rexport.DAL: - # TODO lru cache? but be careful when it runs continuously - return rexport.DAL(get_sources()) + return rexport.DAL(inputs()) -@mcachew(hashf=lambda: get_sources()) +@mcachew(hashf=lambda: inputs()) def saved() -> Iterator[Save]: return dal().saved() -@mcachew(hashf=lambda: get_sources()) +@mcachew(hashf=lambda: inputs()) def comments() -> Iterator[Comment]: return dal().comments() -@mcachew(hashf=lambda: get_sources()) +@mcachew(hashf=lambda: inputs()) def submissions() -> Iterator[Submission]: return dal().submissions() -@mcachew(hashf=lambda: get_sources()) +@mcachew(hashf=lambda: inputs()) def upvoted() -> Iterator[Upvote]: return dal().upvoted() +### the rest of the file is some elaborate attempt of restoring favorite/unfavorite times from typing import Dict, Union, Iterable, Iterator, NamedTuple, Any from functools import lru_cache @@ -115,10 +113,11 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]: key=lambda s: s.save.sid, ) +# TODO hmm. think about it.. if we set default backups=inputs() +# it's called early so it ends up as a global variable that we can't monkey patch easily @mcachew('/L/data/.cache/reddit-events.cache') -def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> Iterator[Event]: +def _get_events(backups: Sequence[Path], parallel: bool=True) -> Iterator[Event]: # TODO cachew: let it transform return type? so you don't have to write a wrapper for lists? - # parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it? prev_saves: Mapping[Sid, SaveWithDt] = {} # TODO suppress first batch?? @@ -168,55 +167,18 @@ def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> I # TODO a bit awkward, favorited should compare lower than unfavorited? 
@lru_cache(1) -def get_events(*args, **kwargs) -> List[Event]: - evit = _get_events(*args, **kwargs) +def events(*args, **kwargs) -> List[Event]: + evit = _get_events(inputs(), *args, **kwargs) return list(sorted(evit, key=lambda e: e.cmp_key)) - -def test() -> None: - get_events(backups=get_sources()[-1:]) - list(saved()) - - -def test_unfav() -> None: - events = get_events() - url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/' - uevents = [e for e in events if e.url == url] - assert len(uevents) == 2 - ff = uevents[0] - assert ff.text == 'favorited' - uf = uevents[1] - assert uf.text == 'unfavorited' - -# TODO move out.. -def test_get_all_saves() -> None: - # TODO not sure if this is necesasry anymore? - saves = list(saved()) - # just check that they are unique.. - make_dict(saves, key=lambda s: s.sid) - - -def test_disappearing() -> None: - # eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason - # but I guess it was just a short glitch... so whatever - saves = get_events() - favs = [s.kind for s in saves if s.text == 'favorited'] - [deal_with_it] = [f for f in favs if f.title == '"Deal with it!"'] - assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc) - - -def test_unfavorite() -> None: - events = get_events() - unfavs = [s for s in events if s.text == 'unfavorited'] - [xxx] = [u for u in unfavs if u.eid == 'unf-19ifop'] - assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc) +## def main() -> None: # TODO eh. not sure why but parallel on seems to mess glumov up and cause OOM... - events = get_events(parallel=False) - print(len(events)) - for e in events: + el = events(parallel=False) + print(len(el)) + for e in el: print(e.text, e.url) # for e in get_ # 509 with urls.. @@ -226,3 +188,8 @@ def main() -> None: if __name__ == '__main__': main() + +# TODO deprecate... 
+ +get_sources = inputs +get_events = events diff --git a/my/smscalls.py b/my/smscalls.py index e2d80f1..91d9af5 100644 --- a/my/smscalls.py +++ b/my/smscalls.py @@ -2,8 +2,6 @@ Phone calls and SMS messages """ # TODO extract SMS as well? I barely use them though.. -from . import init - from datetime import datetime from pathlib import Path from typing import NamedTuple, Iterator, Set diff --git a/tests/misc.py b/tests/misc.py index 73d1255..40d63a4 100644 --- a/tests/misc.py +++ b/tests/misc.py @@ -7,10 +7,32 @@ import zipfile from my.kython.kompress import kopen, kexists, CPath +def test_kopen(tmp_path: Path) -> None: + "Plaintext handled transparently" + assert kopen(tmp_path / 'file' ).read() == 'just plaintext' + assert kopen(tmp_path / 'file.xz').read() == 'compressed text' + + "For zips behaviour is a bit different (not sure about all this, tbh...)" + assert kopen(tmp_path / 'file.zip', 'path/in/archive').read() == 'data in zip' + + +def test_kexists(tmp_path: Path) -> None: + assert kexists(str(tmp_path / 'file.zip'), 'path/in/archive') + assert not kexists(str(tmp_path / 'file.zip'), 'path/notin/archive') + + # TODO not sure about this? + assert not kexists(tmp_path / 'nosuchzip.zip', 'path/in/archive') + + +def test_cpath(tmp_path: Path) -> None: + CPath(str(tmp_path / 'file' )).read_text() == 'just plaintext' + CPath( tmp_path / 'file.xz').read_text() == 'compressed text' + # TODO not sure about zip files?? 
+ import pytest # type: ignore -@pytest.fixture +@pytest.fixture(autouse=True) def prepare(tmp_path: Path): (tmp_path / 'file').write_text('just plaintext') with (tmp_path / 'file.xz').open('wb') as f: @@ -24,24 +46,5 @@ def prepare(tmp_path: Path): pass -def test_kopen(prepare, tmp_path: Path) -> None: - "Plaintext handled transparently" - assert kopen(tmp_path / 'file' ).read() == 'just plaintext' - assert kopen(tmp_path / 'file.xz').read() == 'compressed text' - - "For zips behaviour is a bit different (not sure about all this, tbh...)" - assert kopen(tmp_path / 'file.zip', 'path/in/archive').read() == 'data in zip' - - -def test_kexists(prepare, tmp_path: Path) -> None: - assert kexists(str(tmp_path / 'file.zip'), 'path/in/archive') - assert not kexists(str(tmp_path / 'file.zip'), 'path/notin/archive') - - # TODO not sure about this? - assert not kexists(tmp_path / 'nosuchzip.zip', 'path/in/archive') - - -def test_cpath(prepare, tmp_path: Path) -> None: - CPath(str(tmp_path / 'file' )).read_text() == 'just plaintext' - CPath( tmp_path / 'file.xz').read_text() == 'compressed text' - # TODO not sure about zip files?? +# meh +from my.core.error import test_sort_res_by diff --git a/tests/reddit.py b/tests/reddit.py index 45be487..1068038 100644 --- a/tests/reddit.py +++ b/tests/reddit.py @@ -1,4 +1,57 @@ -# ugh. workaround for https://github.com/pytest-dev/pytest/issues/1927 -from my.reddit import * +from datetime import datetime +import pytz -# TODO for reddit test, patch up to take every 10th archive or something; but make sure it's deterministic +from my.reddit import events, inputs, saved +from my.common import make_dict + + +def test() -> None: + list(events()) + list(saved()) + + +def test_unfav() -> None: + ev = events() + url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/' + uev = [e for e in ev if e.url == url] + assert len(uev) == 2 + ff = uev[0] + # TODO could recover these from takeout perhaps? 
+ assert ff.text == 'favorited [initial]' + uf = uev[1] + assert uf.text == 'unfavorited' + + +def test_saves() -> None: + # TODO not sure if this is necessary anymore? + saves = list(saved()) + # just check that they are unique.. + make_dict(saves, key=lambda s: s.sid) + + +def test_disappearing() -> None: + # eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason + # but I guess it was just a short glitch... so whatever + saves = events() + favs = [s.kind for s in saves if s.text == 'favorited'] + [deal_with_it] = [f for f in favs if f.title == '"Deal with it!"'] + assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc) + + +def test_unfavorite() -> None: + evs = events() + unfavs = [s for s in evs if s.text == 'unfavorited'] + [xxx] = [u for u in unfavs if u.eid == 'unf-19ifop'] + assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc) + + +import pytest # type: ignore +@pytest.fixture(autouse=True, scope='module') +def prepare(): + from my.common import get_files + from my.config import reddit as config + files = get_files(config.export_dir) + # use fewer files for the test to make it faster + # first bit is for 'test_unfavorite', the second is for test_disappearing + files = files[300:330] + files[500:520] + config.export_dir = files # type: ignore diff --git a/tox.ini b/tox.ini index db24138..d11c30d 100644 --- a/tox.ini +++ b/tox.ini @@ -9,10 +9,10 @@ passenv = CI CI_* setenv = MY_CONFIG = nonexistent commands = pip install -e .[testing] - # TODO ?? # python -m pytest {posargs} - python3 -c 'import my.init; from my.config import stub as config; print(config.key)' - python3 -c 'import my.init; import my.config; import my.config.repos' # shouldn't fail at least + # todo these are probably not necessary anymore?
+ python3 -c 'from my.config import stub as config; print(config.key)' + python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least python3 -m pytest tests/misc.py tests/get_files.py tests/config.py::test_set_repo tests/config.py::test_environment_variable # TODO add; once I figure out porg depdencency?? tests/config.py # TODO run demo.py? just make sure with_my is a bit cleverer?