Merge pull request #42 from karlicoss/updates

cleanup, move stuff to my.core, update docs
karlicoss 2020-05-06 23:23:41 +01:00 committed by GitHub
commit 40b6a82b7c
26 changed files with 471 additions and 429 deletions


@@ -5,6 +5,11 @@
#+macro: map @@html:<span style='color:darkgreen; font-weight: bolder'>@@$1@@html:</span>@@
If you're in a hurry, feel free to jump straight to the [[#usecases][demos]].
- see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP]] for the *installation/configuration guide*
- see [[https://github.com/karlicoss/HPI/tree/master/doc/DEVELOPMENT.org][DEVELOPMENT]] for the *development guide*
*TLDR*: I'm using the [[https://github.com/karlicoss/HPI][HPI]] (Human Programming Interface) package as a means of unifying, accessing and interacting with all of my personal data.
It's a Python library (named ~my~), a collection of modules for:
@@ -48,11 +53,6 @@ and that's why I'm sharing this.
Imagine if all your life was reflected digitally and available at your fingertips.
This library is my attempt to achieve this vision.
If you're in a hurry, feel free to jump straight to the [[#usecases][demos]].
For *installation/configuration/development guide*, see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP.org]].
#+toc: headlines 2
@@ -593,4 +593,4 @@ In some near future I will write more about:
- challenges I had to solve
- more use-cases and demos -- it's impossible to fit everything in one post!
, but happy to answer any questions on these topics now!
, but happy to answer any questions on these topics now!


@@ -1,13 +1,45 @@
* Running tests
I'm using =tox= to run tests/linting. You can check out the [[file:../.github/workflows/main.yml][GitHub Actions]] config
and [[file:../scripts/ci/run]] for up-to-date info on the specifics.
* IDE setup: make sure my.config is in your package search path
At runtime, ~my.config~ is imported dynamically from the user config directory.
However, Pycharm/Emacs/whatever you use won't be able to figure that out, so you'd need to adjust your IDE configuration.
- Pycharm: basically, follow the instruction [[https://stackoverflow.com/a/55278260/706389][here]]
- Pycharm: basically, follow the instructions [[https://stackoverflow.com/a/55278260/706389][here]]
i.e. create a new interpreter configuration (e.g. name it "Python 3.7 (for HPI)"), and add =~/.config/my=.
* Linting
You should be able to use ~./lint~ script to run mypy checks.
You should be able to use [[file:../lint]] script to run mypy checks.
~mypy.ini~ file points at =~/.config/my= by default.
[[file:../mypy.ini]] points at =~/.config/my= by default.
* Modifying/adding modules
The easiest way is to run HPI via the [[file:SETUP.org::#use-without-installing][with_my]] wrapper or with an editable pip install.
That way your changes will be reflected immediately, and you will be able to quickly iterate/fix bugs/add new methods.
The "proper way" (unless you want to contribute to the upstream) is to create a separate hierarchy and add your module to =PYTHONPATH=.
For example, if you want to add an =awesomedatasource=, it could be:
: custom_module
: └── my
:    └── awesomedatasource.py
In =awesomedatasource.py= you can use all existing HPI modules, for example =my.config= or everything from =my.core=.
The same mechanism also lets you *shadow/override* an existing HPI module:
: custom_reddit_overlay
: └── my
:    └── reddit.py
Now if you add =custom_reddit_overlay= *at the front* of ~PYTHONPATH~, all downstream scripts using =my.reddit= will load it from =custom_reddit_overlay= instead.
This can be useful to monkey patch some behaviours or dynamically add extra data sources -- anything that comes to mind.
I'll put up a better guide on this; in the meantime, see [[https://packaging.python.org/guides/packaging-namespace-packages]["namespace packages"]] for more info.
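For concreteness, here is a minimal sketch of what =custom_reddit_overlay/my/reddit.py= could look like -- hypothetical code that simply replaces the data source (names and fields are illustrative):

#+begin_src python
# custom_reddit_overlay/my/reddit.py -- hypothetical overlay, not part of HPI itself
from typing import Iterator, NamedTuple

class Save(NamedTuple):
    # minimal stand-in for the real Save type; keep whatever fields your downstream scripts use
    sid: str
    title: str

def saved() -> Iterator[Save]:
    # entirely replace the data source, e.g. yield from a local fixture instead of real exports
    yield Save(sid='dummy', title='hello from the overlay')
#+end_src

Downstream scripts run with =PYTHONPATH=/path/to/custom_reddit_overlay:$PYTHONPATH= would then pick up this module when they import ~my.reddit~.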

lint (24 changed lines)

@@ -31,25 +31,29 @@ def package_name(p: Path) -> str:
else:
return mname(p)
def subpackages(package: str) -> Iterable[str]:
ppath = package.replace('.', '/')
yield from sorted({
package_name(p.relative_to(DIR)) for p in (DIR / ppath).rglob('*.py')
})
# TODO meh.. think how to check _everything_ on CI
def core_modules() -> Iterable[str]:
return [
'my.common',
*subpackages('my.core'),
*subpackages('my.kython'),
'my.config',
'my.core',
'my.cfg',
'my.error',
'my.init',
'tests/misc.py',
'tests/get_files.py',
# 'tests/config.py', TODO hmm. unclear how to type check this module
]
def all_modules() -> Iterable[str]:
yield from sorted(set(
package_name(p.relative_to(DIR)) for p in (DIR / 'my').rglob('*.py')
))
yield from subpackages('my')
yield from sorted(
str(f.relative_to(DIR)) for f in (DIR / 'tests').rglob('*.py')
)
@@ -63,11 +67,13 @@ def pylint():
def mypy(thing: str):
is_package = Path(thing).suffix != '.py'
return run([
cmd = [
'mypy',
'--color-output', # TODO eh? doesn't work..
*(['-p'] if is_package else []), thing,
], stdout=PIPE, stderr=PIPE)
]
print(' '.join(cmd), file=sys.stderr)
return run(cmd, stdout=PIPE, stderr=PIPE)
def mypy_all() -> Iterable[Exception]:


@@ -1,8 +1,6 @@
"""
[[https://uk.kobobooks.com/products/kobo-aura-one][Kobo]] e-ink reader: annotations and reading stats
"""
from .. import init
from typing import Callable, Union, List
from my.config import kobo as config


@@ -13,7 +13,7 @@ from my.config.holidays_data import HOLIDAYS_DATA
# pip3 install workalendar
from workalendar.europe import UnitedKingdom # type: ignore
cal = UnitedKingdom() # TODO FIXME specify in config
cal = UnitedKingdom() # TODO
# TODO that should depend on country/'location' of residence I suppose?


@@ -12,15 +12,12 @@ After that, you can set config attributes:
export_path='/path/to/twitter/exports',
)
"""
# TODO later, If I have config stubs that might be unnecessary too..
from . import init
# todo why do we bring this into scope? don't remember..
import my.config as config
def set_repo(name: str, repo):
from .init import assign_module
from .core.init import assign_module
from . common import import_from
module = import_from(repo, name)


@@ -1,6 +1,4 @@
#!/usr/bin/env python3
from .. import init
from my.config import codeforces as config
from datetime import datetime


@@ -1,9 +1,6 @@
"""
Github events and their metadata: comments/issues/pull requests
"""
from .. import init
from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar, Set
from datetime import datetime
import json


@@ -1,6 +1,4 @@
#!/usr/bin/env python3
from .. import init
from my.config import topcoder as config
from datetime import datetime


@@ -1,197 +1,2 @@
from glob import glob as do_glob
from pathlib import Path
import functools
import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
import warnings
# some helper functions
PathIsh = Union[Path, str]
# TODO port annotations to kython?..
def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
p = Path(p)
if name is None:
name = p.stem
import importlib.util
spec = importlib.util.spec_from_file_location(name, p)
foo = importlib.util.module_from_spec(spec)
loader = spec.loader; assert loader is not None
loader.exec_module(foo) # type: ignore[attr-defined]
return foo
def import_from(path: PathIsh, name: str) -> types.ModuleType:
path = str(path)
import sys
try:
sys.path.append(path)
import importlib
return importlib.import_module(name)
finally:
sys.path.remove(path)
T = TypeVar('T')
K = TypeVar('K')
V = TypeVar('V')
def the(l: Iterable[T]) -> T:
it = iter(l)
try:
first = next(it)
except StopIteration as ee:
raise RuntimeError('Empty iterator?')
assert all(e == first for e in it)
return first
# TODO more_itertools.bucket?
def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
res: Dict[K, List[T]] = {}
for i in l:
kk = key(i)
lst = res.get(kk, [])
lst.append(i)
res[kk] = lst
return res
def _identity(v: T) -> V:
return cast(V, v)
def make_dict(l: Iterable[T], key: Callable[[T], K], value: Callable[[T], V]=_identity) -> Dict[K, V]:
res: Dict[K, V] = {}
for i in l:
k = key(i)
v = value(i)
pv = res.get(k, None) # type: ignore
if pv is not None:
raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}")
res[k] = v
return res
Cl = TypeVar('Cl')
R = TypeVar('R')
def cproperty(f: Callable[[Cl], R]) -> R:
return property(functools.lru_cache(maxsize=1)(f)) # type: ignore
# https://stackoverflow.com/a/12377059/706389
def listify(fn=None, wrapper=list):
"""
Wraps a function's return value in wrapper (e.g. list)
Useful when an algorithm can be expressed more cleanly as a generator
"""
def listify_return(fn):
@functools.wraps(fn)
def listify_helper(*args, **kw):
return wrapper(fn(*args, **kw))
return listify_helper
if fn is None:
return listify_return
return listify_return(fn)
# TODO FIXME use in bluemaestro
# def dictify(fn=None, key=None, value=None):
# def md(it):
# return make_dict(it, key=key, value=value)
# return listify(fn=fn, wrapper=md)
from .kython.klogging import setup_logger, LazyLogger
Paths = Union[Sequence[PathIsh], PathIsh]
DEFAULT_GLOB = '*'
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
"""
Helper function to avoid boilerplate.
Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
"""
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
sources: List[Path] = []
if isinstance(pp, (str, Path)):
sources.append(Path(pp))
else:
sources.extend(map(Path, pp))
paths: List[Path] = []
for src in sources:
if src.is_dir():
gp: Iterable[Path] = src.glob(glob)
paths.extend(gp)
else:
ss = str(src)
if '*' in ss:
if glob != DEFAULT_GLOB:
warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
paths.extend(map(Path, do_glob(ss)))
else:
assert src.is_file(), src
# todo assert matches glob??
paths.append(src)
if sort:
paths = list(sorted(paths))
return tuple(paths)
# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Callable, TypeVar
from typing_extensions import Protocol
# TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
# I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
# ok, that's actually a super nice 'pattern'
F = TypeVar('F')
class McachewType(Protocol):
def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
...
mcachew: McachewType
def mcachew(*args, **kwargs): # type: ignore[no-redef]
"""
Stands for 'Maybe cachew'.
Defensive wrapper around @cachew to make it an optional dependency.
"""
try:
import cachew
except ModuleNotFoundError:
warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
return lambda orig_func: orig_func
else:
import cachew.experimental
cachew.experimental.enable_exceptions() # TODO do it only once?
return cachew.cachew(*args, **kwargs)
@functools.lru_cache(1)
def _magic():
import magic # type: ignore
return magic.Magic(mime=True)
# TODO could reuse in pdf module?
import mimetypes # todo do I need init()?
# todo wtf? fastermime thinks its mime type is application/json even if the extension is xz??
# whereas magic detects correctly: application/x-zstd and application/x-xz
def fastermime(path: PathIsh) -> str:
paths = str(path)
# mimetypes is faster
(mime, _) = mimetypes.guess_type(paths)
if mime is not None:
return mime
# magic is slower but returns more stuff
# TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
return _magic().from_file(paths)
Json = Dict[str, Any]
# will be deprecated. please add stuff to my.core
from .core.common import *


@@ -1,5 +1,5 @@
# TODO ok, this thing should trigger .cfg import presumably??
from .. import init
from ..core import init
# TODO maybe, reuse mycfg_template here?

my/core/common.py (new file, 197 lines)

@@ -0,0 +1,197 @@
from glob import glob as do_glob
from pathlib import Path
import functools
import types
from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
import warnings
# some helper functions
PathIsh = Union[Path, str]
# TODO port annotations to kython?..
def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
p = Path(p)
if name is None:
name = p.stem
import importlib.util
spec = importlib.util.spec_from_file_location(name, p)
foo = importlib.util.module_from_spec(spec)
loader = spec.loader; assert loader is not None
loader.exec_module(foo) # type: ignore[attr-defined]
return foo
def import_from(path: PathIsh, name: str) -> types.ModuleType:
path = str(path)
import sys
try:
sys.path.append(path)
import importlib
return importlib.import_module(name)
finally:
sys.path.remove(path)
T = TypeVar('T')
K = TypeVar('K')
V = TypeVar('V')
def the(l: Iterable[T]) -> T:
it = iter(l)
try:
first = next(it)
except StopIteration as ee:
raise RuntimeError('Empty iterator?')
assert all(e == first for e in it)
return first
# TODO more_itertools.bucket?
def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
res: Dict[K, List[T]] = {}
for i in l:
kk = key(i)
lst = res.get(kk, [])
lst.append(i)
res[kk] = lst
return res
def _identity(v: T) -> V:
return cast(V, v)
def make_dict(l: Iterable[T], key: Callable[[T], K], value: Callable[[T], V]=_identity) -> Dict[K, V]:
res: Dict[K, V] = {}
for i in l:
k = key(i)
v = value(i)
pv = res.get(k, None) # type: ignore
if pv is not None:
raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}")
res[k] = v
return res
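A hypothetical usage sketch (not part of this commit) for the two helpers above:
# group records by a field, and build a lookup that raises on duplicate keys
records = [('alice', 1), ('bob', 2), ('alice', 3)]
by_name = group_by_key(records, key=lambda r: r[0])  # {'alice': [('alice', 1), ('alice', 3)], 'bob': [('bob', 2)]}
lookup = make_dict(records[:2], key=lambda r: r[0], value=lambda r: r[1])  # {'alice': 1, 'bob': 2}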
Cl = TypeVar('Cl')
R = TypeVar('R')
def cproperty(f: Callable[[Cl], R]) -> R:
return property(functools.lru_cache(maxsize=1)(f)) # type: ignore
# https://stackoverflow.com/a/12377059/706389
def listify(fn=None, wrapper=list):
"""
Wraps a function's return value in wrapper (e.g. list)
Useful when an algorithm can be expressed more cleanly as a generator
"""
def listify_return(fn):
@functools.wraps(fn)
def listify_helper(*args, **kw):
return wrapper(fn(*args, **kw))
return listify_helper
if fn is None:
return listify_return
return listify_return(fn)
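A hypothetical usage sketch (not part of this commit): write a generator, hand callers a list:
@listify
def squares(n):
    # expressed as a generator, but callers get a plain list thanks to @listify
    for i in range(n):
        yield i * i
assert squares(4) == [0, 1, 4, 9]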
# todo use in bluemaestro
# def dictify(fn=None, key=None, value=None):
# def md(it):
# return make_dict(it, key=key, value=value)
# return listify(fn=fn, wrapper=md)
from ..kython.klogging import setup_logger, LazyLogger
Paths = Union[Sequence[PathIsh], PathIsh]
DEFAULT_GLOB = '*'
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
"""
Helper function to avoid boilerplate.
Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
"""
# TODO FIXME mm, some wrapper to assert iterator isn't empty?
sources: List[Path] = []
if isinstance(pp, (str, Path)):
sources.append(Path(pp))
else:
sources.extend(map(Path, pp))
paths: List[Path] = []
for src in sources:
if src.is_dir():
gp: Iterable[Path] = src.glob(glob)
paths.extend(gp)
else:
ss = str(src)
if '*' in ss:
if glob != DEFAULT_GLOB:
warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
paths.extend(map(Path, do_glob(ss)))
else:
assert src.is_file(), src
# todo assert matches glob??
paths.append(src)
if sort:
paths = list(sorted(paths))
return tuple(paths)
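A hypothetical usage sketch (not part of this commit), e.g. for a config value that may be a directory, a glob, or an explicit list of paths (every call returns a sorted Tuple[Path, ...]):
# get_files('/data/reddit')                    # directory: expands to its contents matching the default '*' glob
# get_files('/data/reddit', glob='*.json.xz')  # directory with an explicit glob
# get_files('/data/reddit/*.json.xz')          # a string containing '*' is treated as a glob itself
# get_files(['/data/a.json', '/data/b.json'])  # an explicit sequence of paths is passed through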
# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Callable, TypeVar
from typing_extensions import Protocol
# TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
# I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
# ok, that's actually a super nice 'pattern'
F = TypeVar('F')
class McachewType(Protocol):
def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
...
mcachew: McachewType
def mcachew(*args, **kwargs): # type: ignore[no-redef]
"""
Stands for 'Maybe cachew'.
Defensive wrapper around @cachew to make it an optional dependency.
"""
try:
import cachew
except ModuleNotFoundError:
warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
return lambda orig_func: orig_func
else:
import cachew.experimental
cachew.experimental.enable_exceptions() # TODO do it only once?
return cachew.cachew(*args, **kwargs)
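A hypothetical usage sketch (not part of this commit): used like cachew's decorator, but silently degrading to the undecorated function when cachew isn't installed:
from typing import Iterator, NamedTuple

class _Record(NamedTuple):  # hypothetical record type; cachew can serialize NamedTuples
    value: int

@mcachew('/tmp/hpi-example.cache')  # hypothetical cache path; hashf=... can control invalidation
def _records() -> Iterator[_Record]:
    yield from (_Record(i) for i in range(3))  # imagine parsing a large export here instead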
@functools.lru_cache(1)
def _magic():
import magic # type: ignore
return magic.Magic(mime=True)
# TODO could reuse in pdf module?
import mimetypes # todo do I need init()?
# todo wtf? fastermime thinks its mime type is application/json even if the extension is xz??
# whereas magic detects correctly: application/x-zstd and application/x-xz
def fastermime(path: PathIsh) -> str:
paths = str(path)
# mimetypes is faster
(mime, _) = mimetypes.guess_type(paths)
if mime is not None:
return mime
# magic is slower but returns more stuff
# TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
return _magic().from_file(paths)
Json = Dict[str, Any]

my/core/error.py (new file, 99 lines)

@@ -0,0 +1,99 @@
"""
Various error handling helpers
See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
"""
from itertools import tee
from typing import Union, TypeVar, Iterable, List, Tuple, Type
T = TypeVar('T')
E = TypeVar('E', bound=Exception) # TODO make covariant?
ResT = Union[T, E]
Res = ResT[T, Exception]
def unwrap(res: Res[T]) -> T:
if isinstance(res, Exception):
raise res
else:
return res
def echain(ex: E, cause: Exception) -> E:
ex.__cause__ = cause
return ex
def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
# TODO would be nice to have ET=Exception default?
vit, eit = tee(l)
# TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
values: Iterable[T] = (
r # type: ignore[misc]
for r in vit
if not isinstance(r, ET))
errors: Iterable[E] = (
r
for r in eit
if isinstance(r, ET))
# TODO would be interesting to be able to have yield statement anywhere in code
# so there are multiple 'entry points' to the return value
return (values, errors)
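A hypothetical usage sketch (not part of this commit), following the Res pattern from the docstring link:
def _parse_all(raw: Iterable[str]) -> Iterable[Res[int]]:
    # yield either a parsed value or the exception, instead of failing the whole stream
    for s in raw:
        try:
            yield int(s)
        except ValueError as e:
            yield e

_values, _errors = split_errors(_parse_all(['1', 'oops', '2']), ET=Exception)
assert list(_values) == [1, 2] and len(list(_errors)) == 1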
def sort_res_by(items: Iterable[ResT], key) -> List[ResT]:
"""
The general idea is: just always carry errors with the entry that precedes them
"""
# TODO ResT object should hold exception class?...
group = []
groups = []
for i in items:
if isinstance(i, Exception):
group.append(i)
else:
groups.append((i, group))
group = []
results = []
for v, errs in sorted(groups, key=lambda p: key(p[0])):
results.extend(errs)
results.append(v)
results.extend(group)
return results
def test_sort_res_by() -> None:
class Exc(Exception):
def __eq__(self, other):
return self.args == other.args
ress = [
Exc('first'),
Exc('second'),
5,
3,
Exc('xxx'),
2,
1,
Exc('last'),
]
results = sort_res_by(ress, lambda x: x) # type: ignore
assert results == [
1,
Exc('xxx'),
2,
3,
Exc('first'),
Exc('second'),
5,
Exc('last'),
]
results2 = sort_res_by(ress + [0], lambda x: x) # type: ignore
assert results2 == [Exc('last'), 0] + results[:-1]


@@ -8,9 +8,10 @@ A hook to insert user's config directory into Python's search path.
Please let me know if you are aware of a better way of dealing with this!
'''
from types import ModuleType
# TODO not ideal to keep it here, but this should really be a leaf in the import tree
def assign_module(parent: str, name: str, module):
def assign_module(parent: str, name: str, module: ModuleType) -> None:
import sys
import importlib
parent_module = importlib.import_module(parent)
@@ -20,13 +21,15 @@ def assign_module(parent: str, name: str, module):
# TODO that crap should be tested... I guess will get it for free when I run rest of tests in the matrix
setattr(parent_module, name, module)
del ModuleType
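A hypothetical sketch (not part of this commit) of what ~assign_module~ enables -- attaching a dynamically loaded module so that regular imports can see it (path and names are illustrative):
def _example() -> None:
    from my.core.common import import_file
    extra = import_file('/path/to/extra_source.py', name='extra_source')  # hypothetical path
    assign_module('my', 'extra_source', extra)
    # after this, `from my import extra_source` resolves to the dynamically loaded module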
# separate function to prevent namespace pollution
def setup_config():
def setup_config() -> None:
from pathlib import Path
import sys
import os
import warnings
from typing import Optional
# not sure if that's necessary, i.e. could rely on PYTHONPATH instead
# on the other hand, by using MY_CONFIG we are guaranteed to load it from the desired path?


@@ -1,99 +1,2 @@
"""
Various error handling helpers
See https://beepb00p.xyz/mypy-error-handling.html#kiss for more detail
"""
from itertools import tee
from typing import Union, TypeVar, Iterable, List, Tuple, Type
T = TypeVar('T')
E = TypeVar('E', bound=Exception) # TODO make covariant?
ResT = Union[T, E]
Res = ResT[T, Exception]
def unwrap(res: Res[T]) -> T:
if isinstance(res, Exception):
raise res
else:
return res
def echain(ex: E, cause: Exception) -> E:
ex.__cause__ = cause
return ex
def split_errors(l: Iterable[ResT[T, E]], ET: Type[E]) -> Tuple[Iterable[T], Iterable[E]]:
# TODO would be nice to have ET=Exception default?
vit, eit = tee(l)
# TODO ugh, not sure if I can reconcile type checking and runtime and convince mypy that ET and E are the same type?
values: Iterable[T] = (
r # type: ignore[misc]
for r in vit
if not isinstance(r, ET))
errors: Iterable[E] = (
r
for r in eit
if isinstance(r, ET))
# TODO would be interesting to be able to have yield statement anywhere in code
# so there are multiple 'entry points' to the return value
return (values, errors)
def sort_res_by(items: Iterable[ResT], key) -> List[ResT]:
"""
The general idea is: just always carry errors with the entry that precedes them
"""
# TODO ResT object should hold exception class?...
group = []
groups = []
for i in items:
if isinstance(i, Exception):
group.append(i)
else:
groups.append((i, group))
group = []
results = []
for v, errs in sorted(groups, key=lambda p: key(p[0])):
results.extend(errs)
results.append(v)
results.extend(group)
return results
def test_sort_res_by():
class Exc(Exception):
def __eq__(self, other):
return self.args == other.args
ress = [
Exc('first'),
Exc('second'),
5,
3,
Exc('xxx'),
2,
1,
Exc('last'),
]
results = sort_res_by(ress, lambda x: x) # type: ignore
assert results == [
1,
Exc('xxx'),
2,
3,
Exc('first'),
Exc('second'),
5,
Exc('last'),
]
results2 = sort_res_by(ress + [0], lambda x: x) # type: ignore
assert results2 == [Exc('last'), 0] + results[:-1]
# will be deprecated. please add stuff to my.core
from .core.error import *


@@ -1,8 +1,6 @@
"""
[[https://hypothes.is][Hypothes.is]] highlights and annotations
"""
from . import init
from .common import get_files
from .error import Res, sort_res_by



@@ -1,8 +1,6 @@
"""
[[https://play.google.com/store/apps/details?id=io.github.hidroh.materialistic][Materialistic]] app for Hackernews
"""
from . import init
from datetime import datetime
from typing import Any, Dict, Iterator, NamedTuple

View file

@@ -1,7 +1,4 @@
#!/usr/bin/env python3
from .. import init
import csv
import json
from datetime import datetime

View file

@@ -2,9 +2,6 @@
'''
PDF documents and annotations on your filesystem
'''
from . import init
from concurrent.futures import ProcessPoolExecutor
from datetime import datetime
import re


@@ -1,8 +1,6 @@
"""
[[https://pinboard.in][Pinboard]] bookmarks
"""
from . import init
from .common import get_files
from my.config.repos.pinbexport import dal as pinbexport


@@ -1,8 +1,6 @@
"""
Reddit data: saved items/comments/upvotes/etc.
"""
from . import init
from pathlib import Path
from typing import List, Sequence, Mapping, Iterator
@@ -13,14 +11,14 @@ from my.config import reddit as config
import my.config.repos.rexport.dal as rexport
def get_sources() -> Sequence[Path]:
def inputs() -> Sequence[Path]:
# TODO rename to export_path?
files = get_files(config.export_dir)
# TODO Cpath better be automatic by get_files...
res = list(map(CPath, files)); assert len(res) > 0
# todo move the assert to get_files?
return tuple(res)
logger = LazyLogger(__name__, level='debug')
@@ -32,30 +30,30 @@ Upvote = rexport.Upvote
def dal() -> rexport.DAL:
# TODO lru cache? but be careful when it runs continuously
return rexport.DAL(get_sources())
return rexport.DAL(inputs())
@mcachew(hashf=lambda: get_sources())
@mcachew(hashf=lambda: inputs())
def saved() -> Iterator[Save]:
return dal().saved()
@mcachew(hashf=lambda: get_sources())
@mcachew(hashf=lambda: inputs())
def comments() -> Iterator[Comment]:
return dal().comments()
@mcachew(hashf=lambda: get_sources())
@mcachew(hashf=lambda: inputs())
def submissions() -> Iterator[Submission]:
return dal().submissions()
@mcachew(hashf=lambda: get_sources())
@mcachew(hashf=lambda: inputs())
def upvoted() -> Iterator[Upvote]:
return dal().upvoted()
### the rest of the file is some elaborate attempt of restoring favorite/unfavorite times
from typing import Dict, Union, Iterable, Iterator, NamedTuple, Any
from functools import lru_cache
@@ -115,10 +113,11 @@ def _get_state(bfile: Path) -> Dict[Sid, SaveWithDt]:
key=lambda s: s.save.sid,
)
# TODO hmm. think about it.. if we set default backups=inputs()
# it's called early so it ends up as a global variable that we can't monkey patch easily
@mcachew('/L/data/.cache/reddit-events.cache')
def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> Iterator[Event]:
def _get_events(backups: Sequence[Path], parallel: bool=True) -> Iterator[Event]:
# TODO cachew: let it transform return type? so you don't have to write a wrapper for lists?
# parallel = False # NOTE: eh, not sure if still necessary? I think glumov didn't like it?
prev_saves: Mapping[Sid, SaveWithDt] = {}
# TODO suppress first batch??
@@ -168,55 +167,18 @@ def _get_events(backups: Sequence[Path]=get_sources(), parallel: bool=True) -> I
# TODO a bit awkward, favorited should compare lower than unfavorited?
@lru_cache(1)
def get_events(*args, **kwargs) -> List[Event]:
evit = _get_events(*args, **kwargs)
def events(*args, **kwargs) -> List[Event]:
evit = _get_events(inputs(), *args, **kwargs)
return list(sorted(evit, key=lambda e: e.cmp_key))
def test() -> None:
get_events(backups=get_sources()[-1:])
list(saved())
def test_unfav() -> None:
events = get_events()
url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
uevents = [e for e in events if e.url == url]
assert len(uevents) == 2
ff = uevents[0]
assert ff.text == 'favorited'
uf = uevents[1]
assert uf.text == 'unfavorited'
# TODO move out..
def test_get_all_saves() -> None:
# TODO not sure if this is necessary anymore?
saves = list(saved())
# just check that they are unique..
make_dict(saves, key=lambda s: s.sid)
def test_disappearing() -> None:
# eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
# but I guess it was just a short glitch... so whatever
saves = get_events()
favs = [s.kind for s in saves if s.text == 'favorited']
[deal_with_it] = [f for f in favs if f.title == '"Deal with it!"']
assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
def test_unfavorite() -> None:
events = get_events()
unfavs = [s for s in events if s.text == 'unfavorited']
[xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)
##
def main() -> None:
# TODO eh. not sure why but parallel on seems to mess glumov up and cause OOM...
events = get_events(parallel=False)
print(len(events))
for e in events:
el = events(parallel=False)
print(len(el))
for e in el:
print(e.text, e.url)
# for e in get_
# 509 with urls..
@@ -226,3 +188,8 @@ def main() -> None:
if __name__ == '__main__':
main()
# TODO deprecate...
get_sources = inputs
get_events = events
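A hypothetical downstream usage sketch (not part of this commit) for the renamed accessors:
def _example_usage() -> None:  # hypothetical; requires my.config.reddit to be set up
    import my.reddit
    print(len(list(my.reddit.saved())))         # saved posts/comments across all exports
    for e in my.reddit.events(parallel=False):  # favorite/unfavorite events reconstructed above
        print(e.text, e.url)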


@@ -2,8 +2,6 @@
Phone calls and SMS messages
"""
# TODO extract SMS as well? I barely use them though..
from . import init
from datetime import datetime
from pathlib import Path
from typing import NamedTuple, Iterator, Set


@@ -7,10 +7,32 @@ import zipfile
from my.kython.kompress import kopen, kexists, CPath
def test_kopen(tmp_path: Path) -> None:
"Plaintext handled transparently"
assert kopen(tmp_path / 'file' ).read() == 'just plaintext'
assert kopen(tmp_path / 'file.xz').read() == 'compressed text'
"For zips behaviour is a bit different (not sure about all this, tbh...)"
assert kopen(tmp_path / 'file.zip', 'path/in/archive').read() == 'data in zip'
def test_kexists(tmp_path: Path) -> None:
assert kexists(str(tmp_path / 'file.zip'), 'path/in/archive')
assert not kexists(str(tmp_path / 'file.zip'), 'path/notin/archive')
# TODO not sure about this?
assert not kexists(tmp_path / 'nosuchzip.zip', 'path/in/archive')
def test_cpath(tmp_path: Path) -> None:
CPath(str(tmp_path / 'file' )).read_text() == 'just plaintext'
CPath( tmp_path / 'file.xz').read_text() == 'compressed text'
# TODO not sure about zip files??
import pytest # type: ignore
@pytest.fixture
@pytest.fixture(autouse=True)
def prepare(tmp_path: Path):
(tmp_path / 'file').write_text('just plaintext')
with (tmp_path / 'file.xz').open('wb') as f:
@@ -24,24 +46,5 @@ def prepare(tmp_path: Path):
pass
def test_kopen(prepare, tmp_path: Path) -> None:
"Plaintext handled transparently"
assert kopen(tmp_path / 'file' ).read() == 'just plaintext'
assert kopen(tmp_path / 'file.xz').read() == 'compressed text'
"For zips behaviour is a bit different (not sure about all this, tbh...)"
assert kopen(tmp_path / 'file.zip', 'path/in/archive').read() == 'data in zip'
def test_kexists(prepare, tmp_path: Path) -> None:
assert kexists(str(tmp_path / 'file.zip'), 'path/in/archive')
assert not kexists(str(tmp_path / 'file.zip'), 'path/notin/archive')
# TODO not sure about this?
assert not kexists(tmp_path / 'nosuchzip.zip', 'path/in/archive')
def test_cpath(prepare, tmp_path: Path) -> None:
CPath(str(tmp_path / 'file' )).read_text() == 'just plaintext'
CPath( tmp_path / 'file.xz').read_text() == 'compressed text'
# TODO not sure about zip files??
# meh
from my.core.error import test_sort_res_by


@@ -1,4 +1,57 @@
# ugh. workaround for https://github.com/pytest-dev/pytest/issues/1927
from my.reddit import *
from datetime import datetime
import pytz
# TODO for reddit test, patch up to take every 10th archive or something; but make sure it's deterministic
from my.reddit import events, inputs, saved
from my.common import make_dict
def test() -> None:
list(events())
list(saved())
def test_unfav() -> None:
ev = events()
url = 'https://reddit.com/r/QuantifiedSelf/comments/acxy1v/personal_dashboard/'
uev = [e for e in ev if e.url == url]
assert len(uev) == 2
ff = uev[0]
# TODO could recover these from takeout perhaps?
assert ff.text == 'favorited [initial]'
uf = uev[1]
assert uf.text == 'unfavorited'
def test_saves() -> None:
# TODO not sure if this is necessary anymore?
saves = list(saved())
# just check that they are unique..
make_dict(saves, key=lambda s: s.sid)
def test_disappearing() -> None:
# eh. so for instance, 'metro line colors' is missing from reddit-20190402005024.json for no reason
# but I guess it was just a short glitch... so whatever
saves = events()
favs = [s.kind for s in saves if s.text == 'favorited']
[deal_with_it] = [f for f in favs if f.title == '"Deal with it!"']
assert deal_with_it.backup_dt == datetime(2019, 4, 1, 23, 10, 25, tzinfo=pytz.utc)
def test_unfavorite() -> None:
evs = events()
unfavs = [s for s in evs if s.text == 'unfavorited']
[xxx] = [u for u in unfavs if u.eid == 'unf-19ifop']
assert xxx.dt == datetime(2019, 1, 28, 8, 10, 20, tzinfo=pytz.utc)
import pytest # type: ignore
@pytest.fixture(autouse=True, scope='module')
def prepare():
from my.common import get_files
from my.config import reddit as config
files = get_files(config.export_dir)
# use less files for the test to make it faster
# the first bit is for test_unfavorite, the second is for test_disappearing
files = files[300:330] + files[500:520]
config.export_dir = files # type: ignore


@@ -9,10 +9,10 @@ passenv = CI CI_*
setenv = MY_CONFIG = nonexistent
commands =
pip install -e .[testing]
# TODO ??
# python -m pytest {posargs}
python3 -c 'import my.init; from my.config import stub as config; print(config.key)'
python3 -c 'import my.init; import my.config; import my.config.repos' # shouldn't fail at least
# todo these are probably not necessary anymore?
python3 -c 'from my.config import stub as config; print(config.key)'
python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least
python3 -m pytest tests/misc.py tests/get_files.py tests/config.py::test_set_repo tests/config.py::test_environment_variable
# TODO add; once I figure out porg dependency?? tests/config.py
# TODO run demo.py? just make sure with_my is a bit cleverer?