Merge pull request #52 from karlicoss/updates

Updates
This commit is contained in:
karlicoss 2020-05-18 23:40:58 +01:00 committed by GitHub
commit c410daa484
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 204 additions and 78 deletions

View file

@ -1,8 +1,23 @@
* TOC
:PROPERTIES:
:TOC: :include all :depth 3
:END:
:CONTENTS:
- [[#toc][TOC]]
- [[#running-tests][Running tests]]
- [[#ide-setup][IDE setup]]
- [[#linting][Linting]]
- [[#modifyingadding-modules][Modifying/adding modules]]
:END:
* Running tests
I'm using =tox= to run tests and linting. You can check out the [[file:../.github/workflows/main.yml][Github Actions]] config
and [[file:../scripts/ci/run]] for up-to-date info on the specifics.
* IDE setup: make sure my.config is in your package search path
* IDE setup
To benefit from type hinting, make sure =my.config= is in your package search path.
At runtime, ~my.config~ is imported from the user config directory dynamically.
However, Pycharm/Emacs/whatever you use won't be able to figure that out, so you'd need to adjust your IDE configuration.
@ -43,3 +58,5 @@ Now if you add =my_reddit_overlay= *in the front* of ~PYTHONPATH~, all the downs
This could be useful to monkey patch some behaviours, or dynamically add some extra data sources -- anything that comes to your mind.
I'll put up a better guide on this, in the meantime see [[https://packaging.python.org/guides/packaging-namespace-packages]["namespace packages"]] for more info.
# TODO add example with overriding 'all'

View file

@ -1,11 +1,31 @@
This file is an overview of *documented* modules.
There are many more, see [[file:../README.org::#whats-inside]["What's inside"]] for the full list of modules, I'm progressively working on documenting them.
* TOC
:PROPERTIES:
:TOC: :include all
:END:
:CONTENTS:
- [[#toc][TOC]]
- [[#intro][Intro]]
- [[#configs][Configs]]
- [[#mygoogletakeoutpaths][my.google.takeout.paths]]
- [[#myhypothesis][my.hypothesis]]
- [[#myreddit][my.reddit]]
- [[#mytwittertwint][my.twitter.twint]]
- [[#mytwitterarchive][my.twitter.archive]]
- [[#mylastfm][my.lastfm]]
- [[#myreadingpolar][my.reading.polar]]
- [[#myinstapaper][my.instapaper]]
:END:
* Intro
See [[file:SETUP.org][SETUP]] to find out how to set up your own config.
Some explanations:
- =MY_CONFIG= is whereever you are keeping your private configuration (usually =~/.config/my/=)
- =MY_CONFIG= is the path where you are keeping your private configuration (usually =~/.config/my/=)
- [[https://docs.python.org/3/library/pathlib.html#pathlib.Path][Path]] is a standard Python object to represent paths
- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L9][PathIsh]] is a helper type to allow using either =str=, or a =Path=
- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L108][Paths]] is another helper type for paths.
@ -21,12 +41,15 @@ Some explanations:
- if the field has a default value, you can omit it from your private config altogether
* Configs
The config snippets below are meant to be modified accordingly and *pasted into your private configuration*, e.g =$MY_CONFIG/my/config.py=.
You don't have to set them up all at once, it's recommended to do it gradually.
#+begin_src python :dir .. :results output drawer :exports result
# TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh.
#+begin_src python :dir .. :results output drawer raw :exports result
# TODO ugh, pkgutil.walk_packages doesn't recurse and find packages like my.twitter.archive??
import importlib
# from lint import all_modules # meh
@ -63,7 +86,7 @@ for cls, p in modules:
for x in ['.py', '__init__.py']:
if Path(mpath + x).exists():
mpath = mpath + x
print(f'- [[file:../{mpath}][{p}]]')
print(f'** [[file:../{mpath}][{p}]]')
mdoc = m.__doc__
if mdoc is not None:
print(indent(mdoc))
@ -73,10 +96,9 @@ for cls, p in modules:
#+end_src
#+RESULTS:
:results:
- [[file:../my/google/takeout/paths.py][my.google.takeout.paths]]
** [[file:../my/google/takeout/paths.py][my.google.takeout.paths]]
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
@ -84,7 +106,7 @@ for cls, p in modules:
class google:
takeout_path: Paths # path/paths/glob for the takeout zips
#+end_src
- [[file:../my/hypothesis.py][my.hypothesis]]
** [[file:../my/hypothesis.py][my.hypothesis]]
[[https://hypothes.is][Hypothes.is]] highlights and annotations
@ -98,10 +120,10 @@ for cls, p in modules:
export_path: Paths
# path to a local clone of hypexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/hypexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/hypexport
hypexport : Optional[PathIsh] = None
#+end_src
- [[file:../my/reddit.py][my.reddit]]
** [[file:../my/reddit.py][my.reddit]]
Reddit data: saved items/comments/upvotes/etc.
@ -115,10 +137,10 @@ for cls, p in modules:
export_path: Paths
# path to a local clone of rexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/rexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/rexport
rexport : Optional[PathIsh] = None
#+end_src
- [[file:../my/twitter/twint.py][my.twitter.twint]]
** [[file:../my/twitter/twint.py][my.twitter.twint]]
Twitter data (tweets and favorites).
@ -128,7 +150,7 @@ for cls, p in modules:
class twint:
export_path: Paths # path[s]/glob to the twint Sqlite database
#+end_src
- [[file:../my/twitter/archive.py][my.twitter.archive]]
** [[file:../my/twitter/archive.py][my.twitter.archive]]
Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
@ -136,7 +158,7 @@ for cls, p in modules:
class twitter:
export_path: Paths # path[s]/glob to the twitter archive takeout
#+end_src
- [[file:../my/lastfm][my.lastfm]]
** [[file:../my/lastfm][my.lastfm]]
Last.fm scrobbles
@ -147,7 +169,7 @@ for cls, p in modules:
"""
export_path: Paths
#+end_src
- [[file:../my/reading/polar.py][my.reading.polar]]
** [[file:../my/reading/polar.py][my.reading.polar]]
[[https://github.com/burtonator/polar-books][Polar]] articles and highlights
@ -159,7 +181,7 @@ for cls, p in modules:
polar_dir: PathIsh = Path('~/.polar').expanduser()
defensive: bool = True # pass False if you want it to fail faster on errors (useful for debugging)
#+end_src
- [[file:../my/instapaper.py][my.instapaper]]
** [[file:../my/instapaper.py][my.instapaper]]
[[https://www.instapaper.com][Instapaper]] bookmarks, highlights and annotations
@ -172,7 +194,6 @@ for cls, p in modules:
export_path : Paths
# path to a local clone of instapexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/instapexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport
instapexport: Optional[PathIsh] = None
#+end_src
:end:

View file

@ -2,8 +2,36 @@
Please don't be shy and raise issues if something in the instructions is unclear.
You'd be really helping me, I want to make the setup as straightforward as possible!
# update with org-make-toc
* TOC
:PROPERTIES:
:TOC: :include all
:END:
:CONTENTS:
- [[#toc][TOC]]
- [[#few-notes][Few notes]]
- [[#setting-up-the-main-package][Setting up the main package]]
- [[#option-1-install-from-pip][option 1: install from PIP]]
- [[#option-2-local-install][option 2: local install]]
- [[#option-3-use-without-installing][option 3: use without installing]]
- [[#optional-packages][Optional packages]]
- [[#setting-up-the-modules][Setting up the modules]]
- [[#private-configuration-myconfig][private configuration (my.config)]]
- [[#module-dependencies][module dependencies]]
- [[#usage-examples][Usage examples]]
- [[#end-to-end-roam-research-setup][End-to-end Roam Research setup]]
- [[#polar][Polar]]
- [[#google-takeout][Google Takeout]]
- [[#kobo-reader][Kobo reader]]
- [[#orger][Orger]]
- [[#orger--polar][Orger + Polar]]
- [[#demopy][demo.py]]
:END:
* Few notes
I understand people may not super familiar with Python, PIP or generally unix, so here are some short notes:
I understand people may not be super familiar with Python, PIP or generally unix, so here are some useful notes:
- only python3 is supported, and more specifically, ~python >= 3.6~.
- I'm using ~pip3~ command, but on your system you might only have ~pip~.
@ -13,7 +41,7 @@ I understand people may not super familiar with Python, PIP or generally unix, s
- similarly, I'm using =python3= in the documentation, but if your =python --version= says python3, it's okay to use =python=
- when you are using ~pip install~, [[https://stackoverflow.com/a/42989020/706389][always pass]] =--user=, and *never install third party packages with sudo* (unless you know what you are doing)
- throughout the guide I'm assuming the config directory is =~/.config=, but it's different on Mac/Windows.
- throughout the guide I'm assuming the user config directory is =~/.config=, but it's *different on Mac/Windows*.
See [[https://github.com/ActiveState/appdirs/blob/3fe6a83776843a46f20c2e5587afcffe05e03b39/appdirs.py#L187-L190][this]] if you're not sure what your user config dir is.
@ -22,12 +50,12 @@ This is a *required step*
You can choose one of the following options:
** install from [[https://pypi.org/project/HPI][PIP]]
This is the easiest way:
** option 1: install from [[https://pypi.org/project/HPI][PIP]]
This is the *easiest way*:
: pip3 install --user HPI
** local install
** option 2: local install
This is convenient if you're planning to add new modules or change the existing ones.
1. Clone the repository: =git clone git@github.com:karlicoss/HPI.git /path/to/hpi=
@ -39,7 +67,7 @@ This is convenient if you're planning to add new modules or change the existing
It's *extremely* convenient for developing and debugging.
** use without installing
** option 3: use without installing
This is less convenient, but gives you more control.
1. Clone the repository: =git clone git@github.com:karlicoss/HPI.git /path/to/hpi=
@ -59,7 +87,7 @@ This is less convenient, but gives you more control.
The benefit of this way is that you get a bit more control, explicitly allowing your scripts to use your data.
** optional packages
* Optional packages
You can also install some optional packages
: pip3 install 'HPI[optional]'
@ -69,12 +97,14 @@ They aren't necessary, but improve your experience. At the moment these are:
- [[https://github.com/karlicoss/cachew][cachew]]: automatic caching library, which can greatly speedup data access
- [[https://github.com/metachris/logzero][logzero]]: a nice logging library, supporting colors
* Setting up the modules
This is an *optional step* as some modules might work without extra setup.
* Setting up modules
This is an *optional step* as a few modules work without extra setup.
But it depends on the specific module.
See [[file:MODULES.org][MODULES]] to read documentation on specific modules that interest you.
You might also find it interesting to read [[file:CONFIGURING.org][CONFIGURING]], where I'm
elaborating on some rationales behind the current configuration system.
elaborating on some technical rationales behind the current configuration system.
** private configuration (=my.config=)
# TODO write about dynamic configuration
@ -87,7 +117,7 @@ The config is simply a *python package* (named =my.config=), expected to be in =
Since it's a Python package, generally it's very *flexible* and there are many ways to set it up.
- The simplest and very minimum you need is =~/.config/my/my/config.py=. For example:
- *The simplest and the very minimum* you need is =~/.config/my/my/config.py=. For example:
#+begin_src python
import pytz # yes, you can use any Python stuff in the config
@ -116,32 +146,6 @@ Since it's a Python package, generally it's very *flexible* and there are many w
- or you can just try running them and fill in the attributes Python complains about!
- My config layout is a bit more complicated:
#+begin_src python :exports results :results output
from pathlib import Path
home = Path("~").expanduser()
pp = home / '.config/my/my/config'
for p in sorted(pp.rglob('*')):
if '__pycache__' in p.parts:
continue
ps = str(p).replace(str(home), '~')
print(ps)
#+end_src
#+RESULTS:
#+begin_example
~/.config/my/my/config/__init__.py
~/.config/my/my/config/locations.py
~/.config/my/my/config/repos
~/.config/my/my/config/repos/endoexport
~/.config/my/my/config/repos/fbmessengerexport
~/.config/my/my/config/repos/kobuddy
~/.config/my/my/config/repos/monzoexport
~/.config/my/my/config/repos/pockexport
~/.config/my/my/config/repos/rexport
#+end_example
- Another example is in [[file:example_config][example_config]]:
#+begin_src bash :exports results :results output
@ -183,6 +187,32 @@ Feel free to add other files as well though to organize better, it's a real Pyth
Some things (e.g. links to external packages like [[https://github.com/karlicoss/hypexport][hypexport]]) are specified as *ordinary symlinks* in ~repos~ directory.
That way you get easy imports (e.g. =import my.config.repos.hypexport.model=) and proper IDE integration.
- my own config layout is a bit more complicated:
#+begin_src python :exports results :results output
from pathlib import Path
home = Path("~").expanduser()
pp = home / '.config/my/my/config'
for p in sorted(pp.rglob('*')):
if '__pycache__' in p.parts:
continue
ps = str(p).replace(str(home), '~')
print(ps)
#+end_src
#+RESULTS:
#+begin_example
~/.config/my/my/config/__init__.py
~/.config/my/my/config/locations.py
~/.config/my/my/config/repos
~/.config/my/my/config/repos/endoexport
~/.config/my/my/config/repos/fbmessengerexport
~/.config/my/my/config/repos/kobuddy
~/.config/my/my/config/repos/monzoexport
~/.config/my/my/config/repos/pockexport
~/.config/my/my/config/repos/rexport
#+end_example
# TODO link to post about exports?
** module dependencies
Dependencies are different for specific modules you're planning to use, so it's hard to specify.

View file

@ -27,3 +27,5 @@ def set_repo(name: str, repo: Union[Path, str]) -> None:
# TODO set_repo is still useful, but perhaps move this thing away to core?
# TODO ok, I need to get rid of this, better to rely on regular imports

View file

@ -9,6 +9,7 @@ import warnings
# some helper functions
PathIsh = Union[Path, str]
# TODO only used in tests? not sure if useful at all.
# TODO port annotations to kython?..
def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
p = Path(p)
@ -33,6 +34,13 @@ def import_from(path: PathIsh, name: str) -> types.ModuleType:
sys.path.remove(path)
def import_dir(path: PathIsh, extra: str='') -> types.ModuleType:
    """
    Import a module identified by a directory path.

    The last component of ``path`` is used as the module name, with ``extra``
    appended verbatim (e.g. ``'.dal'`` to ask for a submodule); the parent
    directory and that name are then handed over to ``import_from``.

    If the first component of ``path`` is a literal ``~``, it's expanded to the
    user's home directory before splitting.
    """
    target = Path(path)
    first_component = target.parts[0]
    if first_component == '~':
        # TODO eh. not sure about this..
        target = target.expanduser()
    module_name = target.name + extra
    return import_from(target.parent, module_name)
T = TypeVar('T')
K = TypeVar('K')
V = TypeVar('V')
@ -124,6 +132,8 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path,
paths: List[Path] = []
for src in sources:
if src.parts[0] == '~':
src = src.expanduser()
if src.is_dir():
gp: Iterable[Path] = src.glob(glob)
paths.extend(gp)

View file

@ -41,7 +41,10 @@ def setup_config() -> None:
mycfg_dir = Path(appdirs.user_config_dir('my'))
if not mycfg_dir.exists():
warnings.warn(f"my.config package isn't found! (expected at {mycfg_dir}). This is likely to result in issues.")
warnings.warn(f"""
'my.config' package isn't found! (expected at {mycfg_dir}). This is likely to result in issues.
See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
""".strip())
return
mpath = str(mycfg_dir)
@ -58,10 +61,12 @@ def setup_config() -> None:
import my.config
except ImportError as ex:
# just in case... who knows what crazy setup users have in mind.
warnings.warn(f"Importing my.config failed! (error: {ex}). This is likely to result in issues.")
# todo log?
warnings.warn(f"""
Importing 'my.config' failed! (error: {ex}). This is likely to result in issues.
See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info.
""")
setup_config()
del setup_config
# TODO move to my.core?

View file

@ -2,23 +2,42 @@
Just a demo module for testing and documentation purposes
'''
from .core import Paths
from .core import Paths, PathIsh
from typing import Optional
from datetime import tzinfo
import pytz
from my.config import demo as user_config
from dataclasses import dataclass
@dataclass
class demo(user_config):
    # Config for the demo module; extends whatever the user defined as ``my.config.demo``.
    data_path: Paths
    username: str
    timezone: tzinfo = pytz.utc

    # optional path to the 'external' package; when unset, my.config.repos.external is used instead
    external: Optional[PathIsh] = None

    @property
    def external_module(self):
        # Resolve the 'external' helper module: explicit path first, config repos otherwise.
        repo_path = self.external
        if repo_path is None:
            # nothing configured explicitly, rely on the regular import
            import my.config.repos.external as fallback # type: ignore
            return fallback

        from .core.common import import_dir
        return import_dir(repo_path)
from .core import make_config
config = make_config(demo)
# TODO not sure about type checking?
external = config.external_module
from pathlib import Path
from typing import Sequence, Iterable
from datetime import datetime
@ -46,6 +65,6 @@ def items() -> Iterable[Item]:
for raw in j:
yield Item(
username=config.username,
raw=raw,
raw=external.identity(raw),
dt=dt,
)

View file

@ -19,18 +19,18 @@ class hypothesis(user_config):
export_path: Paths
# path to a local clone of hypexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/hypexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/hypexport
hypexport : Optional[PathIsh] = None
@property
def dal_module(self):
rpath = self.hypexport
if rpath is not None:
from .cfg import set_repo
set_repo('hypexport', rpath)
import my.config.repos.hypexport.dal as dal
return dal
from .core.common import import_dir
return import_dir(rpath, '.dal')
else:
import my.config.repos.hypexport.dal as dal
return dal
from .core.cfg import make_config

View file

@ -18,18 +18,18 @@ class instapaper(user_config):
export_path : Paths
# path to a local clone of instapexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/instapexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport
instapexport: Optional[PathIsh] = None
@property
def dal_module(self):
rpath = self.instapexport
if rpath is not None:
from .cfg import set_repo
set_repo('instapexport', rpath)
import my.config.repos.instapexport.dal as dal
return dal
from .core.common import import_dir
return import_dir(rpath, '.dal')
else:
import my.config.repos.instapexport.dal as dal
return dal
from .core.cfg import make_config

View file

@ -19,18 +19,18 @@ class reddit(uconfig):
export_path: Paths
# path to a local clone of rexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/rexport
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/rexport
rexport : Optional[PathIsh] = None
@property
def dal_module(self) -> ModuleType:
rpath = self.rexport
if rpath is not None:
from .cfg import set_repo
set_repo('rexport', rpath)
import my.config.repos.rexport.dal as dal
return dal
from .core.common import import_dir
return import_dir(rpath, '.dal')
else:
import my.config.repos.rexport.dal as dal
return dal
from .core.cfg import make_config, Attrs

View file

@ -11,6 +11,7 @@ def test_dynamic_config_1(tmp_path: Path) -> None:
class user_config:
username = 'user'
data_path = f'{tmp_path}/*.json'
external = f'{tmp_path}/external'
my.config.demo = user_config # type: ignore[misc, assignment]
from my.demo import items
@ -29,6 +30,7 @@ def test_dynamic_config_2(tmp_path: Path) -> None:
class user_config:
username = 'user2'
data_path = f'{tmp_path}/*.json'
external = f'{tmp_path}/external'
my.config.demo = user_config # type: ignore[misc, assignment]
from my.demo import items
@ -75,6 +77,7 @@ def test_attribute_handling(tmp_path: Path) -> None:
username = 'UUU'
data_path = f'{tmp_path}/*.json'
external = f'{tmp_path}/external'
my.config.demo = user_config # type: ignore[misc, assignment]
@ -99,4 +102,17 @@ def prepare(tmp_path: Path):
{"key2": 2}
]
''')
ext = tmp_path / 'external'
ext.mkdir()
(ext / '__init__.py').write_text('''
def identity(x):
from .submodule import hello
hello(x)
return x
''')
(ext / 'submodule.py').write_text('hello = lambda x: print("hello " + str(x))')
yield
ex = 'my.config.repos.external'
if ex in sys.modules:
del sys.modules[ex]

View file

@ -26,6 +26,12 @@ def test_single_file():
)
"if the path starts with ~, we expand it"
assert get_files('~/.bashrc') == (
Path('~').expanduser() / '.bashrc',
)
def test_multiple_files():
'''
If you pass a directory/multiple directories, it flattens the contents