diff --git a/doc/DEVELOPMENT.org b/doc/DEVELOPMENT.org index f338760..51a67e6 100644 --- a/doc/DEVELOPMENT.org +++ b/doc/DEVELOPMENT.org @@ -1,8 +1,23 @@ +* TOC +:PROPERTIES: +:TOC: :include all :depth 3 +:END: + +:CONTENTS: +- [[#toc][TOC]] +- [[#running-tests][Running tests]] +- [[#ide-setup][IDE setup]] +- [[#linting][Linting]] +- [[#modifyingadding-modules][Modifying/adding modules]] +:END: + * Running tests I'm using =tox= to run test/lint. You can check out [[file:../.github/workflows/main.yml][Github Actions]] config and [[file:../scripts/ci/run]] for the up to date info on the specifics. -* IDE setup: make sure my.config is in your package search path +* IDE setup +To benefit from type hinting, make sure =my.config= is in your package search path. + In runtime, ~my.config~ is imported from the user config directory dynamically. However, Pycharm/Emacs/whatever you use won't be able to figure that out, so you'd need to adjust your IDE configuration. @@ -43,3 +58,5 @@ Now if you add =my_reddit_overlay= *in the front* of ~PYTHONPATH~, all the downs This could be useful to monkey patch some behaviours, or dynamically add some extra data sources -- anything that comes to your mind. I'll put up a better guide on this, in the meantime see [[https://packaging.python.org/guides/packaging-namespace-packages]["namespace packages"]] for more info. + +# TODO add example with overriding 'all' diff --git a/doc/MODULES.org b/doc/MODULES.org index d896eaa..7f83e9f 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -1,11 +1,31 @@ This file is an overview of *documented* modules. There are many more, see [[file:../README.org::#whats-inside]["What's inside"]] for the full list of modules, I'm progressively working on documenting them. 
+* TOC +:PROPERTIES: +:TOC: :include all +:END: +:CONTENTS: +- [[#toc][TOC]] +- [[#intro][Intro]] +- [[#configs][Configs]] + - [[#mygoogletakeoutpaths][my.google.takeout.paths]] + - [[#myhypothesis][my.hypothesis]] + - [[#myreddit][my.reddit]] + - [[#mytwittertwint][my.twitter.twint]] + - [[#mytwitterarchive][my.twitter.archive]] + - [[#mylastfm][my.lastfm]] + - [[#myreadingpolar][my.reading.polar]] + - [[#myinstapaper][my.instapaper]] +:END: + +* Intro + See [[file:SETUP.org][SETUP]] to find out how to set up your own config. Some explanations: -- =MY_CONFIG= is whereever you are keeping your private configuration (usually =~/.config/my/=) +- =MY_CONFIG= is the path where you are keeping your private configuration (usually =~/.config/my/=) - [[https://docs.python.org/3/library/pathlib.html#pathlib.Path][Path]] is a standard Python object to represent paths - [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L9][PathIsh]] is a helper type to allow using either =str=, or a =Path= - [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L108][Paths]] is another helper type for paths. @@ -21,12 +41,15 @@ Some explanations: - if the field has a default value, you can omit it from your private config altogether +* Configs The config snippets below are meant to be modified accordingly and *pasted into your private configuration*, e.g =$MY_CONFIG/my/config.py=. You don't have to set them up all at once, it's recommended to do it gradually. -#+begin_src python :dir .. :results output drawer :exports result +# TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh. + +#+begin_src python :dir .. :results output drawer raw :exports result # TODO ugh, pkgutil.walk_packages doesn't recurse and find packages like my.twitter.archive?? 
import importlib # from lint import all_modules # meh @@ -63,7 +86,7 @@ for cls, p in modules: for x in ['.py', '__init__.py']: if Path(mpath + x).exists(): mpath = mpath + x - print(f'- [[file:../{mpath}][{p}]]') + print(f'** [[file:../{mpath}][{p}]]') mdoc = m.__doc__ if mdoc is not None: print(indent(mdoc)) @@ -73,10 +96,9 @@ for cls, p in modules: #+end_src #+RESULTS: -:results: -- [[file:../my/google/takeout/paths.py][my.google.takeout.paths]] +** [[file:../my/google/takeout/paths.py][my.google.takeout.paths]] Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data @@ -84,7 +106,7 @@ for cls, p in modules: class google: takeout_path: Paths # path/paths/glob for the takeout zips #+end_src -- [[file:../my/hypothesis.py][my.hypothesis]] +** [[file:../my/hypothesis.py][my.hypothesis]] [[https://hypothes.is][Hypothes.is]] highlights and annotations @@ -98,10 +120,10 @@ for cls, p in modules: export_path: Paths # path to a local clone of hypexport - # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/hypexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/hypexport hypexport : Optional[PathIsh] = None #+end_src -- [[file:../my/reddit.py][my.reddit]] +** [[file:../my/reddit.py][my.reddit]] Reddit data: saved items/comments/upvotes/etc. @@ -115,10 +137,10 @@ for cls, p in modules: export_path: Paths # path to a local clone of rexport - # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/rexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/rexport rexport : Optional[PathIsh] = None #+end_src -- [[file:../my/twitter/twint.py][my.twitter.twint]] +** [[file:../my/twitter/twint.py][my.twitter.twint]] Twitter data (tweets and favorites). 
@@ -128,7 +150,7 @@ for cls, p in modules: class twint: export_path: Paths # path[s]/glob to the twint Sqlite database #+end_src -- [[file:../my/twitter/archive.py][my.twitter.archive]] +** [[file:../my/twitter/archive.py][my.twitter.archive]] Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]]) @@ -136,7 +158,7 @@ for cls, p in modules: class twitter: export_path: Paths # path[s]/glob to the twitter archive takeout #+end_src -- [[file:../my/lastfm][my.lastfm]] +** [[file:../my/lastfm][my.lastfm]] Last.fm scrobbles @@ -147,7 +169,7 @@ for cls, p in modules: """ export_path: Paths #+end_src -- [[file:../my/reading/polar.py][my.reading.polar]] +** [[file:../my/reading/polar.py][my.reading.polar]] [[https://github.com/burtonator/polar-books][Polar]] articles and highlights @@ -159,7 +181,7 @@ for cls, p in modules: polar_dir: PathIsh = Path('~/.polar').expanduser() defensive: bool = True # pass False if you want it to fail faster on errors (useful for debugging) #+end_src -- [[file:../my/instapaper.py][my.instapaper]] +** [[file:../my/instapaper.py][my.instapaper]] [[https://www.instapaper.com][Instapaper]] bookmarks, highlights and annotations @@ -172,7 +194,6 @@ for cls, p in modules: export_path : Paths # path to a local clone of instapexport - # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/instapexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport instapexport: Optional[PathIsh] = None #+end_src -:end: diff --git a/doc/SETUP.org b/doc/SETUP.org index 90c0df8..17582bd 100644 --- a/doc/SETUP.org +++ b/doc/SETUP.org @@ -2,8 +2,36 @@ Please don't be shy and raise issues if something in the instructions is unclear. You'd be really helping me, I want to make the setup as straightforward as possible! 
+# update with org-make-toc +* TOC +:PROPERTIES: +:TOC: :include all +:END: + +:CONTENTS: +- [[#toc][TOC]] +- [[#few-notes][Few notes]] +- [[#setting-up-the-main-package][Setting up the main package]] + - [[#option-1-install-from-pip][option 1: install from PIP]] + - [[#option-2-local-install][option 2: local install]] + - [[#option-3-use-without-installing][option 3: use without installing]] +- [[#optional-packages][Optional packages]] +- [[#setting-up-the-modules][Setting up the modules]] + - [[#private-configuration-myconfig][private configuration (my.config)]] + - [[#module-dependencies][module dependencies]] +- [[#usage-examples][Usage examples]] + - [[#end-to-end-roam-research-setup][End-to-end Roam Research setup]] + - [[#polar][Polar]] + - [[#google-takeout][Google Takeout]] + - [[#kobo-reader][Kobo reader]] + - [[#orger][Orger]] + - [[#orger--polar][Orger + Polar]] + - [[#demopy][demo.py]] +:END: + + * Few notes -I understand people may not super familiar with Python, PIP or generally unix, so here are some short notes: +I understand people may not be super familiar with Python, PIP or generally unix, so here are some useful notes: - only python3 is supported, and more specifically, ~python >= 3.6~. - I'm using ~pip3~ command, but on your system you might only have ~pip~. @@ -13,7 +41,7 @@ I understand people may not super familiar with Python, PIP or generally unix, s - similarly, I'm using =python3= in the documentation, but if your =python --version= says python3, it's okay to use =python= - when you are using ~pip install~, [[https://stackoverflow.com/a/42989020/706389][always pass]] =--user=, and *never install third party packages with sudo* (unless you know what you are doing) -- throughout the guide I'm assuming the config directory is =~/.config=, but it's different on Mac/Windows. +- throughout the guide I'm assuming the user config directory is =~/.config=, but it's *different on Mac/Windows*. 
See [[https://github.com/ActiveState/appdirs/blob/3fe6a83776843a46f20c2e5587afcffe05e03b39/appdirs.py#L187-L190][this]] if you're not sure what's your user config dir. @@ -22,12 +50,12 @@ This is a *required step* You can choose one of the following options: -** install from [[https://pypi.org/project/HPI][PIP]] -This is the easiest way: +** option 1: install from [[https://pypi.org/project/HPI][PIP]] +This is the *easiest way*: : pip3 install --user HPI -** local install +** option 2: local install This is convenient if you're planning to add new modules or change the existing ones. 1. Clone the repository: =git clone git@github.com:karlicoss/HPI.git /path/to/hpi= @@ -39,7 +67,7 @@ This is convenient if you're planning to add new modules or change the existing It's *extremely* convenient for developing and debugging. -** use without installing +** option 3: use without installing This is less convenient, but gives you more control. 1. Clone the repository: =git clone git@github.com:karlicoss/HPI.git /path/to/hpi= @@ -59,7 +87,7 @@ This is less convenient, but gives you more control. The benefit of this way is that you get a bit more control, explicitly allowing your scripts to use your data. -** optional packages +* Optional packages You can also install some opional packages : pip3 install 'HPI[optional]' @@ -69,12 +97,14 @@ They aren't necessary, but improve your experience. At the moment these are: - [[https://github.com/karlicoss/cachew][cachew]]: automatic caching library, which can greatly speedup data access - [[https://github.com/metachris/logzero][logzero]]: a nice logging library, supporting colors -* Setting up the modules -This is an *optional step* as some modules might work without extra setup. +* Setting up the modules +This is an *optional step* as a few modules work without extra setup. But it depends on the specific module. +See [[file:MODULES.org][MODULES]] to read documentation on specific modules that interest you. 
+ You might also find interesting to read [[file:CONFIGURING.org][CONFIGURING]], where I'm -elaborating on some rationales behind the current configuration system. +elaborating on some technical rationales behind the current configuration system. ** private configuration (=my.config=) # TODO write about dynamic configuration @@ -87,7 +117,7 @@ The config is simply a *python package* (named =my.config=), expected to be in = Since it's a Python package, generally it's very *flexible* and there are many ways to set it up. -- The simplest and very minimum you need is =~/.config/my/my/config.py=. For example: +- *The simplest and the very minimum* you need is =~/.config/my/my/config.py=. For example: #+begin_src python import pytz # yes, you can use any Python stuff in the config @@ -116,32 +146,6 @@ Since it's a Python package, generally it's very *flexible* and there are many w - or you can just try running them and fill in the attributes Python complains about! -- My config layout is a bit more complicated: - - #+begin_src python :exports results :results output - from pathlib import Path - home = Path("~").expanduser() - pp = home / '.config/my/my/config' - for p in sorted(pp.rglob('*')): - if '__pycache__' in p.parts: - continue - ps = str(p).replace(str(home), '~') - print(ps) - #+end_src - - #+RESULTS: - #+begin_example - ~/.config/my/my/config/__init__.py - ~/.config/my/my/config/locations.py - ~/.config/my/my/config/repos - ~/.config/my/my/config/repos/endoexport - ~/.config/my/my/config/repos/fbmessengerexport - ~/.config/my/my/config/repos/kobuddy - ~/.config/my/my/config/repos/monzoexport - ~/.config/my/my/config/repos/pockexport - ~/.config/my/my/config/repos/rexport - #+end_example - - Another example is in [[file:example_config][example_config]]: #+begin_src bash :exports results :results output @@ -183,6 +187,32 @@ Feel free to add other files as well though to organize better, it's a real Pyth Some things (e.g. 
links to external packages like [[https://github.com/karlicoss/hypexport][hypexport]]) are specified as *ordinary symlinks* in ~repos~ directory. That way you get easy imports (e.g. =import my.config.repos.hypexport.model=) and proper IDE integration. +- my own config layout is a bit more complicated: + + #+begin_src python :exports results :results output + from pathlib import Path + home = Path("~").expanduser() + pp = home / '.config/my/my/config' + for p in sorted(pp.rglob('*')): + if '__pycache__' in p.parts: + continue + ps = str(p).replace(str(home), '~') + print(ps) + #+end_src + + #+RESULTS: + #+begin_example + ~/.config/my/my/config/__init__.py + ~/.config/my/my/config/locations.py + ~/.config/my/my/config/repos + ~/.config/my/my/config/repos/endoexport + ~/.config/my/my/config/repos/fbmessengerexport + ~/.config/my/my/config/repos/kobuddy + ~/.config/my/my/config/repos/monzoexport + ~/.config/my/my/config/repos/pockexport + ~/.config/my/my/config/repos/rexport + #+end_example + # TODO link to post about exports? ** module dependencies Dependencies are different for specific modules you're planning to use, so it's hard to specify. diff --git a/my/cfg.py b/my/cfg.py index 3447525..2cd6454 100644 --- a/my/cfg.py +++ b/my/cfg.py @@ -27,3 +27,5 @@ def set_repo(name: str, repo: Union[Path, str]) -> None: # TODO set_repo is still useful, but perhaps move this thing away to core? + +# TODO ok, I need to get rid of this, better to rely on regular imports diff --git a/my/core/common.py b/my/core/common.py index 985ca67..dec0b15 100644 --- a/my/core/common.py +++ b/my/core/common.py @@ -9,6 +9,7 @@ import warnings # some helper functions PathIsh = Union[Path, str] +# TODO only used in tests? not sure if useful at all. # TODO port annotations to kython?.. 
def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType: p = Path(p) @@ -33,6 +34,13 @@ def import_from(path: PathIsh, name: str) -> types.ModuleType: sys.path.remove(path) +def import_dir(path: PathIsh, extra: str='') -> types.ModuleType: + p = Path(path) + if p.parts[0] == '~': + p = p.expanduser() # TODO eh. not sure about this.. + return import_from(p.parent, p.name + extra) + + T = TypeVar('T') K = TypeVar('K') V = TypeVar('V') @@ -124,6 +132,8 @@ def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, paths: List[Path] = [] for src in sources: + if src.parts[0] == '~': + src = src.expanduser() if src.is_dir(): gp: Iterable[Path] = src.glob(glob) paths.extend(gp) diff --git a/my/core/init.py b/my/core/init.py index 4070f4d..dd21a0a 100644 --- a/my/core/init.py +++ b/my/core/init.py @@ -41,7 +41,10 @@ def setup_config() -> None: mycfg_dir = Path(appdirs.user_config_dir('my')) if not mycfg_dir.exists(): - warnings.warn(f"my.config package isn't found! (expected at {mycfg_dir}). This is likely to result in issues.") + warnings.warn(f""" +'my.config' package isn't found! (expected at {mycfg_dir}). This is likely to result in issues. +See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info. +""".strip()) return mpath = str(mycfg_dir) @@ -58,10 +61,12 @@ def setup_config() -> None: import my.config except ImportError as ex: # just in case... who knows what crazy setup users have in mind. - warnings.warn(f"Importing my.config failed! (error: {ex}). This is likely to result in issues.") + # todo log? + warnings.warn(f""" +Importing 'my.config' failed! (error: {ex}). This is likely to result in issues. +See https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org#setting-up-the-modules for more info. +""") setup_config() del setup_config - -# TODO move to my.core? 
diff --git a/my/demo.py b/my/demo.py index 2df9d67..3a9d1b3 100644 --- a/my/demo.py +++ b/my/demo.py @@ -2,23 +2,42 @@ Just a demo module for testing and documentation purposes ''' -from .core import Paths +from .core import Paths, PathIsh +from typing import Optional from datetime import tzinfo import pytz from my.config import demo as user_config from dataclasses import dataclass + @dataclass class demo(user_config): data_path: Paths username: str timezone: tzinfo = pytz.utc + external: Optional[PathIsh] = None + + @property + def external_module(self): + rpath = self.external + if rpath is not None: + from .core.common import import_dir + return import_dir(rpath) + + import my.config.repos.external as m # type: ignore + return m + + from .core import make_config config = make_config(demo) +# TODO not sure about type checking? +external = config.external_module + + from pathlib import Path from typing import Sequence, Iterable from datetime import datetime @@ -46,6 +65,6 @@ def items() -> Iterable[Item]: for raw in j: yield Item( username=config.username, - raw=raw, + raw=external.identity(raw), dt=dt, ) diff --git a/my/hypothesis.py b/my/hypothesis.py index 738ba6e..bacc51e 100644 --- a/my/hypothesis.py +++ b/my/hypothesis.py @@ -19,18 +19,18 @@ class hypothesis(user_config): export_path: Paths # path to a local clone of hypexport - # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/hypexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/hypexport hypexport : Optional[PathIsh] = None @property def dal_module(self): rpath = self.hypexport if rpath is not None: - from .cfg import set_repo - set_repo('hypexport', rpath) - - import my.config.repos.hypexport.dal as dal - return dal + from .core.common import import_dir + return import_dir(rpath, '.dal') + else: + import my.config.repos.hypexport.dal as dal + return dal from .core.cfg import make_config diff --git a/my/instapaper.py 
b/my/instapaper.py index 3ea064d..9730eeb 100644 --- a/my/instapaper.py +++ b/my/instapaper.py @@ -18,18 +18,18 @@ class instapaper(user_config): export_path : Paths # path to a local clone of instapexport - # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/instapexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport instapexport: Optional[PathIsh] = None @property def dal_module(self): rpath = self.instapexport if rpath is not None: - from .cfg import set_repo - set_repo('instapexport', rpath) - - import my.config.repos.instapexport.dal as dal - return dal + from .core.common import import_dir + return import_dir(rpath, '.dal') + else: + import my.config.repos.instapexport.dal as dal + return dal from .core.cfg import make_config diff --git a/my/reddit.py b/my/reddit.py index 7e9f908..afc5bfb 100755 --- a/my/reddit.py +++ b/my/reddit.py @@ -19,18 +19,18 @@ class reddit(uconfig): export_path: Paths # path to a local clone of rexport - # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/repos/rexport + # alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/rexport rexport : Optional[PathIsh] = None @property def dal_module(self) -> ModuleType: rpath = self.rexport if rpath is not None: - from .cfg import set_repo - set_repo('rexport', rpath) - - import my.config.repos.rexport.dal as dal - return dal + from .core.common import import_dir + return import_dir(rpath, '.dal') + else: + import my.config.repos.rexport.dal as dal + return dal from .core.cfg import make_config, Attrs diff --git a/tests/demo.py b/tests/demo.py index 4dfae6d..9da5966 100644 --- a/tests/demo.py +++ b/tests/demo.py @@ -11,6 +11,7 @@ def test_dynamic_config_1(tmp_path: Path) -> None: class user_config: username = 'user' data_path = f'{tmp_path}/*.json' + external = f'{tmp_path}/external' my.config.demo = user_config # type: ignore[misc, assignment] from 
my.demo import items @@ -29,6 +30,7 @@ def test_dynamic_config_2(tmp_path: Path) -> None: class user_config: username = 'user2' data_path = f'{tmp_path}/*.json' + external = f'{tmp_path}/external' my.config.demo = user_config # type: ignore[misc, assignment] from my.demo import items @@ -75,6 +77,7 @@ def test_attribute_handling(tmp_path: Path) -> None: username = 'UUU' data_path = f'{tmp_path}/*.json' + external = f'{tmp_path}/external' my.config.demo = user_config # type: ignore[misc, assignment] @@ -99,4 +102,17 @@ def prepare(tmp_path: Path): {"key2": 2} ] ''') + ext = tmp_path / 'external' + ext.mkdir() + (ext / '__init__.py').write_text(''' +def identity(x): + from .submodule import hello + hello(x) + return x + +''') + (ext / 'submodule.py').write_text('hello = lambda x: print("hello " + str(x))') yield + ex = 'my.config.repos.external' + if ex in sys.modules: + del sys.modules[ex] diff --git a/tests/get_files.py b/tests/get_files.py index a3fd5a9..29e0528 100644 --- a/tests/get_files.py +++ b/tests/get_files.py @@ -26,6 +26,12 @@ def test_single_file(): ) + "if the path starts with ~, we expand it" + assert get_files('~/.bashrc') == ( + Path('~').expanduser() / '.bashrc', + ) + + def test_multiple_files(): ''' If you pass a directory/multiple directories, it flattens the contents