Hopefully this makes a lot of sense in the first place, and there are not that many users, so it deserves breaking.
277 lines
9.7 KiB
Org Mode
277 lines
9.7 KiB
Org Mode
This file is an overview of *documented* modules (which I'm progressively expanding).
|
|
|
|
There are many more, see:
|
|
|
|
- [[file:../README.org::#whats-inside]["What's inside"]] for the full list of modules.
|
|
- you can also run =hpi modules= to list what's available on your system
|
|
- source code is always the primary source of truth
|
|
|
|
If you have some issues with the setup, see [[file:SETUP.org::#troubleshooting]["Troubleshooting"]].
|
|
|
|
* TOC
|
|
:PROPERTIES:
|
|
:TOC: :include all
|
|
:END:
|
|
:CONTENTS:
|
|
- [[#toc][TOC]]
|
|
- [[#intro][Intro]]
|
|
- [[#configs][Configs]]
|
|
- [[#mygoogletakeoutpaths][my.google.takeout.paths]]
|
|
- [[#myhypothesis][my.hypothesis]]
|
|
- [[#myreddit][my.reddit]]
|
|
- [[#mypocket][my.pocket]]
|
|
- [[#mytwittertwint][my.twitter.twint]]
|
|
- [[#mytwitterarchive][my.twitter.archive]]
|
|
- [[#mylastfm][my.lastfm]]
|
|
- [[#myreadingpolar][my.reading.polar]]
|
|
- [[#myinstapaper][my.instapaper]]
|
|
- [[#mygithubgdpr][my.github.gdpr]]
|
|
- [[#mygithubghexport][my.github.ghexport]]
|
|
- [[#mykobo][my.kobo]]
|
|
:END:
|
|
|
|
* Intro
|
|
|
|
See [[file:SETUP.org][SETUP]] to find out how to set up your own config.
|
|
|
|
Some explanations:
|
|
|
|
- =MY_CONFIG= is the path where you are keeping your private configuration (usually =~/.config/my/=)
|
|
- [[https://docs.python.org/3/library/pathlib.html#pathlib.Path][Path]] is a standard Python object to represent paths
|
|
- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L9][PathIsh]] is a helper type to allow using either =str=, or a =Path=
|
|
- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L108][Paths]] is another helper type for paths.
|
|
|
|
It's 'smart', allows you to be flexible about your config:
|
|
|
|
- simple =str= or a =Path=
|
|
- =/a/path/to/directory/=, so the module will consume all files from this directory
|
|
- a list of files/directories (it will be flattened)
|
|
- a [[https://docs.python.org/3/library/glob.html?highlight=glob#glob.glob][glob]] string, so you can be flexible about the format of your data on disk (e.g. if you want to keep it compressed)
|
|
- empty string (e.g. ~export_path = ''~), this will prevent the module from consuming any data
|
|
|
|
This can be useful for modules that merge multiple data sources (for example, =my.twitter= or =my.github=)
|
|
|
|
Typically, such a variable will be passed to =get_files= to actually extract the list of real files to use. You can see usage examples [[https://github.com/karlicoss/HPI/blob/master/tests/get_files.py][here]].
|
|
|
|
- if the field has a default value, you can omit it from your private config altogether
|
|
|
|
* Configs
|
|
|
|
The config snippets below are meant to be modified accordingly and *pasted into your private configuration*, e.g. =$MY_CONFIG/my/config.py=.
|
|
|
|
You don't have to set them up all at once; it's recommended to do it gradually.
|
|
|
|
# TODO hmm. drawer raw means it can output outlines, but then have to manually erase the generated results. ugh.
|
|
|
|
#+begin_src python :dir .. :results output drawer raw :exports result
|
|
# TODO ugh, pkgutil.walk_packages doesn't recurse and find packages like my.twitter.archive??
|
|
# yep.. https://stackoverflow.com/q/41203765/706389
|
|
import importlib
|
|
# from lint import all_modules # meh
|
|
# TODO figure out how to discover configs automatically...
|
|
# (config class name, dotted module path) pairs.
# The generation loop imports each module and reads the named class from it,
# so the first element must be an attribute of the module in the second.
modules = [
    ('google'         , 'my.google.takeout.paths'),
    ('hypothesis'     , 'my.hypothesis'          ),
    ('reddit'         , 'my.reddit'              ),
    ('pocket'         , 'my.pocket'              ),
    ('twint'          , 'my.twitter.twint'       ),
    ('twitter_archive', 'my.twitter.archive'     ),
    ('lastfm'         , 'my.lastfm'              ),
    ('polar'          , 'my.reading.polar'       ),
    ('instapaper'     , 'my.instapaper'          ),
    ('github'         , 'my.github.gdpr'         ),
    ('github'         , 'my.github.ghexport'     ),
    ('kobo'           , 'my.kobo'                ),
]
|
|
|
|
def indent(s, spaces=4):
    """Return ``s`` with every line prefixed by ``spaces`` space characters.

    Line endings are preserved as-is. Every line is prefixed, including
    blank ones; an empty string yields an empty string.
    """
    prefix = ' ' * spaces
    return ''.join(prefix + line for line in s.splitlines(keepends=True))
|
|
|
|
from pathlib import Path
|
|
import inspect
|
|
from dataclasses import fields
|
|
import re
|
|
print('\n') # ugh. hack for org-ruby drawers bug
# For every (class name, module path) pair, print an Org heading linking to the
# module file, the module docstring (if any), and the config class source.
for cls, p in modules:
    m = importlib.import_module(p)
    C = getattr(m, cls)
    src = inspect.getsource(C)
    # only show the part of the class before its first @property
    i = src.find('@property')
    if i != -1:
        src = src[:i]
    src = src.strip()
    # rewrite 'class name(...' up to the end of that line as 'class name:'
    src = re.sub(r'(class \w+)\(.*', r'\1:', src)
    mpath = p.replace('.', '/')
    # A module is either 'a/b.py' or a package 'a/b/__init__.py'. The package
    # candidate needs the leading '/', otherwise it probes 'a/b__init__.py'
    # and packages are linked without any file suffix.
    for x in ['.py', '/__init__.py']:
        if Path(mpath + x).exists():
            mpath = mpath + x
            break
    print(f'** [[file:../{mpath}][{p}]]')
    mdoc = m.__doc__
    if mdoc is not None:
        print(indent(mdoc))
    print(' #+begin_src python')
    print(indent(src))
    print(' #+end_src')
|
|
#+end_src
|
|
|
|
#+RESULTS:
|
|
|
|
|
|
** [[file:../my/google/takeout/paths.py][my.google.takeout.paths]]
|
|
|
|
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
|
|
|
|
#+begin_src python
|
|
class google:
|
|
takeout_path: Paths # path/paths/glob for the takeout zips
|
|
#+end_src
|
|
** [[file:../my/hypothesis.py][my.hypothesis]]
|
|
|
|
[[https://hypothes.is][Hypothes.is]] highlights and annotations
|
|
|
|
#+begin_src python
|
|
class hypothesis:
|
|
'''
|
|
Uses [[https://github.com/karlicoss/hypexport][hypexport]] outputs
|
|
'''
|
|
|
|
# path[s]/glob to the exported JSON data
|
|
export_path: Paths
|
|
|
|
# path to a local clone of hypexport
|
|
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/hypexport
|
|
hypexport : Optional[PathIsh] = None
|
|
#+end_src
|
|
** [[file:../my/reddit.py][my.reddit]]
|
|
|
|
Reddit data: saved items/comments/upvotes/etc.
|
|
|
|
#+begin_src python
|
|
class reddit:
|
|
'''
|
|
Uses [[https://github.com/karlicoss/rexport][rexport]] output.
|
|
'''
|
|
|
|
# path[s]/glob to the exported JSON data
|
|
export_path: Paths
|
|
|
|
# path to a local clone of rexport
|
|
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/rexport
|
|
rexport : Optional[PathIsh] = None
|
|
#+end_src
|
|
** [[file:../my/pocket.py][my.pocket]]
|
|
|
|
[[https://getpocket.com][Pocket]] bookmarks and highlights
|
|
|
|
#+begin_src python
|
|
class pocket:
|
|
'''
|
|
Uses [[https://github.com/karlicoss/pockexport][pockexport]] outputs
|
|
'''
|
|
|
|
# path[s]/glob to the exported JSON data
|
|
export_path: Paths
|
|
|
|
# path to a local clone of pockexport
|
|
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/pockexport
|
|
pockexport : Optional[PathIsh] = None
|
|
#+end_src
|
|
** [[file:../my/twitter/twint.py][my.twitter.twint]]
|
|
|
|
Twitter data (tweets and favorites).
|
|
|
|
Uses [[https://github.com/twintproject/twint][Twint]] data export.
|
|
|
|
Requirements: =pip3 install --user dataset=
|
|
|
|
#+begin_src python
|
|
class twint:
|
|
export_path: Paths # path[s]/glob to the twint Sqlite database
|
|
#+end_src
|
|
** [[file:../my/twitter/archive.py][my.twitter.archive]]
|
|
|
|
Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
|
|
|
|
#+begin_src python
|
|
class twitter_archive:
|
|
export_path: Paths # path[s]/glob to the twitter archive takeout
|
|
#+end_src
|
|
** [[file:../my/lastfm][my.lastfm]]
|
|
|
|
Last.fm scrobbles
|
|
|
|
#+begin_src python
|
|
class lastfm:
|
|
"""
|
|
Uses [[https://github.com/karlicoss/lastfm-backup][lastfm-backup]] outputs
|
|
"""
|
|
export_path: Paths
|
|
#+end_src
|
|
** [[file:../my/reading/polar.py][my.reading.polar]]
|
|
|
|
[[https://github.com/burtonator/polar-bookshelf][Polar]] articles and highlights
|
|
|
|
#+begin_src python
|
|
class polar:
|
|
'''
|
|
Polar config is optional, you only need it if you want to specify custom 'polar_dir'
|
|
'''
|
|
polar_dir: PathIsh = Path('~/.polar').expanduser()
|
|
defensive: bool = True # pass False if you want it to fail faster on errors (useful for debugging)
|
|
#+end_src
|
|
** [[file:../my/instapaper.py][my.instapaper]]
|
|
|
|
[[https://www.instapaper.com][Instapaper]] bookmarks, highlights and annotations
|
|
|
|
#+begin_src python
|
|
class instapaper:
|
|
'''
|
|
Uses [[https://github.com/karlicoss/instapexport][instapexport]] outputs.
|
|
'''
|
|
# path[s]/glob to the exported JSON data
|
|
export_path : Paths
|
|
|
|
# path to a local clone of instapexport
|
|
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/instapexport
|
|
instapexport: Optional[PathIsh] = None
|
|
#+end_src
|
|
** [[file:../my/github/gdpr.py][my.github.gdpr]]
|
|
|
|
Github data (uses [[https://github.com/settings/admin][official GDPR export]])
|
|
|
|
#+begin_src python
|
|
class github:
|
|
gdpr_dir: PathIsh # path to unpacked GDPR archive
|
|
#+end_src
|
|
** [[file:../my/github/ghexport.py][my.github.ghexport]]
|
|
|
|
Github data: events, comments, etc. (API data)
|
|
|
|
#+begin_src python
|
|
class github:
|
|
'''
|
|
Uses [[https://github.com/karlicoss/ghexport][ghexport]] outputs.
|
|
'''
|
|
# path[s]/glob to the exported JSON data
|
|
export_path: Paths
|
|
|
|
# path to a local clone of ghexport
|
|
# alternatively, you can put the repository (or a symlink) in $MY_CONFIG/my/config/repos/ghexport
|
|
ghexport : Optional[PathIsh] = None
|
|
|
|
# path to a cache directory
|
|
# if omitted, will use /tmp
|
|
cache_dir: Optional[PathIsh] = None
|
|
#+end_src
|
|
** [[file:../my/kobo.py][my.kobo]]
|
|
|
|
[[https://uk.kobobooks.com/products/kobo-aura-one][Kobo]] e-ink reader: annotations and reading stats
|
|
|
|
#+begin_src python
|
|
class kobo:
|
|
'''
|
|
Uses [[https://github.com/karlicoss/kobuddy#as-a-backup-tool][kobuddy]] outputs.
|
|
'''
|
|
# path[s]/glob to the exported databases
|
|
export_path: Paths
|
|
#+end_src
|