Autoextract documentation for some modules, improve docs

This commit is contained in:
Dima Gerasimov 2020-05-10 17:25:57 +01:00
parent 9cb39103c6
commit 976b3da6f4
4 changed files with 88 additions and 36 deletions

View file

@ -1,7 +1,28 @@
# TODO explain Paths/PathIsh This file is an overview of *documented* modules. There are many more, see [[file:../README.org::#whats-inside]["What's inside"]] for the full list of modules.
See [[file:SETUP.org][SETUP]] to find out how to set up your own config.
Some explanations:
- [[https://docs.python.org/3/library/pathlib.html#pathlib.Path][Path]] is a standard Python object to represent paths
- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L9][PathIsh]] is a helper type that allows using either a =str= or a =Path=
- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L108][Paths]] is another helper type for paths.
It's 'smart' and allows you to be flexible about your config:
- a simple =str= or a =Path=
- =/a/path/to/directory/=, so the module will consume all files from this directory
- a list of files/directories (it will be flattened)
- a [[https://docs.python.org/3/library/glob.html?highlight=glob#glob.glob][glob]] string, so you can be flexible about the format of your data on disk (e.g. if you want to keep it compressed)
Typically, such a variable will be passed to =get_files= to actually extract the list of real files to use. You can see usage examples [[https://github.com/karlicoss/HPI/blob/master/tests/get_files.py][here]].
- if the field has a default value, you can omit it from your private config.
#+begin_src python :dir .. :results output :exports result Modules:
#+begin_src python :dir .. :results output drawer :exports result
# TODO ugh, pkgutil.walk_packages doesn't recurse and find packages like my.twitter.archive?? # TODO ugh, pkgutil.walk_packages doesn't recurse and find packages like my.twitter.archive??
import importlib import importlib
# from lint import all_modules # meh # from lint import all_modules # meh
@ -13,11 +34,14 @@ modules = [
('twitter', 'my.twitter.archive' ), ('twitter', 'my.twitter.archive' ),
] ]
# TODO generate links? def indent(s, spaces=4):
return ''.join(' ' * spaces + l for l in s.splitlines(keepends=True))
from pathlib import Path
import inspect import inspect
from dataclasses import fields from dataclasses import fields
# print(',#+begin_src python') import re
print('\n') # ugh. hack for org-ruby drawers bug
for cls, p in modules: for cls, p in modules:
m = importlib.import_module(p) m = importlib.import_module(p)
C = getattr(m, cls) C = getattr(m, cls)
@ -25,33 +49,60 @@ for cls, p in modules:
i = src.find('@property') i = src.find('@property')
if i != -1: if i != -1:
src = src[:i] src = src[:i]
print(src) src = src.strip()
# print('#+end_src') src = re.sub(r'(class \w+)\(.*', r'\1:', src)
mpath = p.replace('.', '/')
for x in ['.py', '__init__.py']:
if Path(mpath + x).exists():
mpath = mpath + x
print(f'- [[file:../{mpath}][{p}]]')
mdoc = m.__doc__
if mdoc is not None:
print(indent(mdoc))
print(f' #+begin_src python')
print(indent(src))
print(f' #+end_src')
#+end_src #+end_src
#+RESULTS: #+RESULTS:
#+begin_example :results:
class google(user_config):
'''
Expects [[https://takeout.google.com][Google Takeout]] data.
'''
takeout_path: Paths # path/paths/glob for the takeout zips
class reddit(uconfig):
'''
Reddit module uses [[https://github.com/karlicoss/rexport][rexport]] output
'''
export_path: Paths # path[s]/glob to the exported data
rexport : Optional[PathIsh] = None # path to a local clone of rexport
class twint(user_config): - [[file:../my/google/takeout/paths.py][my.google.takeout.paths]]
'''
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
#+begin_src python
class google:
takeout_path: Paths # path/paths/glob for the takeout zips
#+end_src
- [[file:../my/reddit.py][my.reddit]]
Reddit data: saved items/comments/upvotes/etc.
Uses [[https://github.com/karlicoss/rexport][rexport]] output.
#+begin_src python
class reddit:
export_path: Paths # path[s]/glob to the exported data
rexport : Optional[PathIsh] = None # path to a local clone of rexport
#+end_src
- [[file:../my/twitter/twint.py][my.twitter.twint]]
Twitter data (tweets and favorites).
Uses [[https://github.com/twintproject/twint][Twint]] data export. Uses [[https://github.com/twintproject/twint][Twint]] data export.
'''
export_path: Paths # path[s]/glob to twint Sqlite database
class twitter(user_config): #+begin_src python
export_path: Paths # path[s]/glob to the twitter archive takeout class twint:
export_path: Paths # path[s]/glob to the twint Sqlite database
#+end_src
- [[file:../my/twitter/archive.py][my.twitter.archive]]
#+end_example Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
#+begin_src python
class twitter:
export_path: Paths # path[s]/glob to the twitter archive takeout
#+end_src
:end:

View file

@ -1,15 +1,18 @@
'''
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
'''
from dataclasses import dataclass from dataclasses import dataclass
from ...core.common import Paths from ...core.common import Paths
from my.config import google as user_config from my.config import google as user_config
@dataclass @dataclass
class google(user_config): class google(user_config):
'''
Expects [[https://takeout.google.com][Google Takeout]] data.
'''
takeout_path: Paths # path/paths/glob for the takeout zips takeout_path: Paths # path/paths/glob for the takeout zips
### ###
# TODO rename 'google' to 'takeout'? not sure
from ...core.cfg import make_config from ...core.cfg import make_config
config = make_config(google) config = make_config(google)

View file

@ -1,5 +1,7 @@
""" """
Reddit data: saved items/comments/upvotes/etc. Reddit data: saved items/comments/upvotes/etc.
Uses [[https://github.com/karlicoss/rexport][rexport]] output.
""" """
from typing import Optional from typing import Optional
@ -11,9 +13,6 @@ from dataclasses import dataclass
@dataclass @dataclass
class reddit(uconfig): class reddit(uconfig):
'''
Reddit module uses [[https://github.com/karlicoss/rexport][rexport]] output
'''
export_path: Paths # path[s]/glob to the exported data export_path: Paths # path[s]/glob to the exported data
rexport : Optional[PathIsh] = None # path to a local clone of rexport rexport : Optional[PathIsh] = None # path to a local clone of rexport

View file

@ -1,5 +1,7 @@
""" """
Twitter data (tweets and favorites). Twitter data (tweets and favorites).
Uses [[https://github.com/twintproject/twint][Twint]] data export.
""" """
from ..core.common import Paths from ..core.common import Paths
@ -8,10 +10,7 @@ from my.config import twint as user_config
@dataclass @dataclass
class twint(user_config): class twint(user_config):
''' export_path: Paths # path[s]/glob to the twint Sqlite database
Uses [[https://github.com/twintproject/twint][Twint]] data export.
'''
export_path: Paths # path[s]/glob to twint Sqlite database
from ..core.cfg import make_config from ..core.cfg import make_config