Autoextract documentation for some modules, improve docs

2020-05-10 17:25:57 +01:00 · 2020-05-10 17:25:57 +01:00 · 976b3da6f4
commit 976b3da6f4
parent 9cb39103c6
4 changed files with 88 additions and 36 deletions
--- a/doc/MODULES.org
+++ b/doc/MODULES.org
@ -1,7 +1,28 @@
-# TODO explain Paths/PathIsh
+This file is an overview of *documented* modules. There are many more, see [[file:../README.org::#whats-inside]["What's inside"]] for the full list of modules.
+
+See [[file:SETUP.org][SETUP]] to find out how to set up your own config.
+
+Some explanations:
+
+- [[https://docs.python.org/3/library/pathlib.html#pathlib.Path][Path]] is a standard Python object to represent paths
+- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L9][PathIsh]] is a helper type to allow using either =str=, or a =Path=
+- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L108][Paths]] is another helper type for paths.
+
+  It's 'smart' and allows you to be flexible about your config:
+
+  - a simple =str= or a =Path=
+  - =/a/path/to/directory/=, so the module will consume all files from this directory
+  - a list of files/directories (it will be flattened)
+  - a [[https://docs.python.org/3/library/glob.html?highlight=glob#glob.glob][glob]] string, so you can be flexible about the format of your data on disk (e.g. if you want to keep it compressed)
+
+  Typically, such a variable will be passed to =get_files= to actually extract the list of real files to use. You can see usage examples [[https://github.com/karlicoss/HPI/blob/master/tests/get_files.py][here]].
+
+- if the field has a default value, you can omit it from your private config.


-#+begin_src python :dir .. :results output :exports result
+Modules:
+
+#+begin_src python :dir .. :results output drawer :exports result
 # TODO ugh, pkgutil.walk_packages doesn't recurse and find packages like my.twitter.archive??
 import importlib
 # from lint import all_modules # meh
@ -13,11 +34,14 @@ modules = [
    ('twitter', 'my.twitter.archive'     ),
 ]

-# TODO generate links?
+def indent(s, spaces=4):
+    return ''.join(' ' * spaces + l for l in s.splitlines(keepends=True))

+from pathlib import Path
 import inspect
 from dataclasses import fields
-# print(',#+begin_src python')
+import re
+print('\n') # ugh. hack for org-ruby drawers bug
 for cls, p in modules:
    m = importlib.import_module(p)
    C = getattr(m, cls)
@ -25,33 +49,60 @@ for cls, p in modules:
    i = src.find('@property')
    if i != -1:
        src = src[:i]
-    print(src)
-# print('#+end_src')
+    src = src.strip()
+    src = re.sub(r'(class \w+)\(.*', r'\1:', src)
+    mpath = p.replace('.', '/')
+    for x in ['.py', '__init__.py']:
+        if Path(mpath + x).exists():
+            mpath = mpath + x
+    print(f'- [[file:../{mpath}][{p}]]')
+    mdoc = m.__doc__
+    if mdoc is not None:
+        print(indent(mdoc))
+    print(f'    #+begin_src python')
+    print(indent(src))
+    print(f'    #+end_src')
 #+end_src

 #+RESULTS:
-#+begin_example
-class google(user_config):
-    '''
-    Expects [[https://takeout.google.com][Google Takeout]] data.
-    '''
-    takeout_path: Paths # path/paths/glob for the takeout zips
-
-class reddit(uconfig):
-    '''
-    Reddit module uses [[https://github.com/karlicoss/rexport][rexport]] output
-    '''
-    export_path: Paths                     # path[s]/glob to the exported data
-    rexport    : Optional[PathIsh] = None  # path to a local clone of rexport
+:results:


-class twint(user_config):
-    '''
+- [[file:../my/google/takeout/paths.py][my.google.takeout.paths]]
+
+    Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
+
+    #+begin_src python
+    class google:
+        takeout_path: Paths # path/paths/glob for the takeout zips
+    #+end_src
+- [[file:../my/reddit.py][my.reddit]]
+
+    Reddit data: saved items/comments/upvotes/etc.
+
+    Uses [[https://github.com/karlicoss/rexport][rexport]] output.
+
+    #+begin_src python
+    class reddit:
+        export_path: Paths                     # path[s]/glob to the exported data
+        rexport    : Optional[PathIsh] = None  # path to a local clone of rexport
+    #+end_src
+- [[file:../my/twitter/twint.py][my.twitter.twint]]
+
+    Twitter data (tweets and favorites).
+
    Uses [[https://github.com/twintproject/twint][Twint]] data export.
-    '''
-    export_path: Paths # path[s]/glob to twint Sqlite database

-class twitter(user_config):
-    export_path: Paths # path[s]/glob to the twitter archive takeout
+    #+begin_src python
+    class twint:
+        export_path: Paths # path[s]/glob to the twint Sqlite database
+    #+end_src
+- [[file:../my/twitter/archive.py][my.twitter.archive]]

-#+end_example
+    Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
+
+    #+begin_src python
+    class twitter:
+        export_path: Paths # path[s]/glob to the twitter archive takeout
+    #+end_src
+:end:
--- a/my/google/takeout/paths.py
+++ b/my/google/takeout/paths.py
@ -1,15 +1,18 @@
+'''
+Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
+'''
+
 from dataclasses import dataclass
 from ...core.common import Paths

 from my.config import google as user_config
@dataclass
 class google(user_config):
-    '''
-    Expects [[https://takeout.google.com][Google Takeout]] data.
-    '''
    takeout_path: Paths # path/paths/glob for the takeout zips
 ###

+# TODO rename 'google' to 'takeout'? not sure
+
 from ...core.cfg import make_config
 config = make_config(google)

--- a/my/reddit.py
+++ b/my/reddit.py
@ -1,5 +1,7 @@
 """
 Reddit data: saved items/comments/upvotes/etc.
+
+Uses [[https://github.com/karlicoss/rexport][rexport]] output.
 """

 from typing import Optional
@ -11,9 +13,6 @@ from dataclasses import dataclass

@dataclass
 class reddit(uconfig):
-    '''
-    Reddit module uses [[https://github.com/karlicoss/rexport][rexport]] output
-    '''
    export_path: Paths                     # path[s]/glob to the exported data
    rexport    : Optional[PathIsh] = None  # path to a local clone of rexport

--- a/my/twitter/twint.py
+++ b/my/twitter/twint.py
@ -1,5 +1,7 @@
 """
 Twitter data (tweets and favorites).
+
+Uses [[https://github.com/twintproject/twint][Twint]] data export.
 """

 from ..core.common import Paths
@ -8,10 +10,7 @@ from my.config import twint as user_config

@dataclass
 class twint(user_config):
-    '''
-    Uses [[https://github.com/twintproject/twint][Twint]] data export.
-    '''
-    export_path: Paths # path[s]/glob to twint Sqlite database
+    export_path: Paths # path[s]/glob to the twint Sqlite database


 from ..core.cfg import make_config