Autoextract documentation for some modules, improve docs

This commit is contained in:
Dima Gerasimov 2020-05-10 17:25:57 +01:00
parent 9cb39103c6
commit 976b3da6f4
4 changed files with 88 additions and 36 deletions

View file

@ -1,7 +1,28 @@
# TODO explain Paths/PathIsh
This file is an overview of *documented* modules. There are many more; see [[file:../README.org::#whats-inside]["What's inside"]] for the full list of modules.
See [[file:SETUP.org][SETUP]] to find out how to set up your own config.
Some explanations:
- [[https://docs.python.org/3/library/pathlib.html#pathlib.Path][Path]] is a standard Python object to represent paths
- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L9][PathIsh]] is a helper type to allow using either =str=, or a =Path=
- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L108][Paths]] is another helper type for paths.
It's 'smart' and allows you to be flexible about your config:
- simple =str= or a =Path=
- =/a/path/to/directory/=, so the module will consume all files from this directory
- a list of files/directories (it will be flattened)
- a [[https://docs.python.org/3/library/glob.html?highlight=glob#glob.glob][glob]] string, so you can be flexible about the format of your data on disk (e.g. if you want to keep it compressed)
Typically, such a variable will be passed to =get_files= to actually extract the list of real files to use. You can see usage examples [[https://github.com/karlicoss/HPI/blob/master/tests/get_files.py][here]].
- if the field has a default value, you can omit it from your private config.
#+begin_src python :dir .. :results output :exports result
Modules:
#+begin_src python :dir .. :results output drawer :exports result
# TODO ugh, pkgutil.walk_packages doesn't recurse and find packages like my.twitter.archive??
import importlib
# from lint import all_modules # meh
@ -13,11 +34,14 @@ modules = [
('twitter', 'my.twitter.archive' ),
]
# TODO generate links?
def indent(s, spaces=4):
    """Return *s* with every line prefixed by ``spaces`` space characters.

    Line endings are preserved as-is (the text is split with
    ``keepends=True``), and blank lines receive the prefix too, so the
    result can be nested verbatim under an org-mode list item.
    An empty string yields an empty string.
    """
    prefix = ' ' * spaces  # built once rather than per line
    return ''.join(prefix + line for line in s.splitlines(keepends=True))
from pathlib import Path
import inspect
from dataclasses import fields
# print(',#+begin_src python')
import re
print('\n') # ugh. hack for org-ruby drawers bug
for cls, p in modules:
m = importlib.import_module(p)
C = getattr(m, cls)
@ -25,33 +49,60 @@ for cls, p in modules:
i = src.find('@property')
if i != -1:
src = src[:i]
print(src)
# print('#+end_src')
src = src.strip()
src = re.sub(r'(class \w+)\(.*', r'\1:', src)
mpath = p.replace('.', '/')
for x in ['.py', '__init__.py']:
if Path(mpath + x).exists():
mpath = mpath + x
print(f'- [[file:../{mpath}][{p}]]')
mdoc = m.__doc__
if mdoc is not None:
print(indent(mdoc))
print(f' #+begin_src python')
print(indent(src))
print(f' #+end_src')
#+end_src
#+RESULTS:
#+begin_example
class google(user_config):
'''
Expects [[https://takeout.google.com][Google Takeout]] data.
'''
takeout_path: Paths # path/paths/glob for the takeout zips
class reddit(uconfig):
'''
Reddit module uses [[https://github.com/karlicoss/rexport][rexport]] output
'''
export_path: Paths # path[s]/glob to the exported data
rexport : Optional[PathIsh] = None # path to a local clone of rexport
:results:
class twint(user_config):
'''
- [[file:../my/google/takeout/paths.py][my.google.takeout.paths]]
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
#+begin_src python
class google:
takeout_path: Paths # path/paths/glob for the takeout zips
#+end_src
- [[file:../my/reddit.py][my.reddit]]
Reddit data: saved items/comments/upvotes/etc.
Uses [[https://github.com/karlicoss/rexport][rexport]] output.
#+begin_src python
class reddit:
export_path: Paths # path[s]/glob to the exported data
rexport : Optional[PathIsh] = None # path to a local clone of rexport
#+end_src
- [[file:../my/twitter/twint.py][my.twitter.twint]]
Twitter data (tweets and favorites).
Uses [[https://github.com/twintproject/twint][Twint]] data export.
'''
export_path: Paths # path[s]/glob to twint Sqlite database
class twitter(user_config):
export_path: Paths # path[s]/glob to the twitter archive takeout
#+begin_src python
class twint:
export_path: Paths # path[s]/glob to the twint Sqlite database
#+end_src
- [[file:../my/twitter/archive.py][my.twitter.archive]]
#+end_example
Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]])
#+begin_src python
class twitter:
export_path: Paths # path[s]/glob to the twitter archive takeout
#+end_src
:end:

View file

@ -1,15 +1,18 @@
'''
Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data
'''
from dataclasses import dataclass
from ...core.common import Paths
from my.config import google as user_config
@dataclass
class google(user_config):
'''
Expects [[https://takeout.google.com][Google Takeout]] data.
'''
takeout_path: Paths # path/paths/glob for the takeout zips
###
# TODO rename 'google' to 'takeout'? not sure
from ...core.cfg import make_config
config = make_config(google)

View file

@ -1,5 +1,7 @@
"""
Reddit data: saved items/comments/upvotes/etc.
Uses [[https://github.com/karlicoss/rexport][rexport]] output.
"""
from typing import Optional
@ -11,9 +13,6 @@ from dataclasses import dataclass
@dataclass
class reddit(uconfig):
'''
Reddit module uses [[https://github.com/karlicoss/rexport][rexport]] output
'''
export_path: Paths # path[s]/glob to the exported data
rexport : Optional[PathIsh] = None # path to a local clone of rexport

View file

@ -1,5 +1,7 @@
"""
Twitter data (tweets and favorites).
Uses [[https://github.com/twintproject/twint][Twint]] data export.
"""
from ..core.common import Paths
@ -8,10 +10,7 @@ from my.config import twint as user_config
@dataclass
class twint(user_config):
'''
Uses [[https://github.com/twintproject/twint][Twint]] data export.
'''
export_path: Paths # path[s]/glob to twint Sqlite database
export_path: Paths # path[s]/glob to the twint Sqlite database
from ..core.cfg import make_config