diff --git a/doc/MODULES.org b/doc/MODULES.org index 022a2df..7d97f29 100644 --- a/doc/MODULES.org +++ b/doc/MODULES.org @@ -1,7 +1,28 @@ -# TODO explain Paths/PathIsh +This file is an overview of *documented* modules. There are many more, see [[file:../README.org::#whats-inside]["What's inside"]] for the full list of modules. + +See [[file:SETUP.org][SETUP]] to find out how to set up your own config. + +Some explanations: + +- [[https://docs.python.org/3/library/pathlib.html#pathlib.Path][Path]] is a standard Python object to represent paths +- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L9][PathIsh]] is a helper type to allow using either =str=, or a =Path= +- [[https://github.com/karlicoss/HPI/blob/5f4acfddeeeba18237e8b039c8f62bcaa62a4ac2/my/core/common.py#L108][Paths]] is another helper type for paths. + + It's 'smart' and allows you to be flexible about your config: + + - simple =str= or a =Path= + - =/a/path/to/directory/=, so the module will consume all files from this directory + - a list of files/directories (it will be flattened) + - a [[https://docs.python.org/3/library/glob.html?highlight=glob#glob.glob][glob]] string, so you can be flexible about the format of your data on disk (e.g. if you want to keep it compressed) + + Typically, such a variable will be passed to =get_files= to actually extract the list of real files to use. You can see usage examples [[https://github.com/karlicoss/HPI/blob/master/tests/get_files.py][here]]. + +- if the field has a default value, you can omit it from your private config. -#+begin_src python :dir .. :results output :exports result +Modules: + +#+begin_src python :dir .. :results output drawer :exports result # TODO ugh, pkgutil.walk_packages doesn't recurse and find packages like my.twitter.archive?? import importlib # from lint import all_modules # meh @@ -13,11 +34,14 @@ modules = [ ('twitter', 'my.twitter.archive' ), ] -# TODO generate links? 
+def indent(s, spaces=4): + return ''.join(' ' * spaces + l for l in s.splitlines(keepends=True)) +from pathlib import Path import inspect from dataclasses import fields -# print(',#+begin_src python') +import re +print('\n') # ugh. hack for org-ruby drawers bug for cls, p in modules: m = importlib.import_module(p) C = getattr(m, cls) @@ -25,33 +49,60 @@ for cls, p in modules: i = src.find('@property') if i != -1: src = src[:i] - print(src) -# print('#+end_src') + src = src.strip() + src = re.sub(r'(class \w+)\(.*', r'\1:', src) + mpath = p.replace('.', '/') + for x in ['.py', '/__init__.py']: + if Path(mpath + x).exists(): + mpath = mpath + x + print(f'- [[file:../{mpath}][{p}]]') + mdoc = m.__doc__ + if mdoc is not None: + print(indent(mdoc)) + print(f' #+begin_src python') + print(indent(src)) + print(f' #+end_src') #+end_src #+RESULTS: -#+begin_example -class google(user_config): - ''' - Expects [[https://takeout.google.com][Google Takeout]] data. - ''' - takeout_path: Paths # path/paths/glob for the takeout zips - -class reddit(uconfig): - ''' - Reddit module uses [[https://github.com/karlicoss/rexport][rexport]] output - ''' - export_path: Paths # path[s]/glob to the exported data - rexport : Optional[PathIsh] = None # path to a local clone of rexport +:results: -class twint(user_config): - ''' +- [[file:../my/google/takeout/paths.py][my.google.takeout.paths]] + + Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data + + #+begin_src python + class google: + takeout_path: Paths # path/paths/glob for the takeout zips + #+end_src +- [[file:../my/reddit.py][my.reddit]] + + Reddit data: saved items/comments/upvotes/etc. + + Uses [[https://github.com/karlicoss/rexport][rexport]] output. 
+ + #+begin_src python + class reddit: + export_path: Paths # path[s]/glob to the exported data + rexport : Optional[PathIsh] = None # path to a local clone of rexport + #+end_src +- [[file:../my/twitter/twint.py][my.twitter.twint]] + + Twitter data (tweets and favorites). + Uses [[https://github.com/twintproject/twint][Twint]] data export. - ''' - export_path: Paths # path[s]/glob to twint Sqlite database -class twitter(user_config): - export_path: Paths # path[s]/glob to the twitter archive takeout + #+begin_src python + class twint: + export_path: Paths # path[s]/glob to the twint Sqlite database + #+end_src +- [[file:../my/twitter/archive.py][my.twitter.archive]] -#+end_example + Twitter data (uses [[https://help.twitter.com/en/managing-your-account/how-to-download-your-twitter-archive][official twitter archive export]]) + + #+begin_src python + class twitter: + export_path: Paths # path[s]/glob to the twitter archive takeout + #+end_src +:end: diff --git a/my/google/takeout/paths.py b/my/google/takeout/paths.py index 1bc346d..dff698b 100644 --- a/my/google/takeout/paths.py +++ b/my/google/takeout/paths.py @@ -1,15 +1,18 @@ +''' +Module for locating and accessing [[https://takeout.google.com][Google Takeout]] data +''' + from dataclasses import dataclass from ...core.common import Paths from my.config import google as user_config @dataclass class google(user_config): - ''' - Expects [[https://takeout.google.com][Google Takeout]] data. - ''' takeout_path: Paths # path/paths/glob for the takeout zips ### +# TODO rename 'google' to 'takeout'? not sure + from ...core.cfg import make_config config = make_config(google) diff --git a/my/reddit.py b/my/reddit.py index 2afa801..6fab1df 100755 --- a/my/reddit.py +++ b/my/reddit.py @@ -1,5 +1,7 @@ """ Reddit data: saved items/comments/upvotes/etc. + +Uses [[https://github.com/karlicoss/rexport][rexport]] output. 
""" from typing import Optional @@ -11,9 +13,6 @@ from dataclasses import dataclass @dataclass class reddit(uconfig): - ''' - Reddit module uses [[https://github.com/karlicoss/rexport][rexport]] output - ''' export_path: Paths # path[s]/glob to the exported data rexport : Optional[PathIsh] = None # path to a local clone of rexport diff --git a/my/twitter/twint.py b/my/twitter/twint.py index 36763d4..99b858e 100644 --- a/my/twitter/twint.py +++ b/my/twitter/twint.py @@ -1,5 +1,7 @@ """ Twitter data (tweets and favorites). + +Uses [[https://github.com/twintproject/twint][Twint]] data export. """ from ..core.common import Paths @@ -8,10 +10,7 @@ from my.config import twint as user_config @dataclass class twint(user_config): - ''' - Uses [[https://github.com/twintproject/twint][Twint]] data export. - ''' - export_path: Paths # path[s]/glob to twint Sqlite database + export_path: Paths # path[s]/glob to the twint Sqlite database from ..core.cfg import make_config