go-org-orgwiki/org/document.go
Niklas Fasching b61e49eb85 Preserve whitespace (indentation) inside paragraphs
We want original whitespace to be rendered in some cases (e.g. verse
blocks). This requires information about the original whitespace to be
preserved during paragraph parsing. As html ignores (collapses) whitespace by
default we don't have to adapt the html writer and can just selectively enable
rendering of the preseverved whitespace wherever we want it using
css (white-space: pre).

To differentiate meaningful whitespace from document structure based
indentation (i.e. list item base indentation) we need to introduce
document.baseLvl. A paragraph by itself does not have enough information to
differentiate both kinds of whitespace and needs this information as context
[0].

As we're already touching list indentation i went along and improved (fixed?)
descriptive list item indentation rendering in the org writer (it should match
emacs tab behavior - i.e. indent subsequent lines up to the `:: `).

[0] e.g. list items can contain blank lines - a paragraph starting with a blank
line would not know that it is part of a list item / has a base indentation -
the blank line would suggest a baseLvl of 0.
2019-12-22 14:17:14 +01:00

261 lines
7.7 KiB
Go

// Package org is an Org mode syntax processor.
//
// It parses plain text into an AST and can export it as HTML or pretty printed Org mode syntax.
// Further export formats can be defined using the Writer interface.
//
// You probably want to start with something like this:
// input := strings.NewReader("Your Org mode input")
// html, err := org.New().Parse(input, "./").Write(org.NewHTMLWriter())
// if err != nil {
// log.Fatalf("Something went wrong: %s", err)
// }
// log.Print(html)
package org
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"strings"
)
type Configuration struct {
MaxEmphasisNewLines int // Maximum number of newlines inside an emphasis. See org-emphasis-regexp-components newline.
AutoLink bool // Try to convert text passages that look like hyperlinks into hyperlinks.
DefaultSettings map[string]string // Default values for settings that are overriden by setting the same key in BufferSettings.
Log *log.Logger // Log is used to print warnings during parsing.
ReadFile func(filename string) ([]byte, error) // ReadFile is used to read e.g. #+INCLUDE files.
}
// Document contains the parsing results and a pointer to the Configuration.
type Document struct {
*Configuration
Path string // Path of the file containing the parse input - used to resolve relative paths during parsing (e.g. INCLUDE).
tokens []token
baseLvl int
Nodes []Node
NamedNodes map[string]Node
Outline Outline // Outline is a Table Of Contents for the document and contains all sections (headline + content).
BufferSettings map[string]string // Settings contains all settings that were parsed from keywords.
Error error
}
// Node represents a parsed node of the document.
type Node interface {
String() string // String returns the pretty printed Org mode string for the node (see OrgWriter).
}
type lexFn = func(line string) (t token, ok bool)
type parseFn = func(*Document, int, stopFn) (int, Node)
type stopFn = func(*Document, int) bool
type token struct {
kind string
lvl int
content string
matches []string
}
var lexFns = []lexFn{
lexHeadline,
lexDrawer,
lexBlock,
lexList,
lexTable,
lexHorizontalRule,
lexKeywordOrComment,
lexFootnoteDefinition,
lexExample,
lexText,
}
var nilToken = token{"nil", -1, "", nil}
var orgWriter = NewOrgWriter()
// New returns a new Configuration with (hopefully) sane defaults.
func New() *Configuration {
return &Configuration{
AutoLink: true,
MaxEmphasisNewLines: 1,
DefaultSettings: map[string]string{
"TODO": "TODO | DONE",
"EXCLUDE_TAGS": "noexport",
"OPTIONS": "toc:t <:t e:t f:t pri:t todo:t tags:t",
},
Log: log.New(os.Stderr, "go-org: ", 0),
ReadFile: ioutil.ReadFile,
}
}
// String returns the pretty printed Org mode string for the given nodes (see OrgWriter).
func String(nodes []Node) string { return orgWriter.WriteNodesAsString(nodes...) }
// Write is called after with an instance of the Writer interface to export a parsed Document into another format.
func (d *Document) Write(w Writer) (out string, err error) {
defer func() {
if recovered := recover(); recovered != nil {
err = fmt.Errorf("could not write output: %s", recovered)
}
}()
if d.Error != nil {
return "", d.Error
} else if d.Nodes == nil {
return "", fmt.Errorf("could not write output: parse was not called")
}
w.Before(d)
WriteNodes(w, d.Nodes...)
w.After(d)
return w.String(), err
}
// Parse parses the input into an AST (and some other helpful fields like Outline).
// To allow method chaining, errors are stored in document.Error rather than being returned.
func (c *Configuration) Parse(input io.Reader, path string) (d *Document) {
outlineSection := &Section{}
d = &Document{
Configuration: c,
Outline: Outline{outlineSection, outlineSection, 0},
BufferSettings: map[string]string{},
NamedNodes: map[string]Node{},
Path: path,
}
defer func() {
if recovered := recover(); recovered != nil {
d.Error = fmt.Errorf("could not parse input: %v", recovered)
}
}()
if d.tokens != nil {
d.Error = fmt.Errorf("parse was called multiple times")
}
d.tokenize(input)
_, nodes := d.parseMany(0, func(d *Document, i int) bool { return i >= len(d.tokens) })
d.Nodes = nodes
return d
}
// Silent disables all logging of warnings during parsing.
func (c *Configuration) Silent() *Configuration {
c.Log = log.New(ioutil.Discard, "", 0)
return c
}
func (d *Document) tokenize(input io.Reader) {
d.tokens = []token{}
scanner := bufio.NewScanner(input)
for scanner.Scan() {
d.tokens = append(d.tokens, tokenize(scanner.Text()))
}
if err := scanner.Err(); err != nil {
d.Error = fmt.Errorf("could not tokenize input: %s", err)
}
}
// Get returns the value for key in BufferSettings or DefaultSettings if key does not exist in the former
func (d *Document) Get(key string) string {
if v, ok := d.BufferSettings[key]; ok {
return v
}
if v, ok := d.DefaultSettings[key]; ok {
return v
}
return ""
}
// GetOption returns the value associated to the export option key
// Currently supported options:
// - < (export timestamps)
// - e (export org entities)
// - f (export footnotes)
// - toc (export table of content)
// - todo (export headline todo status)
// - pri (export headline priority)
// - tags (export headline tags)
// see https://orgmode.org/manual/Export-settings.html for more information
func (d *Document) GetOption(key string) bool {
get := func(settings map[string]string) string {
for _, field := range strings.Fields(settings["OPTIONS"]) {
if strings.HasPrefix(field, key+":") {
return field[len(key)+1:]
}
}
return ""
}
value := get(d.BufferSettings)
if value == "" {
value = get(d.DefaultSettings)
}
switch value {
case "t":
return true
case "nil":
return false
default:
d.Log.Printf("Bad value for export option %s (%s)", key, value)
return false
}
}
func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) {
switch d.tokens[i].kind {
case "unorderedList", "orderedList":
consumed, node = d.parseList(i, stop)
case "tableRow", "tableSeparator":
consumed, node = d.parseTable(i, stop)
case "beginBlock":
consumed, node = d.parseBlock(i, stop)
case "beginDrawer":
consumed, node = d.parseDrawer(i, stop)
case "text":
consumed, node = d.parseParagraph(i, stop)
case "example":
consumed, node = d.parseExample(i, stop)
case "horizontalRule":
consumed, node = d.parseHorizontalRule(i, stop)
case "comment":
consumed, node = d.parseComment(i, stop)
case "keyword":
consumed, node = d.parseKeyword(i, stop)
case "headline":
consumed, node = d.parseHeadline(i, stop)
case "footnoteDefinition":
consumed, node = d.parseFootnoteDefinition(i, stop)
}
if consumed != 0 {
return consumed, node
}
d.Log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i])
m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0])
d.tokens[i] = token{"text", len(m[1]), m[2], m}
return d.parseOne(i, stop)
}
func (d *Document) parseMany(i int, stop stopFn) (int, []Node) {
start, nodes := i, []Node{}
for i < len(d.tokens) && !stop(d, i) {
consumed, node := d.parseOne(i, stop)
i += consumed
nodes = append(nodes, node)
}
return i - start, nodes
}
func (d *Document) addHeadline(headline *Headline) int {
current := &Section{Headline: headline}
d.Outline.last.add(current)
d.Outline.count++
d.Outline.last = current
return d.Outline.count
}
func tokenize(line string) token {
for _, lexFn := range lexFns {
if token, ok := lexFn(line); ok {
return token
}
}
panic(fmt.Sprintf("could not lex line: %s", line))
}