We want original whitespace to be rendered in some cases (e.g. verse blocks). This requires information about the original whitespace to be preserved during paragraph parsing. As HTML ignores (collapses) whitespace by default, we don't have to adapt the HTML writer and can just selectively enable rendering of the preserved whitespace wherever we want it using CSS (white-space: pre). To differentiate meaningful whitespace from document-structure-based indentation (i.e. list item base indentation) we need to introduce document.baseLvl. A paragraph by itself does not have enough information to differentiate both kinds of whitespace and needs this information as context [0]. As we're already touching list indentation, I went along and improved (fixed?) descriptive list item indentation rendering in the org writer (it should match Emacs tab behavior - i.e. indent subsequent lines up to the `:: `). [0] e.g. list items can contain blank lines - a paragraph starting with a blank line would not know that it is part of a list item / has a base indentation - the blank line would suggest a baseLvl of 0.
261 lines
7.7 KiB
Go
261 lines
7.7 KiB
Go
// Package org is an Org mode syntax processor.
//
// It parses plain text into an AST and can export it as HTML or pretty printed Org mode syntax.
// Further export formats can be defined using the Writer interface.
//
// You probably want to start with something like this:
//
//	input := strings.NewReader("Your Org mode input")
//	html, err := org.New().Parse(input, "./").Write(org.NewHTMLWriter())
//	if err != nil {
//		log.Fatalf("Something went wrong: %s", err)
//	}
//	log.Print(html)
|
|
package org
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
)
|
|
|
|
// Configuration holds the options that control parsing.
// New returns a Configuration with defaults filled in.
type Configuration struct {
	// MaxEmphasisNewLines is the maximum number of newlines inside an emphasis.
	// See org-emphasis-regexp-components newline.
	MaxEmphasisNewLines int

	// AutoLink enables converting text passages that look like hyperlinks into hyperlinks.
	AutoLink bool

	// DefaultSettings holds default values for settings. A default is overridden
	// by setting the same key in BufferSettings.
	DefaultSettings map[string]string

	// Log is used to print warnings during parsing.
	Log *log.Logger

	// ReadFile is used to read e.g. #+INCLUDE files.
	ReadFile func(filename string) ([]byte, error)
}
|
|
|
|
// Document contains the parsing results and a pointer to the Configuration.
|
|
type Document struct {
|
|
*Configuration
|
|
Path string // Path of the file containing the parse input - used to resolve relative paths during parsing (e.g. INCLUDE).
|
|
tokens []token
|
|
baseLvl int
|
|
Nodes []Node
|
|
NamedNodes map[string]Node
|
|
Outline Outline // Outline is a Table Of Contents for the document and contains all sections (headline + content).
|
|
BufferSettings map[string]string // Settings contains all settings that were parsed from keywords.
|
|
Error error
|
|
}
|
|
|
|
// Node is a single parsed element of the document.
type Node interface {
	// String returns the pretty printed Org mode string for the node (see OrgWriter).
	String() string
}
|
|
|
|
type lexFn = func(line string) (t token, ok bool)
|
|
type parseFn = func(*Document, int, stopFn) (int, Node)
|
|
type stopFn = func(*Document, int) bool
|
|
|
|
// token is the lexed form of a single input line.
// The field order matters: tokens are also built with positional literals.
type token struct {
	// kind names the token type; parseOne dispatches on it (e.g. "text", "headline").
	kind string

	// lvl is a level associated with the line.
	// NOTE(review): presumably the indentation / nesting depth - confirm against the lexers.
	lvl int

	// content is the payload of the line.
	content string

	// matches holds the submatches produced when lexing the line; parseOne
	// reads matches[0] to re-lex a token as plain text.
	matches []string
}
|
|
|
|
var lexFns = []lexFn{
|
|
lexHeadline,
|
|
lexDrawer,
|
|
lexBlock,
|
|
lexList,
|
|
lexTable,
|
|
lexHorizontalRule,
|
|
lexKeywordOrComment,
|
|
lexFootnoteDefinition,
|
|
lexExample,
|
|
lexText,
|
|
}
|
|
|
|
var nilToken = token{"nil", -1, "", nil}
|
|
var orgWriter = NewOrgWriter()
|
|
|
|
// New returns a new Configuration with (hopefully) sane defaults.
|
|
func New() *Configuration {
|
|
return &Configuration{
|
|
AutoLink: true,
|
|
MaxEmphasisNewLines: 1,
|
|
DefaultSettings: map[string]string{
|
|
"TODO": "TODO | DONE",
|
|
"EXCLUDE_TAGS": "noexport",
|
|
"OPTIONS": "toc:t <:t e:t f:t pri:t todo:t tags:t",
|
|
},
|
|
Log: log.New(os.Stderr, "go-org: ", 0),
|
|
ReadFile: ioutil.ReadFile,
|
|
}
|
|
}
|
|
|
|
// String returns the pretty printed Org mode string for the given nodes (see OrgWriter).
|
|
func String(nodes []Node) string { return orgWriter.WriteNodesAsString(nodes...) }
|
|
|
|
// Write is called after with an instance of the Writer interface to export a parsed Document into another format.
|
|
func (d *Document) Write(w Writer) (out string, err error) {
|
|
defer func() {
|
|
if recovered := recover(); recovered != nil {
|
|
err = fmt.Errorf("could not write output: %s", recovered)
|
|
}
|
|
}()
|
|
if d.Error != nil {
|
|
return "", d.Error
|
|
} else if d.Nodes == nil {
|
|
return "", fmt.Errorf("could not write output: parse was not called")
|
|
}
|
|
w.Before(d)
|
|
WriteNodes(w, d.Nodes...)
|
|
w.After(d)
|
|
return w.String(), err
|
|
}
|
|
|
|
// Parse parses the input into an AST (and some other helpful fields like Outline).
|
|
// To allow method chaining, errors are stored in document.Error rather than being returned.
|
|
func (c *Configuration) Parse(input io.Reader, path string) (d *Document) {
|
|
outlineSection := &Section{}
|
|
d = &Document{
|
|
Configuration: c,
|
|
Outline: Outline{outlineSection, outlineSection, 0},
|
|
BufferSettings: map[string]string{},
|
|
NamedNodes: map[string]Node{},
|
|
Path: path,
|
|
}
|
|
defer func() {
|
|
if recovered := recover(); recovered != nil {
|
|
d.Error = fmt.Errorf("could not parse input: %v", recovered)
|
|
}
|
|
}()
|
|
if d.tokens != nil {
|
|
d.Error = fmt.Errorf("parse was called multiple times")
|
|
}
|
|
d.tokenize(input)
|
|
_, nodes := d.parseMany(0, func(d *Document, i int) bool { return i >= len(d.tokens) })
|
|
d.Nodes = nodes
|
|
return d
|
|
}
|
|
|
|
// Silent disables all logging of warnings during parsing.
|
|
func (c *Configuration) Silent() *Configuration {
|
|
c.Log = log.New(ioutil.Discard, "", 0)
|
|
return c
|
|
}
|
|
|
|
func (d *Document) tokenize(input io.Reader) {
|
|
d.tokens = []token{}
|
|
scanner := bufio.NewScanner(input)
|
|
for scanner.Scan() {
|
|
d.tokens = append(d.tokens, tokenize(scanner.Text()))
|
|
}
|
|
if err := scanner.Err(); err != nil {
|
|
d.Error = fmt.Errorf("could not tokenize input: %s", err)
|
|
}
|
|
}
|
|
|
|
// Get returns the value for key in BufferSettings or DefaultSettings if key does not exist in the former
|
|
func (d *Document) Get(key string) string {
|
|
if v, ok := d.BufferSettings[key]; ok {
|
|
return v
|
|
}
|
|
if v, ok := d.DefaultSettings[key]; ok {
|
|
return v
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// GetOption returns the value associated to the export option key
|
|
// Currently supported options:
|
|
// - < (export timestamps)
|
|
// - e (export org entities)
|
|
// - f (export footnotes)
|
|
// - toc (export table of content)
|
|
// - todo (export headline todo status)
|
|
// - pri (export headline priority)
|
|
// - tags (export headline tags)
|
|
// see https://orgmode.org/manual/Export-settings.html for more information
|
|
func (d *Document) GetOption(key string) bool {
|
|
get := func(settings map[string]string) string {
|
|
for _, field := range strings.Fields(settings["OPTIONS"]) {
|
|
if strings.HasPrefix(field, key+":") {
|
|
return field[len(key)+1:]
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
value := get(d.BufferSettings)
|
|
if value == "" {
|
|
value = get(d.DefaultSettings)
|
|
}
|
|
switch value {
|
|
case "t":
|
|
return true
|
|
case "nil":
|
|
return false
|
|
default:
|
|
d.Log.Printf("Bad value for export option %s (%s)", key, value)
|
|
return false
|
|
}
|
|
}
|
|
|
|
func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) {
|
|
switch d.tokens[i].kind {
|
|
case "unorderedList", "orderedList":
|
|
consumed, node = d.parseList(i, stop)
|
|
case "tableRow", "tableSeparator":
|
|
consumed, node = d.parseTable(i, stop)
|
|
case "beginBlock":
|
|
consumed, node = d.parseBlock(i, stop)
|
|
case "beginDrawer":
|
|
consumed, node = d.parseDrawer(i, stop)
|
|
case "text":
|
|
consumed, node = d.parseParagraph(i, stop)
|
|
case "example":
|
|
consumed, node = d.parseExample(i, stop)
|
|
case "horizontalRule":
|
|
consumed, node = d.parseHorizontalRule(i, stop)
|
|
case "comment":
|
|
consumed, node = d.parseComment(i, stop)
|
|
case "keyword":
|
|
consumed, node = d.parseKeyword(i, stop)
|
|
case "headline":
|
|
consumed, node = d.parseHeadline(i, stop)
|
|
case "footnoteDefinition":
|
|
consumed, node = d.parseFootnoteDefinition(i, stop)
|
|
}
|
|
|
|
if consumed != 0 {
|
|
return consumed, node
|
|
}
|
|
d.Log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i])
|
|
m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0])
|
|
d.tokens[i] = token{"text", len(m[1]), m[2], m}
|
|
return d.parseOne(i, stop)
|
|
}
|
|
|
|
func (d *Document) parseMany(i int, stop stopFn) (int, []Node) {
|
|
start, nodes := i, []Node{}
|
|
for i < len(d.tokens) && !stop(d, i) {
|
|
consumed, node := d.parseOne(i, stop)
|
|
i += consumed
|
|
nodes = append(nodes, node)
|
|
}
|
|
return i - start, nodes
|
|
}
|
|
|
|
func (d *Document) addHeadline(headline *Headline) int {
|
|
current := &Section{Headline: headline}
|
|
d.Outline.last.add(current)
|
|
d.Outline.count++
|
|
d.Outline.last = current
|
|
return d.Outline.count
|
|
}
|
|
|
|
func tokenize(line string) token {
|
|
for _, lexFn := range lexFns {
|
|
if token, ok := lexFn(line); ok {
|
|
return token
|
|
}
|
|
}
|
|
panic(fmt.Sprintf("could not lex line: %s", line))
|
|
}
|