Add basic parser and org -> AST -> org rendering

This commit is contained in:
Niklas Fasching 2018-12-02 14:06:08 +01:00
parent 60835c66fc
commit 0b2972e32a
14 changed files with 1123 additions and 0 deletions

View file

@ -7,3 +7,6 @@ A basic org-mode parser in go
- https://orgmode.org/worg/dev/org-syntax.html
- https://github.com/abo-abo/org-mode/blob/mirror/lisp/org.el
- https://github.com/abo-abo/org-mode/blob/mirror/lisp/org-element.el
- test cases
- [[https://github.com/bdewey/org-ruby/blob/master/spec/html_examples][org-ruby]]
- pandoc, goorgeous

47
block.go Normal file
View file

@ -0,0 +1,47 @@
package org
import (
"regexp"
"strings"
"unicode"
)
// Block represents an org #+BEGIN_.../#+END_... region (e.g. a source block).
type Block struct {
	Name       string   // upper-cased block name, e.g. "SRC"
	Parameters []string // whitespace-separated parameters after the name
	Children   []Node   // the raw lines inside the block
}

var beginBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+BEGIN_(\w+)(.*)`)
var endBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+END_(\w+)`)

// lexBlock recognizes #+BEGIN_/#+END_ lines and emits the matching token.
func lexBlock(line string) (token, bool) {
	if match := beginBlockRegexp.FindStringSubmatch(line); match != nil {
		return token{"beginBlock", len(match[1]), strings.ToUpper(match[2]), match}, true
	}
	if match := endBlockRegexp.FindStringSubmatch(line); match != nil {
		return token{"endBlock", len(match[1]), strings.ToUpper(match[2]), match}, true
	}
	return nilToken, false
}
// parseBlock parses a #+BEGIN_X ... #+END_X region starting at token i.
// Everything up to the matching #+END_ (same upper-cased name) is kept as
// raw lines, with the begin line's indentation stripped from each of them.
// Returns (0, nil) when the block is unterminated so the caller can fall
// back to plain text.
func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
	t, start, nodes := d.tokens[i], i, []Node{}
	name, parameters := t.content, strings.Fields(t.matches[3])
	trim := trimIndentUpTo(d.tokens[i].lvl)
	// Bound the loop by len(d.tokens): the previous version indexed
	// d.tokens[i] past the end when a block was never closed and
	// parentStop only fired at (or after) the out-of-range index.
	for i++; i < len(d.tokens) && !(d.tokens[i].kind == "endBlock" && d.tokens[i].content == name); i++ {
		if parentStop(d, i) {
			return 0, nil
		}
		nodes = append(nodes, Line{[]Node{Text{trim(d.tokens[i].matches[0])}}})
	}
	if i >= len(d.tokens) {
		return 0, nil // unterminated block
	}
	return i + 1 - start, Block{name, parameters, nodes}
}
// trimIndentUpTo returns a function that strips at most max leading
// whitespace characters from a line.
func trimIndentUpTo(max int) func(string) string {
	return func(line string) string {
		n := 0
		for n < len(line) && n < max && unicode.IsSpace(rune(line[n])) {
			n++
		}
		return line[n:]
	}
}

31
cmd/org/org.go Normal file
View file

@ -0,0 +1,31 @@
package main
import (
"bytes"
"io/ioutil"
"log"
"os"
"strings"
"github.com/niklasfasching/org"
)
// main reads the org file named by the first argument, renders it in the
// output format named by the second and prints the result via log.
func main() {
	log.SetFlags(0)
	if len(os.Args) < 3 {
		log.Println("USAGE: org FILE OUTPUT_FORMAT")
		log.Fatal("supported output formats: org")
	}
	bs, err := ioutil.ReadFile(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	out, format := "", strings.ToLower(os.Args[2])
	switch format {
	case "org":
		out = org.NewDocument().Parse(bytes.NewReader(bs)).Write(org.NewOrgWriter()).String()
	default:
		log.Fatal("Unsupported output format")
	}
	log.Println(out)
}

150
document.go Normal file
View file

@ -0,0 +1,150 @@
package org
import (
"bufio"
"fmt"
"io"
"log"
)
// Document is the result of parsing an org file: the token stream, the
// resulting AST nodes and the settings collected while parsing.
type Document struct {
	tokens              []token
	Nodes               []Node
	Footnotes           Footnotes
	StatusKeywords      []string // NOTE(review): not referenced in this commit — confirm intended use
	MaxEmphasisNewLines int      // max newlines an emphasis may span
	BufferSettings      map[string]string // values from #+KEY: VALUE lines
	DefaultSettings     map[string]string // fallbacks consulted by Get
}

// Writer renders a parsed Document into some output format.
type Writer interface {
	before(*Document) // called before any nodes are written
	after(*Document)  // called after all nodes are written
	writeNodes(...Node)
	String() string
}

// Node is any element of the AST; it is a marker interface.
type Node interface{}

// lexFn turns a single input line into a token.
type lexFn = func(line string) (t token, ok bool)

// parseFn parses a node starting at token index i.
type parseFn = func(*Document, int, stopFn) (int, Node)

// stopFn reports whether parsing should stop at token index i.
type stopFn = func(*Document, int) bool

// token is one lexed input line.
type token struct {
	kind    string   // e.g. "headline", "text", "beginBlock"
	lvl     int      // indentation (or structural) level of the line
	content string   // the semantically relevant part of the line
	matches []string // raw regexp submatches from the lexer
}

// lexFns are tried in order; the first match wins, so order matters
// (lexText matches anything and must come last).
var lexFns = []lexFn{
	lexHeadline,
	lexBlock,
	lexList,
	lexTable,
	lexHorizontalRule,
	lexKeywordOrComment,
	lexFootnoteDefinition,
	lexText,
}

// nilToken is returned by lex functions that did not match.
var nilToken = token{"nil", -1, "", nil}
// NewDocument returns a Document initialized with the default settings.
func NewDocument() *Document {
	doc := &Document{
		MaxEmphasisNewLines: 1,
		BufferSettings:      map[string]string{},
		DefaultSettings:     map[string]string{"TODO": "TODO | DONE"},
	}
	doc.Footnotes = Footnotes{
		ExcludeHeading: true,
		Title:          "Footnotes",
		Definitions:    map[string]FootnoteDefinition{},
	}
	return doc
}
// Write renders the parsed document using w and returns w for chaining.
// It panics when called before Parse (d.Nodes still nil).
func (d *Document) Write(w Writer) Writer {
	if d.Nodes == nil {
		panic("cannot Write() empty document: you must call Parse() first")
	}
	w.before(d)
	w.writeNodes(d.Nodes...)
	w.after(d)
	return w
}
// Parse tokenizes input line by line and parses the tokens into the
// document's AST (d.Nodes). It returns d for chaining.
func (d *Document) Parse(input io.Reader) *Document {
	d.tokens = []token{}
	scanner := bufio.NewScanner(input)
	// NOTE(review): bufio.Scanner limits lines to 64KiB by default;
	// longer lines make Scan stop and surface as the panic below.
	for scanner.Scan() {
		d.tokens = append(d.tokens, tokenize(scanner.Text()))
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	// The top-level stop condition: end of the token stream.
	_, nodes := d.parseMany(0, func(d *Document, i int) bool { return !(i < len(d.tokens)) })
	d.Nodes = nodes
	return d
}
// Get returns the setting for key, preferring buffer settings
// (#+KEY: lines) over the document defaults. Unknown keys yield "".
func (d *Document) Get(key string) string {
	for _, settings := range []map[string]string{d.BufferSettings, d.DefaultSettings} {
		if v, ok := settings[key]; ok {
			return v
		}
	}
	return ""
}
// parseOne parses a single node starting at token i and returns the
// number of consumed tokens plus the resulting node. The node may be
// nil (e.g. footnote definitions are stored on the document instead).
func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) {
	switch d.tokens[i].kind {
	case "unorderedList", "orderedList":
		consumed, node = d.parseList(i, stop)
	case "tableRow", "tableSeparator":
		consumed, node = d.parseTable(i, stop)
	case "beginBlock":
		consumed, node = d.parseBlock(i, stop)
	case "text":
		consumed, node = d.parseParagraph(i, stop)
	case "horizontalRule":
		consumed, node = d.parseHorizontalRule(i, stop)
	case "comment":
		consumed, node = d.parseComment(i, stop)
	case "keyword":
		consumed, node = d.parseKeyword(i, stop)
	case "headline":
		consumed, node = d.parseHeadline(i, stop)
	case "footnoteDefinition":
		consumed, node = d.parseFootnoteDefinition(i, stop)
	}
	if consumed != 0 {
		return consumed, node
	}
	// Nothing consumed: the parser for this token kind gave up (e.g. an
	// unterminated block). Degrade the token to plain text and retry;
	// parseParagraph always consumes at least one token, so this
	// recursion terminates.
	log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i])
	m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0])
	d.tokens[i] = token{"text", len(m[1]), m[2], m}
	return d.parseOne(i, stop)
}
// parseMany parses nodes starting at token i until stop fires or the
// token stream is exhausted, returning the number of consumed tokens
// and the parsed nodes.
func (d *Document) parseMany(i int, stop stopFn) (int, []Node) {
	start, nodes := i, []Node{}
	// Consult stop on every iteration: previously it was ignored here,
	// so nested parsers (headline children, footnote definitions, ...)
	// consumed every remaining token instead of ending at their
	// parent's boundary.
	for i < len(d.tokens) && !stop(d, i) {
		consumed, node := d.parseOne(i, stop)
		i += consumed
		nodes = append(nodes, node)
	}
	return i - start, nodes
}
// tokenize lexes a single line by trying each lexer in order. lexText
// matches any line, so the panic only fires if that invariant breaks.
func tokenize(line string) token {
	for _, lex := range lexFns {
		t, ok := lex(line)
		if ok {
			return t
		}
	}
	panic(fmt.Sprintf("could not lex line: %s", line))
}

38
footnote.go Normal file
View file

@ -0,0 +1,38 @@
package org
import (
"regexp"
)
// Footnotes collects the footnote definitions of a document.
type Footnotes struct {
	ExcludeHeading bool   // drop the footnotes headline while parsing
	Title          string // title of the footnotes headline
	Definitions    map[string]FootnoteDefinition
	Order          []string // footnote names in order of first reference
}

// FootnoteDefinition is the content of one "[fn:name] ..." definition.
type FootnoteDefinition struct {
	Name     string
	Children []Node
}

// footnoteDefinitionRegexp matches "[fn:name] definition..." lines.
var footnoteDefinitionRegexp = regexp.MustCompile(`^\[fn:([\w-]+)\]\s+(.+)`)

// lexFootnoteDefinition emits a footnoteDefinition token for matching lines.
func lexFootnoteDefinition(line string) (token, bool) {
	if m := footnoteDefinitionRegexp.FindStringSubmatch(line); m != nil {
		return token{"footnoteDefinition", 0, m[1], m}, true
	}
	return nilToken, false
}
// parseFootnoteDefinition parses a footnote definition and stores it on
// the document. It emits no node (the writer regenerates the footnotes
// section from d.Footnotes), hence the nil Node return.
func (d *Document) parseFootnoteDefinition(i int, parentStop stopFn) (int, Node) {
	name := d.tokens[i].content
	// Re-lex the definition line without the "[fn:name]" prefix so the
	// rest is parsed like regular content.
	d.tokens[i] = tokenize(d.tokens[i].matches[2])
	stop := func(d *Document, i int) bool {
		return parentStop(d, i) || isSecondBlankLine(d, i) ||
			d.tokens[i].kind == "headline" || d.tokens[i].kind == "footnoteDefinition"
	}
	consumed, nodes := d.parseMany(i, stop)
	d.Footnotes.Definitions[name] = FootnoteDefinition{name, nodes}
	return consumed, nil
}

69
headline.go Normal file
View file

@ -0,0 +1,69 @@
package org
import (
"regexp"
"strings"
"unicode"
)
// Headline is an org headline: "** TODO [#A] Title :tag1:tag2:".
type Headline struct {
	Lvl      int      // number of leading stars
	Status   string   // TODO keyword, if any
	Priority string   // priority cookie letter (A/B/C), if any
	Title    []Node   // inline-parsed title text
	Tags     []string // trailing :tag: list, if any
	Children []Node   // nodes up to the next same-or-shallower headline
}

// headlineRegexp captures the leading stars and the rest of the line.
var headlineRegexp = regexp.MustCompile(`^([*]+)\s+(.*)`)

// tagRegexp splits a title into text and a trailing :tag1:tag2: group.
var tagRegexp = regexp.MustCompile(`(.*?)\s*(:[A-Za-z0-9@#%:]+:\s*$)`)
// lexHeadline matches headline lines ("* Some title"). The token lvl is
// the number of leading stars so nested headlines can be detected.
func lexHeadline(line string) (token, bool) {
	if m := headlineRegexp.FindStringSubmatch(line); m != nil {
		// lvl was previously hard-coded to 0, which made every headline
		// terminate every other headline's children (the stop function in
		// parseHeadline compares token lvl against the parent's Lvl), so
		// headlines never nested.
		return token{"headline", len(m[1]), m[2], m}, true
	}
	return nilToken, false
}
// todoKeywords returns the configured TODO state keywords. The "|"
// separating active from done states is a delimiter, not a keyword.
func (d *Document) todoKeywords() []string {
	isSeparator := func(r rune) bool { return r == '|' || unicode.IsSpace(r) }
	return strings.FieldsFunc(d.Get("TODO"), isSeparator)
}
// parseHeadline parses a headline token plus all child tokens up to the
// next headline of the same or a shallower level.
func (d *Document) parseHeadline(i int, parentStop stopFn) (int, Node) {
	t, headline := d.tokens[i], Headline{}
	headline.Lvl = len(t.matches[1])
	text := t.content
	// Strip a leading TODO keyword ("TODO foo" -> Status "TODO").
	for _, k := range d.todoKeywords() {
		if strings.HasPrefix(text, k) && len(text) > len(k) && unicode.IsSpace(rune(text[len(k)])) {
			headline.Status = k
			text = text[len(k)+1:]
			break
		}
	}
	// Strip a priority cookie ("[#A] foo"). Require len(text) > 3: the
	// previous `>= 3` guard read text[3] out of range for input like "[#A".
	if len(text) > 3 && text[0:2] == "[#" && strings.Contains("ABC", text[2:3]) && text[3] == ']' {
		headline.Priority = text[2:3]
		text = strings.TrimSpace(text[4:])
	}
	// Strip trailing tags (":foo:bar:").
	if m := tagRegexp.FindStringSubmatch(text); m != nil {
		text = m[1]
		headline.Tags = strings.FieldsFunc(m[2], func(r rune) bool { return r == ':' })
	}
	headline.Title = d.parseInline(text)
	stop := func(d *Document, i int) bool {
		return parentStop(d, i) || d.tokens[i].kind == "headline" && d.tokens[i].lvl <= headline.Lvl
	}
	consumed, nodes := d.parseMany(i+1, stop)
	headline.Children = nodes
	// The footnotes headline is dropped; the writer regenerates it.
	if headline.Lvl == 1 && text == d.Footnotes.Title && d.Footnotes.ExcludeHeading {
		return consumed + 1, nil
	}
	return consumed + 1, headline
}

184
inline.go Normal file
View file

@ -0,0 +1,184 @@
package org
import (
"regexp"
"strings"
"unicode"
)
// Text is a run of plain text.
type Text struct{ Content string }

// Linebreak is an explicit "\\" hard line break.
type Linebreak struct{}

// Emphasis is emphasized content; Kind is the marker ("*", "/", "=",
// "~", "+", "_", or "_{}"/"^{}" for sub-/superscript).
type Emphasis struct {
	Kind    string
	Content []Node
}

// FootnoteLink is a reference to a footnote by name.
type FootnoteLink struct{ Name string }

// RegularLink is a [[url][description]] style link.
type RegularLink struct {
	Protocol    string
	Description []Node
	URL         string
}

// NOTE(review): redundantSpaces is not referenced anywhere in this commit.
var redundantSpaces = regexp.MustCompile("[ \t]+")

// subScriptSuperScriptRegexp matches _{...} / ^{...}. It is unanchored,
// so callers must verify the match position themselves.
var subScriptSuperScriptRegexp = regexp.MustCompile(`([_^])\{(.*?)\}`)

// footnoteRegexp matches [fn:name] and the inline [fn:name:definition].
var footnoteRegexp = regexp.MustCompile(`\[fn:([\w-]+?)(:(.*?))?\]`)
// parseInline parses inline markup (emphasis, links, footnote
// references, line breaks) in input and returns the resulting nodes.
// Spans without markup are kept as Text nodes.
func (d *Document) parseInline(input string) (nodes []Node) {
	previous, current := 0, 0
	for current < len(input) {
		consumed, node := 0, (Node)(nil)
		// Dispatch on the current character; each sub-parser reports how
		// many bytes it consumed (0 = no match at this position).
		switch input[current] {
		case '^':
			consumed, node = d.parseSubOrSuperScript(input, current)
		case '_':
			consumed, node = d.parseSubScriptOrEmphasis(input, current)
		case '*', '/', '=', '~', '+':
			consumed, node = d.parseEmphasis(input, current)
		case '[':
			consumed, node = d.parseRegularLinkOrFootnoteReference(input, current)
		case '\\':
			consumed, node = d.parseExplicitLineBreak(input, current)
		}
		if consumed != 0 {
			// Flush the plain text accumulated before this node.
			if current > previous {
				nodes = append(nodes, Text{input[previous:current]})
			}
			if node != nil {
				nodes = append(nodes, node)
			}
			current += consumed
			previous = current
		} else {
			current++
		}
	}
	// Flush any trailing plain text.
	if previous < len(input) {
		nodes = append(nodes, Text{input[previous:]})
	}
	return nodes
}
// parseExplicitLineBreak recognizes an org hard line break: "\\" that is
// followed only by whitespace up to the end of the line/input, and that
// does not itself start a line.
func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) {
	if start == 0 || input[start-1] == '\n' || start+1 >= len(input) || input[start+1] != '\\' {
		return 0, nil
	}
	// Scan AFTER the two backslashes. The previous version started on the
	// second backslash, which is not whitespace, so "\\" followed by
	// trailing spaces never matched (only "\\" as the very last chars did).
	i := start + 2
	for i < len(input) && input[i] != '\n' && unicode.IsSpace(rune(input[i])) {
		i++
	}
	if i == len(input) {
		return i - start, Linebreak{}
	}
	if input[i] == '\n' {
		// Consume through the newline, matching the original's accounting.
		return i + 1 - start, Linebreak{}
	}
	return 0, nil
}
// parseSubOrSuperScript parses _{...} or ^{...} at start.
// subScriptSuperScriptRegexp is unanchored, so verify that the (leftmost)
// match actually begins at start — otherwise a later occurrence in the
// input would be mis-attributed to this position and the wrong number of
// bytes consumed.
func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
	if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil && strings.HasPrefix(input[start:], m[0]) {
		// consumed = marker + "{" + content + "}".
		return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2]}}}
	}
	return 0, nil
}
// parseSubScriptOrEmphasis handles '_', which may open either a
// subscript (_{...}) or an underline emphasis (_..._).
func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) {
	consumed, node := d.parseSubOrSuperScript(input, start)
	if consumed == 0 {
		return d.parseEmphasis(input, start)
	}
	return consumed, node
}
// parseRegularLinkOrFootnoteReference dispatches on '[': "[[" opens a
// regular link, a single "[" may open a footnote reference.
func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) {
	rest := input[start:]
	switch {
	case strings.HasPrefix(rest, "[["):
		return d.parseRegularLink(input, start)
	case strings.HasPrefix(rest, "["):
		return d.parseFootnoteReference(input, start)
	}
	return 0, nil
}
// parseFootnoteReference parses [fn:name] or the inline form
// [fn:name:definition]. The name is recorded once in Footnotes.Order;
// an inline definition is stored on the document.
func (d *Document) parseFootnoteReference(input string, start int) (int, Node) {
	m := footnoteRegexp.FindStringSubmatch(input[start:])
	// The regexp is unanchored: only accept a match that begins exactly
	// at start, otherwise a later reference in the input would be
	// consumed here with a wrong byte count.
	if m == nil || !strings.HasPrefix(input[start:], m[0]) {
		return 0, nil
	}
	name, definition := m[1], m[3]
	seen := false
	for _, otherName := range d.Footnotes.Order {
		if name == otherName {
			seen = true
			break
		}
	}
	if !seen {
		d.Footnotes.Order = append(d.Footnotes.Order, name)
	}
	if definition != "" {
		d.Footnotes.Definitions[name] = FootnoteDefinition{name, d.parseInline(definition)}
	}
	return len(m[0]), FootnoteLink{name}
}
// parseRegularLink parses [[url]] and [[url][description]] links.
func (d *Document) parseRegularLink(input string, start int) (int, Node) {
	// Require at least "[[": the previous `len(input[start:]) == 0`
	// guard still allowed an out-of-range read of input[start+1] when
	// exactly one character remained.
	if len(input[start:]) < 2 || input[start+1] != '[' {
		return 0, nil
	}
	input = input[start:]
	end := strings.Index(input, "]]")
	if end == -1 {
		return 0, nil
	}
	rawLink := input[2:end]
	link, description, parts := "", []Node{}, strings.Split(rawLink, "][")
	if len(parts) == 2 {
		link, description = parts[0], d.parseInline(parts[1])
	} else {
		// No explicit description: reuse the link text itself.
		link, description = rawLink, []Node{Text{rawLink}}
	}
	consumed := end + 2
	// "proto:rest" -> Protocol "proto"; links without ':' have none.
	protocol, parts := "", strings.SplitN(link, ":", 2)
	if len(parts) == 2 {
		protocol = parts[0]
	}
	return consumed, RegularLink{protocol, description, link}
}
// parseEmphasis parses emphasis (e.g. *bold*) whose opening marker sits
// at input[start]. The content may span at most MaxEmphasisNewLines
// newlines; the content itself is parsed recursively.
func (d *Document) parseEmphasis(input string, start int) (int, Node) {
	marker, i := input[start], start
	if !hasValidPreAndBorderChars(input, i) {
		return 0, nil
	}
	for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ {
		if input[i] == '\n' {
			consumedNewLines++
		}
		// The closing marker must not directly follow the opening one and
		// must sit on a valid border/post character boundary.
		if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
			return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
		}
	}
	return 0, nil
}
// see org-emphasis-regexp-components (emacs elisp variable)

// hasValidPreAndBorderChars reports whether position i may OPEN an
// emphasis: the preceding char must be a valid pre char (or BOF) and
// the following char a valid border char (or EOF).
func hasValidPreAndBorderChars(input string, i int) bool {
	borderOK := i+1 >= len(input) || isValidBorderChar(rune(input[i+1]))
	preOK := i == 0 || isValidPreChar(rune(input[i-1]))
	return borderOK && preOK
}

// hasValidPostAndBorderChars reports whether position i may CLOSE an
// emphasis: the preceding char must be a valid border char (or BOF) and
// the following char a valid post char (or EOF).
func hasValidPostAndBorderChars(input string, i int) bool {
	borderOK := i == 0 || isValidBorderChar(rune(input[i-1]))
	postOK := i+1 >= len(input) || isValidPostChar(rune(input[i+1]))
	return borderOK && postOK
}

// isValidPreChar: whitespace or one of -({'" may precede an emphasis.
func isValidPreChar(r rune) bool {
	return unicode.IsSpace(r) || strings.ContainsRune(`-({'"`, r)
}

// isValidPostChar: whitespace or closing punctuation may follow one.
func isValidPostChar(r rune) bool {
	return unicode.IsSpace(r) || strings.ContainsRune(`-.,:!?;'")}[`, r)
}

// isValidBorderChar: any non-whitespace char may border the content.
func isValidBorderChar(r rune) bool { return !unicode.IsSpace(r) }

36
keyword.go Normal file
View file

@ -0,0 +1,36 @@
package org
import (
"regexp"
"strings"
)
// Keyword is a "#+KEY: VALUE" line.
type Keyword struct {
	Key   string
	Value string
}

// Comment is a "#..." comment line; Content excludes the leading "#".
type Comment struct{ Content string }

// keywordRegexp matches "#+KEY: VALUE"; commentRegexp matches any other
// "#..." line. lexKeywordOrComment tries them in that order.
var keywordRegexp = regexp.MustCompile(`^(\s*)#\+([^:]+):\s(.*)`)
var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`)
// lexKeywordOrComment emits a keyword token for "#+KEY: VALUE" lines and
// a comment token for all other "#"-prefixed lines.
func lexKeywordOrComment(line string) (token, bool) {
	if match := keywordRegexp.FindStringSubmatch(line); match != nil {
		return token{"keyword", len(match[1]), match[2], match}, true
	}
	if match := commentRegexp.FindStringSubmatch(line); match != nil {
		return token{"comment", len(match[1]), match[2], match}, true
	}
	return nilToken, false
}
// parseKeyword records a "#+KEY: VALUE" line in BufferSettings and emits
// a Keyword node. Repeated keys accumulate newline-separated values.
func (d *Document) parseKeyword(i int, stop stopFn) (int, Node) {
	t := d.tokens[i]
	k, v := t.matches[2], t.matches[3]
	// Join with "\n" only when the key was already set: the previous
	// unconditional strings.Join prefixed the very first value with a
	// spurious leading newline (BufferSettings[k] started out "").
	if existing, ok := d.BufferSettings[k]; ok {
		d.BufferSettings[k] = existing + "\n" + v
	} else {
		d.BufferSettings[k] = v
	}
	return 1, Keyword{k, v}
}
// parseComment consumes one comment token and emits a Comment node.
func (d *Document) parseComment(i int, stop stopFn) (int, Node) {
	comment := Comment{Content: d.tokens[i].content}
	return 1, comment
}

82
list.go Normal file
View file

@ -0,0 +1,82 @@
package org
import (
"fmt"
"regexp"
"strings"
"unicode"
)
// List is an ordered or unordered list. Kind is the bullet itself for
// unordered lists ("-", "+", "*") or "letter"/"number" for ordered ones.
type List struct {
	Kind  string
	Items []Node
}

// ListItem is one item of a List, keeping its original bullet text.
type ListItem struct {
	Bullet   string
	Children []Node
}

// unorderedListRegexp matches "- item" / "+ item" / "* item" lines;
// orderedListRegexp matches "1. item" / "a) item" style lines.
var unorderedListRegexp = regexp.MustCompile(`^(\s*)([-]|[+]|[*])\s(.*)`)
var orderedListRegexp = regexp.MustCompile(`^(\s*)(([0-9]+|[a-zA-Z])[.)])\s+(.*)`)
// lexList emits an unorderedList or orderedList token for list lines.
func lexList(line string) (token, bool) {
	if match := unorderedListRegexp.FindStringSubmatch(line); match != nil {
		return token{"unorderedList", len(match[1]), match[3], match}, true
	}
	if match := orderedListRegexp.FindStringSubmatch(line); match != nil {
		return token{"orderedList", len(match[1]), match[4], match}, true
	}
	return nilToken, false
}
// isListToken reports whether t starts a list item of either kind.
func isListToken(t token) bool {
	switch t.kind {
	case "unorderedList", "orderedList":
		return true
	}
	return false
}

// stopIndentBelow reports whether t is indented less than minIndent.
// Blank text lines never stop: they may separate list item content.
func stopIndentBelow(t token, minIndent int) bool {
	if t.lvl >= minIndent {
		return false
	}
	return t.kind != "text" || t.content != ""
}
// listKind classifies a list token by its bullet: the literal bullet for
// unordered lists, "letter" or "number" for ordered ones.
func listKind(t token) string {
	bullet := t.matches[2]
	switch first := rune(bullet[0]); {
	case bullet == "*", bullet == "+", bullet == "-":
		return bullet
	case unicode.IsLetter(first):
		return "letter"
	case unicode.IsDigit(first):
		return "number"
	default:
		panic(fmt.Sprintf("bad list bullet '%s': %#v", bullet, t))
	}
}
// parseList collects consecutive list item tokens at the same
// indentation level into a single List node. Deeper items become part of
// an item's children (via parseListItem), not siblings.
func (d *Document) parseList(i int, parentStop stopFn) (int, Node) {
	start, lvl := i, d.tokens[i].lvl
	list := List{Kind: listKind(d.tokens[i])}
	for !parentStop(d, i) && d.tokens[i].lvl == lvl && isListToken(d.tokens[i]) {
		consumed, node := d.parseListItem(i, parentStop)
		i += consumed
		list.Items = append(list.Items, node)
	}
	return i - start, list
}
// parseListItem parses one list item: its first line (re-tokenized with
// the bullet replaced by spaces) plus any further lines indented past
// the bullet.
func (d *Document) parseListItem(i int, parentStop stopFn) (int, Node) {
	start, nodes, bullet := i, []Node{}, d.tokens[i].matches[2]
	minIndent := d.tokens[i].lvl + len(bullet)
	// Re-lex the first line as if the bullet were indentation so it is
	// parsed like the item's remaining content.
	d.tokens[i] = tokenize(strings.Repeat(" ", minIndent) + d.tokens[i].content)
	stop := func(d *Document, i int) bool {
		// Reuse stopIndentBelow instead of duplicating its logic inline.
		return parentStop(d, i) || stopIndentBelow(d.tokens[i], minIndent)
	}
	for !stop(d, i) && !isSecondBlankLine(d, i) {
		consumed, node := d.parseOne(i, stop)
		i += consumed
		nodes = append(nodes, node)
	}
	return i - start, ListItem{bullet, nodes}
}

243
org.go Normal file
View file

@ -0,0 +1,243 @@
package org
import (
"fmt"
"regexp"
"strings"
)
// stringBuilder aliases strings.Builder so it can be embedded unexported.
type stringBuilder = strings.Builder

// OrgWriter renders a parsed Document back into org syntax.
type OrgWriter struct {
	TagsColumn int // see org-tags-column
	stringBuilder
	indent string // current indentation prefix (grows inside list items)
}
// emphasisOrgBorders maps an emphasis kind to its opening and closing
// markers in org syntax.
var emphasisOrgBorders = map[string][]string{
	"_":   {"_", "_"},
	"*":   {"*", "*"},
	"/":   {"/", "/"},
	"+":   {"+", "+"},
	"~":   {"~", "~"},
	"=":   {"=", "="},
	"_{}": {"_{", "}"},
	"^{}": {"^{", "}"},
}
// NewOrgWriter returns an OrgWriter with the default tag column
// (see org-tags-column).
func NewOrgWriter() *OrgWriter {
	w := &OrgWriter{}
	w.TagsColumn = 77
	return w
}
// before is a no-op for org output.
func (w *OrgWriter) before(d *Document) {}

// after appends the collected footnote definitions under a regenerated
// footnotes headline (the original one was dropped while parsing).
func (w *OrgWriter) after(d *Document) {
	footnotes := d.Footnotes
	if len(footnotes.Definitions) == 0 {
		return
	}
	w.WriteString("* " + footnotes.Title + "\n")
	for _, name := range footnotes.Order {
		w.writeNodes(footnotes.Definitions[name])
	}
}
// emptyClone returns a copy of w (same settings and indent) with an
// empty output buffer, used to render sub-content separately.
func (w *OrgWriter) emptyClone() *OrgWriter {
	clone := *w
	clone.stringBuilder = stringBuilder{}
	return &clone
}
// writeNodes dispatches each node to its writer method. nil nodes are
// skipped; any other unknown node type is a programming error.
func (w *OrgWriter) writeNodes(nodes ...Node) {
	for _, n := range nodes {
		switch node := n.(type) {
		case Comment:
			w.writeComment(node)
		case Keyword:
			w.writeKeyword(node)
		case Headline:
			w.writeHeadline(node)
		case Block:
			w.writeBlock(node)
		case FootnoteDefinition:
			w.writeFootnoteDefinition(node)
		case List:
			w.writeList(node)
		case ListItem:
			w.writeListItem(node)
		case Table:
			w.writeTable(node)
		case TableHeader:
			w.writeTableHeader(node)
		case TableRow:
			w.writeTableRow(node)
		case TableSeparator:
			w.writeTableSeparator(node)
		case Paragraph:
			w.writeParagraph(node)
		case HorizontalRule:
			w.writeHorizontalRule(node)
		case Line:
			w.writeLine(node)
		case Text:
			w.writeText(node)
		case Emphasis:
			w.writeEmphasis(node)
		case Linebreak:
			w.writeLinebreak(node)
		case RegularLink:
			w.writeRegularLink(node)
		case FootnoteLink:
			w.writeFootnoteLink(node)
		default:
			if n != nil {
				panic(fmt.Sprintf("bad node %#v", n))
			}
		}
	}
}
// eolWhiteSpaceRegexp matches trailing spaces/tabs before a newline.
var eolWhiteSpaceRegexp = regexp.MustCompile("[\t ]*\n")

// String returns the rendered output with end-of-line whitespace
// stripped (e.g. the trailing space emitted after table columns).
func (w *OrgWriter) String() string {
	return eolWhiteSpaceRegexp.ReplaceAllString(w.stringBuilder.String(), "\n")
}
// writeHeadline renders stars, status, priority, title and tags. Tags
// are padded toward TagsColumn when the headline is short enough.
func (w *OrgWriter) writeHeadline(h Headline) {
	// Render into a scratch writer first so the headline's length is
	// known before deciding how much padding the tags need.
	tmp := w.emptyClone()
	tmp.WriteString(strings.Repeat("*", h.Lvl))
	if h.Status != "" {
		tmp.WriteString(" " + h.Status)
	}
	if h.Priority != "" {
		tmp.WriteString(" [#" + h.Priority + "]")
	}
	tmp.WriteString(" ")
	tmp.writeNodes(h.Title...)
	hString := tmp.String()
	if len(h.Tags) != 0 {
		hString += " "
		tString := ":" + strings.Join(h.Tags, ":") + ":"
		// Pad toward TagsColumn; if the headline is already too long the
		// tags follow directly after the single space.
		if n := w.TagsColumn - len(tString) - len(hString); n > 0 {
			w.WriteString(hString + strings.Repeat(" ", n) + tString)
		} else {
			w.WriteString(hString + tString)
		}
	} else {
		w.WriteString(hString)
	}
	w.WriteString("\n")
	if len(h.Children) != 0 {
		w.WriteString(w.indent)
	}
	w.writeNodes(h.Children...)
}
// writeBlock renders "#+BEGIN_NAME params", the raw content lines and
// the matching "#+END_NAME".
func (w *OrgWriter) writeBlock(b Block) {
	params := strings.Join(b.Parameters, " ")
	w.WriteString(fmt.Sprintf("%s#+BEGIN_%s %s\n", w.indent, b.Name, params))
	w.writeNodes(b.Children...)
	w.WriteString(w.indent + "#+END_" + b.Name + "\n")
}
// writeFootnoteDefinition renders "[fn:name] " followed by the content.
func (w *OrgWriter) writeFootnoteDefinition(f FootnoteDefinition) {
	w.WriteString("[fn:" + f.Name + "] ")
	w.writeNodes(f.Children...)
}

// writeParagraph renders a paragraph's lines in order.
func (w *OrgWriter) writeParagraph(p Paragraph) {
	w.writeNodes(p.Children...)
}

// writeKeyword renders "#+KEY: VALUE".
func (w *OrgWriter) writeKeyword(k Keyword) {
	w.WriteString(fmt.Sprintf("%s#+%s: %s\n", w.indent, k.Key, k.Value))
}
// writeComment renders "#comment" plus a newline. The newline was
// previously missing, so the node following a comment was glued onto
// the same output line (every other line writer terminates its line).
func (w *OrgWriter) writeComment(c Comment) {
	w.WriteString(w.indent + "#" + c.Content + "\n")
}
// writeList renders each list item in turn.
func (w *OrgWriter) writeList(l List) { w.writeNodes(l.Items...) }

// writeListItem renders "BULLET " followed by the item content, with
// continuation lines indented past the bullet.
func (w *OrgWriter) writeListItem(li ListItem) {
	w.WriteString(w.indent + li.Bullet + " ")
	// Render children with increased indent, then strip that indent from
	// the first line: the bullet already occupies that space.
	liWriter := w.emptyClone()
	liWriter.indent = w.indent + strings.Repeat(" ", len(li.Bullet)+1)
	liWriter.writeNodes(li.Children...)
	w.WriteString(strings.TrimPrefix(liWriter.String(), liWriter.indent))
}
// writeTable renders the header (skipped by writeNodes when nil) and
// then the rows.
func (w *OrgWriter) writeTable(t Table) {
	// TODO: pretty print tables
	w.writeNodes(t.Header)
	w.writeNodes(t.Rows...)
}

// writeTableHeader renders the header columns plus their separator row.
func (w *OrgWriter) writeTableHeader(th TableHeader) {
	w.writeTableColumns(th.Columns)
	w.writeNodes(th.Separator)
}

// writeTableRow renders one content row.
func (w *OrgWriter) writeTableRow(tr TableRow) {
	w.writeTableColumns(tr.Columns)
}

// writeTableSeparator renders a "|---+---|" line verbatim.
func (w *OrgWriter) writeTableSeparator(ts TableSeparator) {
	w.WriteString(w.indent + ts.Content + "\n")
}
// writeTableColumns renders "| a | b | ". The trailing space before the
// newline is removed later by String()'s end-of-line whitespace pass.
func (w *OrgWriter) writeTableColumns(columns [][]Node) {
	w.WriteString(w.indent + "| ")
	for _, columnNodes := range columns {
		w.writeNodes(columnNodes...)
		w.WriteString(" | ")
	}
	w.WriteString("\n")
}
// writeHorizontalRule renders the canonical five-dash rule.
func (w *OrgWriter) writeHorizontalRule(hr HorizontalRule) {
	w.WriteString(w.indent + "-----\n")
}

// writeLine renders one line of inline nodes with the current indent.
func (w *OrgWriter) writeLine(l Line) {
	w.WriteString(w.indent)
	w.writeNodes(l.Children...)
	w.WriteString("\n")
}
// writeText renders plain text verbatim.
func (w *OrgWriter) writeText(t Text) { w.WriteString(t.Content) }

// writeEmphasis wraps the content in the org markers for its kind.
func (w *OrgWriter) writeEmphasis(e Emphasis) {
	markers, known := emphasisOrgBorders[e.Kind]
	if !known {
		panic(fmt.Sprintf("bad emphasis %#v", e))
	}
	w.WriteString(markers[0])
	w.writeNodes(e.Content...)
	w.WriteString(markers[1])
}
// writeLinebreak renders the explicit line break marker.
func (w *OrgWriter) writeLinebreak(l Linebreak) {
	w.WriteString(`\\`)
}

// writeFootnoteLink renders a footnote reference.
func (w *OrgWriter) writeFootnoteLink(l FootnoteLink) {
	w.WriteString("[fn:" + l.Name + "]")
}

// writeRegularLink renders [[url][description]], collapsing to [[url]]
// when the description is just the url itself.
func (w *OrgWriter) writeRegularLink(l RegularLink) {
	sub := w.emptyClone()
	sub.writeNodes(l.Description...)
	if description := sub.String(); description != l.URL {
		w.WriteString(fmt.Sprintf("[[%s][%s]]", l.URL, description))
	} else {
		w.WriteString(fmt.Sprintf("[[%s]]", l.URL))
	}
}

61
org_test.go Normal file
View file

@ -0,0 +1,61 @@
package org
import (
"fmt"
"io/ioutil"
"path/filepath"
"strings"
"testing"
"github.com/pmezard/go-difflib/difflib"
)
// TestOrgWriter round-trips every testdata/*.org file through
// parse + org rendering and requires byte-identical output.
func TestOrgWriter(t *testing.T) {
	for _, path := range orgTestFiles() {
		expected := fileString(path)
		reader, writer := strings.NewReader(expected), NewOrgWriter()
		actual := NewDocument().Parse(reader).Write(writer).String()
		if actual != expected {
			// Format string fixed: it previously ended in a stray
			// unbalanced "'" that was printed after every diff.
			t.Errorf("%s:\n%s", path, diff(actual, expected))
		} else {
			t.Logf("%s: passed!", path)
		}
	}
}
// orgTestFiles returns the paths of all .org files in ./testdata,
// panicking if the directory cannot be read.
func orgTestFiles() []string {
	dir := "./testdata"
	entries, err := ioutil.ReadDir(dir)
	if err != nil {
		panic(fmt.Sprintf("Could not read directory: %s", err))
	}
	paths := []string{}
	for _, entry := range entries {
		if filepath.Ext(entry.Name()) == ".org" {
			paths = append(paths, filepath.Join(dir, entry.Name()))
		}
	}
	return paths
}
// fileString reads the file at path and returns its content as a
// string, panicking on error (acceptable in test helpers).
func fileString(path string) string {
	content, err := ioutil.ReadFile(path)
	if err != nil {
		panic(fmt.Sprintf("Could not read file %s: %s", path, err))
	}
	return string(content)
}
// diff returns a unified diff of actual vs expected for test output.
func diff(actual, expected string) string {
	ud := difflib.UnifiedDiff{
		A:        difflib.SplitLines(actual),
		B:        difflib.SplitLines(expected),
		FromFile: "Actual",
		ToFile:   "Expected",
		Context:  3,
	}
	text, _ := difflib.GetUnifiedDiffString(ud)
	return text
}

57
paragraph.go Normal file
View file

@ -0,0 +1,57 @@
package org
import (
"regexp"
)
// Line is a single line of inline nodes inside a paragraph or block.
type Line struct{ Children []Node }

// Paragraph is a run of consecutive text lines.
type Paragraph struct{ Children []Node }

// HorizontalRule is a "-----" rule line.
type HorizontalRule struct{}

// horizontalRuleRegexp matches lines of five or more dashes.
// plainTextRegexp matches anything — it backs the catch-all lexer.
var horizontalRuleRegexp = regexp.MustCompile(`^(\s*)-{5,}\s*$`)
var plainTextRegexp = regexp.MustCompile(`^(\s*)(.*)`)
// lexText is the catch-all lexer: every line matches as plain text.
func lexText(line string) (token, bool) {
	match := plainTextRegexp.FindStringSubmatch(line)
	if match == nil {
		return nilToken, false
	}
	return token{"text", len(match[1]), match[2], match}, true
}

// lexHorizontalRule matches "-----" (five or more dashes) lines.
func lexHorizontalRule(line string) (token, bool) {
	match := horizontalRuleRegexp.FindStringSubmatch(line)
	if match == nil {
		return nilToken, false
	}
	return token{"horizontalRule", len(match[1]), "", match}, true
}
// isSecondBlankLine reports whether tokens i-1 and i are both blank text
// lines — two consecutive blank lines terminate paragraphs, list items
// and footnote definitions.
func isSecondBlankLine(d *Document, i int) bool {
	// Only guard against i-1 being out of range. The previous `i-1 <= 0`
	// check also rejected i == 1 and therefore missed a blank pair at
	// the very start of the token stream.
	if i < 1 {
		return false
	}
	t1, t2 := d.tokens[i-1], d.tokens[i]
	return t1.kind == "text" && t2.kind == "text" && t1.content == "" && t2.content == ""
}
// parseParagraph collects consecutive text tokens into a Paragraph,
// stopping at non-text tokens, parentStop, or a second blank line.
func (d *Document) parseParagraph(i int, parentStop stopFn) (int, Node) {
	lines, start := []Node{Line{d.parseInline(d.tokens[i].content)}}, i
	i++
	stop := func(d *Document, i int) bool { return parentStop(d, i) || d.tokens[i].kind != "text" }
	// The loop condition already rejects the second blank line, so the
	// former in-loop isSecondBlankLine branch could never execute and
	// has been removed as dead code.
	for ; !stop(d, i) && !isSecondBlankLine(d, i); i++ {
		lines = append(lines, Line{d.parseInline(d.tokens[i].content)})
	}
	return i - start, Paragraph{lines}
}

// parseHorizontalRule consumes one horizontal rule token.
func (d *Document) parseHorizontalRule(i int, parentStop stopFn) (int, Node) {
	return 1, HorizontalRule{}
}

63
table.go Normal file
View file

@ -0,0 +1,63 @@
package org
import (
"regexp"
"strings"
)
// Table is a parsed table; Header is nil when the table does not start
// with a row followed by a separator.
type Table struct {
	Header Node
	Rows   []Node
}

// TableSeparator is a "|---+---|" line, kept verbatim.
type TableSeparator struct{ Content string }

// TableHeader is the first row plus the separator line beneath it.
type TableHeader struct {
	Columns   [][]Node
	Separator TableSeparator
}

// TableRow is one content row of a table.
type TableRow struct{ Columns [][]Node }

// tableSeparatorRegexp matches "|---+---|" lines; tableRowRegexp matches
// any other "|"-prefixed line. The separator must be tried first.
var tableSeparatorRegexp = regexp.MustCompile(`^(\s*)(\|[+-|]*)\s*$`)
var tableRowRegexp = regexp.MustCompile(`^(\s*)(\|.*)`)
// lexTable emits a tableSeparator token for "|---|" lines and a tableRow
// token for all other "|"-prefixed lines.
func lexTable(line string) (token, bool) {
	if match := tableSeparatorRegexp.FindStringSubmatch(line); match != nil {
		return token{"tableSeparator", len(match[1]), match[2], match}, true
	}
	if match := tableRowRegexp.FindStringSubmatch(line); match != nil {
		return token{"tableRow", len(match[1]), match[2], match}, true
	}
	return nilToken, false
}
// parseTable consumes consecutive table row/separator tokens. When the
// table starts with a row followed directly by a separator, the pair
// becomes the TableHeader; otherwise Header stays nil.
func (d *Document) parseTable(i int, parentStop stopFn) (int, Node) {
	rows, start := []Node{}, i
	for !parentStop(d, i) && (d.tokens[i].kind == "tableRow" || d.tokens[i].kind == "tableSeparator") {
		consumed, row := d.parseTableRowOrSeparator(i, parentStop)
		i += consumed
		rows = append(rows, row)
	}
	consumed := i - start
	if len(rows) >= 2 {
		if row, ok := rows[0].(TableRow); ok {
			if separator, ok := rows[1].(TableSeparator); ok {
				return consumed, Table{TableHeader{row.Columns, separator}, rows[2:]}
			}
		}
	}
	return consumed, Table{nil, rows}
}
// parseTableRowOrSeparator turns one token into a TableSeparator (kept
// verbatim) or a TableRow with inline-parsed, whitespace-trimmed columns.
func (d *Document) parseTableRowOrSeparator(i int, _ stopFn) (int, Node) {
	if d.tokens[i].kind == "tableSeparator" {
		return 1, TableSeparator{d.tokens[i].content}
	}
	// FieldsFunc drops empty fields, so the leading/trailing "|" vanish.
	fields := strings.FieldsFunc(d.tokens[i].content, func(r rune) bool { return r == '|' })
	row := TableRow{}
	for _, field := range fields {
		row.Columns = append(row.Columns, d.parseInline(strings.TrimSpace(field)))
	}
	return 1, row
}

59
testdata/example.org vendored Normal file
View file

@ -0,0 +1,59 @@
#+TITLE: Example org mode file
#+AUTHOR: Niklas Fasching
#+DESCRIPTION: just some random elements with little explanation
* Motivation
To validate the parser we'll try printing the AST back to org-mode source - if that
works we can be kind of sure that the parsing worked.
At least I hope so - I would like to get around writing tests for the individual parsing
functions...
** Headlines with TODO status, priority & tags
*** TODO [#B] Headline with todo status & priority
*** DONE Headline with TODO status
*** [#A] Headline with tags & priority :foo:bar:
this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...
** Lists
- unordered list item 1
- unordered list item 2 - with ~inline~ /markup/
1. ordered sublist item 1
a) ordered sublist item 1
b) ordered sublist item 2
c) ordered sublist item 3
2. ordered sublist item 2
- unordered list item 3 - and a [[https://example.com][link]]
and some lines of text
1. and another subitem
#+BEGIN_SRC sh
echo with a block
#+END_SRC
2. and another one with a table
| a | b | c |
|---+---+---|
| 1 | 2 | 3 |
and text with an empty line in between as well!
- unordered list item 4
** Inline
- /emphasis/ and a hard line break \\
- /.emphasis with dot border chars./
- /emphasis with a slash/inside/
- /emphasis/ followed by raw text with slash /
- ->/not an emphasis/<-
- links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/
- _underlined_ *bold* =verbatim= ~code~ +strikethrough+
- *bold string with an *asterisk inside*
- links
1. regular link [[https://example.com]] link without description
2. regular link [[https://example.com][example.com]] link with description
3. regular link to a file (image) [[file:my-img.png]]
** Footnotes
- normal footnote reference [fn:1]
- further references to the same footnote should not [fn:1] render duplicates in the footnote list
- also inline footnotes are supported via =fn:2:inline definition=. But we won't test that because it would
cause the output to look different from the input
* Footnotes
[fn:1] Foobar