Add basic parser and org -> AST -> org rendering

parent 60835c66fc
commit 0b2972e32a

14 changed files with 1123 additions and 0 deletions
README.md

@@ -7,3 +7,6 @@ A basic org-mode parser in go

- https://orgmode.org/worg/dev/org-syntax.html
- https://github.com/abo-abo/org-mode/blob/mirror/lisp/org.el
- https://github.com/abo-abo/org-mode/blob/mirror/lisp/org-element.el
- test cases
  - [[https://github.com/bdewey/org-ruby/blob/master/spec/html_examples][org-ruby]]
  - pandoc, goorgeous
block.go (new file, 47 lines)

package org

import (
    "regexp"
    "strings"
    "unicode"
)

type Block struct {
    Name       string
    Parameters []string
    Children   []Node
}

var beginBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+BEGIN_(\w+)(.*)`)
var endBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+END_(\w+)`)

func lexBlock(line string) (token, bool) {
    if m := beginBlockRegexp.FindStringSubmatch(line); m != nil {
        return token{"beginBlock", len(m[1]), strings.ToUpper(m[2]), m}, true
    } else if m := endBlockRegexp.FindStringSubmatch(line); m != nil {
        return token{"endBlock", len(m[1]), strings.ToUpper(m[2]), m}, true
    }
    return nilToken, false
}

func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
    t, start, nodes := d.tokens[i], i, []Node{}
    name, parameters := t.content, strings.Fields(t.matches[3])
    trim := trimIndentUpTo(d.tokens[i].lvl)
    for i++; !(d.tokens[i].kind == "endBlock" && d.tokens[i].content == name); i++ {
        if parentStop(d, i) {
            return 0, nil
        }
        nodes = append(nodes, Line{[]Node{Text{trim(d.tokens[i].matches[0])}}})
    }
    return i + 1 - start, Block{name, parameters, nodes}
}

// trimIndentUpTo returns a function that strips at most max leading
// whitespace characters from a line, so block content keeps only the
// indentation beyond the #+BEGIN line's own indent.
func trimIndentUpTo(max int) func(string) string {
    return func(line string) string {
        i := 0
        for ; i < len(line) && i < max && unicode.IsSpace(rune(line[i])); i++ {
        }
        return line[i:]
    }
}
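
To sanity-check the block parser, here is a minimal throwaway test; it is not part of the commit, the test name is made up, and it assumes it sits in package org next to block.go:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): a block keeps its upper-cased name,
// its whitespace-split parameters, and its raw lines as children.
func TestParseBlockSketch(t *testing.T) {
    src := "#+BEGIN_SRC sh\n  echo hi\n#+END_SRC\n"
    d := NewDocument().Parse(strings.NewReader(src))
    b, ok := d.Nodes[0].(Block)
    if !ok || b.Name != "SRC" || len(b.Parameters) != 1 || b.Parameters[0] != "sh" {
        t.Fatalf("expected SRC block, got %#v", d.Nodes[0])
    }
    if len(b.Children) != 1 {
        t.Errorf("expected one content line, got %#v", b.Children)
    }
}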
cmd/org/org.go (new file, 31 lines)

package main

import (
    "bytes"
    "io/ioutil"
    "log"
    "os"
    "strings"

    "github.com/niklasfasching/org"
)

func main() {
    log.SetFlags(0)
    if len(os.Args) < 3 {
        log.Println("USAGE: org FILE OUTPUT_FORMAT")
        log.Fatal("supported output formats: org")
    }
    bs, err := ioutil.ReadFile(os.Args[1])
    if err != nil {
        log.Fatal(err)
    }
    r, out := bytes.NewReader(bs), ""
    switch strings.ToLower(os.Args[2]) {
    case "org":
        out = org.NewDocument().Parse(r).Write(org.NewOrgWriter()).String()
    default:
        log.Fatal("Unsupported output format")
    }
    log.Println(out)
}
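
For comparison, the same pipeline as a library call rather than a CLI; a sketch that only assumes the import path used above and is not part of the commit:

package main

import (
    "fmt"
    "strings"

    "github.com/niklasfasching/org"
)

// Sketch (not part of the commit): parse an org string into the AST
// and render it back to org source.
func main() {
    src := "* TODO [#A] write more tests :dev:\nwith some /emphasized/ text\n"
    doc := org.NewDocument().Parse(strings.NewReader(src))
    fmt.Print(doc.Write(org.NewOrgWriter()).String())
}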
document.go (new file, 150 lines)

package org

import (
    "bufio"
    "fmt"
    "io"
    "log"
)

type Document struct {
    tokens              []token
    Nodes               []Node
    Footnotes           Footnotes
    StatusKeywords      []string
    MaxEmphasisNewLines int
    BufferSettings      map[string]string
    DefaultSettings     map[string]string
}

type Writer interface {
    before(*Document)
    after(*Document)
    writeNodes(...Node)
    String() string
}

type Node interface{}

type lexFn = func(line string) (t token, ok bool)
type parseFn = func(*Document, int, stopFn) (int, Node)
type stopFn = func(*Document, int) bool

type token struct {
    kind    string
    lvl     int
    content string
    matches []string
}

var lexFns = []lexFn{
    lexHeadline,
    lexBlock,
    lexList,
    lexTable,
    lexHorizontalRule,
    lexKeywordOrComment,
    lexFootnoteDefinition,
    lexText,
}

var nilToken = token{"nil", -1, "", nil}

func NewDocument() *Document {
    return &Document{
        Footnotes: Footnotes{
            ExcludeHeading: true,
            Title:          "Footnotes",
            Definitions:    map[string]FootnoteDefinition{},
        },
        MaxEmphasisNewLines: 1,
        BufferSettings:      map[string]string{},
        DefaultSettings: map[string]string{
            "TODO": "TODO | DONE",
        },
    }
}

func (d *Document) Write(w Writer) Writer {
    if d.Nodes == nil {
        panic("cannot Write() empty document: you must call Parse() first")
    }
    w.before(d)
    w.writeNodes(d.Nodes...)
    w.after(d)
    return w
}

func (d *Document) Parse(input io.Reader) *Document {
    d.tokens = []token{}
    scanner := bufio.NewScanner(input)
    for scanner.Scan() {
        d.tokens = append(d.tokens, tokenize(scanner.Text()))
    }
    if err := scanner.Err(); err != nil {
        panic(err)
    }
    _, nodes := d.parseMany(0, func(d *Document, i int) bool { return !(i < len(d.tokens)) })
    d.Nodes = nodes
    return d
}

func (d *Document) Get(key string) string {
    if v, ok := d.BufferSettings[key]; ok {
        return v
    }
    if v, ok := d.DefaultSettings[key]; ok {
        return v
    }
    return ""
}

func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) {
    switch d.tokens[i].kind {
    case "unorderedList", "orderedList":
        consumed, node = d.parseList(i, stop)
    case "tableRow", "tableSeparator":
        consumed, node = d.parseTable(i, stop)
    case "beginBlock":
        consumed, node = d.parseBlock(i, stop)
    case "text":
        consumed, node = d.parseParagraph(i, stop)
    case "horizontalRule":
        consumed, node = d.parseHorizontalRule(i, stop)
    case "comment":
        consumed, node = d.parseComment(i, stop)
    case "keyword":
        consumed, node = d.parseKeyword(i, stop)
    case "headline":
        consumed, node = d.parseHeadline(i, stop)
    case "footnoteDefinition":
        consumed, node = d.parseFootnoteDefinition(i, stop)
    }

    if consumed != 0 {
        return consumed, node
    }
    // fallback: retokenize the line as plain text and try again
    log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i])
    m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0])
    d.tokens[i] = token{"text", len(m[1]), m[2], m}
    return d.parseOne(i, stop)
}

func (d *Document) parseMany(i int, stop stopFn) (int, []Node) {
    start, nodes := i, []Node{}
    for i < len(d.tokens) && !stop(d, i) {
        consumed, node := d.parseOne(i, stop)
        i += consumed
        nodes = append(nodes, node)
    }
    return i - start, nodes
}

func tokenize(line string) token {
    for _, lexFn := range lexFns {
        if token, ok := lexFn(line); ok {
            return token
        }
    }
    panic(fmt.Sprintf("could not lex line: %s", line))
}
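
The order of lexFns matters: lexBlock must run before lexKeywordOrComment (otherwise "#+BEGIN_..." would lex as a comment), and lexText is the catch-all at the end. A throwaway in-package sketch of the dispatch, not part of the commit:

package org

import "testing"

// Sketch (not part of the commit): tokenize classifies one line per call.
func TestTokenizeSketch(t *testing.T) {
    for line, kind := range map[string]string{
        "* headline":      "headline",
        "#+BEGIN_SRC sh":  "beginBlock",
        "- list item":     "unorderedList",
        "| a | b |":       "tableRow",
        "-----":           "horizontalRule",
        "#+TITLE: x":      "keyword",
        "[fn:1] footnote": "footnoteDefinition",
        "anything else":   "text",
    } {
        if tok := tokenize(line); tok.kind != kind {
            t.Errorf("%q: got %s, want %s", line, tok.kind, kind)
        }
    }
}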
footnote.go (new file, 38 lines)

package org

import (
    "regexp"
)

type Footnotes struct {
    ExcludeHeading bool
    Title          string
    Definitions    map[string]FootnoteDefinition
    Order          []string
}

type FootnoteDefinition struct {
    Name     string
    Children []Node
}

var footnoteDefinitionRegexp = regexp.MustCompile(`^\[fn:([\w-]+)\]\s+(.+)`)

func lexFootnoteDefinition(line string) (token, bool) {
    if m := footnoteDefinitionRegexp.FindStringSubmatch(line); m != nil {
        return token{"footnoteDefinition", 0, m[1], m}, true
    }
    return nilToken, false
}

func (d *Document) parseFootnoteDefinition(i int, parentStop stopFn) (int, Node) {
    name := d.tokens[i].content
    d.tokens[i] = tokenize(d.tokens[i].matches[2])
    stop := func(d *Document, i int) bool {
        return parentStop(d, i) || isSecondBlankLine(d, i) ||
            d.tokens[i].kind == "headline" || d.tokens[i].kind == "footnoteDefinition"
    }
    consumed, nodes := d.parseMany(i, stop)
    d.Footnotes.Definitions[name] = FootnoteDefinition{name, nodes}
    return consumed, nil
}
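
Note that parseFootnoteDefinition returns a nil node: definitions are collected on the document and re-emitted by the writer's after hook rather than kept in place. A quick in-package sketch, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): definitions land in
// d.Footnotes.Definitions instead of d.Nodes.
func TestFootnoteDefinitionSketch(t *testing.T) {
    d := NewDocument().Parse(strings.NewReader("some text [fn:1]\n\n[fn:1] the definition\n"))
    def, ok := d.Footnotes.Definitions["1"]
    if !ok || def.Name != "1" {
        t.Fatalf("expected a definition for fn:1, got %#v", d.Footnotes.Definitions)
    }
}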
headline.go (new file, 69 lines)

package org

import (
    "regexp"
    "strings"
    "unicode"
)

type Headline struct {
    Lvl      int
    Status   string
    Priority string
    Title    []Node
    Tags     []string
    Children []Node
}

var headlineRegexp = regexp.MustCompile(`^([*]+)\s+(.*)`)
var tagRegexp = regexp.MustCompile(`(.*?)\s*(:[A-Za-z0-9@#%:]+:\s*$)`)

func lexHeadline(line string) (token, bool) {
    if m := headlineRegexp.FindStringSubmatch(line); m != nil {
        return token{"headline", len(m[1]), m[2], m}, true
    }
    return nilToken, false
}

func (d *Document) todoKeywords() []string {
    return strings.FieldsFunc(d.Get("TODO"), func(r rune) bool {
        return unicode.IsSpace(r) || r == '|'
    })
}

func (d *Document) parseHeadline(i int, parentStop stopFn) (int, Node) {
    t, headline := d.tokens[i], Headline{}
    headline.Lvl = len(t.matches[1])
    text := t.content

    for _, k := range d.todoKeywords() {
        if strings.HasPrefix(text, k) && len(text) > len(k) && unicode.IsSpace(rune(text[len(k)])) {
            headline.Status = k
            text = text[len(k)+1:]
            break
        }
    }

    if len(text) >= 4 && text[0:2] == "[#" && strings.Contains("ABC", text[2:3]) && text[3] == ']' {
        headline.Priority = text[2:3]
        text = strings.TrimSpace(text[4:])
    }

    if m := tagRegexp.FindStringSubmatch(text); m != nil {
        text = m[1]
        headline.Tags = strings.FieldsFunc(m[2], func(r rune) bool { return r == ':' })
    }

    headline.Title = d.parseInline(text)

    stop := func(d *Document, i int) bool {
        return parentStop(d, i) || d.tokens[i].kind == "headline" && d.tokens[i].lvl <= headline.Lvl
    }
    consumed, nodes := d.parseMany(i+1, stop)
    headline.Children = nodes

    if headline.Lvl == 1 && text == d.Footnotes.Title && d.Footnotes.ExcludeHeading {
        return consumed + 1, nil
    }
    return consumed + 1, headline
}
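
A sketch of what parseHeadline extracts from a full headline (status keyword, priority cookie, tags); in-package, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): status, priority and tags are
// stripped off the raw headline text in that order.
func TestParseHeadlineSketch(t *testing.T) {
    d := NewDocument().Parse(strings.NewReader("** TODO [#A] Fix the parser :a:b:\n"))
    h, ok := d.Nodes[0].(Headline)
    if !ok {
        t.Fatalf("expected Headline, got %#v", d.Nodes[0])
    }
    if h.Lvl != 2 || h.Status != "TODO" || h.Priority != "A" {
        t.Errorf("unexpected headline: %#v", h)
    }
    if len(h.Tags) != 2 || h.Tags[0] != "a" || h.Tags[1] != "b" {
        t.Errorf("unexpected tags: %#v", h.Tags)
    }
}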
inline.go (new file, 184 lines)

package org

import (
    "regexp"
    "strings"
    "unicode"
)

type Text struct{ Content string }

type Linebreak struct{}

type Emphasis struct {
    Kind    string
    Content []Node
}

type FootnoteLink struct{ Name string }

type RegularLink struct {
    Protocol    string
    Description []Node
    URL         string
}

var redundantSpaces = regexp.MustCompile("[ \t]+")
var subScriptSuperScriptRegexp = regexp.MustCompile(`^([_^])\{(.*?)\}`)
var footnoteRegexp = regexp.MustCompile(`^\[fn:([\w-]+?)(:(.*?))?\]`)

func (d *Document) parseInline(input string) (nodes []Node) {
    previous, current := 0, 0
    for current < len(input) {
        consumed, node := 0, (Node)(nil)
        switch input[current] {
        case '^':
            consumed, node = d.parseSubOrSuperScript(input, current)
        case '_':
            consumed, node = d.parseSubScriptOrEmphasis(input, current)
        case '*', '/', '=', '~', '+':
            consumed, node = d.parseEmphasis(input, current)
        case '[':
            consumed, node = d.parseRegularLinkOrFootnoteReference(input, current)
        case '\\':
            consumed, node = d.parseExplicitLineBreak(input, current)
        }
        if consumed != 0 {
            if current > previous {
                nodes = append(nodes, Text{input[previous:current]})
            }
            if node != nil {
                nodes = append(nodes, node)
            }
            current += consumed
            previous = current
        } else {
            current++
        }
    }

    if previous < len(input) {
        nodes = append(nodes, Text{input[previous:]})
    }
    return nodes
}

func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) {
    if start == 0 || input[start-1] == '\n' || start+1 >= len(input) || input[start+1] != '\\' {
        return 0, nil
    }
    for i := start + 1; ; i++ {
        if i == len(input)-1 || input[i] == '\n' {
            return i + 1 - start, Linebreak{}
        }
        if !unicode.IsSpace(rune(input[i])) {
            break
        }
    }
    return 0, nil
}

func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
    if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil {
        return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2]}}}
    }
    return 0, nil
}

func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) {
    if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 {
        return consumed, node
    }
    return d.parseEmphasis(input, start)
}

func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) {
    if len(input[start:]) >= 2 && input[start] == '[' && input[start+1] == '[' {
        return d.parseRegularLink(input, start)
    } else if len(input[start:]) >= 1 && input[start] == '[' {
        return d.parseFootnoteReference(input, start)
    }
    return 0, nil
}

func (d *Document) parseFootnoteReference(input string, start int) (int, Node) {
    if m := footnoteRegexp.FindStringSubmatch(input[start:]); m != nil {
        name, definition := m[1], m[3]
        seen := false
        for _, otherName := range d.Footnotes.Order {
            if name == otherName {
                seen = true
            }
        }
        if !seen {
            d.Footnotes.Order = append(d.Footnotes.Order, name)
        }
        if definition != "" {
            d.Footnotes.Definitions[name] = FootnoteDefinition{name, d.parseInline(definition)}
        }
        return len(m[0]), FootnoteLink{name}
    }
    return 0, nil
}

func (d *Document) parseRegularLink(input string, start int) (int, Node) {
    if len(input[start:]) < 2 || input[start+1] != '[' {
        return 0, nil
    }
    input = input[start:]
    end := strings.Index(input, "]]")
    if end == -1 {
        return 0, nil
    }

    rawLink := input[2:end]
    link, description, parts := "", []Node{}, strings.Split(rawLink, "][")
    if len(parts) == 2 {
        link, description = parts[0], d.parseInline(parts[1])
    } else {
        link, description = rawLink, []Node{Text{rawLink}}
    }
    consumed := end + 2
    protocol, parts := "", strings.SplitN(link, ":", 2)
    if len(parts) == 2 {
        protocol = parts[0]
    }
    return consumed, RegularLink{protocol, description, link}
}

func (d *Document) parseEmphasis(input string, start int) (int, Node) {
    marker, i := input[start], start
    if !hasValidPreAndBorderChars(input, i) {
        return 0, nil
    }
    for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ {
        if input[i] == '\n' {
            consumedNewLines++
        }

        if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
            return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
        }
    }
    return 0, nil
}

// see org-emphasis-regexp-components (emacs elisp variable)

func hasValidPreAndBorderChars(input string, i int) bool {
    return (i+1 >= len(input) || isValidBorderChar(rune(input[i+1]))) && (i == 0 || isValidPreChar(rune(input[i-1])))
}

func hasValidPostAndBorderChars(input string, i int) bool {
    return (i == 0 || isValidBorderChar(rune(input[i-1]))) && (i+1 >= len(input) || isValidPostChar(rune(input[i+1])))
}

func isValidPreChar(r rune) bool {
    return unicode.IsSpace(r) || strings.ContainsRune(`-({'"`, r)
}

func isValidPostChar(r rune) bool {
    return unicode.IsSpace(r) || strings.ContainsRune(`-.,:!?;'")}[`, r)
}

func isValidBorderChar(r rune) bool { return !unicode.IsSpace(r) }
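
A sketch of parseInline's output shape on mixed markup; in-package, not part of the commit:

package org

import "testing"

// Sketch (not part of the commit): plain text between recognized
// markup is emitted as Text nodes.
func TestParseInlineSketch(t *testing.T) {
    d := NewDocument()
    nodes := d.parseInline("see *this* [[https://example.com][link]]")
    // expected: Text("see "), Emphasis, Text(" "), RegularLink
    if len(nodes) != 4 {
        t.Fatalf("expected 4 nodes, got %#v", nodes)
    }
    if e, ok := nodes[1].(Emphasis); !ok || e.Kind != "*" {
        t.Errorf("expected bold Emphasis, got %#v", nodes[1])
    }
    if l, ok := nodes[3].(RegularLink); !ok || l.Protocol != "https" || l.URL != "https://example.com" {
        t.Errorf("expected RegularLink, got %#v", nodes[3])
    }
}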
keyword.go (new file, 36 lines)

package org

import (
    "regexp"
    "strings"
)

type Keyword struct {
    Key   string
    Value string
}

type Comment struct{ Content string }

var keywordRegexp = regexp.MustCompile(`^(\s*)#\+([^:]+):\s(.*)`)
var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`)

func lexKeywordOrComment(line string) (token, bool) {
    if m := keywordRegexp.FindStringSubmatch(line); m != nil {
        return token{"keyword", len(m[1]), m[2], m}, true
    } else if m := commentRegexp.FindStringSubmatch(line); m != nil {
        return token{"comment", len(m[1]), m[2], m}, true
    }
    return nilToken, false
}

func (d *Document) parseKeyword(i int, stop stopFn) (int, Node) {
    t := d.tokens[i]
    k, v := t.matches[2], t.matches[3]
    d.BufferSettings[k] = strings.Join([]string{d.BufferSettings[k], v}, "\n")
    return 1, Keyword{k, v}
}

func (d *Document) parseComment(i int, stop stopFn) (int, Node) {
    return 1, Comment{d.tokens[i].content}
}
list.go (new file, 82 lines)

package org

import (
    "fmt"
    "regexp"
    "strings"
    "unicode"
)

type List struct {
    Kind  string
    Items []Node
}

type ListItem struct {
    Bullet   string
    Children []Node
}

var unorderedListRegexp = regexp.MustCompile(`^(\s*)([-]|[+]|[*])\s(.*)`)
var orderedListRegexp = regexp.MustCompile(`^(\s*)(([0-9]+|[a-zA-Z])[.)])\s+(.*)`)

func lexList(line string) (token, bool) {
    if m := unorderedListRegexp.FindStringSubmatch(line); m != nil {
        return token{"unorderedList", len(m[1]), m[3], m}, true
    } else if m := orderedListRegexp.FindStringSubmatch(line); m != nil {
        return token{"orderedList", len(m[1]), m[4], m}, true
    }
    return nilToken, false
}

func isListToken(t token) bool {
    return t.kind == "unorderedList" || t.kind == "orderedList"
}

func stopIndentBelow(t token, minIndent int) bool {
    return t.lvl < minIndent && !(t.kind == "text" && t.content == "")
}

func listKind(t token) string {
    switch bullet := t.matches[2]; {
    case bullet == "*" || bullet == "+" || bullet == "-":
        return bullet
    case unicode.IsLetter(rune(bullet[0])):
        return "letter"
    case unicode.IsDigit(rune(bullet[0])):
        return "number"
    default:
        panic(fmt.Sprintf("bad list bullet '%s': %#v", bullet, t))
    }
}

func (d *Document) parseList(i int, parentStop stopFn) (int, Node) {
    start, lvl := i, d.tokens[i].lvl

    list := List{Kind: listKind(d.tokens[i])}
    for !parentStop(d, i) && d.tokens[i].lvl == lvl && isListToken(d.tokens[i]) {
        consumed, node := d.parseListItem(i, parentStop)
        i += consumed
        list.Items = append(list.Items, node)
    }
    return i - start, list
}

func (d *Document) parseListItem(i int, parentStop stopFn) (int, Node) {
    start, nodes, bullet := i, []Node{}, d.tokens[i].matches[2]
    minIndent := d.tokens[i].lvl + len(bullet)
    d.tokens[i] = tokenize(strings.Repeat(" ", minIndent) + d.tokens[i].content)
    stop := func(d *Document, i int) bool {
        if parentStop(d, i) {
            return true
        }
        t := d.tokens[i]
        return t.lvl < minIndent && !(t.kind == "text" && t.content == "")
    }
    for !stop(d, i) && !isSecondBlankLine(d, i) {
        consumed, node := d.parseOne(i, stop)
        i += consumed
        nodes = append(nodes, node)
    }
    return i - start, ListItem{bullet, nodes}
}
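
Lists nest purely by indentation relative to the bullet; a quick in-package sketch, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): a nested ordered list becomes a
// child of the second item rather than a third sibling.
func TestParseListSketch(t *testing.T) {
    src := "- a\n- b\n  1. c\n"
    d := NewDocument().Parse(strings.NewReader(src))
    l, ok := d.Nodes[0].(List)
    if !ok || l.Kind != "-" || len(l.Items) != 2 {
        t.Fatalf("expected a 2-item '-' list, got %#v", d.Nodes[0])
    }
}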
org.go (new file, 243 lines)

package org

import (
    "fmt"
    "regexp"
    "strings"
)

type stringBuilder = strings.Builder

type OrgWriter struct {
    TagsColumn int // see org-tags-column
    stringBuilder
    indent string
}

var emphasisOrgBorders = map[string][]string{
    "_":   []string{"_", "_"},
    "*":   []string{"*", "*"},
    "/":   []string{"/", "/"},
    "+":   []string{"+", "+"},
    "~":   []string{"~", "~"},
    "=":   []string{"=", "="},
    "_{}": []string{"_{", "}"},
    "^{}": []string{"^{", "}"},
}

func NewOrgWriter() *OrgWriter {
    return &OrgWriter{
        TagsColumn: 77,
    }
}

func (w *OrgWriter) before(d *Document) {}

func (w *OrgWriter) after(d *Document) {
    fs := d.Footnotes
    if len(fs.Definitions) == 0 {
        return
    }
    w.WriteString("* " + fs.Title + "\n")
    for _, name := range fs.Order {
        w.writeNodes(fs.Definitions[name])
    }
}

func (w *OrgWriter) emptyClone() *OrgWriter {
    wcopy := *w
    wcopy.stringBuilder = strings.Builder{}
    return &wcopy
}

func (w *OrgWriter) writeNodes(ns ...Node) {
    for _, n := range ns {
        switch n := n.(type) {
        case Comment:
            w.writeComment(n)
        case Keyword:
            w.writeKeyword(n)
        case Headline:
            w.writeHeadline(n)
        case Block:
            w.writeBlock(n)

        case FootnoteDefinition:
            w.writeFootnoteDefinition(n)

        case List:
            w.writeList(n)
        case ListItem:
            w.writeListItem(n)

        case Table:
            w.writeTable(n)
        case TableHeader:
            w.writeTableHeader(n)
        case TableRow:
            w.writeTableRow(n)
        case TableSeparator:
            w.writeTableSeparator(n)

        case Paragraph:
            w.writeParagraph(n)
        case HorizontalRule:
            w.writeHorizontalRule(n)
        case Line:
            w.writeLine(n)

        case Text:
            w.writeText(n)
        case Emphasis:
            w.writeEmphasis(n)
        case Linebreak:
            w.writeLinebreak(n)
        case RegularLink:
            w.writeRegularLink(n)
        case FootnoteLink:
            w.writeFootnoteLink(n)
        default:
            if n != nil {
                panic(fmt.Sprintf("bad node %#v", n))
            }
        }
    }
}

var eolWhiteSpaceRegexp = regexp.MustCompile("[\t ]*\n")

func (w *OrgWriter) String() string {
    s := w.stringBuilder.String()
    return eolWhiteSpaceRegexp.ReplaceAllString(s, "\n")
}

func (w *OrgWriter) writeHeadline(h Headline) {
    tmp := w.emptyClone()
    tmp.WriteString(strings.Repeat("*", h.Lvl))
    if h.Status != "" {
        tmp.WriteString(" " + h.Status)
    }
    if h.Priority != "" {
        tmp.WriteString(" [#" + h.Priority + "]")
    }
    tmp.WriteString(" ")
    tmp.writeNodes(h.Title...)
    hString := tmp.String()
    if len(h.Tags) != 0 {
        hString += " "
        tString := ":" + strings.Join(h.Tags, ":") + ":"
        if n := w.TagsColumn - len(tString) - len(hString); n > 0 {
            w.WriteString(hString + strings.Repeat(" ", n) + tString)
        } else {
            w.WriteString(hString + tString)
        }
    } else {
        w.WriteString(hString)
    }
    w.WriteString("\n")
    if len(h.Children) != 0 {
        w.WriteString(w.indent)
    }
    w.writeNodes(h.Children...)
}

func (w *OrgWriter) writeBlock(b Block) {
    w.WriteString(fmt.Sprintf("%s#+BEGIN_%s %s\n", w.indent, b.Name, strings.Join(b.Parameters, " ")))
    w.writeNodes(b.Children...)
    w.WriteString(w.indent + "#+END_" + b.Name + "\n")
}

func (w *OrgWriter) writeFootnoteDefinition(f FootnoteDefinition) {
    w.WriteString(fmt.Sprintf("[fn:%s] ", f.Name))
    w.writeNodes(f.Children...)
}

func (w *OrgWriter) writeParagraph(p Paragraph) {
    w.writeNodes(p.Children...)
}

func (w *OrgWriter) writeKeyword(k Keyword) {
    w.WriteString(w.indent + fmt.Sprintf("#+%s: %s\n", k.Key, k.Value))
}

func (w *OrgWriter) writeComment(c Comment) {
    w.WriteString(w.indent + "#" + c.Content)
}

func (w *OrgWriter) writeList(l List) { w.writeNodes(l.Items...) }

func (w *OrgWriter) writeListItem(li ListItem) {
    w.WriteString(w.indent + li.Bullet + " ")
    liWriter := w.emptyClone()
    liWriter.indent = w.indent + strings.Repeat(" ", len(li.Bullet)+1)
    liWriter.writeNodes(li.Children...)
    w.WriteString(strings.TrimPrefix(liWriter.String(), liWriter.indent))
}

func (w *OrgWriter) writeTable(t Table) {
    // TODO: pretty print tables
    w.writeNodes(t.Header)
    w.writeNodes(t.Rows...)
}

func (w *OrgWriter) writeTableHeader(th TableHeader) {
    w.writeTableColumns(th.Columns)
    w.writeNodes(th.Separator)
}

func (w *OrgWriter) writeTableRow(tr TableRow) {
    w.writeTableColumns(tr.Columns)
}

func (w *OrgWriter) writeTableSeparator(ts TableSeparator) {
    w.WriteString(w.indent + ts.Content + "\n")
}

func (w *OrgWriter) writeTableColumns(columns [][]Node) {
    w.WriteString(w.indent + "| ")
    for _, columnNodes := range columns {
        w.writeNodes(columnNodes...)
        w.WriteString(" | ")
    }
    w.WriteString("\n")
}

func (w *OrgWriter) writeHorizontalRule(hr HorizontalRule) {
    w.WriteString(w.indent + "-----\n")
}

func (w *OrgWriter) writeLine(l Line) {
    w.WriteString(w.indent)
    w.writeNodes(l.Children...)
    w.WriteString("\n")
}

func (w *OrgWriter) writeText(t Text) { w.WriteString(t.Content) }

func (w *OrgWriter) writeEmphasis(e Emphasis) {
    borders, ok := emphasisOrgBorders[e.Kind]
    if !ok {
        panic(fmt.Sprintf("bad emphasis %#v", e))
    }
    w.WriteString(borders[0])
    w.writeNodes(e.Content...)
    w.WriteString(borders[1])
}

func (w *OrgWriter) writeLinebreak(l Linebreak) {
    w.WriteString(`\\`)
}

func (w *OrgWriter) writeFootnoteLink(l FootnoteLink) {
    w.WriteString("[fn:" + l.Name + "]")
}

func (w *OrgWriter) writeRegularLink(l RegularLink) {
    descriptionWriter := w.emptyClone()
    descriptionWriter.writeNodes(l.Description...)
    description := descriptionWriter.String()
    if l.URL != description {
        w.WriteString(fmt.Sprintf("[[%s][%s]]", l.URL, description))
    } else {
        w.WriteString(fmt.Sprintf("[[%s]]", l.URL))
    }
}
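
One writer detail worth pinning down: tags are right-aligned so the headline ends at TagsColumn. A sketch, in-package and not part of the commit; TagsColumn is set to 20 to keep the arithmetic readable:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): "* Heading" plus padding plus
// ":foo:" should span exactly TagsColumn characters.
func TestTagsColumnSketch(t *testing.T) {
    d := NewDocument().Parse(strings.NewReader("* Heading :foo:\n"))
    w := NewOrgWriter()
    w.TagsColumn = 20
    out := d.Write(w).String()
    if len(out) != 21 || !strings.HasSuffix(out, ":foo:\n") { // 20 chars + "\n"
        t.Errorf("got %q", out)
    }
}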
org_test.go (new file, 61 lines)

package org

import (
    "fmt"
    "io/ioutil"
    "path/filepath"
    "strings"
    "testing"

    "github.com/pmezard/go-difflib/difflib"
)

func TestOrgWriter(t *testing.T) {
    for _, path := range orgTestFiles() {
        expected := fileString(path)
        reader, writer := strings.NewReader(expected), NewOrgWriter()
        actual := NewDocument().Parse(reader).Write(writer).String()
        if actual != expected {
            t.Errorf("%s:\n%s", path, diff(actual, expected))
        } else {
            t.Logf("%s: passed!", path)
        }
    }
}

func orgTestFiles() []string {
    dir := "./testdata"
    files, err := ioutil.ReadDir(dir)
    if err != nil {
        panic(fmt.Sprintf("Could not read directory: %s", err))
    }
    orgFiles := []string{}
    for _, f := range files {
        name := f.Name()
        if filepath.Ext(name) != ".org" {
            continue
        }
        orgFiles = append(orgFiles, filepath.Join(dir, name))
    }
    return orgFiles
}

func fileString(path string) string {
    bs, err := ioutil.ReadFile(path)
    if err != nil {
        panic(fmt.Sprintf("Could not read file %s: %s", path, err))
    }
    return string(bs)
}

func diff(actual, expected string) string {
    diff := difflib.UnifiedDiff{
        A:        difflib.SplitLines(actual),
        B:        difflib.SplitLines(expected),
        FromFile: "Actual",
        ToFile:   "Expected",
        Context:  3,
    }
    text, _ := difflib.GetUnifiedDiffString(diff)
    return text
}
paragraph.go (new file, 57 lines)

package org

import (
    "regexp"
)

type Line struct{ Children []Node }
type Paragraph struct{ Children []Node }
type HorizontalRule struct{}

var horizontalRuleRegexp = regexp.MustCompile(`^(\s*)-{5,}\s*$`)
var plainTextRegexp = regexp.MustCompile(`^(\s*)(.*)`)

func lexText(line string) (token, bool) {
    if m := plainTextRegexp.FindStringSubmatch(line); m != nil {
        return token{"text", len(m[1]), m[2], m}, true
    }
    return nilToken, false
}

func lexHorizontalRule(line string) (token, bool) {
    if m := horizontalRuleRegexp.FindStringSubmatch(line); m != nil {
        return token{"horizontalRule", len(m[1]), "", m}, true
    }
    return nilToken, false
}

// isSecondBlankLine reports whether tokens i-1 and i are both blank
// text lines; two consecutive blank lines terminate paragraphs, list
// items and footnote definitions.
func isSecondBlankLine(d *Document, i int) bool {
    if i-1 <= 0 {
        return false
    }
    t1, t2 := d.tokens[i-1], d.tokens[i]
    if t1.kind == "text" && t2.kind == "text" && t1.content == "" && t2.content == "" {
        return true
    }
    return false
}

func (d *Document) parseParagraph(i int, parentStop stopFn) (int, Node) {
    lines, start := []Node{Line{d.parseInline(d.tokens[i].content)}}, i
    i++
    stop := func(d *Document, i int) bool { return parentStop(d, i) || d.tokens[i].kind != "text" }
    for ; !stop(d, i); i++ {
        if isSecondBlankLine(d, i) {
            lines = lines[:len(lines)-1]
            i++
            break
        }
        lines = append(lines, Line{d.parseInline(d.tokens[i].content)})
    }
    consumed := i - start
    return consumed, Paragraph{lines}
}

func (d *Document) parseHorizontalRule(i int, parentStop stopFn) (int, Node) {
    return 1, HorizontalRule{}
}
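
A single blank line stays inside a paragraph as an empty Line; the second consecutive blank line ends it (and trims the first). An in-package sketch, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): two blank lines split the input
// into two paragraphs.
func TestParagraphSplitSketch(t *testing.T) {
    d := NewDocument().Parse(strings.NewReader("a\n\n\nb\n"))
    if len(d.Nodes) != 2 {
        t.Fatalf("expected 2 paragraphs, got %#v", d.Nodes)
    }
}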
table.go (new file, 63 lines)

package org

import (
    "regexp"
    "strings"
)

type Table struct {
    Header Node
    Rows   []Node
}

type TableSeparator struct{ Content string }

type TableHeader struct {
    Columns   [][]Node
    Separator TableSeparator
}

type TableRow struct{ Columns [][]Node }

var tableSeparatorRegexp = regexp.MustCompile(`^(\s*)(\|[+|-]*)\s*$`)
var tableRowRegexp = regexp.MustCompile(`^(\s*)(\|.*)`)

func lexTable(line string) (token, bool) {
    if m := tableSeparatorRegexp.FindStringSubmatch(line); m != nil {
        return token{"tableSeparator", len(m[1]), m[2], m}, true
    } else if m := tableRowRegexp.FindStringSubmatch(line); m != nil {
        return token{"tableRow", len(m[1]), m[2], m}, true
    }
    return nilToken, false
}

func (d *Document) parseTable(i int, parentStop stopFn) (int, Node) {
    rows, start := []Node{}, i
    for !parentStop(d, i) && (d.tokens[i].kind == "tableRow" || d.tokens[i].kind == "tableSeparator") {
        consumed, row := d.parseTableRowOrSeparator(i, parentStop)
        i += consumed
        rows = append(rows, row)
    }

    consumed := i - start
    if len(rows) >= 2 {
        if row, ok := rows[0].(TableRow); ok {
            if separator, ok := rows[1].(TableSeparator); ok {
                return consumed, Table{TableHeader{row.Columns, separator}, rows[2:]}
            }
        }
    }
    return consumed, Table{nil, rows}
}

func (d *Document) parseTableRowOrSeparator(i int, _ stopFn) (int, Node) {
    if d.tokens[i].kind == "tableSeparator" {
        return 1, TableSeparator{d.tokens[i].content}
    }
    fields := strings.FieldsFunc(d.tokens[i].content, func(r rune) bool { return r == '|' })
    row := TableRow{}
    for _, field := range fields {
        row.Columns = append(row.Columns, d.parseInline(strings.TrimSpace(field)))
    }
    return 1, row
}
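
Header detection: a leading row immediately followed by a separator becomes the TableHeader, and everything after it lands in Rows. An in-package sketch, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): row + separator + row yields a
// table with a header and a single body row.
func TestParseTableSketch(t *testing.T) {
    src := "| a | b |\n|---+---|\n| 1 | 2 |\n"
    d := NewDocument().Parse(strings.NewReader(src))
    table, ok := d.Nodes[0].(Table)
    if !ok {
        t.Fatalf("expected Table, got %#v", d.Nodes[0])
    }
    if _, ok := table.Header.(TableHeader); !ok || len(table.Rows) != 1 {
        t.Errorf("expected a header and 1 row, got %#v", table)
    }
}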
testdata/example.org (new file, 59 lines)

#+TITLE: Example org mode file
#+AUTHOR: Niklas Fasching
#+DESCRIPTION: just some random elements with little explanation

* Motivation

To validate the parser we'll try printing the AST back to org-mode source - if that
works we can be kind of sure that the parsing worked.
At least I hope so - I would like to get around writing tests for the individual parsing
functions...

** Headlines with TODO status, priority & tags
*** TODO [#B] Headline with todo status & priority
*** DONE Headline with TODO status
*** [#A] Headline with tags & priority                              :foo:bar:
this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...
** Lists
- unordered list item 1
- unordered list item 2 - with ~inline~ /markup/
  1. ordered sublist item 1
     a) ordered sublist item 1
     b) ordered sublist item 2
     c) ordered sublist item 3
  2. ordered sublist item 2
- unordered list item 3 - and a [[https://example.com][link]]
  and some lines of text
  1. and another subitem
     #+BEGIN_SRC sh
     echo with a block
     #+END_SRC
  2. and another one with a table
     | a | b | c |
     |---+---+---|
     | 1 | 2 | 3 |

     and text with an empty line in between as well!
- unordered list item 4

** Inline
- /emphasis/ and a hard line break \\
- /.emphasis with dot border chars./
- /emphasis with a slash/inside/
- /emphasis/ followed by raw text with slash /
- ->/not an emphasis/<-
- links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/
- _underlined_ *bold* =verbatim= ~code~ +strikethrough+
- *bold string with an *asterisk inside*
- links
  1. regular link [[https://example.com]] link without description
  2. regular link [[https://example.com][example.com]] link with description
  3. regular link to a file (image) [[file:my-img.png]]
** Footnotes
- normal footnote reference [fn:1]
- further references to the same footnote should not [fn:1] render duplicates in the footnote list
- also inline footnotes are supported via =fn:2:inline definition=. But we won't test that because it would
  cause the output to look different from the input

* Footnotes
[fn:1] Foobar