Add basic parser and org -> AST -> org rendering

parent 60835c66fc
commit 0b2972e32a

14 changed files with 1123 additions and 0 deletions
README.md

@@ -7,3 +7,6 @@ A basic org-mode parser in go

- https://orgmode.org/worg/dev/org-syntax.html
- https://github.com/abo-abo/org-mode/blob/mirror/lisp/org.el
- https://github.com/abo-abo/org-mode/blob/mirror/lisp/org-element.el
- test cases
  - [[https://github.com/bdewey/org-ruby/blob/master/spec/html_examples][org-ruby]]
  - pandoc, goorgeous
block.go (new file, 47 lines)

package org

import (
    "regexp"
    "strings"
    "unicode"
)

type Block struct {
    Name       string
    Parameters []string
    Children   []Node
}

var beginBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+BEGIN_(\w+)(.*)`)
var endBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+END_(\w+)`)

func lexBlock(line string) (token, bool) {
    if m := beginBlockRegexp.FindStringSubmatch(line); m != nil {
        return token{"beginBlock", len(m[1]), strings.ToUpper(m[2]), m}, true
    } else if m := endBlockRegexp.FindStringSubmatch(line); m != nil {
        return token{"endBlock", len(m[1]), strings.ToUpper(m[2]), m}, true
    }
    return nilToken, false
}

func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
    t, start, nodes := d.tokens[i], i, []Node{}
    name, parameters := t.content, strings.Fields(t.matches[3])
    trim := trimIndentUpTo(d.tokens[i].lvl)
    for i++; !(d.tokens[i].kind == "endBlock" && d.tokens[i].content == name); i++ {
        if parentStop(d, i) {
            return 0, nil
        }
        nodes = append(nodes, Line{[]Node{Text{trim(d.tokens[i].matches[0])}}})
    }
    return i + 1 - start, Block{name, parameters, nodes}
}

// trimIndentUpTo returns a function that strips at most max leading
// whitespace characters from a line, so block content keeps only the
// indentation beyond the #+BEGIN line's own indent.
func trimIndentUpTo(max int) func(string) string {
    return func(line string) string {
        i := 0
        for ; i < len(line) && i < max && unicode.IsSpace(rune(line[i])); i++ {
        }
        return line[i:]
    }
}
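
To sanity-check the block parser, here is a minimal throwaway test; it is not part of the commit, the test name is made up, and it assumes it sits in package org next to block.go:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): a block keeps its upper-cased name,
// its whitespace-split parameters, and its raw lines as children.
func TestParseBlockSketch(t *testing.T) {
    src := "#+BEGIN_SRC sh\n  echo hi\n#+END_SRC\n"
    d := NewDocument().Parse(strings.NewReader(src))
    b, ok := d.Nodes[0].(Block)
    if !ok || b.Name != "SRC" || len(b.Parameters) != 1 || b.Parameters[0] != "sh" {
        t.Fatalf("expected SRC block, got %#v", d.Nodes[0])
    }
    if len(b.Children) != 1 {
        t.Errorf("expected one content line, got %#v", b.Children)
    }
}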
cmd/org/org.go (new file, 31 lines)

package main

import (
    "bytes"
    "io/ioutil"
    "log"
    "os"
    "strings"

    "github.com/niklasfasching/org"
)

func main() {
    log.SetFlags(0)
    if len(os.Args) < 3 {
        log.Println("USAGE: org FILE OUTPUT_FORMAT")
        log.Fatal("supported output formats: org")
    }
    bs, err := ioutil.ReadFile(os.Args[1])
    if err != nil {
        log.Fatal(err)
    }
    r, out := bytes.NewReader(bs), ""
    switch strings.ToLower(os.Args[2]) {
    case "org":
        out = org.NewDocument().Parse(r).Write(org.NewOrgWriter()).String()
    default:
        log.Fatal("Unsupported output format")
    }
    log.Println(out)
}
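
For comparison, the same pipeline as a library call rather than a CLI; a sketch that only assumes the import path used above and is not part of the commit:

package main

import (
    "fmt"
    "strings"

    "github.com/niklasfasching/org"
)

// Sketch (not part of the commit): parse an org string into the AST
// and render it back to org source.
func main() {
    src := "* TODO [#A] write more tests :dev:\nwith some /emphasized/ text\n"
    doc := org.NewDocument().Parse(strings.NewReader(src))
    fmt.Print(doc.Write(org.NewOrgWriter()).String())
}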
document.go (new file, 150 lines)

package org

import (
    "bufio"
    "fmt"
    "io"
    "log"
)

type Document struct {
    tokens              []token
    Nodes               []Node
    Footnotes           Footnotes
    StatusKeywords      []string
    MaxEmphasisNewLines int
    BufferSettings      map[string]string
    DefaultSettings     map[string]string
}

type Writer interface {
    before(*Document)
    after(*Document)
    writeNodes(...Node)
    String() string
}

type Node interface{}

type lexFn = func(line string) (t token, ok bool)
type parseFn = func(*Document, int, stopFn) (int, Node)
type stopFn = func(*Document, int) bool

type token struct {
    kind    string
    lvl     int
    content string
    matches []string
}

var lexFns = []lexFn{
    lexHeadline,
    lexBlock,
    lexList,
    lexTable,
    lexHorizontalRule,
    lexKeywordOrComment,
    lexFootnoteDefinition,
    lexText,
}

var nilToken = token{"nil", -1, "", nil}

func NewDocument() *Document {
    return &Document{
        Footnotes: Footnotes{
            ExcludeHeading: true,
            Title:          "Footnotes",
            Definitions:    map[string]FootnoteDefinition{},
        },
        MaxEmphasisNewLines: 1,
        BufferSettings:      map[string]string{},
        DefaultSettings: map[string]string{
            "TODO": "TODO | DONE",
        },
    }
}

func (d *Document) Write(w Writer) Writer {
    if d.Nodes == nil {
        panic("cannot Write() empty document: you must call Parse() first")
    }
    w.before(d)
    w.writeNodes(d.Nodes...)
    w.after(d)
    return w
}

func (d *Document) Parse(input io.Reader) *Document {
    d.tokens = []token{}
    scanner := bufio.NewScanner(input)
    for scanner.Scan() {
        d.tokens = append(d.tokens, tokenize(scanner.Text()))
    }
    if err := scanner.Err(); err != nil {
        panic(err)
    }
    _, nodes := d.parseMany(0, func(d *Document, i int) bool { return !(i < len(d.tokens)) })
    d.Nodes = nodes
    return d
}

func (d *Document) Get(key string) string {
    if v, ok := d.BufferSettings[key]; ok {
        return v
    }
    if v, ok := d.DefaultSettings[key]; ok {
        return v
    }
    return ""
}

func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) {
    switch d.tokens[i].kind {
    case "unorderedList", "orderedList":
        consumed, node = d.parseList(i, stop)
    case "tableRow", "tableSeparator":
        consumed, node = d.parseTable(i, stop)
    case "beginBlock":
        consumed, node = d.parseBlock(i, stop)
    case "text":
        consumed, node = d.parseParagraph(i, stop)
    case "horizontalRule":
        consumed, node = d.parseHorizontalRule(i, stop)
    case "comment":
        consumed, node = d.parseComment(i, stop)
    case "keyword":
        consumed, node = d.parseKeyword(i, stop)
    case "headline":
        consumed, node = d.parseHeadline(i, stop)
    case "footnoteDefinition":
        consumed, node = d.parseFootnoteDefinition(i, stop)
    }

    if consumed != 0 {
        return consumed, node
    }
    // fallback: retokenize the line as plain text and try again
    log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i])
    m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0])
    d.tokens[i] = token{"text", len(m[1]), m[2], m}
    return d.parseOne(i, stop)
}

func (d *Document) parseMany(i int, stop stopFn) (int, []Node) {
    start, nodes := i, []Node{}
    for i < len(d.tokens) && !stop(d, i) {
        consumed, node := d.parseOne(i, stop)
        i += consumed
        nodes = append(nodes, node)
    }
    return i - start, nodes
}

func tokenize(line string) token {
    for _, lexFn := range lexFns {
        if token, ok := lexFn(line); ok {
            return token
        }
    }
    panic(fmt.Sprintf("could not lex line: %s", line))
}
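
The order of lexFns matters: lexBlock must run before lexKeywordOrComment (otherwise "#+BEGIN_..." would lex as a comment), and lexText is the catch-all at the end. A throwaway in-package sketch of the dispatch, not part of the commit:

package org

import "testing"

// Sketch (not part of the commit): tokenize classifies one line per call.
func TestTokenizeSketch(t *testing.T) {
    for line, kind := range map[string]string{
        "* headline":      "headline",
        "#+BEGIN_SRC sh":  "beginBlock",
        "- list item":     "unorderedList",
        "| a | b |":       "tableRow",
        "-----":           "horizontalRule",
        "#+TITLE: x":      "keyword",
        "[fn:1] footnote": "footnoteDefinition",
        "anything else":   "text",
    } {
        if tok := tokenize(line); tok.kind != kind {
            t.Errorf("%q: got %s, want %s", line, tok.kind, kind)
        }
    }
}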
footnote.go (new file, 38 lines)

package org

import (
    "regexp"
)

type Footnotes struct {
    ExcludeHeading bool
    Title          string
    Definitions    map[string]FootnoteDefinition
    Order          []string
}

type FootnoteDefinition struct {
    Name     string
    Children []Node
}

var footnoteDefinitionRegexp = regexp.MustCompile(`^\[fn:([\w-]+)\]\s+(.+)`)

func lexFootnoteDefinition(line string) (token, bool) {
    if m := footnoteDefinitionRegexp.FindStringSubmatch(line); m != nil {
        return token{"footnoteDefinition", 0, m[1], m}, true
    }
    return nilToken, false
}

func (d *Document) parseFootnoteDefinition(i int, parentStop stopFn) (int, Node) {
    name := d.tokens[i].content
    d.tokens[i] = tokenize(d.tokens[i].matches[2])
    stop := func(d *Document, i int) bool {
        return parentStop(d, i) || isSecondBlankLine(d, i) ||
            d.tokens[i].kind == "headline" || d.tokens[i].kind == "footnoteDefinition"
    }
    consumed, nodes := d.parseMany(i, stop)
    d.Footnotes.Definitions[name] = FootnoteDefinition{name, nodes}
    return consumed, nil
}
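
Note that parseFootnoteDefinition returns a nil node: definitions are collected on the document and re-emitted by the writer's after hook rather than kept in place. A quick in-package sketch, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): definitions land in
// d.Footnotes.Definitions instead of d.Nodes.
func TestFootnoteDefinitionSketch(t *testing.T) {
    d := NewDocument().Parse(strings.NewReader("some text [fn:1]\n\n[fn:1] the definition\n"))
    def, ok := d.Footnotes.Definitions["1"]
    if !ok || def.Name != "1" {
        t.Fatalf("expected a definition for fn:1, got %#v", d.Footnotes.Definitions)
    }
}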
headline.go (new file, 69 lines)

package org

import (
    "regexp"
    "strings"
    "unicode"
)

type Headline struct {
    Lvl      int
    Status   string
    Priority string
    Title    []Node
    Tags     []string
    Children []Node
}

var headlineRegexp = regexp.MustCompile(`^([*]+)\s+(.*)`)
var tagRegexp = regexp.MustCompile(`(.*?)\s*(:[A-Za-z0-9@#%:]+:\s*$)`)

func lexHeadline(line string) (token, bool) {
    if m := headlineRegexp.FindStringSubmatch(line); m != nil {
        return token{"headline", len(m[1]), m[2], m}, true
    }
    return nilToken, false
}

func (d *Document) todoKeywords() []string {
    return strings.FieldsFunc(d.Get("TODO"), func(r rune) bool {
        return unicode.IsSpace(r) || r == '|'
    })
}

func (d *Document) parseHeadline(i int, parentStop stopFn) (int, Node) {
    t, headline := d.tokens[i], Headline{}
    headline.Lvl = len(t.matches[1])
    text := t.content

    for _, k := range d.todoKeywords() {
        if strings.HasPrefix(text, k) && len(text) > len(k) && unicode.IsSpace(rune(text[len(k)])) {
            headline.Status = k
            text = text[len(k)+1:]
            break
        }
    }

    if len(text) >= 4 && text[0:2] == "[#" && strings.Contains("ABC", text[2:3]) && text[3] == ']' {
        headline.Priority = text[2:3]
        text = strings.TrimSpace(text[4:])
    }

    if m := tagRegexp.FindStringSubmatch(text); m != nil {
        text = m[1]
        headline.Tags = strings.FieldsFunc(m[2], func(r rune) bool { return r == ':' })
    }

    headline.Title = d.parseInline(text)

    stop := func(d *Document, i int) bool {
        return parentStop(d, i) || d.tokens[i].kind == "headline" && d.tokens[i].lvl <= headline.Lvl
    }
    consumed, nodes := d.parseMany(i+1, stop)
    headline.Children = nodes

    if headline.Lvl == 1 && text == d.Footnotes.Title && d.Footnotes.ExcludeHeading {
        return consumed + 1, nil
    }
    return consumed + 1, headline
}
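
A sketch of what parseHeadline extracts from a full headline (status keyword, priority cookie, tags); in-package, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): status, priority and tags are
// stripped off the raw headline text in that order.
func TestParseHeadlineSketch(t *testing.T) {
    d := NewDocument().Parse(strings.NewReader("** TODO [#A] Fix the parser :a:b:\n"))
    h, ok := d.Nodes[0].(Headline)
    if !ok {
        t.Fatalf("expected Headline, got %#v", d.Nodes[0])
    }
    if h.Lvl != 2 || h.Status != "TODO" || h.Priority != "A" {
        t.Errorf("unexpected headline: %#v", h)
    }
    if len(h.Tags) != 2 || h.Tags[0] != "a" || h.Tags[1] != "b" {
        t.Errorf("unexpected tags: %#v", h.Tags)
    }
}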
inline.go (new file, 184 lines)

package org

import (
    "regexp"
    "strings"
    "unicode"
)

type Text struct{ Content string }

type Linebreak struct{}

type Emphasis struct {
    Kind    string
    Content []Node
}

type FootnoteLink struct{ Name string }

type RegularLink struct {
    Protocol    string
    Description []Node
    URL         string
}

var redundantSpaces = regexp.MustCompile("[ \t]+")
var subScriptSuperScriptRegexp = regexp.MustCompile(`^([_^])\{(.*?)\}`)
var footnoteRegexp = regexp.MustCompile(`^\[fn:([\w-]+?)(:(.*?))?\]`)

func (d *Document) parseInline(input string) (nodes []Node) {
    previous, current := 0, 0
    for current < len(input) {
        consumed, node := 0, (Node)(nil)
        switch input[current] {
        case '^':
            consumed, node = d.parseSubOrSuperScript(input, current)
        case '_':
            consumed, node = d.parseSubScriptOrEmphasis(input, current)
        case '*', '/', '=', '~', '+':
            consumed, node = d.parseEmphasis(input, current)
        case '[':
            consumed, node = d.parseRegularLinkOrFootnoteReference(input, current)
        case '\\':
            consumed, node = d.parseExplicitLineBreak(input, current)
        }
        if consumed != 0 {
            if current > previous {
                nodes = append(nodes, Text{input[previous:current]})
            }
            if node != nil {
                nodes = append(nodes, node)
            }
            current += consumed
            previous = current
        } else {
            current++
        }
    }

    if previous < len(input) {
        nodes = append(nodes, Text{input[previous:]})
    }
    return nodes
}

func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) {
    if start == 0 || input[start-1] == '\n' || start+1 >= len(input) || input[start+1] != '\\' {
        return 0, nil
    }
    for i := start + 1; ; i++ {
        if i == len(input)-1 || input[i] == '\n' {
            return i + 1 - start, Linebreak{}
        }
        if !unicode.IsSpace(rune(input[i])) {
            break
        }
    }
    return 0, nil
}

func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
    if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil {
        return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2]}}}
    }
    return 0, nil
}

func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) {
    if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 {
        return consumed, node
    }
    return d.parseEmphasis(input, start)
}

func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) {
    if len(input[start:]) >= 2 && input[start] == '[' && input[start+1] == '[' {
        return d.parseRegularLink(input, start)
    } else if len(input[start:]) >= 1 && input[start] == '[' {
        return d.parseFootnoteReference(input, start)
    }
    return 0, nil
}

func (d *Document) parseFootnoteReference(input string, start int) (int, Node) {
    if m := footnoteRegexp.FindStringSubmatch(input[start:]); m != nil {
        name, definition := m[1], m[3]
        seen := false
        for _, otherName := range d.Footnotes.Order {
            if name == otherName {
                seen = true
            }
        }
        if !seen {
            d.Footnotes.Order = append(d.Footnotes.Order, name)
        }
        if definition != "" {
            d.Footnotes.Definitions[name] = FootnoteDefinition{name, d.parseInline(definition)}
        }
        return len(m[0]), FootnoteLink{name}
    }
    return 0, nil
}

func (d *Document) parseRegularLink(input string, start int) (int, Node) {
    if len(input[start:]) < 2 || input[start+1] != '[' {
        return 0, nil
    }
    input = input[start:]
    end := strings.Index(input, "]]")
    if end == -1 {
        return 0, nil
    }

    rawLink := input[2:end]
    link, description, parts := "", []Node{}, strings.Split(rawLink, "][")
    if len(parts) == 2 {
        link, description = parts[0], d.parseInline(parts[1])
    } else {
        link, description = rawLink, []Node{Text{rawLink}}
    }
    consumed := end + 2
    protocol, parts := "", strings.SplitN(link, ":", 2)
    if len(parts) == 2 {
        protocol = parts[0]
    }
    return consumed, RegularLink{protocol, description, link}
}

func (d *Document) parseEmphasis(input string, start int) (int, Node) {
    marker, i := input[start], start
    if !hasValidPreAndBorderChars(input, i) {
        return 0, nil
    }
    for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ {
        if input[i] == '\n' {
            consumedNewLines++
        }

        if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
            return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
        }
    }
    return 0, nil
}

// see org-emphasis-regexp-components (emacs elisp variable)

func hasValidPreAndBorderChars(input string, i int) bool {
    return (i+1 >= len(input) || isValidBorderChar(rune(input[i+1]))) && (i == 0 || isValidPreChar(rune(input[i-1])))
}

func hasValidPostAndBorderChars(input string, i int) bool {
    return (i == 0 || isValidBorderChar(rune(input[i-1]))) && (i+1 >= len(input) || isValidPostChar(rune(input[i+1])))
}

func isValidPreChar(r rune) bool {
    return unicode.IsSpace(r) || strings.ContainsRune(`-({'"`, r)
}

func isValidPostChar(r rune) bool {
    return unicode.IsSpace(r) || strings.ContainsRune(`-.,:!?;'")}[`, r)
}

func isValidBorderChar(r rune) bool { return !unicode.IsSpace(r) }
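
A sketch of parseInline's output shape on mixed markup; in-package, not part of the commit:

package org

import "testing"

// Sketch (not part of the commit): plain text between recognized
// markup is emitted as Text nodes.
func TestParseInlineSketch(t *testing.T) {
    d := NewDocument()
    nodes := d.parseInline("see *this* [[https://example.com][link]]")
    // expected: Text("see "), Emphasis, Text(" "), RegularLink
    if len(nodes) != 4 {
        t.Fatalf("expected 4 nodes, got %#v", nodes)
    }
    if e, ok := nodes[1].(Emphasis); !ok || e.Kind != "*" {
        t.Errorf("expected bold Emphasis, got %#v", nodes[1])
    }
    if l, ok := nodes[3].(RegularLink); !ok || l.Protocol != "https" || l.URL != "https://example.com" {
        t.Errorf("expected RegularLink, got %#v", nodes[3])
    }
}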
keyword.go (new file, 36 lines)

package org

import (
    "regexp"
    "strings"
)

type Keyword struct {
    Key   string
    Value string
}

type Comment struct{ Content string }

var keywordRegexp = regexp.MustCompile(`^(\s*)#\+([^:]+):\s(.*)`)
var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`)

func lexKeywordOrComment(line string) (token, bool) {
    if m := keywordRegexp.FindStringSubmatch(line); m != nil {
        return token{"keyword", len(m[1]), m[2], m}, true
    } else if m := commentRegexp.FindStringSubmatch(line); m != nil {
        return token{"comment", len(m[1]), m[2], m}, true
    }
    return nilToken, false
}

func (d *Document) parseKeyword(i int, stop stopFn) (int, Node) {
    t := d.tokens[i]
    k, v := t.matches[2], t.matches[3]
    d.BufferSettings[k] = strings.Join([]string{d.BufferSettings[k], v}, "\n")
    return 1, Keyword{k, v}
}

func (d *Document) parseComment(i int, stop stopFn) (int, Node) {
    return 1, Comment{d.tokens[i].content}
}
list.go (new file, 82 lines)

package org

import (
    "fmt"
    "regexp"
    "strings"
    "unicode"
)

type List struct {
    Kind  string
    Items []Node
}

type ListItem struct {
    Bullet   string
    Children []Node
}

var unorderedListRegexp = regexp.MustCompile(`^(\s*)([-]|[+]|[*])\s(.*)`)
var orderedListRegexp = regexp.MustCompile(`^(\s*)(([0-9]+|[a-zA-Z])[.)])\s+(.*)`)

func lexList(line string) (token, bool) {
    if m := unorderedListRegexp.FindStringSubmatch(line); m != nil {
        return token{"unorderedList", len(m[1]), m[3], m}, true
    } else if m := orderedListRegexp.FindStringSubmatch(line); m != nil {
        return token{"orderedList", len(m[1]), m[4], m}, true
    }
    return nilToken, false
}

func isListToken(t token) bool {
    return t.kind == "unorderedList" || t.kind == "orderedList"
}

func stopIndentBelow(t token, minIndent int) bool {
    return t.lvl < minIndent && !(t.kind == "text" && t.content == "")
}

func listKind(t token) string {
    switch bullet := t.matches[2]; {
    case bullet == "*" || bullet == "+" || bullet == "-":
        return bullet
    case unicode.IsLetter(rune(bullet[0])):
        return "letter"
    case unicode.IsDigit(rune(bullet[0])):
        return "number"
    default:
        panic(fmt.Sprintf("bad list bullet '%s': %#v", bullet, t))
    }
}

func (d *Document) parseList(i int, parentStop stopFn) (int, Node) {
    start, lvl := i, d.tokens[i].lvl

    list := List{Kind: listKind(d.tokens[i])}
    for !parentStop(d, i) && d.tokens[i].lvl == lvl && isListToken(d.tokens[i]) {
        consumed, node := d.parseListItem(i, parentStop)
        i += consumed
        list.Items = append(list.Items, node)
    }
    return i - start, list
}

func (d *Document) parseListItem(i int, parentStop stopFn) (int, Node) {
    start, nodes, bullet := i, []Node{}, d.tokens[i].matches[2]
    minIndent := d.tokens[i].lvl + len(bullet)
    d.tokens[i] = tokenize(strings.Repeat(" ", minIndent) + d.tokens[i].content)
    stop := func(d *Document, i int) bool {
        if parentStop(d, i) {
            return true
        }
        t := d.tokens[i]
        return t.lvl < minIndent && !(t.kind == "text" && t.content == "")
    }
    for !stop(d, i) && !isSecondBlankLine(d, i) {
        consumed, node := d.parseOne(i, stop)
        i += consumed
        nodes = append(nodes, node)
    }
    return i - start, ListItem{bullet, nodes}
}
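
Lists nest purely by indentation relative to the bullet; a quick in-package sketch, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): a nested ordered list becomes a
// child of the second item rather than a third sibling.
func TestParseListSketch(t *testing.T) {
    src := "- a\n- b\n  1. c\n"
    d := NewDocument().Parse(strings.NewReader(src))
    l, ok := d.Nodes[0].(List)
    if !ok || l.Kind != "-" || len(l.Items) != 2 {
        t.Fatalf("expected a 2-item '-' list, got %#v", d.Nodes[0])
    }
}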
org.go (new file, 243 lines)

package org

import (
    "fmt"
    "regexp"
    "strings"
)

type stringBuilder = strings.Builder

type OrgWriter struct {
    TagsColumn int // see org-tags-column
    stringBuilder
    indent string
}

var emphasisOrgBorders = map[string][]string{
    "_":   []string{"_", "_"},
    "*":   []string{"*", "*"},
    "/":   []string{"/", "/"},
    "+":   []string{"+", "+"},
    "~":   []string{"~", "~"},
    "=":   []string{"=", "="},
    "_{}": []string{"_{", "}"},
    "^{}": []string{"^{", "}"},
}

func NewOrgWriter() *OrgWriter {
    return &OrgWriter{
        TagsColumn: 77,
    }
}

func (w *OrgWriter) before(d *Document) {}

func (w *OrgWriter) after(d *Document) {
    fs := d.Footnotes
    if len(fs.Definitions) == 0 {
        return
    }
    w.WriteString("* " + fs.Title + "\n")
    for _, name := range fs.Order {
        w.writeNodes(fs.Definitions[name])
    }
}

func (w *OrgWriter) emptyClone() *OrgWriter {
    wcopy := *w
    wcopy.stringBuilder = strings.Builder{}
    return &wcopy
}

func (w *OrgWriter) writeNodes(ns ...Node) {
    for _, n := range ns {
        switch n := n.(type) {
        case Comment:
            w.writeComment(n)
        case Keyword:
            w.writeKeyword(n)
        case Headline:
            w.writeHeadline(n)
        case Block:
            w.writeBlock(n)

        case FootnoteDefinition:
            w.writeFootnoteDefinition(n)

        case List:
            w.writeList(n)
        case ListItem:
            w.writeListItem(n)

        case Table:
            w.writeTable(n)
        case TableHeader:
            w.writeTableHeader(n)
        case TableRow:
            w.writeTableRow(n)
        case TableSeparator:
            w.writeTableSeparator(n)

        case Paragraph:
            w.writeParagraph(n)
        case HorizontalRule:
            w.writeHorizontalRule(n)
        case Line:
            w.writeLine(n)

        case Text:
            w.writeText(n)
        case Emphasis:
            w.writeEmphasis(n)
        case Linebreak:
            w.writeLinebreak(n)
        case RegularLink:
            w.writeRegularLink(n)
        case FootnoteLink:
            w.writeFootnoteLink(n)
        default:
            if n != nil {
                panic(fmt.Sprintf("bad node %#v", n))
            }
        }
    }
}

var eolWhiteSpaceRegexp = regexp.MustCompile("[\t ]*\n")

func (w *OrgWriter) String() string {
    s := w.stringBuilder.String()
    return eolWhiteSpaceRegexp.ReplaceAllString(s, "\n")
}

func (w *OrgWriter) writeHeadline(h Headline) {
    tmp := w.emptyClone()
    tmp.WriteString(strings.Repeat("*", h.Lvl))
    if h.Status != "" {
        tmp.WriteString(" " + h.Status)
    }
    if h.Priority != "" {
        tmp.WriteString(" [#" + h.Priority + "]")
    }
    tmp.WriteString(" ")
    tmp.writeNodes(h.Title...)
    hString := tmp.String()
    if len(h.Tags) != 0 {
        hString += " "
        tString := ":" + strings.Join(h.Tags, ":") + ":"
        if n := w.TagsColumn - len(tString) - len(hString); n > 0 {
            w.WriteString(hString + strings.Repeat(" ", n) + tString)
        } else {
            w.WriteString(hString + tString)
        }
    } else {
        w.WriteString(hString)
    }
    w.WriteString("\n")
    if len(h.Children) != 0 {
        w.WriteString(w.indent)
    }
    w.writeNodes(h.Children...)
}

func (w *OrgWriter) writeBlock(b Block) {
    w.WriteString(fmt.Sprintf("%s#+BEGIN_%s %s\n", w.indent, b.Name, strings.Join(b.Parameters, " ")))
    w.writeNodes(b.Children...)
    w.WriteString(w.indent + "#+END_" + b.Name + "\n")
}

func (w *OrgWriter) writeFootnoteDefinition(f FootnoteDefinition) {
    w.WriteString(fmt.Sprintf("[fn:%s] ", f.Name))
    w.writeNodes(f.Children...)
}

func (w *OrgWriter) writeParagraph(p Paragraph) {
    w.writeNodes(p.Children...)
}

func (w *OrgWriter) writeKeyword(k Keyword) {
    w.WriteString(w.indent + fmt.Sprintf("#+%s: %s\n", k.Key, k.Value))
}

func (w *OrgWriter) writeComment(c Comment) {
    w.WriteString(w.indent + "#" + c.Content)
}

func (w *OrgWriter) writeList(l List) { w.writeNodes(l.Items...) }

func (w *OrgWriter) writeListItem(li ListItem) {
    w.WriteString(w.indent + li.Bullet + " ")
    liWriter := w.emptyClone()
    liWriter.indent = w.indent + strings.Repeat(" ", len(li.Bullet)+1)
    liWriter.writeNodes(li.Children...)
    w.WriteString(strings.TrimPrefix(liWriter.String(), liWriter.indent))
}

func (w *OrgWriter) writeTable(t Table) {
    // TODO: pretty print tables
    w.writeNodes(t.Header)
    w.writeNodes(t.Rows...)
}

func (w *OrgWriter) writeTableHeader(th TableHeader) {
    w.writeTableColumns(th.Columns)
    w.writeNodes(th.Separator)
}

func (w *OrgWriter) writeTableRow(tr TableRow) {
    w.writeTableColumns(tr.Columns)
}

func (w *OrgWriter) writeTableSeparator(ts TableSeparator) {
    w.WriteString(w.indent + ts.Content + "\n")
}

func (w *OrgWriter) writeTableColumns(columns [][]Node) {
    w.WriteString(w.indent + "| ")
    for _, columnNodes := range columns {
        w.writeNodes(columnNodes...)
        w.WriteString(" | ")
    }
    w.WriteString("\n")
}

func (w *OrgWriter) writeHorizontalRule(hr HorizontalRule) {
    w.WriteString(w.indent + "-----\n")
}

func (w *OrgWriter) writeLine(l Line) {
    w.WriteString(w.indent)
    w.writeNodes(l.Children...)
    w.WriteString("\n")
}

func (w *OrgWriter) writeText(t Text) { w.WriteString(t.Content) }

func (w *OrgWriter) writeEmphasis(e Emphasis) {
    borders, ok := emphasisOrgBorders[e.Kind]
    if !ok {
        panic(fmt.Sprintf("bad emphasis %#v", e))
    }
    w.WriteString(borders[0])
    w.writeNodes(e.Content...)
    w.WriteString(borders[1])
}

func (w *OrgWriter) writeLinebreak(l Linebreak) {
    w.WriteString(`\\`)
}

func (w *OrgWriter) writeFootnoteLink(l FootnoteLink) {
    w.WriteString("[fn:" + l.Name + "]")
}

func (w *OrgWriter) writeRegularLink(l RegularLink) {
    descriptionWriter := w.emptyClone()
    descriptionWriter.writeNodes(l.Description...)
    description := descriptionWriter.String()
    if l.URL != description {
        w.WriteString(fmt.Sprintf("[[%s][%s]]", l.URL, description))
    } else {
        w.WriteString(fmt.Sprintf("[[%s]]", l.URL))
    }
}
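
One writer detail worth pinning down: tags are right-aligned so the headline ends at TagsColumn. A sketch, in-package and not part of the commit; TagsColumn is set to 20 to keep the arithmetic readable:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): "* Heading" plus padding plus
// ":foo:" should span exactly TagsColumn characters.
func TestTagsColumnSketch(t *testing.T) {
    d := NewDocument().Parse(strings.NewReader("* Heading :foo:\n"))
    w := NewOrgWriter()
    w.TagsColumn = 20
    out := d.Write(w).String()
    if len(out) != 21 || !strings.HasSuffix(out, ":foo:\n") { // 20 chars + "\n"
        t.Errorf("got %q", out)
    }
}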
org_test.go (new file, 61 lines)

package org

import (
    "fmt"
    "io/ioutil"
    "path/filepath"
    "strings"
    "testing"

    "github.com/pmezard/go-difflib/difflib"
)

func TestOrgWriter(t *testing.T) {
    for _, path := range orgTestFiles() {
        expected := fileString(path)
        reader, writer := strings.NewReader(expected), NewOrgWriter()
        actual := NewDocument().Parse(reader).Write(writer).String()
        if actual != expected {
            t.Errorf("%s:\n%s", path, diff(actual, expected))
        } else {
            t.Logf("%s: passed!", path)
        }
    }
}

func orgTestFiles() []string {
    dir := "./testdata"
    files, err := ioutil.ReadDir(dir)
    if err != nil {
        panic(fmt.Sprintf("Could not read directory: %s", err))
    }
    orgFiles := []string{}
    for _, f := range files {
        name := f.Name()
        if filepath.Ext(name) != ".org" {
            continue
        }
        orgFiles = append(orgFiles, filepath.Join(dir, name))
    }
    return orgFiles
}

func fileString(path string) string {
    bs, err := ioutil.ReadFile(path)
    if err != nil {
        panic(fmt.Sprintf("Could not read file %s: %s", path, err))
    }
    return string(bs)
}

func diff(actual, expected string) string {
    diff := difflib.UnifiedDiff{
        A:        difflib.SplitLines(actual),
        B:        difflib.SplitLines(expected),
        FromFile: "Actual",
        ToFile:   "Expected",
        Context:  3,
    }
    text, _ := difflib.GetUnifiedDiffString(diff)
    return text
}
paragraph.go (new file, 57 lines)

package org

import (
    "regexp"
)

type Line struct{ Children []Node }
type Paragraph struct{ Children []Node }
type HorizontalRule struct{}

var horizontalRuleRegexp = regexp.MustCompile(`^(\s*)-{5,}\s*$`)
var plainTextRegexp = regexp.MustCompile(`^(\s*)(.*)`)

func lexText(line string) (token, bool) {
    if m := plainTextRegexp.FindStringSubmatch(line); m != nil {
        return token{"text", len(m[1]), m[2], m}, true
    }
    return nilToken, false
}

func lexHorizontalRule(line string) (token, bool) {
    if m := horizontalRuleRegexp.FindStringSubmatch(line); m != nil {
        return token{"horizontalRule", len(m[1]), "", m}, true
    }
    return nilToken, false
}

// isSecondBlankLine reports whether tokens i-1 and i are both blank
// text lines; two consecutive blank lines terminate paragraphs, list
// items and footnote definitions.
func isSecondBlankLine(d *Document, i int) bool {
    if i-1 <= 0 {
        return false
    }
    t1, t2 := d.tokens[i-1], d.tokens[i]
    if t1.kind == "text" && t2.kind == "text" && t1.content == "" && t2.content == "" {
        return true
    }
    return false
}

func (d *Document) parseParagraph(i int, parentStop stopFn) (int, Node) {
    lines, start := []Node{Line{d.parseInline(d.tokens[i].content)}}, i
    i++
    stop := func(d *Document, i int) bool { return parentStop(d, i) || d.tokens[i].kind != "text" }
    for ; !stop(d, i); i++ {
        if isSecondBlankLine(d, i) {
            lines = lines[:len(lines)-1]
            i++
            break
        }
        lines = append(lines, Line{d.parseInline(d.tokens[i].content)})
    }
    consumed := i - start
    return consumed, Paragraph{lines}
}

func (d *Document) parseHorizontalRule(i int, parentStop stopFn) (int, Node) {
    return 1, HorizontalRule{}
}
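
A single blank line stays inside a paragraph as an empty Line; the second consecutive blank line ends it (and trims the first). An in-package sketch, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): two blank lines split the input
// into two paragraphs.
func TestParagraphSplitSketch(t *testing.T) {
    d := NewDocument().Parse(strings.NewReader("a\n\n\nb\n"))
    if len(d.Nodes) != 2 {
        t.Fatalf("expected 2 paragraphs, got %#v", d.Nodes)
    }
}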
table.go (new file, 63 lines)

package org

import (
    "regexp"
    "strings"
)

type Table struct {
    Header Node
    Rows   []Node
}

type TableSeparator struct{ Content string }

type TableHeader struct {
    Columns   [][]Node
    Separator TableSeparator
}

type TableRow struct{ Columns [][]Node }

var tableSeparatorRegexp = regexp.MustCompile(`^(\s*)(\|[+|-]*)\s*$`)
var tableRowRegexp = regexp.MustCompile(`^(\s*)(\|.*)`)

func lexTable(line string) (token, bool) {
    if m := tableSeparatorRegexp.FindStringSubmatch(line); m != nil {
        return token{"tableSeparator", len(m[1]), m[2], m}, true
    } else if m := tableRowRegexp.FindStringSubmatch(line); m != nil {
        return token{"tableRow", len(m[1]), m[2], m}, true
    }
    return nilToken, false
}

func (d *Document) parseTable(i int, parentStop stopFn) (int, Node) {
    rows, start := []Node{}, i
    for !parentStop(d, i) && (d.tokens[i].kind == "tableRow" || d.tokens[i].kind == "tableSeparator") {
        consumed, row := d.parseTableRowOrSeparator(i, parentStop)
        i += consumed
        rows = append(rows, row)
    }

    consumed := i - start
    if len(rows) >= 2 {
        if row, ok := rows[0].(TableRow); ok {
            if separator, ok := rows[1].(TableSeparator); ok {
                return consumed, Table{TableHeader{row.Columns, separator}, rows[2:]}
            }
        }
    }
    return consumed, Table{nil, rows}
}

func (d *Document) parseTableRowOrSeparator(i int, _ stopFn) (int, Node) {
    if d.tokens[i].kind == "tableSeparator" {
        return 1, TableSeparator{d.tokens[i].content}
    }
    fields := strings.FieldsFunc(d.tokens[i].content, func(r rune) bool { return r == '|' })
    row := TableRow{}
    for _, field := range fields {
        row.Columns = append(row.Columns, d.parseInline(strings.TrimSpace(field)))
    }
    return 1, row
}
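
Header detection: a leading row immediately followed by a separator becomes the TableHeader, and everything after it lands in Rows. An in-package sketch, not part of the commit:

package org

import (
    "strings"
    "testing"
)

// Sketch (not part of the commit): row + separator + row yields a
// table with a header and a single body row.
func TestParseTableSketch(t *testing.T) {
    src := "| a | b |\n|---+---|\n| 1 | 2 |\n"
    d := NewDocument().Parse(strings.NewReader(src))
    table, ok := d.Nodes[0].(Table)
    if !ok {
        t.Fatalf("expected Table, got %#v", d.Nodes[0])
    }
    if _, ok := table.Header.(TableHeader); !ok || len(table.Rows) != 1 {
        t.Errorf("expected a header and 1 row, got %#v", table)
    }
}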
testdata/example.org (new file, 59 lines)

#+TITLE: Example org mode file
#+AUTHOR: Niklas Fasching
#+DESCRIPTION: just some random elements with little explanation

* Motivation

To validate the parser we'll try printing the AST back to org-mode source - if that
works we can be kind of sure that the parsing worked.
At least I hope so - I would like to get around writing tests for the individual parsing
functions...

** Headlines with TODO status, priority & tags
*** TODO [#B] Headline with todo status & priority
*** DONE Headline with TODO status
*** [#A] Headline with tags & priority                              :foo:bar:
this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...
** Lists
- unordered list item 1
- unordered list item 2 - with ~inline~ /markup/
  1. ordered sublist item 1
     a) ordered sublist item 1
     b) ordered sublist item 2
     c) ordered sublist item 3
  2. ordered sublist item 2
- unordered list item 3 - and a [[https://example.com][link]]
  and some lines of text
  1. and another subitem
     #+BEGIN_SRC sh
     echo with a block
     #+END_SRC
  2. and another one with a table
     | a | b | c |
     |---+---+---|
     | 1 | 2 | 3 |

     and text with an empty line in between as well!
- unordered list item 4

** Inline
- /emphasis/ and a hard line break \\
- /.emphasis with dot border chars./
- /emphasis with a slash/inside/
- /emphasis/ followed by raw text with slash /
- ->/not an emphasis/<-
- links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/
- _underlined_ *bold* =verbatim= ~code~ +strikethrough+
- *bold string with an *asterisk inside*
- links
  1. regular link [[https://example.com]] link without description
  2. regular link [[https://example.com][example.com]] link with description
  3. regular link to a file (image) [[file:my-img.png]]
** Footnotes
- normal footnote reference [fn:1]
- further references to the same footnote should not [fn:1] render duplicates in the footnote list
- also inline footnotes are supported via =fn:2:inline definition=. But we won't test that because it would
  cause the output to look different from the input

* Footnotes
[fn:1] Foobar