From 0b2972e32a67f30ef8585eb0062c58d445ec69d5 Mon Sep 17 00:00:00 2001 From: Niklas Fasching Date: Sun, 2 Dec 2018 14:06:08 +0100 Subject: [PATCH] Add basic parser and org -> AST -> org rendering --- README.org | 3 + block.go | 47 +++++++++ cmd/org/org.go | 31 ++++++ document.go | 150 ++++++++++++++++++++++++++ footnote.go | 38 +++++++ headline.go | 69 ++++++++++++ inline.go | 184 ++++++++++++++++++++++++++++++++ keyword.go | 36 +++++++ list.go | 82 +++++++++++++++ org.go | 243 +++++++++++++++++++++++++++++++++++++++++++ org_test.go | 61 +++++++++++ paragraph.go | 57 ++++++++++ table.go | 63 +++++++++++ testdata/example.org | 59 +++++++++++ 14 files changed, 1123 insertions(+) create mode 100644 block.go create mode 100644 cmd/org/org.go create mode 100644 document.go create mode 100644 footnote.go create mode 100644 headline.go create mode 100644 inline.go create mode 100644 keyword.go create mode 100644 list.go create mode 100644 org.go create mode 100644 org_test.go create mode 100644 paragraph.go create mode 100644 table.go create mode 100644 testdata/example.org diff --git a/README.org b/README.org index 85021cb..24d4d64 100644 --- a/README.org +++ b/README.org @@ -7,3 +7,6 @@ A basic org-mode parser in go - https://orgmode.org/worg/dev/org-syntax.html - https://github.com/abo-abo/org-mode/blob/mirror/lisp/org.el - https://github.com/abo-abo/org-mode/blob/mirror/lisp/org-element.el +- test cases + - [[https://github.com/bdewey/org-ruby/blob/master/spec/html_examples][org-ruby]] + - pandoc, goorgeous diff --git a/block.go b/block.go new file mode 100644 index 0000000..07cc060 --- /dev/null +++ b/block.go @@ -0,0 +1,47 @@ +package org + +import ( + "regexp" + "strings" + "unicode" +) + +type Block struct { + Name string + Parameters []string + Children []Node +} + +var beginBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+BEGIN_(\w+)(.*)`) +var endBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+END_(\w+)`) + +func lexBlock(line string) (token, bool) { + if m := 
beginBlockRegexp.FindStringSubmatch(line); m != nil { + return token{"beginBlock", len(m[1]), strings.ToUpper(m[2]), m}, true + } else if m := endBlockRegexp.FindStringSubmatch(line); m != nil { + return token{"endBlock", len(m[1]), strings.ToUpper(m[2]), m}, true + } + return nilToken, false +} + +func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) { + t, start, nodes := d.tokens[i], i, []Node{} + name, parameters := t.content, strings.Fields(t.matches[3]) + trim := trimIndentUpTo(d.tokens[i].lvl) + for i++; !(d.tokens[i].kind == "endBlock" && d.tokens[i].content == name); i++ { + if parentStop(d, i) { + return 0, nil + } + nodes = append(nodes, Line{[]Node{Text{trim(d.tokens[i].matches[0])}}}) + } + return i + 1 - start, Block{name, parameters, nodes} +} + +func trimIndentUpTo(max int) func(string) string { + return func(line string) string { + i := 0 + for ; i < len(line) && i < max && unicode.IsSpace(rune(line[i])); i++ { + } + return line[i:] + } +} diff --git a/cmd/org/org.go b/cmd/org/org.go new file mode 100644 index 0000000..c5c11ba --- /dev/null +++ b/cmd/org/org.go @@ -0,0 +1,31 @@ +package main + +import ( + "bytes" + "io/ioutil" + "log" + "os" + "strings" + + "github.com/niklasfasching/org" +) + +func main() { + log.SetFlags(0) + if len(os.Args) < 3 { + log.Println("USAGE: org FILE OUTPUT_FORMAT") + log.Fatal("supported output formats: org") + } + bs, err := ioutil.ReadFile(os.Args[1]) + if err != nil { + log.Fatal(err) + } + r, out := bytes.NewReader(bs), "" + switch strings.ToLower(os.Args[2]) { + case "org": + out = org.NewDocument().Parse(r).Write(org.NewOrgWriter()).String() + default: + log.Fatal("Unsupported output format") + } + log.Println(out) +} diff --git a/document.go b/document.go new file mode 100644 index 0000000..f7d27d7 --- /dev/null +++ b/document.go @@ -0,0 +1,150 @@ +package org + +import ( + "bufio" + "fmt" + "io" + "log" +) + +type Document struct { + tokens []token + Nodes []Node + Footnotes Footnotes + 
StatusKeywords []string + MaxEmphasisNewLines int + BufferSettings map[string]string + DefaultSettings map[string]string +} + +type Writer interface { + before(*Document) + after(*Document) + writeNodes(...Node) + String() string +} + +type Node interface{} + +type lexFn = func(line string) (t token, ok bool) +type parseFn = func(*Document, int, stopFn) (int, Node) +type stopFn = func(*Document, int) bool + +type token struct { + kind string + lvl int + content string + matches []string +} + +var lexFns = []lexFn{ + lexHeadline, + lexBlock, + lexList, + lexTable, + lexHorizontalRule, + lexKeywordOrComment, + lexFootnoteDefinition, + lexText, +} + +var nilToken = token{"nil", -1, "", nil} + +func NewDocument() *Document { + return &Document{ + Footnotes: Footnotes{ + ExcludeHeading: true, + Title: "Footnotes", + Definitions: map[string]FootnoteDefinition{}, + }, + MaxEmphasisNewLines: 1, + BufferSettings: map[string]string{}, + DefaultSettings: map[string]string{ + "TODO": "TODO | DONE", + }, + } +} + +func (d *Document) Write(w Writer) Writer { + if d.Nodes == nil { + panic("cannot Write() empty document: you must call Parse() first") + } + w.before(d) + w.writeNodes(d.Nodes...) 
+ w.after(d) + return w +} + +func (d *Document) Parse(input io.Reader) *Document { + d.tokens = []token{} + scanner := bufio.NewScanner(input) + for scanner.Scan() { + d.tokens = append(d.tokens, tokenize(scanner.Text())) + } + if err := scanner.Err(); err != nil { + panic(err) + } + _, nodes := d.parseMany(0, func(d *Document, i int) bool { return !(i < len(d.tokens)) }) + d.Nodes = nodes + return d +} + +func (d *Document) Get(key string) string { + if v, ok := d.BufferSettings[key]; ok { + return v + } + if v, ok := d.DefaultSettings[key]; ok { + return v + } + return "" +} + +func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) { + switch d.tokens[i].kind { + case "unorderedList", "orderedList": + consumed, node = d.parseList(i, stop) + case "tableRow", "tableSeparator": + consumed, node = d.parseTable(i, stop) + case "beginBlock": + consumed, node = d.parseBlock(i, stop) + case "text": + consumed, node = d.parseParagraph(i, stop) + case "horizontalRule": + consumed, node = d.parseHorizontalRule(i, stop) + case "comment": + consumed, node = d.parseComment(i, stop) + case "keyword": + consumed, node = d.parseKeyword(i, stop) + case "headline": + consumed, node = d.parseHeadline(i, stop) + case "footnoteDefinition": + consumed, node = d.parseFootnoteDefinition(i, stop) + } + + if consumed != 0 { + return consumed, node + } + log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i]) + m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0]) + d.tokens[i] = token{"text", len(m[1]), m[2], m} + return d.parseOne(i, stop) +} + +func (d *Document) parseMany(i int, stop stopFn) (int, []Node) { + start, nodes := i, []Node{} + for i < len(d.tokens) && !stop(d, i) { + consumed, node := d.parseOne(i, stop) + i += consumed + nodes = append(nodes, node) + } + return i - start, nodes +} + +func tokenize(line string) token { + for _, lexFn := range lexFns { + if token, ok := lexFn(line); ok { + return token + } + } + 
panic(fmt.Sprintf("could not lex line: %s", line)) +} diff --git a/footnote.go b/footnote.go new file mode 100644 index 0000000..91cbff7 --- /dev/null +++ b/footnote.go @@ -0,0 +1,38 @@ +package org + +import ( + "regexp" +) + +type Footnotes struct { + ExcludeHeading bool + Title string + Definitions map[string]FootnoteDefinition + Order []string +} + +type FootnoteDefinition struct { + Name string + Children []Node +} + +var footnoteDefinitionRegexp = regexp.MustCompile(`^\[fn:([\w-]+)\]\s+(.+)`) + +func lexFootnoteDefinition(line string) (token, bool) { + if m := footnoteDefinitionRegexp.FindStringSubmatch(line); m != nil { + return token{"footnoteDefinition", 0, m[1], m}, true + } + return nilToken, false +} + +func (d *Document) parseFootnoteDefinition(i int, parentStop stopFn) (int, Node) { + name := d.tokens[i].content + d.tokens[i] = tokenize(d.tokens[i].matches[2]) + stop := func(d *Document, i int) bool { + return parentStop(d, i) || isSecondBlankLine(d, i) || + d.tokens[i].kind == "headline" || d.tokens[i].kind == "footnoteDefinition" + } + consumed, nodes := d.parseMany(i, stop) + d.Footnotes.Definitions[name] = FootnoteDefinition{name, nodes} + return consumed, nil +} diff --git a/headline.go b/headline.go new file mode 100644 index 0000000..1527f3c --- /dev/null +++ b/headline.go @@ -0,0 +1,69 @@ +package org + +import ( + "regexp" + "strings" + "unicode" +) + +type Headline struct { + Lvl int + Status string + Priority string + Title []Node + Tags []string + Children []Node +} + +var headlineRegexp = regexp.MustCompile(`^([*]+)\s+(.*)`) +var tagRegexp = regexp.MustCompile(`(.*?)\s*(:[A-Za-z0-9@#%:]+:\s*$)`) + +func lexHeadline(line string) (token, bool) { + if m := headlineRegexp.FindStringSubmatch(line); m != nil { + return token{"headline", len(m[1]), m[2], m}, true + } + return nilToken, false +} + +func (d *Document) todoKeywords() []string { + return strings.FieldsFunc(d.Get("TODO"), func(r rune) bool { + return unicode.IsSpace(r) || r == '|' + }) +} + 
+func (d *Document) parseHeadline(i int, parentStop stopFn) (int, Node) { + t, headline := d.tokens[i], Headline{} + headline.Lvl = len(t.matches[1]) + text := t.content + + for _, k := range d.todoKeywords() { + if strings.HasPrefix(text, k) && len(text) > len(k) && unicode.IsSpace(rune(text[len(k)])) { + headline.Status = k + text = text[len(k)+1:] + break + } + } + + if len(text) >= 4 && text[0:2] == "[#" && strings.Contains("ABC", text[2:3]) && text[3] == ']' { + headline.Priority = text[2:3] + text = strings.TrimSpace(text[4:]) + } + + if m := tagRegexp.FindStringSubmatch(text); m != nil { + text = m[1] + headline.Tags = strings.FieldsFunc(m[2], func(r rune) bool { return r == ':' }) + } + + headline.Title = d.parseInline(text) + + stop := func(d *Document, i int) bool { + return parentStop(d, i) || d.tokens[i].kind == "headline" && d.tokens[i].lvl <= headline.Lvl + } + consumed, nodes := d.parseMany(i+1, stop) + headline.Children = nodes + + if headline.Lvl == 1 && text == d.Footnotes.Title && d.Footnotes.ExcludeHeading { + return consumed + 1, nil + } + return consumed + 1, headline +} diff --git a/inline.go b/inline.go new file mode 100644 index 0000000..004969d --- /dev/null +++ b/inline.go @@ -0,0 +1,184 @@ +package org + +import ( + "regexp" + "strings" + "unicode" +) + +type Text struct{ Content string } + +type Linebreak struct{} + +type Emphasis struct { + Kind string + Content []Node +} + +type FootnoteLink struct{ Name string } + +type RegularLink struct { + Protocol string + Description []Node + URL string +} + +var redundantSpaces = regexp.MustCompile("[ \t]+") +var subScriptSuperScriptRegexp = regexp.MustCompile(`([_^])\{(.*?)\}`) +var footnoteRegexp = regexp.MustCompile(`\[fn:([\w-]+?)(:(.*?))?\]`) + +func (d *Document) parseInline(input string) (nodes []Node) { + previous, current := 0, 0 + for current < len(input) { + consumed, node := 0, (Node)(nil) + switch input[current] { + case '^': + consumed, node = d.parseSubOrSuperScript(input, 
current) + case '_': + consumed, node = d.parseSubScriptOrEmphasis(input, current) + case '*', '/', '=', '~', '+': + consumed, node = d.parseEmphasis(input, current) + case '[': + consumed, node = d.parseRegularLinkOrFootnoteReference(input, current) + case '\\': + consumed, node = d.parseExplicitLineBreak(input, current) + } + if consumed != 0 { + if current > previous { + nodes = append(nodes, Text{input[previous:current]}) + } + if node != nil { + nodes = append(nodes, node) + } + current += consumed + previous = current + } else { + current++ + } + } + + if previous < len(input) { + nodes = append(nodes, Text{input[previous:]}) + } + return nodes +} + +func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) { + if start == 0 || input[start-1] == '\n' || start+1 >= len(input) || input[start+1] != '\\' { + return 0, nil + } + for i := start + 1; ; i++ { + if i == len(input)-1 || input[i] == '\n' { + return i + 1 - start, Linebreak{} + } + if !unicode.IsSpace(rune(input[i])) { + break + } + } + return 0, nil +} + +func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) { + if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil { + return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2]}}} + } + return 0, nil +} + +func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) { + if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 { + return consumed, node + } + return d.parseEmphasis(input, start) +} + +func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) { + if len(input[start:]) >= 2 && input[start] == '[' && input[start+1] == '[' { + return d.parseRegularLink(input, start) + } else if len(input[start:]) >= 1 && input[start] == '[' { + return d.parseFootnoteReference(input, start) + } + return 0, nil +} + +func (d *Document) parseFootnoteReference(input string, start int) (int, Node) { + if m := 
footnoteRegexp.FindStringSubmatch(input[start:]); m != nil { + name, definition := m[1], m[3] + seen := false + for _, otherName := range d.Footnotes.Order { + if name == otherName { + seen = true + } + } + if !seen { + d.Footnotes.Order = append(d.Footnotes.Order, name) + } + if definition != "" { + d.Footnotes.Definitions[name] = FootnoteDefinition{name, d.parseInline(definition)} + } + return len(m[0]), FootnoteLink{name} + } + return 0, nil +} + +func (d *Document) parseRegularLink(input string, start int) (int, Node) { + if len(input[start:]) == 0 || input[start+1] != '[' { + return 0, nil + } + input = input[start:] + end := strings.Index(input, "]]") + if end == -1 { + return 0, nil + } + + rawLink := input[2:end] + link, description, parts := "", []Node{}, strings.Split(rawLink, "][") + if len(parts) == 2 { + link, description = parts[0], d.parseInline(parts[1]) + } else { + link, description = rawLink, []Node{Text{rawLink}} + } + consumed := end + 2 + protocol, parts := "", strings.SplitN(link, ":", 2) + if len(parts) == 2 { + protocol = parts[0] + } + return consumed, RegularLink{protocol, description, link} +} + +func (d *Document) parseEmphasis(input string, start int) (int, Node) { + marker, i := input[start], start + if !hasValidPreAndBorderChars(input, i) { + return 0, nil + } + for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ { + if input[i] == '\n' { + consumedNewLines++ + } + + if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) { + return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])} + } + } + return 0, nil +} + +// see org-emphasis-regexp-components (emacs elisp variable) + +func hasValidPreAndBorderChars(input string, i int) bool { + return (i+1 >= len(input) || isValidBorderChar(rune(input[i+1]))) && (i == 0 || isValidPreChar(rune(input[i-1]))) +} + +func hasValidPostAndBorderChars(input string, i int) bool { + return (i == 0 || 
isValidBorderChar(rune(input[i-1]))) && (i+1 >= len(input) || isValidPostChar(rune(input[i+1]))) +} + +func isValidPreChar(r rune) bool { + return unicode.IsSpace(r) || strings.ContainsRune(`-({'"`, r) +} + +func isValidPostChar(r rune) bool { + return unicode.IsSpace(r) || strings.ContainsRune(`-.,:!?;'")}[`, r) +} + +func isValidBorderChar(r rune) bool { return !unicode.IsSpace(r) } diff --git a/keyword.go b/keyword.go new file mode 100644 index 0000000..3eb8c65 --- /dev/null +++ b/keyword.go @@ -0,0 +1,36 @@ +package org + +import ( + "regexp" + "strings" +) + +type Keyword struct { + Key string + Value string +} + +type Comment struct{ Content string } + +var keywordRegexp = regexp.MustCompile(`^(\s*)#\+([^:]+):\s(.*)`) +var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`) + +func lexKeywordOrComment(line string) (token, bool) { + if m := keywordRegexp.FindStringSubmatch(line); m != nil { + return token{"keyword", len(m[1]), m[2], m}, true + } else if m := commentRegexp.FindStringSubmatch(line); m != nil { + return token{"comment", len(m[1]), m[2], m}, true + } + return nilToken, false +} + +func (d *Document) parseKeyword(i int, stop stopFn) (int, Node) { + t := d.tokens[i] + k, v := t.matches[2], t.matches[3] + d.BufferSettings[k] = strings.Join([]string{d.BufferSettings[k], v}, "\n") + return 1, Keyword{k, v} +} + +func (d *Document) parseComment(i int, stop stopFn) (int, Node) { + return 1, Comment{d.tokens[i].content} +} diff --git a/list.go b/list.go new file mode 100644 index 0000000..f8362f2 --- /dev/null +++ b/list.go @@ -0,0 +1,82 @@ +package org + +import ( + "fmt" + "regexp" + "strings" + "unicode" +) + +type List struct { + Kind string + Items []Node +} + +type ListItem struct { + Bullet string + Children []Node +} + +var unorderedListRegexp = regexp.MustCompile(`^(\s*)([-]|[+]|[*])\s(.*)`) +var orderedListRegexp = regexp.MustCompile(`^(\s*)(([0-9]+|[a-zA-Z])[.)])\s+(.*)`) + +func lexList(line string) (token, bool) { + if m := 
unorderedListRegexp.FindStringSubmatch(line); m != nil { + return token{"unorderedList", len(m[1]), m[3], m}, true + } else if m := orderedListRegexp.FindStringSubmatch(line); m != nil { + return token{"orderedList", len(m[1]), m[4], m}, true + } + return nilToken, false +} + +func isListToken(t token) bool { + return t.kind == "unorderedList" || t.kind == "orderedList" +} + +func stopIndentBelow(t token, minIndent int) bool { + return t.lvl < minIndent && !(t.kind == "text" && t.content == "") +} + +func listKind(t token) string { + switch bullet := t.matches[2]; { + case bullet == "*" || bullet == "+" || bullet == "-": + return bullet + case unicode.IsLetter(rune(bullet[0])): + return "letter" + case unicode.IsDigit(rune(bullet[0])): + return "number" + default: + panic(fmt.Sprintf("bad list bullet '%s': %#v", bullet, t)) + } +} + +func (d *Document) parseList(i int, parentStop stopFn) (int, Node) { + start, lvl := i, d.tokens[i].lvl + + list := List{Kind: listKind(d.tokens[i])} + for !parentStop(d, i) && d.tokens[i].lvl == lvl && isListToken(d.tokens[i]) { + consumed, node := d.parseListItem(i, parentStop) + i += consumed + list.Items = append(list.Items, node) + } + return i - start, list +} + +func (d *Document) parseListItem(i int, parentStop stopFn) (int, Node) { + start, nodes, bullet := i, []Node{}, d.tokens[i].matches[2] + minIndent := d.tokens[i].lvl + len(bullet) + d.tokens[i] = tokenize(strings.Repeat(" ", minIndent) + d.tokens[i].content) + stop := func(d *Document, i int) bool { + if parentStop(d, i) { + return true + } + t := d.tokens[i] + return t.lvl < minIndent && !(t.kind == "text" && t.content == "") + } + for !stop(d, i) && !isSecondBlankLine(d, i) { + consumed, node := d.parseOne(i, stop) + i += consumed + nodes = append(nodes, node) + } + return i - start, ListItem{bullet, nodes} +} diff --git a/org.go b/org.go new file mode 100644 index 0000000..7e0f53f --- /dev/null +++ b/org.go @@ -0,0 +1,243 @@ +package org + +import ( + "fmt" + "regexp" 
+ "strings" +) + +type stringBuilder = strings.Builder + +type OrgWriter struct { + TagsColumn int // see org-tags-column + stringBuilder + indent string +} + +var emphasisOrgBorders = map[string][]string{ + "_": []string{"_", "_"}, + "*": []string{"*", "*"}, + "/": []string{"/", "/"}, + "+": []string{"+", "+"}, + "~": []string{"~", "~"}, + "=": []string{"=", "="}, + "_{}": []string{"_{", "}"}, + "^{}": []string{"^{", "}"}, +} + +func NewOrgWriter() *OrgWriter { + return &OrgWriter{ + TagsColumn: 77, + } +} + +func (w *OrgWriter) before(d *Document) {} +func (w *OrgWriter) after(d *Document) { + fs := d.Footnotes + if len(fs.Definitions) == 0 { + return + } + w.WriteString("* " + fs.Title + "\n") + for _, name := range fs.Order { + w.writeNodes(fs.Definitions[name]) + } +} + +func (w *OrgWriter) emptyClone() *OrgWriter { + wcopy := *w + wcopy.stringBuilder = strings.Builder{} + return &wcopy +} + +func (w *OrgWriter) writeNodes(ns ...Node) { + for _, n := range ns { + switch n := n.(type) { + case Comment: + w.writeComment(n) + case Keyword: + w.writeKeyword(n) + case Headline: + w.writeHeadline(n) + case Block: + w.writeBlock(n) + + case FootnoteDefinition: + w.writeFootnoteDefinition(n) + + case List: + w.writeList(n) + case ListItem: + w.writeListItem(n) + + case Table: + w.writeTable(n) + case TableHeader: + w.writeTableHeader(n) + case TableRow: + w.writeTableRow(n) + case TableSeparator: + w.writeTableSeparator(n) + + case Paragraph: + w.writeParagraph(n) + case HorizontalRule: + w.writeHorizontalRule(n) + case Line: + w.writeLine(n) + + case Text: + w.writeText(n) + case Emphasis: + w.writeEmphasis(n) + case Linebreak: + w.writeLinebreak(n) + case RegularLink: + w.writeRegularLink(n) + case FootnoteLink: + w.writeFootnoteLink(n) + default: + if n != nil { + panic(fmt.Sprintf("bad node %#v", n)) + } + } + } +} + +var eolWhiteSpaceRegexp = regexp.MustCompile("[\t ]*\n") + +func (w *OrgWriter) String() string { + s := w.stringBuilder.String() + return 
eolWhiteSpaceRegexp.ReplaceAllString(s, "\n") +} + +func (w *OrgWriter) writeHeadline(h Headline) { + tmp := w.emptyClone() + tmp.WriteString(strings.Repeat("*", h.Lvl)) + if h.Status != "" { + tmp.WriteString(" " + h.Status) + } + if h.Priority != "" { + tmp.WriteString(" [#" + h.Priority + "]") + } + tmp.WriteString(" ") + tmp.writeNodes(h.Title...) + hString := tmp.String() + if len(h.Tags) != 0 { + hString += " " + tString := ":" + strings.Join(h.Tags, ":") + ":" + if n := w.TagsColumn - len(tString) - len(hString); n > 0 { + w.WriteString(hString + strings.Repeat(" ", n) + tString) + } else { + w.WriteString(hString + tString) + } + } else { + w.WriteString(hString) + } + w.WriteString("\n") + if len(h.Children) != 0 { + w.WriteString(w.indent) + } + w.writeNodes(h.Children...) +} + +func (w *OrgWriter) writeBlock(b Block) { + w.WriteString(fmt.Sprintf("%s#+BEGIN_%s %s\n", w.indent, b.Name, strings.Join(b.Parameters, " "))) + w.writeNodes(b.Children...) + w.WriteString(w.indent + "#+END_" + b.Name + "\n") +} + +func (w *OrgWriter) writeFootnoteDefinition(f FootnoteDefinition) { + w.WriteString(fmt.Sprintf("[fn:%s] ", f.Name)) + w.writeNodes(f.Children...) +} + +func (w *OrgWriter) writeParagraph(p Paragraph) { + w.writeNodes(p.Children...) +} + +func (w *OrgWriter) writeKeyword(k Keyword) { + w.WriteString(w.indent + fmt.Sprintf("#+%s: %s\n", k.Key, k.Value)) +} + +func (w *OrgWriter) writeComment(c Comment) { + w.WriteString(w.indent + "#" + c.Content + "\n") +} + +func (w *OrgWriter) writeList(l List) { w.writeNodes(l.Items...) } + +func (w *OrgWriter) writeListItem(li ListItem) { + w.WriteString(w.indent + li.Bullet + " ") + liWriter := w.emptyClone() + liWriter.indent = w.indent + strings.Repeat(" ", len(li.Bullet)+1) + liWriter.writeNodes(li.Children...) + w.WriteString(strings.TrimPrefix(liWriter.String(), liWriter.indent)) +} + +func (w *OrgWriter) writeTable(t Table) { + // TODO: pretty print tables + w.writeNodes(t.Header) + w.writeNodes(t.Rows...) 
+} + +func (w *OrgWriter) writeTableHeader(th TableHeader) { + w.writeTableColumns(th.Columns) + w.writeNodes(th.Separator) +} + +func (w *OrgWriter) writeTableRow(tr TableRow) { + w.writeTableColumns(tr.Columns) +} + +func (w *OrgWriter) writeTableSeparator(ts TableSeparator) { + w.WriteString(w.indent + ts.Content + "\n") +} + +func (w *OrgWriter) writeTableColumns(columns [][]Node) { + w.WriteString(w.indent + "| ") + for _, columnNodes := range columns { + w.writeNodes(columnNodes...) + w.WriteString(" | ") + } + w.WriteString("\n") +} + +func (w *OrgWriter) writeHorizontalRule(hr HorizontalRule) { + w.WriteString(w.indent + "-----\n") +} + +func (w *OrgWriter) writeLine(l Line) { + w.WriteString(w.indent) + w.writeNodes(l.Children...) + w.WriteString("\n") +} + +func (w *OrgWriter) writeText(t Text) { w.WriteString(t.Content) } + +func (w *OrgWriter) writeEmphasis(e Emphasis) { + borders, ok := emphasisOrgBorders[e.Kind] + if !ok { + panic(fmt.Sprintf("bad emphasis %#v", e)) + } + w.WriteString(borders[0]) + w.writeNodes(e.Content...) + w.WriteString(borders[1]) +} + +func (w *OrgWriter) writeLinebreak(l Linebreak) { + w.WriteString(`\\`) +} + +func (w *OrgWriter) writeFootnoteLink(l FootnoteLink) { + w.WriteString("[fn:" + l.Name + "]") +} + +func (w *OrgWriter) writeRegularLink(l RegularLink) { + descriptionWriter := w.emptyClone() + descriptionWriter.writeNodes(l.Description...) 
+ description := descriptionWriter.String() + if l.URL != description { + w.WriteString(fmt.Sprintf("[[%s][%s]]", l.URL, description)) + } else { + w.WriteString(fmt.Sprintf("[[%s]]", l.URL)) + } +} diff --git a/org_test.go b/org_test.go new file mode 100644 index 0000000..157687d --- /dev/null +++ b/org_test.go @@ -0,0 +1,61 @@ +package org + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strings" + "testing" + + "github.com/pmezard/go-difflib/difflib" +) + +func TestOrgWriter(t *testing.T) { + for _, path := range orgTestFiles() { + expected := fileString(path) + reader, writer := strings.NewReader(expected), NewOrgWriter() + actual := NewDocument().Parse(reader).Write(writer).String() + if actual != expected { + t.Errorf("%s:\n%s'", path, diff(actual, expected)) + } else { + t.Logf("%s: passed!", path) + } + } +} + +func orgTestFiles() []string { + dir := "./testdata" + files, err := ioutil.ReadDir(dir) + if err != nil { + panic(fmt.Sprintf("Could not read directory: %s", err)) + } + orgFiles := []string{} + for _, f := range files { + name := f.Name() + if filepath.Ext(name) != ".org" { + continue + } + orgFiles = append(orgFiles, filepath.Join(dir, name)) + } + return orgFiles +} + +func fileString(path string) string { + bs, err := ioutil.ReadFile(path) + if err != nil { + panic(fmt.Sprintf("Could not read file %s: %s", path, err)) + } + return string(bs) +} + +func diff(actual, expected string) string { + diff := difflib.UnifiedDiff{ + A: difflib.SplitLines(actual), + B: difflib.SplitLines(expected), + FromFile: "Actual", + ToFile: "Expected", + Context: 3, + } + text, _ := difflib.GetUnifiedDiffString(diff) + return text +} diff --git a/paragraph.go b/paragraph.go new file mode 100644 index 0000000..9d8c289 --- /dev/null +++ b/paragraph.go @@ -0,0 +1,57 @@ +package org + +import ( + "regexp" +) + +type Line struct{ Children []Node } +type Paragraph struct{ Children []Node } +type HorizontalRule struct{} + +var horizontalRuleRegexp = 
regexp.MustCompile(`^(\s*)-{5,}\s*$`) +var plainTextRegexp = regexp.MustCompile(`^(\s*)(.*)`) + +func lexText(line string) (token, bool) { + if m := plainTextRegexp.FindStringSubmatch(line); m != nil { + return token{"text", len(m[1]), m[2], m}, true + } + return nilToken, false +} + +func lexHorizontalRule(line string) (token, bool) { + if m := horizontalRuleRegexp.FindStringSubmatch(line); m != nil { + return token{"horizontalRule", len(m[1]), "", m}, true + } + return nilToken, false +} + +func isSecondBlankLine(d *Document, i int) bool { + if i-1 <= 0 { + return false + } + t1, t2 := d.tokens[i-1], d.tokens[i] + if t1.kind == "text" && t2.kind == "text" && t1.content == "" && t2.content == "" { + return true + } + return false +} + +func (d *Document) parseParagraph(i int, parentStop stopFn) (int, Node) { + lines, start := []Node{Line{d.parseInline(d.tokens[i].content)}}, i + i++ + stop := func(d *Document, i int) bool { return parentStop(d, i) || d.tokens[i].kind != "text" } + for ; !stop(d, i); i++ { + if isSecondBlankLine(d, i) { + lines = lines[:len(lines)-1] + i++ + break + } + lines = append(lines, Line{d.parseInline(d.tokens[i].content)}) + } + consumed := i - start + return consumed, Paragraph{lines} +} + +func (d *Document) parseHorizontalRule(i int, parentStop stopFn) (int, Node) { + return 1, HorizontalRule{} +} diff --git a/table.go b/table.go new file mode 100644 index 0000000..3a84939 --- /dev/null +++ b/table.go @@ -0,0 +1,63 @@ +package org + +import ( + "regexp" + "strings" +) + +type Table struct { + Header Node + Rows []Node +} + +type TableSeparator struct{ Content string } + +type TableHeader struct { + Columns [][]Node + Separator TableSeparator +} + +type TableRow struct{ Columns [][]Node } + +var tableSeparatorRegexp = regexp.MustCompile(`^(\s*)(\|[+|-]*)\s*$`) +var tableRowRegexp = regexp.MustCompile(`^(\s*)(\|.*)`) + +func lexTable(line string) (token, bool) { + if m := 
tableSeparatorRegexp.FindStringSubmatch(line); m != nil { + return token{"tableSeparator", len(m[1]), m[2], m}, true + } else if m := tableRowRegexp.FindStringSubmatch(line); m != nil { + return token{"tableRow", len(m[1]), m[2], m}, true + } + return nilToken, false +} + +func (d *Document) parseTable(i int, parentStop stopFn) (int, Node) { + rows, start := []Node{}, i + for !parentStop(d, i) && (d.tokens[i].kind == "tableRow" || d.tokens[i].kind == "tableSeparator") { + consumed, row := d.parseTableRowOrSeparator(i, parentStop) + i += consumed + rows = append(rows, row) + } + + consumed := i - start + if len(rows) >= 2 { + if row, ok := rows[0].(TableRow); ok { + if separator, ok := rows[1].(TableSeparator); ok { + return consumed, Table{TableHeader{row.Columns, separator}, rows[2:]} + } + } + } + return consumed, Table{nil, rows} +} + +func (d *Document) parseTableRowOrSeparator(i int, _ stopFn) (int, Node) { + if d.tokens[i].kind == "tableSeparator" { + return 1, TableSeparator{d.tokens[i].content} + } + fields := strings.FieldsFunc(d.tokens[i].content, func(r rune) bool { return r == '|' }) + row := TableRow{} + for _, field := range fields { + row.Columns = append(row.Columns, d.parseInline(strings.TrimSpace(field))) + } + return 1, row +} diff --git a/testdata/example.org b/testdata/example.org new file mode 100644 index 0000000..90cad73 --- /dev/null +++ b/testdata/example.org @@ -0,0 +1,59 @@ +#+TITLE: Example org mode file +#+AUTHOR: Niklas Fasching +#+DESCRIPTION: just some random elements with little explanation + +* Motivation + +To validate the parser we'll try printing the AST back to org-mode source - if that +works we can be kind of sure that the parsing worked. +At least I hope so - I would like to get around writing tests for the individual parsing +functions... 
+ +** Headlines with TODO status, priority & tags +*** TODO [#B] Headline with todo status & priority +*** DONE Headline with TODO status +*** [#A] Headline with tags & priority :foo:bar: +this one is cheating a little as tags are ALWAYS printed right aligned to a given column number... +** Lists +- unordered list item 1 +- unordered list item 2 - with ~inline~ /markup/ + 1. ordered sublist item 1 + a) ordered sublist item 1 + b) ordered sublist item 2 + c) ordered sublist item 3 + 2. ordered sublist item 2 +- unordered list item 3 - and a [[https://example.com][link]] + and some lines of text + 1. and another subitem + #+BEGIN_SRC sh + echo with a block + #+END_SRC + 2. and another one with a table + | a | b | c | + |---+---+---| + | 1 | 2 | 3 | + + and text with an empty line in between as well! +- unordered list item 4 + +** Inline +- /emphasis/ and a hard line break \\ +- /.emphasis with dot border chars./ +- /emphasis with a slash/inside/ +- /emphasis/ followed by raw text with slash / +- ->/not an emphasis/<- +- links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/ +- _underlined_ *bold* =verbatim= ~code~ +strikethrough+ +- *bold string with an *asterisk inside* +- links + 1. regular link [[https://example.com]] link without description + 2. regular link [[https://example.com][example.com]] link with description + 3. regular link to a file (image) [[file:my-img.png]] +** Footnotes +- normal footnote reference [fn:1] +- further references to the same footnote should not [fn:1] render duplicates in the footnote list +- also inline footnotes are supported via =fn:2:inline definition=. But we won't test that because it would + cause the output to look different from the input + +* Footnotes +[fn:1] Foobar