diff --git a/README.org b/README.org index 85021cb..24d4d64 100644 --- a/README.org +++ b/README.org @@ -7,3 +7,6 @@ A basic org-mode parser in go - https://orgmode.org/worg/dev/org-syntax.html - https://github.com/abo-abo/org-mode/blob/mirror/lisp/org.el - https://github.com/abo-abo/org-mode/blob/mirror/lisp/org-element.el +- test cases + - [[https://github.com/bdewey/org-ruby/blob/master/spec/html_examples][org-ruby]] + - pandoc, goorgeous diff --git a/block.go b/block.go new file mode 100644 index 0000000..07cc060 --- /dev/null +++ b/block.go @@ -0,0 +1,47 @@ +package org + +import ( + "regexp" + "strings" + "unicode" +) + +type Block struct { + Name string + Parameters []string + Children []Node +} + +var beginBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+BEGIN_(\w+)(.*)`) +var endBlockRegexp = regexp.MustCompile(`(?i)^(\s*)#\+END_(\w+)`) + +func lexBlock(line string) (token, bool) { + if m := beginBlockRegexp.FindStringSubmatch(line); m != nil { + return token{"beginBlock", len(m[1]), strings.ToUpper(m[2]), m}, true + } else if m := endBlockRegexp.FindStringSubmatch(line); m != nil { + return token{"endBlock", len(m[1]), strings.ToUpper(m[2]), m}, true + } + return nilToken, false +} + +func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) { + t, start, nodes := d.tokens[i], i, []Node{} + name, parameters := t.content, strings.Fields(t.matches[3]) + trim := trimIndentUpTo(d.tokens[i].lvl) + for i++; !(d.tokens[i].kind == "endBlock" && d.tokens[i].content == name); i++ { + if parentStop(d, i) { + return 0, nil + } + nodes = append(nodes, Line{[]Node{Text{trim(d.tokens[i].matches[0])}}}) + } + return i + 1 - start, Block{name, parameters, nodes} +} + +func trimIndentUpTo(max int) func(string) string { + return func(line string) string { + i := 0 + for ; i < len(line) && i < max && unicode.IsSpace(rune(line[i])); i++ { + } + return line[i:] + } +} diff --git a/cmd/org/org.go b/cmd/org/org.go new file mode 100644 index 0000000..c5c11ba --- /dev/null 
+++ b/cmd/org/org.go @@ -0,0 +1,31 @@ +package main + +import ( + "bytes" + "io/ioutil" + "log" + "os" + "strings" + + "github.com/niklasfasching/org" +) + +func main() { + log.SetFlags(0) + if len(os.Args) < 3 { + log.Println("USAGE: org FILE OUTPUT_FORMAT") + log.Fatal("supported output formats: org") + } + bs, err := ioutil.ReadFile(os.Args[1]) + if err != nil { + log.Fatal(err) + } + r, out := bytes.NewReader(bs), "" + switch strings.ToLower(os.Args[2]) { + case "org": + out = org.NewDocument().Parse(r).Write(org.NewOrgWriter()).String() + default: + log.Fatal("Unsupported output format") + } + log.Println(out) +} diff --git a/document.go b/document.go new file mode 100644 index 0000000..f7d27d7 --- /dev/null +++ b/document.go @@ -0,0 +1,150 @@ +package org + +import ( + "bufio" + "fmt" + "io" + "log" +) + +type Document struct { + tokens []token + Nodes []Node + Footnotes Footnotes + StatusKeywords []string + MaxEmphasisNewLines int + BufferSettings map[string]string + DefaultSettings map[string]string +} + +type Writer interface { + before(*Document) + after(*Document) + writeNodes(...Node) + String() string +} + +type Node interface{} + +type lexFn = func(line string) (t token, ok bool) +type parseFn = func(*Document, int, stopFn) (int, Node) +type stopFn = func(*Document, int) bool + +type token struct { + kind string + lvl int + content string + matches []string +} + +var lexFns = []lexFn{ + lexHeadline, + lexBlock, + lexList, + lexTable, + lexHorizontalRule, + lexKeywordOrComment, + lexFootnoteDefinition, + lexText, +} + +var nilToken = token{"nil", -1, "", nil} + +func NewDocument() *Document { + return &Document{ + Footnotes: Footnotes{ + ExcludeHeading: true, + Title: "Footnotes", + Definitions: map[string]FootnoteDefinition{}, + }, + MaxEmphasisNewLines: 1, + BufferSettings: map[string]string{}, + DefaultSettings: map[string]string{ + "TODO": "TODO | DONE", + }, + } +} + +func (d *Document) Write(w Writer) Writer { + if d.Nodes == nil { + 
panic("cannot Write() empty document: you must call Parse() first") + } + w.before(d) + w.writeNodes(d.Nodes...) + w.after(d) + return w +} + +func (d *Document) Parse(input io.Reader) *Document { + d.tokens = []token{} + scanner := bufio.NewScanner(input) + for scanner.Scan() { + d.tokens = append(d.tokens, tokenize(scanner.Text())) + } + if err := scanner.Err(); err != nil { + panic(err) + } + _, nodes := d.parseMany(0, func(d *Document, i int) bool { return !(i < len(d.tokens)) }) + d.Nodes = nodes + return d +} + +func (d *Document) Get(key string) string { + if v, ok := d.BufferSettings[key]; ok { + return v + } + if v, ok := d.DefaultSettings[key]; ok { + return v + } + return "" +} + +func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) { + switch d.tokens[i].kind { + case "unorderedList", "orderedList": + consumed, node = d.parseList(i, stop) + case "tableRow", "tableSeparator": + consumed, node = d.parseTable(i, stop) + case "beginBlock": + consumed, node = d.parseBlock(i, stop) + case "text": + consumed, node = d.parseParagraph(i, stop) + case "horizontalRule": + consumed, node = d.parseHorizontalRule(i, stop) + case "comment": + consumed, node = d.parseComment(i, stop) + case "keyword": + consumed, node = d.parseKeyword(i, stop) + case "headline": + consumed, node = d.parseHeadline(i, stop) + case "footnoteDefinition": + consumed, node = d.parseFootnoteDefinition(i, stop) + } + + if consumed != 0 { + return consumed, node + } + log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i]) + m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0]) + d.tokens[i] = token{"text", len(m[1]), m[2], m} + return d.parseOne(i, stop) +} + +func (d *Document) parseMany(i int, stop stopFn) (int, []Node) { + start, nodes := i, []Node{} + for i < len(d.tokens) { + consumed, node := d.parseOne(i, stop) + i += consumed + nodes = append(nodes, node) + } + return i - start, nodes +} + +func tokenize(line 
// headlineRegexp matches a headline line: one or more leading stars
// (group 1), whitespace, and the remaining text of the line (group 2).
var headlineRegexp = regexp.MustCompile(`^([*]+)\s+(.*)`)

// tagRegexp splits headline text into the title (group 1) and a trailing tag
// list of the form ":tag1:tag2:" (group 2, including any trailing blanks).
// Tag names may contain ASCII letters, digits and the characters "_", "@",
// "#" and "%"; without the underscore a tag such as ":foo_bar:" would be cut
// in the middle and its first half would end up in the title.
var tagRegexp = regexp.MustCompile(`(.*?)\s*(:[A-Za-z0-9_@#%:]+:\s*$)`)
return strings.FieldsFunc(d.Get("TODO"), func(r rune) bool { + return unicode.IsSpace(r) || r == '|' + }) +} + +func (d *Document) parseHeadline(i int, parentStop stopFn) (int, Node) { + t, headline := d.tokens[i], Headline{} + headline.Lvl = len(t.matches[1]) + text := t.content + + for _, k := range d.todoKeywords() { + if strings.HasPrefix(text, k) && len(text) > len(k) && unicode.IsSpace(rune(text[len(k)])) { + headline.Status = k + text = text[len(k)+1:] + break + } + } + + if len(text) >= 3 && text[0:2] == "[#" && strings.Contains("ABC", text[2:3]) && text[3] == ']' { + headline.Priority = text[2:3] + text = strings.TrimSpace(text[4:]) + } + + if m := tagRegexp.FindStringSubmatch(text); m != nil { + text = m[1] + headline.Tags = strings.FieldsFunc(m[2], func(r rune) bool { return r == ':' }) + } + + headline.Title = d.parseInline(text) + + stop := func(d *Document, i int) bool { + return parentStop(d, i) || d.tokens[i].kind == "headline" && d.tokens[i].lvl <= headline.Lvl + } + consumed, nodes := d.parseMany(i+1, stop) + headline.Children = nodes + + if headline.Lvl == 1 && text == d.Footnotes.Title && d.Footnotes.ExcludeHeading { + return consumed + 1, nil + } + return consumed + 1, headline +} diff --git a/inline.go b/inline.go new file mode 100644 index 0000000..004969d --- /dev/null +++ b/inline.go @@ -0,0 +1,184 @@ +package org + +import ( + "regexp" + "strings" + "unicode" +) + +type Text struct{ Content string } + +type Linebreak struct{} + +type Emphasis struct { + Kind string + Content []Node +} + +type FootnoteLink struct{ Name string } + +type RegularLink struct { + Protocol string + Description []Node + URL string +} + +var redundantSpaces = regexp.MustCompile("[ \t]+") +var subScriptSuperScriptRegexp = regexp.MustCompile(`([_^])\{(.*?)\}`) +var footnoteRegexp = regexp.MustCompile(`\[fn:([\w-]+?)(:(.*?))?\]`) + +func (d *Document) parseInline(input string) (nodes []Node) { + previous, current := 0, 0 + for current < len(input) { + consumed, node 
:= 0, (Node)(nil) + switch input[current] { + case '^': + consumed, node = d.parseSubOrSuperScript(input, current) + case '_': + consumed, node = d.parseSubScriptOrEmphasis(input, current) + case '*', '/', '=', '~', '+': + consumed, node = d.parseEmphasis(input, current) + case '[': + consumed, node = d.parseRegularLinkOrFootnoteReference(input, current) + case '\\': + consumed, node = d.parseExplicitLineBreak(input, current) + } + if consumed != 0 { + if current > previous { + nodes = append(nodes, Text{input[previous:current]}) + } + if node != nil { + nodes = append(nodes, node) + } + current += consumed + previous = current + } else { + current++ + } + } + + if previous < len(input) { + nodes = append(nodes, Text{input[previous:]}) + } + return nodes +} + +func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) { + if start == 0 || input[start-1] == '\n' || start+1 >= len(input) || input[start+1] != '\\' { + return 0, nil + } + for i := start + 1; ; i++ { + if i == len(input)-1 || input[i] == '\n' { + return i + 1 - start, Linebreak{} + } + if !unicode.IsSpace(rune(input[i])) { + break + } + } + return 0, nil +} + +func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) { + if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil { + return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2]}}} + } + return 0, nil +} + +func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) { + if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 { + return consumed, node + } + return d.parseEmphasis(input, start) +} + +func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) { + if len(input[start:]) >= 2 && input[start] == '[' && input[start+1] == '[' { + return d.parseRegularLink(input, start) + } else if len(input[start:]) >= 1 && input[start] == '[' { + return d.parseFootnoteReference(input, start) + } + return 0, nil +} + 
+func (d *Document) parseFootnoteReference(input string, start int) (int, Node) { + if m := footnoteRegexp.FindStringSubmatch(input[start:]); m != nil { + name, definition := m[1], m[3] + seen := false + for _, otherName := range d.Footnotes.Order { + if name == otherName { + seen = true + } + } + if !seen { + d.Footnotes.Order = append(d.Footnotes.Order, name) + } + if definition != "" { + d.Footnotes.Definitions[name] = FootnoteDefinition{name, d.parseInline(definition)} + } + return len(m[0]), FootnoteLink{name} + } + return 0, nil +} + +func (d *Document) parseRegularLink(input string, start int) (int, Node) { + if len(input[start:]) == 0 || input[start+1] != '[' { + return 0, nil + } + input = input[start:] + end := strings.Index(input, "]]") + if end == -1 { + return 0, nil + } + + rawLink := input[2:end] + link, description, parts := "", []Node{}, strings.Split(rawLink, "][") + if len(parts) == 2 { + link, description = parts[0], d.parseInline(parts[1]) + } else { + link, description = rawLink, []Node{Text{rawLink}} + } + consumed := end + 2 + protocol, parts := "", strings.SplitN(link, ":", 2) + if len(parts) == 2 { + protocol = parts[0] + } + return consumed, RegularLink{protocol, description, link} +} + +func (d *Document) parseEmphasis(input string, start int) (int, Node) { + marker, i := input[start], start + if !hasValidPreAndBorderChars(input, i) { + return 0, nil + } + for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ { + if input[i] == '\n' { + consumedNewLines++ + } + + if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) { + return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])} + } + } + return 0, nil +} + +// see org-emphasis-regexp-components (emacs elisp variable) + +func hasValidPreAndBorderChars(input string, i int) bool { + return (i+1 >= len(input) || isValidBorderChar(rune(input[i+1]))) && (i == 0 || 
isValidPreChar(rune(input[i-1]))) +} + +func hasValidPostAndBorderChars(input string, i int) bool { + return (i == 0 || isValidBorderChar(rune(input[i-1]))) && (i+1 >= len(input) || isValidPostChar(rune(input[i+1]))) +} + +func isValidPreChar(r rune) bool { + return unicode.IsSpace(r) || strings.ContainsRune(`-({'"`, r) +} + +func isValidPostChar(r rune) bool { + return unicode.IsSpace(r) || strings.ContainsRune(`-.,:!?;'")}[`, r) +} + +func isValidBorderChar(r rune) bool { return !unicode.IsSpace(r) } diff --git a/keyword.go b/keyword.go new file mode 100644 index 0000000..3eb8c65 --- /dev/null +++ b/keyword.go @@ -0,0 +1,36 @@ +package org + +import ( + "regexp" + "strings" +) + +type Keyword struct { + Key string + Value string +} + +type Comment struct{ Content string } + +var keywordRegexp = regexp.MustCompile(`^(\s*)#\+([^:]+):\s(.*)`) +var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`) + +func lexKeywordOrComment(line string) (token, bool) { + if m := keywordRegexp.FindStringSubmatch(line); m != nil { + return token{"keyword", len(m[1]), m[2], m}, true + } else if m := commentRegexp.FindStringSubmatch(line); m != nil { + return token{"comment", len(m[1]), m[2], m}, true + } + return nilToken, false +} + +func (d *Document) parseKeyword(i int, stop stopFn) (int, Node) { + t := d.tokens[i] + k, v := t.matches[2], t.matches[3] + d.BufferSettings[k] = strings.Join([]string{d.BufferSettings[k], v}, "\n") + return 1, Keyword{k, v} +} + +func (d *Document) parseComment(i int, stop stopFn) (int, Node) { + return 1, Comment{d.tokens[i].content} +} diff --git a/list.go b/list.go new file mode 100644 index 0000000..f8362f2 --- /dev/null +++ b/list.go @@ -0,0 +1,82 @@ +package org + +import ( + "fmt" + "regexp" + "strings" + "unicode" +) + +type List struct { + Kind string + Items []Node +} + +type ListItem struct { + Bullet string + Children []Node +} + +var unorderedListRegexp = regexp.MustCompile(`^(\s*)([-]|[+]|[*])\s(.*)`) +var orderedListRegexp = 
regexp.MustCompile(`^(\s*)(([0-9]+|[a-zA-Z])[.)])\s+(.*)`) + +func lexList(line string) (token, bool) { + if m := unorderedListRegexp.FindStringSubmatch(line); m != nil { + return token{"unorderedList", len(m[1]), m[3], m}, true + } else if m := orderedListRegexp.FindStringSubmatch(line); m != nil { + return token{"orderedList", len(m[1]), m[4], m}, true + } + return nilToken, false +} + +func isListToken(t token) bool { + return t.kind == "unorderedList" || t.kind == "orderedList" +} + +func stopIndentBelow(t token, minIndent int) bool { + return t.lvl < minIndent && !(t.kind == "text" && t.content == "") +} + +func listKind(t token) string { + switch bullet := t.matches[2]; { + case bullet == "*" || bullet == "+" || bullet == "-": + return bullet + case unicode.IsLetter(rune(bullet[0])): + return "letter" + case unicode.IsDigit(rune(bullet[0])): + return "number" + default: + panic(fmt.Sprintf("bad list bullet '%s': %#v", bullet, t)) + } +} + +func (d *Document) parseList(i int, parentStop stopFn) (int, Node) { + start, lvl := i, d.tokens[i].lvl + + list := List{Kind: listKind(d.tokens[i])} + for !parentStop(d, i) && d.tokens[i].lvl == lvl && isListToken(d.tokens[i]) { + consumed, node := d.parseListItem(i, parentStop) + i += consumed + list.Items = append(list.Items, node) + } + return i - start, list +} + +func (d *Document) parseListItem(i int, parentStop stopFn) (int, Node) { + start, nodes, bullet := i, []Node{}, d.tokens[i].matches[2] + minIndent := d.tokens[i].lvl + len(bullet) + d.tokens[i] = tokenize(strings.Repeat(" ", minIndent) + d.tokens[i].content) + stop := func(d *Document, i int) bool { + if parentStop(d, i) { + return true + } + t := d.tokens[i] + return t.lvl < minIndent && !(t.kind == "text" && t.content == "") + } + for !stop(d, i) && !isSecondBlankLine(d, i) { + consumed, node := d.parseOne(i, stop) + i += consumed + nodes = append(nodes, node) + } + return i - start, ListItem{bullet, nodes} +} diff --git a/org.go b/org.go new file mode 
100644 index 0000000..7e0f53f --- /dev/null +++ b/org.go @@ -0,0 +1,243 @@ +package org + +import ( + "fmt" + "regexp" + "strings" +) + +type stringBuilder = strings.Builder + +type OrgWriter struct { + TagsColumn int // see org-tags-column + stringBuilder + indent string +} + +var emphasisOrgBorders = map[string][]string{ + "_": []string{"_", "_"}, + "*": []string{"*", "*"}, + "/": []string{"/", "/"}, + "+": []string{"+", "+"}, + "~": []string{"~", "~"}, + "=": []string{"=", "="}, + "_{}": []string{"_{", "}"}, + "^{}": []string{"^{", "}"}, +} + +func NewOrgWriter() *OrgWriter { + return &OrgWriter{ + TagsColumn: 77, + } +} + +func (w *OrgWriter) before(d *Document) {} +func (w *OrgWriter) after(d *Document) { + fs := d.Footnotes + if len(fs.Definitions) == 0 { + return + } + w.WriteString("* " + fs.Title + "\n") + for _, name := range fs.Order { + w.writeNodes(fs.Definitions[name]) + } +} + +func (w *OrgWriter) emptyClone() *OrgWriter { + wcopy := *w + wcopy.stringBuilder = strings.Builder{} + return &wcopy +} + +func (w *OrgWriter) writeNodes(ns ...Node) { + for _, n := range ns { + switch n := n.(type) { + case Comment: + w.writeComment(n) + case Keyword: + w.writeKeyword(n) + case Headline: + w.writeHeadline(n) + case Block: + w.writeBlock(n) + + case FootnoteDefinition: + w.writeFootnoteDefinition(n) + + case List: + w.writeList(n) + case ListItem: + w.writeListItem(n) + + case Table: + w.writeTable(n) + case TableHeader: + w.writeTableHeader(n) + case TableRow: + w.writeTableRow(n) + case TableSeparator: + w.writeTableSeparator(n) + + case Paragraph: + w.writeParagraph(n) + case HorizontalRule: + w.writeHorizontalRule(n) + case Line: + w.writeLine(n) + + case Text: + w.writeText(n) + case Emphasis: + w.writeEmphasis(n) + case Linebreak: + w.writeLinebreak(n) + case RegularLink: + w.writeRegularLink(n) + case FootnoteLink: + w.writeFootnoteLink(n) + default: + if n != nil { + panic(fmt.Sprintf("bad node %#v", n)) + } + } + } +} + +var eolWhiteSpaceRegexp = 
regexp.MustCompile("[\t ]*\n") + +func (w *OrgWriter) String() string { + s := w.stringBuilder.String() + return eolWhiteSpaceRegexp.ReplaceAllString(s, "\n") +} + +func (w *OrgWriter) writeHeadline(h Headline) { + tmp := w.emptyClone() + tmp.WriteString(strings.Repeat("*", h.Lvl)) + if h.Status != "" { + tmp.WriteString(" " + h.Status) + } + if h.Priority != "" { + tmp.WriteString(" [#" + h.Priority + "]") + } + tmp.WriteString(" ") + tmp.writeNodes(h.Title...) + hString := tmp.String() + if len(h.Tags) != 0 { + hString += " " + tString := ":" + strings.Join(h.Tags, ":") + ":" + if n := w.TagsColumn - len(tString) - len(hString); n > 0 { + w.WriteString(hString + strings.Repeat(" ", n) + tString) + } else { + w.WriteString(hString + tString) + } + } else { + w.WriteString(hString) + } + w.WriteString("\n") + if len(h.Children) != 0 { + w.WriteString(w.indent) + } + w.writeNodes(h.Children...) +} + +func (w *OrgWriter) writeBlock(b Block) { + w.WriteString(fmt.Sprintf("%s#+BEGIN_%s %s\n", w.indent, b.Name, strings.Join(b.Parameters, " "))) + w.writeNodes(b.Children...) + w.WriteString(w.indent + "#+END_" + b.Name + "\n") +} + +func (w *OrgWriter) writeFootnoteDefinition(f FootnoteDefinition) { + w.WriteString(fmt.Sprintf("[fn:%s] ", f.Name)) + w.writeNodes(f.Children...) +} + +func (w *OrgWriter) writeParagraph(p Paragraph) { + w.writeNodes(p.Children...) +} + +func (w *OrgWriter) writeKeyword(k Keyword) { + w.WriteString(w.indent + fmt.Sprintf("#+%s: %s\n", k.Key, k.Value)) +} + +func (w *OrgWriter) writeComment(c Comment) { + w.WriteString(w.indent + "#" + c.Content) +} + +func (w *OrgWriter) writeList(l List) { w.writeNodes(l.Items...) } + +func (w *OrgWriter) writeListItem(li ListItem) { + w.WriteString(w.indent + li.Bullet + " ") + liWriter := w.emptyClone() + liWriter.indent = w.indent + strings.Repeat(" ", len(li.Bullet)+1) + liWriter.writeNodes(li.Children...) 
+ w.WriteString(strings.TrimPrefix(liWriter.String(), liWriter.indent)) +} + +func (w *OrgWriter) writeTable(t Table) { + // TODO: pretty print tables + w.writeNodes(t.Header) + w.writeNodes(t.Rows...) +} + +func (w *OrgWriter) writeTableHeader(th TableHeader) { + w.writeTableColumns(th.Columns) + w.writeNodes(th.Separator) +} + +func (w *OrgWriter) writeTableRow(tr TableRow) { + w.writeTableColumns(tr.Columns) +} + +func (w *OrgWriter) writeTableSeparator(ts TableSeparator) { + w.WriteString(w.indent + ts.Content + "\n") +} + +func (w *OrgWriter) writeTableColumns(columns [][]Node) { + w.WriteString(w.indent + "| ") + for _, columnNodes := range columns { + w.writeNodes(columnNodes...) + w.WriteString(" | ") + } + w.WriteString("\n") +} + +func (w *OrgWriter) writeHorizontalRule(hr HorizontalRule) { + w.WriteString(w.indent + "-----\n") +} + +func (w *OrgWriter) writeLine(l Line) { + w.WriteString(w.indent) + w.writeNodes(l.Children...) + w.WriteString("\n") +} + +func (w *OrgWriter) writeText(t Text) { w.WriteString(t.Content) } + +func (w *OrgWriter) writeEmphasis(e Emphasis) { + borders, ok := emphasisOrgBorders[e.Kind] + if !ok { + panic(fmt.Sprintf("bad emphasis %#v", e)) + } + w.WriteString(borders[0]) + w.writeNodes(e.Content...) + w.WriteString(borders[1]) +} + +func (w *OrgWriter) writeLinebreak(l Linebreak) { + w.WriteString(`\\`) +} + +func (w *OrgWriter) writeFootnoteLink(l FootnoteLink) { + w.WriteString("[fn:" + l.Name + "]") +} + +func (w *OrgWriter) writeRegularLink(l RegularLink) { + descriptionWriter := w.emptyClone() + descriptionWriter.writeNodes(l.Description...) 
+ description := descriptionWriter.String() + if l.URL != description { + w.WriteString(fmt.Sprintf("[[%s][%s]]", l.URL, description)) + } else { + w.WriteString(fmt.Sprintf("[[%s]]", l.URL)) + } +} diff --git a/org_test.go b/org_test.go new file mode 100644 index 0000000..157687d --- /dev/null +++ b/org_test.go @@ -0,0 +1,61 @@ +package org + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strings" + "testing" + + "github.com/pmezard/go-difflib/difflib" +) + +func TestOrgWriter(t *testing.T) { + for _, path := range orgTestFiles() { + expected := fileString(path) + reader, writer := strings.NewReader(expected), NewOrgWriter() + actual := NewDocument().Parse(reader).Write(writer).String() + if actual != expected { + t.Errorf("%s:\n%s'", path, diff(actual, expected)) + } else { + t.Logf("%s: passed!", path) + } + } +} + +func orgTestFiles() []string { + dir := "./testdata" + files, err := ioutil.ReadDir(dir) + if err != nil { + panic(fmt.Sprintf("Could not read directory: %s", err)) + } + orgFiles := []string{} + for _, f := range files { + name := f.Name() + if filepath.Ext(name) != ".org" { + continue + } + orgFiles = append(orgFiles, filepath.Join(dir, name)) + } + return orgFiles +} + +func fileString(path string) string { + bs, err := ioutil.ReadFile(path) + if err != nil { + panic(fmt.Sprintf("Could not read file %s: %s", path, err)) + } + return string(bs) +} + +func diff(actual, expected string) string { + diff := difflib.UnifiedDiff{ + A: difflib.SplitLines(actual), + B: difflib.SplitLines(expected), + FromFile: "Actual", + ToFile: "Expected", + Context: 3, + } + text, _ := difflib.GetUnifiedDiffString(diff) + return text +} diff --git a/paragraph.go b/paragraph.go new file mode 100644 index 0000000..9d8c289 --- /dev/null +++ b/paragraph.go @@ -0,0 +1,57 @@ +package org + +import ( + "regexp" +) + +type Line struct{ Children []Node } +type Paragraph struct{ Children []Node } +type HorizontalRule struct{} + +var horizontalRuleRegexp = 
// tableSeparatorRegexp matches a table separator line such as "|---+---|":
// optional indentation (group 1), then a pipe followed by any run of "+", "-"
// and "|" characters (group 2), then optional trailing blanks. The "-" is
// escaped inside the character class; unescaped between "+" and "|" it would
// denote a range that also covers digits and letters, so that a row like
// "|abc|def|" would be taken for a separator.
var tableSeparatorRegexp = regexp.MustCompile(`^(\s*)(\|[+\-|]*)\s*$`)

// tableRowRegexp matches any other table line: optional indentation
// (group 1) followed by a pipe and the rest of the line (group 2).
var tableRowRegexp = regexp.MustCompile(`^(\s*)(\|.*)`)
tableSeparatorRegexp.FindStringSubmatch(line); m != nil { + return token{"tableSeparator", len(m[1]), m[2], m}, true + } else if m := tableRowRegexp.FindStringSubmatch(line); m != nil { + return token{"tableRow", len(m[1]), m[2], m}, true + } + return nilToken, false +} + +func (d *Document) parseTable(i int, parentStop stopFn) (int, Node) { + rows, start := []Node{}, i + for !parentStop(d, i) && (d.tokens[i].kind == "tableRow" || d.tokens[i].kind == "tableSeparator") { + consumed, row := d.parseTableRowOrSeparator(i, parentStop) + i += consumed + rows = append(rows, row) + } + + consumed := i - start + if len(rows) >= 2 { + if row, ok := rows[0].(TableRow); ok { + if separator, ok := rows[1].(TableSeparator); ok { + return consumed, Table{TableHeader{row.Columns, separator}, rows[2:]} + } + } + } + return consumed, Table{nil, rows} +} + +func (d *Document) parseTableRowOrSeparator(i int, _ stopFn) (int, Node) { + if d.tokens[i].kind == "tableSeparator" { + return 1, TableSeparator{d.tokens[i].content} + } + fields := strings.FieldsFunc(d.tokens[i].content, func(r rune) bool { return r == '|' }) + row := TableRow{} + for _, field := range fields { + row.Columns = append(row.Columns, d.parseInline(strings.TrimSpace(field))) + } + return 1, row +} diff --git a/testdata/example.org b/testdata/example.org new file mode 100644 index 0000000..90cad73 --- /dev/null +++ b/testdata/example.org @@ -0,0 +1,59 @@ +#+TITLE: Example org mode file +#+AUTHOR: Niklas Fasching +#+DESCRIPTION: just some random elements with little explanation + +* Motivation + +To validate the parser we'll try printing the AST back to org-mode source - if that +works we can be kind of sure that the parsing worked. +At least I hope so - I would like to get around writing tests for the individual parsing +functions... 
+ +** Headlines with TODO status, priority & tags +*** TODO [#B] Headline with todo status & priority +*** DONE Headline with TODO status +*** [#A] Headline with tags & priority :foo:bar: +this one is cheating a little as tags are ALWAYS printed right aligned to a given column number... +** Lists +- unordered list item 1 +- unordered list item 2 - with ~inline~ /markup/ + 1. ordered sublist item 1 + a) ordered sublist item 1 + b) ordered sublist item 2 + c) ordered sublist item 3 + 2. ordered sublist item 2 +- unordered list item 3 - and a [[https://example.com][link]] + and some lines of text + 1. and another subitem + #+BEGIN_SRC sh + echo with a block + #+END_SRC + 2. and another one with a table + | a | b | c | + |---+---+---| + | 1 | 2 | 3 | + + and text with an empty line in between as well! +- unordered list item 4 + +** Inline +- /emphasis/ and a hard line break \\ +- /.emphasis with dot border chars./ +- /emphasis with a slash/inside/ +- /emphasis/ followed by raw text with slash / +- ->/not an emphasis/<- +- links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/ +- _underlined_ *bold* =verbatim= ~code~ +strikethrough+ +- *bold string with an *asterisk inside* +- links + 1. regular link [[https://example.com]] link without description + 2. regular link [[https://example.com][example.com]] link with description + 3. regular link to a file (image) [[file:my-img.png]] +** Footnotes +- normal footnote reference [fn:1] +- further references to the same footnote should not [fn:1] render duplicates in the footnote list +- also inline footnotes are supported via =fn:2:inline definition=. But we won't test that because it would + cause the output to look different from the input + +* Footnotes +[fn:1] Foobar