From 592be07cfd6733b34b4f57156647a80b1898ce95 Mon Sep 17 00:00:00 2001
From: Niklas Fasching
Date: Sun, 2 Dec 2018 20:58:06 +0100
Subject: [PATCH] Refactor space handling of writers
I went through the issues of goorgeous and picked a few that seemed easy enough
to add (and added some more as todos for later). That helped a lot and showed
some bugs / edge cases that required changes.
- the org writer wrote a lot of eol spaces and just removed them whenever
String() was actually called. That worked until now but did not bode well with
rendering an empty headline - by removing ALL eol space we would render "* "
back as just "*" -> not a headline anymore.
- the html writer had some special handling for line spacing inside paragraphs
and list items - with the introduction of more blocks we need that handling
everywhere.
As browsers / html renderers are nice enough to collapse whitespace (and
especially collapse "\s*\n" into " ") we can just write out the newlines and
let the renderer take care of the rest.
---
README.org | 19 ++-
org/block.go | 7 +-
org/html.go | 54 ++++---
org/org.go | 28 ++--
org/testdata/example.html | 289 ++++++++++++++++++++++++++++++++------
org/testdata/example.org | 40 ++++++
6 files changed, 339 insertions(+), 98 deletions(-)
diff --git a/README.org b/README.org
index 49faa3f..7d8bec7 100644
--- a/README.org
+++ b/README.org
@@ -3,18 +3,23 @@ A basic org-mode parser in go
- have a org-mode AST to play around with building an org-mode language server
- hopefully add reasonable org-mode support to hugo - sadly [[https://github.com/chaseadamsio/goorgeous][goorgeous]] is broken & abandoned
* next
+- handle #+RESULTS: raw and stuff
- hugo frontmatter - see https://gohugo.io/content-management/front-matter/
- captions: images, tables & blocks
+*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/72][#72]]: Support for #+ATTR_HTML
+*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/46][#46]]: Support for symbols like ndash and mdash
+- see org-entities replacement: see org-entities-help
+*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/10][#10]]: Support noexport
+*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/19][#19]]: Support #+HTML
+*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/31][#31]]: Support #+INCLUDE
+- see https://orgmode.org/manual/Include-files.html
+*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/33][#33]]: Wrong output when mixing html with org-mode
* later
-- affiliated keywords
- see org-element.el - org-element-affiliated-keywords
+- affiliated keywords: see org-element.el - org-element-affiliated-keywords
- keywords: support both multi (e.g. LINK, TODO) & normal (e.g. AUTHOR, TITLE) keywords
- links based on #+LINK
-- includes https://orgmode.org/manual/Include-files.html
- could be used to have a single org file (ignored via hugo ignoreFiles) and then for each post a file including the relevant headline
-- tables
- colgroups https://orgmode.org/worg/org-tutorials/tables.html
-- org-entities replacement: see org-entities-help
+- table colgroups https://orgmode.org/worg/org-tutorials/tables.html
+- table pretty printing
* resources
- syntax
- https://orgmode.org/worg/dev/org-syntax.html
diff --git a/org/block.go b/org/block.go
index 07cc060..63d3a8c 100644
--- a/org/block.go
+++ b/org/block.go
@@ -32,7 +32,12 @@ func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
if parentStop(d, i) {
return 0, nil
}
- nodes = append(nodes, Line{[]Node{Text{trim(d.tokens[i].matches[0])}}})
+ text := trim(d.tokens[i].matches[0])
+ if name == "SRC" || name == "EXAMPLE" {
+ nodes = append(nodes, Line{[]Node{Text{text}}})
+ } else {
+ nodes = append(nodes, Line{d.parseInline(text)})
+ }
}
return i + 1 - start, Block{name, parameters, nodes}
}
diff --git a/org/html.go b/org/html.go
index 765b597..9c47dd0 100644
--- a/org/html.go
+++ b/org/html.go
@@ -102,15 +102,6 @@ func (w *HTMLWriter) writeNodes(ns ...Node) {
}
}
-func (w *HTMLWriter) writeLines(lines []Node) {
- for i, line := range lines {
- w.writeNodes(line)
- if i != len(lines)-1 && line.(Line).Children != nil {
- w.WriteString(" ")
- }
- }
-}
-
func (w *HTMLWriter) writeBlock(b Block) {
switch b.Name {
case "SRC":
@@ -125,17 +116,22 @@ func (w *HTMLWriter) writeBlock(b Block) {
w.WriteString(w.HighlightCodeBlock(strings.Join(lines, "\n"), lang))
w.WriteString("\n\n")
case "EXAMPLE":
- w.WriteString(`` + "\n")
+ w.WriteString(`\n`)
w.writeNodes(b.Children...)
- w.WriteString("\n
\n")
+ w.WriteString("
\n")
case "QUOTE":
w.WriteString("\n")
w.writeNodes(b.Children...)
- w.WriteString("\n
\n")
+ w.WriteString("\n")
case "CENTER":
- w.WriteString(`` + "\n")
+ w.WriteString(`
` + "\n")
w.writeNodes(b.Children...)
- w.WriteString("\n
\n")
+ w.WriteString("
\n")
+ default:
+ w.WriteString(fmt.Sprintf(``, strings.ToLower(b.Name)) + "\n")
+ w.writeNodes(b.Children...)
+ w.WriteString("
\n")
+
}
}
@@ -159,6 +155,7 @@ func (w *HTMLWriter) writeFootnotes(d *Document) {
}
w.WriteString("\n\n")
}
+
func (w *HTMLWriter) writeHeadline(h Headline) {
w.WriteString(fmt.Sprintf("", h.Lvl))
w.writeNodes(h.Title...)
@@ -219,27 +216,24 @@ func (w *HTMLWriter) writeList(l List) {
}
func (w *HTMLWriter) writeListItem(li ListItem) {
- w.WriteString("")
- if len(li.Children) == 1 {
- if p, ok := li.Children[0].(Paragraph); ok {
- w.writeLines(p.Children)
- }
- } else {
- w.writeNodes(li.Children...)
- }
+ w.WriteString("\n")
+ w.writeNodes(li.Children...)
w.WriteString("\n")
}
func (w *HTMLWriter) writeLine(l Line) {
- w.writeNodes(l.Children...)
+ if len(l.Children) != 0 {
+ w.writeNodes(l.Children...)
+ w.WriteString("\n")
+ }
}
func (w *HTMLWriter) writeParagraph(p Paragraph) {
if len(p.Children) == 1 && p.Children[0].(Line).Children == nil {
return
}
- w.WriteString("")
- w.writeLines(p.Children)
+ w.WriteString("
\n")
+ w.writeNodes(p.Children...)
w.WriteString("
\n")
}
@@ -248,15 +242,15 @@ func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) {
}
func (w *HTMLWriter) writeTable(t Table) {
- w.WriteString("")
+ w.WriteString("\n")
w.writeNodes(t.Header)
- w.WriteString("")
+ w.WriteString("\n")
w.writeNodes(t.Rows...)
w.WriteString("\n
\n")
}
func (w *HTMLWriter) writeTableRow(t TableRow) {
- w.WriteString("\n\n")
+ w.WriteString("
\n")
for _, column := range t.Columns {
w.WriteString("")
w.writeNodes(column...)
@@ -266,7 +260,7 @@ func (w *HTMLWriter) writeTableRow(t TableRow) {
}
func (w *HTMLWriter) writeTableHeader(t TableHeader) {
- w.WriteString("\n\n")
+ w.WriteString("\n")
for _, column := range t.Columns {
w.WriteString("")
w.writeNodes(column...)
@@ -276,5 +270,5 @@ func (w *HTMLWriter) writeTableHeader(t TableHeader) {
}
func (w *HTMLWriter) writeTableSeparator(t TableSeparator) {
- w.WriteString("\n | \n")
+ w.WriteString(" \n")
}
diff --git a/org/org.go b/org/org.go
index e728b04..b28b176 100644
--- a/org/org.go
+++ b/org/org.go
@@ -2,7 +2,6 @@ package org
import (
"fmt"
- "regexp"
"strings"
)
@@ -96,13 +95,6 @@ func (w *OrgWriter) writeNodes(ns ...Node) {
}
}
-var eolWhiteSpaceRegexp = regexp.MustCompile("[\t ]*\n")
-
-func (w *OrgWriter) String() string {
- s := w.stringBuilder.String()
- return eolWhiteSpaceRegexp.ReplaceAllString(s, "\n")
-}
-
func (w *OrgWriter) writeHeadline(h Headline) {
tmp := w.emptyClone()
tmp.WriteString(strings.Repeat("*", h.Lvl))
@@ -134,7 +126,11 @@ func (w *OrgWriter) writeHeadline(h Headline) {
}
func (w *OrgWriter) writeBlock(b Block) {
- w.WriteString(fmt.Sprintf("%s#+BEGIN_%s %s\n", w.indent, b.Name, strings.Join(b.Parameters, " ")))
+ w.WriteString(w.indent + "#+BEGIN_" + b.Name)
+ if len(b.Parameters) != 0 {
+ w.WriteString(" " + strings.Join(b.Parameters, " "))
+ }
+ w.WriteString("\n")
w.writeNodes(b.Children...)
w.WriteString(w.indent + "#+END_" + b.Name + "\n")
}
@@ -180,7 +176,6 @@ func (w *OrgWriter) writeListItem(li ListItem) {
}
func (w *OrgWriter) writeTable(t Table) {
- // TODO: pretty print tables
w.writeNodes(t.Header)
w.writeNodes(t.Rows...)
}
@@ -200,9 +195,12 @@ func (w *OrgWriter) writeTableSeparator(ts TableSeparator) {
func (w *OrgWriter) writeTableColumns(columns [][]Node) {
w.WriteString(w.indent + "| ")
- for _, columnNodes := range columns {
+ for i, columnNodes := range columns {
w.writeNodes(columnNodes...)
- w.WriteString(" | ")
+ w.WriteString(" |")
+ if i < len(columns)-1 {
+ w.WriteString(" ")
+ }
}
w.WriteString("\n")
}
@@ -212,8 +210,10 @@ func (w *OrgWriter) writeHorizontalRule(hr HorizontalRule) {
}
func (w *OrgWriter) writeLine(l Line) {
- w.WriteString(w.indent)
- w.writeNodes(l.Children...)
+ if len(l.Children) != 0 {
+ w.WriteString(w.indent)
+ w.writeNodes(l.Children...)
+ }
w.WriteString("\n")
}
diff --git a/org/testdata/example.html b/org/testdata/example.html
index d5a95b8..a332be9 100644
--- a/org/testdata/example.html
+++ b/org/testdata/example.html
@@ -1,33 +1,76 @@
Motivation
-To validate the parser we'll try printing the AST back to org-mode source - if that works we can be kind of sure that the parsing worked. At least I hope so - I would like to get around writing tests for the individual parsing functions...
+
+To validate the parser we'll try printing the AST back to org-mode source - if that
+works we can be kind of sure that the parsing worked.
+At least I hope so - I would like to get around writing tests for the individual parsing
+functions...
+
Headlines with TODO status, priority & tags
Headline with todo status & priority
Headline with TODO status
Headline with tags & priority
-this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...
+
+this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...
+
Lists
-- unordered list item 1
-unordered list item 2 - with inline markup
+-
+
+unordered list item 1
+
+
+-
+
+unordered list item 2 - with inline markup
+
-ordered sublist item 1
+-
+
+ordered sublist item 1
+
-- ordered sublist item 1
-- ordered sublist item 2
-- ordered sublist item 3
+-
+
+ordered sublist item 1
+
+
+-
+
+ordered sublist item 2
+
+
+-
+
+ordered sublist item 3
+
+
-- ordered sublist item 2
+-
+
+ordered sublist item 2
+
+
-unordered list item 3 - and a link and some lines of text
+-
+
+unordered list item 3 - and a link
+and some lines of text
+
-and another subitem
+-
+
+and another subitem
+
echo with a block
-and another one with a table
+-
+
+and another one with a table
+
-and text with an empty line in between as well!
+
+and text with an empty line in between as well!
+
-- unordered list item 4
+-
+
+unordered list item 4
+
+
Inline
-- emphasis and a hard line break
- see?
-- .emphasis with dot border chars.
-- emphasis with a slash/inside
-- emphasis followed by raw text with slash /
-- ->/not an emphasis/<-
-- links with slashes do not become emphasis: https://somelinkshouldntrenderaccidentalemphasis.com/ emphasis
-- underlined bold
verbatim code strikethrough
-- bold string with an *asterisk inside
-links
+-
+
+emphasis and a hard line break
+
+see?
+
+
+-
+
+.emphasis with dot border chars.
+
+
+-
+
+emphasis with a slash/inside
+
+
+-
+
+emphasis followed by raw text with slash /
+
+
+-
+
+->/not an emphasis/<-
+
+
+-
+
+links with slashes do not become emphasis: https://somelinkshouldntrenderaccidentalemphasis.com/ emphasis
+
+
+-
+
+underlined bold verbatim code strikethrough
+
+
+-
+
+bold string with an *asterisk inside
+
+
+-
+
+links
+
-- regular link https://example.com link without description
-- regular link example.com link with description
-- regular link to a file (image)

-- auto link, i.e. not inside
\[[square brackets]\] https://www.example.com
+-
+
+regular link https://example.com link without description
+
+
+-
+
+regular link example.com link with description
+
+
+-
+
+regular link to a file (image)
+
+
+-
+
+auto link, i.e. not inside \[[square brackets]\] https://www.example.com
+
+
@@ -73,43 +174,128 @@
and a second line
and a third one
-
-an example blockwith multiple lines
+\nan example block
+with multiple lines
Mongodb is very webscale
+issues from goorgeous (free test cases, yay!)
+#29: Support verse block
+
+This
+is
+verse
+
+
+or even a totally custom kind of block
+crazy ain't it?
+
+#47: Consecutive code wrapped text gets joined
+
+either this or that foo.
+either this
+or that foo.
+
+#68: Quote block with inline markup
+
+this is markup!
+
+#77: Recognize code --- as code plus dash
+#75: Not parsing nested lists correctly
+
+#78: Emphasis at beginning of line
+
+italics
+
+
+Text
+italics
+
+#82: Crash on empty headline
+
+
+just a space as title...
+
+#84: Paragraphs that are not followed by an empty line are not parsed correctly
+Foo
+
+Foo paragraph.
+
+Bar
+
+Bar paragraph
+
Footnotes
-- normal footnote reference
-- further references to the same footnote should not render duplicates in the footnote list
-- inline footnotes are also supported via .
+-
+
+normal footnote reference
+
+
+-
+
+further references to the same footnote should not render duplicates in the footnote list
+
+
+-
+
+inline footnotes are also supported via .
+
+
|