Refactor space handling of writers

I went through the issues of goorgeous and picked a few that seemed easy enough
to add (and added some more as todos for later). That helped a lot and showed
some bugs / edge cases that required changes.

- the org writer wrote a lot of eol spaces and just removed them whenever
  String() was actually called. That worked until now but did not bode well for
  rendering an empty headline - by removing ALL eol space we would render "* "
  back as just "*" -> not a headline anymore.
- the html writer had some special handling for line spacing inside paragraphs
  and list items - with the introduction of more blocks we need that handling
  everywhere.
  As browsers / html renderers are nice enough to collapse whitespace (and
  especially collapse "\s*\n" into " ") we can just write out the newlines and
  let the renderer take care of the rest.
This commit is contained in:
Niklas Fasching 2018-12-02 20:58:06 +01:00
parent 0df8bc541b
commit 592be07cfd
6 changed files with 339 additions and 98 deletions

View file

@ -3,18 +3,23 @@ A basic org-mode parser in go
- have an org-mode AST to play around with building an org-mode language server
- hopefully add reasonable org-mode support to hugo - sadly [[https://github.com/chaseadamsio/goorgeous][goorgeous]] is broken & abandoned
* next
- handle #+RESULTS: raw and stuff
- hugo frontmatter - see https://gohugo.io/content-management/front-matter/
- captions: images, tables & blocks
*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/72][#72:]] Support for #+ATTR_HTML
*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/46][#46]]: Support for symbols like ndash and mdash
- see org-entities replacement: see org-entities-help
*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/10][#10]]: Support noexport
*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/19][#19]]: Support #+HTML
*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/31][#31]]: Support #+INCLUDE
- see https://orgmode.org/manual/Include-files.html
*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/33][#33]]: Wrong output when mixing html with org-mode
* later
- affiliated keywords
see org-element.el - org-element-affiliated-keywords
- affiliated keywords: see org-element.el - org-element-affiliated-keywords
- keywords: support both multi (e.g. LINK, TODO) & normal (e.g. AUTHOR, TITLE) keywords
- links based on #+LINK
- includes https://orgmode.org/manual/Include-files.html
could be used to have a single org file (ignored via hugo ignoreFiles) and then for each post a file including the relevant headline
- tables
colgroups https://orgmode.org/worg/org-tutorials/tables.html
- org-entities replacement: see org-entities-help
- table colgroups https://orgmode.org/worg/org-tutorials/tables.html
- table pretty printing
* resources
- syntax
- https://orgmode.org/worg/dev/org-syntax.html

View file

@ -32,7 +32,12 @@ func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
if parentStop(d, i) {
return 0, nil
}
nodes = append(nodes, Line{[]Node{Text{trim(d.tokens[i].matches[0])}}})
text := trim(d.tokens[i].matches[0])
if name == "SRC" || name == "EXAMPLE" {
nodes = append(nodes, Line{[]Node{Text{text}}})
} else {
nodes = append(nodes, Line{d.parseInline(text)})
}
}
return i + 1 - start, Block{name, parameters, nodes}
}

View file

@ -102,15 +102,6 @@ func (w *HTMLWriter) writeNodes(ns ...Node) {
}
}
func (w *HTMLWriter) writeLines(lines []Node) {
for i, line := range lines {
w.writeNodes(line)
if i != len(lines)-1 && line.(Line).Children != nil {
w.WriteString(" ")
}
}
}
func (w *HTMLWriter) writeBlock(b Block) {
switch b.Name {
case "SRC":
@ -125,17 +116,22 @@ func (w *HTMLWriter) writeBlock(b Block) {
w.WriteString(w.HighlightCodeBlock(strings.Join(lines, "\n"), lang))
w.WriteString("\n</code>\n")
case "EXAMPLE":
w.WriteString(`<pre class="example">` + "\n")
w.WriteString(`<pre class="example">\n`)
w.writeNodes(b.Children...)
w.WriteString("\n</pre>\n")
w.WriteString("</pre>\n")
case "QUOTE":
w.WriteString("<blockquote>\n")
w.writeNodes(b.Children...)
w.WriteString("\n</blockquote>\n")
w.WriteString("</blockquote>\n")
case "CENTER":
w.WriteString(`<div style="text-align: center; margin-left: auto; margin-right: auto;">` + "\n")
w.WriteString(`<p class="center-block" style="text-align: center; margin-left: auto; margin-right: auto;">` + "\n")
w.writeNodes(b.Children...)
w.WriteString("\n</div>\n")
w.WriteString("</p>\n")
default:
w.WriteString(fmt.Sprintf(`<p class="%s-block">`, strings.ToLower(b.Name)) + "\n")
w.writeNodes(b.Children...)
w.WriteString("</p>\n")
}
}
@ -159,6 +155,7 @@ func (w *HTMLWriter) writeFootnotes(d *Document) {
}
w.WriteString("</div>\n</div>\n")
}
func (w *HTMLWriter) writeHeadline(h Headline) {
w.WriteString(fmt.Sprintf("<h%d>", h.Lvl))
w.writeNodes(h.Title...)
@ -219,27 +216,24 @@ func (w *HTMLWriter) writeList(l List) {
}
func (w *HTMLWriter) writeListItem(li ListItem) {
w.WriteString("<li>")
if len(li.Children) == 1 {
if p, ok := li.Children[0].(Paragraph); ok {
w.writeLines(p.Children)
}
} else {
w.WriteString("<li>\n")
w.writeNodes(li.Children...)
}
w.WriteString("</li>\n")
}
func (w *HTMLWriter) writeLine(l Line) {
if len(l.Children) != 0 {
w.writeNodes(l.Children...)
w.WriteString("\n")
}
}
func (w *HTMLWriter) writeParagraph(p Paragraph) {
if len(p.Children) == 1 && p.Children[0].(Line).Children == nil {
return
}
w.WriteString("<p>")
w.writeLines(p.Children)
w.WriteString("<p>\n")
w.writeNodes(p.Children...)
w.WriteString("</p>\n")
}
@ -248,15 +242,15 @@ func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) {
}
func (w *HTMLWriter) writeTable(t Table) {
w.WriteString("<table>")
w.WriteString("<table>\n")
w.writeNodes(t.Header)
w.WriteString("<tbody>")
w.WriteString("<tbody>\n")
w.writeNodes(t.Rows...)
w.WriteString("</tbody>\n</table>\n")
}
func (w *HTMLWriter) writeTableRow(t TableRow) {
w.WriteString("\n<tr>\n")
w.WriteString("<tr>\n")
for _, column := range t.Columns {
w.WriteString("<td>")
w.writeNodes(column...)
@ -266,7 +260,7 @@ func (w *HTMLWriter) writeTableRow(t TableRow) {
}
func (w *HTMLWriter) writeTableHeader(t TableHeader) {
w.WriteString("\n<thead>\n")
w.WriteString("<thead>\n")
for _, column := range t.Columns {
w.WriteString("<th>")
w.writeNodes(column...)
@ -276,5 +270,5 @@ func (w *HTMLWriter) writeTableHeader(t TableHeader) {
}
func (w *HTMLWriter) writeTableSeparator(t TableSeparator) {
w.WriteString("\n<tr></tr>\n")
w.WriteString("<tr></tr>\n")
}

View file

@ -2,7 +2,6 @@ package org
import (
"fmt"
"regexp"
"strings"
)
@ -96,13 +95,6 @@ func (w *OrgWriter) writeNodes(ns ...Node) {
}
}
var eolWhiteSpaceRegexp = regexp.MustCompile("[\t ]*\n")
func (w *OrgWriter) String() string {
s := w.stringBuilder.String()
return eolWhiteSpaceRegexp.ReplaceAllString(s, "\n")
}
func (w *OrgWriter) writeHeadline(h Headline) {
tmp := w.emptyClone()
tmp.WriteString(strings.Repeat("*", h.Lvl))
@ -134,7 +126,11 @@ func (w *OrgWriter) writeHeadline(h Headline) {
}
func (w *OrgWriter) writeBlock(b Block) {
w.WriteString(fmt.Sprintf("%s#+BEGIN_%s %s\n", w.indent, b.Name, strings.Join(b.Parameters, " ")))
w.WriteString(w.indent + "#+BEGIN_" + b.Name)
if len(b.Parameters) != 0 {
w.WriteString(" " + strings.Join(b.Parameters, " "))
}
w.WriteString("\n")
w.writeNodes(b.Children...)
w.WriteString(w.indent + "#+END_" + b.Name + "\n")
}
@ -180,7 +176,6 @@ func (w *OrgWriter) writeListItem(li ListItem) {
}
func (w *OrgWriter) writeTable(t Table) {
// TODO: pretty print tables
w.writeNodes(t.Header)
w.writeNodes(t.Rows...)
}
@ -200,9 +195,12 @@ func (w *OrgWriter) writeTableSeparator(ts TableSeparator) {
func (w *OrgWriter) writeTableColumns(columns [][]Node) {
w.WriteString(w.indent + "| ")
for _, columnNodes := range columns {
for i, columnNodes := range columns {
w.writeNodes(columnNodes...)
w.WriteString(" |")
if i < len(columns)-1 {
w.WriteString(" ")
}
}
w.WriteString("\n")
}
@ -212,8 +210,10 @@ func (w *OrgWriter) writeHorizontalRule(hr HorizontalRule) {
}
func (w *OrgWriter) writeLine(l Line) {
if len(l.Children) != 0 {
w.WriteString(w.indent)
w.writeNodes(l.Children...)
}
w.WriteString("\n")
}

View file

@ -1,33 +1,76 @@
<h1>Motivation</h1>
<p>To validate the parser we&#39;ll try printing the AST back to org-mode source - if that works we can be kind of sure that the parsing worked. At least I hope so - I would like to get around writing tests for the individual parsing functions... </p>
<p>
To validate the parser we&#39;ll try printing the AST back to org-mode source - if that
works we can be kind of sure that the parsing worked.
At least I hope so - I would like to get around writing tests for the individual parsing
functions...
</p>
<h2>Headlines with TODO status, priority &amp; tags</h2>
<h3>Headline with todo status &amp; priority</h3>
<h3>Headline with TODO status</h3>
<h3>Headline with tags &amp; priority</h3>
<p>this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...</p>
<p>
this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...
</p>
<h2>Lists</h2>
<ul>
<li>unordered list item 1</li>
<li><p>unordered list item 2 - with <code>inline</code> <em>markup</em></p>
<li>
<p>
unordered list item 1
</p>
</li>
<li>
<p>
unordered list item 2 - with <code>inline</code> <em>markup</em>
</p>
<ol>
<li><p>ordered sublist item 1</p>
<li>
<p>
ordered sublist item 1
</p>
<ol>
<li>ordered sublist item 1</li>
<li>ordered sublist item 2</li>
<li>ordered sublist item 3</li>
<li>
<p>
ordered sublist item 1
</p>
</li>
<li>
<p>
ordered sublist item 2
</p>
</li>
<li>
<p>
ordered sublist item 3
</p>
</li>
</ol>
</li>
<li>ordered sublist item 2</li>
<li>
<p>
ordered sublist item 2
</p>
</li>
</ol>
</li>
<li><p>unordered list item 3 - and a <a href="https://example.com">link</a> and some lines of text</p>
<li>
<p>
unordered list item 3 - and a <a href="https://example.com">link</a>
and some lines of text
</p>
<ol>
<li><p>and another subitem</p>
<li>
<p>
and another subitem
</p>
<code class="src src-sh">
<pre>echo with a block</pre>
</code>
</li>
<li><p>and another one with a table</p>
<li>
<p>
and another one with a table
</p>
<table>
<thead>
<th>a</th><th>b</th><th>c</th>
@ -38,29 +81,87 @@
</tr>
</tbody>
</table>
<p>and text with an empty line in between as well!</p>
<p>
and text with an empty line in between as well!
</p>
</li>
</ol>
</li>
<li>unordered list item 4 </li>
<li>
<p>
unordered list item 4
</p>
</li>
</ul>
<h2>Inline</h2>
<ul>
<li><em>emphasis</em> and a hard line break <br>
see?</li>
<li><em>.emphasis with dot border chars.</em></li>
<li><em>emphasis with a slash/inside</em></li>
<li><em>emphasis</em> followed by raw text with slash /</li>
<li>-&gt;/not an emphasis/&lt;-</li>
<li>links with slashes do not become <em>emphasis</em>: <a href="https://somelinkshouldntrenderaccidentalemphasis.com">https://somelinkshouldntrenderaccidentalemphasis.com</a>/ <em>emphasis</em></li>
<li><span style="text-decoration: underline;">underlined</span> <strong>bold</strong> <code class="verbatim">verbatim</code> <code>code</code> <del>strikethrough</del></li>
<li><strong>bold string with an *asterisk inside</strong></li>
<li><p>links</p>
<li>
<p>
<em>emphasis</em> and a hard line break <br>
see?
</p>
</li>
<li>
<p>
<em>.emphasis with dot border chars.</em>
</p>
</li>
<li>
<p>
<em>emphasis with a slash/inside</em>
</p>
</li>
<li>
<p>
<em>emphasis</em> followed by raw text with slash /
</p>
</li>
<li>
<p>
-&gt;/not an emphasis/&lt;-
</p>
</li>
<li>
<p>
links with slashes do not become <em>emphasis</em>: <a href="https://somelinkshouldntrenderaccidentalemphasis.com">https://somelinkshouldntrenderaccidentalemphasis.com</a>/ <em>emphasis</em>
</p>
</li>
<li>
<p>
<span style="text-decoration: underline;">underlined</span> <strong>bold</strong> <code class="verbatim">verbatim</code> <code>code</code> <del>strikethrough</del>
</p>
</li>
<li>
<p>
<strong>bold string with an *asterisk inside</strong>
</p>
</li>
<li>
<p>
links
</p>
<ol>
<li>regular link <a href="https://example.com">https://example.com</a> link without description</li>
<li>regular link <a href="https://example.com">example.com</a> link with description</li>
<li>regular link to a file (image) <img src="my-img.png" alt="file:my-img.png" title="file:my-img.png" /></li>
<li>auto link, i.e. not inside <code class="verbatim">\[[square brackets]\]</code> <a href="https://www.example.com">https://www.example.com</a></li>
<li>
<p>
regular link <a href="https://example.com">https://example.com</a> link without description
</p>
</li>
<li>
<p>
regular link <a href="https://example.com">example.com</a> link with description
</p>
</li>
<li>
<p>
regular link to a file (image) <img src="my-img.png" alt="file:my-img.png" title="file:my-img.png" />
</p>
</li>
<li>
<p>
auto link, i.e. not inside <code class="verbatim">\[[square brackets]\]</code> <a href="https://www.example.com">https://www.example.com</a>
</p>
</li>
</ol>
</li>
</ul>
@ -73,43 +174,128 @@
and a second line
and a third one</pre>
</code>
<pre class="example">
an example blockwith multiple lines
<pre class="example">\nan example block
with multiple lines
</pre>
<blockquote>
Mongodb is very webscale
</blockquote>
<h2>issues from goorgeous (free test cases, yay!)</h2>
<h3><a href="https://github.com/chaseadamsio/goorgeous/issues/29">#29:</a> Support verse block</h3>
<p class="verse-block">
This
<strong>is</strong>
verse
</p>
<p class="custom-block">
or even a <strong>totally</strong> <em>custom</em> kind of block
crazy ain&#39;t it?
</p>
<h3><a href="https://github.com/chaseadamsio/goorgeous/issues/47">#47:</a> Consecutive <code>code</code> wrapped text gets joined</h3>
<p>
either <code>this</code> or <code>that</code> foo.
either <code>this</code>
or <code>that</code> foo.
</p>
<h3><a href="https://github.com/chaseadamsio/goorgeous/issues/68">#68</a>: Quote block with inline markup</h3>
<blockquote>
<a href="www.example.com"><em>this</em> <strong>is</strong> <span style="text-decoration: underline;">markup</span>!</a>
</blockquote>
<h3><a href="https://github.com/chaseadamsio/goorgeous/issues/77">#77</a>: Recognize <code class="verbatim">code</code>--- as code plus dash</h3>
<h3><a href="https://github.com/chaseadamsio/goorgeous/issues/75">#75</a>: Not parsing nested lists correctly</h3>
<ul>
<li>
<p>
bullet 1
</p>
<ul>
<li>
<p>
sub bullet
</p>
</li>
</ul>
</li>
</ul>
<h3><a href="https://github.com/chaseadamsio/goorgeous/issues/78">#78</a>: Emphasis at beginning of line</h3>
<p>
<em>italics</em>
</p>
<p>
Text
<em>italics</em>
</p>
<h3><a href="https://github.com/chaseadamsio/goorgeous/issues/82">#82</a>: Crash on empty headline</h3>
<h4></h4>
<p>
just a space as title...
</p>
<h3><a href="https://github.com/chaseadamsio/goorgeous/issues/84">#84</a>: Paragraphs that are not followed by an empty line are not parsed correctly</h3>
<h4>Foo</h4>
<p>
Foo paragraph.
</p>
<h4>Bar</h4>
<p>
Bar paragraph
</p>
<h2>Footnotes</h2>
<ul>
<li>normal footnote reference <sup class="footnote-reference"><a href="#footnote-1">1</a></sup></li>
<li>further references to the same footnote should not <sup class="footnote-reference"><a href="#footnote-1">1</a></sup> render duplicates in the footnote list</li>
<li>inline footnotes are also supported via <sup class="footnote-reference"><a href="#footnote-2">2</a></sup>. </li>
<li>
<p>
normal footnote reference <sup class="footnote-reference"><a href="#footnote-1">1</a></sup>
</p>
</li>
<li>
<p>
further references to the same footnote should not <sup class="footnote-reference"><a href="#footnote-1">1</a></sup> render duplicates in the footnote list
</p>
</li>
<li>
<p>
inline footnotes are also supported via <sup class="footnote-reference"><a href="#footnote-2">2</a></sup>.
</p>
</li>
</ul>
<div id="footnotes">
<h1 class="footnotes-title">Footnotes</h1>
<div class="footnote-definitions">
<div class="footnote-definition">
<sup id="footnote-1">1</sup>
<p><a href="https://www.example.com">https://www.example.com</a></p>
<p>
<a href="https://www.example.com">https://www.example.com</a>
</p>
<ul>
<li>footnotes can contain <strong>markup</strong></li>
<li><p>and other elements</p>
<li>
<p>
footnotes can contain <strong>markup</strong>
</p>
</li>
<li>
<p>
and other elements
</p>
<ul>
<li><p>like blocks</p>
<li>
<p>
like blocks
</p>
<code class="src src-text">
<pre>other non-plain</pre>
</code>
</li>
<li><p>and tables</p>
<table><tbody>
<li>
<p>
and tables
</p>
<table>
<tbody>
<tr>
<td>1</td><td>a</td>
</tr>
<tr>
<td>2</td><td>b</td>
</tr>
<tr>
<td>3</td><td>c</td>
</tr>
@ -122,22 +308,33 @@ Mongodb is very webscale
</div>
<div class="footnote-definition">
<sup id="footnote-3">3</sup>
<p><a href="http://example.com/unused-footnote">example.com/unused-footnote</a> </p>
<p>
<a href="http://example.com/unused-footnote">example.com/unused-footnote</a>
</p>
</div>
<div class="footnote-definition">
<sup id="footnote-4">4</sup>
<p>another unused footnote </p>
<p>
another unused footnote
</p>
</div>
<div class="footnote-definition">
<sup id="footnote-5">5</sup>
<p>another unused footnote </p>
<p>
another unused footnote
</p>
</div>
<div class="footnote-definition">
<sup id="footnote-6">6</sup>
<p>another unused footnote</p>
<p>
another unused footnote
</p>
</div>
<div class="footnote-definition">
<sup id="footnote-2">2</sup>
the inline footnote definition</div>
<p>
the inline footnote definition
</p>
</div>
</div>
</div>

View file

@ -70,6 +70,46 @@ with multiple lines
#+BEGIN_QUOTE
Mongodb is very webscale
#+END_QUOTE
** issues from goorgeous (free test cases, yay!)
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/29][#29:]] Support verse block
#+BEGIN_VERSE
This
*is*
verse
#+END_VERSE
#+BEGIN_CUSTOM
or even a *totally* /custom/ kind of block
crazy ain't it?
#+END_CUSTOM
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/47][#47:]] Consecutive ~code~ wrapped text gets joined
either ~this~ or ~that~ foo.
either ~this~
or ~that~ foo.
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/68][#68]]: Quote block with inline markup
#+BEGIN_QUOTE
[[www.example.com][/this/ *is* _markup_!]]
#+END_QUOTE
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/77][#77]]: Recognize =code=--- as code plus dash
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/75][#75]]: Not parsing nested lists correctly
- bullet 1
- sub bullet
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/78][#78]]: Emphasis at beginning of line
/italics/
Text
/italics/
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/82][#82]]: Crash on empty headline
****
just a space as title...
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/84][#84]]: Paragraphs that are not followed by an empty line are not parsed correctly
**** Foo
Foo paragraph.
**** Bar
Bar paragraph
** Footnotes
- normal footnote reference [fn:1]
- further references to the same footnote should not [fn:1] render duplicates in the footnote list