Improve handling of elements containing raw text

While adding another test case from the goorgeous issues it became clear that
inline markup and html entity replacement were erroneously applied to raw text
elements like inline code =foo=, src/example/export blocks, example lines,
etc.

To correctly handle those cases in both org and html exports a new
parseRawInline method had to be added.

Also includes some miscellaneous html export whitespace fixes.
This commit is contained in:
Niklas Fasching 2018-12-17 13:36:57 +01:00
parent ac2597af4c
commit 0eb3baf1bb
10 changed files with 191 additions and 45 deletions

View file

@ -27,7 +27,7 @@ func lexBlock(line string) (token, bool) {
func isRawTextBlock(name string) bool { return name == "SRC" || name == "EXAMPLE" || name == "EXPORT" } func isRawTextBlock(name string) bool { return name == "SRC" || name == "EXAMPLE" || name == "EXPORT" }
func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) { func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
t, start, lines := d.tokens[i], i, []string{} t, start := d.tokens[i], i
name, parameters := t.content, strings.Fields(t.matches[3]) name, parameters := t.content, strings.Fields(t.matches[3])
trim := trimIndentUpTo(d.tokens[i].lvl) trim := trimIndentUpTo(d.tokens[i].lvl)
stop := func(d *Document, i int) bool { stop := func(d *Document, i int) bool {
@ -35,11 +35,12 @@ func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
} }
block, consumed, i := Block{name, parameters, nil}, 0, i+1 block, consumed, i := Block{name, parameters, nil}, 0, i+1
if isRawTextBlock(name) { if isRawTextBlock(name) {
rawText := ""
for ; !stop(d, i); i++ { for ; !stop(d, i); i++ {
lines = append(lines, trim(d.tokens[i].matches[0])) rawText += trim(d.tokens[i].matches[0]) + "\n"
} }
consumed = i - start consumed = i - start
block.Children = []Node{Text{strings.Join(lines, "\n")}} block.Children = d.parseRawInline(rawText)
} else { } else {
consumed, block.Children = d.parseMany(i, stop) consumed, block.Children = d.parseMany(i, stop)
consumed++ // line with BEGIN consumed++ // line with BEGIN

View file

@ -20,7 +20,7 @@ func lexExample(line string) (token, bool) {
func (d *Document) parseExample(i int, parentStop stopFn) (int, Node) { func (d *Document) parseExample(i int, parentStop stopFn) (int, Node) {
example, start := Example{}, i example, start := Example{}, i
for ; !parentStop(d, i) && d.tokens[i].kind == "example"; i++ { for ; !parentStop(d, i) && d.tokens[i].kind == "example"; i++ {
example.Children = append(example.Children, Text{d.tokens[i].content}) example.Children = append(example.Children, Text{d.tokens[i].content, true})
} }
return i - start, example return i - start, example
} }

View file

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"html" "html"
"strings" "strings"
"unicode"
h "golang.org/x/net/html" h "golang.org/x/net/html"
"golang.org/x/net/html/atom" "golang.org/x/net/html/atom"
@ -13,6 +14,7 @@ type HTMLWriter struct {
stringBuilder stringBuilder
HighlightCodeBlock func(source, lang string) string HighlightCodeBlock func(source, lang string) string
FootnotesHeadingTitle string FootnotesHeadingTitle string
htmlEscape bool
} }
var emphasisTags = map[string][]string{ var emphasisTags = map[string][]string{
@ -34,6 +36,7 @@ var listTags = map[string][]string{
func NewHTMLWriter() *HTMLWriter { func NewHTMLWriter() *HTMLWriter {
return &HTMLWriter{ return &HTMLWriter{
htmlEscape: true,
FootnotesHeadingTitle: "Footnotes", FootnotesHeadingTitle: "Footnotes",
HighlightCodeBlock: func(source, lang string) string { HighlightCodeBlock: func(source, lang string) string {
return fmt.Sprintf("%s\n<pre>\n%s\n</pre>\n</div>", `<div class="highlight">`, html.EscapeString(source)) return fmt.Sprintf("%s\n<pre>\n%s\n</pre>\n</div>", `<div class="highlight">`, html.EscapeString(source))
@ -117,31 +120,34 @@ func (w *HTMLWriter) writeNodes(ns ...Node) {
} }
func (w *HTMLWriter) writeBlock(b Block) { func (w *HTMLWriter) writeBlock(b Block) {
content := ""
if isRawTextBlock(b.Name) {
exportWriter := w.emptyClone()
exportWriter.htmlEscape = false
exportWriter.writeNodes(b.Children...)
content = strings.TrimRightFunc(exportWriter.String(), unicode.IsSpace)
} else {
content = w.nodesAsString(b.Children...)
}
switch name := b.Name; { switch name := b.Name; {
case name == "SRC": case name == "SRC":
source, lang := b.Children[0].(Text).Content, "text" lang := "text"
if len(b.Parameters) >= 1 { if len(b.Parameters) >= 1 {
lang = strings.ToLower(b.Parameters[0]) lang = strings.ToLower(b.Parameters[0])
} }
w.WriteString(w.HighlightCodeBlock(source, lang) + "\n") w.WriteString(w.HighlightCodeBlock(content, lang) + "\n")
case name == "EXAMPLE": case name == "EXAMPLE":
w.WriteString(`<pre class="example">` + "\n") w.WriteString(`<pre class="example">` + "\n" + content + "\n</pre>\n")
w.writeNodes(b.Children...)
w.WriteString("\n</pre>\n")
case name == "EXPORT" && len(b.Parameters) >= 1 && strings.ToLower(b.Parameters[0]) == "html": case name == "EXPORT" && len(b.Parameters) >= 1 && strings.ToLower(b.Parameters[0]) == "html":
w.WriteString(b.Children[0].(Text).Content + "\n") w.WriteString(content + "\n")
case name == "QUOTE": case name == "QUOTE":
w.WriteString("<blockquote>\n") w.WriteString("<blockquote>\n" + content + "</blockquote>\n")
w.writeNodes(b.Children...)
w.WriteString("</blockquote>\n")
case name == "CENTER": case name == "CENTER":
w.WriteString(`<div class="center-block" style="text-align: center; margin-left: auto; margin-right: auto;">` + "\n") w.WriteString(`<div class="center-block" style="text-align: center; margin-left: auto; margin-right: auto;">` + "\n")
w.writeNodes(b.Children...) w.WriteString(content + "</div>\n")
w.WriteString("</div>\n")
default: default:
w.WriteString(fmt.Sprintf(`<div class="%s-block">`, strings.ToLower(b.Name)) + "\n") w.WriteString(fmt.Sprintf(`<div class="%s-block">`, strings.ToLower(b.Name)) + "\n")
w.writeNodes(b.Children...) w.WriteString(content + "</div>\n")
w.WriteString("</div>\n")
} }
} }
@ -205,7 +211,13 @@ func (w *HTMLWriter) writeHeadline(h Headline) {
} }
func (w *HTMLWriter) writeText(t Text) { func (w *HTMLWriter) writeText(t Text) {
w.WriteString(html.EscapeString(htmlEntityReplacer.Replace(t.Content))) if !w.htmlEscape {
w.WriteString(t.Content)
} else if t.IsRaw {
w.WriteString(html.EscapeString(t.Content))
} else {
w.WriteString(html.EscapeString(htmlEntityReplacer.Replace(t.Content)))
}
} }
func (w *HTMLWriter) writeEmphasis(e Emphasis) { func (w *HTMLWriter) writeEmphasis(e Emphasis) {
@ -219,7 +231,7 @@ func (w *HTMLWriter) writeEmphasis(e Emphasis) {
} }
func (w *HTMLWriter) writeLineBreak(l LineBreak) { func (w *HTMLWriter) writeLineBreak(l LineBreak) {
w.WriteString("\n") w.WriteString(strings.Repeat("\n", l.Count))
} }
func (w *HTMLWriter) writeExplicitLineBreak(l ExplicitLineBreak) { func (w *HTMLWriter) writeExplicitLineBreak(l ExplicitLineBreak) {
@ -298,7 +310,7 @@ func (w *HTMLWriter) writeExample(e Example) {
w.WriteString("\n") w.WriteString("\n")
} }
} }
w.WriteString("\n</pre>\n") w.WriteString("</pre>\n")
} }
func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) { func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) {

View file

@ -7,7 +7,10 @@ import (
"unicode" "unicode"
) )
type Text struct{ Content string } type Text struct {
Content string
IsRaw bool
}
type LineBreak struct{ Count int } type LineBreak struct{ Count int }
type ExplicitLineBreak struct{} type ExplicitLineBreak struct{}
@ -46,8 +49,10 @@ func (d *Document) parseInline(input string) (nodes []Node) {
consumed, node = d.parseSubOrSuperScript(input, current) consumed, node = d.parseSubOrSuperScript(input, current)
case '_': case '_':
consumed, node = d.parseSubScriptOrEmphasis(input, current) consumed, node = d.parseSubScriptOrEmphasis(input, current)
case '*', '/', '=', '~', '+': case '*', '/', '+':
consumed, node = d.parseEmphasis(input, current) consumed, node = d.parseEmphasis(input, current, false)
case '=', '~':
consumed, node = d.parseEmphasis(input, current, true)
case '[': case '[':
consumed, node = d.parseRegularLinkOrFootnoteReference(input, current) consumed, node = d.parseRegularLinkOrFootnoteReference(input, current)
case '\\': case '\\':
@ -60,7 +65,7 @@ func (d *Document) parseInline(input string) (nodes []Node) {
} }
if consumed != 0 { if consumed != 0 {
if current > previous { if current > previous {
nodes = append(nodes, Text{input[previous:current]}) nodes = append(nodes, Text{input[previous:current], false})
} }
if node != nil { if node != nil {
nodes = append(nodes, node) nodes = append(nodes, node)
@ -73,7 +78,28 @@ func (d *Document) parseInline(input string) (nodes []Node) {
} }
if previous < len(input) { if previous < len(input) {
nodes = append(nodes, Text{input[previous:]}) nodes = append(nodes, Text{input[previous:], false})
}
return nodes
}
func (d *Document) parseRawInline(input string) (nodes []Node) {
previous, current := 0, 0
for current < len(input) {
if input[current] == '\n' {
consumed, node := d.parseLineBreak(input, current)
if current > previous {
nodes = append(nodes, Text{input[previous:current], true})
}
nodes = append(nodes, node)
current += consumed
previous = current
} else {
current++
}
}
if previous < len(input) {
nodes = append(nodes, Text{input[previous:], true})
} }
return nodes return nodes
} }
@ -102,7 +128,7 @@ func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) {
func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) { func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil { if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil {
return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2]}}} return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2], false}}}
} }
return 0, nil return 0, nil
} }
@ -111,7 +137,7 @@ func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node)
if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 { if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 {
return consumed, node return consumed, node
} }
return d.parseEmphasis(input, start) return d.parseEmphasis(input, start, false)
} }
func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) { func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) {
@ -180,7 +206,7 @@ func (d *Document) parseRegularLink(input string, start int) (int, Node) {
return consumed, RegularLink{protocol, description, link, false} return consumed, RegularLink{protocol, description, link, false}
} }
func (d *Document) parseEmphasis(input string, start int) (int, Node) { func (d *Document) parseEmphasis(input string, start int, isRaw bool) (int, Node) {
marker, i := input[start], start marker, i := input[start], start
if !hasValidPreAndBorderChars(input, i) { if !hasValidPreAndBorderChars(input, i) {
return 0, nil return 0, nil
@ -191,6 +217,9 @@ func (d *Document) parseEmphasis(input string, start int) (int, Node) {
} }
if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) { if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
if isRaw {
return i + 1 - start, Emphasis{input[start : start+1], d.parseRawInline(input[start+1 : i])}
}
return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])} return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
} }
} }

View file

@ -117,7 +117,7 @@ func (d *Document) newInclude(k Keyword) (int, Node) {
if err != nil { if err != nil {
panic(fmt.Sprintf("bad include '#+INCLUDE: %s': %s", k.Value, err)) panic(fmt.Sprintf("bad include '#+INCLUDE: %s': %s", k.Value, err))
} }
return Block{strings.ToUpper(kind), []string{lang}, []Node{Text{string(bs)}}} return Block{strings.ToUpper(kind), []string{lang}, d.parseRawInline(string(bs))}
} }
} }
return 1, Include{k, resolve} return 1, Include{k, resolve}

View file

@ -140,16 +140,9 @@ func (w *OrgWriter) writeBlock(b Block) {
if len(b.Parameters) != 0 { if len(b.Parameters) != 0 {
w.WriteString(" " + strings.Join(b.Parameters, " ")) w.WriteString(" " + strings.Join(b.Parameters, " "))
} }
w.WriteString("\n") w.WriteString("\n" + w.indent)
w.writeNodes(b.Children...)
if isRawTextBlock(b.Name) { w.WriteString("#+END_" + b.Name + "\n")
for _, line := range strings.Split(b.Children[0].(Text).Content, "\n") {
w.WriteString(w.indent + line + "\n")
}
} else {
w.writeNodes(b.Children...)
}
w.WriteString(w.indent + "#+END_" + b.Name + "\n")
} }
func (w *OrgWriter) writeDrawer(d Drawer) { func (w *OrgWriter) writeDrawer(d Drawer) {

View file

@ -34,7 +34,6 @@ note that /inline/ *markup* ignored
examples like this examples like this
are also supported are also supported
note that /inline/ *markup* ignored note that /inline/ *markup* ignored
</pre> </pre>
<blockquote> <blockquote>
<p> <p>

View file

@ -88,7 +88,6 @@ unordered list item 4
with an example with an example
that spans multiple lines that spans multiple lines
</pre> </pre>
</li> </li>
</ul> </ul>

View file

@ -57,7 +57,6 @@ Still outside the drawer
This is inside the drawer This is inside the drawer
:END: :END:
Still outside the drawer Still outside the drawer
</pre> </pre>
</div> </div>
</li> </li>
@ -79,7 +78,6 @@ lines.
<p> <p>
Paragraphs can contain inline markup like <em>emphasis</em> <strong>strong</strong> and links <a href="https://www.example.com">example.com</a> and stuff. Paragraphs can contain inline markup like <em>emphasis</em> <strong>strong</strong> and links <a href="https://www.example.com">example.com</a> and stuff.
</p> </p>
</li> </li>
<li> <li>
<p> <p>
@ -87,7 +85,7 @@ example block
</p> </p>
<pre class="example"> <pre class="example">
language: go language: go
go: &#34;1.x&#34; go: "1.x"
script: script:
- make test - make test
- make generate-gh-pages - make generate-gh-pages
@ -100,7 +98,6 @@ deploy:
verbose: true verbose: true
on: on:
branch: master branch: master
</pre> </pre>
</li> </li>
</ul> </ul>
@ -270,3 +267,89 @@ Bar
<p> <p>
Bar paragraph Bar paragraph
</p> </p>
<h3>
<span class="todo">DONE</span>
<a href="https://github.com/chaseadamsio/goorgeous/issues/86">#86</a>: Multiple hyphens not converted to dashes
</h3>
<p>
just like #46
</p>
<ul>
<li>
<p>
<code class="verbatim">--</code> -&gt; (en dash)
</p>
</li>
<li>
<p>
<code class="verbatim">---</code> -&gt; — (em dash)
</p>
</li>
</ul>
<p>
also, consecutive dashes inside
</p>
<ul>
<li>
<p>
inline code <code class="verbatim">--</code> <code class="verbatim">---</code> and verbatim <code>--</code> <code>---</code>
</p>
</li>
<li>
<p>
src/example/export blocks should not be converted!
</p>
<div class="highlight">
<pre>
--, ---
</pre>
</div>
<pre class="example">
--, ---
</pre>
--, ---
<pre class="example">
--, ---
</pre>
</li>
</ul>
<h3>
<span class="todo">DONE</span>
<a href="https://github.com/chaseadamsio/goorgeous/issues/87">#87</a>: Markup in footnotes is rendered literally
</h3>
<p>
footnotes can contain <strong>markup</strong> - and other elements and stuff <sup class="footnote-reference"><a id="footnote-reference-2" href="#footnote-2">2</a></sup>
</p>
<div class="footnotes">
<h1 class="footnotes-title">Footnotes</h1>
<div class="footnote-definitions">
<div class="footnote-definition">
<sup id="footnote-1"><a href="#footnote-reference-1">1</a></sup>
<div class="footnote-body">
<p>
a footnote <em>with</em> <strong>markup</strong>
</p>
<ul>
<li>
<p>
and a <strong>list</strong>
</p>
</li>
<li>
<p>
because that&#39;s possible
</p>
</li>
</ul>
</div>
</div>
<div class="footnote-definition">
<sup id="footnote-2"><a href="#footnote-reference-2">2</a></sup>
<div class="footnote-body">
<p>
that also goes for <strong>inline</strong> footnote <em>definitions</em>
</p>
</div>
</div>
</div>
</div>

30
org/testdata/misc.org vendored
View file

@ -82,3 +82,33 @@ just a space as title...
Foo paragraph. Foo paragraph.
**** Bar **** Bar
Bar paragraph Bar paragraph
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/86][#86]]: Multiple hyphens not converted to dashes
just like #46
- =--= -> -- (en dash)
- =---= -> --- (em dash)
also, consecutive dashes inside
- inline code =--= =---= and verbatim ~--~ ~---~
- src/example/export blocks should not be converted!
#+BEGIN_SRC sh
--, ---
#+END_SRC
#+BEGIN_EXAMPLE
--, ---
#+END_EXAMPLE
#+BEGIN_EXPORT html
--, ---
#+END_EXPORT
: --, ---
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/87][#87]]: Markup in footnotes is rendered literally
footnotes can contain *markup* - and other elements and stuff [fn:2:that also goes for *inline* footnote /definitions/]
* Footnotes
[fn:1] a footnote /with/ *markup*
- and a *list*
- because that's possible