Improve handling of elements containing raw text
While adding another test case from the goorgeous issues it became clear that inline markup and HTML entity replacement were erroneously applied to raw text elements like inline code =foo=, src/example/export blocks, example lines, etc. To correctly handle those cases in both org and html exports a new parseRawInline method had to be added. Also includes some miscellaneous HTML export whitespace fixes.
This commit is contained in:
parent
ac2597af4c
commit
0eb3baf1bb
10 changed files with 191 additions and 45 deletions
|
@ -27,7 +27,7 @@ func lexBlock(line string) (token, bool) {
|
|||
// isRawTextBlock reports whether name is one of the block names whose body is
// kept as raw text: "SRC", "EXAMPLE" or "EXPORT". The comparison is exact and
// case-sensitive.
func isRawTextBlock(name string) bool {
	switch name {
	case "SRC", "EXAMPLE", "EXPORT":
		return true
	default:
		return false
	}
}
|
||||
|
||||
func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
|
||||
t, start, lines := d.tokens[i], i, []string{}
|
||||
t, start := d.tokens[i], i
|
||||
name, parameters := t.content, strings.Fields(t.matches[3])
|
||||
trim := trimIndentUpTo(d.tokens[i].lvl)
|
||||
stop := func(d *Document, i int) bool {
|
||||
|
@ -35,11 +35,12 @@ func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) {
|
|||
}
|
||||
block, consumed, i := Block{name, parameters, nil}, 0, i+1
|
||||
if isRawTextBlock(name) {
|
||||
rawText := ""
|
||||
for ; !stop(d, i); i++ {
|
||||
lines = append(lines, trim(d.tokens[i].matches[0]))
|
||||
rawText += trim(d.tokens[i].matches[0]) + "\n"
|
||||
}
|
||||
consumed = i - start
|
||||
block.Children = []Node{Text{strings.Join(lines, "\n")}}
|
||||
block.Children = d.parseRawInline(rawText)
|
||||
} else {
|
||||
consumed, block.Children = d.parseMany(i, stop)
|
||||
consumed++ // line with BEGIN
|
||||
|
|
|
@ -20,7 +20,7 @@ func lexExample(line string) (token, bool) {
|
|||
func (d *Document) parseExample(i int, parentStop stopFn) (int, Node) {
|
||||
example, start := Example{}, i
|
||||
for ; !parentStop(d, i) && d.tokens[i].kind == "example"; i++ {
|
||||
example.Children = append(example.Children, Text{d.tokens[i].content})
|
||||
example.Children = append(example.Children, Text{d.tokens[i].content, true})
|
||||
}
|
||||
return i - start, example
|
||||
}
|
||||
|
|
42
org/html.go
42
org/html.go
|
@ -4,6 +4,7 @@ import (
|
|||
"fmt"
|
||||
"html"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
h "golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
|
@ -13,6 +14,7 @@ type HTMLWriter struct {
|
|||
stringBuilder
|
||||
HighlightCodeBlock func(source, lang string) string
|
||||
FootnotesHeadingTitle string
|
||||
htmlEscape bool
|
||||
}
|
||||
|
||||
var emphasisTags = map[string][]string{
|
||||
|
@ -34,6 +36,7 @@ var listTags = map[string][]string{
|
|||
|
||||
func NewHTMLWriter() *HTMLWriter {
|
||||
return &HTMLWriter{
|
||||
htmlEscape: true,
|
||||
FootnotesHeadingTitle: "Footnotes",
|
||||
HighlightCodeBlock: func(source, lang string) string {
|
||||
return fmt.Sprintf("%s\n<pre>\n%s\n</pre>\n</div>", `<div class="highlight">`, html.EscapeString(source))
|
||||
|
@ -117,31 +120,34 @@ func (w *HTMLWriter) writeNodes(ns ...Node) {
|
|||
}
|
||||
|
||||
func (w *HTMLWriter) writeBlock(b Block) {
|
||||
content := ""
|
||||
if isRawTextBlock(b.Name) {
|
||||
exportWriter := w.emptyClone()
|
||||
exportWriter.htmlEscape = false
|
||||
exportWriter.writeNodes(b.Children...)
|
||||
content = strings.TrimRightFunc(exportWriter.String(), unicode.IsSpace)
|
||||
} else {
|
||||
content = w.nodesAsString(b.Children...)
|
||||
}
|
||||
switch name := b.Name; {
|
||||
case name == "SRC":
|
||||
source, lang := b.Children[0].(Text).Content, "text"
|
||||
lang := "text"
|
||||
if len(b.Parameters) >= 1 {
|
||||
lang = strings.ToLower(b.Parameters[0])
|
||||
}
|
||||
w.WriteString(w.HighlightCodeBlock(source, lang) + "\n")
|
||||
w.WriteString(w.HighlightCodeBlock(content, lang) + "\n")
|
||||
case name == "EXAMPLE":
|
||||
w.WriteString(`<pre class="example">` + "\n")
|
||||
w.writeNodes(b.Children...)
|
||||
w.WriteString("\n</pre>\n")
|
||||
w.WriteString(`<pre class="example">` + "\n" + content + "\n</pre>\n")
|
||||
case name == "EXPORT" && len(b.Parameters) >= 1 && strings.ToLower(b.Parameters[0]) == "html":
|
||||
w.WriteString(b.Children[0].(Text).Content + "\n")
|
||||
w.WriteString(content + "\n")
|
||||
case name == "QUOTE":
|
||||
w.WriteString("<blockquote>\n")
|
||||
w.writeNodes(b.Children...)
|
||||
w.WriteString("</blockquote>\n")
|
||||
w.WriteString("<blockquote>\n" + content + "</blockquote>\n")
|
||||
case name == "CENTER":
|
||||
w.WriteString(`<div class="center-block" style="text-align: center; margin-left: auto; margin-right: auto;">` + "\n")
|
||||
w.writeNodes(b.Children...)
|
||||
w.WriteString("</div>\n")
|
||||
w.WriteString(content + "</div>\n")
|
||||
default:
|
||||
w.WriteString(fmt.Sprintf(`<div class="%s-block">`, strings.ToLower(b.Name)) + "\n")
|
||||
w.writeNodes(b.Children...)
|
||||
w.WriteString("</div>\n")
|
||||
w.WriteString(content + "</div>\n")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -205,7 +211,13 @@ func (w *HTMLWriter) writeHeadline(h Headline) {
|
|||
}
|
||||
|
||||
func (w *HTMLWriter) writeText(t Text) {
|
||||
if !w.htmlEscape {
|
||||
w.WriteString(t.Content)
|
||||
} else if t.IsRaw {
|
||||
w.WriteString(html.EscapeString(t.Content))
|
||||
} else {
|
||||
w.WriteString(html.EscapeString(htmlEntityReplacer.Replace(t.Content)))
|
||||
}
|
||||
}
|
||||
|
||||
func (w *HTMLWriter) writeEmphasis(e Emphasis) {
|
||||
|
@ -219,7 +231,7 @@ func (w *HTMLWriter) writeEmphasis(e Emphasis) {
|
|||
}
|
||||
|
||||
func (w *HTMLWriter) writeLineBreak(l LineBreak) {
|
||||
w.WriteString("\n")
|
||||
w.WriteString(strings.Repeat("\n", l.Count))
|
||||
}
|
||||
|
||||
func (w *HTMLWriter) writeExplicitLineBreak(l ExplicitLineBreak) {
|
||||
|
@ -298,7 +310,7 @@ func (w *HTMLWriter) writeExample(e Example) {
|
|||
w.WriteString("\n")
|
||||
}
|
||||
}
|
||||
w.WriteString("\n</pre>\n")
|
||||
w.WriteString("</pre>\n")
|
||||
}
|
||||
|
||||
func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) {
|
||||
|
|
|
@ -7,7 +7,10 @@ import (
|
|||
"unicode"
|
||||
)
|
||||
|
||||
type Text struct{ Content string }
|
||||
type Text struct {
|
||||
Content string
|
||||
IsRaw bool
|
||||
}
|
||||
|
||||
type LineBreak struct{ Count int }
|
||||
type ExplicitLineBreak struct{}
|
||||
|
@ -46,8 +49,10 @@ func (d *Document) parseInline(input string) (nodes []Node) {
|
|||
consumed, node = d.parseSubOrSuperScript(input, current)
|
||||
case '_':
|
||||
consumed, node = d.parseSubScriptOrEmphasis(input, current)
|
||||
case '*', '/', '=', '~', '+':
|
||||
consumed, node = d.parseEmphasis(input, current)
|
||||
case '*', '/', '+':
|
||||
consumed, node = d.parseEmphasis(input, current, false)
|
||||
case '=', '~':
|
||||
consumed, node = d.parseEmphasis(input, current, true)
|
||||
case '[':
|
||||
consumed, node = d.parseRegularLinkOrFootnoteReference(input, current)
|
||||
case '\\':
|
||||
|
@ -60,7 +65,7 @@ func (d *Document) parseInline(input string) (nodes []Node) {
|
|||
}
|
||||
if consumed != 0 {
|
||||
if current > previous {
|
||||
nodes = append(nodes, Text{input[previous:current]})
|
||||
nodes = append(nodes, Text{input[previous:current], false})
|
||||
}
|
||||
if node != nil {
|
||||
nodes = append(nodes, node)
|
||||
|
@ -73,7 +78,28 @@ func (d *Document) parseInline(input string) (nodes []Node) {
|
|||
}
|
||||
|
||||
if previous < len(input) {
|
||||
nodes = append(nodes, Text{input[previous:]})
|
||||
nodes = append(nodes, Text{input[previous:], false})
|
||||
}
|
||||
return nodes
|
||||
}
|
||||
|
||||
func (d *Document) parseRawInline(input string) (nodes []Node) {
|
||||
previous, current := 0, 0
|
||||
for current < len(input) {
|
||||
if input[current] == '\n' {
|
||||
consumed, node := d.parseLineBreak(input, current)
|
||||
if current > previous {
|
||||
nodes = append(nodes, Text{input[previous:current], true})
|
||||
}
|
||||
nodes = append(nodes, node)
|
||||
current += consumed
|
||||
previous = current
|
||||
} else {
|
||||
current++
|
||||
}
|
||||
}
|
||||
if previous < len(input) {
|
||||
nodes = append(nodes, Text{input[previous:], true})
|
||||
}
|
||||
return nodes
|
||||
}
|
||||
|
@ -102,7 +128,7 @@ func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) {
|
|||
|
||||
func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
|
||||
if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil {
|
||||
return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2]}}}
|
||||
return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2], false}}}
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
|
@ -111,7 +137,7 @@ func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node)
|
|||
if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 {
|
||||
return consumed, node
|
||||
}
|
||||
return d.parseEmphasis(input, start)
|
||||
return d.parseEmphasis(input, start, false)
|
||||
}
|
||||
|
||||
func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) {
|
||||
|
@ -180,7 +206,7 @@ func (d *Document) parseRegularLink(input string, start int) (int, Node) {
|
|||
return consumed, RegularLink{protocol, description, link, false}
|
||||
}
|
||||
|
||||
func (d *Document) parseEmphasis(input string, start int) (int, Node) {
|
||||
func (d *Document) parseEmphasis(input string, start int, isRaw bool) (int, Node) {
|
||||
marker, i := input[start], start
|
||||
if !hasValidPreAndBorderChars(input, i) {
|
||||
return 0, nil
|
||||
|
@ -191,6 +217,9 @@ func (d *Document) parseEmphasis(input string, start int) (int, Node) {
|
|||
}
|
||||
|
||||
if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
|
||||
if isRaw {
|
||||
return i + 1 - start, Emphasis{input[start : start+1], d.parseRawInline(input[start+1 : i])}
|
||||
}
|
||||
return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -117,7 +117,7 @@ func (d *Document) newInclude(k Keyword) (int, Node) {
|
|||
if err != nil {
|
||||
panic(fmt.Sprintf("bad include '#+INCLUDE: %s': %s", k.Value, err))
|
||||
}
|
||||
return Block{strings.ToUpper(kind), []string{lang}, []Node{Text{string(bs)}}}
|
||||
return Block{strings.ToUpper(kind), []string{lang}, d.parseRawInline(string(bs))}
|
||||
}
|
||||
}
|
||||
return 1, Include{k, resolve}
|
||||
|
|
11
org/org.go
11
org/org.go
|
@ -140,16 +140,9 @@ func (w *OrgWriter) writeBlock(b Block) {
|
|||
if len(b.Parameters) != 0 {
|
||||
w.WriteString(" " + strings.Join(b.Parameters, " "))
|
||||
}
|
||||
w.WriteString("\n")
|
||||
|
||||
if isRawTextBlock(b.Name) {
|
||||
for _, line := range strings.Split(b.Children[0].(Text).Content, "\n") {
|
||||
w.WriteString(w.indent + line + "\n")
|
||||
}
|
||||
} else {
|
||||
w.WriteString("\n" + w.indent)
|
||||
w.writeNodes(b.Children...)
|
||||
}
|
||||
w.WriteString(w.indent + "#+END_" + b.Name + "\n")
|
||||
w.WriteString("#+END_" + b.Name + "\n")
|
||||
}
|
||||
|
||||
func (w *OrgWriter) writeDrawer(d Drawer) {
|
||||
|
|
1
org/testdata/blocks.html
vendored
1
org/testdata/blocks.html
vendored
|
@ -34,7 +34,6 @@ note that /inline/ *markup* ignored
|
|||
examples like this
|
||||
are also supported
|
||||
note that /inline/ *markup* ignored
|
||||
|
||||
</pre>
|
||||
<blockquote>
|
||||
<p>
|
||||
|
|
1
org/testdata/lists.html
vendored
1
org/testdata/lists.html
vendored
|
@ -88,7 +88,6 @@ unordered list item 4
|
|||
with an example
|
||||
|
||||
that spans multiple lines
|
||||
|
||||
</pre>
|
||||
</li>
|
||||
</ul>
|
||||
|
|
91
org/testdata/misc.html
vendored
91
org/testdata/misc.html
vendored
|
@ -57,7 +57,6 @@ Still outside the drawer
|
|||
This is inside the drawer
|
||||
:END:
|
||||
Still outside the drawer
|
||||
|
||||
</pre>
|
||||
</div>
|
||||
</li>
|
||||
|
@ -79,7 +78,6 @@ lines.
|
|||
<p>
|
||||
Paragraphs can contain inline markup like <em>emphasis</em> <strong>strong</strong> and links <a href="https://www.example.com">example.com</a> and stuff.
|
||||
</p>
|
||||
|
||||
</li>
|
||||
<li>
|
||||
<p>
|
||||
|
@ -87,7 +85,7 @@ example block
|
|||
</p>
|
||||
<pre class="example">
|
||||
language: go
|
||||
go: "1.x"
|
||||
go: "1.x"
|
||||
script:
|
||||
- make test
|
||||
- make generate-gh-pages
|
||||
|
@ -100,7 +98,6 @@ deploy:
|
|||
verbose: true
|
||||
on:
|
||||
branch: master
|
||||
|
||||
</pre>
|
||||
</li>
|
||||
</ul>
|
||||
|
@ -270,3 +267,89 @@ Bar
|
|||
<p>
|
||||
Bar paragraph
|
||||
</p>
|
||||
<h3>
|
||||
<span class="todo">DONE</span>
|
||||
<a href="https://github.com/chaseadamsio/goorgeous/issues/86">#86</a>: Multiple hyphens not converted to dashes
|
||||
</h3>
|
||||
<p>
|
||||
just like #46
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p>
|
||||
<code class="verbatim">--</code> -> – (en dash)
|
||||
</p>
|
||||
</li>
|
||||
<li>
|
||||
<p>
|
||||
<code class="verbatim">---</code> -> — (em dash)
|
||||
</p>
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
also, consecutive dashes inside
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p>
|
||||
inline code <code class="verbatim">--</code> <code class="verbatim">---</code> and verbatim <code>--</code> <code>---</code>
|
||||
</p>
|
||||
</li>
|
||||
<li>
|
||||
<p>
|
||||
src/example/export blocks should not be converted!
|
||||
</p>
|
||||
<div class="highlight">
|
||||
<pre>
|
||||
--, ---
|
||||
</pre>
|
||||
</div>
|
||||
<pre class="example">
|
||||
--, ---
|
||||
</pre>
|
||||
--, ---
|
||||
<pre class="example">
|
||||
--, ---
|
||||
</pre>
|
||||
</li>
|
||||
</ul>
|
||||
<h3>
|
||||
<span class="todo">DONE</span>
|
||||
<a href="https://github.com/chaseadamsio/goorgeous/issues/87">#87</a>: Markup in footnotes is rendered literally
|
||||
</h3>
|
||||
<p>
|
||||
footnotes can contain <strong>markup</strong> - and other elements and stuff <sup class="footnote-reference"><a id="footnote-reference-2" href="#footnote-2">2</a></sup>
|
||||
</p>
|
||||
<div class="footnotes">
|
||||
<h1 class="footnotes-title">Footnotes</h1>
|
||||
<div class="footnote-definitions">
|
||||
<div class="footnote-definition">
|
||||
<sup id="footnote-1"><a href="#footnote-reference-1">1</a></sup>
|
||||
<div class="footnote-body">
|
||||
<p>
|
||||
a footnote <em>with</em> <strong>markup</strong>
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
<p>
|
||||
and a <strong>list</strong>
|
||||
</p>
|
||||
</li>
|
||||
<li>
|
||||
<p>
|
||||
because that's possible
|
||||
</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="footnote-definition">
|
||||
<sup id="footnote-2"><a href="#footnote-reference-2">2</a></sup>
|
||||
<div class="footnote-body">
|
||||
<p>
|
||||
that also goes for <strong>inline</strong> footnote <em>definitions</em>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
30
org/testdata/misc.org
vendored
30
org/testdata/misc.org
vendored
|
@ -82,3 +82,33 @@ just a space as title...
|
|||
Foo paragraph.
|
||||
**** Bar
|
||||
Bar paragraph
|
||||
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/86][#86]]: Multiple hyphens not converted to dashes
|
||||
just like #46
|
||||
- =--= -> -- (en dash)
|
||||
- =---= -> --- (em dash)
|
||||
|
||||
also, consecutive dashes inside
|
||||
- inline code =--= =---= and verbatim ~--~ ~---~
|
||||
- src/example/export blocks should not be converted!
|
||||
#+BEGIN_SRC sh
|
||||
--, ---
|
||||
#+END_SRC
|
||||
|
||||
#+BEGIN_EXAMPLE
|
||||
--, ---
|
||||
#+END_EXAMPLE
|
||||
|
||||
#+BEGIN_EXPORT html
|
||||
--, ---
|
||||
#+END_EXPORT
|
||||
|
||||
: --, ---
|
||||
|
||||
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/87][#87]]: Markup in footnotes is rendered literally
|
||||
footnotes can contain *markup* - and other elements and stuff [fn:2:that also goes for *inline* footnote /definitions/]
|
||||
|
||||
* Footnotes
|
||||
|
||||
[fn:1] a footnote /with/ *markup*
|
||||
- and a *list*
|
||||
- because that's possible
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue