From 0eb3baf1bbbe2ac36a7e17509047dc78b44c305f Mon Sep 17 00:00:00 2001 From: Niklas Fasching Date: Mon, 17 Dec 2018 13:36:57 +0100 Subject: [PATCH] Improve handling of elements containing raw text While adding another test case from the goorgeous issues it became clear that inline markup and html entity replacement were erroneously applied to raw text elements like inline code =foo=, src/example/export blocks, example lines, etc. To correctly handle those cases in both org and html exports a new parseRawInline method had to be added. Also some misc html export whitespace fixes and stuff --- org/block.go | 7 ++-- org/example.go | 2 +- org/html.go | 44 ++++++++++++------- org/inline.go | 45 ++++++++++++++++---- org/keyword.go | 2 +- org/org.go | 13 ++---- org/testdata/blocks.html | 1 - org/testdata/lists.html | 1 - org/testdata/misc.html | 91 ++++++++++++++++++++++++++++++++++++++-- org/testdata/misc.org | 30 +++++++++++++ 10 files changed, 191 insertions(+), 45 deletions(-) diff --git a/org/block.go b/org/block.go index 9668f3b..6ba264f 100644 --- a/org/block.go +++ b/org/block.go @@ -27,7 +27,7 @@ func lexBlock(line string) (token, bool) { func isRawTextBlock(name string) bool { return name == "SRC" || name == "EXAMPLE" || name == "EXPORT" } func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) { - t, start, lines := d.tokens[i], i, []string{} + t, start := d.tokens[i], i name, parameters := t.content, strings.Fields(t.matches[3]) trim := trimIndentUpTo(d.tokens[i].lvl) stop := func(d *Document, i int) bool { @@ -35,11 +35,12 @@ func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) { } block, consumed, i := Block{name, parameters, nil}, 0, i+1 if isRawTextBlock(name) { + rawText := "" for ; !stop(d, i); i++ { - lines = append(lines, trim(d.tokens[i].matches[0])) + rawText += trim(d.tokens[i].matches[0]) + "\n" } consumed = i - start - block.Children = []Node{Text{strings.Join(lines, "\n")}} + block.Children = d.parseRawInline(rawText) } 
else { consumed, block.Children = d.parseMany(i, stop) consumed++ // line with BEGIN diff --git a/org/example.go b/org/example.go index 9d716f3..9e737cb 100644 --- a/org/example.go +++ b/org/example.go @@ -20,7 +20,7 @@ func lexExample(line string) (token, bool) { func (d *Document) parseExample(i int, parentStop stopFn) (int, Node) { example, start := Example{}, i for ; !parentStop(d, i) && d.tokens[i].kind == "example"; i++ { - example.Children = append(example.Children, Text{d.tokens[i].content}) + example.Children = append(example.Children, Text{d.tokens[i].content, true}) } return i - start, example } diff --git a/org/html.go b/org/html.go index d562345..e34f5d0 100644 --- a/org/html.go +++ b/org/html.go @@ -4,6 +4,7 @@ import ( "fmt" "html" "strings" + "unicode" h "golang.org/x/net/html" "golang.org/x/net/html/atom" @@ -13,6 +14,7 @@ type HTMLWriter struct { stringBuilder HighlightCodeBlock func(source, lang string) string FootnotesHeadingTitle string + htmlEscape bool } var emphasisTags = map[string][]string{ @@ -34,6 +36,7 @@ var listTags = map[string][]string{ func NewHTMLWriter() *HTMLWriter { return &HTMLWriter{ + htmlEscape: true, FootnotesHeadingTitle: "Footnotes", HighlightCodeBlock: func(source, lang string) string { return fmt.Sprintf("%s\n
\n%s\n
\n", `
`, html.EscapeString(source)) @@ -117,31 +120,34 @@ func (w *HTMLWriter) writeNodes(ns ...Node) { } func (w *HTMLWriter) writeBlock(b Block) { + content := "" + if isRawTextBlock(b.Name) { + exportWriter := w.emptyClone() + exportWriter.htmlEscape = false + exportWriter.writeNodes(b.Children...) + content = strings.TrimRightFunc(exportWriter.String(), unicode.IsSpace) + } else { + content = w.nodesAsString(b.Children...) + } switch name := b.Name; { case name == "SRC": - source, lang := b.Children[0].(Text).Content, "text" + lang := "text" if len(b.Parameters) >= 1 { lang = strings.ToLower(b.Parameters[0]) } - w.WriteString(w.HighlightCodeBlock(source, lang) + "\n") + w.WriteString(w.HighlightCodeBlock(content, lang) + "\n") case name == "EXAMPLE": - w.WriteString(`
` + "\n")
-		w.writeNodes(b.Children...)
-		w.WriteString("\n
\n") + w.WriteString(`
` + "\n" + content + "\n
\n") case name == "EXPORT" && len(b.Parameters) >= 1 && strings.ToLower(b.Parameters[0]) == "html": - w.WriteString(b.Children[0].(Text).Content + "\n") + w.WriteString(content + "\n") case name == "QUOTE": - w.WriteString("
\n") - w.writeNodes(b.Children...) - w.WriteString("
\n") + w.WriteString("
\n" + content + "
\n") case name == "CENTER": w.WriteString(`
` + "\n") - w.writeNodes(b.Children...) - w.WriteString("
\n") + w.WriteString(content + "
\n") default: w.WriteString(fmt.Sprintf(`
`, strings.ToLower(b.Name)) + "\n") - w.writeNodes(b.Children...) - w.WriteString("
\n") + w.WriteString(content + "\n") } } @@ -205,7 +211,13 @@ func (w *HTMLWriter) writeHeadline(h Headline) { } func (w *HTMLWriter) writeText(t Text) { - w.WriteString(html.EscapeString(htmlEntityReplacer.Replace(t.Content))) + if !w.htmlEscape { + w.WriteString(t.Content) + } else if t.IsRaw { + w.WriteString(html.EscapeString(t.Content)) + } else { + w.WriteString(html.EscapeString(htmlEntityReplacer.Replace(t.Content))) + } } func (w *HTMLWriter) writeEmphasis(e Emphasis) { @@ -219,7 +231,7 @@ func (w *HTMLWriter) writeEmphasis(e Emphasis) { } func (w *HTMLWriter) writeLineBreak(l LineBreak) { - w.WriteString("\n") + w.WriteString(strings.Repeat("\n", l.Count)) } func (w *HTMLWriter) writeExplicitLineBreak(l ExplicitLineBreak) { @@ -298,7 +310,7 @@ func (w *HTMLWriter) writeExample(e Example) { w.WriteString("\n") } } - w.WriteString("\n\n") + w.WriteString("\n") } func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) { diff --git a/org/inline.go b/org/inline.go index e11fd45..52f2ff7 100644 --- a/org/inline.go +++ b/org/inline.go @@ -7,7 +7,10 @@ import ( "unicode" ) -type Text struct{ Content string } +type Text struct { + Content string + IsRaw bool +} type LineBreak struct{ Count int } type ExplicitLineBreak struct{} @@ -46,8 +49,10 @@ func (d *Document) parseInline(input string) (nodes []Node) { consumed, node = d.parseSubOrSuperScript(input, current) case '_': consumed, node = d.parseSubScriptOrEmphasis(input, current) - case '*', '/', '=', '~', '+': - consumed, node = d.parseEmphasis(input, current) + case '*', '/', '+': + consumed, node = d.parseEmphasis(input, current, false) + case '=', '~': + consumed, node = d.parseEmphasis(input, current, true) case '[': consumed, node = d.parseRegularLinkOrFootnoteReference(input, current) case '\\': @@ -60,7 +65,7 @@ func (d *Document) parseInline(input string) (nodes []Node) { } if consumed != 0 { if current > previous { - nodes = append(nodes, Text{input[previous:current]}) + nodes = append(nodes, 
Text{input[previous:current], false}) } if node != nil { nodes = append(nodes, node) @@ -73,7 +78,28 @@ func (d *Document) parseInline(input string) (nodes []Node) { } if previous < len(input) { - nodes = append(nodes, Text{input[previous:]}) + nodes = append(nodes, Text{input[previous:], false}) + } + return nodes +} + +func (d *Document) parseRawInline(input string) (nodes []Node) { + previous, current := 0, 0 + for current < len(input) { + if input[current] == '\n' { + consumed, node := d.parseLineBreak(input, current) + if current > previous { + nodes = append(nodes, Text{input[previous:current], true}) + } + nodes = append(nodes, node) + current += consumed + previous = current + } else { + current++ + } + } + if previous < len(input) { + nodes = append(nodes, Text{input[previous:], true}) } return nodes } @@ -102,7 +128,7 @@ func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) { func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) { if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil { - return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2]}}} + return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2], false}}} } return 0, nil } @@ -111,7 +137,7 @@ func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 { return consumed, node } - return d.parseEmphasis(input, start) + return d.parseEmphasis(input, start, false) } func (d *Document) parseRegularLinkOrFootnoteReference(input string, start int) (int, Node) { @@ -180,7 +206,7 @@ func (d *Document) parseRegularLink(input string, start int) (int, Node) { return consumed, RegularLink{protocol, description, link, false} } -func (d *Document) parseEmphasis(input string, start int) (int, Node) { +func (d *Document) parseEmphasis(input string, start int, isRaw bool) (int, Node) { marker, i := input[start], start if 
!hasValidPreAndBorderChars(input, i) { return 0, nil @@ -191,6 +217,9 @@ func (d *Document) parseEmphasis(input string, start int) (int, Node) { } if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) { + if isRaw { + return i + 1 - start, Emphasis{input[start : start+1], d.parseRawInline(input[start+1 : i])} + } return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])} } } diff --git a/org/keyword.go b/org/keyword.go index c972430..cf1121c 100644 --- a/org/keyword.go +++ b/org/keyword.go @@ -117,7 +117,7 @@ func (d *Document) newInclude(k Keyword) (int, Node) { if err != nil { panic(fmt.Sprintf("bad include '#+INCLUDE: %s': %s", k.Value, err)) } - return Block{strings.ToUpper(kind), []string{lang}, []Node{Text{string(bs)}}} + return Block{strings.ToUpper(kind), []string{lang}, d.parseRawInline(string(bs))} } } return 1, Include{k, resolve} diff --git a/org/org.go b/org/org.go index e70162a..116038d 100644 --- a/org/org.go +++ b/org/org.go @@ -140,16 +140,9 @@ func (w *OrgWriter) writeBlock(b Block) { if len(b.Parameters) != 0 { w.WriteString(" " + strings.Join(b.Parameters, " ")) } - w.WriteString("\n") - - if isRawTextBlock(b.Name) { - for _, line := range strings.Split(b.Children[0].(Text).Content, "\n") { - w.WriteString(w.indent + line + "\n") - } - } else { - w.writeNodes(b.Children...) - } - w.WriteString(w.indent + "#+END_" + b.Name + "\n") + w.WriteString("\n" + w.indent) + w.writeNodes(b.Children...) + w.WriteString("#+END_" + b.Name + "\n") } func (w *OrgWriter) writeDrawer(d Drawer) { diff --git a/org/testdata/blocks.html b/org/testdata/blocks.html index 5160801..d6d5ebd 100644 --- a/org/testdata/blocks.html +++ b/org/testdata/blocks.html @@ -34,7 +34,6 @@ note that /inline/ *markup* ignored examples like this are also supported note that /inline/ *markup* ignored -

diff --git a/org/testdata/lists.html b/org/testdata/lists.html index e93a7bf..4d93680 100644 --- a/org/testdata/lists.html +++ b/org/testdata/lists.html @@ -88,7 +88,6 @@ unordered list item 4 with an example that spans multiple lines - diff --git a/org/testdata/misc.html b/org/testdata/misc.html index 550e903..9da1979 100644 --- a/org/testdata/misc.html +++ b/org/testdata/misc.html @@ -57,7 +57,6 @@ Still outside the drawer This is inside the drawer :END: Still outside the drawer - @@ -79,7 +78,6 @@ lines.

Paragraphs can contain inline markup like emphasis strong and links example.com and stuff.

-
  • @@ -87,7 +85,7 @@ example block

     language: go
    -go: "1.x"
    +go: "1.x"
     script:
       - make test
       - make generate-gh-pages
    @@ -100,7 +98,6 @@ deploy:
       verbose: true
       on:
         branch: master
    -
     
  • @@ -270,3 +267,89 @@ Bar

    Bar paragraph

    +

    +DONE +#86: Multiple hyphens not converted to dashes +

    +

    +just like #46 +

    + +

    +also, consecutive dashes inside +

    + +

    +DONE +#87: Markup in footnotes is rendered literally +

    +

    +footnotes can contain markup - and other elements and stuff 2 +

    +
    +

    Footnotes

    +
    +
    +1 +
    +

    +a footnote with markup +

    +
      +
    • +

      +and a list +

      +
    • +
    • +

      +because that's possible +

      +
    • +
    +
    +
    +
    +2 +
    +

    +that also goes for inline footnote definitions +

    +
    +
    +
    +
    diff --git a/org/testdata/misc.org b/org/testdata/misc.org index 643a862..a871ff7 100644 --- a/org/testdata/misc.org +++ b/org/testdata/misc.org @@ -82,3 +82,33 @@ just a space as title... Foo paragraph. **** Bar Bar paragraph +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/86][#86]]: Multiple hyphens not converted to dashes +just like #46 +- =--= -> -- (en dash) +- =---= -> --- (em dash) + +also, consecutive dashes inside +- inline code =--= =---= and verbatim ~--~ ~---~ +- src/example/export blocks should not be converted! + #+BEGIN_SRC sh + --, --- + #+END_SRC + + #+BEGIN_EXAMPLE + --, --- + #+END_EXAMPLE + + #+BEGIN_EXPORT html + --, --- + #+END_EXPORT + + : --, --- + +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/87][#87]]: Markup in footnotes is rendered literally +footnotes can contain *markup* - and other elements and stuff [fn:2:that also goes for *inline* footnote /definitions/] + +* Footnotes + +[fn:1] a footnote /with/ *markup* +- and a *list* +- because that's possible