From 592be07cfd6733b34b4f57156647a80b1898ce95 Mon Sep 17 00:00:00 2001 From: Niklas Fasching Date: Sun, 2 Dec 2018 20:58:06 +0100 Subject: [PATCH] Refactor space handling of writers I went through the issues of goorgeous and picked a few that seemed easy enough to add (and added some more as todos for later). That helped a lot and showed some bugs / edge cases that required changes. - the org writer wrote a lot of eol spaces and just removed them whenever String() was actually called. That worked until now but did not bode well with rendering an empty headline - by removing ALL eol space we would render "* " back as just "*" -> not a headline anymore. - the html writer had some special handling for line spacing inside paragraphs and list items - with the introduction of more blocks we need that handling everywhere. As browsers / html renderers are nice enough to collapse whitespace (and especially collapse "\s*\n" into " ") we can just write out the newlines and let the renderer take care of the rest. 
--- README.org | 19 ++- org/block.go | 7 +- org/html.go | 54 ++++--- org/org.go | 28 ++-- org/testdata/example.html | 289 ++++++++++++++++++++++++++++++++------ org/testdata/example.org | 40 ++++++ 6 files changed, 339 insertions(+), 98 deletions(-) diff --git a/README.org b/README.org index 49faa3f..7d8bec7 100644 --- a/README.org +++ b/README.org @@ -3,18 +3,23 @@ A basic org-mode parser in go - have a org-mode AST to play around with building an org-mode language server - hopefully add reasonable org-mode support to hugo - sadly [[https://github.com/chaseadamsio/goorgeous][goorgeous]] is broken & abandoned * next +- handle #+RESULTS: raw and stuff - hugo frontmatter - see https://gohugo.io/content-management/front-matter/ - captions: images, tables & blocks +*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/72][#72:]] Support for #+ATTR_HTML +*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/46][#46]]: Support for symbols like ndash and mdash +- see org-entities replacement: see org-entities-help +*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/10][#10]]: Support noexport +*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/19][#19]]: Support #+HTML +*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/31][#31]]: Support #+INCLUDE +- see https://orgmode.org/manual/Include-files.html +*** TODO [[https://github.com/chaseadamsio/goorgeous/issues/33][#33]]: Wrong output when mixing html with org-mode * later -- affiliated keywords - see org-element.el - org-element-affiliated-keywords +- affiliated keywords: see org-element.el - org-element-affiliated-keywords - keywords: support both multi (e.g. LINK, TODO) & normal (e.g. 
AUTHOR, TITLE) keywords - links based on #+LINK -- includes https://orgmode.org/manual/Include-files.html - could be used to have a single org file (ignored via hugo ignoreFiles) and then for each post a file including the relevant headline -- tables - colgroups https://orgmode.org/worg/org-tutorials/tables.html -- org-entities replacement: see org-entities-help +- table colgroups https://orgmode.org/worg/org-tutorials/tables.html +- table pretty printing * resources - syntax - https://orgmode.org/worg/dev/org-syntax.html diff --git a/org/block.go b/org/block.go index 07cc060..63d3a8c 100644 --- a/org/block.go +++ b/org/block.go @@ -32,7 +32,12 @@ func (d *Document) parseBlock(i int, parentStop stopFn) (int, Node) { if parentStop(d, i) { return 0, nil } - nodes = append(nodes, Line{[]Node{Text{trim(d.tokens[i].matches[0])}}}) + text := trim(d.tokens[i].matches[0]) + if name == "SRC" || name == "EXAMPLE" { + nodes = append(nodes, Line{[]Node{Text{text}}}) + } else { + nodes = append(nodes, Line{d.parseInline(text)}) + } } return i + 1 - start, Block{name, parameters, nodes} } diff --git a/org/html.go b/org/html.go index 765b597..9c47dd0 100644 --- a/org/html.go +++ b/org/html.go @@ -102,15 +102,6 @@ func (w *HTMLWriter) writeNodes(ns ...Node) { } } -func (w *HTMLWriter) writeLines(lines []Node) { - for i, line := range lines { - w.writeNodes(line) - if i != len(lines)-1 && line.(Line).Children != nil { - w.WriteString(" ") - } - } -} - func (w *HTMLWriter) writeBlock(b Block) { switch b.Name { case "SRC": @@ -125,17 +116,22 @@ func (w *HTMLWriter) writeBlock(b Block) { w.WriteString(w.HighlightCodeBlock(strings.Join(lines, "\n"), lang)) w.WriteString("\n\n") case "EXAMPLE": - w.WriteString(`
` + "\n")
+		w.WriteString(`
\n`)
 		w.writeNodes(b.Children...)
-		w.WriteString("\n
\n") + w.WriteString("
\n") case "QUOTE": w.WriteString("
\n") w.writeNodes(b.Children...) - w.WriteString("\n
\n") + w.WriteString("\n") case "CENTER": - w.WriteString(`
` + "\n") + w.WriteString(`

` + "\n") w.writeNodes(b.Children...) - w.WriteString("\n

\n") + w.WriteString("

\n") + default: + w.WriteString(fmt.Sprintf(`

`, strings.ToLower(b.Name)) + "\n") + w.writeNodes(b.Children...) + w.WriteString("

\n") + } } @@ -159,6 +155,7 @@ func (w *HTMLWriter) writeFootnotes(d *Document) { } w.WriteString("\n\n") } + func (w *HTMLWriter) writeHeadline(h Headline) { w.WriteString(fmt.Sprintf("", h.Lvl)) w.writeNodes(h.Title...) @@ -219,27 +216,24 @@ func (w *HTMLWriter) writeList(l List) { } func (w *HTMLWriter) writeListItem(li ListItem) { - w.WriteString("
  • ") - if len(li.Children) == 1 { - if p, ok := li.Children[0].(Paragraph); ok { - w.writeLines(p.Children) - } - } else { - w.writeNodes(li.Children...) - } + w.WriteString("
  • \n") + w.writeNodes(li.Children...) w.WriteString("
  • \n") } func (w *HTMLWriter) writeLine(l Line) { - w.writeNodes(l.Children...) + if len(l.Children) != 0 { + w.writeNodes(l.Children...) + w.WriteString("\n") + } } func (w *HTMLWriter) writeParagraph(p Paragraph) { if len(p.Children) == 1 && p.Children[0].(Line).Children == nil { return } - w.WriteString("

    ") - w.writeLines(p.Children) + w.WriteString("

    \n") + w.writeNodes(p.Children...) w.WriteString("

    \n") } @@ -248,15 +242,15 @@ func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) { } func (w *HTMLWriter) writeTable(t Table) { - w.WriteString("") + w.WriteString("
    \n") w.writeNodes(t.Header) - w.WriteString("") + w.WriteString("\n") w.writeNodes(t.Rows...) w.WriteString("\n
    \n") } func (w *HTMLWriter) writeTableRow(t TableRow) { - w.WriteString("\n\n") + w.WriteString("\n") for _, column := range t.Columns { w.WriteString("") w.writeNodes(column...) @@ -266,7 +260,7 @@ func (w *HTMLWriter) writeTableRow(t TableRow) { } func (w *HTMLWriter) writeTableHeader(t TableHeader) { - w.WriteString("\n\n") + w.WriteString("\n") for _, column := range t.Columns { w.WriteString("") w.writeNodes(column...) @@ -276,5 +270,5 @@ func (w *HTMLWriter) writeTableHeader(t TableHeader) { } func (w *HTMLWriter) writeTableSeparator(t TableSeparator) { - w.WriteString("\n\n") + w.WriteString("\n") } diff --git a/org/org.go b/org/org.go index e728b04..b28b176 100644 --- a/org/org.go +++ b/org/org.go @@ -2,7 +2,6 @@ package org import ( "fmt" - "regexp" "strings" ) @@ -96,13 +95,6 @@ func (w *OrgWriter) writeNodes(ns ...Node) { } } -var eolWhiteSpaceRegexp = regexp.MustCompile("[\t ]*\n") - -func (w *OrgWriter) String() string { - s := w.stringBuilder.String() - return eolWhiteSpaceRegexp.ReplaceAllString(s, "\n") -} - func (w *OrgWriter) writeHeadline(h Headline) { tmp := w.emptyClone() tmp.WriteString(strings.Repeat("*", h.Lvl)) @@ -134,7 +126,11 @@ func (w *OrgWriter) writeHeadline(h Headline) { } func (w *OrgWriter) writeBlock(b Block) { - w.WriteString(fmt.Sprintf("%s#+BEGIN_%s %s\n", w.indent, b.Name, strings.Join(b.Parameters, " "))) + w.WriteString(w.indent + "#+BEGIN_" + b.Name) + if len(b.Parameters) != 0 { + w.WriteString(" " + strings.Join(b.Parameters, " ")) + } + w.WriteString("\n") w.writeNodes(b.Children...) w.WriteString(w.indent + "#+END_" + b.Name + "\n") } @@ -180,7 +176,6 @@ func (w *OrgWriter) writeListItem(li ListItem) { } func (w *OrgWriter) writeTable(t Table) { - // TODO: pretty print tables w.writeNodes(t.Header) w.writeNodes(t.Rows...) 
} @@ -200,9 +195,12 @@ func (w *OrgWriter) writeTableSeparator(ts TableSeparator) { func (w *OrgWriter) writeTableColumns(columns [][]Node) { w.WriteString(w.indent + "| ") - for _, columnNodes := range columns { + for i, columnNodes := range columns { w.writeNodes(columnNodes...) - w.WriteString(" | ") + w.WriteString(" |") + if i < len(columns)-1 { + w.WriteString(" ") + } } w.WriteString("\n") } @@ -212,8 +210,10 @@ func (w *OrgWriter) writeHorizontalRule(hr HorizontalRule) { } func (w *OrgWriter) writeLine(l Line) { - w.WriteString(w.indent) - w.writeNodes(l.Children...) + if len(l.Children) != 0 { + w.WriteString(w.indent) + w.writeNodes(l.Children...) + } w.WriteString("\n") } diff --git a/org/testdata/example.html b/org/testdata/example.html index d5a95b8..a332be9 100644 --- a/org/testdata/example.html +++ b/org/testdata/example.html @@ -1,33 +1,76 @@

    Motivation

    -

    To validate the parser we'll try printing the AST back to org-mode source - if that works we can be kind of sure that the parsing worked. At least I hope so - I would like to get around writing tests for the individual parsing functions...

    +

    +To validate the parser we'll try printing the AST back to org-mode source - if that +works we can be kind of sure that the parsing worked. +At least I hope so - I would like to get around writing tests for the individual parsing +functions... +

    Headlines with TODO status, priority & tags

    Headline with todo status & priority

    Headline with TODO status

    Headline with tags & priority

    -

    this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...

    +

    +this one is cheating a little as tags are ALWAYS printed right aligned to a given column number... +

    Lists

    Inline

    @@ -73,43 +174,128 @@ and a second line and a third one -
    -an example blockwith multiple lines
    +
    \nan example block
    +with multiple lines
     
    Mongodb is very webscale
    +

    issues from goorgeous (free test cases, yay!)

    +

    #29: Support verse block

    +

    +This +is +verse +

    +

    +or even a totally custom kind of block +crazy ain't it? +

    +

    #47: Consecutive code wrapped text gets joined

    +

    +either this or that foo. +either this +or that foo. +

    +

    #68: Quote block with inline markup

    +
    +this is markup! +
    +

    #77: Recognize code--- as code plus dash

    +

    #75: Not parsing nested lists correctly

    +
      +
    • +

      +bullet 1 +

      +
        +
      • +

        +sub bullet +

        +
      • +
      +
    • +
    +

    #78: Emphasis at beginning of line

    +

    +italics +

    +

    +Text +italics +

    +

    #82: Crash on empty headline

    +

    +

    +just a space as title... +

    +

    #84: Paragraphs that are not followed by an empty line are not parsed correctly

    +

    Foo

    +

    +Foo paragraph. +

    +

    Bar

    +

    +Bar paragraph +

    Footnotes

      -
    • normal footnote reference 1
    • -
    • further references to the same footnote should not 1 render duplicates in the footnote list
    • -
    • inline footnotes are also supported via 2.
    • +
    • +

      +normal footnote reference 1 +

      +
    • +
    • +

      +further references to the same footnote should not 1 render duplicates in the footnote list +

      +
    • +
    • +

      +inline footnotes are also supported via 2. +

      +

    Footnotes

    1 -

    https://www.example.com

    +

    +https://www.example.com +

      -
    • footnotes can contain markup
    • -
    • and other elements

      +
    • +

      +footnotes can contain markup +

      +
    • +
    • +

      +and other elements +

        -
      • like blocks

        +
      • +

        +like blocks +

        other non-plain
      • -
      • and tables

        - +
      • +

        +and tables +

        +
      • + - - @@ -122,22 +308,33 @@ Mongodb is very webscale
        4 -

        another unused footnote

        +

        +another unused footnote +

        5 -

        another unused footnote

        +

        +another unused footnote +

        6 -

        another unused footnote

        +

        +another unused footnote +

        2 -the inline footnote definition
        +

        +the inline footnote definition +

        + diff --git a/org/testdata/example.org b/org/testdata/example.org index eb93aea..7678caa 100644 --- a/org/testdata/example.org +++ b/org/testdata/example.org @@ -70,6 +70,46 @@ with multiple lines #+BEGIN_QUOTE Mongodb is very webscale #+END_QUOTE + +** issues from goorgeous (free test cases, yay!) +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/29][#29:]] Support verse block +#+BEGIN_VERSE +This +*is* +verse +#+END_VERSE + +#+BEGIN_CUSTOM +or even a *totally* /custom/ kind of block +crazy ain't it? +#+END_CUSTOM +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/47][#47:]] Consecutive ~code~ wrapped text gets joined +either ~this~ or ~that~ foo. +either ~this~ +or ~that~ foo. +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/68][#68]]: Quote block with inline markup +#+BEGIN_QUOTE +[[www.example.com][/this/ *is* _markup_!]] +#+END_QUOTE + +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/77][#77]]: Recognize =code=--- as code plus dash +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/75][#75]]: Not parsing nested lists correctly +- bullet 1 + - sub bullet +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/78][#78]]: Emphasis at beginning of line +/italics/ + + +Text +/italics/ +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/82][#82]]: Crash on empty headline +**** +just a space as title... +*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/84][#84]]: Paragraphs that are not followed by an empty line are not parsed correctly +**** Foo +Foo paragraph. +**** Bar +Bar paragraph ** Footnotes - normal footnote reference [fn:1] - further references to the same footnote should not [fn:1] render duplicates in the footnote list
        1a
        2b
        3c