Add basic html rendering

This commit is contained in:
Niklas Fasching 2018-12-02 16:18:59 +01:00
parent 00aa120bf4
commit 6c683dfbdb
7 changed files with 402 additions and 1 deletions

View file

@ -12,3 +12,8 @@ build:
.PHONY: setup .PHONY: setup
setup: setup:
git config core.hooksPath etc/githooks git config core.hooksPath etc/githooks
case=example
.PHONY: render
render:
bash -c "go run cmd/org/org.go testdata/$(case).org html |& html2text"

View file

@ -2,6 +2,21 @@ A basic org-mode parser in go
* motivation * motivation
- have an org-mode AST to play around with building an org-mode language server - have an org-mode AST to play around with building an org-mode language server
- hopefully add reasonable org-mode support to hugo - sadly [[https://github.com/chaseadamsio/goorgeous][goorgeous]] is broken & abandoned - hopefully add reasonable org-mode support to hugo - sadly [[https://github.com/chaseadamsio/goorgeous][goorgeous]] is broken & abandoned
* next
- hugo frontmatter - see https://gohugo.io/content-management/front-matter/
- captions: images, tables & blocks
- blocks: highlighted src code, blockquote
- basic tables
* later
- affiliated keywords
see org-element.el - org-element-affiliated-keywords
- keywords: support both multi (e.g. LINK, TODO) & normal (e.g. AUTHOR, TITLE) keywords
- links based on #+LINK
- includes https://orgmode.org/manual/Include-files.html
could be used to have a single org file (ignored via hugo ignoreFiles) and then for each post a file including the relevant headline
- tables
colgroups https://orgmode.org/worg/org-tutorials/tables.html
- org-entities replacement: see org-entities-help
* resources * resources
- syntax - syntax
- https://orgmode.org/worg/dev/org-syntax.html - https://orgmode.org/worg/dev/org-syntax.html

View file

@ -7,6 +7,10 @@ import (
"os" "os"
"strings" "strings"
"github.com/alecthomas/chroma"
"github.com/alecthomas/chroma/formatters/html"
"github.com/alecthomas/chroma/lexers"
"github.com/alecthomas/chroma/styles"
"github.com/niklasfasching/org" "github.com/niklasfasching/org"
) )
@ -14,7 +18,7 @@ func main() {
log.SetFlags(0) log.SetFlags(0)
if len(os.Args) < 3 { if len(os.Args) < 3 {
log.Println("USAGE: org FILE OUTPUT_FORMAT") log.Println("USAGE: org FILE OUTPUT_FORMAT")
log.Fatal("supported output formats: org") log.Fatal("supported output formats: org, html")
} }
bs, err := ioutil.ReadFile(os.Args[1]) bs, err := ioutil.ReadFile(os.Args[1])
if err != nil { if err != nil {
@ -24,8 +28,24 @@ func main() {
switch strings.ToLower(os.Args[2]) { switch strings.ToLower(os.Args[2]) {
case "org": case "org":
out = org.NewDocument().Parse(r).Write(org.NewOrgWriter()).String() out = org.NewDocument().Parse(r).Write(org.NewOrgWriter()).String()
case "html":
writer := org.NewHTMLWriter()
writer.HighlightCodeBlock = highlightCodeBlock
out = org.NewDocument().Parse(r).Write(writer).String()
default: default:
log.Fatal("Unsupported output format") log.Fatal("Unsupported output format")
} }
log.Println(out) log.Println(out)
} }
// highlightCodeBlock renders source as syntax-highlighted HTML using chroma.
// The lexer is looked up by lang; when none is registered for that name the
// chroma fallback lexer is used. Output is formatted with the "friendly" style.
func highlightCodeBlock(source, lang string) string {
	lexer := lexers.Get(lang)
	if lexer == nil {
		lexer = lexers.Fallback
	}
	lexer = chroma.Coalesce(lexer)

	// NOTE(review): tokenise and format errors are discarded here, so whatever
	// was written to the builder before a failure is returned as-is.
	tokens, _ := lexer.Tokenise(nil, source)
	var out strings.Builder
	_ = html.New().Format(&out, styles.Get("friendly"), tokens)
	return out.String()
}

262
html.go Normal file
View file

@ -0,0 +1,262 @@
package org
import (
"fmt"
"html"
"path"
"strings"
)
// HTMLWriter renders document nodes as HTML into its embedded stringBuilder.
type HTMLWriter struct {
// stringBuilder accumulates the generated HTML.
stringBuilder
// HighlightCodeBlock is called by writeBlock with the joined lines of a block
// and its first parameter (or "" when there is none); the returned string is
// written verbatim between <code> tags. NewHTMLWriter installs a default that
// only HTML-escapes the source.
HighlightCodeBlock func(source, lang string) string
}
// emphasisTags maps an emphasis kind (the org markup marker) to its opening
// and closing HTML tag, in that order.
var emphasisTags = map[string][]string{
	"/":   {"<em>", "</em>"},
	"*":   {"<strong>", "</strong>"},
	"+":   {"<del>", "</del>"},
	"~":   {"<code>", "</code>"},
	"=":   {`<code class="verbatim">`, "</code>"},
	"_":   {`<span style="text-decoration: underline;">`, "</span>"},
	"_{}": {"<sub>", "</sub>"},
	// The HTML superscript element is <sup>; <super> does not exist.
	"^{}": {"<sup>", "</sup>"},
}
// listTags maps a list kind to the opening and closing HTML tag (in that
// order) that wraps the list's items: bullet kinds produce <ul>, the
// "number" and "letter" kinds produce <ol>.
var listTags = map[string][]string{
	"+":      {"<ul>", "</ul>"},
	"-":      {"<ul>", "</ul>"},
	"*":      {"<ul>", "</ul>"},
	"number": {"<ol>", "</ol>"},
	"letter": {"<ol>", "</ol>"},
}
// NewHTMLWriter returns an HTMLWriter whose HighlightCodeBlock does no
// highlighting: it HTML-escapes the source and ignores the language.
func NewHTMLWriter() *HTMLWriter {
	escapeOnly := func(source, lang string) string {
		return html.EscapeString(source)
	}
	writer := new(HTMLWriter)
	writer.HighlightCodeBlock = escapeOnly
	return writer
}
// emptyClone returns a copy of w that keeps its settings but starts with an
// empty output buffer, so nested content can be rendered separately.
func (w *HTMLWriter) emptyClone() *HTMLWriter {
	clone := new(HTMLWriter)
	*clone = *w
	clone.stringBuilder = stringBuilder{}
	return clone
}
// before is a no-op: the HTML writer emits nothing ahead of the document body.
// NOTE(review): presumably the counterpart of after in a writer hook pair —
// confirm against the writer interface declared elsewhere.
func (w *HTMLWriter) before(d *Document) {}
// after appends the footnotes section for d: a titled container holding every
// footnote definition in the sequence given by d.Footnotes.Order. Nothing is
// written when the document has no footnote definitions.
func (w *HTMLWriter) after(d *Document) {
	footnotes := d.Footnotes
	if len(footnotes.Definitions) == 0 {
		return
	}

	const newline = "\n"
	w.WriteString(`<div id="footnotes">` + newline)
	w.WriteString(`<h1 class="footnotes-title">` + footnotes.Title + `</h1>` + newline)
	w.WriteString(`<div class="footnote-definitions">` + newline)
	for _, footnoteName := range footnotes.Order {
		definition := footnotes.Definitions[footnoteName]
		w.writeNodes(definition)
	}
	w.WriteString("</div>\n</div>\n")
}
// writeNodes renders each node in ns by dispatching on its concrete type to
// the matching write* method. Keyword and Comment nodes and nil nodes produce
// no output; any other unrecognized node type causes a panic.
func (w *HTMLWriter) writeNodes(ns ...Node) {
	for _, node := range ns {
		switch v := node.(type) {
		case nil:
			// Nothing to render for an absent node.
		case Keyword, Comment:
			// These node types have no HTML representation.
		case Headline:
			w.writeHeadline(v)
		case Block:
			w.writeBlock(v)
		case FootnoteDefinition:
			w.writeFootnoteDefinition(v)
		case List:
			w.writeList(v)
		case ListItem:
			w.writeListItem(v)
		case Table:
			w.writeTable(v)
		case TableHeader:
			w.writeTableHeader(v)
		case TableRow:
			w.writeTableRow(v)
		case TableSeparator:
			w.writeTableSeparator(v)
		case Paragraph:
			w.writeParagraph(v)
		case HorizontalRule:
			w.writeHorizontalRule(v)
		case Line:
			w.writeLine(v)
		case Text:
			w.writeText(v)
		case Emphasis:
			w.writeEmphasis(v)
		case Linebreak:
			w.writeLinebreak(v)
		case RegularLink:
			w.writeRegularLink(v)
		case FootnoteLink:
			w.writeFootnoteLink(v)
		default:
			panic(fmt.Sprintf("bad node %#v", v))
		}
	}
}
// writeLines renders lines one after another, joining them with a single
// space. No space is written after the final line or after a line whose
// Children is nil.
func (w *HTMLWriter) writeLines(lines []Node) {
	lastIndex := len(lines) - 1
	for idx := range lines {
		current := lines[idx]
		w.writeNodes(current)
		if idx == lastIndex {
			continue
		}
		if current.(Line).Children != nil {
			w.WriteString(" ")
		}
	}
}
// writeBlock renders b as a <code> element. The block's lines are joined with
// newlines and passed to HighlightCodeBlock together with the first block
// parameter as the language ("" when the block has no parameters); the result
// is written verbatim.
func (w *HTMLWriter) writeBlock(b Block) {
	w.WriteString("<code>")
	lang := ""
	if len(b.Parameters) >= 1 {
		lang = b.Parameters[0]
	}
	lines := make([]string, 0, len(b.Children))
	for _, n := range b.Children {
		line := n.(Line)
		// A line without children is kept as an empty line instead of
		// panicking on the Children[0] access.
		if len(line.Children) == 0 {
			lines = append(lines, "")
			continue
		}
		lines = append(lines, line.Children[0].(Text).Content)
	}
	w.WriteString(w.HighlightCodeBlock(strings.Join(lines, "\n"), lang))
	w.WriteString("</code>\n")
}
// writeFootnoteDefinition renders a footnote definition: a container div with
// a <sup> anchor whose id is "footnote-<name>", followed by the definition's
// children.
func (w *HTMLWriter) writeFootnoteDefinition(f FootnoteDefinition) {
	// Escape the name exactly like writeFootnoteLink does, so the anchor id
	// matches the href of references and the name cannot inject markup.
	name := html.EscapeString(f.Name)
	w.WriteString(`<div class="footnote-definition">` + "\n")
	w.WriteString(fmt.Sprintf(`<sup id="footnote-%s">%s</sup>`, name, name) + "\n")
	w.writeNodes(f.Children...)
	w.WriteString("</div>\n")
}
// writeHeadline renders the headline title inside an <hN> element, where N is
// h.Lvl, and then renders the headline's children after the closing tag.
func (w *HTMLWriter) writeHeadline(h Headline) {
	openTag := fmt.Sprintf("<h%d>", h.Lvl)
	closeTag := fmt.Sprintf("</h%d>\n", h.Lvl)

	w.WriteString(openTag)
	w.writeNodes(h.Title...)
	w.WriteString(closeTag)

	w.writeNodes(h.Children...)
}
// writeText writes the text content with HTML special characters escaped.
func (w *HTMLWriter) writeText(t Text) {
	escaped := html.EscapeString(t.Content)
	w.WriteString(escaped)
}
// writeEmphasis wraps the rendered emphasis content in the tag pair that
// emphasisTags holds for e.Kind. It panics when the kind has no entry.
func (w *HTMLWriter) writeEmphasis(e Emphasis) {
	pair, known := emphasisTags[e.Kind]
	if !known {
		panic(fmt.Sprintf("bad emphasis %#v", e))
	}
	opening, closing := pair[0], pair[1]
	w.WriteString(opening)
	w.writeNodes(e.Content...)
	w.WriteString(closing)
}
// writeLinebreak writes a <br> element followed by a newline.
func (w *HTMLWriter) writeLinebreak(l Linebreak) {
	const lineBreak = "<br>\n"
	w.WriteString(lineBreak)
}
// writeFootnoteLink renders a footnote reference as a superscript link to the
// "#footnote-<name>" anchor, using the HTML-escaped footnote name as both the
// link target suffix and the link text.
func (w *HTMLWriter) writeFootnoteLink(l FootnoteLink) {
	escapedName := html.EscapeString(l.Name)
	reference := fmt.Sprintf(`<sup class="footnote-reference"><a href="#footnote-%s">%s</a></sup>`, escapedName, escapedName)
	w.WriteString(reference)
}
// writeRegularLink renders a link. The description is rendered with a
// separate writer and used as the link text. Links with the "file" protocol
// have their "file:" prefix removed from the URL and, when the path ends in
// .png, .jpg, .jpeg or .gif, are rendered as an <img> whose alt and title are
// the rendered description. Everything else becomes an <a> element.
func (w *HTMLWriter) writeRegularLink(l RegularLink) {
	url := html.EscapeString(l.URL)
	descriptionWriter := w.emptyClone()
	descriptionWriter.writeNodes(l.Description...)
	description := descriptionWriter.String()

	if l.Protocol == "file" { // TODO
		// TrimPrefix cannot slice out of range or cut into a URL that lacks
		// the prefix, unlike url[len("file:"):].
		url = strings.TrimPrefix(url, "file:")
		// Compare the extension exactly: a substring test would also accept
		// an empty extension and fragments such as ".p" or ".jp".
		switch path.Ext(l.URL) {
		case ".png", ".jpg", ".jpeg", ".gif":
			w.WriteString(fmt.Sprintf(`<img src="%s" alt="%s" title="%s" />`, url, description, description))
			return
		}
	}
	w.WriteString(fmt.Sprintf(`<a href="%s">%s</a>`, url, description))
}
// writeList renders the list items between the tag pair that listTags holds
// for l.Kind, each tag on its own line. It panics when the kind has no entry.
func (w *HTMLWriter) writeList(l List) {
	pair, known := listTags[l.Kind]
	if !known {
		panic(fmt.Sprintf("bad list kind %#v", l))
	}
	opening, closing := pair[0], pair[1]
	w.WriteString(opening + "\n")
	w.writeNodes(l.Items...)
	w.WriteString(closing + "\n")
}
// writeListItem renders a list item inside <li>...</li>. An item whose only
// child is a Paragraph is rendered inline (its lines only, without the <p>
// wrapper); every other item has all of its children rendered normally.
func (w *HTMLWriter) writeListItem(li ListItem) {
	w.WriteString("<li>")
	var paragraph Paragraph
	inline := false
	if len(li.Children) == 1 {
		paragraph, inline = li.Children[0].(Paragraph)
	}
	if inline {
		w.writeLines(paragraph.Children)
	} else {
		// Also reached for a single non-paragraph child (block, table, nested
		// list), which would otherwise be dropped from the output.
		w.writeNodes(li.Children...)
	}
	w.WriteString("</li>\n")
}
// writeLine renders the children of a line in order.
func (w *HTMLWriter) writeLine(l Line) {
	for _, child := range l.Children {
		w.writeNodes(child)
	}
}
// writeParagraph renders the paragraph's lines inside <p>...</p>. A paragraph
// that consists of exactly one line with nil Children produces no output.
func (w *HTMLWriter) writeParagraph(p Paragraph) {
	if len(p.Children) == 1 {
		if only := p.Children[0].(Line); only.Children == nil {
			return
		}
	}
	w.WriteString("<p>")
	w.writeLines(p.Children)
	w.WriteString("</p>\n")
}
// writeHorizontalRule writes an <hr> element followed by a newline.
func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) {
	const rule = "<hr>\n"
	w.WriteString(rule)
}
// writeTable renders a table: the header node first, then all rows wrapped
// in <tbody>.
func (w *HTMLWriter) writeTable(t Table) {
	const (
		tableOpen = "<table>"
		bodyOpen  = "<tbody>"
		closing   = "</tbody>\n</table>\n"
	)
	w.WriteString(tableOpen)
	w.writeNodes(t.Header)
	w.WriteString(bodyOpen)
	w.writeNodes(t.Rows...)
	w.WriteString(closing)
}
// writeTableRow renders a table row as <tr> with one <td> per column.
func (w *HTMLWriter) writeTableRow(t TableRow) {
	w.WriteString("\n<tr>\n")
	for idx := range t.Columns {
		cell := t.Columns[idx]
		w.WriteString("<td>")
		w.writeNodes(cell...)
		w.WriteString("</td>")
	}
	w.WriteString("\n</tr>\n")
}
// writeTableHeader renders the table header as <thead> with one <th> per
// column.
func (w *HTMLWriter) writeTableHeader(t TableHeader) {
	w.WriteString("\n<thead>\n")
	for idx := range t.Columns {
		cell := t.Columns[idx]
		w.WriteString("<th>")
		w.writeNodes(cell...)
		w.WriteString("</th>")
	}
	w.WriteString("\n</thead>\n")
}
// writeTableSeparator writes an empty table row surrounded by newlines.
func (w *HTMLWriter) writeTableSeparator(t TableSeparator) {
	const emptyRow = "\n<tr></tr>\n"
	w.WriteString(emptyRow)
}

20
html_test.go Normal file
View file

@ -0,0 +1,20 @@
package org
import (
"strings"
"testing"
)
// TestHTMLWriter renders every org test file with the default HTMLWriter and
// compares the output with the sibling file that has the same name and an
// .html extension.
func TestHTMLWriter(t *testing.T) {
	for _, path := range orgTestFiles() {
		reader, writer := strings.NewReader(fileString(path)), NewHTMLWriter()
		actual := NewDocument().Parse(reader).Write(writer).String()
		// TrimSuffix only removes a real ".org" suffix; blind slicing would
		// mangle or panic on any other path.
		expected := fileString(strings.TrimSuffix(path, ".org") + ".html")
		if expected != actual {
			t.Errorf("%s:\n%s", path, diff(actual, expected))
			continue
		}
		t.Logf("%s: passed!", path)
	}
}

78
testdata/example.html vendored Normal file
View file

@ -0,0 +1,78 @@
<h1>Motivation</h1>
<p>To validate the parser we&#39;ll try printing the AST back to org-mode source - if that works we can be kind of sure that the parsing worked. At least I hope so - I would like to get around writing tests for the individual parsing functions... </p>
<h2>Headlines with TODO status, priority &amp; tags</h2>
<h3>Headline with todo status &amp; priority</h3>
<h3>Headline with TODO status</h3>
<h3>Headline with tags &amp; priority</h3>
<p>this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...</p>
<h2>Lists</h2>
<ul>
<li>unordered list item 1</li>
<li><p>unordered list item 2 - with <code>inline</code> <em>markup</em></p>
<ol>
<li><p>ordered sublist item 1</p>
<ol>
<li>ordered sublist item 1</li>
<li>ordered sublist item 2</li>
<li>ordered sublist item 3</li>
</ol>
</li>
<li>ordered sublist item 2</li>
</ol>
</li>
<li><p>unordered list item 3 - and a <a href="https://example.com">link</a> and some lines of text</p>
<ol>
<li><p>and another subitem</p>
<code>echo with a block</code>
</li>
<li><p>and another one with a table</p>
<table>
<thead>
<th>a</th><th>b</th><th>c</th>
</thead>
<tbody>
<tr>
<td>1</td><td>2</td><td>3</td>
</tr>
</tbody>
</table>
<p>and text with an empty line in between as well!</p>
</li>
</ol>
</li>
<li>unordered list item 4 </li>
</ul>
<h2>Inline</h2>
<ul>
<li><em>emphasis</em> and a hard line break <br>
see?</li>
<li><em>.emphasis with dot border chars.</em></li>
<li><em>emphasis with a slash/inside</em></li>
<li><em>emphasis</em> followed by raw text with slash /</li>
<li>-&gt;/not an emphasis/&lt;-</li>
<li>links with slashes do not become <em>emphasis</em>: <a href="https://somelinkshouldntrenderaccidentalemphasis.com">https://somelinkshouldntrenderaccidentalemphasis.com</a>/ <em>emphasis</em></li>
<li><span style="text-decoration: underline;">underlined</span> <strong>bold</strong> <code class="verbatim">verbatim</code> <code>code</code> <del>strikethrough</del></li>
<li><strong>bold string with an *asterisk inside</strong></li>
<li><p>links</p>
<ol>
<li>regular link <a href="https://example.com">https://example.com</a> link without description</li>
<li>regular link <a href="https://example.com">example.com</a> link with description</li>
<li>regular link to a file (image) <img src="my-img.png" alt="file:my-img.png" title="file:my-img.png" /></li>
</ol>
</li>
</ul>
<h2>Footnotes</h2>
<ul>
<li>normal footnote reference <sup class="footnote-reference"><a href="#footnote-1">1</a></sup></li>
<li>further references to the same footnote should not <sup class="footnote-reference"><a href="#footnote-1">1</a></sup> render duplicates in the footnote list</li>
<li>also inline footnotes are supported via <code class="verbatim">fn:2:inline definition</code>. But we won&#39;t test that because it would cause the output to look different from the input </li>
</ul>
<div id="footnotes">
<h1 class="footnotes-title">Footnotes</h1>
<div class="footnote-definitions">
<div class="footnote-definition">
<sup id="footnote-1">1</sup>
<p>Foobar</p>
</div>
</div>
</div>

View file

@ -38,6 +38,7 @@ this one is cheating a little as tags are ALWAYS printed right aligned to a give
** Inline ** Inline
- /emphasis/ and a hard line break \\ - /emphasis/ and a hard line break \\
see?
- /.emphasis with dot border chars./ - /.emphasis with dot border chars./
- /emphasis with a slash/inside/ - /emphasis with a slash/inside/
- /emphasis/ followed by raw text with slash / - /emphasis/ followed by raw text with slash /