diff --git a/Makefile b/Makefile index 8aa7439..ba12877 100644 --- a/Makefile +++ b/Makefile @@ -12,3 +12,8 @@ build: .PHONY: setup setup: git config core.hooksPath etc/githooks + +case=example +.PHONY: render +render: + bash -c "go run cmd/org/org.go testdata/$(case).org html |& html2text" diff --git a/README.org b/README.org index 24d4d64..af67a39 100644 --- a/README.org +++ b/README.org @@ -2,6 +2,21 @@ A basic org-mode parser in go * motivation - have a org-mode AST to play around with building an org-mode language server - hopefully add reasonable org-mode support to hugo - sadly [[https://github.com/chaseadamsio/goorgeous][goorgeous]] is broken & abandoned +* next +- hugo frontmatter - see https://gohugo.io/content-management/front-matter/ +- captions: images, tables & blocks +- blocks: highlighted src code, blockquote +- basic tables +* later +- affiliated keywords + see org-element.el - org-element-affiliated-keywords +- keywords: support both multi (e.g. LINK, TODO) & normal (e.g. AUTHOR, TITLE) keywords +- links based on #+LINK +- includes https://orgmode.org/manual/Include-files.html + could be used to have a single org file (ignored via hugo ignoreFiles) and then for each post a file including the relevant headline +- tables + colgroups https://orgmode.org/worg/org-tutorials/tables.html +- org-entities replacement: see org-entities-help * resources - syntax - https://orgmode.org/worg/dev/org-syntax.html diff --git a/cmd/org/org.go b/cmd/org/org.go index c5c11ba..63315de 100644 --- a/cmd/org/org.go +++ b/cmd/org/org.go @@ -7,6 +7,10 @@ import ( "os" "strings" + "github.com/alecthomas/chroma" + "github.com/alecthomas/chroma/formatters/html" + "github.com/alecthomas/chroma/lexers" + "github.com/alecthomas/chroma/styles" "github.com/niklasfasching/org" ) @@ -14,7 +18,7 @@ func main() { log.SetFlags(0) if len(os.Args) < 3 { log.Println("USAGE: org FILE OUTPUT_FORMAT") - log.Fatal("supported output formats: org") + log.Fatal("supported output formats: org, html") } bs, err := ioutil.ReadFile(os.Args[1]) if err != nil { @@ -24,8 +28,24 @@ func main() { switch strings.ToLower(os.Args[2]) { case "org": out = org.NewDocument().Parse(r).Write(org.NewOrgWriter()).String() + case "html": + writer := org.NewHTMLWriter() + writer.HighlightCodeBlock = highlightCodeBlock + out = org.NewDocument().Parse(r).Write(writer).String() default: log.Fatal("Unsupported output format") } log.Println(out) } + +func highlightCodeBlock(source, lang string) string { + var w strings.Builder + l := lexers.Get(lang) + if l == nil { + l = lexers.Fallback + } + l = chroma.Coalesce(l) + it, _ := l.Tokenise(nil, source) + _ = html.New().Format(&w, styles.Get("friendly"), it) + return w.String() +} diff --git a/html.go b/html.go new file mode 100644 index 0000000..97b8d35 --- /dev/null +++ b/html.go @@ -0,0 +1,262 @@ +package org + +import ( + "fmt" + "html" + "path" + "strings" +) + +type HTMLWriter struct { + stringBuilder + HighlightCodeBlock func(source, lang string) string +} + +var emphasisTags = map[string][]string{ + "/": []string{"", ""}, + "*": []string{"", ""}, + "+": []string{"", ""}, + "~": []string{"", ""}, + "=": []string{``, ""}, + "_": []string{``, ""}, + "_{}": []string{"", ""}, + "^{}": []string{"", ""}, +} + +var listTags = map[string][]string{ + "+": []string{""}, + "-": []string{""}, + "*": []string{""}, + "number": []string{"
    ", "
"}, + "letter": []string{"
    ", "
"}, +} + +func NewHTMLWriter() *HTMLWriter { + return &HTMLWriter{ + HighlightCodeBlock: func(source, lang string) string { return html.EscapeString(source) }, + } +} + +func (w *HTMLWriter) emptyClone() *HTMLWriter { + wcopy := *w + wcopy.stringBuilder = stringBuilder{} + return &wcopy +} + +func (w *HTMLWriter) before(d *Document) {} + +func (w *HTMLWriter) after(d *Document) { + fs := d.Footnotes + if len(fs.Definitions) == 0 { + return + } + w.WriteString(`
` + "\n") + w.WriteString(`

` + fs.Title + `

` + "\n") + w.WriteString(`
` + "\n") + for _, name := range fs.Order { + w.writeNodes(fs.Definitions[name]) + } + w.WriteString("
\n
\n") +} + +func (w *HTMLWriter) writeNodes(ns ...Node) { + for _, n := range ns { + switch n := n.(type) { + case Keyword, Comment: + continue + case Headline: + w.writeHeadline(n) + case Block: + w.writeBlock(n) + + case FootnoteDefinition: + w.writeFootnoteDefinition(n) + + case List: + w.writeList(n) + case ListItem: + w.writeListItem(n) + + case Table: + w.writeTable(n) + case TableHeader: + w.writeTableHeader(n) + case TableRow: + w.writeTableRow(n) + case TableSeparator: + w.writeTableSeparator(n) + + case Paragraph: + w.writeParagraph(n) + case HorizontalRule: + w.writeHorizontalRule(n) + case Line: + w.writeLine(n) + + case Text: + w.writeText(n) + case Emphasis: + w.writeEmphasis(n) + case Linebreak: + w.writeLinebreak(n) + case RegularLink: + w.writeRegularLink(n) + case FootnoteLink: + w.writeFootnoteLink(n) + default: + if n != nil { + panic(fmt.Sprintf("bad node %#v", n)) + } + } + } +} + +func (w *HTMLWriter) writeLines(lines []Node) { + for i, line := range lines { + w.writeNodes(line) + if i != len(lines)-1 && line.(Line).Children != nil { + w.WriteString(" ") + } + } +} + +func (w *HTMLWriter) writeBlock(b Block) { + w.WriteString("") + lang := "" + if len(b.Parameters) >= 1 { + lang = b.Parameters[0] + } + lines := []string{} + for _, n := range b.Children { + lines = append(lines, n.(Line).Children[0].(Text).Content) + } + w.WriteString(w.HighlightCodeBlock(strings.Join(lines, "\n"), lang)) + w.WriteString("\n") +} + +func (w *HTMLWriter) writeFootnoteDefinition(f FootnoteDefinition) { + w.WriteString(`
` + "\n") + w.WriteString(fmt.Sprintf(`%s`, f.Name, f.Name) + "\n") + w.writeNodes(f.Children...) + w.WriteString("
\n") +} + +func (w *HTMLWriter) writeHeadline(h Headline) { + w.WriteString(fmt.Sprintf("", h.Lvl)) + w.writeNodes(h.Title...) + w.WriteString(fmt.Sprintf("\n", h.Lvl)) + w.writeNodes(h.Children...) +} + +func (w *HTMLWriter) writeText(t Text) { + w.WriteString(html.EscapeString(t.Content)) +} + +func (w *HTMLWriter) writeEmphasis(e Emphasis) { + tags, ok := emphasisTags[e.Kind] + if !ok { + panic(fmt.Sprintf("bad emphasis %#v", e)) + } + w.WriteString(tags[0]) + w.writeNodes(e.Content...) + w.WriteString(tags[1]) +} + +func (w *HTMLWriter) writeLinebreak(l Linebreak) { + w.WriteString("
\n") +} + +func (w *HTMLWriter) writeFootnoteLink(l FootnoteLink) { + name := html.EscapeString(l.Name) + w.WriteString(fmt.Sprintf(`%s`, name, name)) + +} + +func (w *HTMLWriter) writeRegularLink(l RegularLink) { + url := html.EscapeString(l.URL) + descriptionWriter := w.emptyClone() + descriptionWriter.writeNodes(l.Description...) + description := descriptionWriter.String() + switch l.Protocol { + case "file": // TODO + url = url[len("file:"):] + if strings.Contains(".png.jpg.jpeg.gif", path.Ext(l.URL)) { + w.WriteString(fmt.Sprintf(`%s`, url, description, description)) + } else { + w.WriteString(fmt.Sprintf(`%s`, url, description)) + } + default: + w.WriteString(fmt.Sprintf(`%s`, url, description)) + } +} + +func (w *HTMLWriter) writeList(l List) { + tags, ok := listTags[l.Kind] + if !ok { + panic(fmt.Sprintf("bad list kind %#v", l)) + } + w.WriteString(tags[0] + "\n") + w.writeNodes(l.Items...) + w.WriteString(tags[1] + "\n") +} + +func (w *HTMLWriter) writeListItem(li ListItem) { + w.WriteString("
  • ") + if len(li.Children) == 1 { + if p, ok := li.Children[0].(Paragraph); ok { + w.writeLines(p.Children) + } + } else { + w.writeNodes(li.Children...) + } + w.WriteString("
  • \n") +} + +func (w *HTMLWriter) writeLine(l Line) { + w.writeNodes(l.Children...) +} + +func (w *HTMLWriter) writeParagraph(p Paragraph) { + if len(p.Children) == 1 && p.Children[0].(Line).Children == nil { + return + } + w.WriteString("

    ") + w.writeLines(p.Children) + w.WriteString("

    \n") +} + +func (w *HTMLWriter) writeHorizontalRule(h HorizontalRule) { + w.WriteString("
    \n") +} + +func (w *HTMLWriter) writeTable(t Table) { + w.WriteString("") + w.writeNodes(t.Header) + w.WriteString("") + w.writeNodes(t.Rows...) + w.WriteString("\n
    \n") +} + +func (w *HTMLWriter) writeTableRow(t TableRow) { + w.WriteString("\n\n") + for _, column := range t.Columns { + w.WriteString("") + w.writeNodes(column...) + w.WriteString("") + } + w.WriteString("\n\n") +} + +func (w *HTMLWriter) writeTableHeader(t TableHeader) { + w.WriteString("\n\n") + for _, column := range t.Columns { + w.WriteString("") + w.writeNodes(column...) + w.WriteString("") + } + w.WriteString("\n\n") +} + +func (w *HTMLWriter) writeTableSeparator(t TableSeparator) { + w.WriteString("\n\n") +} diff --git a/html_test.go b/html_test.go new file mode 100644 index 0000000..fe2daf2 --- /dev/null +++ b/html_test.go @@ -0,0 +1,20 @@ +package org + +import ( + "strings" + "testing" +) + +func TestHTMLWriter(t *testing.T) { + for _, path := range orgTestFiles() { + reader, writer := strings.NewReader(fileString(path)), NewHTMLWriter() + actual := NewDocument().Parse(reader).Write(writer).String() + expected := fileString(path[:len(path)-len(".org")] + ".html") + + if expected != actual { + t.Errorf("%s:\n%s'", path, diff(actual, expected)) + } else { + t.Logf("%s: passed!", path) + } + } +} diff --git a/testdata/example.html b/testdata/example.html new file mode 100644 index 0000000..1f10157 --- /dev/null +++ b/testdata/example.html @@ -0,0 +1,78 @@ +

    Motivation

    +

    To validate the parser we'll try printing the AST back to org-mode source - if that works we can be kind of sure that the parsing worked. At least I hope so - I would like to get around writing tests for the individual parsing functions...

    +

    Headlines with TODO status, priority & tags

    +

    Headline with todo status & priority

    +

    Headline with TODO status

    +

    Headline with tags & priority

    +

    this one is cheating a little as tags are ALWAYS printed right aligned to a given column number...

    +

    Lists

    + +

    Inline

    + +

    Footnotes

    + +
    +

    Footnotes

    +
    +
    +1 +

    Foobar

    +
    +
    +
    diff --git a/testdata/example.org b/testdata/example.org index 90cad73..3460855 100644 --- a/testdata/example.org +++ b/testdata/example.org @@ -38,6 +38,7 @@ this one is cheating a little as tags are ALWAYS printed right aligned to a give ** Inline - /emphasis/ and a hard line break \\ + see? - /.emphasis with dot border chars./ - /emphasis with a slash/inside/ - /emphasis/ followed by raw text with slash /