Improve ATTR_HTML keyword parsing

Org mode does not separate key-value pairs at whitespace (~ csv), as was
initially assumed, but rather at keywords (~ :\w+).

This still does not replicate Org mode behaviour exactly though, as I decided
against ignoring repeated definitions of an attribute. Instead, we stack their
values (together with those already existing on the element) for certain
attributes (class and style, for now).

e.g. with both `:class a` and `:class b` set via #+ATTR_HTML,
[[foo]]

would become <foo class="a"> in Org mode but becomes <foo class="a b"> with
go-org.
This commit is contained in:
Niklas Fasching 2018-12-18 00:16:41 +01:00
parent 0e64f9df7f
commit dce67eaddf
7 changed files with 25 additions and 22 deletions

View file

@ -3,9 +3,6 @@ A basic Org mode parser in go.
Take a look at [[https://niklasfasching.github.io/go-org/][github pages]] for some examples and an online org -> html demo (requires wasm support).
* next
- test against [[https://raw.githubusercontent.com/kaushalmodi/ox-hugo/master/test/site/content-org/all-posts.org][ox-hugo all-posts.org]]
- fix attr_html splitting - not whitespace, :x keywords, e.g.
#+attr_html: :caption A unicorn! :style width: 100%; height: 100%; :caption foo
[[/images/org-mode-unicorn-logo.png]]
- more keywords: https://orgmode.org/manual/In_002dbuffer-settings.html
- headlines
- unique ids

View file

@ -1,7 +1,6 @@
package org
import (
"encoding/csv"
"fmt"
"io/ioutil"
"path/filepath"
@ -35,6 +34,7 @@ var keywordRegexp = regexp.MustCompile(`^(\s*)#\+([^:]+):(\s+(.*)|(\s*)$)`)
var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`)
var includeFileRegexp = regexp.MustCompile(`(?i)^"([^"]+)" (src|example|export) (\w+)$`)
var attributeRegexp = regexp.MustCompile(`(?:^|\s+)(:[-\w]+)\s+(.*)$`)
func lexKeywordOrComment(line string) (token, bool) {
if m := keywordRegexp.FindStringSubmatch(line); m != nil {
@ -77,11 +77,21 @@ func (d *Document) parseAffiliated(i int, stop stopFn) (int, Node) {
case "CAPTION":
meta.Caption = append(meta.Caption, d.parseInline(k.Value))
case "ATTR_HTML":
r := csv.NewReader(strings.NewReader(k.Value))
r.Comma = ' '
attributes, err := r.Read()
if err != nil {
return 0, nil
attributes, rest := []string{}, k.Value
for {
if k, m := "", attributeRegexp.FindStringSubmatch(rest); m != nil {
k, rest = m[1], m[2]
attributes = append(attributes, k)
if v, m := "", attributeRegexp.FindStringSubmatchIndex(rest); m != nil {
v, rest = rest[:m[0]], rest[m[0]:]
attributes = append(attributes, v)
} else {
attributes = append(attributes, strings.TrimSpace(rest))
break
}
} else {
break
}
}
meta.HTMLAttributes = append(meta.HTMLAttributes, attributes)
default:

View file

@ -184,11 +184,6 @@ func (w *OrgWriter) writeNodeWithMeta(n NodeWithMeta) {
}
for _, attributes := range n.Meta.HTMLAttributes {
w.WriteString("#+ATTR_HTML: ")
for i := 0; i < len(attributes)-1; i += 2 {
if strings.ContainsAny(attributes[i+1], "\t ") {
attributes[i+1] = fmt.Sprintf(`"%s"`, attributes[i+1])
}
}
w.WriteString(strings.Join(attributes, " ") + "\n")
}
w.writeNodes(n.Node)

View file

@ -1,5 +1,5 @@
<figure>
<div class="highlight a b c" id="it">
<div class="highlight a b c d" id="it">
<pre>echo &#34;a bash source block with custom html attributes&#34;
</pre>
</div>
@ -11,7 +11,7 @@ and <span style="text-decoration: underline;">multiple</span> lines of <strong>c
and an image with custom html attributes and a caption
</p>
<figure>
<img src="https://placekitten.com/200/200#.png" alt="https://placekitten.com/200/200#.png" title="https://placekitten.com/200/200#.png" style="border: 10px solid black"/>
<img src="https://placekitten.com/200/200#.png" alt="https://placekitten.com/200/200#.png" title="https://placekitten.com/200/200#.png" style="height: 100%; border: 10px solid black;" id="kittens"/>
<figcaption>
kittens!
</figcaption>

View file

@ -1,13 +1,14 @@
#+CAPTION: and _multiple_
#+CAPTION: lines of *captions*!
#+ATTR_HTML: :class "a b c"
#+ATTR_HTML: :id it
#+ATTR_HTML: :class a b
#+ATTR_HTML: :id it :class c d
#+BEGIN_SRC sh
echo "a bash source block with custom html attributes"
#+END_SRC
and an image with custom html attributes and a caption
#+CAPTION: kittens!
#+ATTR_HTML: :style "border: 10px solid black"
#+ATTR_HTML: :style height: 100%; :id overwritten
#+ATTR_HTML: :style border: 10px solid black; :id kittens
[[https://placekitten.com/200/200#.png]]

View file

@ -207,7 +207,7 @@ foo
<span class="todo">DONE</span>
<a href="https://github.com/chaseadamsio/goorgeous/issues/72">#72</a>: Support for #+ATTR_HTML
</h3>
<img src="https://golang.org/doc/gopher/pkg.png" alt="Go is fine though." title="https://golang.org/doc/gopher/pkg.png" width="300" style="border:2px solid black;"/>
<img src="https://golang.org/doc/gopher/pkg.png" alt="Go is fine though." title="https://golang.org/doc/gopher/pkg.png" id="gopher-image" width="300" style="border:2px solid black;"/>
<h3>
<span class="todo">DONE</span>
<a href="https://github.com/chaseadamsio/goorgeous/issues/75">#75</a>: Not parsing nested lists correctly

View file

@ -60,8 +60,8 @@ or ~that~ foo.
[[https://www.example.com][/this/ *is* _markup_!]]
#+END_QUOTE
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/72][#72]]: Support for #+ATTR_HTML
#+ATTR_HTML: :alt "Go is fine though."
#+ATTR_HTML: :width 300 :style "border:2px solid black;"
#+ATTR_HTML: :alt Go is fine though. :id gopher-image
#+ATTR_HTML: :width 300 :style border:2px solid black;
[[https://golang.org/doc/gopher/pkg.png]]
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/75][#75]]: Not parsing nested lists correctly
- bullet 1