Improve ATTR_HTML keyword parsing

Org mode does not separate key-value pairs at whitespace (~ csv), as initially
assumed, but rather at keywords (~ :\w+).

This still does not replicate Org mode's behaviour exactly, though, as I decided
against ignoring repeated definitions of an attribute. Instead we stack their
values (together with those already existing on the element) for certain
attributes (class and style for now).

e.g. with ATTR_HTML defining both ":class a" and ":class b" for
[[foo]]

the result would be <foo class="a"> in Org mode but is <foo class="a b"> with
go-org.
This commit is contained in:
Niklas Fasching 2018-12-18 00:16:41 +01:00
parent 0e64f9df7f
commit dce67eaddf
7 changed files with 25 additions and 22 deletions

View file

@ -3,9 +3,6 @@ A basic Org mode parser in go.
Take a look at [[https://niklasfasching.github.io/go-org/][github pages]] for some examples and an online org -> html demo (requires wasm support). Take a look at [[https://niklasfasching.github.io/go-org/][github pages]] for some examples and an online org -> html demo (requires wasm support).
* next * next
- test against [[https://raw.githubusercontent.com/kaushalmodi/ox-hugo/master/test/site/content-org/all-posts.org][ox-hugo all-posts.org]] - test against [[https://raw.githubusercontent.com/kaushalmodi/ox-hugo/master/test/site/content-org/all-posts.org][ox-hugo all-posts.org]]
- fix attr_html splitting - not whitespace, :x keywords, e.g.
#+attr_html: :caption A unicorn! :style width: 100%; height: 100%; :caption foo
[[/images/org-mode-unicorn-logo.png]]
- more keywords: https://orgmode.org/manual/In_002dbuffer-settings.html - more keywords: https://orgmode.org/manual/In_002dbuffer-settings.html
- headlines - headlines
- unique ids - unique ids

View file

@ -1,7 +1,6 @@
package org package org
import ( import (
"encoding/csv"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"path/filepath" "path/filepath"
@ -35,6 +34,7 @@ var keywordRegexp = regexp.MustCompile(`^(\s*)#\+([^:]+):(\s+(.*)|(\s*)$)`)
var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`) var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`)
var includeFileRegexp = regexp.MustCompile(`(?i)^"([^"]+)" (src|example|export) (\w+)$`) var includeFileRegexp = regexp.MustCompile(`(?i)^"([^"]+)" (src|example|export) (\w+)$`)
var attributeRegexp = regexp.MustCompile(`(?:^|\s+)(:[-\w]+)\s+(.*)$`)
func lexKeywordOrComment(line string) (token, bool) { func lexKeywordOrComment(line string) (token, bool) {
if m := keywordRegexp.FindStringSubmatch(line); m != nil { if m := keywordRegexp.FindStringSubmatch(line); m != nil {
@ -77,11 +77,21 @@ func (d *Document) parseAffiliated(i int, stop stopFn) (int, Node) {
case "CAPTION": case "CAPTION":
meta.Caption = append(meta.Caption, d.parseInline(k.Value)) meta.Caption = append(meta.Caption, d.parseInline(k.Value))
case "ATTR_HTML": case "ATTR_HTML":
r := csv.NewReader(strings.NewReader(k.Value)) attributes, rest := []string{}, k.Value
r.Comma = ' ' for {
attributes, err := r.Read() if k, m := "", attributeRegexp.FindStringSubmatch(rest); m != nil {
if err != nil { k, rest = m[1], m[2]
return 0, nil attributes = append(attributes, k)
if v, m := "", attributeRegexp.FindStringSubmatchIndex(rest); m != nil {
v, rest = rest[:m[0]], rest[m[0]:]
attributes = append(attributes, v)
} else {
attributes = append(attributes, strings.TrimSpace(rest))
break
}
} else {
break
}
} }
meta.HTMLAttributes = append(meta.HTMLAttributes, attributes) meta.HTMLAttributes = append(meta.HTMLAttributes, attributes)
default: default:

View file

@ -184,11 +184,6 @@ func (w *OrgWriter) writeNodeWithMeta(n NodeWithMeta) {
} }
for _, attributes := range n.Meta.HTMLAttributes { for _, attributes := range n.Meta.HTMLAttributes {
w.WriteString("#+ATTR_HTML: ") w.WriteString("#+ATTR_HTML: ")
for i := 0; i < len(attributes)-1; i += 2 {
if strings.ContainsAny(attributes[i+1], "\t ") {
attributes[i+1] = fmt.Sprintf(`"%s"`, attributes[i+1])
}
}
w.WriteString(strings.Join(attributes, " ") + "\n") w.WriteString(strings.Join(attributes, " ") + "\n")
} }
w.writeNodes(n.Node) w.writeNodes(n.Node)

View file

@ -1,5 +1,5 @@
<figure> <figure>
<div class="highlight a b c" id="it"> <div class="highlight a b c d" id="it">
<pre>echo &#34;a bash source block with custom html attributes&#34; <pre>echo &#34;a bash source block with custom html attributes&#34;
</pre> </pre>
</div> </div>
@ -11,7 +11,7 @@ and <span style="text-decoration: underline;">multiple</span> lines of <strong>c
and an image with custom html attributes and a caption and an image with custom html attributes and a caption
</p> </p>
<figure> <figure>
<img src="https://placekitten.com/200/200#.png" alt="https://placekitten.com/200/200#.png" title="https://placekitten.com/200/200#.png" style="border: 10px solid black"/> <img src="https://placekitten.com/200/200#.png" alt="https://placekitten.com/200/200#.png" title="https://placekitten.com/200/200#.png" style="height: 100%; border: 10px solid black;" id="kittens"/>
<figcaption> <figcaption>
kittens! kittens!
</figcaption> </figcaption>

View file

@ -1,13 +1,14 @@
#+CAPTION: and _multiple_ #+CAPTION: and _multiple_
#+CAPTION: lines of *captions*! #+CAPTION: lines of *captions*!
#+ATTR_HTML: :class "a b c" #+ATTR_HTML: :class a b
#+ATTR_HTML: :id it #+ATTR_HTML: :id it :class c d
#+BEGIN_SRC sh #+BEGIN_SRC sh
echo "a bash source block with custom html attributes" echo "a bash source block with custom html attributes"
#+END_SRC #+END_SRC
and an image with custom html attributes and a caption and an image with custom html attributes and a caption
#+CAPTION: kittens! #+CAPTION: kittens!
#+ATTR_HTML: :style "border: 10px solid black" #+ATTR_HTML: :style height: 100%; :id overwritten
#+ATTR_HTML: :style border: 10px solid black; :id kittens
[[https://placekitten.com/200/200#.png]] [[https://placekitten.com/200/200#.png]]

View file

@ -207,7 +207,7 @@ foo
<span class="todo">DONE</span> <span class="todo">DONE</span>
<a href="https://github.com/chaseadamsio/goorgeous/issues/72">#72</a>: Support for #+ATTR_HTML <a href="https://github.com/chaseadamsio/goorgeous/issues/72">#72</a>: Support for #+ATTR_HTML
</h3> </h3>
<img src="https://golang.org/doc/gopher/pkg.png" alt="Go is fine though." title="https://golang.org/doc/gopher/pkg.png" width="300" style="border:2px solid black;"/> <img src="https://golang.org/doc/gopher/pkg.png" alt="Go is fine though." title="https://golang.org/doc/gopher/pkg.png" id="gopher-image" width="300" style="border:2px solid black;"/>
<h3> <h3>
<span class="todo">DONE</span> <span class="todo">DONE</span>
<a href="https://github.com/chaseadamsio/goorgeous/issues/75">#75</a>: Not parsing nested lists correctly <a href="https://github.com/chaseadamsio/goorgeous/issues/75">#75</a>: Not parsing nested lists correctly

View file

@ -60,8 +60,8 @@ or ~that~ foo.
[[https://www.example.com][/this/ *is* _markup_!]] [[https://www.example.com][/this/ *is* _markup_!]]
#+END_QUOTE #+END_QUOTE
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/72][#72]]: Support for #+ATTR_HTML *** DONE [[https://github.com/chaseadamsio/goorgeous/issues/72][#72]]: Support for #+ATTR_HTML
#+ATTR_HTML: :alt "Go is fine though." #+ATTR_HTML: :alt Go is fine though. :id gopher-image
#+ATTR_HTML: :width 300 :style "border:2px solid black;" #+ATTR_HTML: :width 300 :style border:2px solid black;
[[https://golang.org/doc/gopher/pkg.png]] [[https://golang.org/doc/gopher/pkg.png]]
*** DONE [[https://github.com/chaseadamsio/goorgeous/issues/75][#75]]: Not parsing nested lists correctly *** DONE [[https://github.com/chaseadamsio/goorgeous/issues/75][#75]]: Not parsing nested lists correctly
- bullet 1 - bullet 1