From dce67eaddfb92c6a31f5c87e402930077b54e90b Mon Sep 17 00:00:00 2001 From: Niklas Fasching Date: Tue, 18 Dec 2018 00:16:41 +0100 Subject: [PATCH] Improve ATTR_HTML keyword parsing Org mode separates kvs not as initially assumed by whitespace (~ csv) but rather at keywords (~ :\w+). This is still not replicating Org mode behaviour though, as I decided against ignoring repeated definitions of an attribute. Instead we stack their values (and those existing on the element) for certain attributes (class, style for now). e.g. [[foo]] preceded by two ATTR_HTML definitions of :class would become an element carrying only one of the two class values in Org mode but becomes an element carrying both class values with go-org. --- README.org | 3 --- org/keyword.go | 22 ++++++++++++++++------ org/org.go | 5 ----- org/testdata/keywords.html | 4 ++-- org/testdata/keywords.org | 7 ++++--- org/testdata/misc.html | 2 +- org/testdata/misc.org | 4 ++-- 7 files changed, 25 insertions(+), 22 deletions(-) diff --git a/README.org b/README.org index 72f732d..ddfed79 100644 --- a/README.org +++ b/README.org @@ -3,9 +3,6 @@ A basic Org mode parser in go. Take a look at [[https://niklasfasching.github.io/go-org/][github pages]] for some examples and an online org -> html demo (requires wasm support). * next - test against [[https://raw.githubusercontent.com/kaushalmodi/ox-hugo/master/test/site/content-org/all-posts.org][ox-hugo all-posts.org]] - - fix attr_html splitting - not whitespace, :x keywords, e.g. - #+attr_html: :caption A unicorn! 
:style width: 100%; height: 100%; :caption foo - [[/images/org-mode-unicorn-logo.png]] - more keywords: https://orgmode.org/manual/In_002dbuffer-settings.html - headlines - unique ids diff --git a/org/keyword.go b/org/keyword.go index cf1121c..ec10967 100644 --- a/org/keyword.go +++ b/org/keyword.go @@ -1,7 +1,6 @@ package org import ( - "encoding/csv" "fmt" "io/ioutil" "path/filepath" @@ -35,6 +34,7 @@ var keywordRegexp = regexp.MustCompile(`^(\s*)#\+([^:]+):(\s+(.*)|(\s*)$)`) var commentRegexp = regexp.MustCompile(`^(\s*)#(.*)`) var includeFileRegexp = regexp.MustCompile(`(?i)^"([^"]+)" (src|example|export) (\w+)$`) +var attributeRegexp = regexp.MustCompile(`(?:^|\s+)(:[-\w]+)\s+(.*)$`) func lexKeywordOrComment(line string) (token, bool) { if m := keywordRegexp.FindStringSubmatch(line); m != nil { @@ -77,11 +77,21 @@ func (d *Document) parseAffiliated(i int, stop stopFn) (int, Node) { case "CAPTION": meta.Caption = append(meta.Caption, d.parseInline(k.Value)) case "ATTR_HTML": - r := csv.NewReader(strings.NewReader(k.Value)) - r.Comma = ' ' - attributes, err := r.Read() - if err != nil { - return 0, nil + attributes, rest := []string{}, k.Value + for { + if k, m := "", attributeRegexp.FindStringSubmatch(rest); m != nil { + k, rest = m[1], m[2] + attributes = append(attributes, k) + if v, m := "", attributeRegexp.FindStringSubmatchIndex(rest); m != nil { + v, rest = rest[:m[0]], rest[m[0]:] + attributes = append(attributes, v) + } else { + attributes = append(attributes, strings.TrimSpace(rest)) + break + } + } else { + break + } } meta.HTMLAttributes = append(meta.HTMLAttributes, attributes) default: diff --git a/org/org.go b/org/org.go index fe90179..ad82a65 100644 --- a/org/org.go +++ b/org/org.go @@ -184,11 +184,6 @@ func (w *OrgWriter) writeNodeWithMeta(n NodeWithMeta) { } for _, attributes := range n.Meta.HTMLAttributes { w.WriteString("#+ATTR_HTML: ") - for i := 0; i < len(attributes)-1; i += 2 { - if strings.ContainsAny(attributes[i+1], "\t ") { - 
attributes[i+1] = fmt.Sprintf(`"%s"`, attributes[i+1]) - } - } w.WriteString(strings.Join(attributes, " ") + "\n") } w.writeNodes(n.Node) diff --git a/org/testdata/keywords.html b/org/testdata/keywords.html index 32c4cad..135c47e 100644 --- a/org/testdata/keywords.html +++ b/org/testdata/keywords.html @@ -1,5 +1,5 @@
-
+
echo "a bash source block with custom html attributes"
 
@@ -11,7 +11,7 @@ and multiple lines of c and an image with custom html attributes and a caption

-https://placekitten.com/200/200#.png +https://placekitten.com/200/200#.png
kittens!
diff --git a/org/testdata/keywords.org b/org/testdata/keywords.org index f6f0f6b..e6b8ce9 100644 --- a/org/testdata/keywords.org +++ b/org/testdata/keywords.org @@ -1,13 +1,14 @@ #+CAPTION: and _multiple_ #+CAPTION: lines of *captions*! -#+ATTR_HTML: :class "a b c" -#+ATTR_HTML: :id it +#+ATTR_HTML: :class a b +#+ATTR_HTML: :id it :class c d #+BEGIN_SRC sh echo "a bash source block with custom html attributes" #+END_SRC and an image with custom html attributes and a caption #+CAPTION: kittens! -#+ATTR_HTML: :style "border: 10px solid black" +#+ATTR_HTML: :style height: 100%; :id overwritten +#+ATTR_HTML: :style border: 10px solid black; :id kittens [[https://placekitten.com/200/200#.png]] diff --git a/org/testdata/misc.html b/org/testdata/misc.html index afd2c95..9213416 100644 --- a/org/testdata/misc.html +++ b/org/testdata/misc.html @@ -207,7 +207,7 @@ foo DONE #72: Support for #+ATTR_HTML -Go is fine though. +Go is fine though.

DONE #75: Not parsing nested lists correctly diff --git a/org/testdata/misc.org b/org/testdata/misc.org index 0ef55c5..b3df75b 100644 --- a/org/testdata/misc.org +++ b/org/testdata/misc.org @@ -60,8 +60,8 @@ or ~that~ foo. [[https://www.example.com][/this/ *is* _markup_!]] #+END_QUOTE *** DONE [[https://github.com/chaseadamsio/goorgeous/issues/72][#72]]: Support for #+ATTR_HTML -#+ATTR_HTML: :alt "Go is fine though." -#+ATTR_HTML: :width 300 :style "border:2px solid black;" +#+ATTR_HTML: :alt Go is fine though. :id gopher-image +#+ATTR_HTML: :width 300 :style border:2px solid black; [[https://golang.org/doc/gopher/pkg.png]] *** DONE [[https://github.com/chaseadamsio/goorgeous/issues/75][#75]]: Not parsing nested lists correctly - bullet 1