From b61e49eb855db4f488fa464e06f3f064478866dc Mon Sep 17 00:00:00 2001
From: Niklas Fasching <niklas.fasching@gmail.com>
Date: Mon, 9 Dec 2019 19:37:39 +0100
Subject: [PATCH] Preserve whitespace (indentation) inside paragraphs

We want original whitespace to be rendered in some cases (e.g. verse
blocks). This requires information about the original whitespace to be
preserved during paragraph parsing. As html ignores (collapses) whitespace by
default we don't have to adapt the html writer and can just selectively enable
rendering of the preserved whitespace wherever we want it using
css (white-space: pre).

To differentiate meaningful whitespace from document structure based
indentation (i.e. list item base indentation) we need to introduce
document.baseLvl. A paragraph by itself does not have enough information to
differentiate both kinds of whitespace and needs this information as context
[0].

As we're already touching list indentation, I went along and improved (fixed?)
descriptive list item indentation rendering in the org writer (it should match
emacs tab behavior - i.e. indent subsequent lines up to the `:: `).

[0] e.g. list items can contain blank lines - a paragraph starting with a blank
line would not know that it is part of a list item / has a base indentation -
the blank line would suggest a baseLvl of 0.
---
 org/document.go                |  1 +
 org/list.go                    |  4 +++
 org/org_writer.go              |  3 +-
 org/paragraph.go               |  7 ++--
 org/testdata/blocks.html       | 61 ++++++++++++++++++++++++++++++----
 org/testdata/blocks.org        | 40 ++++++++++++++++++++--
 org/testdata/blocks.pretty_org | 40 ++++++++++++++++++++--
 org/testdata/lists.pretty_org  |  4 +--
 8 files changed, 141 insertions(+), 19 deletions(-)

diff --git a/org/document.go b/org/document.go
index a9697c4..3c60e5b 100644
--- a/org/document.go
+++ b/org/document.go
@@ -35,6 +35,7 @@ type Document struct {
 	*Configuration
 	Path           string // Path of the file containing the parse input - used to resolve relative paths during parsing (e.g. INCLUDE).
 	tokens         []token
+	baseLvl        int
 	Nodes          []Node
 	NamedNodes     map[string]Node
 	Outline        Outline           // Outline is a Table Of Contents for the document and contains all sections (headline + content).
diff --git a/org/list.go b/org/list.go
index 462e9fa..54f5b2d 100644
--- a/org/list.go
+++ b/org/list.go
@@ -81,12 +81,15 @@ func (d *Document) parseList(i int, parentStop stopFn) (int, Node) {
 func (d *Document) parseListItem(l List, i int, parentStop stopFn) (int, Node) {
 	start, nodes, bullet := i, []Node{}, d.tokens[i].matches[2]
 	minIndent, dterm, content, status := d.tokens[i].lvl+len(bullet), "", d.tokens[i].content, ""
+	originalBaseLvl := d.baseLvl
+	d.baseLvl = minIndent + 1
 	if m := listItemStatusRegexp.FindStringSubmatch(content); m != nil {
 		status, content = m[1], content[len("[ ] "):]
 	}
 	if l.Kind == "descriptive" {
 		if m := descriptiveListItemRegexp.FindStringIndex(content); m != nil {
 			dterm, content = content[:m[0]], content[m[1]:]
+			d.baseLvl = strings.Index(d.tokens[i].matches[0], " ::") + 4
 		}
 	}
 
@@ -103,6 +106,7 @@ func (d *Document) parseListItem(l List, i int, parentStop stopFn) (int, Node) {
 		i += consumed
 		nodes = append(nodes, node)
 	}
+	d.baseLvl = originalBaseLvl
 	if l.Kind == "descriptive" {
 		return i - start, DescriptiveListItem{bullet, status, d.parseInline(dterm), nodes}
 	}
diff --git a/org/org_writer.go b/org/org_writer.go
index 8855df9..d5a33ff 100644
--- a/org/org_writer.go
+++ b/org/org_writer.go
@@ -196,11 +196,12 @@ func (w *OrgWriter) WriteListItem(li ListItem) {
 }
 
 func (w *OrgWriter) WriteDescriptiveListItem(di DescriptiveListItem) {
+	indent := w.indent + strings.Repeat(" ", len(di.Bullet)+1)
 	w.WriteString(w.indent + di.Bullet)
 	if di.Status != "" {
 		w.WriteString(fmt.Sprintf(" [%s]", di.Status))
+		indent = indent + strings.Repeat(" ", len(di.Status)+3)
 	}
-	indent := w.indent + strings.Repeat(" ", len(di.Bullet)+1)
 	if len(di.Term) != 0 {
 		term := w.WriteNodesAsString(di.Term...)
 		w.WriteString(" " + term + " ::")
diff --git a/org/paragraph.go b/org/paragraph.go
index 24f0554..2c58eac 100644
--- a/org/paragraph.go
+++ b/org/paragraph.go
@@ -1,6 +1,7 @@
 package org
 
 import (
+	"math"
 	"regexp"
 	"strings"
 )
@@ -27,12 +28,12 @@ func lexHorizontalRule(line string) (token, bool) {
 
 func (d *Document) parseParagraph(i int, parentStop stopFn) (int, Node) {
 	lines, start := []string{d.tokens[i].content}, i
-	i++
 	stop := func(d *Document, i int) bool {
 		return parentStop(d, i) || d.tokens[i].kind != "text" || d.tokens[i].content == ""
 	}
-	for ; !stop(d, i); i++ {
-		lines = append(lines, d.tokens[i].content)
+	for i += 1; !stop(d, i); i++ {
+		lvl := math.Max(float64(d.tokens[i].lvl-d.baseLvl), 0)
+		lines = append(lines, strings.Repeat(" ", int(lvl))+d.tokens[i].content)
 	}
 	consumed := i - start
 	return consumed, Paragraph{d.parseInline(strings.Join(lines, "\n"))}
diff --git a/org/testdata/blocks.html b/org/testdata/blocks.html
index 38a304c..a470343 100644
--- a/org/testdata/blocks.html
+++ b/org/testdata/blocks.html
@@ -87,12 +87,12 @@ paragraphs
 <p>
 …
 </p>
+<p>
+    whitespace is honored and not removed (but is not displayed because that&#39;s how html works by default)
+      it can be made visible using css (e.g. <code class="verbatim">white-space: pre</code>).
+</p>
 </li>
 </ul>
-<p>
-also whitespace is not significant
-and superfluous whitespace (at the beginning of the line) is removed
-</p>
 </blockquote>
 <script>
 console.log("Hello World!")
@@ -134,11 +134,58 @@ list item 2
 <p>
 #+END_EXAMPLE
 </p>
-<p>
-#+BEGIN_QUOTE
-</p>
+<blockquote>
 <pre class="example">
 #+END_QUOTE
 </pre>
+</blockquote>
+</li>
+<li>
+<p>
+verse blocks
+</p>
+<ul>
+<li>
+<p>
+emacs / ox-hugo rendering
+</p>
+<p class="verse">
+Great clouds overhead<br />
+Tiny black birds rise and fall<br />
+Snow covers Emacs<br />
+<br />
+&nbsp;&nbsp;&nbsp;---AlexSchroeder<br />
+</p>
+</li>
+<li>
+<p>
+go-org rendering
+</p>
+<div class="src src-html">
+<div class="highlight">
+<pre>
+&lt;style&gt;
+.verse-block p { white-space: pre; }
+.verse-block p + p { margin: 0; }
+&lt;/style&gt;
+</pre>
+</div>
+</div>
+<style>
+.verse-block p { white-space: pre; }
+.verse-block p + p { margin: 0; }
+</style>
+<div class="verse-block">
+<p>
+Great clouds overhead
+Tiny black birds rise and fall
+Snow covers Emacs
+</p>
+<p>
+    —AlexSchroeder
+</p>
+</div>
+</li>
+</ul>
 </li>
 </ul>
diff --git a/org/testdata/blocks.org b/org/testdata/blocks.org
index e9e9624..44d93e5 100644
--- a/org/testdata/blocks.org
+++ b/org/testdata/blocks.org
@@ -45,9 +45,8 @@ blocks like the quote block parse their content and can contain
 - paragraphs
 - ...
 
-
-      also whitespace is not significant
-            and superfluous whitespace (at the beginning of the line) is removed
+      whitespace is honored and not removed (but is not displayed because that's how html works by default)
+        it can be made visible using css (e.g. =white-space: pre=).
 #+END_QUOTE
 
 #+BEGIN_EXPORT html
@@ -76,3 +75,38 @@ this unindented line is outside of the list item
   #+BEGIN_EXAMPLE
   #+END_QUOTE
   #+END_EXAMPLE
+  #+END_QUOTE
+
+- verse blocks
+  - emacs / ox-hugo rendering
+    #+BEGIN_EXPORT html
+    <p class="verse">
+    Great clouds overhead<br />
+    Tiny black birds rise and fall<br />
+    Snow covers Emacs<br />
+    <br />
+    &nbsp;&nbsp;&nbsp;---AlexSchroeder<br />
+    </p>
+    #+END_EXPORT
+  - go-org rendering
+    #+BEGIN_SRC html
+    <style>
+    .verse-block p { white-space: pre; }
+    .verse-block p + p { margin: 0; }
+    </style>
+    #+END_SRC
+
+    #+BEGIN_EXPORT html
+    <style>
+    .verse-block p { white-space: pre; }
+    .verse-block p + p { margin: 0; }
+    </style>
+    #+END_EXPORT
+
+    #+BEGIN_VERSE
+    Great clouds overhead
+    Tiny black birds rise and fall
+    Snow covers Emacs
+
+        ---AlexSchroeder
+    #+END_VERSE
diff --git a/org/testdata/blocks.pretty_org b/org/testdata/blocks.pretty_org
index 40fc8cc..239890c 100644
--- a/org/testdata/blocks.pretty_org
+++ b/org/testdata/blocks.pretty_org
@@ -45,9 +45,8 @@ blocks like the quote block parse their content and can contain
 - paragraphs
 - ...
 
-
-also whitespace is not significant
-and superfluous whitespace (at the beginning of the line) is removed
+      whitespace is honored and not removed (but is not displayed because that's how html works by default)
+        it can be made visible using css (e.g. =white-space: pre=).
 #+END_QUOTE
 
 #+BEGIN_EXPORT html
@@ -76,3 +75,38 @@ this unindented line is outside of the list item
   #+BEGIN_EXAMPLE
   #+END_QUOTE
   #+END_EXAMPLE
+  #+END_QUOTE
+
+- verse blocks
+  - emacs / ox-hugo rendering
+    #+BEGIN_EXPORT html
+    <p class="verse">
+    Great clouds overhead<br />
+    Tiny black birds rise and fall<br />
+    Snow covers Emacs<br />
+    <br />
+    &nbsp;&nbsp;&nbsp;---AlexSchroeder<br />
+    </p>
+    #+END_EXPORT
+  - go-org rendering
+    #+BEGIN_SRC html
+    <style>
+    .verse-block p { white-space: pre; }
+    .verse-block p + p { margin: 0; }
+    </style>
+    #+END_SRC
+
+    #+BEGIN_EXPORT html
+    <style>
+    .verse-block p { white-space: pre; }
+    .verse-block p + p { margin: 0; }
+    </style>
+    #+END_EXPORT
+
+    #+BEGIN_VERSE
+    Great clouds overhead
+    Tiny black birds rise and fall
+    Snow covers Emacs
+
+        ---AlexSchroeder
+    #+END_VERSE
diff --git a/org/testdata/lists.pretty_org b/org/testdata/lists.pretty_org
index 72c757a..c388789 100644
--- a/org/testdata/lists.pretty_org
+++ b/org/testdata/lists.pretty_org
@@ -32,10 +32,10 @@
 
 descriptive lists
 - [ ] term :: details
-          continued details
+              continued details
 - [ ] details without a term
 - [X] term ::
-          details on a new line
+              details on a new line
 - term ::
 
           details on a new line (with an empty line in between)