From b61e49eb855db4f488fa464e06f3f064478866dc Mon Sep 17 00:00:00 2001 From: Niklas Fasching Date: Mon, 9 Dec 2019 19:37:39 +0100 Subject: [PATCH] Preserve whitespace (indentation) inside paragraphs We want original whitespace to be rendered in some cases (e.g. verse blocks). This requires information about the original whitespace to be preserved during paragraph parsing. As html ignores (collapses) whitespace by default we don't have to adapt the html writer and can just selectively enable rendering of the preserved whitespace wherever we want it using css (white-space: pre). To differentiate meaningful whitespace from document structure based indentation (i.e. list item base indentation) we need to introduce document.baseLvl. A paragraph by itself does not have enough information to differentiate both kinds of whitespace and needs this information as context [0]. As we're already touching list indentation I went along and improved (fixed?) descriptive list item indentation rendering in the org writer (it should match emacs tab behavior - i.e. indent subsequent lines up to the `:: `). [0] e.g. list items can contain blank lines - a paragraph starting with a blank line would not know that it is part of a list item / has a base indentation - the blank line would suggest a baseLvl of 0. --- org/document.go | 1 + org/list.go | 4 +++ org/org_writer.go | 3 +- org/paragraph.go | 7 ++-- org/testdata/blocks.html | 61 ++++++++++++++++++++++++++++++---- org/testdata/blocks.org | 40 ++++++++++++++++++++-- org/testdata/blocks.pretty_org | 40 ++++++++++++++++++++-- org/testdata/lists.pretty_org | 4 +-- 8 files changed, 141 insertions(+), 19 deletions(-) diff --git a/org/document.go b/org/document.go index a9697c4..3c60e5b 100644 --- a/org/document.go +++ b/org/document.go @@ -35,6 +35,7 @@ type Document struct { *Configuration Path string // Path of the file containing the parse input - used to resolve relative paths during parsing (e.g. INCLUDE). 
tokens []token + baseLvl int Nodes []Node NamedNodes map[string]Node Outline Outline // Outline is a Table Of Contents for the document and contains all sections (headline + content). diff --git a/org/list.go b/org/list.go index 462e9fa..54f5b2d 100644 --- a/org/list.go +++ b/org/list.go @@ -81,12 +81,15 @@ func (d *Document) parseList(i int, parentStop stopFn) (int, Node) { func (d *Document) parseListItem(l List, i int, parentStop stopFn) (int, Node) { start, nodes, bullet := i, []Node{}, d.tokens[i].matches[2] minIndent, dterm, content, status := d.tokens[i].lvl+len(bullet), "", d.tokens[i].content, "" + originalBaseLvl := d.baseLvl + d.baseLvl = minIndent + 1 if m := listItemStatusRegexp.FindStringSubmatch(content); m != nil { status, content = m[1], content[len("[ ] "):] } if l.Kind == "descriptive" { if m := descriptiveListItemRegexp.FindStringIndex(content); m != nil { dterm, content = content[:m[0]], content[m[1]:] + d.baseLvl = strings.Index(d.tokens[i].matches[0], " ::") + 4 } } @@ -103,6 +106,7 @@ func (d *Document) parseListItem(l List, i int, parentStop stopFn) (int, Node) { i += consumed nodes = append(nodes, node) } + d.baseLvl = originalBaseLvl if l.Kind == "descriptive" { return i - start, DescriptiveListItem{bullet, status, d.parseInline(dterm), nodes} } diff --git a/org/org_writer.go b/org/org_writer.go index 8855df9..d5a33ff 100644 --- a/org/org_writer.go +++ b/org/org_writer.go @@ -196,11 +196,12 @@ func (w *OrgWriter) WriteListItem(li ListItem) { } func (w *OrgWriter) WriteDescriptiveListItem(di DescriptiveListItem) { + indent := w.indent + strings.Repeat(" ", len(di.Bullet)+1) w.WriteString(w.indent + di.Bullet) if di.Status != "" { w.WriteString(fmt.Sprintf(" [%s]", di.Status)) + indent = indent + strings.Repeat(" ", len(di.Status)+3) } - indent := w.indent + strings.Repeat(" ", len(di.Bullet)+1) if len(di.Term) != 0 { term := w.WriteNodesAsString(di.Term...) 
w.WriteString(" " + term + " ::") diff --git a/org/paragraph.go b/org/paragraph.go index 24f0554..2c58eac 100644 --- a/org/paragraph.go +++ b/org/paragraph.go @@ -1,6 +1,7 @@ package org import ( + "math" "regexp" "strings" ) @@ -27,12 +28,12 @@ func lexHorizontalRule(line string) (token, bool) { func (d *Document) parseParagraph(i int, parentStop stopFn) (int, Node) { lines, start := []string{d.tokens[i].content}, i - i++ stop := func(d *Document, i int) bool { return parentStop(d, i) || d.tokens[i].kind != "text" || d.tokens[i].content == "" } - for ; !stop(d, i); i++ { - lines = append(lines, d.tokens[i].content) + for i += 1; !stop(d, i); i++ { + lvl := math.Max(float64(d.tokens[i].lvl-d.baseLvl), 0) + lines = append(lines, strings.Repeat(" ", int(lvl))+d.tokens[i].content) } consumed := i - start return consumed, Paragraph{d.parseInline(strings.Join(lines, "\n"))} diff --git a/org/testdata/blocks.html b/org/testdata/blocks.html index 38a304c..a470343 100644 --- a/org/testdata/blocks.html +++ b/org/testdata/blocks.html @@ -87,12 +87,12 @@ paragraphs

+

+ whitespace is honored and not removed (but is not displayed because that's how html works by default) + it can be made visible using css (e.g. white-space: pre). +

-

-also whitespace is not significant -and superfluous whitespace (at the beginning of the line) is removed -