From c901c00166809395bb2bab65c3420f1bb593e43c Mon Sep 17 00:00:00 2001 From: Niklas Fasching Date: Tue, 20 Jul 2021 22:33:28 +0200 Subject: [PATCH] Add non-standard ealb (east asian line break) option See pandoc: https://pandoc.org/MANUAL.html#extension-east_asian_line_breaks --- org/document.go | 3 ++- org/html_writer.go | 4 +++- org/inline.go | 10 ++++++++-- org/testdata/east_asian_line_breaks.html | 8 ++++++++ org/testdata/east_asian_line_breaks.org | 11 +++++++++++ org/testdata/east_asian_line_breaks.pretty_org | 11 +++++++++++ 6 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 org/testdata/east_asian_line_breaks.html create mode 100644 org/testdata/east_asian_line_breaks.org create mode 100644 org/testdata/east_asian_line_breaks.pretty_org diff --git a/org/document.go b/org/document.go index 0f016c1..9bc7fc8 100644 --- a/org/document.go +++ b/org/document.go @@ -86,7 +86,7 @@ func New() *Configuration { DefaultSettings: map[string]string{ "TODO": "TODO | DONE", "EXCLUDE_TAGS": "noexport", - "OPTIONS": "toc:t <:t e:t f:t pri:t todo:t tags:t title:t", + "OPTIONS": "toc:t <:t e:t f:t pri:t todo:t tags:t title:t ealb:nil", }, Log: log.New(os.Stderr, "go-org: ", 0), ReadFile: ioutil.ReadFile, @@ -179,6 +179,7 @@ func (d *Document) Get(key string) string { // - todo (export headline todo status) // - pri (export headline priority) // - tags (export headline tags) +// - ealb (non-standard) (export with east asian line breaks / ignore line breaks between multi-byte characters) // see https://orgmode.org/manual/Export-settings.html for more information func (d *Document) GetOption(key string) string { get := func(settings map[string]string) string { diff --git a/org/html_writer.go b/org/html_writer.go index 878560b..5ab9d8f 100644 --- a/org/html_writer.go +++ b/org/html_writer.go @@ -311,7 +311,9 @@ func (w *HTMLWriter) WriteStatisticToken(s StatisticToken) { } func (w *HTMLWriter) WriteLineBreak(l LineBreak) { - w.WriteString(strings.Repeat("\n", l.Count)) + if w.document.GetOption("ealb") == "nil" || !l.BetweenMultibyteCharacters { + w.WriteString(strings.Repeat("\n", l.Count)) + } } func (w *HTMLWriter) WriteExplicitLineBreak(l ExplicitLineBreak) { diff --git a/org/inline.go b/org/inline.go index e7a8f47..b1bb28c 100644 --- a/org/inline.go +++ b/org/inline.go @@ -7,6 +7,7 @@ import ( "strings" "time" "unicode" + "unicode/utf8" ) type Text struct { @@ -14,7 +15,10 @@ type Text struct { IsRaw bool } -type LineBreak struct{ Count int } +type LineBreak struct { + Count int + BetweenMultibyteCharacters bool +} type ExplicitLineBreak struct{} type StatisticToken struct{ Content string } @@ -159,7 +163,9 @@ func (d *Document) parseLineBreak(input string, start int) (int, Node) { i := start for ; i < len(input) && input[i] == '\n'; i++ { } - return i - start, LineBreak{i - start} + _, beforeLen := utf8.DecodeLastRuneInString(input[:start]) + _, afterLen := utf8.DecodeRuneInString(input[i:]) + return i - start, LineBreak{i - start, beforeLen > 1 && afterLen > 1} } func (d *Document) parseInlineBlock(input string, start int) (int, int, Node) { diff --git a/org/testdata/east_asian_line_breaks.html b/org/testdata/east_asian_line_breaks.html new file mode 100644 index 0000000..3502d67 --- /dev/null +++ b/org/testdata/east_asian_line_breaks.html @@ -0,0 +1,8 @@ +

+Line breaks between multi-byte characters are omitted when the ealb option is set:

+ diff --git a/org/testdata/east_asian_line_breaks.org b/org/testdata/east_asian_line_breaks.org new file mode 100644 index 0000000..9e4e124 --- /dev/null +++ b/org/testdata/east_asian_line_breaks.org @@ -0,0 +1,11 @@ +#+OPTIONS: ealb:t + +Line breaks between multi-byte characters are omitted when the =ealb= option is set: + +- 中午 + 吃啥 + +- something else + 中午 + 吃啥 + something else diff --git a/org/testdata/east_asian_line_breaks.pretty_org b/org/testdata/east_asian_line_breaks.pretty_org new file mode 100644 index 0000000..9e4e124 --- /dev/null +++ b/org/testdata/east_asian_line_breaks.pretty_org @@ -0,0 +1,11 @@ +#+OPTIONS: ealb:t + +Line breaks between multi-byte characters are omitted when the =ealb= option is set: + +- 中午 + 吃啥 + +- something else + 中午 + 吃啥 + something else