From ec895cbe83c33952e6e4c8282cfd507c66226253 Mon Sep 17 00:00:00 2001 From: Niklas Fasching Date: Wed, 19 Dec 2018 12:58:07 +0100 Subject: [PATCH] Fix headline tags, table pretty printing and multiline links - we can't just look at the len of the string (~ #bytes) - that breaks down for tables containing characters consisting of multiple bytes. This handles more (still not all) cases and is good enough for now - add _ to allowed tag chars - also require space between headline and tags - links (link itself, not the description) spanning multiple lines are not supported - otherwise we would have to take care of splitting link and adding indentation for org pretty printing - and that sounds like such an edge case that it seems cleaner to forbid them --- org/headline.go | 2 +- org/inline.go | 3 +++ org/org.go | 3 ++- org/table.go | 5 +++-- org/testdata/tables.html | 36 ++++++++++++++++++++++++++++++++++ org/testdata/tables.org | 8 ++++++++ org/testdata/tables.pretty_org | 8 ++++++++ 7 files changed, 61 insertions(+), 4 deletions(-) diff --git a/org/headline.go b/org/headline.go index b9d56c4..998fa84 100644 --- a/org/headline.go +++ b/org/headline.go @@ -17,7 +17,7 @@ type Headline struct { } var headlineRegexp = regexp.MustCompile(`^([*]+)\s+(.*)`) -var tagRegexp = regexp.MustCompile(`(.*?)\s*(:[A-Za-z0-9@#%:]+:\s*$)`) +var tagRegexp = regexp.MustCompile(`(.*?)\s+(:[A-Za-z0-9_@#%:]+:\s*$)`) func lexHeadline(line string) (token, bool) { if m := headlineRegexp.FindStringSubmatch(line); m != nil { diff --git a/org/inline.go b/org/inline.go index d7c26a1..86740f3 100644 --- a/org/inline.go +++ b/org/inline.go @@ -210,6 +210,9 @@ func (d *Document) parseRegularLink(input string, start int) (int, Node) { if len(rawLinkParts) == 2 { link, description = rawLinkParts[0], d.parseInline(rawLinkParts[1]) } + if strings.ContainsRune(link, '\n') { + return 0, nil + } consumed := end + 2 protocol, linkParts := "", strings.SplitN(link, ":", 2) if len(linkParts) == 2 { diff --git a/org/org.go b/org/org.go index 0e32898..f9564e0 100644 --- a/org/org.go +++ b/org/org.go @@ -3,6 +3,7 @@ package org import ( "fmt" "strings" + "unicode/utf8" ) type stringBuilder = strings.Builder @@ -268,7 +269,7 @@ func (w *OrgWriter) writeTable(t Table) { if content == "" { content = " " } - n := column.Len - len(content) + n := column.Len - utf8.RuneCountInString(content) if n < 0 { n = 0 } diff --git a/org/table.go b/org/table.go index 30e94f8..6e391cc 100644 --- a/org/table.go +++ b/org/table.go @@ -4,6 +4,7 @@ import ( "regexp" "strconv" "strings" + "unicode/utf8" ) type Table struct { @@ -89,8 +90,8 @@ func getColumnInfos(rows [][]string) []ColumnInfo { continue } - if len(columns[i]) > columnInfos[i].Len { - columnInfos[i].Len = len(columns[i]) + if n := utf8.RuneCountInString(columns[i]); n > columnInfos[i].Len { + columnInfos[i].Len = n } if m := columnAlignRegexp.FindStringSubmatch(columns[i]); m != nil && isSpecialRow(columns) { diff --git a/org/testdata/tables.html b/org/testdata/tables.html index eee44cf..4c42147 100644 --- a/org/testdata/tables.html +++ b/org/testdata/tables.html @@ -42,6 +42,42 @@ table with separator after header
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CharacterOrgRendered HTML
Hyphena - ba - b
Ndasha -- ba – b
Mdasha --- ba — b
Ellipsisa ... ba … b
+
+table with unicode characters +
+
+
+ diff --git a/org/testdata/tables.org b/org/testdata/tables.org index c2ee86f..a3c51b2 100644 --- a/org/testdata/tables.org +++ b/org/testdata/tables.org @@ -9,6 +9,14 @@ |---+---+---| | 1 | 2 | 3 | +#+CAPTION: table with unicode characters +| Character | Org | Rendered HTML | +|-----------+-----------+---------------| +| Hyphen | =a - b= | a - b | +| Ndash | =a -- b= | a – b | +| Mdash | =a --- b= | a — b | +| Ellipsis | =a ... b= | a … b | + #+CAPTION: table without header (but separator before) |---+---+---| | 1 | 2 | 3 | diff --git a/org/testdata/tables.pretty_org b/org/testdata/tables.pretty_org index c2ee86f..a3c51b2 100644 --- a/org/testdata/tables.pretty_org +++ b/org/testdata/tables.pretty_org @@ -9,6 +9,14 @@ |---+---+---| | 1 | 2 | 3 | +#+CAPTION: table with unicode characters +| Character | Org | Rendered HTML | +|-----------+-----------+---------------| +| Hyphen | =a - b= | a - b | +| Ndash | =a -- b= | a – b | +| Mdash | =a --- b= | a — b | +| Ellipsis | =a ... b= | a … b | + #+CAPTION: table without header (but separator before) |---+---+---| | 1 | 2 | 3 |
1