emphasis: Fix reading of prev/next rune for multibyte characters

This commit is contained in:
Niklas Fasching 2022-06-18 15:56:20 +02:00
parent 0f145082ad
commit a520664dc7
4 changed files with 18 additions and 4 deletions

View file

@ -371,19 +371,30 @@ func (d *Document) parseEmphasis(input string, start int, isRaw bool) (int, Node
// see org-emphasis-regexp-components (emacs elisp variable) // see org-emphasis-regexp-components (emacs elisp variable)
func hasValidPreAndBorderChars(input string, i int) bool { func hasValidPreAndBorderChars(input string, i int) bool {
return (i+1 >= len(input) || isValidBorderChar(rune(input[i+1]))) && (i == 0 || isValidPreChar(rune(input[i-1]))) return isValidBorderChar(nextRune(input, i)) && isValidPreChar(prevRune(input, i))
} }
func hasValidPostAndBorderChars(input string, i int) bool { func hasValidPostAndBorderChars(input string, i int) bool {
return (i == 0 || isValidBorderChar(rune(input[i-1]))) && (i+1 >= len(input) || isValidPostChar(rune(input[i+1]))) return (isValidPostChar(nextRune(input, i))) && isValidBorderChar(prevRune(input, i))
}
// prevRune returns the rune that ends immediately before byte offset i in
// input. The result is utf8.RuneError when nothing precedes i (i == 0) or when
// the bytes before i do not end in a valid UTF-8 sequence.
func prevRune(input string, i int) rune {
	head := input[:i]
	last, _ := utf8.DecodeLastRuneInString(head)
	return last
}
// nextRune returns the rune that follows the rune starting at byte offset i in
// input. The result is utf8.RuneError when no rune follows (the rune at i is
// the last one, or i == len(input)) or when the following bytes are not valid
// UTF-8.
func nextRune(input string, i int) rune {
	// Step over the rune at i by its encoded width; it may occupy more than
	// one byte, so i+1 is not necessarily the start of the next rune.
	_, width := utf8.DecodeRuneInString(input[i:])
	r, _ := utf8.DecodeRuneInString(input[i+width:])
	return r
}
// isValidPreChar reports whether r is acceptable as the character in front of
// an emphasis marker: whitespace or one of -({'".
//
// utf8.RuneError is accepted as well because prevRune and nextRune yield it
// when there is no neighbouring rune, i.e. at the edge of the input.
// NOTE: the decoder also yields utf8.RuneError for malformed UTF-8, so such
// bytes are treated like the edge of the input here.
func isValidPreChar(r rune) bool {
	return r == utf8.RuneError || unicode.IsSpace(r) || strings.ContainsRune(`-({'"`, r)
}
// isValidPostChar reports whether r is acceptable as the character behind an
// emphasis marker: whitespace or one of -.,:!?;'")}[.
//
// utf8.RuneError is accepted as well because prevRune and nextRune yield it
// when there is no neighbouring rune, i.e. at the edge of the input.
// NOTE: the decoder also yields utf8.RuneError for malformed UTF-8, so such
// bytes are treated like the edge of the input here.
func isValidPostChar(r rune) bool {
	return r == utf8.RuneError || unicode.IsSpace(r) || strings.ContainsRune(`-.,:!?;'")}[`, r)
}
// isValidBorderChar reports whether r may sit directly inside an emphasis
// marker, which is the case for every rune that is not whitespace. This
// includes utf8.RuneError, the value prevRune and nextRune yield when there is
// no neighbouring rune.
func isValidBorderChar(r rune) bool { return !unicode.IsSpace(r) }

View file

@ -5,6 +5,7 @@ also hard line breaks not followed by a newline get ignored, see \\</li>
<li><em>.emphasis with dot border chars.</em></li> <li><em>.emphasis with dot border chars.</em></li>
<li><em>emphasis with a slash/inside</em></li> <li><em>emphasis with a slash/inside</em></li>
<li><em>emphasis</em> followed by raw text with slash /</li> <li><em>emphasis</em> followed by raw text with slash /</li>
<li><strong>emphasis ending with a &#34;difficult&#34; multibyte character 习</strong></li>
<li>-&gt;/not an emphasis/&lt;-</li> <li>-&gt;/not an emphasis/&lt;-</li>
<li>links with slashes do not become <em>emphasis</em>: <a href="https://somelinkshouldntrenderaccidentalemphasis.com">https://somelinkshouldntrenderaccidentalemphasis.com</a>/ <em>emphasis</em></li> <li>links with slashes do not become <em>emphasis</em>: <a href="https://somelinkshouldntrenderaccidentalemphasis.com">https://somelinkshouldntrenderaccidentalemphasis.com</a>/ <em>emphasis</em></li>
<li><span style="text-decoration: underline;">underlined</span> <strong>bold</strong> <code class="verbatim">verbatim</code> <code>code</code> <del>strikethrough</del></li> <li><span style="text-decoration: underline;">underlined</span> <strong>bold</strong> <code class="verbatim">verbatim</code> <code>code</code> <del>strikethrough</del></li>

View file

@ -4,6 +4,7 @@
- /.emphasis with dot border chars./ - /.emphasis with dot border chars./
- /emphasis with a slash/inside/ - /emphasis with a slash/inside/
- /emphasis/ followed by raw text with slash / - /emphasis/ followed by raw text with slash /
- *emphasis ending with a "difficult" multibyte character 习*
- ->/not an emphasis/<- - ->/not an emphasis/<-
- links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/ - links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/
- _underlined_ *bold* =verbatim= ~code~ +strikethrough+ - _underlined_ *bold* =verbatim= ~code~ +strikethrough+

View file

@ -4,6 +4,7 @@
- /.emphasis with dot border chars./ - /.emphasis with dot border chars./
- /emphasis with a slash/inside/ - /emphasis with a slash/inside/
- /emphasis/ followed by raw text with slash / - /emphasis/ followed by raw text with slash /
- *emphasis ending with a "difficult" multibyte character 习*
- ->/not an emphasis/<- - ->/not an emphasis/<-
- links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/ - links with slashes do not become /emphasis/: [[https://somelinkshouldntrenderaccidentalemphasis.com]]/ /emphasis/
- _underlined_ *bold* =verbatim= ~code~ +strikethrough+ - _underlined_ *bold* =verbatim= ~code~ +strikethrough+