It's possible for the input to end right after the explicit line break, i.e. directly after the second backslash. This currently leads to an out-of-range index into input, because the for loop starts at start+2 while input[start] and input[start+1] are the two backslashes (`\\`).
315 lines
9.2 KiB
Go
315 lines
9.2 KiB
Go
package org
|
|
|
|
import (
|
|
"fmt"
|
|
"path"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
"unicode"
|
|
)
|
|
|
|
// Text is a run of plain inline content.
//
// IsRaw is true for text produced by parseRawInline (used for the content of
// `=` and `~` emphasis), which is only split on newlines and not scanned for
// further inline markup; parseInline produces Text with IsRaw set to false.
type Text struct {
	Content string
	IsRaw   bool
}
|
|
|
|
// LineBreak represents a run of Count consecutive newline characters in
// inline content (see parseLineBreak).
type LineBreak struct {
	Count int
}

// ExplicitLineBreak represents a forced line break: two backslashes followed
// by optional whitespace and a newline (see parseExplicitLineBreak).
type ExplicitLineBreak struct{}
|
|
|
|
// StatisticToken is a progress cookie such as `[1/2]` or `[50%]`. Content
// holds the text between the brackets (e.g. "1/2" or "50%").
type StatisticToken struct {
	Content string
}
|
|
|
|
// Timestamp is an active timestamp such as `<2006-01-02 Mon 15:04 +1w>`.
type Timestamp struct {
	// Time is the parsed point in time; the time of day is 00:00 when the
	// source contained no hh:mm part.
	Time time.Time
	// IsDate is true when the source contained no hh:mm part.
	IsDate bool
	// Interval is the trimmed repeater part (e.g. "+1w"), or "" if absent.
	Interval string
}
|
|
|
|
type Emphasis struct {
|
|
Kind string
|
|
Content []Node
|
|
}
|
|
|
|
type FootnoteLink struct {
|
|
Name string
|
|
Definition *FootnoteDefinition
|
|
}
|
|
|
|
type RegularLink struct {
|
|
Protocol string
|
|
Description []Node
|
|
URL string
|
|
AutoLink bool
|
|
}
|
|
|
|
var (
	// validURLCharacters lists every byte that may be part of an auto-detected link.
	validURLCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;="
	// autolinkProtocols matches the protocols for which plain URLs are turned into links.
	autolinkProtocols = regexp.MustCompile(`^(https?|ftp|file)$`)
	// imageExtensionRegexp and videoExtensionRegexp match the file extensions
	// (including the leading dot) that RegularLink.Kind treats as image / video.
	imageExtensionRegexp = regexp.MustCompile(`^[.](png|gif|jpe?g|svg|tiff?)$`)
	videoExtensionRegexp = regexp.MustCompile(`^[.](webm|mp4)$`)

	// subScriptSuperScriptRegexp matches `_{...}` and `^{...}`; group 1 is the
	// marker, group 2 the content between the braces.
	subScriptSuperScriptRegexp = regexp.MustCompile(`^([_^]){([^{}]+?)}`)
	// timestampRegexp matches `<yyyy-mm-dd [weekday] [hh:mm] [+Nd|w|m|y]>`.
	// Groups: 1 date, 2 weekday, 3 time, 4 interval (2-4 include their leading space).
	timestampRegexp = regexp.MustCompile(`^<(\d{4}-\d{2}-\d{2})( [A-Za-z]+)?( \d{2}:\d{2})?( \+\d+[dwmy])?>`)
	// footnoteRegexp matches `[fn:name]` and `[fn:name:definition]`; group 1 is
	// the name, group 3 the optional inline definition.
	footnoteRegexp = regexp.MustCompile(`^\[fn:([\w-]+?)(:(.*?))?\]`)
	// statisticsTokenRegexp matches `[n/m]` and `[n%]`; group 1 is the text
	// between the brackets.
	statisticsTokenRegexp = regexp.MustCompile(`^\[(\d+/\d+|\d+%)\]`)

	// timestampFormat and datestampFormat are time package layouts for
	// timestamps with and without a time of day.
	timestampFormat = "2006-01-02 Mon 15:04"
	datestampFormat = "2006-01-02 Mon"
)
|
|
|
|
func (d *Document) parseInline(input string) (nodes []Node) {
|
|
previous, current := 0, 0
|
|
for current < len(input) {
|
|
rewind, consumed, node := 0, 0, (Node)(nil)
|
|
switch input[current] {
|
|
case '^':
|
|
consumed, node = d.parseSubOrSuperScript(input, current)
|
|
case '_':
|
|
consumed, node = d.parseSubScriptOrEmphasis(input, current)
|
|
case '*', '/', '+':
|
|
consumed, node = d.parseEmphasis(input, current, false)
|
|
case '=', '~':
|
|
consumed, node = d.parseEmphasis(input, current, true)
|
|
case '[':
|
|
consumed, node = d.parseOpeningBracket(input, current)
|
|
case '<':
|
|
consumed, node = d.parseTimestamp(input, current)
|
|
case '\\':
|
|
consumed, node = d.parseExplicitLineBreak(input, current)
|
|
case '\n':
|
|
consumed, node = d.parseLineBreak(input, current)
|
|
case ':':
|
|
rewind, consumed, node = d.parseAutoLink(input, current)
|
|
current -= rewind
|
|
}
|
|
if consumed != 0 {
|
|
if current > previous {
|
|
nodes = append(nodes, Text{input[previous:current], false})
|
|
}
|
|
if node != nil {
|
|
nodes = append(nodes, node)
|
|
}
|
|
current += consumed
|
|
previous = current
|
|
} else {
|
|
current++
|
|
}
|
|
}
|
|
|
|
if previous < len(input) {
|
|
nodes = append(nodes, Text{input[previous:], false})
|
|
}
|
|
return nodes
|
|
}
|
|
|
|
func (d *Document) parseRawInline(input string) (nodes []Node) {
|
|
previous, current := 0, 0
|
|
for current < len(input) {
|
|
if input[current] == '\n' {
|
|
consumed, node := d.parseLineBreak(input, current)
|
|
if current > previous {
|
|
nodes = append(nodes, Text{input[previous:current], true})
|
|
}
|
|
nodes = append(nodes, node)
|
|
current += consumed
|
|
previous = current
|
|
} else {
|
|
current++
|
|
}
|
|
}
|
|
if previous < len(input) {
|
|
nodes = append(nodes, Text{input[previous:], true})
|
|
}
|
|
return nodes
|
|
}
|
|
|
|
func (d *Document) parseLineBreak(input string, start int) (int, Node) {
|
|
i := start
|
|
for ; i < len(input) && input[i] == '\n'; i++ {
|
|
}
|
|
return i - start, LineBreak{i - start}
|
|
}
|
|
|
|
func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) {
|
|
if start == 0 || input[start-1] == '\n' || start+2 >= len(input) || input[start+1] != '\\' {
|
|
return 0, nil
|
|
}
|
|
for i := start + 2; unicode.IsSpace(rune(input[i])); i++ {
|
|
if i >= len(input) || input[i] == '\n' {
|
|
return i + 1 - start, ExplicitLineBreak{}
|
|
}
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
|
|
if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil {
|
|
return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2], false}}}
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) {
|
|
if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 {
|
|
return consumed, node
|
|
}
|
|
return d.parseEmphasis(input, start, false)
|
|
}
|
|
|
|
func (d *Document) parseOpeningBracket(input string, start int) (int, Node) {
|
|
if len(input[start:]) >= 2 && input[start] == '[' && input[start+1] == '[' {
|
|
return d.parseRegularLink(input, start)
|
|
} else if footnoteRegexp.MatchString(input[start:]) {
|
|
return d.parseFootnoteReference(input, start)
|
|
} else if statisticsTokenRegexp.MatchString(input[start:]) {
|
|
return d.parseStatisticToken(input, start)
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseFootnoteReference(input string, start int) (int, Node) {
|
|
if m := footnoteRegexp.FindStringSubmatch(input[start:]); m != nil {
|
|
name, definition := m[1], m[3]
|
|
link := FootnoteLink{name, nil}
|
|
if definition != "" {
|
|
link.Definition = &FootnoteDefinition{name, []Node{Paragraph{d.parseInline(definition)}}, true}
|
|
d.addFootnote(name, link.Definition)
|
|
}
|
|
return len(m[0]), link
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseStatisticToken(input string, start int) (int, Node) {
|
|
if m := statisticsTokenRegexp.FindStringSubmatch(input[start:]); m != nil {
|
|
return len(m[1]) + 2, StatisticToken{m[1]}
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseAutoLink(input string, start int) (int, int, Node) {
|
|
if !d.AutoLink || start == 0 || len(input[start:]) < 3 || input[start:start+3] != "://" {
|
|
return 0, 0, nil
|
|
}
|
|
protocolStart, protocol := start-1, ""
|
|
for ; protocolStart > 0; protocolStart-- {
|
|
if !unicode.IsLetter(rune(input[protocolStart])) {
|
|
protocolStart++
|
|
break
|
|
}
|
|
}
|
|
if m := autolinkProtocols.FindStringSubmatch(input[protocolStart:start]); m != nil {
|
|
protocol = m[1]
|
|
} else {
|
|
return 0, 0, nil
|
|
}
|
|
end := start
|
|
for ; end < len(input) && strings.ContainsRune(validURLCharacters, rune(input[end])); end++ {
|
|
}
|
|
path := input[start:end]
|
|
if path == "://" {
|
|
return 0, 0, nil
|
|
}
|
|
return len(protocol), len(path + protocol), RegularLink{protocol, nil, protocol + path, true}
|
|
}
|
|
|
|
func (d *Document) parseRegularLink(input string, start int) (int, Node) {
|
|
input = input[start:]
|
|
if len(input) < 3 || input[:2] != "[[" || input[2] == '[' {
|
|
return 0, nil
|
|
}
|
|
end := strings.Index(input, "]]")
|
|
if end == -1 {
|
|
return 0, nil
|
|
}
|
|
rawLinkParts := strings.Split(input[2:end], "][")
|
|
description, link := ([]Node)(nil), rawLinkParts[0]
|
|
if len(rawLinkParts) == 2 {
|
|
link, description = rawLinkParts[0], d.parseInline(rawLinkParts[1])
|
|
}
|
|
if strings.ContainsRune(link, '\n') {
|
|
return 0, nil
|
|
}
|
|
consumed := end + 2
|
|
protocol, linkParts := "", strings.SplitN(link, ":", 2)
|
|
if len(linkParts) == 2 {
|
|
protocol = linkParts[0]
|
|
}
|
|
return consumed, RegularLink{protocol, description, link, false}
|
|
}
|
|
|
|
func (d *Document) parseTimestamp(input string, start int) (int, Node) {
|
|
if m := timestampRegexp.FindStringSubmatch(input[start:]); m != nil {
|
|
ddmmyy, hhmm, interval, isDate := m[1], m[3], strings.TrimSpace(m[4]), false
|
|
if hhmm == "" {
|
|
hhmm, isDate = "00:00", true
|
|
}
|
|
t, err := time.Parse(timestampFormat, fmt.Sprintf("%s Mon %s", ddmmyy, hhmm))
|
|
if err != nil {
|
|
return 0, nil
|
|
}
|
|
timestamp := Timestamp{t, isDate, interval}
|
|
return len(m[0]), timestamp
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseEmphasis(input string, start int, isRaw bool) (int, Node) {
|
|
marker, i := input[start], start
|
|
if !hasValidPreAndBorderChars(input, i) {
|
|
return 0, nil
|
|
}
|
|
for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ {
|
|
if input[i] == '\n' {
|
|
consumedNewLines++
|
|
}
|
|
|
|
if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
|
|
if isRaw {
|
|
return i + 1 - start, Emphasis{input[start : start+1], d.parseRawInline(input[start+1 : i])}
|
|
}
|
|
return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
|
|
}
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
// see org-emphasis-regexp-components (emacs elisp variable)
|
|
|
|
func hasValidPreAndBorderChars(input string, i int) bool {
|
|
return (i+1 >= len(input) || isValidBorderChar(rune(input[i+1]))) && (i == 0 || isValidPreChar(rune(input[i-1])))
|
|
}
|
|
|
|
func hasValidPostAndBorderChars(input string, i int) bool {
|
|
return (i == 0 || isValidBorderChar(rune(input[i-1]))) && (i+1 >= len(input) || isValidPostChar(rune(input[i+1])))
|
|
}
|
|
|
|
// isValidPreChar reports whether r may directly precede an opening emphasis
// marker: any whitespace or one of - ( { ' ".
func isValidPreChar(r rune) bool {
	const preChars = `-({'"`
	if unicode.IsSpace(r) {
		return true
	}
	return strings.ContainsRune(preChars, r)
}
|
|
|
|
// isValidPostChar reports whether r may directly follow a closing emphasis
// marker: any whitespace or one of - . , : ! ? ; ' " ) } [.
func isValidPostChar(r rune) bool {
	const postChars = `-.,:!?;'")}[`
	if unicode.IsSpace(r) {
		return true
	}
	return strings.ContainsRune(postChars, r)
}
|
|
|
|
// isValidBorderChar reports whether r may be the first or last character of
// emphasized content, i.e. whether it is anything but whitespace.
func isValidBorderChar(r rune) bool {
	return !unicode.IsSpace(r)
}
|
|
|
|
func (l RegularLink) Kind() string {
|
|
if p := l.Protocol; l.Description != nil || (p != "" && p != "file" && p != "http" && p != "https") {
|
|
return "regular"
|
|
}
|
|
if imageExtensionRegexp.MatchString(path.Ext(l.URL)) {
|
|
return "image"
|
|
}
|
|
if videoExtensionRegexp.MatchString(path.Ext(l.URL)) {
|
|
return "video"
|
|
}
|
|
return "regular"
|
|
}
|
|
|
|
func (n Text) String() string { return orgWriter.nodesAsString(n) }
|
|
func (n LineBreak) String() string { return orgWriter.nodesAsString(n) }
|
|
func (n ExplicitLineBreak) String() string { return orgWriter.nodesAsString(n) }
|
|
func (n StatisticToken) String() string { return orgWriter.nodesAsString(n) }
|
|
func (n Emphasis) String() string { return orgWriter.nodesAsString(n) }
|
|
func (n FootnoteLink) String() string { return orgWriter.nodesAsString(n) }
|
|
func (n RegularLink) String() string { return orgWriter.nodesAsString(n) }
|
|
func (n Timestamp) String() string { return orgWriter.nodesAsString(n) }
|