- Remove unused footnote section title option - Move away from maintaining a list of footnotes in the document (only needed for HTML export, potential maintenance overhead when modifying the document) and rather only build it on export when required. - HTML export: Rename all footnotes to numbers (so we can support anonymous footnote references by assigning them a number) and export footnotes in order of reference, not definition. The implementation of this makes it natural to also stop exporting unused footnote definitions so we do that as well.
314 lines
9.2 KiB
Go
314 lines
9.2 KiB
Go
package org
|
|
|
|
import (
|
|
"fmt"
|
|
"path"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
"unicode"
|
|
)
|
|
|
|
// Text is a run of literal inline text.
type Text struct {
	// Content is the text of the run.
	Content string
	// IsRaw is true for text produced by parseRawInline and false for text
	// produced by parseInline.
	IsRaw bool
}
|
|
|
|
// LineBreak stands for a run of consecutive newline characters.
type LineBreak struct {
	// Count is the number of newlines in the run.
	Count int
}
|
|
// ExplicitLineBreak is the node emitted by parseExplicitLineBreak for a `\\`
// followed by optional whitespace and a newline. It carries no data.
type ExplicitLineBreak struct {
}
|
|
|
|
// StatisticToken is a bracketed progress marker such as [1/2] or [50%].
type StatisticToken struct {
	// Content is the text between the brackets, e.g. "1/2" or "50%".
	Content string
}
|
|
|
|
// Timestamp is a parsed <YYYY-MM-DD ...> timestamp.
type Timestamp struct {
	// Time is the parsed point in time; the clock is 00:00 when the source
	// contained no HH:MM part.
	Time time.Time
	// IsDate is true when the source contained no HH:MM part.
	IsDate bool
	// Interval is the trimmed repeater suffix (e.g. "+1w"), or "" if absent.
	Interval string
}
|
|
|
|
type Emphasis struct {
|
|
Kind string
|
|
Content []Node
|
|
}
|
|
|
|
type FootnoteLink struct {
|
|
Name string
|
|
Definition *FootnoteDefinition
|
|
}
|
|
|
|
type RegularLink struct {
|
|
Protocol string
|
|
Description []Node
|
|
URL string
|
|
AutoLink bool
|
|
}
|
|
|
|
var (
	// validURLCharacters lists the bytes parseAutoLink accepts while
	// extending an auto link to the right.
	validURLCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;="

	// autolinkProtocols matches the protocols that trigger auto linking.
	autolinkProtocols = regexp.MustCompile(`^(https?|ftp|file)$`)

	// imageExtensionRegexp matches file extensions treated as images.
	imageExtensionRegexp = regexp.MustCompile(`^[.](png|gif|jpe?g|svg|tiff?)$`)

	// videoExtensionRegexp matches file extensions treated as videos.
	videoExtensionRegexp = regexp.MustCompile(`^[.](webm|mp4)$`)

	// subScriptSuperScriptRegexp matches _{...} and ^{...}.
	subScriptSuperScriptRegexp = regexp.MustCompile(`^([_^]){([^{}]+?)}`)

	// timestampRegexp matches <YYYY-MM-DD [Day] [HH:MM] [+Nd|w|m|y]>.
	timestampRegexp = regexp.MustCompile(`^<(\d{4}-\d{2}-\d{2})( [A-Za-z]+)?( \d{2}:\d{2})?( \+\d+[dwmy])?>`)

	// footnoteRegexp matches [fn:name] and [fn:name:definition].
	footnoteRegexp = regexp.MustCompile(`^\[fn:([\w-]+?)(:(.*?))?\]`)

	// statisticsTokenRegexp matches [n/m] and [n%].
	statisticsTokenRegexp = regexp.MustCompile(`^\[(\d+/\d+|\d+%)\]`)

	// timestampFormat is the time layout for timestamps with a clock part.
	timestampFormat = "2006-01-02 Mon 15:04"

	// datestampFormat is the time layout for date-only timestamps.
	datestampFormat = "2006-01-02 Mon"
)
|
|
|
|
func (d *Document) parseInline(input string) (nodes []Node) {
|
|
previous, current := 0, 0
|
|
for current < len(input) {
|
|
rewind, consumed, node := 0, 0, (Node)(nil)
|
|
switch input[current] {
|
|
case '^':
|
|
consumed, node = d.parseSubOrSuperScript(input, current)
|
|
case '_':
|
|
consumed, node = d.parseSubScriptOrEmphasis(input, current)
|
|
case '*', '/', '+':
|
|
consumed, node = d.parseEmphasis(input, current, false)
|
|
case '=', '~':
|
|
consumed, node = d.parseEmphasis(input, current, true)
|
|
case '[':
|
|
consumed, node = d.parseOpeningBracket(input, current)
|
|
case '<':
|
|
consumed, node = d.parseTimestamp(input, current)
|
|
case '\\':
|
|
consumed, node = d.parseExplicitLineBreak(input, current)
|
|
case '\n':
|
|
consumed, node = d.parseLineBreak(input, current)
|
|
case ':':
|
|
rewind, consumed, node = d.parseAutoLink(input, current)
|
|
current -= rewind
|
|
}
|
|
if consumed != 0 {
|
|
if current > previous {
|
|
nodes = append(nodes, Text{input[previous:current], false})
|
|
}
|
|
if node != nil {
|
|
nodes = append(nodes, node)
|
|
}
|
|
current += consumed
|
|
previous = current
|
|
} else {
|
|
current++
|
|
}
|
|
}
|
|
|
|
if previous < len(input) {
|
|
nodes = append(nodes, Text{input[previous:], false})
|
|
}
|
|
return nodes
|
|
}
|
|
|
|
func (d *Document) parseRawInline(input string) (nodes []Node) {
|
|
previous, current := 0, 0
|
|
for current < len(input) {
|
|
if input[current] == '\n' {
|
|
consumed, node := d.parseLineBreak(input, current)
|
|
if current > previous {
|
|
nodes = append(nodes, Text{input[previous:current], true})
|
|
}
|
|
nodes = append(nodes, node)
|
|
current += consumed
|
|
previous = current
|
|
} else {
|
|
current++
|
|
}
|
|
}
|
|
if previous < len(input) {
|
|
nodes = append(nodes, Text{input[previous:], true})
|
|
}
|
|
return nodes
|
|
}
|
|
|
|
func (d *Document) parseLineBreak(input string, start int) (int, Node) {
|
|
i := start
|
|
for ; i < len(input) && input[i] == '\n'; i++ {
|
|
}
|
|
return i - start, LineBreak{i - start}
|
|
}
|
|
|
|
func (d *Document) parseExplicitLineBreak(input string, start int) (int, Node) {
|
|
if start == 0 || input[start-1] == '\n' || start+2 >= len(input) || input[start+1] != '\\' {
|
|
return 0, nil
|
|
}
|
|
for i := start + 2; unicode.IsSpace(rune(input[i])); i++ {
|
|
if i >= len(input) || input[i] == '\n' {
|
|
return i + 1 - start, ExplicitLineBreak{}
|
|
}
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
|
|
if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil {
|
|
return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2], false}}}
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) {
|
|
if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 {
|
|
return consumed, node
|
|
}
|
|
return d.parseEmphasis(input, start, false)
|
|
}
|
|
|
|
func (d *Document) parseOpeningBracket(input string, start int) (int, Node) {
|
|
if len(input[start:]) >= 2 && input[start] == '[' && input[start+1] == '[' {
|
|
return d.parseRegularLink(input, start)
|
|
} else if footnoteRegexp.MatchString(input[start:]) {
|
|
return d.parseFootnoteReference(input, start)
|
|
} else if statisticsTokenRegexp.MatchString(input[start:]) {
|
|
return d.parseStatisticToken(input, start)
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseFootnoteReference(input string, start int) (int, Node) {
|
|
if m := footnoteRegexp.FindStringSubmatch(input[start:]); m != nil {
|
|
name, definition := m[1], m[3]
|
|
link := FootnoteLink{name, nil}
|
|
if definition != "" {
|
|
link.Definition = &FootnoteDefinition{name, []Node{Paragraph{d.parseInline(definition)}}, true}
|
|
}
|
|
return len(m[0]), link
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseStatisticToken(input string, start int) (int, Node) {
|
|
if m := statisticsTokenRegexp.FindStringSubmatch(input[start:]); m != nil {
|
|
return len(m[1]) + 2, StatisticToken{m[1]}
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseAutoLink(input string, start int) (int, int, Node) {
|
|
if !d.AutoLink || start == 0 || len(input[start:]) < 3 || input[start:start+3] != "://" {
|
|
return 0, 0, nil
|
|
}
|
|
protocolStart, protocol := start-1, ""
|
|
for ; protocolStart > 0; protocolStart-- {
|
|
if !unicode.IsLetter(rune(input[protocolStart])) {
|
|
protocolStart++
|
|
break
|
|
}
|
|
}
|
|
if m := autolinkProtocols.FindStringSubmatch(input[protocolStart:start]); m != nil {
|
|
protocol = m[1]
|
|
} else {
|
|
return 0, 0, nil
|
|
}
|
|
end := start
|
|
for ; end < len(input) && strings.ContainsRune(validURLCharacters, rune(input[end])); end++ {
|
|
}
|
|
path := input[start:end]
|
|
if path == "://" {
|
|
return 0, 0, nil
|
|
}
|
|
return len(protocol), len(path + protocol), RegularLink{protocol, nil, protocol + path, true}
|
|
}
|
|
|
|
func (d *Document) parseRegularLink(input string, start int) (int, Node) {
|
|
input = input[start:]
|
|
if len(input) < 3 || input[:2] != "[[" || input[2] == '[' {
|
|
return 0, nil
|
|
}
|
|
end := strings.Index(input, "]]")
|
|
if end == -1 {
|
|
return 0, nil
|
|
}
|
|
rawLinkParts := strings.Split(input[2:end], "][")
|
|
description, link := ([]Node)(nil), rawLinkParts[0]
|
|
if len(rawLinkParts) == 2 {
|
|
link, description = rawLinkParts[0], d.parseInline(rawLinkParts[1])
|
|
}
|
|
if strings.ContainsRune(link, '\n') {
|
|
return 0, nil
|
|
}
|
|
consumed := end + 2
|
|
protocol, linkParts := "", strings.SplitN(link, ":", 2)
|
|
if len(linkParts) == 2 {
|
|
protocol = linkParts[0]
|
|
}
|
|
return consumed, RegularLink{protocol, description, link, false}
|
|
}
|
|
|
|
func (d *Document) parseTimestamp(input string, start int) (int, Node) {
|
|
if m := timestampRegexp.FindStringSubmatch(input[start:]); m != nil {
|
|
ddmmyy, hhmm, interval, isDate := m[1], m[3], strings.TrimSpace(m[4]), false
|
|
if hhmm == "" {
|
|
hhmm, isDate = "00:00", true
|
|
}
|
|
t, err := time.Parse(timestampFormat, fmt.Sprintf("%s Mon %s", ddmmyy, hhmm))
|
|
if err != nil {
|
|
return 0, nil
|
|
}
|
|
timestamp := Timestamp{t, isDate, interval}
|
|
return len(m[0]), timestamp
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
func (d *Document) parseEmphasis(input string, start int, isRaw bool) (int, Node) {
|
|
marker, i := input[start], start
|
|
if !hasValidPreAndBorderChars(input, i) {
|
|
return 0, nil
|
|
}
|
|
for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ {
|
|
if input[i] == '\n' {
|
|
consumedNewLines++
|
|
}
|
|
|
|
if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
|
|
if isRaw {
|
|
return i + 1 - start, Emphasis{input[start : start+1], d.parseRawInline(input[start+1 : i])}
|
|
}
|
|
return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
|
|
}
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
// See org-emphasis-regexp-components (Emacs Lisp variable) for the pre, post and border character rules.
|
|
|
|
func hasValidPreAndBorderChars(input string, i int) bool {
|
|
return (i+1 >= len(input) || isValidBorderChar(rune(input[i+1]))) && (i == 0 || isValidPreChar(rune(input[i-1])))
|
|
}
|
|
|
|
func hasValidPostAndBorderChars(input string, i int) bool {
|
|
return (i == 0 || isValidBorderChar(rune(input[i-1]))) && (i+1 >= len(input) || isValidPostChar(rune(input[i+1])))
|
|
}
|
|
|
|
// isValidPreChar reports whether r may directly precede an opening emphasis
// marker: whitespace or one of - ( { ' ".
func isValidPreChar(r rune) bool {
	switch r {
	case '-', '(', '{', '\'', '"':
		return true
	}
	return unicode.IsSpace(r)
}
|
|
|
|
// isValidPostChar reports whether r may directly follow a closing emphasis
// marker: whitespace or one of - . , : ! ? ; ' " ) } [.
func isValidPostChar(r rune) bool {
	switch r {
	case '-', '.', ',', ':', '!', '?', ';', '\'', '"', ')', '}', '[':
		return true
	}
	return unicode.IsSpace(r)
}
|
|
|
|
// isValidBorderChar reports whether r may sit directly inside an emphasis
// marker, which is the case for anything but whitespace.
func isValidBorderChar(r rune) bool {
	if unicode.IsSpace(r) {
		return false
	}
	return true
}
|
|
|
|
func (l RegularLink) Kind() string {
|
|
if p := l.Protocol; l.Description != nil || (p != "" && p != "file" && p != "http" && p != "https") {
|
|
return "regular"
|
|
}
|
|
if imageExtensionRegexp.MatchString(path.Ext(l.URL)) {
|
|
return "image"
|
|
}
|
|
if videoExtensionRegexp.MatchString(path.Ext(l.URL)) {
|
|
return "video"
|
|
}
|
|
return "regular"
|
|
}
|
|
|
|
// String returns the result of orgWriter.nodesAsString for this Text node.
func (n Text) String() string {
	return orgWriter.nodesAsString(n)
}
|
|
// String returns the result of orgWriter.nodesAsString for this LineBreak
// node.
func (n LineBreak) String() string {
	return orgWriter.nodesAsString(n)
}
|
|
// String returns the result of orgWriter.nodesAsString for this
// ExplicitLineBreak node.
func (n ExplicitLineBreak) String() string {
	return orgWriter.nodesAsString(n)
}
|
|
// String returns the result of orgWriter.nodesAsString for this
// StatisticToken node.
func (n StatisticToken) String() string {
	return orgWriter.nodesAsString(n)
}
|
|
// String returns the result of orgWriter.nodesAsString for this Emphasis
// node.
func (n Emphasis) String() string {
	return orgWriter.nodesAsString(n)
}
|
|
// String returns the result of orgWriter.nodesAsString for this FootnoteLink
// node.
func (n FootnoteLink) String() string {
	return orgWriter.nodesAsString(n)
}
|
|
// String returns the result of orgWriter.nodesAsString for this RegularLink
// node.
func (n RegularLink) String() string {
	return orgWriter.nodesAsString(n)
}
|
|
// String returns the result of orgWriter.nodesAsString for this Timestamp
// node.
func (n Timestamp) String() string {
	return orgWriter.nodesAsString(n)
}
|