From d2d9dc0fc8210104ad2fbcbbda2df964671b3fb8 Mon Sep 17 00:00:00 2001 From: Niklas Fasching Date: Sun, 2 Dec 2018 18:24:51 +0100 Subject: [PATCH] Add auto links: Try to render unmarked urls as links --- org/inline.go | 34 ++++++++++++++++++++++++++++++++-- org/org.go | 4 +++- org/testdata/example.html | 1 + org/testdata/example.org | 1 + 4 files changed, 37 insertions(+), 3 deletions(-) diff --git a/org/inline.go b/org/inline.go index 2b78cd4..62bf0d3 100644 --- a/org/inline.go +++ b/org/inline.go @@ -24,8 +24,12 @@ type RegularLink struct { Protocol string Description []Node URL string + AutoLink bool } +var validURLCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;=" +var autolinkProtocols = regexp.MustCompile(`(https?|ftp|file)`) + var redundantSpaces = regexp.MustCompile("[ \t]+") var subScriptSuperScriptRegexp = regexp.MustCompile(`([_^])\{(.*?)\}`) var footnoteRegexp = regexp.MustCompile(`\[fn:([\w-]+?)(:(.*?))?\]`) @@ -33,7 +37,7 @@ var footnoteRegexp = regexp.MustCompile(`\[fn:([\w-]+?)(:(.*?))?\]`) func (d *Document) parseInline(input string) (nodes []Node) { previous, current := 0, 0 for current < len(input) { - consumed, node := 0, (Node)(nil) + rewind, consumed, node := 0, 0, (Node)(nil) switch input[current] { case '^': consumed, node = d.parseSubOrSuperScript(input, current) @@ -45,6 +49,9 @@ func (d *Document) parseInline(input string) (nodes []Node) { consumed, node = d.parseRegularLinkOrFootnoteReference(input, current) case '\\': consumed, node = d.parseExplicitLineBreak(input, current) + case ':': + rewind, consumed, node = d.parseAutoLink(input, current) + current -= rewind } if consumed != 0 { if current > previous { @@ -125,6 +132,29 @@ func (d *Document) parseFootnoteReference(input string, start int) (int, Node) { return 0, nil } +func (d *Document) parseAutoLink(input string, start int) (int, int, Node) { + if len(input[start:]) < 3 || input[start+1] != '/' || input[start+2] != '/' { + return 0, 0, nil + } + protocolStart, protocol := start-1, "" + for ; protocolStart > 0 && unicode.IsLetter(rune(input[protocolStart])); protocolStart-- { + } + if m := autolinkProtocols.FindStringSubmatch(input[protocolStart:start]); m != nil { + protocol = m[1] + } else { + return 0, 0, nil + } + end := start + for ; end < len(input) && strings.ContainsRune(validURLCharacters, rune(input[end])); end++ { + } + path := input[start:end] + if path == "://" { + return 0, 0, nil + } + link := RegularLink{protocol, []Node{Text{protocol + path}}, protocol + path, true} + return len(protocol), len(path + protocol), link +} + func (d *Document) parseRegularLink(input string, start int) (int, Node) { if len(input[start:]) == 0 || input[start+1] != '[' { return 0, nil @@ -147,7 +177,7 @@ func (d *Document) parseRegularLink(input string, start int) (int, Node) { if len(parts) == 2 { protocol = parts[0] } - return consumed, RegularLink{protocol, description, link} + return consumed, RegularLink{protocol, description, link, false} } func (d *Document) parseEmphasis(input string, start int) (int, Node) { diff --git a/org/org.go b/org/org.go index 866881c..203981c 100644 --- a/org/org.go +++ b/org/org.go @@ -246,7 +246,9 @@ func (w *OrgWriter) writeRegularLink(l RegularLink) { descriptionWriter := w.emptyClone() descriptionWriter.writeNodes(l.Description...) description := descriptionWriter.String() - if l.URL != description { + if l.AutoLink { + w.WriteString(l.URL) + } else if l.URL != description { w.WriteString(fmt.Sprintf("[[%s][%s]]", l.URL, description)) } else { w.WriteString(fmt.Sprintf("[[%s]]", l.URL)) diff --git a/org/testdata/example.html b/org/testdata/example.html index 9786b14..4d6af74 100644 --- a/org/testdata/example.html +++ b/org/testdata/example.html @@ -58,6 +58,7 @@
  • regular link https://example.com link without description
  • regular link example.com link with description
  • regular link to a file (image) file:my-img.png
  • +
  • auto link, i.e. not inside \[[square brackets]\] https://www.example.com
  • diff --git a/org/testdata/example.org b/org/testdata/example.org index 789cdac..258964e 100644 --- a/org/testdata/example.org +++ b/org/testdata/example.org @@ -50,6 +50,7 @@ this one is cheating a little as tags are ALWAYS printed right aligned to a give 1. regular link [[https://example.com]] link without description 2. regular link [[https://example.com][example.com]] link with description 3. regular link to a file (image) [[file:my-img.png]] + 4. auto link, i.e. not inside =\[[square brackets]\]= https://www.example.com ** Footnotes - normal footnote reference [fn:1] - further references to the same footnote should not [fn:1] render duplicates in the footnote list