icy does git — grayfriday: 9fc8c9d8660c52c8390ba80e50556577e179b984

Fix bug with overzealous autolink processing

When the source Markdown contains an anchor tag with URL as link text
(i.e. <a href=...>http://foo.bar</a>), autolink converts that link text
into another anchor tag, which is nonsense. Detect this situation with
regexp and early exit autolink processing.

Vytautas Šaltenis vytas@rtfb.lt

Sat, 25 Jan 2014 21:42:34 +0200

commit

9fc8c9d8660c52c8390ba80e50556577e179b984

parent

84ee8e62f639b3084fda017e785aa74fe272b895

2 files changed, 34 insertions(+), 0 deletions(-)

jump to

inline.go

inline_test.go

M inline.go → inline.go

@@ -15,7 +15,12 @@ package blackfriday
 
 import (
 	"bytes"
+	"regexp"
 	"strconv"
+)
+
+var (
+	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
 )
 
 // Functions to parse text within a block
@@ -616,6 +621,20 @@ func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 	// quick check to rule out most false hits on ':'
 	if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
 		return 0
+	}
+
+	// Now a more expensive check to see if we're not inside an anchor element
+	anchorStart := offset
+	offsetFromAnchor := 0
+	for anchorStart > 0 && data[anchorStart] != '<' {
+		anchorStart--
+		offsetFromAnchor++
+	}
+
+	anchorStr := anchorRe.Find(data[anchorStart:])
+	if anchorStr != nil {
+		out.Write(anchorStr[offsetFromAnchor:])
+		return len(anchorStr) - offsetFromAnchor
 	}
 
 	// scan backward for a word boundary

M inline_test.go → inline_test.go

@@ -674,6 +674,21 @@
 		"even a > can be escaped <http://new.com?q=\\>&etc>\n",
 		"<p>even a &gt; can be escaped <a href=\"http://new.com?q=&gt;&amp;etc\">" +
 			"http://new.com?q=&gt;&amp;etc</a></p>\n",
+
+		"<a href=\"http://fancy.com\">http://fancy.com</a>\n",
+		"<p><a href=\"http://fancy.com\">http://fancy.com</a></p>\n",
+
+		"<a href=\"http://fancy.com\">This is a link</a>\n",
+		"<p><a href=\"http://fancy.com\">This is a link</a></p>\n",
+
+		"<a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a>\n",
+		"<p><a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a></p>\n",
+
+		"(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (\n",
+		"<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (</p>\n",
+
+		"(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).\n",
+		"<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).</p>\n",
 	}
 	doTestsInline(t, tests)
 }

all repos — grayfriday @ 9fc8c9d8660c52c8390ba80e50556577e179b984

blackfriday fork with a few changes