all repos — grayfriday @ 9fc8c9d8660c52c8390ba80e50556577e179b984

blackfriday fork with a few changes

Fix bug with overzealous autolink processing

When the source Markdown contains an anchor tag with a URL as its link text
(i.e. <a href=...>http://foo.bar</a>), autolink converts that link text
into another anchor tag, which is nonsense. Detect this situation with
a regexp and exit autolink processing early.
Vytautas Šaltenis vytas@rtfb.lt
Sat, 25 Jan 2014 21:42:34 +0200
commit

9fc8c9d8660c52c8390ba80e50556577e179b984

parent

84ee8e62f639b3084fda017e785aa74fe272b895

2 files changed, 34 insertions(+), 0 deletions(-)

jump to
M inline.go

@@ -15,7 +15,12 @@ package blackfriday

import ( "bytes" + "regexp" "strconv" +) + +var ( + anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`) ) // Functions to parse text within a block

@@ -616,6 +621,20 @@ func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {

// quick check to rule out most false hits on ':' if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' { return 0 + } + + // Now a more expensive check to see if we're not inside an anchor element + anchorStart := offset + offsetFromAnchor := 0 + for anchorStart > 0 && data[anchorStart] != '<' { + anchorStart-- + offsetFromAnchor++ + } + + anchorStr := anchorRe.Find(data[anchorStart:]) + if anchorStr != nil { + out.Write(anchorStr[offsetFromAnchor:]) + return len(anchorStr) - offsetFromAnchor } // scan backward for a word boundary
M inline_test.go

@@ -674,6 +674,21 @@

"even a > can be escaped <http://new.com?q=\\>&etc>\n", "<p>even a &gt; can be escaped <a href=\"http://new.com?q=&gt;&amp;etc\">" + "http://new.com?q=&gt;&amp;etc</a></p>\n", + + "<a href=\"http://fancy.com\">http://fancy.com</a>\n", + "<p><a href=\"http://fancy.com\">http://fancy.com</a></p>\n", + + "<a href=\"http://fancy.com\">This is a link</a>\n", + "<p><a href=\"http://fancy.com\">This is a link</a></p>\n", + + "<a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a>\n", + "<p><a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a></p>\n", + + "(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (\n", + "<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (</p>\n", + + "(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).\n", + "<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).</p>\n", } doTestsInline(t, tests) }