Fix bug in autolink overescaping html entities. If autolink encounters a link which already has an escaped html entity, it would escape the ampersand again, producing things like these: `&amp;` --> `&amp;amp;` and `&quot;` --> `&amp;quot;`. This commit solves that by first looking for all entity-looking things in the link and copying those ranges verbatim, only considering the rest of the string for escaping. Doesn't seem to have considerable performance impact. The mailto: links are processed the old way.
Vytautas Šaltenis vytas@rtfb.lt
Sun, 26 Jan 2014 21:39:38 +0200
2 files changed,
22 insertions(+),
3 deletions(-)
M
html.go
→
html.go
@@ -74,6 +74,8 @@ urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`) anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`) imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`) + // TODO: improve this regexp to catch all possible entities: + htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`) ) // Html is a type that implements the Renderer interface for HTML output.@@ -162,6 +164,16 @@ }
if org < len(src) { out.Write(src[org:]) } +} + +func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) { + end := 0 + for _, rang := range skipRanges { + attrEscape(out, src[end:rang[0]]) + out.Write(src[rang[0]:rang[1]]) + end = rang[1] + } + attrEscape(out, src[end:]) } func (options *Html) GetFlags() int {@@ -408,10 +420,11 @@ out.WriteString("</p>\n")
} func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) { + skipRanges := htmlEntity.FindAllIndex(link, -1) if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL { // mark it but don't link it if it is not a safe link: no smartypants out.WriteString("<tt>") - attrEscape(out, link) + entityEscapeWithSkip(out, link, skipRanges) out.WriteString("</tt>") return }@@ -420,7 +433,7 @@ out.WriteString("<a href=\"")
if kind == LINK_TYPE_EMAIL { out.WriteString("mailto:") } - attrEscape(out, link) + entityEscapeWithSkip(out, link, skipRanges) out.WriteString("\">") // Pretty print: if we get an email address as@@ -432,7 +445,7 @@ attrEscape(out, link[len("mailto://"):])
case bytes.HasPrefix(link, []byte("mailto:")): attrEscape(out, link[len("mailto:"):]) default: - attrEscape(out, link) + entityEscapeWithSkip(out, link, skipRanges) } out.WriteString("</a>")
M
inline_test.go
→
inline_test.go
@@ -692,6 +692,12 @@ "<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).</p>\n",
"http://www.foo.com<br />\n", "<p><a href=\"http://www.foo.com\">http://www.foo.com</a><br /></p>\n", + + "http://foo.com/viewtopic.php?f=18&amp;t=297", + "<p><a href=\"http://foo.com/viewtopic.php?f=18&amp;t=297\">http://foo.com/viewtopic.php?f=18&amp;t=297</a></p>\n", + + "http://foo.com/viewtopic.php?param=&quot;18&quot;zz", + "<p><a href=\"http://foo.com/viewtopic.php?param=&quot;18&quot;zz\">http://foo.com/viewtopic.php?param=&quot;18&quot;zz</a></p>\n", } doTestsInline(t, tests) }