all repos — grayfriday @ b0bdfbec4ceab22844aa766b3856aa95753ffde8

blackfriday fork with a few changes

Fix bug in autolink overescaping html entities

If autolink encounters a link which already has an escaped html entity,
it would escape the ampersand again, producing things like these:
    &amp;  --> &amp;amp;
    &quot; --> &amp;quot;
This commit solves that by first looking for all entity-looking things
in the link and copying those ranges verbatim, only considering the rest
of the string for escaping.
Doesn't seem to have considerable performance impact.
The mailto: links are processed the old way.
Vytautas Šaltenis vytas@rtfb.lt
Sun, 26 Jan 2014 21:39:38 +0200
commit

b0bdfbec4ceab22844aa766b3856aa95753ffde8

parent

cc0d56d0920643c43d4fb60b26d62ac10893c6dd

2 files changed, 22 insertions(+), 3 deletions(-)

jump to
M html.go

@@ -74,6 +74,8 @@ urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`) anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`) imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`) + // TODO: improve this regexp to catch all possible entities: + htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`) ) // Html is a type that implements the Renderer interface for HTML output.

@@ -162,6 +164,16 @@ }

if org < len(src) { out.Write(src[org:]) } +} + +func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) { + end := 0 + for _, rang := range skipRanges { + attrEscape(out, src[end:rang[0]]) + out.Write(src[rang[0]:rang[1]]) + end = rang[1] + } + attrEscape(out, src[end:]) } func (options *Html) GetFlags() int {

@@ -408,10 +420,11 @@ out.WriteString("</p>\n")

} func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) { + skipRanges := htmlEntity.FindAllIndex(link, -1) if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL { // mark it but don't link it if it is not a safe link: no smartypants out.WriteString("<tt>") - attrEscape(out, link) + entityEscapeWithSkip(out, link, skipRanges) out.WriteString("</tt>") return }

@@ -420,7 +433,7 @@ out.WriteString("<a href=\"")

if kind == LINK_TYPE_EMAIL { out.WriteString("mailto:") } - attrEscape(out, link) + entityEscapeWithSkip(out, link, skipRanges) out.WriteString("\">") // Pretty print: if we get an email address as

@@ -432,7 +445,7 @@ attrEscape(out, link[len("mailto://"):])

case bytes.HasPrefix(link, []byte("mailto:")): attrEscape(out, link[len("mailto:"):]) default: - attrEscape(out, link) + entityEscapeWithSkip(out, link, skipRanges) } out.WriteString("</a>")
M inline_test.go

@@ -692,6 +692,12 @@ "<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).</p>\n",

"http://www.foo.com<br />\n", "<p><a href=\"http://www.foo.com\">http://www.foo.com</a><br /></p>\n", + + "http://foo.com/viewtopic.php?f=18&amp;t=297", + "<p><a href=\"http://foo.com/viewtopic.php?f=18&amp;t=297\">http://foo.com/viewtopic.php?f=18&amp;t=297</a></p>\n", + + "http://foo.com/viewtopic.php?param=&quot;18&quot;zz", + "<p><a href=\"http://foo.com/viewtopic.php?param=&quot;18&quot;zz\">http://foo.com/viewtopic.php?param=&quot;18&quot;zz</a></p>\n", } doTestsInline(t, tests) }