Roll our own implementation of HTML escaper
Vytautas Šaltenis vytas@rtfb.lt
Sat, 10 Sep 2016 14:33:37 +0300
4 files changed,
130 insertions(+),
33 deletions(-)
A
esc.go
@@ -0,0 +1,45 @@
// Reconstructed contents of esc.go (package blackfriday; needs the "html" and
// "io" imports). The entity sequences below were restored after being decoded
// away by the page extraction.

// escMap pairs one byte that must not appear verbatim in HTML output with the
// entity sequence that is written in its place.
type escMap struct {
	char byte
	seq  []byte
}

// htmlEscaper lists every byte escapeHTML rewrites together with its
// replacement entity.
var htmlEscaper = []escMap{
	{'&', []byte("&amp;")},
	{'<', []byte("&lt;")},
	{'>', []byte("&gt;")},
	{'"', []byte("&quot;")},
}

// escapeSeq returns the replacement sequence for c, or nil when c is not
// listed in htmlEscaper and can be copied through unchanged.
func escapeSeq(c byte) []byte {
	for i := range htmlEscaper {
		if htmlEscaper[i].char == c {
			return htmlEscaper[i].seq
		}
	}
	return nil
}

// escapeHTML writes s to w, replacing each byte listed in htmlEscaper
// ('&', '<', '>' and '"') with its entity sequence. Runs of ordinary bytes
// are written as sub-slices of s, so no intermediate buffer is built.
//
// The function has no return value, so errors reported by w.Write are
// dropped.
func escapeHTML(w io.Writer, s []byte) {
	start := 0 // first byte of the pending run that has not been written yet
	for end := 0; end < len(s); end++ {
		seq := escapeSeq(s[end])
		if seq == nil {
			continue
		}
		// Flush the ordinary bytes collected so far, then the entity.
		if start < end {
			w.Write(s[start:end])
		}
		w.Write(seq)
		start = end + 1
	}
	// Trailing run after the last escaped byte (or all of s if none matched).
	if start < len(s) {
		w.Write(s[start:])
	}
}

// escLink writes text to w HTML-escaped, after first decoding any entities
// text already contains with html.UnescapeString, so an input such as
// "a&amp;b" is emitted as "a&amp;b" rather than being escaped twice.
func escLink(w io.Writer, text []byte) {
	unesc := html.UnescapeString(string(text))
	escapeHTML(w, []byte(unesc))
}
A
esc_test.go
@@ -0,0 +1,50 @@
+package blackfriday + +import ( + "bytes" + "testing" +) + +func TestEsc(t *testing.T) { + tests := []string{ + "abc", "abc", + "a&c", "a&c", + "<", "<", + "[]:<", "[]:<", + "Hello <!--", "Hello <!--", + } + for i := 0; i < len(tests); i += 2 { + var b bytes.Buffer + escapeHTML(&b, []byte(tests[i])) + if !bytes.Equal(b.Bytes(), []byte(tests[i+1])) { + t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]", + tests[i], tests[i+1], b.String()) + } + } +} + +/* +func BenchmarkEscapeHTML(b *testing.B) { + tests := [][]byte{ + []byte(""), + []byte("AT&T has an ampersand in their name."), + []byte("AT&T is another way to write it."), + []byte("This & that."), + []byte("4 < 5."), + []byte("6 > 5."), + []byte("Here's a [link] [1] with an ampersand in the URL."), + []byte("Here's a link with an amersand in the link text: [AT&T] [2]."), + []byte("Here's an inline [link](/script?foo=1&bar=2)."), + []byte("Here's an inline [link](</script?foo=1&bar=2>)."), + []byte("[1]: http://example.com/?foo=1&bar=2"), + []byte("[2]: http://att.com/ \"AT&T\""), + } + var buff bytes.Buffer + for n := 0; n < b.N; n++ { + for _, t := range tests { + escapeHTML(&buff, t) + buff.Reset() + } + } +} +*/
M
html.go
→
html.go
@@ -18,7 +18,6 @@
import ( "bytes" "fmt" - "html" "io" "regexp" "strings"@@ -375,17 +374,6 @@ return ""
} } -func esc(text []byte) []byte { - unesc := []byte(html.UnescapeString(string(text))) - return escCode(unesc) -} - -func escCode(text []byte) []byte { - e1 := []byte(html.EscapeString(string(text))) - e2 := bytes.Replace(e1, []byte("""), []byte("""), -1) - return bytes.Replace(e2, []byte("'"), []byte{'\''}, -1) -} - func (r *HTMLRenderer) out(w io.Writer, text []byte) { if r.disableTags > 0 { w.Write(htmlTagRe.ReplaceAll(text, []byte{}))@@ -504,11 +492,17 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {
attrs := []string{} switch node.Type { case Text: - node.Literal = esc(node.Literal) if r.Flags&Smartypants != 0 { - node.Literal = r.sr.Process(node.Literal) + var tmp bytes.Buffer + escapeHTML(&tmp, node.Literal) + r.sr.Process(w, tmp.Bytes()) + } else { + if node.Parent.Type == Link { + escLink(w, node.Literal) + } else { + escapeHTML(w, node.Literal) + } } - r.out(w, node.Literal) case Softbreak: r.out(w, []byte{'\n'}) // TODO: make it configurable via out(renderer.softbreak)@@ -561,16 +555,22 @@ }
} else { if entering { dest = r.addAbsPrefix(dest) - //if (!(options.safe && potentiallyUnsafe(node.destination))) { - attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest))) - //} + var hrefBuff bytes.Buffer + hrefBuff.WriteString("href=\"") + escLink(&hrefBuff, dest) + hrefBuff.WriteByte('"') + attrs = append(attrs, hrefBuff.String()) if node.NoteID != 0 { r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node)) break } attrs = appendLinkAttrs(attrs, r.Flags, dest) if len(node.LinkData.Title) > 0 { - attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title))) + var titleBuff bytes.Buffer + titleBuff.WriteString("title=\"") + escapeHTML(&titleBuff, node.LinkData.Title) + titleBuff.WriteByte('"') + attrs = append(attrs, titleBuff.String()) } r.tag(w, aTag, attrs) } else {@@ -591,7 +591,9 @@ if r.disableTags == 0 {
//if options.safe && potentiallyUnsafe(dest) { //out(w, `<img src="" alt="`) //} else { - r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest)))) + r.out(w, []byte(`<img src="`)) + escLink(w, dest) + r.out(w, []byte(`" alt="`)) //} } r.disableTags++@@ -600,14 +602,14 @@ r.disableTags--
if r.disableTags == 0 { if node.LinkData.Title != nil { r.out(w, []byte(`" title="`)) - r.out(w, esc(node.LinkData.Title)) + escapeHTML(w, node.LinkData.Title) } r.out(w, []byte(`" />`)) } } case Code: r.out(w, codeTag) - r.out(w, escCode(node.Literal)) + escapeHTML(w, node.Literal) r.out(w, codeCloseTag) case Document: break@@ -752,7 +754,7 @@ attrs = appendLanguageAttr(attrs, node.Info)
r.cr(w) r.out(w, preTag) r.tag(w, codeTag[:len(codeTag)-1], attrs) - r.out(w, escCode(node.Literal)) + escapeHTML(w, node.Literal) r.out(w, codeCloseTag) r.out(w, preCloseTag) if node.Parent.Type != Item {@@ -837,9 +839,9 @@ }
w.WriteString("<head>\n") w.WriteString(" <title>") if r.Flags&Smartypants != 0 { - w.Write(r.sr.Process([]byte(r.Title))) + r.sr.Process(w, []byte(r.Title)) } else { - w.Write(esc([]byte(r.Title))) + escapeHTML(w, []byte(r.Title)) } w.WriteString("</title>\n") w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")@@ -852,14 +854,14 @@ w.WriteString(ending)
w.WriteString(">\n") if r.CSS != "" { w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"") - w.Write(esc([]byte(r.CSS))) + escapeHTML(w, []byte(r.CSS)) w.WriteString("\"") w.WriteString(ending) w.WriteString(">\n") } if r.Icon != "" { w.WriteString(" <link rel=\"icon\" type=\"image/x-icon\" href=\"") - w.Write(esc([]byte(r.Icon))) + escapeHTML(w, []byte(r.Icon)) w.WriteString("\"") w.WriteString(ending) w.WriteString(">\n")@@ -919,6 +921,7 @@ w.WriteString("<nav>\n")
w.Write(buf.Bytes()) w.WriteString("\n\n</nav>\n") } + r.lastOutputLen = buf.Len() } func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) {
M
smartypants.go
→
smartypants.go
@@ -17,6 +17,7 @@ package blackfriday
import ( "bytes" + "io" ) // SPRenderer is a struct containing state of a Smartypants renderer.@@ -401,13 +402,12 @@ return &r
} // Process is the entry point of the Smartypants renderer. -func (r *SPRenderer) Process(text []byte) []byte { - var buff bytes.Buffer +func (r *SPRenderer) Process(w io.Writer, text []byte) { mark := 0 for i := 0; i < len(text); i++ { if action := r.callbacks[text[i]]; action != nil { if i > mark { - buff.Write(text[mark:i]) + w.Write(text[mark:i]) } previousChar := byte(0) if i > 0 {@@ -415,12 +415,11 @@ previousChar = text[i-1]
} var tmp bytes.Buffer i += action(&tmp, previousChar, text[i:]) - buff.Write(tmp.Bytes()) + w.Write(tmp.Bytes()) mark = i + 1 } } if mark < len(text) { - buff.Write(text[mark:]) + w.Write(text[mark:]) } - return buff.Bytes() }