Merge pull request #75 from mprobst/sanitize_test

Avoid raw mode parsing so that tags like <script> don't cause escaping
Vytautas Šaltenis vytas@rtfb.lt
Sat, 03 May 2014 15:11:41 +0300
2 files changed,
20 insertions(+),
8 deletions(-)
M
inline_test.go
→
inline_test.go
@@ -135,7 +135,7 @@ "<a onmouseover=alert(document.cookie)>xss link</a>",
"<p><a>xss link</a></p>\n", `<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, - "<p><img><script>alert(&quot;XSS&quot;)</script>"></p>\n", + "<p><img><script>alert("XSS")</script>"></p>\n", "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>", "<p><img></p>\n",@@ -182,18 +182,14 @@
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, "<p><script/SRC="http://ha.ckers.org/xss.js"></script></p>\n", - // HTML5 interprets the <script> tag contents as raw test, thus the end - // result has double-escaped &quot; `<<SCRIPT>alert("XSS");//<</SCRIPT>`, - "<p><<script>alert(&quot;XSS&quot;);//&lt;</script></p>\n", + "<p><<script>alert("XSS");//<</script></p>\n", - // HTML5 parses the </p> within an unclosed <script> tag as text. - // Same for the following tests. "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >", - "<p><script SRC=http://ha.ckers.org/xss.js?< B ></p>\n", + "<p><script SRC=http://ha.ckers.org/xss.js?< B ></p>\n", "<SCRIPT SRC=//ha.ckers.org/.j>", - "<p><script SRC=//ha.ckers.org/.j></p>\n", + "<p><script SRC=//ha.ckers.org/.j></p>\n", `<IMG SRC="javascript:alert('XSS')"`, "<p><IMG SRC="javascript:alert('XSS')"</p>\n",@@ -220,11 +216,23 @@ }
func TestQuoteEscaping(t *testing.T) { tests := []string{ + // Make sure quotes are transported correctly (different entities or + // unicode, but correct semantics) "<p>Here are some "quotes".</p>\n", "<p>Here are some "quotes".</p>\n", "<p>Here are some “quotes”.</p>\n", "<p>Here are some \u201Cquotes\u201D.</p>\n", + + // Within a <script> tag, content gets parsed by the raw text parsing rules. + // This test makes sure we correctly disable those parsing rules and do not + // escape e.g. the closing </p>. + `Here are <script> some "quotes".`, + "<p>Here are <script> some "quotes".</p>\n", + + // Same test for an unknown element that does not switch into raw mode. + `Here are <eviltag> some "quotes".`, + "<p>Here are <eviltag> some "quotes".</p>\n", } doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT) }
M
sanitize.go
→
sanitize.go
@@ -107,6 +107,10 @@ wr.WriteString(">")
} else { wr.WriteString(html.EscapeString(string(tokenizer.Raw()))) } + // Make sure that tags like <script> that switch the parser into raw mode + // do not destroy the parse mode for following HTML text (the point is to + // escape them anyway). For that, switch off raw mode in the tokenizer. + tokenizer.NextIsNotRawText() case html.EndTagToken: // Whitelisted tokens can be written in raw. tag, _ := tokenizer.TagName()