Rewrite protection against JavaScript injection. This drops the naive approach of stripping <script> tags and resorts to full sanitization of the HTML. The general idea (and the regexps) is taken from Stack Exchange's PageDown JavaScript Markdown processor[1]. As in PageDown, it is implemented as a separate pass over the resulting HTML. Includes a metric ton (but not all) of the test cases from the OWASP cheat sheet[2]. Several are commented out since they don't pass yet. Stronger (but still incomplete) fix for #11. [1] http://code.google.com/p/pagedown/wiki/PageDown [2] https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
Vytautas Šaltenis vytas@rtfb.lt
Wed, 22 Jan 2014 01:14:35 +0200
4 files changed,
194 insertions(+),
25 deletions(-)
M
html.go
→
html.go
@@ -18,6 +18,7 @@
import ( "bytes" "fmt" + "regexp" "strconv" "strings" )@@ -38,6 +39,41 @@ HTML_USE_XHTML // generate XHTML output instead of HTML
HTML_USE_SMARTYPANTS // enable smart punctuation substitutions HTML_SMARTYPANTS_FRACTIONS // enable smart fractions (with HTML_USE_SMARTYPANTS) HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS) +) + +var ( + tags = []string{ + "b", + "blockquote", + "code", + "del", + "dd", + "dl", + "dt", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "i", + "kbd", + "li", + "ol", + "p", + "pre", + "s", + "sup", + "sub", + "strong", + "strike", + "ul", + } + urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` + tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`) + anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`) + imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`) ) // Html is a type that implements the Renderer interface for HTML output.@@ -137,6 +173,10 @@ out.Write(src[org:])
} } +func (options *Html) GetFlags() int { + return options.flags +} + func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) { marker := out.Len() doubleSpace(out)@@ -168,30 +208,8 @@ return
} doubleSpace(out) - if options.flags&HTML_SKIP_SCRIPT != 0 { - out.Write(stripTag(string(text), "script", "p")) - } else { - out.Write(text) - } + out.Write(text) out.WriteByte('\n') -} - -func stripTag(text, tag, newTag string) []byte { - closeNewTag := fmt.Sprintf("</%s>", newTag) - i := 0 - for i < len(text) && text[i] != '<' { - i++ - } - if i == len(text) { - return []byte(text) - } - found, end := findHtmlTagPos([]byte(text[i:]), tag) - closeTag := fmt.Sprintf("</%s>", tag) - noOpen := text - if found { - noOpen = text[0:i+1] + newTag + text[end:] - } - return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1)) } func (options *Html) HRule(out *bytes.Buffer) {@@ -779,6 +797,46 @@ return true, rightAngle
} return false, -1 +} + +func sanitizeHtml(html []byte) []byte { + var result []byte + for string(html) != "" { + skip, tag, rest := findHtmlTag(html) + html = rest + result = append(result, skip...) + result = append(result, sanitizeTag(tag)...) + } + return append(result, []byte("\n")...) +} + +func sanitizeTag(tag []byte) []byte { + if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) { + return tag + } else { + return []byte("") + } +} + +func skipUntilChar(text []byte, start int, char byte) int { + i := start + for i < len(text) && text[i] != char { + i++ + } + return i +} + +func findHtmlTag(html []byte) (skip, tag, rest []byte) { + start := skipUntilChar(html, 0, '<') + rightAngle := skipUntilCharIgnoreQuotes(html, start, '>') + if rightAngle > start { + skip = html[0:start] + tag = html[start : rightAngle+1] + rest = html[rightAngle+1:] + return + } + + return []byte(""), []byte(""), []byte("") } func skipSpace(tag []byte, i int) int {
M
inline_test.go
→
inline_test.go
@@ -90,16 +90,117 @@ " <script>alert()</script>\n",
"<p>alert()</p>\n", "<script>alert()</script>\n", - "<p>alert()</p>\n", + "alert()\n", "<script src='foo'></script>\n", - "<p></p>\n", + "\n", + + "<script src='a>b'></script>\n", + "\n", "zz <script src='foo'></script>\n", "<p>zz </p>\n", "zz <script src=foo></script>\n", "<p>zz </p>\n", + + `<script><script src="http://example.com/exploit.js"></SCRIPT></script>`, + "\n", + + `'';!--"<XSS>=&{()}`, + "<p>'';!--"=&{()}</p>\n", + + "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>", + "<p></p>\n", + + "<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>", + "<p></p>\n", + + `<IMG SRC="javascript:alert('XSS');">`, + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=JaVaScRiPt:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>", + "<p></p>\n", + + `<a onmouseover="alert(document.cookie)">xss link</a>`, + "<p>xss link</a></p>\n", + + "<a onmouseover=alert(document.cookie)>xss link</a>", + "<p>xss link</a></p>\n", + + // XXX: this doesn't pass yet + //`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, + //"<p></p>\n", + + "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>", + "<p></p>\n", + + `<IMG SRC=# onmouseover="alert('xxs')">`, + "<p></p>\n", + + `<IMG SRC= onmouseover="alert('xxs')">`, + "<p></p>\n", + + `<IMG onmouseover="alert('xxs')">`, + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + `<IMG SRC="javascriptascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav	ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav
ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav
ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="  javascript:alert('XSS');">`, + "<p></p>\n", + + `<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, + "<p></p>\n", + + // XXX: this doesn't pass yet + //"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", + //"\n", + + `<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, + "<p></p>\n", + + // XXX: this doesn't pass yet + //`<<SCRIPT>alert("XSS");//<</SCRIPT>`, + //"", + + "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >", + "<p></p>\n", + + "<SCRIPT SRC=//ha.ckers.org/.j>", + "<p></p>\n", + + // XXX: this doesn't pass yet + //`<IMG SRC="javascript:alert('XSS')"`, + //"", + + // XXX: this doesn't pass yet + //"<iframe src=http://ha.ckers.org/scriptlet.html <", + //"", } doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT) }
M
latex.go
→
latex.go
@@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
return &Latex{} } +func (options *Latex) GetFlags() int { + return 0 +} + // render code chunks using verbatim, or listings if we have a language func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) { if lang == "" {
M
markdown.go
→
markdown.go
@@ -165,6 +165,8 @@
// Header and footer DocumentHeader(out *bytes.Buffer) DocumentFooter(out *bytes.Buffer) + + GetFlags() int } // Callback functions for inline parsing. One such function is defined@@ -290,6 +292,10 @@ }
first := firstPass(p, input) second := secondPass(p, first) + + if renderer.GetFlags()&HTML_SKIP_SCRIPT != 0 { + second = sanitizeHtml(second) + } return second }