Merge pull request #50 from rtfb/master

Better protection against JavaScript injection
Vytautas Šaltenis vytas@rtfb.lt
Sun, 30 Mar 2014 19:52:13 +0300
5 files changed,
228 insertions(+),
48 deletions(-)
M
README.md
→
README.md
@@ -89,6 +89,11 @@ happening. The test suite stress tests this and there are no
known inputs that make it crash. If you find one, please let me know and send me the input that does it. + NOTE: "safety" in this context means *runtime safety only*. It is + not bullet proof against JavaScript injections, though we're working + on it (https://github.com/russross/blackfriday/issues/11 tracks the + progress). + * **Fast processing**. It is fast enough to render on-demand in most web applications without having to cache the output.
M
html.go
→
html.go
@@ -18,6 +18,7 @@
import ( "bytes" "fmt" + "regexp" "strconv" "strings" )@@ -28,7 +29,7 @@ HTML_SKIP_HTML = 1 << iota // skip preformatted HTML blocks
HTML_SKIP_STYLE // skip embedded <style> elements HTML_SKIP_IMAGES // skip embedded images HTML_SKIP_LINKS // skip all links - HTML_SKIP_SCRIPT // skip embedded <script> elements + HTML_SANITIZE_OUTPUT // strip output of everything that's not known to be safe HTML_SAFELINK // only link to trusted protocols HTML_NOFOLLOW_LINKS // only link with rel="nofollow" HTML_TOC // generate a table of contents@@ -39,6 +40,41 @@ HTML_USE_XHTML // generate XHTML output instead of HTML
HTML_USE_SMARTYPANTS // enable smart punctuation substitutions HTML_SMARTYPANTS_FRACTIONS // enable smart fractions (with HTML_USE_SMARTYPANTS) HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS) +) + +var ( + tags = []string{ + "b", + "blockquote", + "code", + "del", + "dd", + "dl", + "dt", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "i", + "kbd", + "li", + "ol", + "p", + "pre", + "s", + "sup", + "sub", + "strong", + "strike", + "ul", + } + urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` + tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`) + anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`) + imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`) ) // Html is a type that implements the Renderer interface for HTML output.@@ -138,6 +174,10 @@ out.Write(src[org:])
} } +func (options *Html) GetFlags() int { + return options.flags +} + func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) { marker := out.Len() doubleSpace(out)@@ -169,30 +209,8 @@ return
} doubleSpace(out) - if options.flags&HTML_SKIP_SCRIPT != 0 { - out.Write(stripTag(string(text), "script", "p")) - } else { - out.Write(text) - } + out.Write(text) out.WriteByte('\n') -} - -func stripTag(text, tag, newTag string) []byte { - closeNewTag := fmt.Sprintf("</%s>", newTag) - i := 0 - for i < len(text) && text[i] != '<' { - i++ - } - if i == len(text) { - return []byte(text) - } - found, end := findHtmlTagPos([]byte(text[i:]), tag) - closeTag := fmt.Sprintf("</%s>", tag) - noOpen := text - if found { - noOpen = text[0:i+1] + newTag + text[end:] - } - return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1)) } func (options *Html) HRule(out *bytes.Buffer) {@@ -522,9 +540,6 @@ }
if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") { return } - if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") { - return - } out.Write(text) }@@ -726,6 +741,29 @@ found, _ := findHtmlTagPos(tag, tagname)
return found } +// Look for a character, but ignore it when it's in any kind of quotes, it +// might be JavaScript +func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int { + inSingleQuote := false + inDoubleQuote := false + inGraveQuote := false + i := start + for i < len(html) { + switch { + case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote: + return i + case html[i] == '\'': + inSingleQuote = !inSingleQuote + case html[i] == '"': + inDoubleQuote = !inDoubleQuote + case html[i] == '`': + inGraveQuote = !inGraveQuote + } + i++ + } + return start +} + func findHtmlTagPos(tag []byte, tagname string) (bool, int) { i := 0 if i < len(tag) && tag[0] != '<' {@@ -754,26 +792,52 @@ if i == len(tag) {
return false, -1 } - // Now look for closing '>', but ignore it when it's in any kind of quotes, - // it might be JavaScript - inSingleQuote := false - inDoubleQuote := false - inGraveQuote := false - for i < len(tag) { - switch { - case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote: - return true, i - case tag[i] == '\'': - inSingleQuote = !inSingleQuote - case tag[i] == '"': - inDoubleQuote = !inDoubleQuote - case tag[i] == '`': - inGraveQuote = !inGraveQuote - } - i++ + rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>') + if rightAngle > i { + return true, rightAngle } return false, -1 +} + +func sanitizeHtml(html []byte) []byte { + var result []byte + for string(html) != "" { + skip, tag, rest := findHtmlTag(html) + html = rest + result = append(result, skip...) + result = append(result, sanitizeTag(tag)...) + } + return append(result, []byte("\n")...) +} + +func sanitizeTag(tag []byte) []byte { + if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) { + return tag + } else { + return []byte("") + } +} + +func skipUntilChar(text []byte, start int, char byte) int { + i := start + for i < len(text) && text[i] != char { + i++ + } + return i +} + +func findHtmlTag(html []byte) (skip, tag, rest []byte) { + start := skipUntilChar(html, 0, '<') + rightAngle := skipUntilCharIgnoreQuotes(html, start, '>') + if rightAngle > start { + skip = html[0:start] + tag = html[start : rightAngle+1] + rest = html[rightAngle+1:] + return + } + + return []byte(""), []byte(""), []byte("") } func skipSpace(tag []byte, i int) int {
M
inline_test.go
→
inline_test.go
@@ -90,18 +90,119 @@ " <script>alert()</script>\n",
"<p>alert()</p>\n", "<script>alert()</script>\n", - "<p>alert()</p>\n", + "alert()\n", "<script src='foo'></script>\n", - "<p></p>\n", + "\n", + + "<script src='a>b'></script>\n", + "\n", "zz <script src='foo'></script>\n", "<p>zz </p>\n", "zz <script src=foo></script>\n", "<p>zz </p>\n", + + `<script><script src="http://example.com/exploit.js"></SCRIPT></script>`, + "\n", + + `'';!--"<XSS>=&{()}`, + "<p>'';!--"=&{()}</p>\n", + + "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>", + "<p></p>\n", + + "<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>", + "<p></p>\n", + + `<IMG SRC="javascript:alert('XSS');">`, + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=JaVaScRiPt:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>", + "<p></p>\n", + + `<a onmouseover="alert(document.cookie)">xss link</a>`, + "<p>xss link</a></p>\n", + + "<a onmouseover=alert(document.cookie)>xss link</a>", + "<p>xss link</a></p>\n", + + // XXX: this doesn't pass yet + //`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, + //"<p></p>\n", + + "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>", + "<p></p>\n", + + `<IMG SRC=# onmouseover="alert('xxs')">`, + "<p></p>\n", + + `<IMG SRC= onmouseover="alert('xxs')">`, + "<p></p>\n", + + `<IMG onmouseover="alert('xxs')">`, + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + `<IMG SRC="javascriptascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav	ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav
ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav
ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="  javascript:alert('XSS');">`, + "<p></p>\n", + + `<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, + "<p></p>\n", + + // XXX: this doesn't pass yet + //"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", + //"\n", + + `<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, + "<p></p>\n", + + // XXX: this doesn't pass yet + //`<<SCRIPT>alert("XSS");//<</SCRIPT>`, + //"", + + "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >", + "<p></p>\n", + + "<SCRIPT SRC=//ha.ckers.org/.j>", + "<p></p>\n", + + // XXX: this doesn't pass yet + //`<IMG SRC="javascript:alert('XSS')"`, + //"", + + // XXX: this doesn't pass yet + //"<iframe src=http://ha.ckers.org/scriptlet.html <", + //"", } - doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT) + doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT) } func TestEmphasis(t *testing.T) {
M
latex.go
→
latex.go
@@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
return &Latex{} } +func (options *Latex) GetFlags() int { + return 0 +} + // render code chunks using verbatim, or listings if we have a language func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) { if lang == "" {
M
markdown.go
→
markdown.go
@@ -165,6 +165,8 @@
// Header and footer DocumentHeader(out *bytes.Buffer) DocumentFooter(out *bytes.Buffer) + + GetFlags() int } // Callback functions for inline parsing. One such function is defined@@ -231,7 +233,7 @@ htmlFlags |= HTML_USE_XHTML
htmlFlags |= HTML_USE_SMARTYPANTS htmlFlags |= HTML_SMARTYPANTS_FRACTIONS htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES - htmlFlags |= HTML_SKIP_SCRIPT + htmlFlags |= HTML_SANITIZE_OUTPUT renderer := HtmlRenderer(htmlFlags, "", "") // set up the parser@@ -290,6 +292,10 @@ }
first := firstPass(p, input) second := secondPass(p, first) + + if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 { + second = sanitizeHtml(second) + } return second }