all repos — grayfriday @ d643453f1ef4fe5d73e8697dfac24435ae6ad99d

blackfriday fork with a few changes

Merge pull request #50 from rtfb/master

Better protection against JavaScript injection
Vytautas Šaltenis vytas@rtfb.lt
Sun, 30 Mar 2014 19:52:13 +0300
commit

d643453f1ef4fe5d73e8697dfac24435ae6ad99d

parent

e078bb8ec34db7cc285f347d24f367b95c909ece

5 files changed, 228 insertions(+), 48 deletions(-)

jump to
M README.md

@@ -89,6 +89,11 @@ happening. The test suite stress tests this and there are no

known inputs that make it crash. If you find one, please let me know and send me the input that does it. + NOTE: "safety" in this context means *runtime safety only*. It is + not bullet proof against JavaScript injections, though we're working + on it (https://github.com/russross/blackfriday/issues/11 tracks the + progress). + * **Fast processing**. It is fast enough to render on-demand in most web applications without having to cache the output.
M html.go

@@ -18,6 +18,7 @@

import ( "bytes" "fmt" + "regexp" "strconv" "strings" )

@@ -28,7 +29,7 @@ HTML_SKIP_HTML = 1 << iota // skip preformatted HTML blocks

HTML_SKIP_STYLE // skip embedded <style> elements HTML_SKIP_IMAGES // skip embedded images HTML_SKIP_LINKS // skip all links - HTML_SKIP_SCRIPT // skip embedded <script> elements + HTML_SANITIZE_OUTPUT // strip output of everything that's not known to be safe HTML_SAFELINK // only link to trusted protocols HTML_NOFOLLOW_LINKS // only link with rel="nofollow" HTML_TOC // generate a table of contents

@@ -39,6 +40,41 @@ HTML_USE_XHTML // generate XHTML output instead of HTML

HTML_USE_SMARTYPANTS // enable smart punctuation substitutions HTML_SMARTYPANTS_FRACTIONS // enable smart fractions (with HTML_USE_SMARTYPANTS) HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS) +) + +var ( + tags = []string{ + "b", + "blockquote", + "code", + "del", + "dd", + "dl", + "dt", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "i", + "kbd", + "li", + "ol", + "p", + "pre", + "s", + "sup", + "sub", + "strong", + "strike", + "ul", + } + urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` + tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`) + anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`) + imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`) ) // Html is a type that implements the Renderer interface for HTML output.

@@ -138,6 +174,10 @@ out.Write(src[org:])

} } +func (options *Html) GetFlags() int { + return options.flags +} + func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) { marker := out.Len() doubleSpace(out)

@@ -169,30 +209,8 @@ return

} doubleSpace(out) - if options.flags&HTML_SKIP_SCRIPT != 0 { - out.Write(stripTag(string(text), "script", "p")) - } else { - out.Write(text) - } + out.Write(text) out.WriteByte('\n') -} - -func stripTag(text, tag, newTag string) []byte { - closeNewTag := fmt.Sprintf("</%s>", newTag) - i := 0 - for i < len(text) && text[i] != '<' { - i++ - } - if i == len(text) { - return []byte(text) - } - found, end := findHtmlTagPos([]byte(text[i:]), tag) - closeTag := fmt.Sprintf("</%s>", tag) - noOpen := text - if found { - noOpen = text[0:i+1] + newTag + text[end:] - } - return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1)) } func (options *Html) HRule(out *bytes.Buffer) {

@@ -522,9 +540,6 @@ }

if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") { return } - if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") { - return - } out.Write(text) }

@@ -726,6 +741,29 @@ found, _ := findHtmlTagPos(tag, tagname)

return found } +// Look for a character, but ignore it when it's in any kind of quotes, it +// might be JavaScript +func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int { + inSingleQuote := false + inDoubleQuote := false + inGraveQuote := false + i := start + for i < len(html) { + switch { + case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote: + return i + case html[i] == '\'': + inSingleQuote = !inSingleQuote + case html[i] == '"': + inDoubleQuote = !inDoubleQuote + case html[i] == '`': + inGraveQuote = !inGraveQuote + } + i++ + } + return start +} + func findHtmlTagPos(tag []byte, tagname string) (bool, int) { i := 0 if i < len(tag) && tag[0] != '<' {

@@ -754,26 +792,52 @@ if i == len(tag) {

return false, -1 } - // Now look for closing '>', but ignore it when it's in any kind of quotes, - // it might be JavaScript - inSingleQuote := false - inDoubleQuote := false - inGraveQuote := false - for i < len(tag) { - switch { - case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote: - return true, i - case tag[i] == '\'': - inSingleQuote = !inSingleQuote - case tag[i] == '"': - inDoubleQuote = !inDoubleQuote - case tag[i] == '`': - inGraveQuote = !inGraveQuote - } - i++ + rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>') + if rightAngle > i { + return true, rightAngle } return false, -1 +} + +func sanitizeHtml(html []byte) []byte { + var result []byte + for string(html) != "" { + skip, tag, rest := findHtmlTag(html) + html = rest + result = append(result, skip...) + result = append(result, sanitizeTag(tag)...) + } + return append(result, []byte("\n")...) +} + +func sanitizeTag(tag []byte) []byte { + if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) { + return tag + } else { + return []byte("") + } +} + +func skipUntilChar(text []byte, start int, char byte) int { + i := start + for i < len(text) && text[i] != char { + i++ + } + return i +} + +func findHtmlTag(html []byte) (skip, tag, rest []byte) { + start := skipUntilChar(html, 0, '<') + rightAngle := skipUntilCharIgnoreQuotes(html, start, '>') + if rightAngle > start { + skip = html[0:start] + tag = html[start : rightAngle+1] + rest = html[rightAngle+1:] + return + } + + return []byte(""), []byte(""), []byte("") } func skipSpace(tag []byte, i int) int {
M inline_test.go

@@ -90,18 +90,119 @@ " <script>alert()</script>\n",

"<p>alert()</p>\n", "<script>alert()</script>\n", - "<p>alert()</p>\n", + "alert()\n", "<script src='foo'></script>\n", - "<p></p>\n", + "\n", + + "<script src='a>b'></script>\n", + "\n", "zz <script src='foo'></script>\n", "<p>zz </p>\n", "zz <script src=foo></script>\n", "<p>zz </p>\n", + + `<script><script src="http://example.com/exploit.js"></SCRIPT></script>`, + "\n", + + `'';!--"<XSS>=&{()}`, + "<p>'';!--&quot;=&amp;{()}</p>\n", + + "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>", + "<p></p>\n", + + "<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>", + "<p></p>\n", + + `<IMG SRC="javascript:alert('XSS');">`, + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=JaVaScRiPt:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>", + "<p></p>\n", + + `<a onmouseover="alert(document.cookie)">xss link</a>`, + "<p>xss link</a></p>\n", + + "<a onmouseover=alert(document.cookie)>xss link</a>", + "<p>xss link</a></p>\n", + + // XXX: this doesn't pass yet + //`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, + //"<p></p>\n", + + "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>", + "<p></p>\n", + + `<IMG SRC=# onmouseover="alert('xxs')">`, + "<p></p>\n", + + `<IMG SRC= onmouseover="alert('xxs')">`, + "<p></p>\n", + + `<IMG onmouseover="alert('xxs')">`, + "<p></p>\n", + + "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>", + "<p></p>\n", + + "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>", + "<p></p>\n", + + "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>", + "<p></p>\n", + + `<IMG SRC="javascriptascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG 
SRC="jav&#x09;ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav&#x0A;ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav&#x0D;ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC=" &#14; javascript:alert('XSS');">`, + "<p></p>\n", + + `<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, + "<p></p>\n", + + // XXX: this doesn't pass yet + //"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", + //"\n", + + `<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, + "<p></p>\n", + + // XXX: this doesn't pass yet + //`<<SCRIPT>alert("XSS");//<</SCRIPT>`, + //"", + + "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >", + "<p></p>\n", + + "<SCRIPT SRC=//ha.ckers.org/.j>", + "<p></p>\n", + + // XXX: this doesn't pass yet + //`<IMG SRC="javascript:alert('XSS')"`, + //"", + + // XXX: this doesn't pass yet + //"<iframe src=http://ha.ckers.org/scriptlet.html <", + //"", } - doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT) + doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT) } func TestEmphasis(t *testing.T) {
M latex.go

@@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {

return &Latex{} } +func (options *Latex) GetFlags() int { + return 0 +} + // render code chunks using verbatim, or listings if we have a language func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) { if lang == "" {
M markdown.go

@@ -165,6 +165,8 @@

// Header and footer DocumentHeader(out *bytes.Buffer) DocumentFooter(out *bytes.Buffer) + + GetFlags() int } // Callback functions for inline parsing. One such function is defined

@@ -231,7 +233,7 @@ htmlFlags |= HTML_USE_XHTML

htmlFlags |= HTML_USE_SMARTYPANTS htmlFlags |= HTML_SMARTYPANTS_FRACTIONS htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES - htmlFlags |= HTML_SKIP_SCRIPT + htmlFlags |= HTML_SANITIZE_OUTPUT renderer := HtmlRenderer(htmlFlags, "", "") // set up the parser

@@ -290,6 +292,10 @@ }

first := firstPass(p, input) second := secondPass(p, first) + + if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 { + second = sanitizeHtml(second) + } return second }