all repos — grayfriday @ 55cd82008e9b35b9a03a80e06d5a4c4601320211

blackfriday fork with a few changes

Rewrite protection against JavaScript injection

This drops the naive approach to <script> tag stripping and resorts to
full sanitization of HTML. The general idea (and the regexps) is taken
from Stack Exchange's PageDown JavaScript Markdown processor[1]. As in
PageDown, it's implemented as a separate pass over the resulting HTML.

Includes a metric ton of test cases (but not all of them) from here[2].
Several are commented out since they don't pass yet.

Stronger (but still incomplete) fix for #11.

[1] http://code.google.com/p/pagedown/wiki/PageDown
[2] https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
Vytautas Šaltenis <vytas@rtfb.lt>
Wed, 22 Jan 2014 01:14:35 +0200
commit

55cd82008e9b35b9a03a80e06d5a4c4601320211

parent

e02c392dc656371e48ec50c6b0acdfbfbee31439

4 files changed, 194 insertions(+), 25 deletions(-)

jump to
M html.go

@@ -18,6 +18,7 @@

import ( "bytes" "fmt" + "regexp" "strconv" "strings" )

@@ -38,6 +39,41 @@ HTML_USE_XHTML // generate XHTML output instead of HTML

HTML_USE_SMARTYPANTS // enable smart punctuation substitutions HTML_SMARTYPANTS_FRACTIONS // enable smart fractions (with HTML_USE_SMARTYPANTS) HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS) +) + +var ( + tags = []string{ + "b", + "blockquote", + "code", + "del", + "dd", + "dl", + "dt", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "i", + "kbd", + "li", + "ol", + "p", + "pre", + "s", + "sup", + "sub", + "strong", + "strike", + "ul", + } + urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` + tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`) + anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`) + imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`) ) // Html is a type that implements the Renderer interface for HTML output.

@@ -137,6 +173,10 @@ out.Write(src[org:])

} } +func (options *Html) GetFlags() int { + return options.flags +} + func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) { marker := out.Len() doubleSpace(out)

@@ -168,30 +208,8 @@ return

} doubleSpace(out) - if options.flags&HTML_SKIP_SCRIPT != 0 { - out.Write(stripTag(string(text), "script", "p")) - } else { - out.Write(text) - } + out.Write(text) out.WriteByte('\n') -} - -func stripTag(text, tag, newTag string) []byte { - closeNewTag := fmt.Sprintf("</%s>", newTag) - i := 0 - for i < len(text) && text[i] != '<' { - i++ - } - if i == len(text) { - return []byte(text) - } - found, end := findHtmlTagPos([]byte(text[i:]), tag) - closeTag := fmt.Sprintf("</%s>", tag) - noOpen := text - if found { - noOpen = text[0:i+1] + newTag + text[end:] - } - return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1)) } func (options *Html) HRule(out *bytes.Buffer) {

@@ -779,6 +797,46 @@ return true, rightAngle

} return false, -1 +} + +func sanitizeHtml(html []byte) []byte { + var result []byte + for string(html) != "" { + skip, tag, rest := findHtmlTag(html) + html = rest + result = append(result, skip...) + result = append(result, sanitizeTag(tag)...) + } + return append(result, []byte("\n")...) +} + +func sanitizeTag(tag []byte) []byte { + if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) { + return tag + } else { + return []byte("") + } +} + +func skipUntilChar(text []byte, start int, char byte) int { + i := start + for i < len(text) && text[i] != char { + i++ + } + return i +} + +func findHtmlTag(html []byte) (skip, tag, rest []byte) { + start := skipUntilChar(html, 0, '<') + rightAngle := skipUntilCharIgnoreQuotes(html, start, '>') + if rightAngle > start { + skip = html[0:start] + tag = html[start : rightAngle+1] + rest = html[rightAngle+1:] + return + } + + return []byte(""), []byte(""), []byte("") } func skipSpace(tag []byte, i int) int {
M inline_test.go

@@ -90,16 +90,117 @@ " <script>alert()</script>\n",

"<p>alert()</p>\n", "<script>alert()</script>\n", - "<p>alert()</p>\n", + "alert()\n", "<script src='foo'></script>\n", - "<p></p>\n", + "\n", + + "<script src='a>b'></script>\n", + "\n", "zz <script src='foo'></script>\n", "<p>zz </p>\n", "zz <script src=foo></script>\n", "<p>zz </p>\n", + + `<script><script src="http://example.com/exploit.js"></SCRIPT></script>`, + "\n", + + `'';!--"<XSS>=&{()}`, + "<p>'';!--&quot;=&amp;{()}</p>\n", + + "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>", + "<p></p>\n", + + "<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>", + "<p></p>\n", + + `<IMG SRC="javascript:alert('XSS');">`, + "<p></p>\n", + + "<IMG SRC=javascript:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=JaVaScRiPt:alert('XSS')>", + "<p></p>\n", + + "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>", + "<p></p>\n", + + `<a onmouseover="alert(document.cookie)">xss link</a>`, + "<p>xss link</a></p>\n", + + "<a onmouseover=alert(document.cookie)>xss link</a>", + "<p>xss link</a></p>\n", + + // XXX: this doesn't pass yet + //`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, + //"<p></p>\n", + + "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>", + "<p></p>\n", + + `<IMG SRC=# onmouseover="alert('xxs')">`, + "<p></p>\n", + + `<IMG SRC= onmouseover="alert('xxs')">`, + "<p></p>\n", + + `<IMG onmouseover="alert('xxs')">`, + "<p></p>\n", + + "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>", + "<p></p>\n", + + "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>", + "<p></p>\n", + + "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>", + "<p></p>\n", + + `<IMG SRC="javascriptascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG 
SRC="jav&#x09;ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav&#x0A;ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC="jav&#x0D;ascript:alert('XSS');">`, + "<p></p>\n", + + `<IMG SRC=" &#14; javascript:alert('XSS');">`, + "<p></p>\n", + + `<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, + "<p></p>\n", + + // XXX: this doesn't pass yet + //"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", + //"\n", + + `<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, + "<p></p>\n", + + // XXX: this doesn't pass yet + //`<<SCRIPT>alert("XSS");//<</SCRIPT>`, + //"", + + "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >", + "<p></p>\n", + + "<SCRIPT SRC=//ha.ckers.org/.j>", + "<p></p>\n", + + // XXX: this doesn't pass yet + //`<IMG SRC="javascript:alert('XSS')"`, + //"", + + // XXX: this doesn't pass yet + //"<iframe src=http://ha.ckers.org/scriptlet.html <", + //"", } doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT) }
M latex.go

@@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {

return &Latex{} } +func (options *Latex) GetFlags() int { + return 0 +} + // render code chunks using verbatim, or listings if we have a language func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) { if lang == "" {
M markdown.go

@@ -165,6 +165,8 @@

// Header and footer DocumentHeader(out *bytes.Buffer) DocumentFooter(out *bytes.Buffer) + + GetFlags() int } // Callback functions for inline parsing. One such function is defined

@@ -290,6 +292,10 @@ }

first := firstPass(p, input) second := secondPass(p, first) + + if renderer.GetFlags()&HTML_SKIP_SCRIPT != 0 { + second = sanitizeHtml(second) + } return second }