icy does git — grayfriday: d643453f1ef4fe5d73e8697dfac24435ae6ad99d

Merge pull request #50 from rtfb/master

Better protection against JavaScript injection

Vytautas Šaltenis vytas@rtfb.lt

Sun, 30 Mar 2014 19:52:13 +0300

commit

d643453f1ef4fe5d73e8697dfac24435ae6ad99d

parent

e078bb8ec34db7cc285f347d24f367b95c909ece

5 files changed, 228 insertions(+), 48 deletions(-)

jump to

README.md

html.go

inline_test.go

latex.go

markdown.go

M README.md → README.md

@@ -89,6 +89,11 @@ happening. The test suite stress tests this and there are no
     known inputs that make it crash.  If you find one, please let me
     know and send me the input that does it.
 
+    NOTE: "safety" in this context means *runtime safety only*. It is
+    not bullet proof against JavaScript injections, though we're working
+    on it (https://github.com/russross/blackfriday/issues/11 tracks the
+    progress).
+
 *   **Fast processing**. It is fast enough to render on-demand in
     most web applications without having to cache the output.

M html.go → html.go

@@ -18,6 +18,7 @@
 import (
 	"bytes"
 	"fmt"
+	"regexp"
 	"strconv"
 	"strings"
 )
@@ -28,7 +29,7 @@ HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
 	HTML_SKIP_STYLE                           // skip embedded <style> elements
 	HTML_SKIP_IMAGES                          // skip embedded images
 	HTML_SKIP_LINKS                           // skip all links
-	HTML_SKIP_SCRIPT                          // skip embedded <script> elements
+	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
 	HTML_SAFELINK                             // only link to trusted protocols
 	HTML_NOFOLLOW_LINKS                       // only link with rel="nofollow"
 	HTML_TOC                                  // generate a table of contents
@@ -39,6 +40,41 @@ HTML_USE_XHTML                            // generate XHTML output instead of HTML
 	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
 	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
 	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
+)
+
+var (
+	tags  = []string{ // element names that tagWhitelist accepts as bare opening or closing tags
+		"b",
+		"blockquote",
+		"code",
+		"del",
+		"dd",
+		"dl",
+		"dt",
+		"em",
+		"h1",
+		"h2",
+		"h3",
+		"h4",
+		"h5",
+		"h6",
+		"i",
+		"kbd",
+		"li",
+		"ol",
+		"p",
+		"pre",
+		"s",
+		"sup",
+		"sub",
+		"strong",
+		"strike",
+		"ul",
+	}
+	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` // regexp fragment: an http, https or ftp scheme prefix, or a single leading slash, then one or more characters from the bracketed set
+	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`) // whole-input match for an attribute-less opening or closing tag from tags, or br/hr with an optional whitespace character and optional slash
+	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`) // whole-input match for an anchor whose href matches urlRe, with an optional non-empty title, or for the closing anchor tag
+	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`) // whole-input match for an img whose src matches urlRe, with optional width, height (1-3 digits each), alt and title, in exactly that order
 )
 
 // Html is a type that implements the Renderer interface for HTML output.
@@ -138,6 +174,10 @@ out.Write(src[org:])
 	}
 }
 
+func (options *Html) GetFlags() int { // GetFlags reports the flag bits stored on this HTML renderer
+	return options.flags // lets code outside the renderer test bits such as HTML_SANITIZE_OUTPUT
+}
+
 func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
 	marker := out.Len()
 	doubleSpace(out)
@@ -169,30 +209,8 @@ return
 	}
 
 	doubleSpace(out)
-	if options.flags&HTML_SKIP_SCRIPT != 0 {
-		out.Write(stripTag(string(text), "script", "p"))
-	} else {
-		out.Write(text)
-	}
+	out.Write(text)
 	out.WriteByte('\n')
-}
-
-func stripTag(text, tag, newTag string) []byte {
-	closeNewTag := fmt.Sprintf("</%s>", newTag)
-	i := 0
-	for i < len(text) && text[i] != '<' {
-		i++
-	}
-	if i == len(text) {
-		return []byte(text)
-	}
-	found, end := findHtmlTagPos([]byte(text[i:]), tag)
-	closeTag := fmt.Sprintf("</%s>", tag)
-	noOpen := text
-	if found {
-		noOpen = text[0:i+1] + newTag + text[end:]
-	}
-	return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
 }
 
 func (options *Html) HRule(out *bytes.Buffer) {
@@ -522,9 +540,6 @@ }
 	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
 		return
 	}
-	if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
-		return
-	}
 	out.Write(text)
 }
 
@@ -726,6 +741,29 @@ found, _ := findHtmlTagPos(tag, tagname)
 	return found
 }
 
+// Look for a character at or after start, but ignore it when it's inside any
+// kind of quotes, it might be JavaScript. Returns start when nothing is found.
+func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
+	inSingleQuote := false
+	inDoubleQuote := false
+	inGraveQuote := false
+	i := start
+	for i < len(html) {
+		switch {
+		case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote: // match outside every kind of quote
+			return i
+		case html[i] == '\'':
+			inSingleQuote = !inSingleQuote // each quote kind is toggled independently of the other two
+		case html[i] == '"':
+			inDoubleQuote = !inDoubleQuote
+		case html[i] == '`':
+			inGraveQuote = !inGraveQuote
+		}
+		i++
+	}
+	return start // not found; callers detect this by checking that the result is greater than start
+}
+
 func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
 	i := 0
 	if i < len(tag) && tag[0] != '<' {
@@ -754,26 +792,52 @@ if i == len(tag) {
 		return false, -1
 	}
 
-	// Now look for closing '>', but ignore it when it's in any kind of quotes,
-	// it might be JavaScript
-	inSingleQuote := false
-	inDoubleQuote := false
-	inGraveQuote := false
-	for i < len(tag) {
-		switch {
-		case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
-			return true, i
-		case tag[i] == '\'':
-			inSingleQuote = !inSingleQuote
-		case tag[i] == '"':
-			inDoubleQuote = !inDoubleQuote
-		case tag[i] == '`':
-			inGraveQuote = !inGraveQuote
-		}
-		i++
+	rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
+	if rightAngle > i {
+		return true, rightAngle
 	}
 
 	return false, -1
+}
+
+func sanitizeHtml(html []byte) []byte { // copies html, keeping only the tags that sanitizeTag accepts, and ends the result with a newline
+	var result []byte
+	for string(html) != "" { // keep going until no input is left
+		skip, tag, rest := findHtmlTag(html)
+		html = rest // findHtmlTag returns all-empty values when no complete tag remains, which ends the loop and drops whatever input was left
+		result = append(result, skip...) // text in front of the tag is copied unchanged
+		result = append(result, sanitizeTag(tag)...) // the tag itself is copied only if sanitizeTag returns it
+	}
+	return append(result, []byte("\n")...)
+}
+
+func sanitizeTag(tag []byte) []byte { // returns tag unchanged when it is whitelisted, otherwise an empty slice
+	if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) { // any one of the three whitelist patterns is enough
+		return tag
+	} else {
+		return []byte("") // anything not recognised is removed from the output
+	}
+}
+
+func skipUntilChar(text []byte, start int, char byte) int { // returns the index of the first occurrence of char at or after start
+	i := start
+	for i < len(text) && text[i] != char { // stop at char or at the end of text, whichever comes first
+		i++
+	}
+	return i // equals len(text) when char was not found and start was within text
+}
+
+func findHtmlTag(html []byte) (skip, tag, rest []byte) { // splits html into the text before the first tag, the tag itself, and everything after it
+	start := skipUntilChar(html, 0, '<') // index of the first '<', or len(html) when there is none
+	rightAngle := skipUntilCharIgnoreQuotes(html, start, '>') // first '>' outside quotes; equals start when none is found
+	if rightAngle > start {
+		skip = html[0:start]
+		tag = html[start : rightAngle+1] // includes both angle brackets
+		rest = html[rightAngle+1:]
+		return
+	}
+
+	return []byte(""), []byte(""), []byte("") // no complete tag: all three results are empty, including the text that was scanned
 }
 
 func skipSpace(tag []byte, i int) int {

M inline_test.go → inline_test.go

@@ -90,18 +90,119 @@ " <script>alert()</script>\n",
 		"<p>alert()</p>\n",
 
 		"<script>alert()</script>\n",
-		"<p>alert()</p>\n",
+		"alert()\n",
 
 		"<script src='foo'></script>\n",
-		"<p></p>\n",
+		"\n",
+
+		"<script src='a>b'></script>\n",
+		"\n",
 
 		"zz <script src='foo'></script>\n",
 		"<p>zz </p>\n",
 
 		"zz <script src=foo></script>\n",
 		"<p>zz </p>\n",
+
+		`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
+		"\n",
+
+		`'';!--"<XSS>=&{()}`,
+		"<p>'';!--&quot;=&amp;{()}</p>\n",
+
+		"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
+		"<p></p>\n",
+
+		"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
+		"<p></p>\n",
+
+		`<IMG SRC="javascript:alert('XSS');">`,
+		"<p></p>\n",
+
+		"<IMG SRC=javascript:alert('XSS')>",
+		"<p></p>\n",
+
+		"<IMG SRC=JaVaScRiPt:alert('XSS')>",
+		"<p></p>\n",
+
+		"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
+		"<p></p>\n",
+
+		`<a onmouseover="alert(document.cookie)">xss link</a>`,
+		"<p>xss link</a></p>\n",
+
+		"<a onmouseover=alert(document.cookie)>xss link</a>",
+		"<p>xss link</a></p>\n",
+
+		// XXX: this doesn't pass yet
+		//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
+		//"<p></p>\n",
+
+		"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
+		"<p></p>\n",
+
+		`<IMG SRC=# onmouseover="alert('xxs')">`,
+		"<p></p>\n",
+
+		`<IMG SRC= onmouseover="alert('xxs')">`,
+		"<p></p>\n",
+
+		`<IMG onmouseover="alert('xxs')">`,
+		"<p></p>\n",
+
+		"<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
+		"<p></p>\n",
+
+		"<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
+		"<p></p>\n",
+
+		"<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
+		"<p></p>\n",
+
+		`<IMG SRC="javascriptascript:alert('XSS');">`,
+		"<p></p>\n",
+
+		`<IMG SRC="jav&#x09;ascript:alert('XSS');">`,
+		"<p></p>\n",
+
+		`<IMG SRC="jav&#x0A;ascript:alert('XSS');">`,
+		"<p></p>\n",
+
+		`<IMG SRC="jav&#x0D;ascript:alert('XSS');">`,
+		"<p></p>\n",
+
+		`<IMG SRC=" &#14;  javascript:alert('XSS');">`,
+		"<p></p>\n",
+
+		`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
+		"<p></p>\n",
+
+		// XXX: this doesn't pass yet
+		//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
+		//"\n",
+
+		`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
+		"<p></p>\n",
+
+		// XXX: this doesn't pass yet
+		//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
+		//"",
+
+		"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
+		"<p></p>\n",
+
+		"<SCRIPT SRC=//ha.ckers.org/.j>",
+		"<p></p>\n",
+
+		// XXX: this doesn't pass yet
+		//`<IMG SRC="javascript:alert('XSS')"`,
+		//"",
+
+		// XXX: this doesn't pass yet
+		//"<iframe src=http://ha.ckers.org/scriptlet.html <",
+		//"",
 	}
-	doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
+	doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
 }
 
 func TestEmphasis(t *testing.T) {

M latex.go → latex.go

@@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
 	return &Latex{}
 }
 
+func (options *Latex) GetFlags() int { // GetFlags is the LaTeX renderer's implementation of the flag accessor
+	return 0 // always zero, so a test such as GetFlags()&HTML_SANITIZE_OUTPUT never succeeds for this renderer
+}
+
 // render code chunks using verbatim, or listings if we have a language
 func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
 	if lang == "" {

M markdown.go → markdown.go

@@ -165,6 +165,8 @@
 	// Header and footer
 	DocumentHeader(out *bytes.Buffer)
 	DocumentFooter(out *bytes.Buffer)
+
+	GetFlags() int
 }
 
 // Callback functions for inline parsing. One such function is defined
@@ -231,7 +233,7 @@ htmlFlags |= HTML_USE_XHTML
 	htmlFlags |= HTML_USE_SMARTYPANTS
 	htmlFlags |= HTML_SMARTYPANTS_FRACTIONS
 	htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES
-	htmlFlags |= HTML_SKIP_SCRIPT
+	htmlFlags |= HTML_SANITIZE_OUTPUT
 	renderer := HtmlRenderer(htmlFlags, "", "")
 
 	// set up the parser
@@ -290,6 +292,10 @@ }
 
 	first := firstPass(p, input)
 	second := secondPass(p, first)
+
+	if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 {
+		second = sanitizeHtml(second)
+	}
 
 	return second
 }

all repos — grayfriday @ d643453f1ef4fe5d73e8697dfac24435ae6ad99d

blackfriday fork with a few changes