all repos — grayfriday @ 64fbfbbadf651c943c4dec8e28c9ac20d973fad7

blackfriday fork with a few changes

Merge pull request #117 from rtfb/remove-sanitization

Remove sanitization
Vytautas Šaltenis vytas@rtfb.lt
Sat, 20 Sep 2014 14:54:26 +0300
commit

64fbfbbadf651c943c4dec8e28c9ac20d973fad7

parent

44a39c16c6b85809f8c2663bdb8ddbced1ffdee8

6 files changed, 28 insertions(+), 370 deletions(-)

jump to
M README.md

@@ -48,6 +48,28 @@ feature set, use this instead:

output := blackfriday.MarkdownCommon(input) +### Sanitize untrusted content + +Blackfriday itself does nothing to protect against malicious content. If you are +dealing with user-supplied markdown, we recommend running blackfriday's output +through HTML sanitizer such as +[Bluemonday](https://github.com/microcosm-cc/bluemonday). + +Here's an example of simple usage of blackfriday together with bluemonday: + +``` go +import ( + "github.com/microcosm-cc/bluemonday" + "github.com/russross/blackfriday" +) + +// ... +unsafe := blackfriday.MarkdownCommon(input) +html := bluemonday.UGCPolicy().SanitizeBytes(unsafe) +``` + +### Custom options + If you want to customize the set of options, first get a renderer (currently either the HTML or LaTeX output engines), then use it to call the more general `Markdown` function. For examples, see the

@@ -93,10 +115,9 @@ happening. The test suite stress tests this and there are no

known inputs that make it crash. If you find one, please let me know and send me the input that does it. - NOTE: "safety" in this context means *runtime safety only*. It is - not bullet proof against JavaScript injections, though we're working - on it (https://github.com/russross/blackfriday/issues/11 tracks the - progress). + NOTE: "safety" in this context means *runtime safety only*. In order to + protect yourself against JavaScript injection in untrusted content, see + [this example](https://github.com/russross/blackfriday#sanitize-untrusted-content). * **Fast processing**. It is fast enough to render on-demand in most web applications without having to cache the output.
M html.go

@@ -29,7 +29,6 @@ HTML_SKIP_HTML = 1 << iota // skip preformatted HTML blocks

HTML_SKIP_STYLE // skip embedded <style> elements HTML_SKIP_IMAGES // skip embedded images HTML_SKIP_LINKS // skip all links - HTML_SANITIZE_OUTPUT // strip output of everything that's not known to be safe HTML_SAFELINK // only link to trusted protocols HTML_NOFOLLOW_LINKS // only link with rel="nofollow" HTML_HREF_TARGET_BLANK // add a blank target
M inline_test.go

@@ -425,15 +425,12 @@ func TestNofollowLink(t *testing.T) {

var tests = []string{ "[foo](http://bar.com/foo/)\n", "<p><a href=\"http://bar.com/foo/\" rel=\"nofollow\">foo</a></p>\n", - } - doTestsInlineParam(t, tests, 0, HTML_SAFELINK|HTML_NOFOLLOW_LINKS|HTML_SANITIZE_OUTPUT, - HtmlRendererParameters{}) - // HTML_SANITIZE_OUTPUT won't allow relative links, so test that separately: - tests = []string{ + "[foo](/bar/)\n", "<p><a href=\"/bar/\">foo</a></p>\n", } - doTestsInlineParam(t, tests, 0, HTML_SAFELINK|HTML_NOFOLLOW_LINKS, HtmlRendererParameters{}) + doTestsInlineParam(t, tests, 0, HTML_SAFELINK|HTML_NOFOLLOW_LINKS, + HtmlRendererParameters{}) } func TestHrefTargetBlank(t *testing.T) {
M markdown.go

@@ -238,7 +238,6 @@ htmlFlags |= HTML_USE_XHTML

htmlFlags |= HTML_USE_SMARTYPANTS htmlFlags |= HTML_SMARTYPANTS_FRACTIONS htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES - htmlFlags |= HTML_SANITIZE_OUTPUT renderer := HtmlRenderer(htmlFlags, "", "") // set up the parser

@@ -298,11 +297,6 @@ }

first := firstPass(p, input) second := secondPass(p, first) - - if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 { - second = sanitizeHtmlSafe(second) - } - return second }
D sanitize.go

@@ -1,154 +0,0 @@

-package blackfriday - -import ( - "bufio" - "bytes" - "code.google.com/p/go.net/html" - "fmt" - "io" -) - -// Whitelisted element tags, attributes on particular tags, attributes that are -// interpreted as protocols (again on particular tags), and allowed protocols. -var ( - whitelistTags map[string]bool - whitelistAttrs map[string]map[string]bool - protocolAttrs map[string]map[string]bool - whitelistProtocols [][]byte -) - -func init() { - whitelistTags = toSet([]string{ - // Headings - "h1", "h2", "h3", "h4", "h5", "h6", - // Block elements - "p", "pre", "blockquote", "hr", "div", "header", "article", "aside", "footer", - "section", "main", "mark", "figure", "figcaption", - // Inline elements - "a", "br", "cite", "code", "img", - // Lists - "ol", "ul", "li", - // Tables - "table", "tbody", "td", "tfoot", "th", "thead", "tr", "colgroup", "col", "caption", - // Formatting - "u", "i", "em", "small", "strike", "b", "strong", "sub", "sup", "q", - // Definition lists - "dd", "dl", "dt", - }) - whitelistAttrs = map[string]map[string]bool{ - "a": toSet([]string{"href", "title", "rel"}), - "img": toSet([]string{"src", "alt", "title"}), - "td": toSet([]string{"align"}), - "th": toSet([]string{"align"}), - } - protocolAttrs = map[string]map[string]bool{ - "a": toSet([]string{"href"}), - "img": toSet([]string{"src"}), - } - whitelistProtocols = [][]byte{ - []byte("http://"), - []byte("https://"), - []byte("ftp://"), - []byte("mailto:"), - } -} - -func toSet(keys []string) map[string]bool { - m := make(map[string]bool, len(keys)) - for _, k := range keys { - m[k] = true - } - return m -} - -// Sanitizes the given input by parsing it as HTML5, then whitelisting known to -// be safe elements and attributes. All other HTML is escaped, unsafe attributes -// are stripped. 
-func sanitizeHtmlSafe(input []byte) []byte { - r := bytes.NewReader(input) - var w bytes.Buffer - tokenizer := html.NewTokenizer(r) - wr := bufio.NewWriter(&w) - - // Iterate through all tokens in the input stream and sanitize them. - for t := tokenizer.Next(); t != html.ErrorToken; t = tokenizer.Next() { - switch t { - case html.TextToken: - // Text is written escaped. - wr.WriteString(tokenizer.Token().String()) - case html.SelfClosingTagToken, html.StartTagToken: - // HTML tags are escaped unless whitelisted. - tag, hasAttributes := tokenizer.TagName() - tagName := string(tag) - if whitelistTags[tagName] { - wr.WriteString("<") - wr.Write(tag) - for hasAttributes { - var key, val []byte - key, val, hasAttributes = tokenizer.TagAttr() - attrName := string(key) - // Only include whitelisted attributes for the given tagName. - tagWhitelistedAttrs, ok := whitelistAttrs[tagName] - if ok && tagWhitelistedAttrs[attrName] { - // For whitelisted attributes, if it's an attribute that requires - // protocol checking, do so and strip it if it's not known to be safe. - tagProtocolAttrs, ok := protocolAttrs[tagName] - if ok && tagProtocolAttrs[attrName] { - if !isRelativeLink(val) && !protocolAllowed(val) { - continue - } - } - wr.WriteByte(' ') - wr.Write(key) - wr.WriteString(`="`) - wr.WriteString(html.EscapeString(string(val))) - wr.WriteByte('"') - } - } - if t == html.SelfClosingTagToken { - wr.WriteString("/>") - } else { - wr.WriteString(">") - } - } else { - wr.WriteString(html.EscapeString(string(tokenizer.Raw()))) - } - // Make sure that tags like <script> that switch the parser into raw mode - // do not destroy the parse mode for following HTML text (the point is to - // escape them anyway). For that, switch off raw mode in the tokenizer. - tokenizer.NextIsNotRawText() - case html.EndTagToken: - // Whitelisted tokens can be written in raw. 
- tag, _ := tokenizer.TagName() - if whitelistTags[string(tag)] { - wr.Write(tokenizer.Raw()) - } else { - wr.WriteString(html.EscapeString(string(tokenizer.Raw()))) - } - case html.CommentToken: - // Comments are not really expected, but harmless. - wr.Write(tokenizer.Raw()) - case html.DoctypeToken: - // Escape DOCTYPES, entities etc can be dangerous - wr.WriteString(html.EscapeString(string(tokenizer.Raw()))) - default: - tokenizer.Token() - panic(fmt.Errorf("Unexpected token type %v", t)) - } - } - err := tokenizer.Err() - if err != nil && err != io.EOF { - panic(tokenizer.Err()) - } - wr.Flush() - return w.Bytes() -} - -func protocolAllowed(attr []byte) bool { - for _, prefix := range whitelistProtocols { - if bytes.HasPrefix(attr, prefix) { - return true - } - } - return false -}
D sanitize_test.go

@@ -1,199 +0,0 @@

-package blackfriday - -import ( - "testing" -) - -func doTestsSanitize(t *testing.T, tests []string) { - doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT, HtmlRendererParameters{}) -} - -func TestSanitizeRawHtmlTag(t *testing.T) { - tests := []string{ - "zz <style>p {}</style>\n", - "<p>zz &lt;style&gt;p {}&lt;/style&gt;</p>\n", - - "zz <STYLE>p {}</STYLE>\n", - "<p>zz &lt;style&gt;p {}&lt;/style&gt;</p>\n", - - "<SCRIPT>alert()</SCRIPT>\n", - "<p>&lt;script&gt;alert()&lt;/script&gt;</p>\n", - - "zz <SCRIPT>alert()</SCRIPT>\n", - "<p>zz &lt;script&gt;alert()&lt;/script&gt;</p>\n", - - "zz <script>alert()</script>\n", - "<p>zz &lt;script&gt;alert()&lt;/script&gt;</p>\n", - - " <script>alert()</script>\n", - "<p>&lt;script&gt;alert()&lt;/script&gt;</p>\n", - - "<script>alert()</script>\n", - "&lt;script&gt;alert()&lt;/script&gt;\n", - - "<script src='foo'></script>\n", - "&lt;script src=&#39;foo&#39;&gt;&lt;/script&gt;\n", - - "<script src='a>b'></script>\n", - "&lt;script src=&#39;a&gt;b&#39;&gt;&lt;/script&gt;\n", - - "zz <script src='foo'></script>\n", - "<p>zz &lt;script src=&#39;foo&#39;&gt;&lt;/script&gt;</p>\n", - - "zz <script src=foo></script>\n", - "<p>zz &lt;script src=foo&gt;&lt;/script&gt;</p>\n", - - `<script><script src="http://example.com/exploit.js"></SCRIPT></script>`, - "&lt;script&gt;&lt;script src=&#34;http://example.com/exploit.js&#34;&gt;&lt;/script&gt;&lt;/script&gt;\n", - - `'';!--"<XSS>=&{()}`, - "<p>&#39;&#39;;!--&#34;&lt;xss&gt;=&amp;{()}</p>\n", - - "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>", - "<p>&lt;script SRC=http://ha.ckers.org/xss.js&gt;&lt;/script&gt;</p>\n", - - "<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>", - "<p>&lt;script \nSRC=http://ha.ckers.org/xss.js&gt;&lt;/script&gt;</p>\n", - - `<IMG SRC="javascript:alert('XSS');">`, - "<p><img></p>\n", - - "<IMG SRC=javascript:alert('XSS')>", - "<p><img></p>\n", - - "<IMG SRC=JaVaScRiPt:alert('XSS')>", - "<p><img></p>\n", - - "<IMG 
SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>", - "<p><img></p>\n", - - `<a onmouseover="alert(document.cookie)">xss link</a>`, - "<p><a>xss link</a></p>\n", - - "<a onmouseover=alert(document.cookie)>xss link</a>", - "<p><a>xss link</a></p>\n", - - `<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, - "<p><img>&lt;script&gt;alert(&#34;XSS&#34;)&lt;/script&gt;&#34;&gt;</p>\n", - - "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>", - "<p><img></p>\n", - - `<IMG SRC=# onmouseover="alert('xxs')">`, - "<p><img src=\"#\"></p>\n", - - `<IMG SRC= onmouseover="alert('xxs')">`, - "<p><img></p>\n", - - `<IMG onmouseover="alert('xxs')">`, - "<p><img></p>\n", - - "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>", - "<p><img></p>\n", - - "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>", - "<p><img></p>\n", - - "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>", - "<p><img></p>\n", - - `<IMG SRC="javascriptascript:alert('XSS');">`, - "<p><img></p>\n", - - `<IMG SRC="jav&#x09;ascript:alert('XSS');">`, - "<p><img></p>\n", - - `<IMG SRC="jav&#x0A;ascript:alert('XSS');">`, - "<p><img></p>\n", - - `<IMG SRC="jav&#x0D;ascript:alert('XSS');">`, - "<p><img></p>\n", - - `<IMG SRC=" &#14; javascript:alert('XSS');">`, - "<p><img></p>\n", - - `<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, - "<p>&lt;script/XSS SRC=&#34;http://ha.ckers.org/xss.js&#34;&gt;&lt;/script&gt;</p>\n", - - "<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", - "<p>&lt;body onload!#$%&amp;()*~+-_.,:;?@[/|\\]^`=alert(&#34;XSS&#34;)&gt;</p>\n", - - `<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, - 
"<p>&lt;script/SRC=&#34;http://ha.ckers.org/xss.js&#34;&gt;&lt;/script&gt;</p>\n", - - `<<SCRIPT>alert("XSS");//<</SCRIPT>`, - "<p>&lt;&lt;script&gt;alert(&#34;XSS&#34;);//&lt;&lt;/script&gt;</p>\n", - - "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >", - "<p>&lt;script SRC=http://ha.ckers.org/xss.js?&lt; B &gt;</p>\n", - - "<SCRIPT SRC=//ha.ckers.org/.j>", - "<p>&lt;script SRC=//ha.ckers.org/.j&gt;</p>\n", - - `<IMG SRC="javascript:alert('XSS')"`, - "<p>&lt;IMG SRC=&#34;javascript:alert(&#39;XSS&#39;)&#34;</p>\n", - - "<iframe src=http://ha.ckers.org/scriptlet.html <", - // The hyperlink gets linkified, the <iframe> gets escaped - "<p>&lt;iframe src=<a href=\"http://ha.ckers.org/scriptlet.html\">http://ha.ckers.org/scriptlet.html</a> &lt;</p>\n", - - // Additonal token types: SelfClosing, Comment, DocType. - "<br/>", - "<p><br/></p>\n", - - "<!-- Comment -->", - "<!-- Comment -->\n", - - "<!DOCTYPE test>", - "<p>&lt;!DOCTYPE test&gt;</p>\n", - } - doTestsSanitize(t, tests) -} - -func TestSanitizeQuoteEscaping(t *testing.T) { - tests := []string{ - // Make sure quotes are transported correctly (different entities or - // unicode, but correct semantics) - "<p>Here are some &quot;quotes&quot;.</p>\n", - "<p>Here are some &#34;quotes&#34;.</p>\n", - - "<p>Here are some &ldquo;quotes&rdquo;.</p>\n", - "<p>Here are some \u201Cquotes\u201D.</p>\n", - - // Within a <script> tag, content gets parsed by the raw text parsing rules. - // This test makes sure we correctly disable those parsing rules and do not - // escape e.g. the closing </p>. - `Here are <script> some "quotes".`, - "<p>Here are &lt;script&gt; some &#34;quotes&#34;.</p>\n", - - // Same test for an unknown element that does not switch into raw mode. 
- `Here are <eviltag> some "quotes".`, - "<p>Here are &lt;eviltag&gt; some &#34;quotes&#34;.</p>\n", - } - doTestsSanitize(t, tests) -} - -func TestSanitizeSelfClosingTag(t *testing.T) { - tests := []string{ - "<hr>\n", - "<hr>\n", - - "<hr/>\n", - "<hr/>\n", - - // Make sure that evil attributes are stripped for self closing tags. - "<hr onclick=\"evil()\"/>\n", - "<hr/>\n", - } - doTestsSanitize(t, tests) -} - -func TestSanitizeInlineLink(t *testing.T) { - tests := []string{ - "[link](javascript:evil)", - "<p><a>link</a></p>\n", - "[link](/abc)", - "<p><a href=\"/abc\">link</a></p>\n", - } - doTestsSanitize(t, tests) -}