Merge pull request #322 from russross/v2-perf-tweaks V2 perf tweaks
@@ -5,10 +5,9 @@
language: go go: - - 1.2 - - 1.3 - - 1.4 - 1.5 + - 1.6 + - 1.7 install: - go get -d -t -v ./...
@@ -29,17 +29,12 @@
var ( reBackslashOrAmp = regexp.MustCompile("[\\&]") reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity) - reTrailingWhitespace = regexp.MustCompile("(\n *)+$") ) // Parse block-level data. // Note: this function and many that it calls assume that // the input buffer ends with a newline. func (p *parser) block(data []byte) { - if len(data) == 0 || data[len(data)-1] != '\n' { - panic("block input is missing terminating newline") - } - // this is called recursively: enforce a maximum depth if p.nesting >= p.maxNesting { return@@ -131,7 +126,7 @@ // ______
if p.isHRule(data) { p.addBlock(HorizontalRule, nil) var i int - for i = 0; data[i] != '\n'; i++ { + for i = 0; i < len(data) && data[i] != '\n'; i++ { } data = data[i:] continue@@ -216,10 +211,10 @@ }
if p.flags&SpaceHeaders != 0 { level := 0 - for level < 6 && data[level] == '#' { + for level < 6 && level < len(data) && data[level] == '#' { level++ } - if data[level] != ' ' { + if level == len(data) || data[level] != ' ' { return false } }@@ -228,7 +223,7 @@ }
func (p *parser) prefixHeader(data []byte) int { level := 0 - for level < 6 && data[level] == '#' { + for level < 6 && level < len(data) && data[level] == '#' { level++ } i := skipChar(data, level, ' ')@@ -277,7 +272,7 @@ // test of level 1 header
if data[0] == '=' { i := skipChar(data, 1, '=') i = skipChar(data, i, ' ') - if data[i] == '\n' { + if i < len(data) && data[i] == '\n' { return 1 } return 0@@ -287,7 +282,7 @@ // test of level 2 header
if data[0] == '-' { i := skipChar(data, 1, '-') i = skipChar(data, i, ' ') - if data[i] == '\n' { + if i < len(data) && data[i] == '\n' { return 2 } return 0@@ -419,8 +414,8 @@ return i
} func finalizeHTMLBlock(block *Node) { - block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{}) - block.content = []byte{} + block.Literal = block.content + block.content = nil } // HTML comment, lax form@@ -445,6 +440,9 @@ }
// HR, which is the only self-closing block tag considered func (p *parser) htmlHr(data []byte, doRender bool) int { + if len(data) < 4 { + return 0 + } if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') { return 0 }@@ -452,13 +450,11 @@ if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
// not an <hr> tag after all; at least not a valid one return 0 } - i := 3 - for data[i] != '>' && data[i] != '\n' { + for i < len(data) && data[i] != '>' && data[i] != '\n' { i++ } - - if data[i] == '>' { + if i < len(data) && data[i] == '>' { i++ if j := p.isEmpty(data[i:]); j > 0 { size := i + j@@ -473,13 +469,12 @@ }
return size } } - return 0 } func (p *parser) htmlFindTag(data []byte) (string, bool) { i := 0 - for isalnum(data[i]) { + for i < len(data) && isalnum(data[i]) { i++ } key := string(data[:i])@@ -536,7 +531,10 @@ if data[i] != ' ' && data[i] != '\t' {
return 0 } } - return i + 1 + if i < len(data) && data[i] == '\n' { + i++ + } + return i } func (*parser) isHRule(data []byte) bool {@@ -555,7 +553,7 @@ c := data[i]
// the whole line must be the char or whitespace n := 0 - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { switch { case data[i] == c: n++@@ -571,8 +569,7 @@
// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data, // and returns the end index if so, or 0 otherwise. It also returns the marker found. // If syntax is not nil, it gets set to the syntax specified in the fence line. -// A final newline is mandatory to recognize the fence line, unless newlineOptional is true. -func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) { +func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) { i, size := 0, 0 // skip up to three spaces@@ -614,7 +611,7 @@ syn := 0
i = skipChar(data, i, ' ') if i >= len(data) { - if newlineOptional && i == len(data) { + if i == len(data) { return i, marker } return 0, ""@@ -659,12 +656,11 @@ }
i = skipChar(data, i, ' ') if i >= len(data) || data[i] != '\n' { - if newlineOptional && i == len(data) { + if i == len(data) { return i, marker } return 0, "" } - return i + 1, marker // Take newline into account. }@@ -673,7 +669,7 @@ // or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
// If doRender is true, a final newline is mandatory to recognize the fenced code block. func (p *parser) fencedCodeBlock(data []byte, doRender bool) int { var syntax string - beg, marker := isFenceLine(data, &syntax, "", false) + beg, marker := isFenceLine(data, &syntax, "") if beg == 0 || beg >= len(data) { return 0 }@@ -686,8 +682,7 @@ for {
// safe to assume beg < len(data) // check for the end of the code block - newlineOptional := !doRender - fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional) + fenceEnd, _ := isFenceLine(data[beg:], nil, marker) if fenceEnd != 0 { beg += fenceEnd break@@ -739,7 +734,7 @@ rest := block.content[newlinePos+1:]
block.Info = unescapeString(bytes.Trim(firstLine, "\n")) block.Literal = rest } else { - block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'}) + block.Literal = block.content } block.content = nil }@@ -757,7 +752,7 @@ p.addBlock(TableBody, nil)
for i < len(data) { pipes, rowStart := 0, i - for ; data[i] != '\n'; i++ { + for ; i < len(data) && data[i] != '\n'; i++ { if data[i] == '|' { pipes++ }@@ -769,7 +764,9 @@ break
} // include the newline in data sent to tableRow - i++ + if i < len(data) && data[i] == '\n' { + i++ + } p.tableRow(data[rowStart:i], columns, false) }@@ -788,7 +785,7 @@
func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) { i := 0 colCount := 1 - for i = 0; data[i] != '\n'; i++ { + for i = 0; i < len(data) && data[i] != '\n'; i++ { if data[i] == '|' && !isBackslashEscaped(data, i) { colCount++ }@@ -800,7 +797,11 @@ return
} // include the newline in the data sent to tableRow - header := data[:i+1] + j := i + if j < len(data) && data[j] == '\n' { + j++ + } + header := data[:j] // column count ignores pipes at beginning or end of line if data[0] == '|' {@@ -826,7 +827,7 @@
// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3 // and trailing | optional on last column col := 0 - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { dashes := 0 if data[i] == ':' {@@ -834,19 +835,21 @@ i++
columns[col] |= TableAlignmentLeft dashes++ } - for data[i] == '-' { + for i < len(data) && data[i] == '-' { i++ dashes++ } - if data[i] == ':' { + if i < len(data) && data[i] == ':' { i++ columns[col] |= TableAlignmentRight dashes++ } - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } - + if i == len(data) { + return + } // end of column test is messy switch { case dashes < 3:@@ -857,12 +860,12 @@ case data[i] == '|' && !isBackslashEscaped(data, i):
// marker found, now skip past trailing whitespace col++ i++ - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } // trailing junk found after last column - if col >= colCount && data[i] != '\n' { + if col >= colCount && i < len(data) && data[i] != '\n' { return }@@ -885,7 +888,10 @@ }
p.addBlock(TableHead, nil) p.tableRow(header, columns, true) - size = i + 1 + size = i + if size < len(data) && data[size] == '\n' { + size++ + } return }@@ -898,13 +904,13 @@ i++
} for col = 0; col < len(columns) && i < len(data); col++ { - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } cellStart := i - for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { + for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { i++ }@@ -913,7 +919,7 @@
// skip the end-of-cell marker, possibly taking us past end of buffer i++ - for cellEnd > cellStart && data[cellEnd-1] == ' ' { + for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' { cellEnd-- }@@ -935,11 +941,11 @@
// returns blockquote prefix length func (p *parser) quotePrefix(data []byte) int { i := 0 - for i < 3 && data[i] == ' ' { + for i < 3 && i < len(data) && data[i] == ' ' { i++ } - if data[i] == '>' { - if data[i+1] == ' ' { + if i < len(data) && data[i] == '>' { + if i+1 < len(data) && data[i+1] == ' ' { return i + 2 } return i + 1@@ -969,7 +975,7 @@ end = beg
// Step over whole lines, collecting them. While doing that, check for // fenced code and if one's found, incorporate it altogether, // irregardless of any contents inside it - for data[end] != '\n' { + for end < len(data) && data[end] != '\n' { if p.flags&FencedCode != 0 { if i := p.fencedCodeBlock(data[end:], false); i > 0 { // -1 to compensate for the extra end++ after the loop:@@ -979,7 +985,9 @@ }
} end++ } - end++ + if end < len(data) && data[end] == '\n' { + end++ + } if pre := p.quotePrefix(data[beg:]); pre > 0 { // skip the prefix beg += pre@@ -997,7 +1005,10 @@ }
// returns prefix length for block code func (p *parser) codePrefix(data []byte) int { - if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { + if len(data) >= 1 && data[0] == '\t' { + return 1 + } + if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { return 4 } return 0@@ -1009,10 +1020,12 @@
i := 0 for i < len(data) { beg := i - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { i++ } - i++ + if i < len(data) && data[i] == '\n' { + i++ + } blankline := p.isEmpty(data[beg:i]) > 0 if pre := p.codePrefix(data[beg:i]); pre > 0 {@@ -1023,7 +1036,7 @@ i = beg
break } - // verbatim copy to the working buffeu + // verbatim copy to the working buffer if blankline { work.WriteByte('\n') } else {@@ -1053,15 +1066,16 @@
// returns unordered list item prefix func (p *parser) uliPrefix(data []byte) int { i := 0 - // start with up to 3 spaces - for i < 3 && data[i] == ' ' { + for i < len(data) && i < 3 && data[i] == ' ' { i++ } - - // need a *, +, or - followed by a space + if i >= len(data)-1 { + return 0 + } + // need one of {'*', '+', '-'} followed by a space or a tab if (data[i] != '*' && data[i] != '+' && data[i] != '-') || - data[i+1] != ' ' { + (data[i+1] != ' ' && data[i+1] != '\t') { return 0 } return i + 2@@ -1072,18 +1086,21 @@ func (p *parser) oliPrefix(data []byte) int {
i := 0 // start with up to 3 spaces - for i < 3 && data[i] == ' ' { + for i < 3 && i < len(data) && data[i] == ' ' { i++ } // count the digits start := i - for data[i] >= '0' && data[i] <= '9' { + for i < len(data) && data[i] >= '0' && data[i] <= '9' { i++ } + if start == i || i >= len(data)-1 { + return 0 + } - // we need >= 1 digits followed by a dot and a space - if start == i || data[i] != '.' || data[i+1] != ' ' { + // we need >= 1 digits followed by a dot and a space or a tab + if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') { return 0 } return i + 2@@ -1091,13 +1108,15 @@ }
// returns definition list item prefix func (p *parser) dliPrefix(data []byte) int { + if len(data) < 2 { + return 0 + } i := 0 - - // need a : followed by a spaces - if data[i] != ':' || data[i+1] != ' ' { + // need a ':' followed by a space or a tab + if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') { return 0 } - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } return i + 2@@ -1175,8 +1194,12 @@ // Assumes initial prefix is already removed if this is a sublist.
func (p *parser) listItem(data []byte, flags *ListType) int { // keep track of the indentation of the first line itemIndent := 0 - for itemIndent < 3 && data[itemIndent] == ' ' { - itemIndent++ + if data[0] == '\t' { + itemIndent += 4 + } else { + for itemIndent < 3 && data[itemIndent] == ' ' { + itemIndent++ + } } var bulletChar byte = '*'@@ -1203,13 +1226,13 @@ }
} // skip leading whitespace on first line - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } // find the end of the line line := i - for i > 0 && data[i-1] != '\n' { + for i > 0 && i < len(data) && data[i-1] != '\n' { i++ }@@ -1229,7 +1252,7 @@ for line < len(data) {
i++ // find the end of this line - for data[i-1] != '\n' { + for i < len(data) && data[i-1] != '\n' { i++ }@@ -1243,11 +1266,18 @@ }
// calculate the indentation indent := 0 - for indent < 4 && line+indent < i && data[line+indent] == ' ' { - indent++ + indentIndex := 0 + if data[line] == '\t' { + indentIndex++ + indent += 4 + } else { + for indent < 4 && line+indent < i && data[line+indent] == ' ' { + indent++ + indentIndex++ + } } - chunk := data[line+indent : i] + chunk := data[line+indentIndex : i] // evaluate how this line fits in switch {@@ -1288,7 +1318,7 @@ case containsBlankLine && indent < 4:
if *flags&ListTypeDefinition != 0 && i < len(data)-1 { // is the next item still a part of this list? next := i - for data[next] != '\n' { + for next < len(data) && data[next] != '\n' { next++ } for next < len(data)-1 && data[next] == '\n' {@@ -1316,7 +1346,7 @@ raw.WriteByte('\n')
} // add the line into the working buffer without prefix - raw.Write(data[line+indent : i]) + raw.Write(data[line+indentIndex : i]) line = i }@@ -1364,8 +1394,11 @@ for data[beg] == ' ' {
beg++ } + end := len(data) // trim trailing newline - end := len(data) - 1 + if data[len(data)-1] == '\n' { + end-- + } // trim trailing spaces for end > beg && data[end-1] == ' ' {@@ -1437,7 +1470,7 @@ block.Level = level
block.HeaderID = id // find the end of the underline - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { i++ } return i@@ -1470,7 +1503,8 @@
// if there's a definition list item, prev line is a definition term if p.flags&DefinitionLists != 0 { if p.dliPrefix(current) != 0 { - return p.list(data[prev:], ListTypeDefinition) + ret := p.list(data[prev:], ListTypeDefinition) + return ret } }@@ -1486,10 +1520,12 @@ }
} // otherwise, scan to the beginning of the next line - for data[i] != '\n' { - i++ + nl := bytes.IndexByte(data[i:], '\n') + if nl >= 0 { + i += nl + 1 + } else { + i += len(data[i:]) } - i++ } p.renderParagraph(data[:i])
@@ -1661,14 +1661,14 @@ func TestIsFenceLine(t *testing.T) {
tests := []struct { data []byte syntaxRequested bool - newlineOptional bool wantEnd int wantMarker string wantSyntax string }{ { - data: []byte("```"), - wantEnd: 0, + data: []byte("```"), + wantEnd: 3, + wantMarker: "```", }, { data: []byte("```\nstuff here\n"),@@ -1687,21 +1687,13 @@ wantEnd: 0,
}, { data: []byte("```"), - newlineOptional: true, - wantEnd: 3, - wantMarker: "```", - }, - { - data: []byte("```"), syntaxRequested: true, - newlineOptional: true, wantEnd: 3, wantMarker: "```", }, { data: []byte("``` go"), syntaxRequested: true, - newlineOptional: true, wantEnd: 6, wantMarker: "```", wantSyntax: "go",@@ -1713,7 +1705,7 @@ var syntax *string
if test.syntaxRequested { syntax = new(string) } - end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional) + end, marker := isFenceLine(test.data, syntax, "```") if got, want := end, test.wantEnd; got != want { t.Errorf("got end %v, want %v", got, want) }
@@ -0,0 +1,34 @@
+package blackfriday + +import ( + "html" + "io" +) + +var htmlEscaper = [256][]byte{ + '&': []byte("&amp;"), + '<': []byte("&lt;"), + '>': []byte("&gt;"), + '"': []byte("&quot;"), +} + +func escapeHTML(w io.Writer, s []byte) { + var start, end int + for end < len(s) { + escSeq := htmlEscaper[s[end]] + if escSeq != nil { + w.Write(s[start:end]) + w.Write(escSeq) + start = end + 1 + } + end++ + } + if start < len(s) && end <= len(s) { + w.Write(s[start:end]) + } +} + +func escLink(w io.Writer, text []byte) { + unesc := html.UnescapeString(string(text)) + escapeHTML(w, []byte(unesc)) +}
@@ -0,0 +1,48 @@
+package blackfriday + +import ( + "bytes" + "testing" +) + +func TestEsc(t *testing.T) { + tests := []string{ + "abc", "abc", + "a&c", "a&amp;c", + "<", "&lt;", + "[]:<", "[]:&lt;", + "Hello <!--", "Hello &lt;!--", + } + for i := 0; i < len(tests); i += 2 { + var b bytes.Buffer + escapeHTML(&b, []byte(tests[i])) + if !bytes.Equal(b.Bytes(), []byte(tests[i+1])) { + t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]", + tests[i], tests[i+1], b.String()) + } + } +} + +func BenchmarkEscapeHTML(b *testing.B) { + tests := [][]byte{ + []byte(""), + []byte("AT&T has an ampersand in their name."), + []byte("AT&amp;T is another way to write it."), + []byte("This & that."), + []byte("4 < 5."), + []byte("6 > 5."), + []byte("Here's a [link] [1] with an ampersand in the URL."), + []byte("Here's a link with an ampersand in the link text: [AT&T] [2]."), + []byte("Here's an inline [link](/script?foo=1&bar=2)."), + []byte("Here's an inline [link](</script?foo=1&bar=2>)."), + []byte("[1]: http://example.com/?foo=1&bar=2"), + []byte("[2]: http://att.com/ \"AT&T\""), + } + var buf bytes.Buffer + for n := 0; n < b.N; n++ { + for _, t := range tests { + escapeHTML(&buf, t) + buf.Reset() + } + } +}
@@ -18,7 +18,6 @@
import ( "bytes" "fmt" - "html" "io" "regexp" "strings"@@ -308,22 +307,24 @@ return pt != Link && pt != CodeBlock && pt != Code
} func appendLanguageAttr(attrs []string, info []byte) []string { - infoWords := bytes.Split(info, []byte("\t ")) - if len(infoWords) > 0 && len(infoWords[0]) > 0 { - attrs = append(attrs, fmt.Sprintf("class=\"language-%s\"", infoWords[0])) + if len(info) == 0 { + return attrs + } + endOfLang := bytes.IndexAny(info, "\t ") + if endOfLang < 0 { + endOfLang = len(info) } - return attrs + return append(attrs, fmt.Sprintf("class=\"language-%s\"", info[:endOfLang])) } -func tag(name string, attrs []string, selfClosing bool) []byte { - result := "<" + name - if attrs != nil && len(attrs) > 0 { - result += " " + strings.Join(attrs, " ") +func (r *HTMLRenderer) tag(w io.Writer, name []byte, attrs []string) { + w.Write(name) + if len(attrs) > 0 { + w.Write(spaceBytes) + w.Write([]byte(strings.Join(attrs, " "))) } - if selfClosing { - result += " /" - } - return []byte(result + ">") + w.Write(gtBytes) + r.lastOutputLen = 1 } func footnoteRef(prefix string, node *Node) []byte {@@ -371,17 +372,6 @@ return ""
} } -func esc(text []byte) []byte { - unesc := []byte(html.UnescapeString(string(text))) - return escCode(unesc) -} - -func escCode(text []byte) []byte { - e1 := []byte(html.EscapeString(string(text))) - e2 := bytes.Replace(e1, []byte("&#34;"), []byte("&quot;"), -1) - return bytes.Replace(e2, []byte("&#39;"), []byte{'\''}, -1) -} - func (r *HTMLRenderer) out(w io.Writer, text []byte) { if r.disableTags > 0 { w.Write(htmlTagRe.ReplaceAll(text, []byte{}))@@ -393,7 +383,102 @@ }
func (r *HTMLRenderer) cr(w io.Writer) { if r.lastOutputLen > 0 { - r.out(w, []byte{'\n'}) + r.out(w, nlBytes) + } +} + +var ( + nlBytes = []byte{'\n'} + gtBytes = []byte{'>'} + spaceBytes = []byte{' '} +) + +var ( + brTag = []byte("<br>") + brXHTMLTag = []byte("<br />") + emTag = []byte("<em>") + emCloseTag = []byte("</em>") + strongTag = []byte("<strong>") + strongCloseTag = []byte("</strong>") + delTag = []byte("<del>") + delCloseTag = []byte("</del>") + ttTag = []byte("<tt>") + ttCloseTag = []byte("</tt>") + aTag = []byte("<a") + aCloseTag = []byte("</a>") + preTag = []byte("<pre>") + preCloseTag = []byte("</pre>") + codeTag = []byte("<code>") + codeCloseTag = []byte("</code>") + pTag = []byte("<p>") + pCloseTag = []byte("</p>") + blockquoteTag = []byte("<blockquote>") + blockquoteCloseTag = []byte("</blockquote>") + hrTag = []byte("<hr>") + hrXHTMLTag = []byte("<hr />") + ulTag = []byte("<ul>") + ulCloseTag = []byte("</ul>") + olTag = []byte("<ol>") + olCloseTag = []byte("</ol>") + dlTag = []byte("<dl>") + dlCloseTag = []byte("</dl>") + liTag = []byte("<li>") + liCloseTag = []byte("</li>") + ddTag = []byte("<dd>") + ddCloseTag = []byte("</dd>") + dtTag = []byte("<dt>") + dtCloseTag = []byte("</dt>") + tableTag = []byte("<table>") + tableCloseTag = []byte("</table>") + tdTag = []byte("<td") + tdCloseTag = []byte("</td>") + thTag = []byte("<th") + thCloseTag = []byte("</th>") + theadTag = []byte("<thead>") + theadCloseTag = []byte("</thead>") + tbodyTag = []byte("<tbody>") + tbodyCloseTag = []byte("</tbody>") + trTag = []byte("<tr>") + trCloseTag = []byte("</tr>") + h1Tag = []byte("<h1") + h1CloseTag = []byte("</h1>") + h2Tag = []byte("<h2") + h2CloseTag = []byte("</h2>") + h3Tag = []byte("<h3") + h3CloseTag = []byte("</h3>") + h4Tag = []byte("<h4") + h4CloseTag = []byte("</h4>") + h5Tag = []byte("<h5") + h5CloseTag = []byte("</h5>") + h6Tag = []byte("<h6") + h6CloseTag = []byte("</h6>") + + footnotesDivBytes = []byte("\n<div class=\"footnotes\">\n\n") + 
footnotesCloseDivBytes = []byte("\n</div>\n") +) + +func headerTagsFromLevel(level int) ([]byte, []byte) { + switch level { + case 1: + return h1Tag, h1CloseTag + case 2: + return h2Tag, h2CloseTag + case 3: + return h3Tag, h3CloseTag + case 4: + return h4Tag, h4CloseTag + case 5: + return h5Tag, h5CloseTag + default: + return h6Tag, h6CloseTag + } +} + +func (r *HTMLRenderer) outHRTag(w io.Writer) { + if r.Flags&UseXHTML == 0 { + r.out(w, hrTag) + } else { + r.out(w, hrXHTMLTag) } }@@ -411,34 +496,44 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {
attrs := []string{} switch node.Type { case Text: - node.Literal = esc(node.Literal) if r.Flags&Smartypants != 0 { - node.Literal = r.sr.Process(node.Literal) + var tmp bytes.Buffer + escapeHTML(&tmp, node.Literal) + r.sr.Process(w, tmp.Bytes()) + } else { + if node.Parent.Type == Link { + escLink(w, node.Literal) + } else { + escapeHTML(w, node.Literal) + } } - r.out(w, node.Literal) case Softbreak: - r.out(w, []byte{'\n'}) + r.cr(w) // TODO: make it configurable via out(renderer.softbreak) case Hardbreak: - r.out(w, tag("br", nil, true)) + if r.Flags&UseXHTML == 0 { + r.out(w, brTag) + } else { + r.out(w, brXHTMLTag) + } r.cr(w) case Emph: if entering { - r.out(w, tag("em", nil, false)) + r.out(w, emTag) } else { - r.out(w, tag("/em", nil, false)) + r.out(w, emCloseTag) } case Strong: if entering { - r.out(w, tag("strong", nil, false)) + r.out(w, strongTag) } else { - r.out(w, tag("/strong", nil, false)) + r.out(w, strongCloseTag) } case Del: if entering { - r.out(w, tag("del", nil, false)) + r.out(w, delTag) } else { - r.out(w, tag("/del", nil, false)) + r.out(w, delCloseTag) } case HTMLSpan: if r.Flags&SkipHTML != 0 {@@ -450,30 +545,36 @@ // mark it but don't link it if it is not a safe link: no smartypants
dest := node.LinkData.Destination if needSkipLink(r.Flags, dest) { if entering { - r.out(w, tag("tt", nil, false)) + r.out(w, ttTag) } else { - r.out(w, tag("/tt", nil, false)) + r.out(w, ttCloseTag) } } else { if entering { dest = r.addAbsPrefix(dest) - //if (!(options.safe && potentiallyUnsafe(node.destination))) { - attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest))) - //} + var hrefBuf bytes.Buffer + hrefBuf.WriteString("href=\"") + escLink(&hrefBuf, dest) + hrefBuf.WriteByte('"') + attrs = append(attrs, hrefBuf.String()) if node.NoteID != 0 { r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node)) break } attrs = appendLinkAttrs(attrs, r.Flags, dest) if len(node.LinkData.Title) > 0 { - attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title))) + var titleBuff bytes.Buffer + titleBuff.WriteString("title=\"") + escapeHTML(&titleBuff, node.LinkData.Title) + titleBuff.WriteByte('"') + attrs = append(attrs, titleBuff.String()) } - r.out(w, tag("a", attrs, false)) + r.tag(w, aTag, attrs) } else { if node.NoteID != 0 { break } - r.out(w, tag("/a", nil, false)) + r.out(w, aCloseTag) } } case Image:@@ -487,7 +588,9 @@ if r.disableTags == 0 {
//if options.safe && potentiallyUnsafe(dest) { //out(w, `<img src="" alt="`) //} else { - r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest)))) + r.out(w, []byte(`<img src="`)) + escLink(w, dest) + r.out(w, []byte(`" alt="`)) //} } r.disableTags++@@ -496,15 +599,15 @@ r.disableTags--
if r.disableTags == 0 { if node.LinkData.Title != nil { r.out(w, []byte(`" title="`)) - r.out(w, esc(node.LinkData.Title)) + escapeHTML(w, node.LinkData.Title) } r.out(w, []byte(`" />`)) } } case Code: - r.out(w, tag("code", nil, false)) - r.out(w, escCode(node.Literal)) - r.out(w, tag("/code", nil, false)) + r.out(w, codeTag) + escapeHTML(w, node.Literal) + r.out(w, codeCloseTag) case Document: break case Paragraph:@@ -523,9 +626,9 @@ }
if node.Parent.Type == BlockQuote && node.Prev == nil { r.cr(w) } - r.out(w, tag("p", attrs, false)) + r.out(w, pTag) } else { - r.out(w, tag("/p", attrs, false)) + r.out(w, pCloseTag) if !(node.Parent.Type == Item && node.Next == nil) { r.cr(w) }@@ -533,9 +636,9 @@ }
case BlockQuote: if entering { r.cr(w) - r.out(w, tag("blockquote", attrs, false)) + r.out(w, blockquoteTag) } else { - r.out(w, tag("/blockquote", nil, false)) + r.out(w, blockquoteCloseTag) r.cr(w) } case HTMLBlock:@@ -546,7 +649,7 @@ r.cr(w)
r.out(w, node.Literal) r.cr(w) case Header: - tagname := fmt.Sprintf("h%d", node.Level) + openTag, closeTag := headerTagsFromLevel(node.Level) if entering { if node.IsTitleblock { attrs = append(attrs, `class="title"`)@@ -562,39 +665,42 @@ }
attrs = append(attrs, fmt.Sprintf(`id="%s"`, id)) } r.cr(w) - r.out(w, tag(tagname, attrs, false)) + r.tag(w, openTag, attrs) } else { - r.out(w, tag("/"+tagname, nil, false)) + r.out(w, closeTag) if !(node.Parent.Type == Item && node.Next == nil) { r.cr(w) } } case HorizontalRule: r.cr(w) - r.out(w, tag("hr", attrs, r.Flags&UseXHTML != 0)) + r.outHRTag(w) r.cr(w) case List: - tagName := "ul" + openTag := ulTag + closeTag := ulCloseTag if node.ListFlags&ListTypeOrdered != 0 { - tagName = "ol" + openTag = olTag + closeTag = olCloseTag } if node.ListFlags&ListTypeDefinition != 0 { - tagName = "dl" + openTag = dlTag + closeTag = dlCloseTag } if entering { if node.IsFootnotesList { - r.out(w, []byte("\n<div class=\"footnotes\">\n\n")) - r.out(w, tag("hr", attrs, r.Flags&UseXHTML != 0)) + r.out(w, footnotesDivBytes) + r.outHRTag(w) r.cr(w) } r.cr(w) if node.Parent.Type == Item && node.Parent.Parent.Tight { r.cr(w) } - r.out(w, tag(tagName, attrs, false)) + r.tag(w, openTag[:len(openTag)-1], attrs) r.cr(w) } else { - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) //cr(w) //if node.parent.Type != Item { // cr(w)@@ -606,16 +712,19 @@ if node.Parent.Type == Document || node.Parent.Type == BlockQuote {
r.cr(w) } if node.IsFootnotesList { - r.out(w, []byte("\n</div>\n")) + r.out(w, footnotesCloseDivBytes) } } case Item: - tagName := "li" + openTag := liTag + closeTag := liCloseTag if node.ListFlags&ListTypeDefinition != 0 { - tagName = "dd" + openTag = ddTag + closeTag = ddCloseTag } if node.ListFlags&ListTypeTerm != 0 { - tagName = "dt" + openTag = dtTag + closeTag = dtCloseTag } if entering { if itemOpenCR(node) {@@ -626,7 +735,7 @@ slug := slugify(node.ListData.RefLink)
r.out(w, footnoteItem(r.FootnoteAnchorPrefix, slug)) break } - r.out(w, tag(tagName, nil, false)) + r.out(w, openTag) } else { if node.ListData.RefLink != nil { slug := slugify(node.ListData.RefLink)@@ -634,32 +743,34 @@ if r.Flags&FootnoteReturnLinks != 0 {
r.out(w, footnoteReturnLink(r.FootnoteAnchorPrefix, r.FootnoteReturnLinkContents, slug)) } } - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) r.cr(w) } case CodeBlock: attrs = appendLanguageAttr(attrs, node.Info) r.cr(w) - r.out(w, tag("pre", nil, false)) - r.out(w, tag("code", attrs, false)) - r.out(w, escCode(node.Literal)) - r.out(w, tag("/code", nil, false)) - r.out(w, tag("/pre", nil, false)) + r.out(w, preTag) + r.tag(w, codeTag[:len(codeTag)-1], attrs) + escapeHTML(w, node.Literal) + r.out(w, codeCloseTag) + r.out(w, preCloseTag) if node.Parent.Type != Item { r.cr(w) } case Table: if entering { r.cr(w) - r.out(w, tag("table", nil, false)) + r.out(w, tableTag) } else { - r.out(w, tag("/table", nil, false)) + r.out(w, tableCloseTag) r.cr(w) } case TableCell: - tagName := "td" + openTag := tdTag + closeTag := tdCloseTag if node.IsHeader { - tagName = "th" + openTag = thTag + closeTag = thCloseTag } if entering { align := cellAlignment(node.Align)@@ -669,37 +780,37 @@ }
if node.Prev == nil { r.cr(w) } - r.out(w, tag(tagName, attrs, false)) + r.tag(w, openTag, attrs) } else { - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) r.cr(w) } case TableHead: if entering { r.cr(w) - r.out(w, tag("thead", nil, false)) + r.out(w, theadTag) } else { - r.out(w, tag("/thead", nil, false)) + r.out(w, theadCloseTag) r.cr(w) } case TableBody: if entering { r.cr(w) - r.out(w, tag("tbody", nil, false)) + r.out(w, tbodyTag) // XXX: this is to adhere to a rather silly test. Should fix test. if node.FirstChild == nil { r.cr(w) } } else { - r.out(w, tag("/tbody", nil, false)) + r.out(w, tbodyCloseTag) r.cr(w) } case TableRow: if entering { r.cr(w) - r.out(w, tag("tr", nil, false)) + r.out(w, trTag) } else { - r.out(w, tag("/tr", nil, false)) + r.out(w, trCloseTag) r.cr(w) } default:@@ -725,9 +836,9 @@ }
w.WriteString("<head>\n") w.WriteString(" <title>") if r.Flags&Smartypants != 0 { - w.Write(r.sr.Process([]byte(r.Title))) + r.sr.Process(w, []byte(r.Title)) } else { - w.Write(esc([]byte(r.Title))) + escapeHTML(w, []byte(r.Title)) } w.WriteString("</title>\n") w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")@@ -740,14 +851,14 @@ w.WriteString(ending)
w.WriteString(">\n") if r.CSS != "" { w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"") - w.Write(esc([]byte(r.CSS))) + escapeHTML(w, []byte(r.CSS)) w.WriteString("\"") w.WriteString(ending) w.WriteString(">\n") } if r.Icon != "" { w.WriteString(" <link rel=\"icon\" type=\"image/x-icon\" href=\"") - w.Write(esc([]byte(r.Icon))) + escapeHTML(w, []byte(r.Icon)) w.WriteString("\"") w.WriteString(ending) w.WriteString(">\n")@@ -807,6 +918,7 @@ w.WriteString("<nav>\n")
w.Write(buf.Bytes()) w.WriteString("\n\n</nav>\n") } + r.lastOutputLen = buf.Len() } func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) {@@ -820,17 +932,17 @@ // Render walks the specified syntax (sub)tree and returns a HTML document.
func (r *HTMLRenderer) Render(ast *Node) []byte { //println("render_Blackfriday") //dump(ast) - var buff bytes.Buffer - r.writeDocumentHeader(&buff) + var buf bytes.Buffer + r.writeDocumentHeader(&buf) if r.Flags&TOC != 0 || r.Flags&OmitContents != 0 { - r.writeTOC(&buff, ast) + r.writeTOC(&buf, ast) if r.Flags&OmitContents != 0 { - return buff.Bytes() + return buf.Bytes() } } ast.Walk(func(node *Node, entering bool) WalkStatus { - return r.RenderNode(&buff, node, entering) + return r.RenderNode(&buf, node, entering) }) - r.writeDocumentFooter(&buff) - return buff.Bytes() + r.writeDocumentFooter(&buf) + return buf.Bytes() }
@@ -33,51 +33,38 @@ // data is the complete block being rendered
// offset is the number of valid chars before the current cursor func (p *parser) inline(currBlock *Node, data []byte) { - // this is called recursively: enforce a maximum depth - if p.nesting >= p.maxNesting { + // handlers might call us recursively: enforce a maximum depth + if p.nesting >= p.maxNesting || len(data) == 0 { return } p.nesting++ - - i, end := 0, 0 - for i < len(data) { - // Stop at EOL - if data[i] == '\n' && i+1 == len(data) { - break - } - - for ; end < len(data); end++ { - if p.inlineCallback[data[end]] != nil { - break - } - } - - if end >= len(data) { - if data[end-1] == '\n' { - currBlock.AppendChild(text(data[i : end-1])) + beg, end := 0, 0 + for end < len(data) { + handler := p.inlineCallback[data[end]] + if handler != nil { + if consumed, node := handler(p, data, end); consumed == 0 { + // No action from the callback. + end++ } else { - currBlock.AppendChild(text(data[i:end])) + // Copy inactive chars into the output. + currBlock.AppendChild(text(data[beg:end])) + if node != nil { + currBlock.AppendChild(node) + } + // Skip past whatever the callback used. + beg = end + consumed + end = beg } - break - } - - // call the trigger - handler := p.inlineCallback[data[end]] - if consumed, node := handler(p, data, end); consumed == 0 { - // No action from the callback. - end++ } else { - // Copy inactive chars into the output. - currBlock.AppendChild(text(data[i:end])) - if node != nil { - currBlock.AppendChild(node) - } - // Skip past whatever the callback used. - i = end + consumed - end = i + end++ + } + } + if beg < len(data) { + if data[end-1] == '\n' { + end-- } + currBlock.AppendChild(text(data[beg:end])) } - p.nesting-- }@@ -733,25 +720,45 @@ entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd } +// hasPrefixCaseInsensitive is a custom implementation of +// strings.HasPrefix(strings.ToLower(s), prefix) +// we rolled our own because ToLower pulls in a huge machinery of lowercasing +// anything from Unicode and that's very slow. Since this func will only be +// used on ASCII protocol prefixes, we can take shortcuts. +func hasPrefixCaseInsensitive(s, prefix []byte) bool { + if len(s) < len(prefix) { + return false + } + delta := byte('a' - 'A') + for i, b := range prefix { + if b != s[i] && b != s[i]+delta { + return false + } + } + return true +} + +var protocolPrefixes = [][]byte{ + []byte("http://"), + []byte("https://"), + []byte("ftp://"), + []byte("file://"), + []byte("mailto:"), +} + +const shortestPrefix = 6 // len("ftp://"), the shortest of the above + func maybeAutoLink(p *parser, data []byte, offset int) (int, *Node) { // quick check to rule out most false hits - if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below + if p.insideLink || len(data) < offset+shortestPrefix { return 0, nil } - prefixes := []string{ - "http://", - "https://", - "ftp://", - "file://", - "mailto:", - } - for _, prefix := range prefixes { + for _, prefix := range protocolPrefixes { endOfHead := offset + 8 // 8 is the len() of the longest prefix if endOfHead > len(data) { endOfHead = len(data) } - head := bytes.ToLower(data[offset:endOfHead]) - if bytes.HasPrefix(head, []byte(prefix)) { + if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) { return autoLink(p, data, offset) } }
@@ -1133,7 +1133,7 @@
func TestSkipHTML(t *testing.T) { doTestsParam(t, []string{ "<div class=\"foo\"></div>\n\ntext\n\n<form>the form</form>", - "<p>text</p>\n", + "<p>text</p>\n\n<p>the form</p>\n", "text <em>inline html</em> more text", "<p>text inline html more text</p>\n",
@@ -385,7 +385,7 @@ if extensions&Footnotes != 0 {
p.notes = make([]*reference, 0) } - p.block(preprocess(p, input)) + p.block(input) // Walk the tree and finish up some of unfinished blocks for p.tip != nil { p.finalize(p.tip)@@ -439,63 +439,6 @@ node.content = nil
} return GoToNext }) -} - -// preprocess does a preparatory first pass over the input: -// - normalize newlines -// - expand tabs (outside of fenced code blocks) -// - copy everything else -func preprocess(p *parser, input []byte) []byte { - var out bytes.Buffer - tabSize := TabSizeDefault - if p.flags&TabSizeEight != 0 { - tabSize = TabSizeDouble - } - beg := 0 - lastFencedCodeBlockEnd := 0 - for beg < len(input) { - // Find end of this line, then process the line. - end := beg - for end < len(input) && input[end] != '\n' && input[end] != '\r' { - end++ - } - - if p.flags&FencedCode != 0 { - // track fenced code block boundaries to suppress tab expansion - // and reference extraction inside them: - if beg >= lastFencedCodeBlockEnd { - if i := p.fencedCodeBlock(input[beg:], false); i > 0 { - lastFencedCodeBlockEnd = beg + i - } - } - } - - // add the line body if present - if end > beg { - if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. - out.Write(input[beg:end]) - } else { - expandTabs(&out, input[beg:end], tabSize) - } - } - - if end < len(input) && input[end] == '\r' { - end++ - } - if end < len(input) && input[end] == '\n' { - end++ - } - out.WriteByte('\n') - - beg = end - } - - // empty input? - if out.Len() == 0 { - out.WriteByte('\n') - } - - return out.Bytes() } //
@@ -17,6 +17,7 @@ package blackfriday
import ( "bytes" + "io" ) // SPRenderer is a struct containing state of a Smartypants renderer.@@ -401,13 +402,12 @@ return &r
} // Process is the entry point of the Smartypants renderer. -func (r *SPRenderer) Process(text []byte) []byte { - var buff bytes.Buffer +func (r *SPRenderer) Process(w io.Writer, text []byte) { mark := 0 for i := 0; i < len(text); i++ { if action := r.callbacks[text[i]]; action != nil { if i > mark { - buff.Write(text[mark:i]) + w.Write(text[mark:i]) } previousChar := byte(0) if i > 0 {@@ -415,12 +415,11 @@ previousChar = text[i-1]
} var tmp bytes.Buffer i += action(&tmp, previousChar, text[i:]) - buff.Write(tmp.Bytes()) + w.Write(tmp.Bytes()) mark = i + 1 } } if mark < len(text) { - buff.Write(text[mark:]) + w.Write(text[mark:]) } - return buff.Bytes() }
@@ -1,13 +1,13 @@
<p>Here's a simple block:</p> <div> - foo + foo </div> <p>This should be a code block, though:</p> <pre><code><div> - foo + foo </div> </code></pre>@@ -19,11 +19,11 @@
<p>Now, nested:</p> <div> - <div> - <div> - foo - </div> - </div> + <div> + <div> + foo + </div> + </div> </div> <p>This should just be an HTML comment:</p>
@@ -3,7 +3,7 @@
<!-- This is a simple comment --> <!-- - This is another comment. + This is another comment. --> <p>Paragraph two.</p>
@@ -939,8 +939,8 @@ {} curly braces
[] square brackets () parentheses # hash mark -+ plus sign -- minus sign (hyphen) ++ plus sign +- minus sign (hyphen) . dot ! exclamation mark </code></pre>
@@ -13,13 +13,13 @@ </code></pre>
<p>And:</p> -<pre><code> this code block is indented by two tabs +<pre><code> this code block is indented by two tabs </code></pre> <p>And:</p> -<pre><code>+ this is an example list item - indented with tabs +<pre><code>+ this is an example list item + indented with tabs + this is an example list item indented with spaces