all repos — grayfriday @ ad7f7c56d58a2c7f75a14cdcaa8b8acd5dc4f141

blackfriday fork with a few changes

Merge pull request #322 from russross/v2-perf-tweaks

V2 perf tweaks
Vytautas Šaltenis vytas@rtfb.lt
Thu, 02 Feb 2017 17:08:19 +0200
commit

ad7f7c56d58a2c7f75a14cdcaa8b8acd5dc4f141

parent

a4dd8ad4a6fda03e68be3eacb71efbf51565415e

M .travis.yml.travis.yml

@@ -5,10 +5,9 @@

language: go go: - - 1.2 - - 1.3 - - 1.4 - 1.5 + - 1.6 + - 1.7 install: - go get -d -t -v ./...
M block.goblock.go

@@ -29,17 +29,12 @@

var ( reBackslashOrAmp = regexp.MustCompile("[\\&]") reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity) - reTrailingWhitespace = regexp.MustCompile("(\n *)+$") ) // Parse block-level data. // Note: this function and many that it calls assume that // the input buffer ends with a newline. func (p *parser) block(data []byte) { - if len(data) == 0 || data[len(data)-1] != '\n' { - panic("block input is missing terminating newline") - } - // this is called recursively: enforce a maximum depth if p.nesting >= p.maxNesting { return

@@ -131,7 +126,7 @@ // ______

if p.isHRule(data) { p.addBlock(HorizontalRule, nil) var i int - for i = 0; data[i] != '\n'; i++ { + for i = 0; i < len(data) && data[i] != '\n'; i++ { } data = data[i:] continue

@@ -216,10 +211,10 @@ }

if p.flags&SpaceHeaders != 0 { level := 0 - for level < 6 && data[level] == '#' { + for level < 6 && level < len(data) && data[level] == '#' { level++ } - if data[level] != ' ' { + if level == len(data) || data[level] != ' ' { return false } }

@@ -228,7 +223,7 @@ }

func (p *parser) prefixHeader(data []byte) int { level := 0 - for level < 6 && data[level] == '#' { + for level < 6 && level < len(data) && data[level] == '#' { level++ } i := skipChar(data, level, ' ')

@@ -277,7 +272,7 @@ // test of level 1 header

if data[0] == '=' { i := skipChar(data, 1, '=') i = skipChar(data, i, ' ') - if data[i] == '\n' { + if i < len(data) && data[i] == '\n' { return 1 } return 0

@@ -287,7 +282,7 @@ // test of level 2 header

if data[0] == '-' { i := skipChar(data, 1, '-') i = skipChar(data, i, ' ') - if data[i] == '\n' { + if i < len(data) && data[i] == '\n' { return 2 } return 0

@@ -419,8 +414,8 @@ return i

} func finalizeHTMLBlock(block *Node) { - block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{}) - block.content = []byte{} + block.Literal = block.content + block.content = nil } // HTML comment, lax form

@@ -445,6 +440,9 @@ }

// HR, which is the only self-closing block tag considered func (p *parser) htmlHr(data []byte, doRender bool) int { + if len(data) < 4 { + return 0 + } if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') { return 0 }

@@ -452,13 +450,11 @@ if data[3] != ' ' && data[3] != '/' && data[3] != '>' {

// not an <hr> tag after all; at least not a valid one return 0 } - i := 3 - for data[i] != '>' && data[i] != '\n' { + for i < len(data) && data[i] != '>' && data[i] != '\n' { i++ } - - if data[i] == '>' { + if i < len(data) && data[i] == '>' { i++ if j := p.isEmpty(data[i:]); j > 0 { size := i + j

@@ -473,13 +469,12 @@ }

return size } } - return 0 } func (p *parser) htmlFindTag(data []byte) (string, bool) { i := 0 - for isalnum(data[i]) { + for i < len(data) && isalnum(data[i]) { i++ } key := string(data[:i])

@@ -536,7 +531,10 @@ if data[i] != ' ' && data[i] != '\t' {

return 0 } } - return i + 1 + if i < len(data) && data[i] == '\n' { + i++ + } + return i } func (*parser) isHRule(data []byte) bool {

@@ -555,7 +553,7 @@ c := data[i]

// the whole line must be the char or whitespace n := 0 - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { switch { case data[i] == c: n++

@@ -571,8 +569,7 @@

// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data, // and returns the end index if so, or 0 otherwise. It also returns the marker found. // If syntax is not nil, it gets set to the syntax specified in the fence line. -// A final newline is mandatory to recognize the fence line, unless newlineOptional is true. -func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) { +func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) { i, size := 0, 0 // skip up to three spaces

@@ -614,7 +611,7 @@ syn := 0

i = skipChar(data, i, ' ') if i >= len(data) { - if newlineOptional && i == len(data) { + if i == len(data) { return i, marker } return 0, ""

@@ -659,12 +656,11 @@ }

i = skipChar(data, i, ' ') if i >= len(data) || data[i] != '\n' { - if newlineOptional && i == len(data) { + if i == len(data) { return i, marker } return 0, "" } - return i + 1, marker // Take newline into account. }

@@ -673,7 +669,7 @@ // or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.

// If doRender is true, a final newline is mandatory to recognize the fenced code block. func (p *parser) fencedCodeBlock(data []byte, doRender bool) int { var syntax string - beg, marker := isFenceLine(data, &syntax, "", false) + beg, marker := isFenceLine(data, &syntax, "") if beg == 0 || beg >= len(data) { return 0 }

@@ -686,8 +682,7 @@ for {

// safe to assume beg < len(data) // check for the end of the code block - newlineOptional := !doRender - fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional) + fenceEnd, _ := isFenceLine(data[beg:], nil, marker) if fenceEnd != 0 { beg += fenceEnd break

@@ -739,7 +734,7 @@ rest := block.content[newlinePos+1:]

block.Info = unescapeString(bytes.Trim(firstLine, "\n")) block.Literal = rest } else { - block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'}) + block.Literal = block.content } block.content = nil }

@@ -757,7 +752,7 @@ p.addBlock(TableBody, nil)

for i < len(data) { pipes, rowStart := 0, i - for ; data[i] != '\n'; i++ { + for ; i < len(data) && data[i] != '\n'; i++ { if data[i] == '|' { pipes++ }

@@ -769,7 +764,9 @@ break

} // include the newline in data sent to tableRow - i++ + if i < len(data) && data[i] == '\n' { + i++ + } p.tableRow(data[rowStart:i], columns, false) }

@@ -788,7 +785,7 @@

func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) { i := 0 colCount := 1 - for i = 0; data[i] != '\n'; i++ { + for i = 0; i < len(data) && data[i] != '\n'; i++ { if data[i] == '|' && !isBackslashEscaped(data, i) { colCount++ }

@@ -800,7 +797,11 @@ return

} // include the newline in the data sent to tableRow - header := data[:i+1] + j := i + if j < len(data) && data[j] == '\n' { + j++ + } + header := data[:j] // column count ignores pipes at beginning or end of line if data[0] == '|' {

@@ -826,7 +827,7 @@

// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3 // and trailing | optional on last column col := 0 - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { dashes := 0 if data[i] == ':' {

@@ -834,19 +835,21 @@ i++

columns[col] |= TableAlignmentLeft dashes++ } - for data[i] == '-' { + for i < len(data) && data[i] == '-' { i++ dashes++ } - if data[i] == ':' { + if i < len(data) && data[i] == ':' { i++ columns[col] |= TableAlignmentRight dashes++ } - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } - + if i == len(data) { + return + } // end of column test is messy switch { case dashes < 3:

@@ -857,12 +860,12 @@ case data[i] == '|' && !isBackslashEscaped(data, i):

// marker found, now skip past trailing whitespace col++ i++ - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } // trailing junk found after last column - if col >= colCount && data[i] != '\n' { + if col >= colCount && i < len(data) && data[i] != '\n' { return }

@@ -885,7 +888,10 @@ }

p.addBlock(TableHead, nil) p.tableRow(header, columns, true) - size = i + 1 + size = i + if size < len(data) && data[size] == '\n' { + size++ + } return }

@@ -898,13 +904,13 @@ i++

} for col = 0; col < len(columns) && i < len(data); col++ { - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } cellStart := i - for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { + for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { i++ }

@@ -913,7 +919,7 @@

// skip the end-of-cell marker, possibly taking us past end of buffer i++ - for cellEnd > cellStart && data[cellEnd-1] == ' ' { + for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' { cellEnd-- }

@@ -935,11 +941,11 @@

// returns blockquote prefix length func (p *parser) quotePrefix(data []byte) int { i := 0 - for i < 3 && data[i] == ' ' { + for i < 3 && i < len(data) && data[i] == ' ' { i++ } - if data[i] == '>' { - if data[i+1] == ' ' { + if i < len(data) && data[i] == '>' { + if i+1 < len(data) && data[i+1] == ' ' { return i + 2 } return i + 1

@@ -969,7 +975,7 @@ end = beg

// Step over whole lines, collecting them. While doing that, check for // fenced code and if one's found, incorporate it altogether, // irregardless of any contents inside it - for data[end] != '\n' { + for end < len(data) && data[end] != '\n' { if p.flags&FencedCode != 0 { if i := p.fencedCodeBlock(data[end:], false); i > 0 { // -1 to compensate for the extra end++ after the loop:

@@ -979,7 +985,9 @@ }

} end++ } - end++ + if end < len(data) && data[end] == '\n' { + end++ + } if pre := p.quotePrefix(data[beg:]); pre > 0 { // skip the prefix beg += pre

@@ -997,7 +1005,10 @@ }

// returns prefix length for block code func (p *parser) codePrefix(data []byte) int { - if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { + if len(data) >= 1 && data[0] == '\t' { + return 1 + } + if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { return 4 } return 0

@@ -1009,10 +1020,12 @@

i := 0 for i < len(data) { beg := i - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { i++ } - i++ + if i < len(data) && data[i] == '\n' { + i++ + } blankline := p.isEmpty(data[beg:i]) > 0 if pre := p.codePrefix(data[beg:i]); pre > 0 {

@@ -1023,7 +1036,7 @@ i = beg

break } - // verbatim copy to the working buffeu + // verbatim copy to the working buffer if blankline { work.WriteByte('\n') } else {

@@ -1053,15 +1066,16 @@

// returns unordered list item prefix func (p *parser) uliPrefix(data []byte) int { i := 0 - // start with up to 3 spaces - for i < 3 && data[i] == ' ' { + for i < len(data) && i < 3 && data[i] == ' ' { i++ } - - // need a *, +, or - followed by a space + if i >= len(data)-1 { + return 0 + } + // need one of {'*', '+', '-'} followed by a space or a tab if (data[i] != '*' && data[i] != '+' && data[i] != '-') || - data[i+1] != ' ' { + (data[i+1] != ' ' && data[i+1] != '\t') { return 0 } return i + 2

@@ -1072,18 +1086,21 @@ func (p *parser) oliPrefix(data []byte) int {

i := 0 // start with up to 3 spaces - for i < 3 && data[i] == ' ' { + for i < 3 && i < len(data) && data[i] == ' ' { i++ } // count the digits start := i - for data[i] >= '0' && data[i] <= '9' { + for i < len(data) && data[i] >= '0' && data[i] <= '9' { i++ } + if start == i || i >= len(data)-1 { + return 0 + } - // we need >= 1 digits followed by a dot and a space - if start == i || data[i] != '.' || data[i+1] != ' ' { + // we need >= 1 digits followed by a dot and a space or a tab + if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') { return 0 } return i + 2

@@ -1091,13 +1108,15 @@ }

// returns definition list item prefix func (p *parser) dliPrefix(data []byte) int { + if len(data) < 2 { + return 0 + } i := 0 - - // need a : followed by a spaces - if data[i] != ':' || data[i+1] != ' ' { + // need a ':' followed by a space or a tab + if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') { return 0 } - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } return i + 2

@@ -1175,8 +1194,12 @@ // Assumes initial prefix is already removed if this is a sublist.

func (p *parser) listItem(data []byte, flags *ListType) int { // keep track of the indentation of the first line itemIndent := 0 - for itemIndent < 3 && data[itemIndent] == ' ' { - itemIndent++ + if data[0] == '\t' { + itemIndent += 4 + } else { + for itemIndent < 3 && data[itemIndent] == ' ' { + itemIndent++ + } } var bulletChar byte = '*'

@@ -1203,13 +1226,13 @@ }

} // skip leading whitespace on first line - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } // find the end of the line line := i - for i > 0 && data[i-1] != '\n' { + for i > 0 && i < len(data) && data[i-1] != '\n' { i++ }

@@ -1229,7 +1252,7 @@ for line < len(data) {

i++ // find the end of this line - for data[i-1] != '\n' { + for i < len(data) && data[i-1] != '\n' { i++ }

@@ -1243,11 +1266,18 @@ }

// calculate the indentation indent := 0 - for indent < 4 && line+indent < i && data[line+indent] == ' ' { - indent++ + indentIndex := 0 + if data[line] == '\t' { + indentIndex++ + indent += 4 + } else { + for indent < 4 && line+indent < i && data[line+indent] == ' ' { + indent++ + indentIndex++ + } } - chunk := data[line+indent : i] + chunk := data[line+indentIndex : i] // evaluate how this line fits in switch {

@@ -1288,7 +1318,7 @@ case containsBlankLine && indent < 4:

if *flags&ListTypeDefinition != 0 && i < len(data)-1 { // is the next item still a part of this list? next := i - for data[next] != '\n' { + for next < len(data) && data[next] != '\n' { next++ } for next < len(data)-1 && data[next] == '\n' {

@@ -1316,7 +1346,7 @@ raw.WriteByte('\n')

} // add the line into the working buffer without prefix - raw.Write(data[line+indent : i]) + raw.Write(data[line+indentIndex : i]) line = i }

@@ -1364,8 +1394,11 @@ for data[beg] == ' ' {

beg++ } + end := len(data) // trim trailing newline - end := len(data) - 1 + if data[len(data)-1] == '\n' { + end-- + } // trim trailing spaces for end > beg && data[end-1] == ' ' {

@@ -1437,7 +1470,7 @@ block.Level = level

block.HeaderID = id // find the end of the underline - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { i++ } return i

@@ -1470,7 +1503,8 @@

// if there's a definition list item, prev line is a definition term if p.flags&DefinitionLists != 0 { if p.dliPrefix(current) != 0 { - return p.list(data[prev:], ListTypeDefinition) + ret := p.list(data[prev:], ListTypeDefinition) + return ret } }

@@ -1486,10 +1520,12 @@ }

} // otherwise, scan to the beginning of the next line - for data[i] != '\n' { - i++ + nl := bytes.IndexByte(data[i:], '\n') + if nl >= 0 { + i += nl + 1 + } else { + i += len(data[i:]) } - i++ } p.renderParagraph(data[:i])
M block_test.goblock_test.go

@@ -1661,14 +1661,14 @@ func TestIsFenceLine(t *testing.T) {

tests := []struct { data []byte syntaxRequested bool - newlineOptional bool wantEnd int wantMarker string wantSyntax string }{ { - data: []byte("```"), - wantEnd: 0, + data: []byte("```"), + wantEnd: 3, + wantMarker: "```", }, { data: []byte("```\nstuff here\n"),

@@ -1687,21 +1687,13 @@ wantEnd: 0,

}, { data: []byte("```"), - newlineOptional: true, - wantEnd: 3, - wantMarker: "```", - }, - { - data: []byte("```"), syntaxRequested: true, - newlineOptional: true, wantEnd: 3, wantMarker: "```", }, { data: []byte("``` go"), syntaxRequested: true, - newlineOptional: true, wantEnd: 6, wantMarker: "```", wantSyntax: "go",

@@ -1713,7 +1705,7 @@ var syntax *string

if test.syntaxRequested { syntax = new(string) } - end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional) + end, marker := isFenceLine(test.data, syntax, "```") if got, want := end, test.wantEnd; got != want { t.Errorf("got end %v, want %v", got, want) }
A esc.go

@@ -0,0 +1,34 @@

+package blackfriday + +import ( + "html" + "io" +) + +var htmlEscaper = [256][]byte{ + '&': []byte("&amp;"), + '<': []byte("&lt;"), + '>': []byte("&gt;"), + '"': []byte("&quot;"), +} + +func escapeHTML(w io.Writer, s []byte) { + var start, end int + for end < len(s) { + escSeq := htmlEscaper[s[end]] + if escSeq != nil { + w.Write(s[start:end]) + w.Write(escSeq) + start = end + 1 + } + end++ + } + if start < len(s) && end <= len(s) { + w.Write(s[start:end]) + } +} + +func escLink(w io.Writer, text []byte) { + unesc := html.UnescapeString(string(text)) + escapeHTML(w, []byte(unesc)) +}
A esc_test.go

@@ -0,0 +1,48 @@

+package blackfriday + +import ( + "bytes" + "testing" +) + +func TestEsc(t *testing.T) { + tests := []string{ + "abc", "abc", + "a&c", "a&amp;c", + "<", "&lt;", + "[]:<", "[]:&lt;", + "Hello <!--", "Hello &lt;!--", + } + for i := 0; i < len(tests); i += 2 { + var b bytes.Buffer + escapeHTML(&b, []byte(tests[i])) + if !bytes.Equal(b.Bytes(), []byte(tests[i+1])) { + t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]", + tests[i], tests[i+1], b.String()) + } + } +} + +func BenchmarkEscapeHTML(b *testing.B) { + tests := [][]byte{ + []byte(""), + []byte("AT&T has an ampersand in their name."), + []byte("AT&amp;T is another way to write it."), + []byte("This & that."), + []byte("4 < 5."), + []byte("6 > 5."), + []byte("Here's a [link] [1] with an ampersand in the URL."), + []byte("Here's a link with an ampersand in the link text: [AT&T] [2]."), + []byte("Here's an inline [link](/script?foo=1&bar=2)."), + []byte("Here's an inline [link](</script?foo=1&bar=2>)."), + []byte("[1]: http://example.com/?foo=1&bar=2"), + []byte("[2]: http://att.com/ \"AT&T\""), + } + var buf bytes.Buffer + for n := 0; n < b.N; n++ { + for _, t := range tests { + escapeHTML(&buf, t) + buf.Reset() + } + } +}
M html.gohtml.go

@@ -18,7 +18,6 @@

import ( "bytes" "fmt" - "html" "io" "regexp" "strings"

@@ -308,22 +307,24 @@ return pt != Link && pt != CodeBlock && pt != Code

} func appendLanguageAttr(attrs []string, info []byte) []string { - infoWords := bytes.Split(info, []byte("\t ")) - if len(infoWords) > 0 && len(infoWords[0]) > 0 { - attrs = append(attrs, fmt.Sprintf("class=\"language-%s\"", infoWords[0])) + if len(info) == 0 { + return attrs + } + endOfLang := bytes.IndexAny(info, "\t ") + if endOfLang < 0 { + endOfLang = len(info) } - return attrs + return append(attrs, fmt.Sprintf("class=\"language-%s\"", info[:endOfLang])) } -func tag(name string, attrs []string, selfClosing bool) []byte { - result := "<" + name - if attrs != nil && len(attrs) > 0 { - result += " " + strings.Join(attrs, " ") +func (r *HTMLRenderer) tag(w io.Writer, name []byte, attrs []string) { + w.Write(name) + if len(attrs) > 0 { + w.Write(spaceBytes) + w.Write([]byte(strings.Join(attrs, " "))) } - if selfClosing { - result += " /" - } - return []byte(result + ">") + w.Write(gtBytes) + r.lastOutputLen = 1 } func footnoteRef(prefix string, node *Node) []byte {

@@ -371,17 +372,6 @@ return ""

} } -func esc(text []byte) []byte { - unesc := []byte(html.UnescapeString(string(text))) - return escCode(unesc) -} - -func escCode(text []byte) []byte { - e1 := []byte(html.EscapeString(string(text))) - e2 := bytes.Replace(e1, []byte("&#34;"), []byte("&quot;"), -1) - return bytes.Replace(e2, []byte("&#39;"), []byte{'\''}, -1) -} - func (r *HTMLRenderer) out(w io.Writer, text []byte) { if r.disableTags > 0 { w.Write(htmlTagRe.ReplaceAll(text, []byte{}))

@@ -393,7 +383,102 @@ }

func (r *HTMLRenderer) cr(w io.Writer) { if r.lastOutputLen > 0 { - r.out(w, []byte{'\n'}) + r.out(w, nlBytes) + } +} + +var ( + nlBytes = []byte{'\n'} + gtBytes = []byte{'>'} + spaceBytes = []byte{' '} +) + +var ( + brTag = []byte("<br>") + brXHTMLTag = []byte("<br />") + emTag = []byte("<em>") + emCloseTag = []byte("</em>") + strongTag = []byte("<strong>") + strongCloseTag = []byte("</strong>") + delTag = []byte("<del>") + delCloseTag = []byte("</del>") + ttTag = []byte("<tt>") + ttCloseTag = []byte("</tt>") + aTag = []byte("<a") + aCloseTag = []byte("</a>") + preTag = []byte("<pre>") + preCloseTag = []byte("</pre>") + codeTag = []byte("<code>") + codeCloseTag = []byte("</code>") + pTag = []byte("<p>") + pCloseTag = []byte("</p>") + blockquoteTag = []byte("<blockquote>") + blockquoteCloseTag = []byte("</blockquote>") + hrTag = []byte("<hr>") + hrXHTMLTag = []byte("<hr />") + ulTag = []byte("<ul>") + ulCloseTag = []byte("</ul>") + olTag = []byte("<ol>") + olCloseTag = []byte("</ol>") + dlTag = []byte("<dl>") + dlCloseTag = []byte("</dl>") + liTag = []byte("<li>") + liCloseTag = []byte("</li>") + ddTag = []byte("<dd>") + ddCloseTag = []byte("</dd>") + dtTag = []byte("<dt>") + dtCloseTag = []byte("</dt>") + tableTag = []byte("<table>") + tableCloseTag = []byte("</table>") + tdTag = []byte("<td") + tdCloseTag = []byte("</td>") + thTag = []byte("<th") + thCloseTag = []byte("</th>") + theadTag = []byte("<thead>") + theadCloseTag = []byte("</thead>") + tbodyTag = []byte("<tbody>") + tbodyCloseTag = []byte("</tbody>") + trTag = []byte("<tr>") + trCloseTag = []byte("</tr>") + h1Tag = []byte("<h1") + h1CloseTag = []byte("</h1>") + h2Tag = []byte("<h2") + h2CloseTag = []byte("</h2>") + h3Tag = []byte("<h3") + h3CloseTag = []byte("</h3>") + h4Tag = []byte("<h4") + h4CloseTag = []byte("</h4>") + h5Tag = []byte("<h5") + h5CloseTag = []byte("</h5>") + h6Tag = []byte("<h6") + h6CloseTag = []byte("</h6>") + + footnotesDivBytes = []byte("\n<div class=\"footnotes\">\n\n") + 
footnotesCloseDivBytes = []byte("\n</div>\n") +) + +func headerTagsFromLevel(level int) ([]byte, []byte) { + switch level { + case 1: + return h1Tag, h1CloseTag + case 2: + return h2Tag, h2CloseTag + case 3: + return h3Tag, h3CloseTag + case 4: + return h4Tag, h4CloseTag + case 5: + return h5Tag, h5CloseTag + default: + return h6Tag, h6CloseTag + } +} + +func (r *HTMLRenderer) outHRTag(w io.Writer) { + if r.Flags&UseXHTML == 0 { + r.out(w, hrTag) + } else { + r.out(w, hrXHTMLTag) } }

@@ -411,34 +496,44 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {

attrs := []string{} switch node.Type { case Text: - node.Literal = esc(node.Literal) if r.Flags&Smartypants != 0 { - node.Literal = r.sr.Process(node.Literal) + var tmp bytes.Buffer + escapeHTML(&tmp, node.Literal) + r.sr.Process(w, tmp.Bytes()) + } else { + if node.Parent.Type == Link { + escLink(w, node.Literal) + } else { + escapeHTML(w, node.Literal) + } } - r.out(w, node.Literal) case Softbreak: - r.out(w, []byte{'\n'}) + r.cr(w) // TODO: make it configurable via out(renderer.softbreak) case Hardbreak: - r.out(w, tag("br", nil, true)) + if r.Flags&UseXHTML == 0 { + r.out(w, brTag) + } else { + r.out(w, brXHTMLTag) + } r.cr(w) case Emph: if entering { - r.out(w, tag("em", nil, false)) + r.out(w, emTag) } else { - r.out(w, tag("/em", nil, false)) + r.out(w, emCloseTag) } case Strong: if entering { - r.out(w, tag("strong", nil, false)) + r.out(w, strongTag) } else { - r.out(w, tag("/strong", nil, false)) + r.out(w, strongCloseTag) } case Del: if entering { - r.out(w, tag("del", nil, false)) + r.out(w, delTag) } else { - r.out(w, tag("/del", nil, false)) + r.out(w, delCloseTag) } case HTMLSpan: if r.Flags&SkipHTML != 0 {

@@ -450,30 +545,36 @@ // mark it but don't link it if it is not a safe link: no smartypants

dest := node.LinkData.Destination if needSkipLink(r.Flags, dest) { if entering { - r.out(w, tag("tt", nil, false)) + r.out(w, ttTag) } else { - r.out(w, tag("/tt", nil, false)) + r.out(w, ttCloseTag) } } else { if entering { dest = r.addAbsPrefix(dest) - //if (!(options.safe && potentiallyUnsafe(node.destination))) { - attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest))) - //} + var hrefBuf bytes.Buffer + hrefBuf.WriteString("href=\"") + escLink(&hrefBuf, dest) + hrefBuf.WriteByte('"') + attrs = append(attrs, hrefBuf.String()) if node.NoteID != 0 { r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node)) break } attrs = appendLinkAttrs(attrs, r.Flags, dest) if len(node.LinkData.Title) > 0 { - attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title))) + var titleBuff bytes.Buffer + titleBuff.WriteString("title=\"") + escapeHTML(&titleBuff, node.LinkData.Title) + titleBuff.WriteByte('"') + attrs = append(attrs, titleBuff.String()) } - r.out(w, tag("a", attrs, false)) + r.tag(w, aTag, attrs) } else { if node.NoteID != 0 { break } - r.out(w, tag("/a", nil, false)) + r.out(w, aCloseTag) } } case Image:

@@ -487,7 +588,9 @@ if r.disableTags == 0 {

//if options.safe && potentiallyUnsafe(dest) { //out(w, `<img src="" alt="`) //} else { - r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest)))) + r.out(w, []byte(`<img src="`)) + escLink(w, dest) + r.out(w, []byte(`" alt="`)) //} } r.disableTags++

@@ -496,15 +599,15 @@ r.disableTags--

if r.disableTags == 0 { if node.LinkData.Title != nil { r.out(w, []byte(`" title="`)) - r.out(w, esc(node.LinkData.Title)) + escapeHTML(w, node.LinkData.Title) } r.out(w, []byte(`" />`)) } } case Code: - r.out(w, tag("code", nil, false)) - r.out(w, escCode(node.Literal)) - r.out(w, tag("/code", nil, false)) + r.out(w, codeTag) + escapeHTML(w, node.Literal) + r.out(w, codeCloseTag) case Document: break case Paragraph:

@@ -523,9 +626,9 @@ }

if node.Parent.Type == BlockQuote && node.Prev == nil { r.cr(w) } - r.out(w, tag("p", attrs, false)) + r.out(w, pTag) } else { - r.out(w, tag("/p", attrs, false)) + r.out(w, pCloseTag) if !(node.Parent.Type == Item && node.Next == nil) { r.cr(w) }

@@ -533,9 +636,9 @@ }

case BlockQuote: if entering { r.cr(w) - r.out(w, tag("blockquote", attrs, false)) + r.out(w, blockquoteTag) } else { - r.out(w, tag("/blockquote", nil, false)) + r.out(w, blockquoteCloseTag) r.cr(w) } case HTMLBlock:

@@ -546,7 +649,7 @@ r.cr(w)

r.out(w, node.Literal) r.cr(w) case Header: - tagname := fmt.Sprintf("h%d", node.Level) + openTag, closeTag := headerTagsFromLevel(node.Level) if entering { if node.IsTitleblock { attrs = append(attrs, `class="title"`)

@@ -562,39 +665,42 @@ }

attrs = append(attrs, fmt.Sprintf(`id="%s"`, id)) } r.cr(w) - r.out(w, tag(tagname, attrs, false)) + r.tag(w, openTag, attrs) } else { - r.out(w, tag("/"+tagname, nil, false)) + r.out(w, closeTag) if !(node.Parent.Type == Item && node.Next == nil) { r.cr(w) } } case HorizontalRule: r.cr(w) - r.out(w, tag("hr", attrs, r.Flags&UseXHTML != 0)) + r.outHRTag(w) r.cr(w) case List: - tagName := "ul" + openTag := ulTag + closeTag := ulCloseTag if node.ListFlags&ListTypeOrdered != 0 { - tagName = "ol" + openTag = olTag + closeTag = olCloseTag } if node.ListFlags&ListTypeDefinition != 0 { - tagName = "dl" + openTag = dlTag + closeTag = dlCloseTag } if entering { if node.IsFootnotesList { - r.out(w, []byte("\n<div class=\"footnotes\">\n\n")) - r.out(w, tag("hr", attrs, r.Flags&UseXHTML != 0)) + r.out(w, footnotesDivBytes) + r.outHRTag(w) r.cr(w) } r.cr(w) if node.Parent.Type == Item && node.Parent.Parent.Tight { r.cr(w) } - r.out(w, tag(tagName, attrs, false)) + r.tag(w, openTag[:len(openTag)-1], attrs) r.cr(w) } else { - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) //cr(w) //if node.parent.Type != Item { // cr(w)

@@ -606,16 +712,19 @@ if node.Parent.Type == Document || node.Parent.Type == BlockQuote {

r.cr(w) } if node.IsFootnotesList { - r.out(w, []byte("\n</div>\n")) + r.out(w, footnotesCloseDivBytes) } } case Item: - tagName := "li" + openTag := liTag + closeTag := liCloseTag if node.ListFlags&ListTypeDefinition != 0 { - tagName = "dd" + openTag = ddTag + closeTag = ddCloseTag } if node.ListFlags&ListTypeTerm != 0 { - tagName = "dt" + openTag = dtTag + closeTag = dtCloseTag } if entering { if itemOpenCR(node) {

@@ -626,7 +735,7 @@ slug := slugify(node.ListData.RefLink)

r.out(w, footnoteItem(r.FootnoteAnchorPrefix, slug)) break } - r.out(w, tag(tagName, nil, false)) + r.out(w, openTag) } else { if node.ListData.RefLink != nil { slug := slugify(node.ListData.RefLink)

@@ -634,32 +743,34 @@ if r.Flags&FootnoteReturnLinks != 0 {

r.out(w, footnoteReturnLink(r.FootnoteAnchorPrefix, r.FootnoteReturnLinkContents, slug)) } } - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) r.cr(w) } case CodeBlock: attrs = appendLanguageAttr(attrs, node.Info) r.cr(w) - r.out(w, tag("pre", nil, false)) - r.out(w, tag("code", attrs, false)) - r.out(w, escCode(node.Literal)) - r.out(w, tag("/code", nil, false)) - r.out(w, tag("/pre", nil, false)) + r.out(w, preTag) + r.tag(w, codeTag[:len(codeTag)-1], attrs) + escapeHTML(w, node.Literal) + r.out(w, codeCloseTag) + r.out(w, preCloseTag) if node.Parent.Type != Item { r.cr(w) } case Table: if entering { r.cr(w) - r.out(w, tag("table", nil, false)) + r.out(w, tableTag) } else { - r.out(w, tag("/table", nil, false)) + r.out(w, tableCloseTag) r.cr(w) } case TableCell: - tagName := "td" + openTag := tdTag + closeTag := tdCloseTag if node.IsHeader { - tagName = "th" + openTag = thTag + closeTag = thCloseTag } if entering { align := cellAlignment(node.Align)

@@ -669,37 +780,37 @@ }

if node.Prev == nil { r.cr(w) } - r.out(w, tag(tagName, attrs, false)) + r.tag(w, openTag, attrs) } else { - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) r.cr(w) } case TableHead: if entering { r.cr(w) - r.out(w, tag("thead", nil, false)) + r.out(w, theadTag) } else { - r.out(w, tag("/thead", nil, false)) + r.out(w, theadCloseTag) r.cr(w) } case TableBody: if entering { r.cr(w) - r.out(w, tag("tbody", nil, false)) + r.out(w, tbodyTag) // XXX: this is to adhere to a rather silly test. Should fix test. if node.FirstChild == nil { r.cr(w) } } else { - r.out(w, tag("/tbody", nil, false)) + r.out(w, tbodyCloseTag) r.cr(w) } case TableRow: if entering { r.cr(w) - r.out(w, tag("tr", nil, false)) + r.out(w, trTag) } else { - r.out(w, tag("/tr", nil, false)) + r.out(w, trCloseTag) r.cr(w) } default:

@@ -725,9 +836,9 @@ }

w.WriteString("<head>\n") w.WriteString(" <title>") if r.Flags&Smartypants != 0 { - w.Write(r.sr.Process([]byte(r.Title))) + r.sr.Process(w, []byte(r.Title)) } else { - w.Write(esc([]byte(r.Title))) + escapeHTML(w, []byte(r.Title)) } w.WriteString("</title>\n") w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")

@@ -740,14 +851,14 @@ w.WriteString(ending)

w.WriteString(">\n") if r.CSS != "" { w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"") - w.Write(esc([]byte(r.CSS))) + escapeHTML(w, []byte(r.CSS)) w.WriteString("\"") w.WriteString(ending) w.WriteString(">\n") } if r.Icon != "" { w.WriteString(" <link rel=\"icon\" type=\"image/x-icon\" href=\"") - w.Write(esc([]byte(r.Icon))) + escapeHTML(w, []byte(r.Icon)) w.WriteString("\"") w.WriteString(ending) w.WriteString(">\n")

@@ -807,6 +918,7 @@ w.WriteString("<nav>\n")

w.Write(buf.Bytes()) w.WriteString("\n\n</nav>\n") } + r.lastOutputLen = buf.Len() } func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) {

@@ -820,17 +932,17 @@ // Render walks the specified syntax (sub)tree and returns a HTML document.

func (r *HTMLRenderer) Render(ast *Node) []byte { //println("render_Blackfriday") //dump(ast) - var buff bytes.Buffer - r.writeDocumentHeader(&buff) + var buf bytes.Buffer + r.writeDocumentHeader(&buf) if r.Flags&TOC != 0 || r.Flags&OmitContents != 0 { - r.writeTOC(&buff, ast) + r.writeTOC(&buf, ast) if r.Flags&OmitContents != 0 { - return buff.Bytes() + return buf.Bytes() } } ast.Walk(func(node *Node, entering bool) WalkStatus { - return r.RenderNode(&buff, node, entering) + return r.RenderNode(&buf, node, entering) }) - r.writeDocumentFooter(&buff) - return buff.Bytes() + r.writeDocumentFooter(&buf) + return buf.Bytes() }
M inline.goinline.go

@@ -33,51 +33,38 @@ // data is the complete block being rendered

// offset is the number of valid chars before the current cursor func (p *parser) inline(currBlock *Node, data []byte) { - // this is called recursively: enforce a maximum depth - if p.nesting >= p.maxNesting { + // handlers might call us recursively: enforce a maximum depth + if p.nesting >= p.maxNesting || len(data) == 0 { return } p.nesting++ - - i, end := 0, 0 - for i < len(data) { - // Stop at EOL - if data[i] == '\n' && i+1 == len(data) { - break - } - - for ; end < len(data); end++ { - if p.inlineCallback[data[end]] != nil { - break - } - } - - if end >= len(data) { - if data[end-1] == '\n' { - currBlock.AppendChild(text(data[i : end-1])) + beg, end := 0, 0 + for end < len(data) { + handler := p.inlineCallback[data[end]] + if handler != nil { + if consumed, node := handler(p, data, end); consumed == 0 { + // No action from the callback. + end++ } else { - currBlock.AppendChild(text(data[i:end])) + // Copy inactive chars into the output. + currBlock.AppendChild(text(data[beg:end])) + if node != nil { + currBlock.AppendChild(node) + } + // Skip past whatever the callback used. + beg = end + consumed + end = beg } - break - } - - // call the trigger - handler := p.inlineCallback[data[end]] - if consumed, node := handler(p, data, end); consumed == 0 { - // No action from the callback. - end++ } else { - // Copy inactive chars into the output. - currBlock.AppendChild(text(data[i:end])) - if node != nil { - currBlock.AppendChild(node) - } - // Skip past whatever the callback used. - i = end + consumed - end = i + end++ + } + } + if beg < len(data) { + if data[end-1] == '\n' { + end-- } + currBlock.AppendChild(text(data[beg:end])) } - p.nesting-- }

@@ -733,25 +720,45 @@ entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)

return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd } +// hasPrefixCaseInsensitive is a custom implementation of +// strings.HasPrefix(strings.ToLower(s), prefix) +// we rolled our own because ToLower pulls in a huge machinery of lowercasing +// anything from Unicode and that's very slow. Since this func will only be +// used on ASCII protocol prefixes, we can take shortcuts. +func hasPrefixCaseInsensitive(s, prefix []byte) bool { + if len(s) < len(prefix) { + return false + } + delta := byte('a' - 'A') + for i, b := range prefix { + if b != s[i] && b != s[i]+delta { + return false + } + } + return true +} + +var protocolPrefixes = [][]byte{ + []byte("http://"), + []byte("https://"), + []byte("ftp://"), + []byte("file://"), + []byte("mailto:"), +} + +const shortestPrefix = 6 // len("ftp://"), the shortest of the above + func maybeAutoLink(p *parser, data []byte, offset int) (int, *Node) { // quick check to rule out most false hits - if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below + if p.insideLink || len(data) < offset+shortestPrefix { return 0, nil } - prefixes := []string{ - "http://", - "https://", - "ftp://", - "file://", - "mailto:", - } - for _, prefix := range prefixes { + for _, prefix := range protocolPrefixes { endOfHead := offset + 8 // 8 is the len() of the longest prefix if endOfHead > len(data) { endOfHead = len(data) } - head := bytes.ToLower(data[offset:endOfHead]) - if bytes.HasPrefix(head, []byte(prefix)) { + if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) { return autoLink(p, data, offset) } }
M inline_test.goinline_test.go

@@ -1133,7 +1133,7 @@

func TestSkipHTML(t *testing.T) { doTestsParam(t, []string{ "<div class=\"foo\"></div>\n\ntext\n\n<form>the form</form>", - "<p>text</p>\n", + "<p>text</p>\n\n<p>the form</p>\n", "text <em>inline html</em> more text", "<p>text inline html more text</p>\n",
M markdown.gomarkdown.go

@@ -385,7 +385,7 @@ if extensions&Footnotes != 0 {

p.notes = make([]*reference, 0) } - p.block(preprocess(p, input)) + p.block(input) // Walk the tree and finish up some of unfinished blocks for p.tip != nil { p.finalize(p.tip)

@@ -439,63 +439,6 @@ node.content = nil

} return GoToNext }) -} - -// preprocess does a preparatory first pass over the input: -// - normalize newlines -// - expand tabs (outside of fenced code blocks) -// - copy everything else -func preprocess(p *parser, input []byte) []byte { - var out bytes.Buffer - tabSize := TabSizeDefault - if p.flags&TabSizeEight != 0 { - tabSize = TabSizeDouble - } - beg := 0 - lastFencedCodeBlockEnd := 0 - for beg < len(input) { - // Find end of this line, then process the line. - end := beg - for end < len(input) && input[end] != '\n' && input[end] != '\r' { - end++ - } - - if p.flags&FencedCode != 0 { - // track fenced code block boundaries to suppress tab expansion - // and reference extraction inside them: - if beg >= lastFencedCodeBlockEnd { - if i := p.fencedCodeBlock(input[beg:], false); i > 0 { - lastFencedCodeBlockEnd = beg + i - } - } - } - - // add the line body if present - if end > beg { - if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. - out.Write(input[beg:end]) - } else { - expandTabs(&out, input[beg:end], tabSize) - } - } - - if end < len(input) && input[end] == '\r' { - end++ - } - if end < len(input) && input[end] == '\n' { - end++ - } - out.WriteByte('\n') - - beg = end - } - - // empty input? - if out.Len() == 0 { - out.WriteByte('\n') - } - - return out.Bytes() } //
M smartypants.gosmartypants.go

@@ -17,6 +17,7 @@ package blackfriday

import ( "bytes" + "io" ) // SPRenderer is a struct containing state of a Smartypants renderer.

@@ -401,13 +402,12 @@ return &r

} // Process is the entry point of the Smartypants renderer. -func (r *SPRenderer) Process(text []byte) []byte { - var buff bytes.Buffer +func (r *SPRenderer) Process(w io.Writer, text []byte) { mark := 0 for i := 0; i < len(text); i++ { if action := r.callbacks[text[i]]; action != nil { if i > mark { - buff.Write(text[mark:i]) + w.Write(text[mark:i]) } previousChar := byte(0) if i > 0 {

@@ -415,12 +415,11 @@ previousChar = text[i-1]

} var tmp bytes.Buffer i += action(&tmp, previousChar, text[i:]) - buff.Write(tmp.Bytes()) + w.Write(tmp.Bytes()) mark = i + 1 } } if mark < len(text) { - buff.Write(text[mark:]) + w.Write(text[mark:]) } - return buff.Bytes() }
M testdata/Inline HTML (Simple).htmltestdata/Inline HTML (Simple).html

@@ -1,13 +1,13 @@

<p>Here's a simple block:</p> <div> - foo + foo </div> <p>This should be a code block, though:</p> <pre><code>&lt;div&gt; - foo + foo &lt;/div&gt; </code></pre>

@@ -19,11 +19,11 @@

<p>Now, nested:</p> <div> - <div> - <div> - foo - </div> - </div> + <div> + <div> + foo + </div> + </div> </div> <p>This should just be an HTML comment:</p>
M testdata/Inline HTML comments.htmltestdata/Inline HTML comments.html

@@ -3,7 +3,7 @@

<!-- This is a simple comment --> <!-- - This is another comment. + This is another comment. --> <p>Paragraph two.</p>
M testdata/Markdown Documentation - Syntax.htmltestdata/Markdown Documentation - Syntax.html

@@ -939,8 +939,8 @@ {} curly braces

[] square brackets () parentheses # hash mark -+ plus sign -- minus sign (hyphen) ++ plus sign +- minus sign (hyphen) . dot ! exclamation mark </code></pre>
M testdata/Tabs.htmltestdata/Tabs.html

@@ -13,13 +13,13 @@ </code></pre>

<p>And:</p> -<pre><code> this code block is indented by two tabs +<pre><code> this code block is indented by two tabs </code></pre> <p>And:</p> -<pre><code>+ this is an example list item - indented with tabs +<pre><code>+ this is an example list item + indented with tabs + this is an example list item indented with spaces