all repos — grayfriday @ ad7f7c56d58a2c7f75a14cdcaa8b8acd5dc4f141

blackfriday fork with a few changes

Merge pull request #322 from russross/v2-perf-tweaks

V2 perf tweaks
Vytautas Šaltenis vytas@rtfb.lt
Thu, 02 Feb 2017 17:08:19 +0200
commit

ad7f7c56d58a2c7f75a14cdcaa8b8acd5dc4f141

parent

a4dd8ad4a6fda03e68be3eacb71efbf51565415e

M .travis.yml.travis.yml

@@ -5,10 +5,9 @@

language: go go: - - 1.2 - - 1.3 - - 1.4 - 1.5 + - 1.6 + - 1.7 install: - go get -d -t -v ./...
M block.goblock.go

@@ -29,17 +29,12 @@

var ( reBackslashOrAmp = regexp.MustCompile("[\\&]") reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity) - reTrailingWhitespace = regexp.MustCompile("(\n *)+$") ) // Parse block-level data. // Note: this function and many that it calls assume that // the input buffer ends with a newline. func (p *parser) block(data []byte) { - if len(data) == 0 || data[len(data)-1] != '\n' { - panic("block input is missing terminating newline") - } - // this is called recursively: enforce a maximum depth if p.nesting >= p.maxNesting { return

@@ -131,7 +126,7 @@ // ______

if p.isHRule(data) { p.addBlock(HorizontalRule, nil) var i int - for i = 0; data[i] != '\n'; i++ { + for i = 0; i < len(data) && data[i] != '\n'; i++ { } data = data[i:] continue

@@ -216,10 +211,10 @@ }

if p.flags&SpaceHeaders != 0 { level := 0 - for level < 6 && data[level] == '#' { + for level < 6 && level < len(data) && data[level] == '#' { level++ } - if data[level] != ' ' { + if level == len(data) || data[level] != ' ' { return false } }

@@ -228,7 +223,7 @@ }

func (p *parser) prefixHeader(data []byte) int { level := 0 - for level < 6 && data[level] == '#' { + for level < 6 && level < len(data) && data[level] == '#' { level++ } i := skipChar(data, level, ' ')

@@ -277,7 +272,7 @@ // test of level 1 header

if data[0] == '=' { i := skipChar(data, 1, '=') i = skipChar(data, i, ' ') - if data[i] == '\n' { + if i < len(data) && data[i] == '\n' { return 1 } return 0

@@ -287,7 +282,7 @@ // test of level 2 header

if data[0] == '-' { i := skipChar(data, 1, '-') i = skipChar(data, i, ' ') - if data[i] == '\n' { + if i < len(data) && data[i] == '\n' { return 2 } return 0

@@ -419,8 +414,8 @@ return i

} func finalizeHTMLBlock(block *Node) { - block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{}) - block.content = []byte{} + block.Literal = block.content + block.content = nil } // HTML comment, lax form

@@ -445,6 +440,9 @@ }

// HR, which is the only self-closing block tag considered func (p *parser) htmlHr(data []byte, doRender bool) int { + if len(data) < 4 { + return 0 + } if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') { return 0 }

@@ -452,13 +450,11 @@ if data[3] != ' ' && data[3] != '/' && data[3] != '>' {

// not an <hr> tag after all; at least not a valid one return 0 } - i := 3 - for data[i] != '>' && data[i] != '\n' { + for i < len(data) && data[i] != '>' && data[i] != '\n' { i++ } - - if data[i] == '>' { + if i < len(data) && data[i] == '>' { i++ if j := p.isEmpty(data[i:]); j > 0 { size := i + j

@@ -473,13 +469,12 @@ }

return size } } - return 0 } func (p *parser) htmlFindTag(data []byte) (string, bool) { i := 0 - for isalnum(data[i]) { + for i < len(data) && isalnum(data[i]) { i++ } key := string(data[:i])

@@ -536,7 +531,10 @@ if data[i] != ' ' && data[i] != '\t' {

return 0 } } - return i + 1 + if i < len(data) && data[i] == '\n' { + i++ + } + return i } func (*parser) isHRule(data []byte) bool {

@@ -555,7 +553,7 @@ c := data[i]

// the whole line must be the char or whitespace n := 0 - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { switch { case data[i] == c: n++

@@ -571,8 +569,7 @@

// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data, // and returns the end index if so, or 0 otherwise. It also returns the marker found. // If syntax is not nil, it gets set to the syntax specified in the fence line. -// A final newline is mandatory to recognize the fence line, unless newlineOptional is true. -func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) { +func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) { i, size := 0, 0 // skip up to three spaces

@@ -614,7 +611,7 @@ syn := 0

i = skipChar(data, i, ' ') if i >= len(data) { - if newlineOptional && i == len(data) { + if i == len(data) { return i, marker } return 0, ""

@@ -659,12 +656,11 @@ }

i = skipChar(data, i, ' ') if i >= len(data) || data[i] != '\n' { - if newlineOptional && i == len(data) { + if i == len(data) { return i, marker } return 0, "" } - return i + 1, marker // Take newline into account. }

@@ -673,7 +669,7 @@ // or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.

// If doRender is true, a final newline is mandatory to recognize the fenced code block. func (p *parser) fencedCodeBlock(data []byte, doRender bool) int { var syntax string - beg, marker := isFenceLine(data, &syntax, "", false) + beg, marker := isFenceLine(data, &syntax, "") if beg == 0 || beg >= len(data) { return 0 }

@@ -686,8 +682,7 @@ for {

// safe to assume beg < len(data) // check for the end of the code block - newlineOptional := !doRender - fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional) + fenceEnd, _ := isFenceLine(data[beg:], nil, marker) if fenceEnd != 0 { beg += fenceEnd break

@@ -739,7 +734,7 @@ rest := block.content[newlinePos+1:]

block.Info = unescapeString(bytes.Trim(firstLine, "\n")) block.Literal = rest } else { - block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'}) + block.Literal = block.content } block.content = nil }

@@ -757,7 +752,7 @@ p.addBlock(TableBody, nil)

for i < len(data) { pipes, rowStart := 0, i - for ; data[i] != '\n'; i++ { + for ; i < len(data) && data[i] != '\n'; i++ { if data[i] == '|' { pipes++ }

@@ -769,7 +764,9 @@ break

} // include the newline in data sent to tableRow - i++ + if i < len(data) && data[i] == '\n' { + i++ + } p.tableRow(data[rowStart:i], columns, false) }

@@ -788,7 +785,7 @@

func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) { i := 0 colCount := 1 - for i = 0; data[i] != '\n'; i++ { + for i = 0; i < len(data) && data[i] != '\n'; i++ { if data[i] == '|' && !isBackslashEscaped(data, i) { colCount++ }

@@ -800,7 +797,11 @@ return

} // include the newline in the data sent to tableRow - header := data[:i+1] + j := i + if j < len(data) && data[j] == '\n' { + j++ + } + header := data[:j] // column count ignores pipes at beginning or end of line if data[0] == '|' {

@@ -826,7 +827,7 @@

// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3 // and trailing | optional on last column col := 0 - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { dashes := 0 if data[i] == ':' {

@@ -834,19 +835,21 @@ i++

columns[col] |= TableAlignmentLeft dashes++ } - for data[i] == '-' { + for i < len(data) && data[i] == '-' { i++ dashes++ } - if data[i] == ':' { + if i < len(data) && data[i] == ':' { i++ columns[col] |= TableAlignmentRight dashes++ } - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } - + if i == len(data) { + return + } // end of column test is messy switch { case dashes < 3:

@@ -857,12 +860,12 @@ case data[i] == '|' && !isBackslashEscaped(data, i):

// marker found, now skip past trailing whitespace col++ i++ - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } // trailing junk found after last column - if col >= colCount && data[i] != '\n' { + if col >= colCount && i < len(data) && data[i] != '\n' { return }

@@ -885,7 +888,10 @@ }

p.addBlock(TableHead, nil) p.tableRow(header, columns, true) - size = i + 1 + size = i + if size < len(data) && data[size] == '\n' { + size++ + } return }

@@ -898,13 +904,13 @@ i++

} for col = 0; col < len(columns) && i < len(data); col++ { - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } cellStart := i - for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { + for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { i++ }

@@ -913,7 +919,7 @@

// skip the end-of-cell marker, possibly taking us past end of buffer i++ - for cellEnd > cellStart && data[cellEnd-1] == ' ' { + for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' { cellEnd-- }

@@ -935,11 +941,11 @@

// returns blockquote prefix length func (p *parser) quotePrefix(data []byte) int { i := 0 - for i < 3 && data[i] == ' ' { + for i < 3 && i < len(data) && data[i] == ' ' { i++ } - if data[i] == '>' { - if data[i+1] == ' ' { + if i < len(data) && data[i] == '>' { + if i+1 < len(data) && data[i+1] == ' ' { return i + 2 } return i + 1

@@ -969,7 +975,7 @@ end = beg

// Step over whole lines, collecting them. While doing that, check for // fenced code and if one's found, incorporate it altogether, // irregardless of any contents inside it - for data[end] != '\n' { + for end < len(data) && data[end] != '\n' { if p.flags&FencedCode != 0 { if i := p.fencedCodeBlock(data[end:], false); i > 0 { // -1 to compensate for the extra end++ after the loop:

@@ -979,7 +985,9 @@ }

} end++ } - end++ + if end < len(data) && data[end] == '\n' { + end++ + } if pre := p.quotePrefix(data[beg:]); pre > 0 { // skip the prefix beg += pre

@@ -997,7 +1005,10 @@ }

// returns prefix length for block code func (p *parser) codePrefix(data []byte) int { - if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { + if len(data) >= 1 && data[0] == '\t' { + return 1 + } + if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { return 4 } return 0

@@ -1009,10 +1020,12 @@

i := 0 for i < len(data) { beg := i - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { i++ } - i++ + if i < len(data) && data[i] == '\n' { + i++ + } blankline := p.isEmpty(data[beg:i]) > 0 if pre := p.codePrefix(data[beg:i]); pre > 0 {

@@ -1023,7 +1036,7 @@ i = beg

break } - // verbatim copy to the working buffeu + // verbatim copy to the working buffer if blankline { work.WriteByte('\n') } else {

@@ -1053,15 +1066,16 @@

// returns unordered list item prefix func (p *parser) uliPrefix(data []byte) int { i := 0 - // start with up to 3 spaces - for i < 3 && data[i] == ' ' { + for i < len(data) && i < 3 && data[i] == ' ' { i++ } - - // need a *, +, or - followed by a space + if i >= len(data)-1 { + return 0 + } + // need one of {'*', '+', '-'} followed by a space or a tab if (data[i] != '*' && data[i] != '+' && data[i] != '-') || - data[i+1] != ' ' { + (data[i+1] != ' ' && data[i+1] != '\t') { return 0 } return i + 2

@@ -1072,18 +1086,21 @@ func (p *parser) oliPrefix(data []byte) int {

i := 0 // start with up to 3 spaces - for i < 3 && data[i] == ' ' { + for i < 3 && i < len(data) && data[i] == ' ' { i++ } // count the digits start := i - for data[i] >= '0' && data[i] <= '9' { + for i < len(data) && data[i] >= '0' && data[i] <= '9' { i++ } + if start == i || i >= len(data)-1 { + return 0 + } - // we need >= 1 digits followed by a dot and a space - if start == i || data[i] != '.' || data[i+1] != ' ' { + // we need >= 1 digits followed by a dot and a space or a tab + if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') { return 0 } return i + 2

@@ -1091,13 +1108,15 @@ }

// returns definition list item prefix func (p *parser) dliPrefix(data []byte) int { + if len(data) < 2 { + return 0 + } i := 0 - - // need a : followed by a spaces - if data[i] != ':' || data[i+1] != ' ' { + // need a ':' followed by a space or a tab + if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') { return 0 } - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } return i + 2

@@ -1175,8 +1194,12 @@ // Assumes initial prefix is already removed if this is a sublist.

func (p *parser) listItem(data []byte, flags *ListType) int { // keep track of the indentation of the first line itemIndent := 0 - for itemIndent < 3 && data[itemIndent] == ' ' { - itemIndent++ + if data[0] == '\t' { + itemIndent += 4 + } else { + for itemIndent < 3 && data[itemIndent] == ' ' { + itemIndent++ + } } var bulletChar byte = '*'

@@ -1203,13 +1226,13 @@ }

} // skip leading whitespace on first line - for data[i] == ' ' { + for i < len(data) && data[i] == ' ' { i++ } // find the end of the line line := i - for i > 0 && data[i-1] != '\n' { + for i > 0 && i < len(data) && data[i-1] != '\n' { i++ }

@@ -1229,7 +1252,7 @@ for line < len(data) {

i++ // find the end of this line - for data[i-1] != '\n' { + for i < len(data) && data[i-1] != '\n' { i++ }

@@ -1243,11 +1266,18 @@ }

// calculate the indentation indent := 0 - for indent < 4 && line+indent < i && data[line+indent] == ' ' { - indent++ + indentIndex := 0 + if data[line] == '\t' { + indentIndex++ + indent += 4 + } else { + for indent < 4 && line+indent < i && data[line+indent] == ' ' { + indent++ + indentIndex++ + } } - chunk := data[line+indent : i] + chunk := data[line+indentIndex : i] // evaluate how this line fits in switch {

@@ -1288,7 +1318,7 @@ case containsBlankLine && indent < 4:

if *flags&ListTypeDefinition != 0 && i < len(data)-1 { // is the next item still a part of this list? next := i - for data[next] != '\n' { + for next < len(data) && data[next] != '\n' { next++ } for next < len(data)-1 && data[next] == '\n' {

@@ -1316,7 +1346,7 @@ raw.WriteByte('\n')

} // add the line into the working buffer without prefix - raw.Write(data[line+indent : i]) + raw.Write(data[line+indentIndex : i]) line = i }

@@ -1364,8 +1394,11 @@ for data[beg] == ' ' {

beg++ } + end := len(data) // trim trailing newline - end := len(data) - 1 + if data[len(data)-1] == '\n' { + end-- + } // trim trailing spaces for end > beg && data[end-1] == ' ' {

@@ -1437,7 +1470,7 @@ block.Level = level

block.HeaderID = id // find the end of the underline - for data[i] != '\n' { + for i < len(data) && data[i] != '\n' { i++ } return i

@@ -1470,7 +1503,8 @@

// if there's a definition list item, prev line is a definition term if p.flags&DefinitionLists != 0 { if p.dliPrefix(current) != 0 { - return p.list(data[prev:], ListTypeDefinition) + ret := p.list(data[prev:], ListTypeDefinition) + return ret } }

@@ -1486,10 +1520,12 @@ }

} // otherwise, scan to the beginning of the next line - for data[i] != '\n' { - i++ + nl := bytes.IndexByte(data[i:], '\n') + if nl >= 0 { + i += nl + 1 + } else { + i += len(data[i:]) } - i++ } p.renderParagraph(data[:i])
M block_test.goblock_test.go

@@ -1661,14 +1661,14 @@ func TestIsFenceLine(t *testing.T) {

tests := []struct { data []byte syntaxRequested bool - newlineOptional bool wantEnd int wantMarker string wantSyntax string }{ { - data: []byte("```"), - wantEnd: 0, + data: []byte("```"), + wantEnd: 3, + wantMarker: "```", }, { data: []byte("```\nstuff here\n"),

@@ -1687,21 +1687,13 @@ wantEnd: 0,

}, { data: []byte("```"), - newlineOptional: true, - wantEnd: 3, - wantMarker: "```", - }, - { - data: []byte("```"), syntaxRequested: true, - newlineOptional: true, wantEnd: 3, wantMarker: "```", }, { data: []byte("``` go"), syntaxRequested: true, - newlineOptional: true, wantEnd: 6, wantMarker: "```", wantSyntax: "go",

@@ -1713,7 +1705,7 @@ var syntax *string

if test.syntaxRequested { syntax = new(string) } - end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional) + end, marker := isFenceLine(test.data, syntax, "```") if got, want := end, test.wantEnd; got != want { t.Errorf("got end %v, want %v", got, want) }
A esc.go

@@ -0,0 +1,34 @@

+package blackfriday + +import ( + "html" + "io" +) + +var htmlEscaper = [256][]byte{ + '&': []byte("&amp;"), + '<': []byte("&lt;"), + '>': []byte("&gt;"), + '"': []byte("&quot;"), +} + +func escapeHTML(w io.Writer, s []byte) { + var start, end int + for end < len(s) { + escSeq := htmlEscaper[s[end]] + if escSeq != nil { + w.Write(s[start:end]) + w.Write(escSeq) + start = end + 1 + } + end++ + } + if start < len(s) && end <= len(s) { + w.Write(s[start:end]) + } +} + +func escLink(w io.Writer, text []byte) { + unesc := html.UnescapeString(string(text)) + escapeHTML(w, []byte(unesc)) +}
A esc_test.go

@@ -0,0 +1,48 @@

+package blackfriday + +import ( + "bytes" + "testing" +) + +func TestEsc(t *testing.T) { + tests := []string{ + "abc", "abc", + "a&c", "a&amp;c", + "<", "&lt;", + "[]:<", "[]:&lt;", + "Hello <!--", "Hello &lt;!--", + } + for i := 0; i < len(tests); i += 2 { + var b bytes.Buffer + escapeHTML(&b, []byte(tests[i])) + if !bytes.Equal(b.Bytes(), []byte(tests[i+1])) { + t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]", + tests[i], tests[i+1], b.String()) + } + } +} + +func BenchmarkEscapeHTML(b *testing.B) { + tests := [][]byte{ + []byte(""), + []byte("AT&T has an ampersand in their name."), + []byte("AT&amp;T is another way to write it."), + []byte("This & that."), + []byte("4 < 5."), + []byte("6 > 5."), + []byte("Here's a [link] [1] with an ampersand in the URL."), + []byte("Here's a link with an ampersand in the link text: [AT&T] [2]."), + []byte("Here's an inline [link](/script?foo=1&bar=2)."), + []byte("Here's an inline [link](</script?foo=1&bar=2>)."), + []byte("[1]: http://example.com/?foo=1&bar=2"), + []byte("[2]: http://att.com/ \"AT&T\""), + } + var buf bytes.Buffer + for n := 0; n < b.N; n++ { + for _, t := range tests { + escapeHTML(&buf, t) + buf.Reset() + } + } +}
M html.gohtml.go

@@ -18,7 +18,6 @@

import ( "bytes" "fmt" - "html" "io" "regexp" "strings"

@@ -308,22 +307,24 @@ return pt != Link && pt != CodeBlock && pt != Code

} func appendLanguageAttr(attrs []string, info []byte) []string { - infoWords := bytes.Split(info, []byte("\t ")) - if len(infoWords) > 0 && len(infoWords[0]) > 0 { - attrs = append(attrs, fmt.Sprintf("class=\"language-%s\"", infoWords[0])) + if len(info) == 0 { + return attrs + } + endOfLang := bytes.IndexAny(info, "\t ") + if endOfLang < 0 { + endOfLang = len(info) } - return attrs + return append(attrs, fmt.Sprintf("class=\"language-%s\"", info[:endOfLang])) } -func tag(name string, attrs []string, selfClosing bool) []byte { - result := "<" + name - if attrs != nil && len(attrs) > 0 { - result += " " + strings.Join(attrs, " ") +func (r *HTMLRenderer) tag(w io.Writer, name []byte, attrs []string) { + w.Write(name) + if len(attrs) > 0 { + w.Write(spaceBytes) + w.Write([]byte(strings.Join(attrs, " "))) } - if selfClosing { - result += " /" - } - return []byte(result + ">") + w.Write(gtBytes) + r.lastOutputLen = 1 } func footnoteRef(prefix string, node *Node) []byte {

@@ -371,17 +372,6 @@ return ""

} } -func esc(text []byte) []byte { - unesc := []byte(html.UnescapeString(string(text))) - return escCode(unesc) -} - -func escCode(text []byte) []byte { - e1 := []byte(html.EscapeString(string(text))) - e2 := bytes.Replace(e1, []byte("&#34;"), []byte("&quot;"), -1) - return bytes.Replace(e2, []byte("&#39;"), []byte{'\''}, -1) -} - func (r *HTMLRenderer) out(w io.Writer, text []byte) { if r.disableTags > 0 { w.Write(htmlTagRe.ReplaceAll(text, []byte{}))

@@ -393,7 +383,102 @@ }

func (r *HTMLRenderer) cr(w io.Writer) { if r.lastOutputLen > 0 { - r.out(w, []byte{'\n'}) + r.out(w, nlBytes) + } +} + +var ( + nlBytes = []byte{'\n'} + gtBytes = []byte{'>'} + spaceBytes = []byte{' '} +) + +var ( + brTag = []byte("<br>") + brXHTMLTag = []byte("<br />") + emTag = []byte("<em>") + emCloseTag = []byte("</em>") + strongTag = []byte("<strong>") + strongCloseTag = []byte("</strong>") + delTag = []byte("<del>") + delCloseTag = []byte("</del>") + ttTag = []byte("<tt>") + ttCloseTag = []byte("</tt>") + aTag = []byte("<a") + aCloseTag = []byte("</a>") + preTag = []byte("<pre>") + preCloseTag = []byte("</pre>") + codeTag = []byte("<code>") + codeCloseTag = []byte("</code>") + pTag = []byte("<p>") + pCloseTag = []byte("</p>") + blockquoteTag = []byte("<blockquote>") + blockquoteCloseTag = []byte("</blockquote>") + hrTag = []byte("<hr>") + hrXHTMLTag = []byte("<hr />") + ulTag = []byte("<ul>") + ulCloseTag = []byte("</ul>") + olTag = []byte("<ol>") + olCloseTag = []byte("</ol>") + dlTag = []byte("<dl>") + dlCloseTag = []byte("</dl>") + liTag = []byte("<li>") + liCloseTag = []byte("</li>") + ddTag = []byte("<dd>") + ddCloseTag = []byte("</dd>") + dtTag = []byte("<dt>") + dtCloseTag = []byte("</dt>") + tableTag = []byte("<table>") + tableCloseTag = []byte("</table>") + tdTag = []byte("<td") + tdCloseTag = []byte("</td>") + thTag = []byte("<th") + thCloseTag = []byte("</th>") + theadTag = []byte("<thead>") + theadCloseTag = []byte("</thead>") + tbodyTag = []byte("<tbody>") + tbodyCloseTag = []byte("</tbody>") + trTag = []byte("<tr>") + trCloseTag = []byte("</tr>") + h1Tag = []byte("<h1") + h1CloseTag = []byte("</h1>") + h2Tag = []byte("<h2") + h2CloseTag = []byte("</h2>") + h3Tag = []byte("<h3") + h3CloseTag = []byte("</h3>") + h4Tag = []byte("<h4") + h4CloseTag = []byte("</h4>") + h5Tag = []byte("<h5") + h5CloseTag = []byte("</h5>") + h6Tag = []byte("<h6") + h6CloseTag = []byte("</h6>") + + footnotesDivBytes = []byte("\n<div class=\"footnotes\">\n\n") + 
footnotesCloseDivBytes = []byte("\n</div>\n") +) + +func headerTagsFromLevel(level int) ([]byte, []byte) { + switch level { + case 1: + return h1Tag, h1CloseTag + case 2: + return h2Tag, h2CloseTag + case 3: + return h3Tag, h3CloseTag + case 4: + return h4Tag, h4CloseTag + case 5: + return h5Tag, h5CloseTag + default: + return h6Tag, h6CloseTag + } +} + +func (r *HTMLRenderer) outHRTag(w io.Writer) { + if r.Flags&UseXHTML == 0 { + r.out(w, hrTag) + } else { + r.out(w, hrXHTMLTag) } }

@@ -411,34 +496,44 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {

attrs := []string{} switch node.Type { case Text: - node.Literal = esc(node.Literal) if r.Flags&Smartypants != 0 { - node.Literal = r.sr.Process(node.Literal) + var tmp bytes.Buffer + escapeHTML(&tmp, node.Literal) + r.sr.Process(w, tmp.Bytes()) + } else { + if node.Parent.Type == Link { + escLink(w, node.Literal) + } else { + escapeHTML(w, node.Literal) + } } - r.out(w, node.Literal) case Softbreak: - r.out(w, []byte{'\n'}) + r.cr(w) // TODO: make it configurable via out(renderer.softbreak) case Hardbreak: - r.out(w, tag("br", nil, true)) + if r.Flags&UseXHTML == 0 { + r.out(w, brTag) + } else { + r.out(w, brXHTMLTag) + } r.cr(w) case Emph: if entering { - r.out(w, tag("em", nil, false)) + r.out(w, emTag) } else { - r.out(w, tag("/em", nil, false)) + r.out(w, emCloseTag) } case Strong: if entering { - r.out(w, tag("strong", nil, false)) + r.out(w, strongTag) } else { - r.out(w, tag("/strong", nil, false)) + r.out(w, strongCloseTag) } case Del: if entering { - r.out(w, tag("del", nil, false)) + r.out(w, delTag) } else { - r.out(w, tag("/del", nil, false)) + r.out(w, delCloseTag) } case HTMLSpan: if r.Flags&SkipHTML != 0 {

@@ -450,30 +545,36 @@ // mark it but don't link it if it is not a safe link: no smartypants

dest := node.LinkData.Destination if needSkipLink(r.Flags, dest) { if entering { - r.out(w, tag("tt", nil, false)) + r.out(w, ttTag) } else { - r.out(w, tag("/tt", nil, false)) + r.out(w, ttCloseTag) } } else { if entering { dest = r.addAbsPrefix(dest) - //if (!(options.safe && potentiallyUnsafe(node.destination))) { - attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest))) - //} + var hrefBuf bytes.Buffer + hrefBuf.WriteString("href=\"") + escLink(&hrefBuf, dest) + hrefBuf.WriteByte('"') + attrs = append(attrs, hrefBuf.String()) if node.NoteID != 0 { r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node)) break } attrs = appendLinkAttrs(attrs, r.Flags, dest) if len(node.LinkData.Title) > 0 { - attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title))) + var titleBuff bytes.Buffer + titleBuff.WriteString("title=\"") + escapeHTML(&titleBuff, node.LinkData.Title) + titleBuff.WriteByte('"') + attrs = append(attrs, titleBuff.String()) } - r.out(w, tag("a", attrs, false)) + r.tag(w, aTag, attrs) } else { if node.NoteID != 0 { break } - r.out(w, tag("/a", nil, false)) + r.out(w, aCloseTag) } } case Image:

@@ -487,7 +588,9 @@ if r.disableTags == 0 {

//if options.safe && potentiallyUnsafe(dest) { //out(w, `<img src="" alt="`) //} else { - r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest)))) + r.out(w, []byte(`<img src="`)) + escLink(w, dest) + r.out(w, []byte(`" alt="`)) //} } r.disableTags++

@@ -496,15 +599,15 @@ r.disableTags--

if r.disableTags == 0 { if node.LinkData.Title != nil { r.out(w, []byte(`" title="`)) - r.out(w, esc(node.LinkData.Title)) + escapeHTML(w, node.LinkData.Title) } r.out(w, []byte(`" />`)) } } case Code: - r.out(w, tag("code", nil, false)) - r.out(w, escCode(node.Literal)) - r.out(w, tag("/code", nil, false)) + r.out(w, codeTag) + escapeHTML(w, node.Literal) + r.out(w, codeCloseTag) case Document: break case Paragraph:

@@ -523,9 +626,9 @@ }

if node.Parent.Type == BlockQuote && node.Prev == nil { r.cr(w) } - r.out(w, tag("p", attrs, false)) + r.out(w, pTag) } else { - r.out(w, tag("/p", attrs, false)) + r.out(w, pCloseTag) if !(node.Parent.Type == Item && node.Next == nil) { r.cr(w) }

@@ -533,9 +636,9 @@ }

case BlockQuote: if entering { r.cr(w) - r.out(w, tag("blockquote", attrs, false)) + r.out(w, blockquoteTag) } else { - r.out(w, tag("/blockquote", nil, false)) + r.out(w, blockquoteCloseTag) r.cr(w) } case HTMLBlock:

@@ -546,7 +649,7 @@ r.cr(w)

r.out(w, node.Literal) r.cr(w) case Header: - tagname := fmt.Sprintf("h%d", node.Level) + openTag, closeTag := headerTagsFromLevel(node.Level) if entering { if node.IsTitleblock { attrs = append(attrs, `class="title"`)

@@ -562,39 +665,42 @@ }

attrs = append(attrs, fmt.Sprintf(`id="%s"`, id)) } r.cr(w) - r.out(w, tag(tagname, attrs, false)) + r.tag(w, openTag, attrs) } else { - r.out(w, tag("/"+tagname, nil, false)) + r.out(w, closeTag) if !(node.Parent.Type == Item && node.Next == nil) { r.cr(w) } } case HorizontalRule: r.cr(w) - r.out(w, tag("hr", attrs, r.Flags&UseXHTML != 0)) + r.outHRTag(w) r.cr(w) case List: - tagName := "ul" + openTag := ulTag + closeTag := ulCloseTag if node.ListFlags&ListTypeOrdered != 0 { - tagName = "ol" + openTag = olTag + closeTag = olCloseTag } if node.ListFlags&ListTypeDefinition != 0 { - tagName = "dl" + openTag = dlTag + closeTag = dlCloseTag } if entering { if node.IsFootnotesList { - r.out(w, []byte("\n<div class=\"footnotes\">\n\n")) - r.out(w, tag("hr", attrs, r.Flags&UseXHTML != 0)) + r.out(w, footnotesDivBytes) + r.outHRTag(w) r.cr(w) } r.cr(w) if node.Parent.Type == Item && node.Parent.Parent.Tight { r.cr(w) } - r.out(w, tag(tagName, attrs, false)) + r.tag(w, openTag[:len(openTag)-1], attrs) r.cr(w) } else { - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) //cr(w) //if node.parent.Type != Item { // cr(w)

@@ -606,16 +712,19 @@ if node.Parent.Type == Document || node.Parent.Type == BlockQuote {

r.cr(w) } if node.IsFootnotesList { - r.out(w, []byte("\n</div>\n")) + r.out(w, footnotesCloseDivBytes) } } case Item: - tagName := "li" + openTag := liTag + closeTag := liCloseTag if node.ListFlags&ListTypeDefinition != 0 { - tagName = "dd" + openTag = ddTag + closeTag = ddCloseTag } if node.ListFlags&ListTypeTerm != 0 { - tagName = "dt" + openTag = dtTag + closeTag = dtCloseTag } if entering { if itemOpenCR(node) {

@@ -626,7 +735,7 @@ slug := slugify(node.ListData.RefLink)

r.out(w, footnoteItem(r.FootnoteAnchorPrefix, slug)) break } - r.out(w, tag(tagName, nil, false)) + r.out(w, openTag) } else { if node.ListData.RefLink != nil { slug := slugify(node.ListData.RefLink)

@@ -634,32 +743,34 @@ if r.Flags&FootnoteReturnLinks != 0 {

r.out(w, footnoteReturnLink(r.FootnoteAnchorPrefix, r.FootnoteReturnLinkContents, slug)) } } - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) r.cr(w) } case CodeBlock: attrs = appendLanguageAttr(attrs, node.Info) r.cr(w) - r.out(w, tag("pre", nil, false)) - r.out(w, tag("code", attrs, false)) - r.out(w, escCode(node.Literal)) - r.out(w, tag("/code", nil, false)) - r.out(w, tag("/pre", nil, false)) + r.out(w, preTag) + r.tag(w, codeTag[:len(codeTag)-1], attrs) + escapeHTML(w, node.Literal) + r.out(w, codeCloseTag) + r.out(w, preCloseTag) if node.Parent.Type != Item { r.cr(w) } case Table: if entering { r.cr(w) - r.out(w, tag("table", nil, false)) + r.out(w, tableTag) } else { - r.out(w, tag("/table", nil, false)) + r.out(w, tableCloseTag) r.cr(w) } case TableCell: - tagName := "td" + openTag := tdTag + closeTag := tdCloseTag if node.IsHeader { - tagName = "th" + openTag = thTag + closeTag = thCloseTag } if entering { align := cellAlignment(node.Align)

@@ -669,37 +780,37 @@ }

if node.Prev == nil { r.cr(w) } - r.out(w, tag(tagName, attrs, false)) + r.tag(w, openTag, attrs) } else { - r.out(w, tag("/"+tagName, nil, false)) + r.out(w, closeTag) r.cr(w) } case TableHead: if entering { r.cr(w) - r.out(w, tag("thead", nil, false)) + r.out(w, theadTag) } else { - r.out(w, tag("/thead", nil, false)) + r.out(w, theadCloseTag) r.cr(w) } case TableBody: if entering { r.cr(w) - r.out(w, tag("tbody", nil, false)) + r.out(w, tbodyTag) // XXX: this is to adhere to a rather silly test. Should fix test. if node.FirstChild == nil { r.cr(w) } } else { - r.out(w, tag("/tbody", nil, false)) + r.out(w, tbodyCloseTag) r.cr(w) } case TableRow: if entering { r.cr(w) - r.out(w, tag("tr", nil, false)) + r.out(w, trTag) } else { - r.out(w, tag("/tr", nil, false)) + r.out(w, trCloseTag) r.cr(w) } default:

@@ -725,9 +836,9 @@ }

w.WriteString("<head>\n") w.WriteString(" <title>") if r.Flags&Smartypants != 0 { - w.Write(r.sr.Process([]byte(r.Title))) + r.sr.Process(w, []byte(r.Title)) } else { - w.Write(esc([]byte(r.Title))) + escapeHTML(w, []byte(r.Title)) } w.WriteString("</title>\n") w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")

@@ -740,14 +851,14 @@ w.WriteString(ending)

w.WriteString(">\n") if r.CSS != "" { w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"") - w.Write(esc([]byte(r.CSS))) + escapeHTML(w, []byte(r.CSS)) w.WriteString("\"") w.WriteString(ending) w.WriteString(">\n") } if r.Icon != "" { w.WriteString(" <link rel=\"icon\" type=\"image/x-icon\" href=\"") - w.Write(esc([]byte(r.Icon))) + escapeHTML(w, []byte(r.Icon)) w.WriteString("\"") w.WriteString(ending) w.WriteString(">\n")

@@ -807,6 +918,7 @@ w.WriteString("<nav>\n")

w.Write(buf.Bytes()) w.WriteString("\n\n</nav>\n") } + r.lastOutputLen = buf.Len() } func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) {

@@ -820,17 +932,17 @@ // Render walks the specified syntax (sub)tree and returns a HTML document.

func (r *HTMLRenderer) Render(ast *Node) []byte { //println("render_Blackfriday") //dump(ast) - var buff bytes.Buffer - r.writeDocumentHeader(&buff) + var buf bytes.Buffer + r.writeDocumentHeader(&buf) if r.Flags&TOC != 0 || r.Flags&OmitContents != 0 { - r.writeTOC(&buff, ast) + r.writeTOC(&buf, ast) if r.Flags&OmitContents != 0 { - return buff.Bytes() + return buf.Bytes() } } ast.Walk(func(node *Node, entering bool) WalkStatus { - return r.RenderNode(&buff, node, entering) + return r.RenderNode(&buf, node, entering) }) - r.writeDocumentFooter(&buff) - return buff.Bytes() + r.writeDocumentFooter(&buf) + return buf.Bytes() }
M inline.goinline.go

@@ -33,51 +33,38 @@ // data is the complete block being rendered

// offset is the number of valid chars before the current cursor func (p *parser) inline(currBlock *Node, data []byte) { - // this is called recursively: enforce a maximum depth - if p.nesting >= p.maxNesting { + // handlers might call us recursively: enforce a maximum depth + if p.nesting >= p.maxNesting || len(data) == 0 { return } p.nesting++ - - i, end := 0, 0 - for i < len(data) { - // Stop at EOL - if data[i] == '\n' && i+1 == len(data) { - break - } - - for ; end < len(data); end++ { - if p.inlineCallback[data[end]] != nil { - break - } - } - - if end >= len(data) { - if data[end-1] == '\n' { - currBlock.AppendChild(text(data[i : end-1])) + beg, end := 0, 0 + for end < len(data) { + handler := p.inlineCallback[data[end]] + if handler != nil { + if consumed, node := handler(p, data, end); consumed == 0 { + // No action from the callback. + end++ } else { - currBlock.AppendChild(text(data[i:end])) + // Copy inactive chars into the output. + currBlock.AppendChild(text(data[beg:end])) + if node != nil { + currBlock.AppendChild(node) + } + // Skip past whatever the callback used. + beg = end + consumed + end = beg } - break - } - - // call the trigger - handler := p.inlineCallback[data[end]] - if consumed, node := handler(p, data, end); consumed == 0 { - // No action from the callback. - end++ } else { - // Copy inactive chars into the output. - currBlock.AppendChild(text(data[i:end])) - if node != nil { - currBlock.AppendChild(node) - } - // Skip past whatever the callback used. - i = end + consumed - end = i + end++ + } + } + if beg < len(data) { + if data[end-1] == '\n' { + end-- } + currBlock.AppendChild(text(data[beg:end])) } - p.nesting-- }

@@ -733,25 +720,45 @@ entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)

return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd } +// hasPrefixCaseInsensitive is a custom implementation of +// strings.HasPrefix(strings.ToLower(s), prefix) +// we rolled our own because ToLower pulls in a huge machinery of lowercasing +// anything from Unicode and that's very slow. Since this func will only be +// used on ASCII protocol prefixes, we can take shortcuts. +func hasPrefixCaseInsensitive(s, prefix []byte) bool { + if len(s) < len(prefix) { + return false + } + delta := byte('a' - 'A') + for i, b := range prefix { + if b != s[i] && b != s[i]+delta { + return false + } + } + return true +} + +var protocolPrefixes = [][]byte{ + []byte("http://"), + []byte("https://"), + []byte("ftp://"), + []byte("file://"), + []byte("mailto:"), +} + +const shortestPrefix = 6 // len("ftp://"), the shortest of the above + func maybeAutoLink(p *parser, data []byte, offset int) (int, *Node) { // quick check to rule out most false hits - if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below + if p.insideLink || len(data) < offset+shortestPrefix { return 0, nil } - prefixes := []string{ - "http://", - "https://", - "ftp://", - "file://", - "mailto:", - } - for _, prefix := range prefixes { + for _, prefix := range protocolPrefixes { endOfHead := offset + 8 // 8 is the len() of the longest prefix if endOfHead > len(data) { endOfHead = len(data) } - head := bytes.ToLower(data[offset:endOfHead]) - if bytes.HasPrefix(head, []byte(prefix)) { + if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) { return autoLink(p, data, offset) } }
M inline_test.goinline_test.go

@@ -1133,7 +1133,7 @@

func TestSkipHTML(t *testing.T) { doTestsParam(t, []string{ "<div class=\"foo\"></div>\n\ntext\n\n<form>the form</form>", - "<p>text</p>\n", + "<p>text</p>\n\n<p>the form</p>\n", "text <em>inline html</em> more text", "<p>text inline html more text</p>\n",
M markdown.gomarkdown.go

@@ -385,7 +385,7 @@ if extensions&Footnotes != 0 {

p.notes = make([]*reference, 0) } - p.block(preprocess(p, input)) + p.block(input) // Walk the tree and finish up some of unfinished blocks for p.tip != nil { p.finalize(p.tip)

@@ -439,63 +439,6 @@ node.content = nil

} return GoToNext }) -} - -// preprocess does a preparatory first pass over the input: -// - normalize newlines -// - expand tabs (outside of fenced code blocks) -// - copy everything else -func preprocess(p *parser, input []byte) []byte { - var out bytes.Buffer - tabSize := TabSizeDefault - if p.flags&TabSizeEight != 0 { - tabSize = TabSizeDouble - } - beg := 0 - lastFencedCodeBlockEnd := 0 - for beg < len(input) { - // Find end of this line, then process the line. - end := beg - for end < len(input) && input[end] != '\n' && input[end] != '\r' { - end++ - } - - if p.flags&FencedCode != 0 { - // track fenced code block boundaries to suppress tab expansion - // and reference extraction inside them: - if beg >= lastFencedCodeBlockEnd { - if i := p.fencedCodeBlock(input[beg:], false); i > 0 { - lastFencedCodeBlockEnd = beg + i - } - } - } - - // add the line body if present - if end > beg { - if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. - out.Write(input[beg:end]) - } else { - expandTabs(&out, input[beg:end], tabSize) - } - } - - if end < len(input) && input[end] == '\r' { - end++ - } - if end < len(input) && input[end] == '\n' { - end++ - } - out.WriteByte('\n') - - beg = end - } - - // empty input? - if out.Len() == 0 { - out.WriteByte('\n') - } - - return out.Bytes() } //
M smartypants.gosmartypants.go

@@ -17,6 +17,7 @@ package blackfriday

import ( "bytes" + "io" ) // SPRenderer is a struct containing state of a Smartypants renderer.

@@ -401,13 +402,12 @@ return &r

} // Process is the entry point of the Smartypants renderer. -func (r *SPRenderer) Process(text []byte) []byte { - var buff bytes.Buffer +func (r *SPRenderer) Process(w io.Writer, text []byte) { mark := 0 for i := 0; i < len(text); i++ { if action := r.callbacks[text[i]]; action != nil { if i > mark { - buff.Write(text[mark:i]) + w.Write(text[mark:i]) } previousChar := byte(0) if i > 0 {

@@ -415,12 +415,11 @@ previousChar = text[i-1]

} var tmp bytes.Buffer i += action(&tmp, previousChar, text[i:]) - buff.Write(tmp.Bytes()) + w.Write(tmp.Bytes()) mark = i + 1 } } if mark < len(text) { - buff.Write(text[mark:]) + w.Write(text[mark:]) } - return buff.Bytes() }
M testdata/Inline HTML (Simple).htmltestdata/Inline HTML (Simple).html

@@ -1,13 +1,13 @@

<p>Here's a simple block:</p> <div> - foo + foo </div> <p>This should be a code block, though:</p> <pre><code>&lt;div&gt; - foo + foo &lt;/div&gt; </code></pre>

@@ -19,11 +19,11 @@

<p>Now, nested:</p> <div> - <div> - <div> - foo - </div> - </div> + <div> + <div> + foo + </div> + </div> </div> <p>This should just be an HTML comment:</p>
M testdata/Inline HTML comments.htmltestdata/Inline HTML comments.html

@@ -3,7 +3,7 @@

<!-- This is a simple comment --> <!-- - This is another comment. + This is another comment. --> <p>Paragraph two.</p>
M testdata/Markdown Documentation - Syntax.htmltestdata/Markdown Documentation - Syntax.html

@@ -939,8 +939,8 @@ {} curly braces

[] square brackets () parentheses # hash mark -+ plus sign -- minus sign (hyphen) ++ plus sign +- minus sign (hyphen) . dot ! exclamation mark </code></pre>
M testdata/Tabs.htmltestdata/Tabs.html

@@ -13,13 +13,13 @@ </code></pre>

<p>And:</p> -<pre><code> this code block is indented by two tabs +<pre><code> this code block is indented by two tabs </code></pre> <p>And:</p> -<pre><code>+ this is an example list item - indented with tabs +<pre><code>+ this is an example list item + indented with tabs + this is an example list item indented with spaces