icy does git — grayfriday: fd97b7d32f610d16b5d2d355ca62d513f956e074

Merge pull request #282 from russross/v2-fix-279

v2: Fix issue in fenced code block pre-processing.

Vytautas Šaltenis <vytas@rtfb.lt>

Sat, 16 Jul 2016 10:25:34 +0300

commit

fd97b7d32f610d16b5d2d355ca62d513f956e074

parent

6291a00f2fdd8169778c507d63e2feb0ea310990

5 files changed, 199 insertions(+), 71 deletions(-)

jump to

block.go

block_test.go

helpers_test.go

markdown.go

markdown_test.go

M block.go → block.go

@@ -115,7 +115,7 @@ //     return n * fact(n-1)
 		// }
 		// ```
 		if p.flags&FencedCode != 0 {
-			if i := p.fencedCode(data, true); i > 0 {
+			if i := p.fencedCodeBlock(data, true); i > 0 {
 				data = data[i:]
 				continue
 			}
@@ -526,7 +526,7 @@
 	return i + skip
 }
 
-func (p *parser) isEmpty(data []byte) int {
+func (*parser) isEmpty(data []byte) int {
 	// it is okay to call isEmpty on an empty buffer
 	if len(data) == 0 {
 		return 0
@@ -541,7 +541,7 @@ }
 	return i + 1
 }
 
-func (p *parser) isHRule(data []byte) bool {
+func (*parser) isHRule(data []byte) bool {
 	i := 0
 
 	// skip up to three spaces
@@ -570,21 +570,24 @@
 	return n >= 3
 }
 
-func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
+// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
+// and returns the end index if so, or 0 otherwise. It also returns the marker found.
+// If syntax is not nil, it gets set to the syntax specified in the fence line.
+// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
+func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
 	i, size := 0, 0
-	skip = 0
 
 	// skip up to three spaces
 	for i < len(data) && i < 3 && data[i] == ' ' {
 		i++
 	}
+
+	// check for the marker characters: ~ or `
 	if i >= len(data) {
-		return
+		return 0, ""
 	}
-
-	// check for the marker characters: ~ or `
 	if data[i] != '~' && data[i] != '`' {
-		return
+		return 0, ""
 	}
 
 	c := data[i]
@@ -593,29 +596,30 @@ // the whole line must be the same char or whitespace
 	for i < len(data) && data[i] == c {
 		size++
 		i++
-	}
-
-	if i >= len(data) {
-		return
 	}
 
 	// the marker char must occur at least 3 times
 	if size < 3 {
-		return
+		return 0, ""
 	}
 	marker = string(data[i-size : i])
 
 	// if this is the end marker, it must match the beginning marker
 	if oldmarker != "" && marker != oldmarker {
-		return
+		return 0, ""
 	}
 
+	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
+	// into one, always get the syntax, and discard it if the caller doesn't care.
 	if syntax != nil {
 		syn := 0
 		i = skipChar(data, i, ' ')
 
 		if i >= len(data) {
-			return
+			if newlineOptional && i == len(data) {
+				return i, marker
+			}
+			return 0, ""
 		}
 
 		syntaxStart := i
@@ -630,7 +634,7 @@ i++
 			}
 
 			if i >= len(data) || data[i] != '}' {
-				return
+				return 0, ""
 			}
 
 			// strip all whitespace at the beginning and the end
@@ -652,37 +656,40 @@ i++
 			}
 		}
 
-		language := string(data[syntaxStart : syntaxStart+syn])
-		*syntax = &language
+		*syntax = string(data[syntaxStart : syntaxStart+syn])
 	}
 
 	i = skipChar(data, i, ' ')
 	if i >= len(data) || data[i] != '\n' {
-		return
+		if newlineOptional && i == len(data) {
+			return i, marker
+		}
+		return 0, ""
 	}
 
-	skip = i + 1
-	return
+	return i + 1, marker // Take newline into account.
 }
 
-func (p *parser) fencedCode(data []byte, doRender bool) int {
-	var lang *string
-	beg, marker := p.isFencedCode(data, &lang, "")
+// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
+// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
+// If doRender is true, a final newline is mandatory to recognize the fenced code block.
+func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
+	var syntax string
+	beg, marker := isFenceLine(data, &syntax, "", false)
 	if beg == 0 || beg >= len(data) {
 		return 0
 	}
 
 	var work bytes.Buffer
-	if lang != nil {
-		work.Write([]byte(*lang))
-		work.WriteByte('\n')
-	}
+	work.Write([]byte(syntax))
+	work.WriteByte('\n')
 
 	for {
 		// safe to assume beg < len(data)
 
 		// check for the end of the code block
-		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
+		newlineOptional := !doRender
+		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
 		if fenceEnd != 0 {
 			beg += fenceEnd
 			break
@@ -702,11 +709,6 @@ work.Write(data[beg:end])
 		}
 		beg = end
 	}
-
-	//syntax := ""
-	//if lang != nil {
-	//	syntax = *lang
-	//}
 
 	if doRender {
 		block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
@@ -972,7 +974,7 @@ // fenced code and if one's found, incorporate it altogether,
 		// irregardless of any contents inside it
 		for data[end] != '\n' {
 			if p.flags&FencedCode != 0 {
-				if i := p.fencedCode(data[end:], false); i > 0 {
+				if i := p.fencedCodeBlock(data[end:], false); i > 0 {
 					// -1 to compensate for the extra end++ after the loop:
 					end += i - 1
 					break
@@ -1451,7 +1453,7 @@ }
 
 		// if there's a fenced code block, paragraph is over
 		if p.flags&FencedCode != 0 {
-			if p.fencedCode(current, false) > 0 {
+			if p.fencedCodeBlock(current, false) > 0 {
 				p.renderParagraph(data[:i])
 				return i
 			}

M block_test.go → block_test.go

@@ -1011,6 +1011,12 @@ "<p>Bla bla</p>\n\n<pre><code class=\"language-oz\">code blocks breakup paragraphs\n</code></pre>\n\n<p>Bla Bla</p>\n\n<pre><code class=\"language-oz\">multiple code blocks work okay\n</code></pre>\n\n<p>Bla Bla</p>\n",
 
 		"Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block",
 		"<p>Some text before a fenced code block</p>\n\n<pre><code class=\"language-oz\">code blocks breakup paragraphs\n</code></pre>\n\n<p>Some text in between</p>\n\n<pre><code class=\"language-oz\">multiple code blocks work okay\n</code></pre>\n\n<p>And some text after a fenced code block</p>\n",
+
+		"```\n[]:()\n```\n",
+		"<pre><code>[]:()\n</code></pre>\n",
+
+		"```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```",
+		"<pre><code>[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n</code></pre>\n",
 	}
 	doTestsBlock(t, tests, FencedCode)
 }
@@ -1579,3 +1585,74 @@ `,
 	}
 	doTestsParam(t, tests, TestParams{HTMLFlags: UseXHTML | CompletePage})
 }
+
+func TestIsFenceLine(t *testing.T) {
+	tests := []struct {
+		data            []byte
+		syntaxRequested bool
+		newlineOptional bool
+		wantEnd         int
+		wantMarker      string
+		wantSyntax      string
+	}{
+		{
+			data:    []byte("```"),
+			wantEnd: 0,
+		},
+		{
+			data:       []byte("```\nstuff here\n"),
+			wantEnd:    4,
+			wantMarker: "```",
+		},
+		{
+			data:            []byte("```\nstuff here\n"),
+			syntaxRequested: true,
+			wantEnd:         4,
+			wantMarker:      "```",
+		},
+		{
+			data:    []byte("stuff here\n```\n"),
+			wantEnd: 0,
+		},
+		{
+			data:            []byte("```"),
+			newlineOptional: true,
+			wantEnd:         3,
+			wantMarker:      "```",
+		},
+		{
+			data:            []byte("```"),
+			syntaxRequested: true,
+			newlineOptional: true,
+			wantEnd:         3,
+			wantMarker:      "```",
+		},
+		{
+			data:            []byte("``` go"),
+			syntaxRequested: true,
+			newlineOptional: true,
+			wantEnd:         6,
+			wantMarker:      "```",
+			wantSyntax:      "go",
+		},
+	}
+
+	for _, test := range tests {
+		var syntax *string
+		if test.syntaxRequested {
+			syntax = new(string)
+		}
+		end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional)
+		if got, want := end, test.wantEnd; got != want {
+			t.Errorf("got end %v, want %v", got, want)
+		}
+		if got, want := marker, test.wantMarker; got != want {
+			t.Errorf("got marker %q, want %q", got, want)
+		}
+		if test.syntaxRequested {
+			if got, want := *syntax, test.wantSyntax; got != want {
+				t.Errorf("got syntax %q, want %q", got, want)
+			}
+		}
+	}
+}

M helpers_test.go → helpers_test.go

@@ -49,6 +49,17 @@ renderer := NewHTMLRenderer(params.HTMLRendererParameters)
 	return string(Markdown([]byte(input), renderer, params.Options))
 }
 
+// doTests runs full document tests using MarkdownCommon configuration.
+func doTests(t *testing.T, tests []string) {
+	doTestsParam(t, tests, TestParams{
+		Options: DefaultOptions,
+		HTMLRendererParameters: HTMLRendererParameters{
+			Flags:      CommonHtmlFlags,
+			Extensions: CommonExtensions,
+		},
+	})
+}
+
 func doTestsBlock(t *testing.T, tests []string, extensions Extensions) {
 	doTestsParam(t, tests, TestParams{
 		Options:   Options{Extensions: extensions},

M markdown.go → markdown.go

@@ -526,9 +526,9 @@ })
 }
 
 // first pass:
-// - extract references
-// - expand tabs
 // - normalize newlines
+// - extract references (outside of fenced code blocks)
+// - expand tabs (outside of fenced code blocks)
 // - copy everything else
 func firstPass(p *parser, input []byte) []byte {
 	var out bytes.Buffer
@@ -536,46 +536,46 @@ tabSize := TabSizeDefault
 	if p.flags&TabSizeEight != 0 {
 		tabSize = TabSizeDouble
 	}
-	beg, end := 0, 0
+	beg := 0
 	lastFencedCodeBlockEnd := 0
-	for beg < len(input) { // iterate over lines
-		if end = isReference(p, input[beg:], tabSize); end > 0 {
-			beg += end
-		} else { // skip to the next line
-			end = beg
-			for end < len(input) && input[end] != '\n' && input[end] != '\r' {
-				end++
-			}
-
-			if p.flags&FencedCode != 0 {
-				// track fenced code block boundaries to suppress tab expansion
-				// inside them:
-				if beg >= lastFencedCodeBlockEnd {
-					if i := p.fencedCode(input[beg:], false); i > 0 {
-						lastFencedCodeBlockEnd = beg + i
-					}
-				}
-			}
+	for beg < len(input) {
+		// Find end of this line, then process the line.
+		end := beg
+		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
+			end++
+		}
 
-			// add the line body if present
-			if end > beg {
-				if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
-					out.Write(input[beg:end])
-				} else {
-					expandTabs(&out, input[beg:end], tabSize)
+		if p.flags&FencedCode != 0 {
+			// track fenced code block boundaries to suppress tab expansion
+			// and reference extraction inside them:
+			if beg >= lastFencedCodeBlockEnd {
+				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
+					lastFencedCodeBlockEnd = beg + i
 				}
 			}
-			out.WriteByte('\n')
+		}
 
-			if end < len(input) && input[end] == '\r' {
-				end++
-			}
-			if end < len(input) && input[end] == '\n' {
-				end++
+		// add the line body if present
+		if end > beg {
+			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
+				out.Write(input[beg:end])
+			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
+				beg += refEnd
+				continue
+			} else {
+				expandTabs(&out, input[beg:end], tabSize)
 			}
+		}
 
-			beg = end
+		if end < len(input) && input[end] == '\r' {
+			end++
 		}
+		if end < len(input) && input[end] == '\n' {
+			end++
+		}
+		out.WriteByte('\n')
+
+		beg = end
 	}
 
 	// empty input?

A markdown_test.go

@@ -0,0 +1,38 @@
+//
+// Blackfriday Markdown Processor
+// Available at http://github.com/russross/blackfriday
+//
+// Copyright © 2011 Russ Ross <russ@russross.com>.
+// Distributed under the Simplified BSD License.
+// See README.md for details.
+//
+
+//
+// Unit tests for full document parsing and rendering
+//
+
+package blackfriday
+
+import "testing"
+
+func TestDocument(t *testing.T) {
+	var tests = []string{
+		// Empty document.
+		"",
+		"",
+
+		" ",
+		"",
+
+		// This shouldn't panic.
+		// https://github.com/russross/blackfriday/issues/172
+		"[]:<",
+		"<p>[]:&lt;</p>\n",
+
+		// This shouldn't panic.
+		// https://github.com/russross/blackfriday/issues/173
+		"   [",
+		"<p>[</p>\n",
+	}
+	doTests(t, tests)
+}

all repos — grayfriday @ fd97b7d32f610d16b5d2d355ca62d513f956e074

blackfriday fork with a few changes