icy does git — grayfriday: ae9562f685bb5b10588adac83dba61a25d420df6

move whitespace stripping to parser, not renderers

Russ Ross russ@russross.com

Wed, 29 Jun 2011 15:38:35 -0600

commit

ae9562f685bb5b10588adac83dba61a25d420df6

parent

d3c822509655d75b89de9a305a3771672395d588

4 files changed, 133 insertions(+), 148 deletions(-)

jump to

block.go

html.go

inline.go

inline_test.go

M block.go → block.go

@@ -258,7 +258,12 @@
 			if j > 0 {
 				size := i + j
 				if doRender {
-					parser.r.BlockHtml(out, data[:size])
+					// trim newlines
+					end := size
+					for end > 0 && data[end-1] == '\n' {
+						end--
+					}
+					parser.r.BlockHtml(out, data[:end])
 				}
 				return size
 			}
@@ -280,7 +285,12 @@ j = parser.isEmpty(data[i:])
 				if j > 0 {
 					size := i + j
 					if doRender {
-						parser.r.BlockHtml(out, data[:size])
+						// trim newlines
+						end := size
+						for end > 0 && data[end-1] == '\n' {
+							end--
+						}
+						parser.r.BlockHtml(out, data[:end])
 					}
 					return size
 				}
@@ -326,7 +336,12 @@ }
 
 	// the end of the block has been found
 	if doRender {
-		parser.r.BlockHtml(out, data[:i])
+		// trim newlines
+		end := i
+		for end > 0 && data[end-1] == '\n' {
+			end--
+		}
+		parser.r.BlockHtml(out, data[:end])
 	}
 
 	return i
@@ -931,11 +946,11 @@ end++
 	}
 
 	// get working buffers
-	var work bytes.Buffer
-	var inter bytes.Buffer
+	var rawItem bytes.Buffer
+	var parsed bytes.Buffer
 
 	// put the first line into the working buffer
-	work.Write(data[beg:end])
+	rawItem.Write(data[beg:end])
 	beg = end
 
 	// process the following lines
@@ -984,7 +999,7 @@ break
 			}
 
 			if sublist == 0 {
-				sublist = work.Len()
+				sublist = rawItem.Len()
 			}
 		} else {
 			// how about a nested prefix header?
@@ -1002,7 +1017,7 @@ *flags |= LIST_ITEM_END_OF_LIST
 					break
 				} else {
 					if containsBlankLine {
-						work.WriteByte('\n')
+						rawItem.WriteByte('\n')
 						containsBlock = true
 					}
 				}
@@ -1012,7 +1027,7 @@
 		containsBlankLine = false
 
 		// add the line into the working buffer without prefix
-		work.Write(data[beg+i : end])
+		rawItem.Write(data[beg+i : end])
 		beg = end
 	}
 
@@ -1021,27 +1036,32 @@ if containsBlock {
 		*flags |= LIST_ITEM_CONTAINS_BLOCK
 	}
 
-	workbytes := work.Bytes()
+	rawItemBytes := rawItem.Bytes()
 	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
 		// intermediate render of block li
-		if sublist > 0 && sublist < len(workbytes) {
-			parser.parseBlock(&inter, workbytes[:sublist])
-			parser.parseBlock(&inter, workbytes[sublist:])
+		if sublist > 0 && sublist < len(rawItemBytes) {
+			parser.parseBlock(&parsed, rawItemBytes[:sublist])
+			parser.parseBlock(&parsed, rawItemBytes[sublist:])
 		} else {
-			parser.parseBlock(&inter, workbytes)
+			parser.parseBlock(&parsed, rawItemBytes)
 		}
 	} else {
 		// intermediate render of inline li
-		if sublist > 0 && sublist < len(workbytes) {
-			parser.parseInline(&inter, workbytes[:sublist])
-			parser.parseBlock(&inter, workbytes[sublist:])
+		if sublist > 0 && sublist < len(rawItemBytes) {
+			parser.parseInline(&parsed, rawItemBytes[:sublist])
+			parser.parseBlock(&parsed, rawItemBytes[sublist:])
 		} else {
-			parser.parseInline(&inter, workbytes)
+			parser.parseInline(&parsed, rawItemBytes)
 		}
 	}
 
 	// render li itself
-	parser.r.ListItem(out, inter.Bytes(), *flags)
+	parsedBytes := parsed.Bytes()
+	parsedEnd := len(parsedBytes)
+	for parsedEnd > 0 && parsedBytes[parsedEnd-1] == '\n' {
+		parsedEnd--
+	}
+	parser.r.ListItem(out, parsedBytes[:parsedEnd], *flags)
 
 	return beg
 }

M html.go → html.go

@@ -19,6 +19,7 @@ import (
 	"bytes"
 	"fmt"
 	"strconv"
+	"strings"
 )
 
 const (
@@ -126,10 +127,7 @@ }
 
 func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
 	marker := out.Len()
-
-	if marker > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)
 
 	if options.flags&HTML_TOC != 0 {
 		// headerCount is incremented in htmlTocHeader
@@ -157,28 +155,13 @@ if options.flags&HTML_SKIP_HTML != 0 {
 		return
 	}
 
-	sz := len(text)
-	for sz > 0 && text[sz-1] == '\n' {
-		sz--
-	}
-	org := 0
-	for org < sz && text[org] == '\n' {
-		org++
-	}
-	if org >= sz {
-		return
-	}
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
-	out.Write(text[org:sz])
+	doubleSpace(out)
+	out.Write(text)
 	out.WriteByte('\n')
 }
 
 func (options *Html) HRule(out *bytes.Buffer) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)
 	out.WriteString("<hr")
 	out.WriteString(options.closeTag)
 }
@@ -192,44 +175,33 @@ }
 }
 
 func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
-
-	if lang != "" {
-		out.WriteString("<pre><code class=\"")
-
-		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
-			for i < len(lang) && isspace(lang[i]) {
-				i++
-			}
-
-			if i < len(lang) {
-				org := i
-				for i < len(lang) && !isspace(lang[i]) {
-					i++
-				}
-
-				if lang[org] == '.' {
-					org++
-				}
+	doubleSpace(out)
 
-				if cls > 0 {
-					out.WriteByte(' ')
-				}
-				attrEscape(out, []byte(lang[org:]))
-			}
+	// parse out the language names/classes
+	count := 0
+	for _, elt := range strings.Fields(lang) {
+		if elt[0] == '.' {
+			elt = elt[1:]
 		}
-
-		out.WriteString("\">")
-	} else {
-		out.WriteString("<pre><code>")
+		if len(elt) == 0 {
+			continue
+		}
+		if count == 0 {
+			out.WriteString("<pre><code class=\"")
+		} else {
+			out.WriteByte(' ')
+		}
+		attrEscape(out, []byte(elt))
+		count++
 	}
 
-	if len(text) > 0 {
-		attrEscape(out, text)
+	if count == 0 {
+		out.WriteString("<pre><code>")
+	} else {
+		out.WriteString("\">")
 	}
 
+	attrEscape(out, text)
 	out.WriteString("</code></pre>\n")
 }
 
@@ -252,33 +224,29 @@ * E.g.
  *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
  */
 func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
-
-	if len(lang) > 0 {
-		out.WriteString("<pre lang=\"")
+	doubleSpace(out)
 
-		i := 0
-		for i < len(lang) && !isspace(lang[i]) {
-			i++
+	// parse out the language name
+	count := 0
+	for _, elt := range strings.Fields(lang) {
+		if elt[0] == '.' {
+			elt = elt[1:]
 		}
-
-		if lang[0] == '.' {
-			attrEscape(out, []byte(lang[1:i]))
-		} else {
-			attrEscape(out, []byte(lang[:i]))
+		if len(elt) == 0 {
+			continue
 		}
-
+		out.WriteString("<pre lang=\"")
+		attrEscape(out, []byte(elt))
 		out.WriteString("\"><code>")
-	} else {
-		out.WriteString("<pre><code>")
+		count++
+		break
 	}
 
-	if len(text) > 0 {
-		attrEscape(out, text)
+	if count == 0 {
+		out.WriteString("<pre><code>")
 	}
 
+	attrEscape(out, text)
 	out.WriteString("</code></pre>\n")
 }
 
@@ -290,29 +258,23 @@ out.WriteString("</blockquote>")
 }
 
 func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
-	out.WriteString("<table><thead>\n")
+	doubleSpace(out)
+	out.WriteString("<table>\n<thead>\n")
 	out.Write(header)
-	out.WriteString("\n</thead><tbody>\n")
+	out.WriteString("\n</thead>\n<tbody>\n")
 	out.Write(body)
-	out.WriteString("\n</tbody></table>")
+	out.WriteString("\n</tbody>\n</table>")
 }
 
 func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)
 	out.WriteString("<tr>\n")
 	out.Write(text)
 	out.WriteString("\n</tr>")
 }
 
 func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)
 	switch align {
 	case TABLE_ALIGNMENT_LEFT:
 		out.WriteString("<td align=\"left\">")
@@ -330,10 +292,8 @@ }
 
 func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
 	marker := out.Len()
+	doubleSpace(out)
 
-	if marker > 0 {
-		out.WriteByte('\n')
-	}
 	if flags&LIST_TYPE_ORDERED != 0 {
 		out.WriteString("<ol>\n")
 	} else {
@@ -352,19 +312,13 @@ }
 
 func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
 	out.WriteString("<li>")
-	size := len(text)
-	for size > 0 && text[size-1] == '\n' {
-		size--
-	}
-	out.Write(text[:size])
+	out.Write(text)
 	out.WriteString("</li>\n")
 }
 
 func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
 	marker := out.Len()
-	if marker > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)
 
 	out.WriteString("<p>")
 	if !text() {
@@ -375,10 +329,11 @@ out.WriteString("</p>\n")
 }
 
 func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
-	if len(link) == 0 {
-		return
-	}
 	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
+		// mark it but don't link it if it is not a safe link: no smartypants
+		out.WriteString("<tt>")
+		attrEscape(out, link)
+		out.WriteString("</tt>")
 		return
 	}
 
@@ -389,16 +344,14 @@ }
 	attrEscape(out, link)
 	out.WriteString("\">")
 
-	/*
-	 * Pretty print: if we get an email address as
-	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
-	 * want to print the `mailto:` prefix
-	 */
+	// Pretty print: if we get an email address as
+	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
+	// want to print the `mailto:` prefix
 	switch {
 	case bytes.HasPrefix(link, []byte("mailto://")):
-		attrEscape(out, link[9:])
+		attrEscape(out, link[len("mailto://"):])
 	case bytes.HasPrefix(link, []byte("mailto:")):
-		attrEscape(out, link[7:])
+		attrEscape(out, link[len("mailto:"):])
 	default:
 		attrEscape(out, link)
 	}
@@ -413,9 +366,6 @@ out.WriteString("</code>")
 }
 
 func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
-	if len(text) == 0 {
-		return
-	}
 	out.WriteString("<strong>")
 	out.Write(text)
 	out.WriteString("</strong>")
@@ -435,9 +385,6 @@ if options.flags&HTML_SKIP_IMAGES != 0 {
 		return
 	}
 
-	if len(link) == 0 {
-		return
-	}
 	out.WriteString("<img src=\"")
 	attrEscape(out, link)
 	out.WriteString("\" alt=\"")
@@ -461,10 +408,18 @@ }
 
 func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
 	if options.flags&HTML_SKIP_LINKS != 0 {
+		// write the link text out but don't link it, just mark it with typewriter font
+		out.WriteString("<tt>")
+		attrEscape(out, content)
+		out.WriteString("</tt>")
 		return
 	}
 
 	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
+		// write the link text out but don't link it, just mark it with typewriter font
+		out.WriteString("<tt>")
+		attrEscape(out, content)
+		out.WriteString("</tt>")
 		return
 	}
 
@@ -497,18 +452,12 @@ out.Write(text)
 }
 
 func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
-	if len(text) == 0 {
-		return
-	}
 	out.WriteString("<strong><em>")
 	out.Write(text)
 	out.WriteString("</em></strong>")
 }
 
 func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
-	if len(text) == 0 {
-		return
-	}
 	out.WriteString("<del>")
 	out.Write(text)
 	out.WriteString("</del>")
@@ -718,3 +667,9 @@ }
 
 	return isspace(tag[i]) || tag[i] == '>'
 }
+
+func doubleSpace(out *bytes.Buffer) {
+	if out.Len() > 0 {
+		out.WriteByte('\n')
+	}
+}

M inline.go → inline.go

@@ -139,7 +139,9 @@ fEnd--
 	}
 
 	// render the code span
-	parser.r.CodeSpan(out, data[fBegin:fEnd])
+	if fBegin != fEnd {
+		parser.r.CodeSpan(out, data[fBegin:fEnd])
+	}
 
 	return end
 
@@ -409,7 +411,7 @@ uLink = uLinkBuf.Bytes()
 	}
 
 	// links need something to click on and somewhere to go
-	if len(uLink) == 0 || content.Len() == 0 {
+	if len(uLink) == 0 || (!isImg && content.Len() == 0) {
 		return 0
 	}
 
@@ -439,7 +441,9 @@ if end > 2 {
 		if altype != LINK_TYPE_NOT_AUTOLINK {
 			var uLink bytes.Buffer
 			unescapeText(&uLink, data[1:end+1-2])
-			parser.r.AutoLink(out, uLink.Bytes(), altype)
+			if uLink.Len() > 0 {
+				parser.r.AutoLink(out, uLink.Bytes(), altype)
+			}
 		} else {
 			parser.r.RawHtmlTag(out, data[:end])
 		}
@@ -611,7 +615,9 @@
 	var uLink bytes.Buffer
 	unescapeText(&uLink, data[:linkEnd])
 
-	parser.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
+	if uLink.Len() > 0 {
+		parser.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
+	}
 
 	return linkEnd - rewind
 }
@@ -879,11 +885,13 @@ if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
 			var work bytes.Buffer
 			parser.parseInline(&work, data[:i])
 
-			// pick the right renderer
-			if c == '~' {
-				parser.r.StrikeThrough(out, work.Bytes())
-			} else {
-				parser.r.DoubleEmphasis(out, work.Bytes())
+			if work.Len() > 0 {
+				// pick the right renderer
+				if c == '~' {
+					parser.r.StrikeThrough(out, work.Bytes())
+				} else {
+					parser.r.DoubleEmphasis(out, work.Bytes())
+				}
 			}
 			return i + 2
 		}
@@ -915,7 +923,9 @@ // triple symbol found
 			var work bytes.Buffer
 
 			parser.parseInline(&work, data[:i])
-			parser.r.TripleEmphasis(out, work.Bytes())
+			if work.Len() > 0 {
+				parser.r.TripleEmphasis(out, work.Bytes())
+			}
 			return i + 3
 		case (i+1 < len(data) && data[i+1] == c):
 			// double symbol found, hand over to emph1

M inline_test.go → inline_test.go

@@ -224,7 +224,7 @@ "a single multi-tick marker with ``` no text\n",
 		"<p>a single multi-tick marker with ``` no text</p>\n",
 
 		"markers with ` ` a space\n",
-		"<p>markers with <code></code> a space</p>\n",
+		"<p>markers with  a space</p>\n",
 
 		"`source code` and a `stray\n",
 		"<p><code>source code</code> and a `stray</p>\n",

all repos — grayfriday @ ae9562f685bb5b10588adac83dba61a25d420df6

blackfriday fork with a few changes