icy does git — grayfriday: 9f1d6199060129cf6c3ff4a530ab0f68825a6bfa

tab expansion fixed to handle multibyte unicode characters

Russ Ross russ@dixie.edu

Tue, 31 May 2011 12:04:58 -0600

commit

9f1d6199060129cf6c3ff4a530ab0f68825a6bfa

parent

f3386eb8498907af0d96472f3c75637e3badabf2

1 file changed, 34 insertions(+), 9 deletions(-)

jump to

markdown.go

M markdown.go → markdown.go

@@ -14,6 +14,7 @@ package blackfriday
 
 import (
 	"bytes"
+	"utf8"
 )
 
 // These are the supported markdown parsing extensions.
@@ -436,19 +437,43 @@ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 }
 
 // Replace tab characters with spaces, aligning to the next TAB_SIZE column.
-// TODO: count runes rather than bytes
 func expandTabs(out *bytes.Buffer, line []byte) {
-	i, tab := 0, 0
+	// first, check for common cases: no tabs, or only tabs at beginning of line
+	i, prefix := 0, 0
+	slowcase := false
+	for i = 0; i < len(line); i++ {
+		if line[i] == '\t' {
+			if prefix == i {
+				prefix++
+			} else {
+				slowcase = true
+				break
+			}
+		}
+	}
 
+	// no need to decode runes if all tabs are at the beginning of the line
+	if !slowcase {
+		for i = 0; i < prefix*TAB_SIZE; i++ {
+			out.WriteByte(' ')
+		}
+		out.Write(line[prefix:])
+		return
+	}
+
+	// the slow case: we need to count runes to figure out how
+	// many spaces to insert for each tab
+	column := 0
 	for i < len(line) {
-		org := i
+		start := i
 		for i < len(line) && line[i] != '\t' {
-			i++
-			tab++
+			_, size := utf8.DecodeRune(line[i:])
+			i += size
+			column++
 		}
 
-		if i > org {
-			out.Write(line[org:i])
+		if i > start {
+			out.Write(line[start:i])
 		}
 
 		if i >= len(line) {
@@ -457,8 +482,8 @@ }
 
 		for {
 			out.WriteByte(' ')
-			tab++
-			if tab%TAB_SIZE == 0 {
+			column++
+			if column%TAB_SIZE == 0 {
 				break
 			}
 		}

all repos — grayfriday @ 9f1d6199060129cf6c3ff4a530ab0f68825a6bfa

blackfriday fork with a few changes