Tab expansion fixed to handle multibyte Unicode characters
Russ Ross russ@dixie.edu
Tue, 31 May 2011 12:04:58 -0600
1 file changed,
34 insertions(+),
9 deletions(-)
jump to
M
markdown.go
→
markdown.go
@@ -14,6 +14,7 @@ package blackfriday
import ( "bytes" + "utf8" ) // These are the supported markdown parsing extensions.@@ -436,19 +437,43 @@ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
} // Replace tab characters with spaces, aligning to the next TAB_SIZE column. -// TODO: count runes rather than bytes func expandTabs(out *bytes.Buffer, line []byte) { - i, tab := 0, 0 + // first, check for common cases: no tabs, or only tabs at beginning of line + i, prefix := 0, 0 + slowcase := false + for i = 0; i < len(line); i++ { + if line[i] == '\t' { + if prefix == i { + prefix++ + } else { + slowcase = true + break + } + } + } + // no need to decode runes if all tabs are at the beginning of the line + if !slowcase { + for i = 0; i < prefix*TAB_SIZE; i++ { + out.WriteByte(' ') + } + out.Write(line[prefix:]) + return + } + + // the slow case: we need to count runes to figure out how + // many spaces to insert for each tab + column := 0 for i < len(line) { - org := i + start := i for i < len(line) && line[i] != '\t' { - i++ - tab++ + _, size := utf8.DecodeRune(line[i:]) + i += size + column++ } - if i > org { - out.Write(line[org:i]) + if i > start { + out.Write(line[start:i]) } if i >= len(line) {@@ -457,8 +482,8 @@ }
for { out.WriteByte(' ') - tab++ - if tab%TAB_SIZE == 0 { + column++ + if column%TAB_SIZE == 0 { break } }