all repos — grayfriday @ 8226238289b24ab5a187e73ae27e22193581e35f

blackfriday fork with a few changes

Improve html element stripping code
Vytautas Šaltenis Vytautas.Shaltenis@gmail.com
Thu, 18 Apr 2013 03:15:47 +0300
commit

8226238289b24ab5a187e73ae27e22193581e35f

parent

079a55851d3097b4c045a49d7cbe6c9877babc00

2 files changed, 44 insertions(+), 9 deletions(-)

jump to
M html.go

@@ -176,13 +176,21 @@ }

out.WriteByte('\n') } -// This is a trivial implementation for the simplest possible case func stripTag(text, tag, newTag string) []byte { - openTag := fmt.Sprintf("<%s>", tag) - closeTag := fmt.Sprintf("</%s>", tag) - openNewTag := fmt.Sprintf("<%s>", newTag) closeNewTag := fmt.Sprintf("</%s>", newTag) - noOpen := strings.Replace(text, openTag, openNewTag, -1) + i := 0 + for i < len(text) && text[i] != '<' { + i++ + } + if i == len(text) { + return []byte(text) + } + found, end := findHtmlTagPos([]byte(text[i:]), tag) + closeTag := fmt.Sprintf("</%s>", tag) + noOpen := text + if found { + noOpen = text[0:i+1] + newTag + text[end:] + } return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1)) }

@@ -664,9 +672,14 @@ }

} func isHtmlTag(tag []byte, tagname string) bool { + found, _ := findHtmlTagPos(tag, tagname) + return found +} + +func findHtmlTagPos(tag []byte, tagname string) (bool, int) { i := 0 if i < len(tag) && tag[0] != '<' { - return false + return false, -1 } i++ i = skipSpace(tag, i)

@@ -683,15 +696,34 @@ break

} if strings.ToLower(string(tag[i]))[0] != tagname[j] { - return false + return false, -1 } } if i == len(tag) { - return false + return false, -1 } - return isspace(tag[i]) || tag[i] == '>' + // Now look for closing '>', but ignore it when it's in any kind of quotes, + // it might be JavaScript + inSingleQuote := false + inDoubleQuote := false + inGraveQuote := false + for i < len(tag) { + switch { + case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote: + return true, i + case tag[i] == '\'': + inSingleQuote = !inSingleQuote + case tag[i] == '"': + inDoubleQuote = !inDoubleQuote + case tag[i] == '`': + inGraveQuote = !inGraveQuote + } + i++ + } + + return false, -1 } func skipSpace(tag []byte, i int) int {
M inline_test.go

@@ -86,6 +86,9 @@

"<script>alert()</script>\n", "<p>alert()</p>\n", + "<script src='foo'></script>\n", + "<p></p>\n", + "zz <script src='foo'></script>\n", "<p>zz </p>\n",