all repos — grayfriday @ cc0d56d0920643c43d4fb60b26d62ac10893c6dd

blackfriday fork with a few changes

html.go (view raw)

//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// HTML rendering backend
//
//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"regexp"
 22	"strconv"
 23	"strings"
 24)
 25
// Html renderer configuration options.
//
// The first constant is 1 << iota and the following ones repeat that
// expression, so every option occupies its own bit. A set of options is
// formed by ORing them together and is passed as the flags argument of
// HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
 43
// Patterns used by sanitizeTag to decide whether a single HTML tag may be
// kept. All three compiled expressions are anchored with ^ and $, so they
// must match the whole tag.
var (
	// tags lists the element names that tagWhitelist accepts as a bare
	// opening or closing tag, i.e. "<name>" or "</name>" with no attributes.
	tags = []string{
		"b",
		"blockquote",
		"code",
		"del",
		"dd",
		"dl",
		"dt",
		"em",
		"h1",
		"h2",
		"h3",
		"h4",
		"h5",
		"h6",
		"i",
		"kbd",
		"li",
		"ol",
		"p",
		"pre",
		"s",
		"sup",
		"sub",
		"strong",
		"strike",
		"ul",
	}
	// urlRe is the pattern fragment for an accepted URL: http, https or ftp
	// followed by "://", or a single leading "/", followed by one or more
	// characters from a fixed set.
	// tagWhitelist accepts the bare tags listed above plus <br> and <hr>
	// (optionally self-closed).
	// anchorClean accepts </a> and <a href="URL"> with an optional title.
	// imgClean accepts <img src="URL"> with optional width, height, alt and
	// title attributes in exactly that order.
	urlRe        = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
	anchorClean  = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
	imgClean     = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
 78
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	//
	// tocMarker is the output offset recorded at the end of DocumentHeader;
	// DocumentFooter inserts the table of contents at that offset.
	// headerCount numbers the "toc_N" anchors and is incremented in TocHeader.
	// currentLevel is the current <ul> nesting depth while building toc.
	// toc accumulates the table-of-contents markup.
	tocMarker    int
	headerCount  int
	currentLevel int
	toc          *bytes.Buffer

	// smartypants is indexed by input byte in Smartypants to find the
	// substitution callback for that byte, if any.
	smartypants *smartypantsRenderer
}
 96
// Terminators written after singleton tags such as <hr and <br.
// HtmlRenderer selects xhtmlClose when HTML_USE_XHTML is set and htmlClose
// otherwise.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
101
102// HtmlRenderer creates and configures an Html object, which
103// satisfies the Renderer interface.
104//
105// flags is a set of HTML_* options ORed together.
106// title is the title of the document, and css is a URL for the document's
107// stylesheet.
108// title and css are only used when HTML_COMPLETE_PAGE is selected.
109func HtmlRenderer(flags int, title string, css string) Renderer {
110	// configure the rendering engine
111	closeTag := htmlClose
112	if flags&HTML_USE_XHTML != 0 {
113		closeTag = xhtmlClose
114	}
115
116	return &Html{
117		flags:    flags,
118		closeTag: closeTag,
119		title:    title,
120		css:      css,
121
122		headerCount:  0,
123		currentLevel: 0,
124		toc:          new(bytes.Buffer),
125
126		smartypants: smartypants(flags),
127	}
128}
129
// escapeSingleChar maps one byte to its HTML entity. It returns the entity
// and true for '"', '&', '<' and '>', and "" and false for any other byte.
//
// Historical note: this was originally written as an if chain because the
// author measured that as slightly faster than a switch (and roughly 3x
// faster than a map lookup) with the compiler of the time, attrEscape being
// the single largest CPU user in normal use. Benchmark before changing the
// form for performance reasons.
func escapeSingleChar(char byte) (string, bool) {
	switch char {
	case '"':
		return "&quot;", true
	case '&':
		return "&amp;", true
	case '<':
		return "&lt;", true
	case '>':
		return "&gt;", true
	}
	return "", false
}
149
150func attrEscape(out *bytes.Buffer, src []byte) {
151	org := 0
152	for i, ch := range src {
153		if entity, ok := escapeSingleChar(ch); ok {
154			if i > org {
155				// copy all the normal characters since the last escape
156				out.Write(src[org:i])
157			}
158			org = i + 1
159			out.WriteString(entity)
160		}
161	}
162	if org < len(src) {
163		out.Write(src[org:])
164	}
165}
166
// GetFlags returns the HTML_* option bits this renderer was configured with.
func (options *Html) GetFlags() int {
	return options.flags
}
170
171func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
172	marker := out.Len()
173	doubleSpace(out)
174
175	if options.flags&HTML_TOC != 0 {
176		// headerCount is incremented in htmlTocHeader
177		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
178	} else {
179		out.WriteString(fmt.Sprintf("<h%d>", level))
180	}
181
182	tocMarker := out.Len()
183	if !text() {
184		out.Truncate(marker)
185		return
186	}
187
188	// are we building a table of contents?
189	if options.flags&HTML_TOC != 0 {
190		options.TocHeader(out.Bytes()[tocMarker:], level)
191	}
192
193	out.WriteString(fmt.Sprintf("</h%d>\n", level))
194}
195
196func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
197	if options.flags&HTML_SKIP_HTML != 0 {
198		return
199	}
200
201	doubleSpace(out)
202	out.Write(text)
203	out.WriteByte('\n')
204}
205
206func (options *Html) HRule(out *bytes.Buffer) {
207	doubleSpace(out)
208	out.WriteString("<hr")
209	out.WriteString(options.closeTag)
210}
211
212func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
213	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
214		options.BlockCodeGithub(out, text, lang)
215	} else {
216		options.BlockCodeNormal(out, text, lang)
217	}
218}
219
220func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
221	doubleSpace(out)
222
223	// parse out the language names/classes
224	count := 0
225	for _, elt := range strings.Fields(lang) {
226		if elt[0] == '.' {
227			elt = elt[1:]
228		}
229		if len(elt) == 0 {
230			continue
231		}
232		if count == 0 {
233			out.WriteString("<pre><code class=\"")
234		} else {
235			out.WriteByte(' ')
236		}
237		attrEscape(out, []byte(elt))
238		count++
239	}
240
241	if count == 0 {
242		out.WriteString("<pre><code>")
243	} else {
244		out.WriteString("\">")
245	}
246
247	attrEscape(out, text)
248	out.WriteString("</code></pre>\n")
249}
250
251// GitHub style code block:
252//
253//              <pre lang="LANG"><code>
254//              ...
255//              </code></pre>
256//
257// Unlike other parsers, we store the language identifier in the <pre>,
258// and don't let the user generate custom classes.
259//
260// The language identifier in the <pre> block gets postprocessed and all
261// the code inside gets syntax highlighted with Pygments. This is much safer
262// than letting the user specify a CSS class for highlighting.
263//
264// Note that we only generate HTML for the first specifier.
265// E.g.
266//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
267func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
268	doubleSpace(out)
269
270	// parse out the language name
271	count := 0
272	for _, elt := range strings.Fields(lang) {
273		if elt[0] == '.' {
274			elt = elt[1:]
275		}
276		if len(elt) == 0 {
277			continue
278		}
279		out.WriteString("<pre lang=\"")
280		attrEscape(out, []byte(elt))
281		out.WriteString("\"><code>")
282		count++
283		break
284	}
285
286	if count == 0 {
287		out.WriteString("<pre><code>")
288	}
289
290	attrEscape(out, text)
291	out.WriteString("</code></pre>\n")
292}
293
294func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
295	doubleSpace(out)
296	out.WriteString("<blockquote>\n")
297	out.Write(text)
298	out.WriteString("</blockquote>\n")
299}
300
301func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
302	doubleSpace(out)
303	out.WriteString("<table>\n<thead>\n")
304	out.Write(header)
305	out.WriteString("</thead>\n\n<tbody>\n")
306	out.Write(body)
307	out.WriteString("</tbody>\n</table>\n")
308}
309
310func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
311	doubleSpace(out)
312	out.WriteString("<tr>\n")
313	out.Write(text)
314	out.WriteString("\n</tr>\n")
315}
316
317func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
318	doubleSpace(out)
319	switch align {
320	case TABLE_ALIGNMENT_LEFT:
321		out.WriteString("<th align=\"left\">")
322	case TABLE_ALIGNMENT_RIGHT:
323		out.WriteString("<th align=\"right\">")
324	case TABLE_ALIGNMENT_CENTER:
325		out.WriteString("<th align=\"center\">")
326	default:
327		out.WriteString("<th>")
328	}
329
330	out.Write(text)
331	out.WriteString("</th>")
332}
333
334func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
335	doubleSpace(out)
336	switch align {
337	case TABLE_ALIGNMENT_LEFT:
338		out.WriteString("<td align=\"left\">")
339	case TABLE_ALIGNMENT_RIGHT:
340		out.WriteString("<td align=\"right\">")
341	case TABLE_ALIGNMENT_CENTER:
342		out.WriteString("<td align=\"center\">")
343	default:
344		out.WriteString("<td>")
345	}
346
347	out.Write(text)
348	out.WriteString("</td>")
349}
350
351func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
352	out.WriteString("<div class=\"footnotes\">\n")
353	options.HRule(out)
354	options.List(out, text, LIST_TYPE_ORDERED)
355	out.WriteString("</div>\n")
356}
357
358func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
359	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
360		doubleSpace(out)
361	}
362	out.WriteString(`<li id="fn:`)
363	out.Write(slugify(name))
364	out.WriteString(`">`)
365	out.Write(text)
366	out.WriteString("</li>\n")
367}
368
369func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
370	marker := out.Len()
371	doubleSpace(out)
372
373	if flags&LIST_TYPE_ORDERED != 0 {
374		out.WriteString("<ol>")
375	} else {
376		out.WriteString("<ul>")
377	}
378	if !text() {
379		out.Truncate(marker)
380		return
381	}
382	if flags&LIST_TYPE_ORDERED != 0 {
383		out.WriteString("</ol>\n")
384	} else {
385		out.WriteString("</ul>\n")
386	}
387}
388
389func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
390	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
391		doubleSpace(out)
392	}
393	out.WriteString("<li>")
394	out.Write(text)
395	out.WriteString("</li>\n")
396}
397
398func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
399	marker := out.Len()
400	doubleSpace(out)
401
402	out.WriteString("<p>")
403	if !text() {
404		out.Truncate(marker)
405		return
406	}
407	out.WriteString("</p>\n")
408}
409
410func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
411	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
412		// mark it but don't link it if it is not a safe link: no smartypants
413		out.WriteString("<tt>")
414		attrEscape(out, link)
415		out.WriteString("</tt>")
416		return
417	}
418
419	out.WriteString("<a href=\"")
420	if kind == LINK_TYPE_EMAIL {
421		out.WriteString("mailto:")
422	}
423	attrEscape(out, link)
424	out.WriteString("\">")
425
426	// Pretty print: if we get an email address as
427	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
428	// want to print the `mailto:` prefix
429	switch {
430	case bytes.HasPrefix(link, []byte("mailto://")):
431		attrEscape(out, link[len("mailto://"):])
432	case bytes.HasPrefix(link, []byte("mailto:")):
433		attrEscape(out, link[len("mailto:"):])
434	default:
435		attrEscape(out, link)
436	}
437
438	out.WriteString("</a>")
439}
440
441func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
442	out.WriteString("<code>")
443	attrEscape(out, text)
444	out.WriteString("</code>")
445}
446
447func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
448	out.WriteString("<strong>")
449	out.Write(text)
450	out.WriteString("</strong>")
451}
452
453func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
454	if len(text) == 0 {
455		return
456	}
457	out.WriteString("<em>")
458	out.Write(text)
459	out.WriteString("</em>")
460}
461
462func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
463	if options.flags&HTML_SKIP_IMAGES != 0 {
464		return
465	}
466
467	out.WriteString("<img src=\"")
468	attrEscape(out, link)
469	out.WriteString("\" alt=\"")
470	if len(alt) > 0 {
471		attrEscape(out, alt)
472	}
473	if len(title) > 0 {
474		out.WriteString("\" title=\"")
475		attrEscape(out, title)
476	}
477
478	out.WriteByte('"')
479	out.WriteString(options.closeTag)
480	return
481}
482
483func (options *Html) LineBreak(out *bytes.Buffer) {
484	out.WriteString("<br")
485	out.WriteString(options.closeTag)
486}
487
488func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
489	if options.flags&HTML_SKIP_LINKS != 0 {
490		// write the link text out but don't link it, just mark it with typewriter font
491		out.WriteString("<tt>")
492		attrEscape(out, content)
493		out.WriteString("</tt>")
494		return
495	}
496
497	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
498		// write the link text out but don't link it, just mark it with typewriter font
499		out.WriteString("<tt>")
500		attrEscape(out, content)
501		out.WriteString("</tt>")
502		return
503	}
504
505	out.WriteString("<a href=\"")
506	attrEscape(out, link)
507	if len(title) > 0 {
508		out.WriteString("\" title=\"")
509		attrEscape(out, title)
510	}
511	out.WriteString("\">")
512	out.Write(content)
513	out.WriteString("</a>")
514	return
515}
516
517func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
518	if options.flags&HTML_SKIP_HTML != 0 {
519		return
520	}
521	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
522		return
523	}
524	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
525		return
526	}
527	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
528		return
529	}
530	out.Write(text)
531}
532
533func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
534	out.WriteString("<strong><em>")
535	out.Write(text)
536	out.WriteString("</em></strong>")
537}
538
539func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
540	out.WriteString("<del>")
541	out.Write(text)
542	out.WriteString("</del>")
543}
544
545func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
546	slug := slugify(ref)
547	out.WriteString(`<sup class="footnote-ref" id="fnref:`)
548	out.Write(slug)
549	out.WriteString(`"><a rel="footnote" href="#fn:`)
550	out.Write(slug)
551	out.WriteString(`">`)
552	out.WriteString(strconv.Itoa(id))
553	out.WriteString(`</a></sup>`)
554}
555
// Entity writes entity to out unchanged; no escaping is applied.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
559
560func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
561	if options.flags&HTML_USE_SMARTYPANTS != 0 {
562		options.Smartypants(out, text)
563	} else {
564		attrEscape(out, text)
565	}
566}
567
568func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
569	smrt := smartypantsData{false, false}
570
571	// first do normal entity escaping
572	var escaped bytes.Buffer
573	attrEscape(&escaped, text)
574	text = escaped.Bytes()
575
576	mark := 0
577	for i := 0; i < len(text); i++ {
578		if action := options.smartypants[text[i]]; action != nil {
579			if i > mark {
580				out.Write(text[mark:i])
581			}
582
583			previousChar := byte(0)
584			if i > 0 {
585				previousChar = text[i-1]
586			}
587			i += action(out, &smrt, previousChar, text[i:])
588			mark = i + 1
589		}
590	}
591
592	if mark < len(text) {
593		out.Write(text[mark:])
594	}
595}
596
597func (options *Html) DocumentHeader(out *bytes.Buffer) {
598	if options.flags&HTML_COMPLETE_PAGE == 0 {
599		return
600	}
601
602	ending := ""
603	if options.flags&HTML_USE_XHTML != 0 {
604		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
605		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
606		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
607		ending = " /"
608	} else {
609		out.WriteString("<!DOCTYPE html>\n")
610		out.WriteString("<html>\n")
611	}
612	out.WriteString("<head>\n")
613	out.WriteString("  <title>")
614	options.NormalText(out, []byte(options.title))
615	out.WriteString("</title>\n")
616	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
617	out.WriteString(VERSION)
618	out.WriteString("\"")
619	out.WriteString(ending)
620	out.WriteString(">\n")
621	out.WriteString("  <meta charset=\"utf-8\"")
622	out.WriteString(ending)
623	out.WriteString(">\n")
624	if options.css != "" {
625		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
626		attrEscape(out, []byte(options.css))
627		out.WriteString("\"")
628		out.WriteString(ending)
629		out.WriteString(">\n")
630	}
631	out.WriteString("</head>\n")
632	out.WriteString("<body>\n")
633
634	options.tocMarker = out.Len()
635}
636
637func (options *Html) DocumentFooter(out *bytes.Buffer) {
638	// finalize and insert the table of contents
639	if options.flags&HTML_TOC != 0 {
640		options.TocFinalize()
641
642		// now we have to insert the table of contents into the document
643		var temp bytes.Buffer
644
645		// start by making a copy of everything after the document header
646		temp.Write(out.Bytes()[options.tocMarker:])
647
648		// now clear the copied material from the main output buffer
649		out.Truncate(options.tocMarker)
650
651		// corner case spacing issue
652		if options.flags&HTML_COMPLETE_PAGE != 0 {
653			out.WriteByte('\n')
654		}
655
656		// insert the table of contents
657		out.WriteString("<nav>\n")
658		out.Write(options.toc.Bytes())
659		out.WriteString("</nav>\n")
660
661		// corner case spacing issue
662		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
663			out.WriteByte('\n')
664		}
665
666		// write out everything that came after it
667		if options.flags&HTML_OMIT_CONTENTS == 0 {
668			out.Write(temp.Bytes())
669		}
670	}
671
672	if options.flags&HTML_COMPLETE_PAGE != 0 {
673		out.WriteString("\n</body>\n")
674		out.WriteString("</html>\n")
675	}
676
677}
678
679func (options *Html) TocHeader(text []byte, level int) {
680	for level > options.currentLevel {
681		switch {
682		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
683			// this sublist can nest underneath a header
684			size := options.toc.Len()
685			options.toc.Truncate(size - len("</li>\n"))
686
687		case options.currentLevel > 0:
688			options.toc.WriteString("<li>")
689		}
690		if options.toc.Len() > 0 {
691			options.toc.WriteByte('\n')
692		}
693		options.toc.WriteString("<ul>\n")
694		options.currentLevel++
695	}
696
697	for level < options.currentLevel {
698		options.toc.WriteString("</ul>")
699		if options.currentLevel > 1 {
700			options.toc.WriteString("</li>\n")
701		}
702		options.currentLevel--
703	}
704
705	options.toc.WriteString("<li><a href=\"#toc_")
706	options.toc.WriteString(strconv.Itoa(options.headerCount))
707	options.toc.WriteString("\">")
708	options.headerCount++
709
710	options.toc.Write(text)
711
712	options.toc.WriteString("</a></li>\n")
713}
714
715func (options *Html) TocFinalize() {
716	for options.currentLevel > 1 {
717		options.toc.WriteString("</ul></li>\n")
718		options.currentLevel--
719	}
720
721	if options.currentLevel > 0 {
722		options.toc.WriteString("</ul>\n")
723	}
724}
725
// isHtmlTag reports whether findHtmlTagPos recognises tag as an opening or
// closing tag for tagname. The position returned by findHtmlTagPos is
// discarded.
func isHtmlTag(tag []byte, tagname string) bool {
	found, _ := findHtmlTagPos(tag, tagname)
	return found
}
730
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of
// char in html at or after start that is not inside a quoted run. Quoted
// text is skipped because it might be JavaScript or an attribute value that
// legitimately contains char.
//
// A quoted run starts at ', " or ` and ends at the next occurrence of the
// same character. Other quote characters inside the run are ordinary
// content, so an apostrophe inside a double-quoted value (title="it's")
// does not open a second quote.
//
// If no unquoted occurrence exists, start is returned. Callers therefore
// cannot tell "found at start" from "not found" by the index alone.
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var quote byte // the quote character currently open, or 0 outside quotes
	for i := start; i < len(html); i++ {
		c := html[i]
		switch {
		case quote != 0:
			// inside a quoted run: only the matching quote is significant
			if c == quote {
				quote = 0
			}
		case c == char:
			return i
		case c == '\'' || c == '"' || c == '`':
			quote = c
		}
	}
	return start
}
753
754func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
755	i := 0
756	if i < len(tag) && tag[0] != '<' {
757		return false, -1
758	}
759	i++
760	i = skipSpace(tag, i)
761
762	if i < len(tag) && tag[i] == '/' {
763		i++
764	}
765
766	i = skipSpace(tag, i)
767	j := 0
768	for ; i < len(tag); i, j = i+1, j+1 {
769		if j >= len(tagname) {
770			break
771		}
772
773		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
774			return false, -1
775		}
776	}
777
778	if i == len(tag) {
779		return false, -1
780	}
781
782	rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
783	if rightAngle > i {
784		return true, rightAngle
785	}
786
787	return false, -1
788}
789
790func sanitizeHtml(html []byte) []byte {
791	var result []byte
792	for string(html) != "" {
793		skip, tag, rest := findHtmlTag(html)
794		html = rest
795		result = append(result, skip...)
796		result = append(result, sanitizeTag(tag)...)
797	}
798	return append(result, []byte("\n")...)
799}
800
801func sanitizeTag(tag []byte) []byte {
802	if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
803		return tag
804	} else {
805		return []byte("")
806	}
807}
808
// skipUntilChar returns the index of the first occurrence of char in text
// at or after start. If there is none it returns len(text), or start itself
// when start is already at or past the end of text.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
816
817func findHtmlTag(html []byte) (skip, tag, rest []byte) {
818	start := skipUntilChar(html, 0, '<')
819	rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
820	if rightAngle > start {
821		skip = html[0:start]
822		tag = html[start : rightAngle+1]
823		rest = html[rightAngle+1:]
824		return
825	}
826
827	return []byte(""), []byte(""), []byte("")
828}
829
830func skipSpace(tag []byte, i int) int {
831	for i < len(tag) && isspace(tag[i]) {
832		i++
833	}
834	return i
835}
836
// doubleSpace appends a newline to out unless out is empty, so that block
// elements are separated without producing a leading newline at the start
// of the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}