all repos — grayfriday @ 9fc8c9d8660c52c8390ba80e50556577e179b984

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"regexp"
 22	"strconv"
 23	"strings"
 24)
 25
 26// Html renderer configuration options.
 27const (
 28	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
 29	HTML_SKIP_STYLE                           // skip embedded <style> elements
 30	HTML_SKIP_IMAGES                          // skip embedded images
 31	HTML_SKIP_LINKS                           // skip all links
 32	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
 33	HTML_SAFELINK                             // only link to trusted protocols
 34	HTML_TOC                                  // generate a table of contents
 35	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
 36	HTML_COMPLETE_PAGE                        // generate a complete HTML page
 37	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
 38	HTML_USE_XHTML                            // generate XHTML output instead of HTML
 39	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
 40	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
 41	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
 42)
 43
 44var (
 45	tags  = []string{
 46		"b",
 47		"blockquote",
 48		"code",
 49		"del",
 50		"dd",
 51		"dl",
 52		"dt",
 53		"em",
 54		"h1",
 55		"h2",
 56		"h3",
 57		"h4",
 58		"h5",
 59		"h6",
 60		"i",
 61		"kbd",
 62		"li",
 63		"ol",
 64		"p",
 65		"pre",
 66		"s",
 67		"sup",
 68		"sub",
 69		"strong",
 70		"strike",
 71		"ul",
 72	}
 73	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
 74	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
 75	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
 76	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
 77)
 78
 79// Html is a type that implements the Renderer interface for HTML output.
 80//
 81// Do not create this directly, instead use the HtmlRenderer function.
 82type Html struct {
 83	flags    int    // HTML_* options
 84	closeTag string // how to end singleton tags: either " />\n" or ">\n"
 85	title    string // document title
 86	css      string // optional css file url (used with HTML_COMPLETE_PAGE)
 87
 88	// table of contents data
 89	tocMarker    int
 90	headerCount  int
 91	currentLevel int
 92	toc          *bytes.Buffer
 93
 94	smartypants *smartypantsRenderer
 95}
 96
 97const (
 98	xhtmlClose = " />\n"
 99	htmlClose  = ">\n"
100)
101
102// HtmlRenderer creates and configures an Html object, which
103// satisfies the Renderer interface.
104//
105// flags is a set of HTML_* options ORed together.
106// title is the title of the document, and css is a URL for the document's
107// stylesheet.
108// title and css are only used when HTML_COMPLETE_PAGE is selected.
109func HtmlRenderer(flags int, title string, css string) Renderer {
110	// configure the rendering engine
111	closeTag := htmlClose
112	if flags&HTML_USE_XHTML != 0 {
113		closeTag = xhtmlClose
114	}
115
116	return &Html{
117		flags:    flags,
118		closeTag: closeTag,
119		title:    title,
120		css:      css,
121
122		headerCount:  0,
123		currentLevel: 0,
124		toc:          new(bytes.Buffer),
125
126		smartypants: smartypants(flags),
127	}
128}
129
// attrEscape copies src to out, replacing the characters ", &, < and > with
// the entities &quot;, &amp;, &lt; and &gt;. Every other byte is copied
// through unchanged.
func attrEscape(out *bytes.Buffer, src []byte) {
	// pending is the start of the run of ordinary bytes not yet written, so
	// that unescaped stretches are copied with a single Write.
	pending := 0
	for pos, c := range src {
		var entity string
		switch c {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		if pos > pending {
			out.Write(src[pending:pos])
		}
		out.WriteString(entity)
		pending = pos + 1
	}

	// flush whatever follows the last escaped character
	if pending < len(src) {
		out.Write(src[pending:])
	}
}
175
176func (options *Html) GetFlags() int {
177	return options.flags
178}
179
180func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
181	marker := out.Len()
182	doubleSpace(out)
183
184	if options.flags&HTML_TOC != 0 {
185		// headerCount is incremented in htmlTocHeader
186		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
187	} else {
188		out.WriteString(fmt.Sprintf("<h%d>", level))
189	}
190
191	tocMarker := out.Len()
192	if !text() {
193		out.Truncate(marker)
194		return
195	}
196
197	// are we building a table of contents?
198	if options.flags&HTML_TOC != 0 {
199		options.TocHeader(out.Bytes()[tocMarker:], level)
200	}
201
202	out.WriteString(fmt.Sprintf("</h%d>\n", level))
203}
204
205func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
206	if options.flags&HTML_SKIP_HTML != 0 {
207		return
208	}
209
210	doubleSpace(out)
211	out.Write(text)
212	out.WriteByte('\n')
213}
214
215func (options *Html) HRule(out *bytes.Buffer) {
216	doubleSpace(out)
217	out.WriteString("<hr")
218	out.WriteString(options.closeTag)
219}
220
221func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
222	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
223		options.BlockCodeGithub(out, text, lang)
224	} else {
225		options.BlockCodeNormal(out, text, lang)
226	}
227}
228
229func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
230	doubleSpace(out)
231
232	// parse out the language names/classes
233	count := 0
234	for _, elt := range strings.Fields(lang) {
235		if elt[0] == '.' {
236			elt = elt[1:]
237		}
238		if len(elt) == 0 {
239			continue
240		}
241		if count == 0 {
242			out.WriteString("<pre><code class=\"")
243		} else {
244			out.WriteByte(' ')
245		}
246		attrEscape(out, []byte(elt))
247		count++
248	}
249
250	if count == 0 {
251		out.WriteString("<pre><code>")
252	} else {
253		out.WriteString("\">")
254	}
255
256	attrEscape(out, text)
257	out.WriteString("</code></pre>\n")
258}
259
260// GitHub style code block:
261//
262//              <pre lang="LANG"><code>
263//              ...
264//              </code></pre>
265//
266// Unlike other parsers, we store the language identifier in the <pre>,
267// and don't let the user generate custom classes.
268//
269// The language identifier in the <pre> block gets postprocessed and all
270// the code inside gets syntax highlighted with Pygments. This is much safer
271// than letting the user specify a CSS class for highlighting.
272//
273// Note that we only generate HTML for the first specifier.
274// E.g.
275//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
276func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
277	doubleSpace(out)
278
279	// parse out the language name
280	count := 0
281	for _, elt := range strings.Fields(lang) {
282		if elt[0] == '.' {
283			elt = elt[1:]
284		}
285		if len(elt) == 0 {
286			continue
287		}
288		out.WriteString("<pre lang=\"")
289		attrEscape(out, []byte(elt))
290		out.WriteString("\"><code>")
291		count++
292		break
293	}
294
295	if count == 0 {
296		out.WriteString("<pre><code>")
297	}
298
299	attrEscape(out, text)
300	out.WriteString("</code></pre>\n")
301}
302
303func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
304	doubleSpace(out)
305	out.WriteString("<blockquote>\n")
306	out.Write(text)
307	out.WriteString("</blockquote>\n")
308}
309
310func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
311	doubleSpace(out)
312	out.WriteString("<table>\n<thead>\n")
313	out.Write(header)
314	out.WriteString("</thead>\n\n<tbody>\n")
315	out.Write(body)
316	out.WriteString("</tbody>\n</table>\n")
317}
318
319func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
320	doubleSpace(out)
321	out.WriteString("<tr>\n")
322	out.Write(text)
323	out.WriteString("\n</tr>\n")
324}
325
326func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
327	doubleSpace(out)
328	switch align {
329	case TABLE_ALIGNMENT_LEFT:
330		out.WriteString("<th align=\"left\">")
331	case TABLE_ALIGNMENT_RIGHT:
332		out.WriteString("<th align=\"right\">")
333	case TABLE_ALIGNMENT_CENTER:
334		out.WriteString("<th align=\"center\">")
335	default:
336		out.WriteString("<th>")
337	}
338
339	out.Write(text)
340	out.WriteString("</th>")
341}
342
343func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
344	doubleSpace(out)
345	switch align {
346	case TABLE_ALIGNMENT_LEFT:
347		out.WriteString("<td align=\"left\">")
348	case TABLE_ALIGNMENT_RIGHT:
349		out.WriteString("<td align=\"right\">")
350	case TABLE_ALIGNMENT_CENTER:
351		out.WriteString("<td align=\"center\">")
352	default:
353		out.WriteString("<td>")
354	}
355
356	out.Write(text)
357	out.WriteString("</td>")
358}
359
360func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
361	out.WriteString("<div class=\"footnotes\">\n")
362	options.HRule(out)
363	options.List(out, text, LIST_TYPE_ORDERED)
364	out.WriteString("</div>\n")
365}
366
367func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
368	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
369		doubleSpace(out)
370	}
371	out.WriteString(`<li id="fn:`)
372	out.Write(slugify(name))
373	out.WriteString(`">`)
374	out.Write(text)
375	out.WriteString("</li>\n")
376}
377
378func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
379	marker := out.Len()
380	doubleSpace(out)
381
382	if flags&LIST_TYPE_ORDERED != 0 {
383		out.WriteString("<ol>")
384	} else {
385		out.WriteString("<ul>")
386	}
387	if !text() {
388		out.Truncate(marker)
389		return
390	}
391	if flags&LIST_TYPE_ORDERED != 0 {
392		out.WriteString("</ol>\n")
393	} else {
394		out.WriteString("</ul>\n")
395	}
396}
397
398func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
399	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
400		doubleSpace(out)
401	}
402	out.WriteString("<li>")
403	out.Write(text)
404	out.WriteString("</li>\n")
405}
406
407func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
408	marker := out.Len()
409	doubleSpace(out)
410
411	out.WriteString("<p>")
412	if !text() {
413		out.Truncate(marker)
414		return
415	}
416	out.WriteString("</p>\n")
417}
418
419func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
420	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
421		// mark it but don't link it if it is not a safe link: no smartypants
422		out.WriteString("<tt>")
423		attrEscape(out, link)
424		out.WriteString("</tt>")
425		return
426	}
427
428	out.WriteString("<a href=\"")
429	if kind == LINK_TYPE_EMAIL {
430		out.WriteString("mailto:")
431	}
432	attrEscape(out, link)
433	out.WriteString("\">")
434
435	// Pretty print: if we get an email address as
436	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
437	// want to print the `mailto:` prefix
438	switch {
439	case bytes.HasPrefix(link, []byte("mailto://")):
440		attrEscape(out, link[len("mailto://"):])
441	case bytes.HasPrefix(link, []byte("mailto:")):
442		attrEscape(out, link[len("mailto:"):])
443	default:
444		attrEscape(out, link)
445	}
446
447	out.WriteString("</a>")
448}
449
450func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
451	out.WriteString("<code>")
452	attrEscape(out, text)
453	out.WriteString("</code>")
454}
455
456func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
457	out.WriteString("<strong>")
458	out.Write(text)
459	out.WriteString("</strong>")
460}
461
462func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
463	if len(text) == 0 {
464		return
465	}
466	out.WriteString("<em>")
467	out.Write(text)
468	out.WriteString("</em>")
469}
470
471func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
472	if options.flags&HTML_SKIP_IMAGES != 0 {
473		return
474	}
475
476	out.WriteString("<img src=\"")
477	attrEscape(out, link)
478	out.WriteString("\" alt=\"")
479	if len(alt) > 0 {
480		attrEscape(out, alt)
481	}
482	if len(title) > 0 {
483		out.WriteString("\" title=\"")
484		attrEscape(out, title)
485	}
486
487	out.WriteByte('"')
488	out.WriteString(options.closeTag)
489	return
490}
491
492func (options *Html) LineBreak(out *bytes.Buffer) {
493	out.WriteString("<br")
494	out.WriteString(options.closeTag)
495}
496
497func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
498	if options.flags&HTML_SKIP_LINKS != 0 {
499		// write the link text out but don't link it, just mark it with typewriter font
500		out.WriteString("<tt>")
501		attrEscape(out, content)
502		out.WriteString("</tt>")
503		return
504	}
505
506	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
507		// write the link text out but don't link it, just mark it with typewriter font
508		out.WriteString("<tt>")
509		attrEscape(out, content)
510		out.WriteString("</tt>")
511		return
512	}
513
514	out.WriteString("<a href=\"")
515	attrEscape(out, link)
516	if len(title) > 0 {
517		out.WriteString("\" title=\"")
518		attrEscape(out, title)
519	}
520	out.WriteString("\">")
521	out.Write(content)
522	out.WriteString("</a>")
523	return
524}
525
526func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
527	if options.flags&HTML_SKIP_HTML != 0 {
528		return
529	}
530	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
531		return
532	}
533	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
534		return
535	}
536	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
537		return
538	}
539	out.Write(text)
540}
541
542func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
543	out.WriteString("<strong><em>")
544	out.Write(text)
545	out.WriteString("</em></strong>")
546}
547
548func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
549	out.WriteString("<del>")
550	out.Write(text)
551	out.WriteString("</del>")
552}
553
554func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
555	slug := slugify(ref)
556	out.WriteString(`<sup class="footnote-ref" id="fnref:`)
557	out.Write(slug)
558	out.WriteString(`"><a rel="footnote" href="#fn:`)
559	out.Write(slug)
560	out.WriteString(`">`)
561	out.WriteString(strconv.Itoa(id))
562	out.WriteString(`</a></sup>`)
563}
564
565func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
566	out.Write(entity)
567}
568
569func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
570	if options.flags&HTML_USE_SMARTYPANTS != 0 {
571		options.Smartypants(out, text)
572	} else {
573		attrEscape(out, text)
574	}
575}
576
577func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
578	smrt := smartypantsData{false, false}
579
580	// first do normal entity escaping
581	var escaped bytes.Buffer
582	attrEscape(&escaped, text)
583	text = escaped.Bytes()
584
585	mark := 0
586	for i := 0; i < len(text); i++ {
587		if action := options.smartypants[text[i]]; action != nil {
588			if i > mark {
589				out.Write(text[mark:i])
590			}
591
592			previousChar := byte(0)
593			if i > 0 {
594				previousChar = text[i-1]
595			}
596			i += action(out, &smrt, previousChar, text[i:])
597			mark = i + 1
598		}
599	}
600
601	if mark < len(text) {
602		out.Write(text[mark:])
603	}
604}
605
606func (options *Html) DocumentHeader(out *bytes.Buffer) {
607	if options.flags&HTML_COMPLETE_PAGE == 0 {
608		return
609	}
610
611	ending := ""
612	if options.flags&HTML_USE_XHTML != 0 {
613		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
614		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
615		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
616		ending = " /"
617	} else {
618		out.WriteString("<!DOCTYPE html>\n")
619		out.WriteString("<html>\n")
620	}
621	out.WriteString("<head>\n")
622	out.WriteString("  <title>")
623	options.NormalText(out, []byte(options.title))
624	out.WriteString("</title>\n")
625	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
626	out.WriteString(VERSION)
627	out.WriteString("\"")
628	out.WriteString(ending)
629	out.WriteString(">\n")
630	out.WriteString("  <meta charset=\"utf-8\"")
631	out.WriteString(ending)
632	out.WriteString(">\n")
633	if options.css != "" {
634		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
635		attrEscape(out, []byte(options.css))
636		out.WriteString("\"")
637		out.WriteString(ending)
638		out.WriteString(">\n")
639	}
640	out.WriteString("</head>\n")
641	out.WriteString("<body>\n")
642
643	options.tocMarker = out.Len()
644}
645
646func (options *Html) DocumentFooter(out *bytes.Buffer) {
647	// finalize and insert the table of contents
648	if options.flags&HTML_TOC != 0 {
649		options.TocFinalize()
650
651		// now we have to insert the table of contents into the document
652		var temp bytes.Buffer
653
654		// start by making a copy of everything after the document header
655		temp.Write(out.Bytes()[options.tocMarker:])
656
657		// now clear the copied material from the main output buffer
658		out.Truncate(options.tocMarker)
659
660		// corner case spacing issue
661		if options.flags&HTML_COMPLETE_PAGE != 0 {
662			out.WriteByte('\n')
663		}
664
665		// insert the table of contents
666		out.WriteString("<nav>\n")
667		out.Write(options.toc.Bytes())
668		out.WriteString("</nav>\n")
669
670		// corner case spacing issue
671		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
672			out.WriteByte('\n')
673		}
674
675		// write out everything that came after it
676		if options.flags&HTML_OMIT_CONTENTS == 0 {
677			out.Write(temp.Bytes())
678		}
679	}
680
681	if options.flags&HTML_COMPLETE_PAGE != 0 {
682		out.WriteString("\n</body>\n")
683		out.WriteString("</html>\n")
684	}
685
686}
687
688func (options *Html) TocHeader(text []byte, level int) {
689	for level > options.currentLevel {
690		switch {
691		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
692			// this sublist can nest underneath a header
693			size := options.toc.Len()
694			options.toc.Truncate(size - len("</li>\n"))
695
696		case options.currentLevel > 0:
697			options.toc.WriteString("<li>")
698		}
699		if options.toc.Len() > 0 {
700			options.toc.WriteByte('\n')
701		}
702		options.toc.WriteString("<ul>\n")
703		options.currentLevel++
704	}
705
706	for level < options.currentLevel {
707		options.toc.WriteString("</ul>")
708		if options.currentLevel > 1 {
709			options.toc.WriteString("</li>\n")
710		}
711		options.currentLevel--
712	}
713
714	options.toc.WriteString("<li><a href=\"#toc_")
715	options.toc.WriteString(strconv.Itoa(options.headerCount))
716	options.toc.WriteString("\">")
717	options.headerCount++
718
719	options.toc.Write(text)
720
721	options.toc.WriteString("</a></li>\n")
722}
723
724func (options *Html) TocFinalize() {
725	for options.currentLevel > 1 {
726		options.toc.WriteString("</ul></li>\n")
727		options.currentLevel--
728	}
729
730	if options.currentLevel > 0 {
731		options.toc.WriteString("</ul>\n")
732	}
733}
734
735func isHtmlTag(tag []byte, tagname string) bool {
736	found, _ := findHtmlTagPos(tag, tagname)
737	return found
738}
739
// skipUntilCharIgnoreQuotes looks for char in html starting at index start,
// ignoring occurrences inside single, double or backtick quotes (the quoted
// text might be JavaScript). Each of the three quote kinds is tracked
// independently and toggles on every occurrence of its character.
//
// It returns the index of the first unquoted occurrence. When there is none
// it returns start, so a caller cannot tell "not found" apart from "found at
// start" without looking at html[start].
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var inSingle, inDouble, inGrave bool

	for pos := start; pos < len(html); pos++ {
		c := html[pos]
		if c == char && !(inSingle || inDouble || inGrave) {
			return pos
		}

		switch c {
		case '\'':
			inSingle = !inSingle
		case '"':
			inDouble = !inDouble
		case '`':
			inGrave = !inGrave
		}
	}

	return start
}
762
763func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
764	i := 0
765	if i < len(tag) && tag[0] != '<' {
766		return false, -1
767	}
768	i++
769	i = skipSpace(tag, i)
770
771	if i < len(tag) && tag[i] == '/' {
772		i++
773	}
774
775	i = skipSpace(tag, i)
776	j := 0
777	for ; i < len(tag); i, j = i+1, j+1 {
778		if j >= len(tagname) {
779			break
780		}
781
782		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
783			return false, -1
784		}
785	}
786
787	if i == len(tag) {
788		return false, -1
789	}
790
791	rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
792	if rightAngle > i {
793		return true, rightAngle
794	}
795
796	return false, -1
797}
798
799func sanitizeHtml(html []byte) []byte {
800	var result []byte
801	for string(html) != "" {
802		skip, tag, rest := findHtmlTag(html)
803		html = rest
804		result = append(result, skip...)
805		result = append(result, sanitizeTag(tag)...)
806	}
807	return append(result, []byte("\n")...)
808}
809
810func sanitizeTag(tag []byte) []byte {
811	if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
812		return tag
813	} else {
814		return []byte("")
815	}
816}
817
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. When there is none it returns len(text), or start itself if
// start is already past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
825
826func findHtmlTag(html []byte) (skip, tag, rest []byte) {
827	start := skipUntilChar(html, 0, '<')
828	rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
829	if rightAngle > start {
830		skip = html[0:start]
831		tag = html[start : rightAngle+1]
832		rest = html[rightAngle+1:]
833		return
834	}
835
836	return []byte(""), []byte(""), []byte("")
837}
838
839func skipSpace(tag []byte, i int) int {
840	for i < len(tag) && isspace(tag[i]) {
841		i++
842	}
843	return i
844}
845
// doubleSpace appends a newline to out unless out is still empty, so that a
// separating newline never appears at the very start of the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}