all repos — grayfriday @ e5937643a93cc2e494ee8ba93d03948e221333f5

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"regexp"
 22	"strconv"
 23	"strings"
 24)
 25
// Html renderer configuration options.
//
// Every constant occupies its own bit (1 << iota), so any combination can be
// ORed together and passed as the flags argument of HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
 43
var (
	// tags lists the element names that sanitizeTag lets through when they
	// appear as a bare opening or closing tag (no attributes). The names are
	// joined into the alternation used by tagWhitelist.
	tags = []string{
		"b",
		"blockquote",
		"code",
		"del",
		"dd",
		"dl",
		"dt",
		"em",
		"h1",
		"h2",
		"h3",
		"h4",
		"h5",
		"h6",
		"i",
		"kbd",
		"li",
		"ol",
		"p",
		"pre",
		"s",
		"sup",
		"sub",
		"strong",
		"strike",
		"ul",
	}
	// urlRe is a regexp fragment matching an http, https or ftp URL, or a
	// path that starts with "/". It is embedded in anchorClean and imgClean.
	//
	// tagWhitelist matches exactly one attribute-free tag from tags, or a
	// <br>/<hr> tag with an optional self-closing slash.
	//
	// anchorClean matches exactly one <a href="..."> tag whose href satisfies
	// urlRe (an optional title attribute is allowed), or a closing </a>.
	//
	// imgClean matches exactly one <img src="..."> tag whose src satisfies
	// urlRe, with optional width, height, alt and title attributes in that
	// order.
	urlRe        = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
	anchorClean  = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
	imgClean     = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
	// htmlEntity matches named entities of two to five lowercase letters
	// (e.g. &amp;). AutoLink uses the matches as ranges that must not be
	// escaped a second time.
	// TODO: improve this regexp to catch all possible entities:
	htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
)
 80
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	//
	// tocMarker is the output offset recorded at the end of DocumentHeader;
	// DocumentFooter inserts the table of contents at that offset.
	// headerCount is the number of headers recorded so far and supplies the
	// numeric part of the "toc_N" anchors.
	// currentLevel is the nesting depth of the <ul> lists currently open in
	// toc, and toc accumulates the table of contents markup itself.
	tocMarker    int
	headerCount  int
	currentLevel int
	toc          *bytes.Buffer

	smartypants *smartypantsRenderer // per-byte callbacks used by Smartypants
}
 98
// Endings written after a singleton tag such as <hr or <br. HtmlRenderer
// picks xhtmlClose when HTML_USE_XHTML is set and htmlClose otherwise.
// Both include the trailing newline.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
103
104// HtmlRenderer creates and configures an Html object, which
105// satisfies the Renderer interface.
106//
107// flags is a set of HTML_* options ORed together.
108// title is the title of the document, and css is a URL for the document's
109// stylesheet.
110// title and css are only used when HTML_COMPLETE_PAGE is selected.
111func HtmlRenderer(flags int, title string, css string) Renderer {
112	// configure the rendering engine
113	closeTag := htmlClose
114	if flags&HTML_USE_XHTML != 0 {
115		closeTag = xhtmlClose
116	}
117
118	return &Html{
119		flags:    flags,
120		closeTag: closeTag,
121		title:    title,
122		css:      css,
123
124		headerCount:  0,
125		currentLevel: 0,
126		toc:          new(bytes.Buffer),
127
128		smartypants: smartypants(flags),
129	}
130}
131
// escapeSingleChar returns the HTML entity for char together with true when
// char is one of '"', '&', '<' or '>'. For every other byte it returns the
// empty string and false.
//
// attrEscape calls this for every byte it processes and is the single
// largest CPU user in normal use, so the lookup is kept to plain
// comparisons; a map-based version was measured at roughly a 3x slowdown.
func escapeSingleChar(char byte) (string, bool) {
	switch char {
	case '"':
		return "&quot;", true
	case '&':
		return "&amp;", true
	case '<':
		return "&lt;", true
	case '>':
		return "&gt;", true
	}
	return "", false
}
151
152func attrEscape(out *bytes.Buffer, src []byte) {
153	org := 0
154	for i, ch := range src {
155		if entity, ok := escapeSingleChar(ch); ok {
156			if i > org {
157				// copy all the normal characters since the last escape
158				out.Write(src[org:i])
159			}
160			org = i + 1
161			out.WriteString(entity)
162		}
163	}
164	if org < len(src) {
165		out.Write(src[org:])
166	}
167}
168
169func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) {
170	end := 0
171	for _, rang := range skipRanges {
172		attrEscape(out, src[end:rang[0]])
173		out.Write(src[rang[0]:rang[1]])
174		end = rang[1]
175	}
176	attrEscape(out, src[end:])
177}
178
// GetFlags returns the HTML_* option bits this renderer was configured with.
func (options *Html) GetFlags() int {
	return options.flags
}
182
183func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
184	marker := out.Len()
185	doubleSpace(out)
186
187	if options.flags&HTML_TOC != 0 {
188		// headerCount is incremented in htmlTocHeader
189		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
190	} else {
191		out.WriteString(fmt.Sprintf("<h%d>", level))
192	}
193
194	tocMarker := out.Len()
195	if !text() {
196		out.Truncate(marker)
197		return
198	}
199
200	// are we building a table of contents?
201	if options.flags&HTML_TOC != 0 {
202		options.TocHeader(out.Bytes()[tocMarker:], level)
203	}
204
205	out.WriteString(fmt.Sprintf("</h%d>\n", level))
206}
207
// BlockHtml writes a raw HTML block verbatim, followed by a newline.
// Nothing is written when HTML_SKIP_HTML is set.
func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
	if options.flags&HTML_SKIP_HTML != 0 {
		return
	}

	doubleSpace(out)
	out.Write(text)
	out.WriteByte('\n')
}
217
// HRule writes a horizontal rule, ending the tag with the renderer's
// closeTag (which includes the trailing newline).
func (options *Html) HRule(out *bytes.Buffer) {
	doubleSpace(out)
	out.WriteString("<hr")
	out.WriteString(options.closeTag)
}
223
224func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
225	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
226		options.BlockCodeGithub(out, text, lang)
227	} else {
228		options.BlockCodeNormal(out, text, lang)
229	}
230}
231
232func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
233	doubleSpace(out)
234
235	// parse out the language names/classes
236	count := 0
237	for _, elt := range strings.Fields(lang) {
238		if elt[0] == '.' {
239			elt = elt[1:]
240		}
241		if len(elt) == 0 {
242			continue
243		}
244		if count == 0 {
245			out.WriteString("<pre><code class=\"")
246		} else {
247			out.WriteByte(' ')
248		}
249		attrEscape(out, []byte(elt))
250		count++
251	}
252
253	if count == 0 {
254		out.WriteString("<pre><code>")
255	} else {
256		out.WriteString("\">")
257	}
258
259	attrEscape(out, text)
260	out.WriteString("</code></pre>\n")
261}
262
263// GitHub style code block:
264//
265//              <pre lang="LANG"><code>
266//              ...
267//              </code></pre>
268//
269// Unlike other parsers, we store the language identifier in the <pre>,
270// and don't let the user generate custom classes.
271//
272// The language identifier in the <pre> block gets postprocessed and all
273// the code inside gets syntax highlighted with Pygments. This is much safer
274// than letting the user specify a CSS class for highlighting.
275//
276// Note that we only generate HTML for the first specifier.
277// E.g.
278//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
279func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
280	doubleSpace(out)
281
282	// parse out the language name
283	count := 0
284	for _, elt := range strings.Fields(lang) {
285		if elt[0] == '.' {
286			elt = elt[1:]
287		}
288		if len(elt) == 0 {
289			continue
290		}
291		out.WriteString("<pre lang=\"")
292		attrEscape(out, []byte(elt))
293		out.WriteString("\"><code>")
294		count++
295		break
296	}
297
298	if count == 0 {
299		out.WriteString("<pre><code>")
300	}
301
302	attrEscape(out, text)
303	out.WriteString("</code></pre>\n")
304}
305
// BlockQuote wraps text, written verbatim, in a <blockquote> element.
func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
	doubleSpace(out)
	out.WriteString("<blockquote>\n")
	out.Write(text)
	out.WriteString("</blockquote>\n")
}
312
// Table writes a <table> whose <thead> holds header and whose <tbody>
// holds body; both are written verbatim. columnData is not used by this
// renderer.
func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
	doubleSpace(out)
	out.WriteString("<table>\n<thead>\n")
	out.Write(header)
	out.WriteString("</thead>\n\n<tbody>\n")
	out.Write(body)
	out.WriteString("</tbody>\n</table>\n")
}
321
// TableRow wraps text, written verbatim, in a <tr> element.
func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
	doubleSpace(out)
	out.WriteString("<tr>\n")
	out.Write(text)
	out.WriteString("\n</tr>\n")
}
328
329func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
330	doubleSpace(out)
331	switch align {
332	case TABLE_ALIGNMENT_LEFT:
333		out.WriteString("<th align=\"left\">")
334	case TABLE_ALIGNMENT_RIGHT:
335		out.WriteString("<th align=\"right\">")
336	case TABLE_ALIGNMENT_CENTER:
337		out.WriteString("<th align=\"center\">")
338	default:
339		out.WriteString("<th>")
340	}
341
342	out.Write(text)
343	out.WriteString("</th>")
344}
345
346func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
347	doubleSpace(out)
348	switch align {
349	case TABLE_ALIGNMENT_LEFT:
350		out.WriteString("<td align=\"left\">")
351	case TABLE_ALIGNMENT_RIGHT:
352		out.WriteString("<td align=\"right\">")
353	case TABLE_ALIGNMENT_CENTER:
354		out.WriteString("<td align=\"center\">")
355	default:
356		out.WriteString("<td>")
357	}
358
359	out.Write(text)
360	out.WriteString("</td>")
361}
362
// Footnotes writes the footnote section: a <div class="footnotes">
// containing a horizontal rule followed by an ordered list whose items
// are produced by the text callback.
func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
	out.WriteString("<div class=\"footnotes\">\n")
	options.HRule(out)
	options.List(out, text, LIST_TYPE_ORDERED)
	out.WriteString("</div>\n")
}
369
370func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
371	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
372		doubleSpace(out)
373	}
374	out.WriteString(`<li id="fn:`)
375	out.Write(slugify(name))
376	out.WriteString(`">`)
377	out.Write(text)
378	out.WriteString("</li>\n")
379}
380
381func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
382	marker := out.Len()
383	doubleSpace(out)
384
385	if flags&LIST_TYPE_ORDERED != 0 {
386		out.WriteString("<ol>")
387	} else {
388		out.WriteString("<ul>")
389	}
390	if !text() {
391		out.Truncate(marker)
392		return
393	}
394	if flags&LIST_TYPE_ORDERED != 0 {
395		out.WriteString("</ol>\n")
396	} else {
397		out.WriteString("</ul>\n")
398	}
399}
400
401func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
402	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
403		doubleSpace(out)
404	}
405	out.WriteString("<li>")
406	out.Write(text)
407	out.WriteString("</li>\n")
408}
409
// Paragraph renders a <p> element whose content is produced by calling
// text. When text reports false, everything written for this paragraph
// (including the separating newline and the opening tag) is discarded.
func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
	// remember where this paragraph starts so it can be rolled back
	marker := out.Len()
	doubleSpace(out)

	out.WriteString("<p>")
	if !text() {
		out.Truncate(marker)
		return
	}
	out.WriteString("</p>\n")
}
421
422func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
423	skipRanges := htmlEntity.FindAllIndex(link, -1)
424	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
425		// mark it but don't link it if it is not a safe link: no smartypants
426		out.WriteString("<tt>")
427		entityEscapeWithSkip(out, link, skipRanges)
428		out.WriteString("</tt>")
429		return
430	}
431
432	out.WriteString("<a href=\"")
433	if kind == LINK_TYPE_EMAIL {
434		out.WriteString("mailto:")
435	}
436	entityEscapeWithSkip(out, link, skipRanges)
437	out.WriteString("\">")
438
439	// Pretty print: if we get an email address as
440	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
441	// want to print the `mailto:` prefix
442	switch {
443	case bytes.HasPrefix(link, []byte("mailto://")):
444		attrEscape(out, link[len("mailto://"):])
445	case bytes.HasPrefix(link, []byte("mailto:")):
446		attrEscape(out, link[len("mailto:"):])
447	default:
448		entityEscapeWithSkip(out, link, skipRanges)
449	}
450
451	out.WriteString("</a>")
452}
453
// CodeSpan writes text, escaped with attrEscape, inside a <code> element.
func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
	out.WriteString("<code>")
	attrEscape(out, text)
	out.WriteString("</code>")
}
459
// DoubleEmphasis writes text, verbatim, inside a <strong> element.
func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
	out.WriteString("<strong>")
	out.Write(text)
	out.WriteString("</strong>")
}
465
// Emphasis writes text, verbatim, inside an <em> element. Nothing at all
// is written when text is empty.
func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
	if len(text) == 0 {
		return
	}
	out.WriteString("<em>")
	out.Write(text)
	out.WriteString("</em>")
}
474
475func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
476	if options.flags&HTML_SKIP_IMAGES != 0 {
477		return
478	}
479
480	out.WriteString("<img src=\"")
481	attrEscape(out, link)
482	out.WriteString("\" alt=\"")
483	if len(alt) > 0 {
484		attrEscape(out, alt)
485	}
486	if len(title) > 0 {
487		out.WriteString("\" title=\"")
488		attrEscape(out, title)
489	}
490
491	out.WriteByte('"')
492	out.WriteString(options.closeTag)
493	return
494}
495
// LineBreak writes a <br tag ended with the renderer's closeTag (which
// includes the trailing newline).
func (options *Html) LineBreak(out *bytes.Buffer) {
	out.WriteString("<br")
	out.WriteString(options.closeTag)
}
500
501func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
502	if options.flags&HTML_SKIP_LINKS != 0 {
503		// write the link text out but don't link it, just mark it with typewriter font
504		out.WriteString("<tt>")
505		attrEscape(out, content)
506		out.WriteString("</tt>")
507		return
508	}
509
510	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
511		// write the link text out but don't link it, just mark it with typewriter font
512		out.WriteString("<tt>")
513		attrEscape(out, content)
514		out.WriteString("</tt>")
515		return
516	}
517
518	out.WriteString("<a href=\"")
519	attrEscape(out, link)
520	if len(title) > 0 {
521		out.WriteString("\" title=\"")
522		attrEscape(out, title)
523	}
524	out.WriteString("\">")
525	out.Write(content)
526	out.WriteString("</a>")
527	return
528}
529
530func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
531	if options.flags&HTML_SKIP_HTML != 0 {
532		return
533	}
534	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
535		return
536	}
537	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
538		return
539	}
540	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
541		return
542	}
543	out.Write(text)
544}
545
// TripleEmphasis writes text, verbatim, inside nested <strong><em>
// elements.
func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
	out.WriteString("<strong><em>")
	out.Write(text)
	out.WriteString("</em></strong>")
}
551
// StrikeThrough writes text, verbatim, inside a <del> element.
func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
	out.WriteString("<del>")
	out.Write(text)
	out.WriteString("</del>")
}
557
// FootnoteRef writes a superscript reference to a footnote. The slugified
// ref is used both for the reference's own id ("fnref:slug") and for the
// link target ("#fn:slug", the id written by FootnoteItem); id is the
// number shown as the link text.
func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
	slug := slugify(ref)
	out.WriteString(`<sup class="footnote-ref" id="fnref:`)
	out.Write(slug)
	out.WriteString(`"><a rel="footnote" href="#fn:`)
	out.Write(slug)
	out.WriteString(`">`)
	out.WriteString(strconv.Itoa(id))
	out.WriteString(`</a></sup>`)
}
568
// Entity writes an HTML entity verbatim, without any escaping.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
572
573func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
574	if options.flags&HTML_USE_SMARTYPANTS != 0 {
575		options.Smartypants(out, text)
576	} else {
577		attrEscape(out, text)
578	}
579}
580
581func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
582	smrt := smartypantsData{false, false}
583
584	// first do normal entity escaping
585	var escaped bytes.Buffer
586	attrEscape(&escaped, text)
587	text = escaped.Bytes()
588
589	mark := 0
590	for i := 0; i < len(text); i++ {
591		if action := options.smartypants[text[i]]; action != nil {
592			if i > mark {
593				out.Write(text[mark:i])
594			}
595
596			previousChar := byte(0)
597			if i > 0 {
598				previousChar = text[i-1]
599			}
600			i += action(out, &smrt, previousChar, text[i:])
601			mark = i + 1
602		}
603	}
604
605	if mark < len(text) {
606		out.Write(text[mark:])
607	}
608}
609
610func (options *Html) DocumentHeader(out *bytes.Buffer) {
611	if options.flags&HTML_COMPLETE_PAGE == 0 {
612		return
613	}
614
615	ending := ""
616	if options.flags&HTML_USE_XHTML != 0 {
617		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
618		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
619		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
620		ending = " /"
621	} else {
622		out.WriteString("<!DOCTYPE html>\n")
623		out.WriteString("<html>\n")
624	}
625	out.WriteString("<head>\n")
626	out.WriteString("  <title>")
627	options.NormalText(out, []byte(options.title))
628	out.WriteString("</title>\n")
629	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
630	out.WriteString(VERSION)
631	out.WriteString("\"")
632	out.WriteString(ending)
633	out.WriteString(">\n")
634	out.WriteString("  <meta charset=\"utf-8\"")
635	out.WriteString(ending)
636	out.WriteString(">\n")
637	if options.css != "" {
638		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
639		attrEscape(out, []byte(options.css))
640		out.WriteString("\"")
641		out.WriteString(ending)
642		out.WriteString(">\n")
643	}
644	out.WriteString("</head>\n")
645	out.WriteString("<body>\n")
646
647	options.tocMarker = out.Len()
648}
649
650func (options *Html) DocumentFooter(out *bytes.Buffer) {
651	// finalize and insert the table of contents
652	if options.flags&HTML_TOC != 0 {
653		options.TocFinalize()
654
655		// now we have to insert the table of contents into the document
656		var temp bytes.Buffer
657
658		// start by making a copy of everything after the document header
659		temp.Write(out.Bytes()[options.tocMarker:])
660
661		// now clear the copied material from the main output buffer
662		out.Truncate(options.tocMarker)
663
664		// corner case spacing issue
665		if options.flags&HTML_COMPLETE_PAGE != 0 {
666			out.WriteByte('\n')
667		}
668
669		// insert the table of contents
670		out.WriteString("<nav>\n")
671		out.Write(options.toc.Bytes())
672		out.WriteString("</nav>\n")
673
674		// corner case spacing issue
675		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
676			out.WriteByte('\n')
677		}
678
679		// write out everything that came after it
680		if options.flags&HTML_OMIT_CONTENTS == 0 {
681			out.Write(temp.Bytes())
682		}
683	}
684
685	if options.flags&HTML_COMPLETE_PAGE != 0 {
686		out.WriteString("\n</body>\n")
687		out.WriteString("</html>\n")
688	}
689
690}
691
// TocHeader records one header in the table of contents buffer.
//
// text is the already rendered header content and level its heading
// level. The nested <ul> structure in toc is first brought to the
// requested level, then an entry linking to "#toc_N" is appended, where N
// is the current headerCount; headerCount is incremented afterwards.
func (options *Html) TocHeader(text []byte, level int) {
	// going deeper: open one <ul> per missing level
	for level > options.currentLevel {
		switch {
		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
			// this sublist can nest underneath a header
			// (reopen the previous entry by removing its closing tag)
			size := options.toc.Len()
			options.toc.Truncate(size - len("</li>\n"))

		case options.currentLevel > 0:
			// no previous entry to nest under: start an empty item
			options.toc.WriteString("<li>")
		}
		if options.toc.Len() > 0 {
			options.toc.WriteByte('\n')
		}
		options.toc.WriteString("<ul>\n")
		options.currentLevel++
	}

	// going shallower: close one <ul> per surplus level, together with the
	// <li> that contains it unless it is the outermost list
	for level < options.currentLevel {
		options.toc.WriteString("</ul>")
		if options.currentLevel > 1 {
			options.toc.WriteString("</li>\n")
		}
		options.currentLevel--
	}

	// the entry itself; the anchor number matches the id written by Header
	options.toc.WriteString("<li><a href=\"#toc_")
	options.toc.WriteString(strconv.Itoa(options.headerCount))
	options.toc.WriteString("\">")
	options.headerCount++

	options.toc.Write(text)

	options.toc.WriteString("</a></li>\n")
}
727
728func (options *Html) TocFinalize() {
729	for options.currentLevel > 1 {
730		options.toc.WriteString("</ul></li>\n")
731		options.currentLevel--
732	}
733
734	if options.currentLevel > 0 {
735		options.toc.WriteString("</ul>\n")
736	}
737}
738
// isHtmlTag reports whether findHtmlTagPos recognizes tag as an opening or
// closing tag named tagname. The position that findHtmlTagPos also
// returns is discarded.
func isHtmlTag(tag []byte, tagname string) bool {
	found, _ := findHtmlTagPos(tag, tagname)
	return found
}
743
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of
// char in html at or after start that lies outside any quoted section. It
// returns start when there is no such occurrence, so callers must check
// the byte at the returned index (or compare against start) to tell the
// two cases apart.
//
// A quoted section begins at a single quote, double quote or backtick and
// runs to the next occurrence of that same character; the content might be
// an attribute value or JavaScript. The other two quote characters are
// ordinary text inside a section, so the apostrophe in title="it's" does
// not open a new one.
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var quote byte // the quote character currently open, 0 when outside quotes
	for i := start; i < len(html); i++ {
		c := html[i]
		switch {
		case quote != 0:
			// inside quotes only the matching quote character matters
			if c == quote {
				quote = 0
			}
		case c == char:
			return i
		case c == '\'' || c == '"' || c == '`':
			quote = c
		}
	}
	return start
}
766
767func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
768	i := 0
769	if i < len(tag) && tag[0] != '<' {
770		return false, -1
771	}
772	i++
773	i = skipSpace(tag, i)
774
775	if i < len(tag) && tag[i] == '/' {
776		i++
777	}
778
779	i = skipSpace(tag, i)
780	j := 0
781	for ; i < len(tag); i, j = i+1, j+1 {
782		if j >= len(tagname) {
783			break
784		}
785
786		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
787			return false, -1
788		}
789	}
790
791	if i == len(tag) {
792		return false, -1
793	}
794
795	rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
796	if rightAngle > i {
797		return true, rightAngle
798	}
799
800	return false, -1
801}
802
803func sanitizeHtml(html []byte) []byte {
804	var result []byte
805	for string(html) != "" {
806		skip, tag, rest := findHtmlTag(html)
807		html = rest
808		result = append(result, skip...)
809		result = append(result, sanitizeTag(tag)...)
810	}
811	return append(result, []byte("\n")...)
812}
813
814func sanitizeTag(tag []byte) []byte {
815	if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
816		return tag
817	} else {
818		return []byte("")
819	}
820}
821
// skipUntilChar returns the index of the first occurrence of char in text
// at or after start. When char does not occur there, the returned index is
// not inside text: it is len(text), or start itself if start is already
// past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
829
830func findHtmlTag(html []byte) (skip, tag, rest []byte) {
831	start := skipUntilChar(html, 0, '<')
832	rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
833	if rightAngle > start {
834		skip = html[0:start]
835		tag = html[start : rightAngle+1]
836		rest = html[rightAngle+1:]
837		return
838	}
839
840	return []byte(""), []byte(""), []byte("")
841}
842
843func skipSpace(tag []byte, i int) int {
844	for i < len(tag) && isspace(tag[i]) {
845		i++
846	}
847	return i
848}
849
// doubleSpace appends a newline to out unless out is still empty, so that
// a block element is separated from whatever was written before it.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}