all repos — grayfriday @ 3ca168f879360b439e173ee4feb5916607ccee40

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"regexp"
 22	"strconv"
 23	"strings"
 24)
 25
// Html renderer configuration options.
//
// Each constant occupies its own bit (1 << iota), so options are combined
// with bitwise OR and tested against the renderer's flags with bitwise AND.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_NOFOLLOW_LINKS                       // only link with rel="nofollow"
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
 44
// Data used when sanitizing rendered HTML (see sanitizeTag) and when
// escaping autolinks (see AutoLink).
var (
	// tags lists the element names that tagWhitelist accepts as plain
	// opening or closing tags.
	tags = []string{
		"b",
		"blockquote",
		"code",
		"del",
		"dd",
		"dl",
		"dt",
		"em",
		"h1",
		"h2",
		"h3",
		"h4",
		"h5",
		"h6",
		"i",
		"kbd",
		"li",
		"ol",
		"p",
		"pre",
		"s",
		"sup",
		"sub",
		"strong",
		"strike",
		"ul",
		"table",
		"tr",
		"td",
		"th",
		"thead",
		"tbody",
	}

	// alignments lists the values tagWhitelist accepts for an align="..."
	// attribute.
	alignments = []string{
		"left",
		"right",
		"center",
	}

	// urlRe is a regexp fragment matching an http, https or ftp URL, or a
	// path that starts with "/". It is spliced into anchorClean and imgClean.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// tagWhitelist matches, as a whole string, an opening or closing tag
	// named in tags (optionally with a single align attribute whose value is
	// in alignments), or a <br>/<hr> tag with an optional space and slash.
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)(\salign="(` + strings.Join(alignments, "|") + `)")?>|<(br|hr)\s?\/?>)$`)
	// anchorClean matches <a href="URL"> with an optional title attribute,
	// or the closing </a>.
	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
	// imgClean matches <img src="URL"> followed by optional width, height,
	// alt and title attributes, which must appear in exactly that order.
	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
	// htmlEntity matches '&', two to five lowercase ASCII letters, then ';'.
	// Numeric entities and longer names are therefore not recognized.
	// TODO: improve this regexp to catch all possible entities:
	htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
)
 95
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded by DocumentHeader; DocumentFooter inserts the TOC there
	headerCount  int           // number of TOC entries written so far; also used to build "toc_N" ids
	currentLevel int           // current <ul> nesting depth inside toc
	toc          *bytes.Buffer // accumulated table-of-contents markup

	smartypants *smartypantsRenderer // per-byte callbacks consulted by Smartypants
}
113
// Terminators written after singleton tags such as <br>, <hr> and <img>.
// HtmlRenderer selects xhtmlClose when HTML_USE_XHTML is set and htmlClose
// otherwise. Both include the trailing newline.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
118
119// HtmlRenderer creates and configures an Html object, which
120// satisfies the Renderer interface.
121//
122// flags is a set of HTML_* options ORed together.
123// title is the title of the document, and css is a URL for the document's
124// stylesheet.
125// title and css are only used when HTML_COMPLETE_PAGE is selected.
126func HtmlRenderer(flags int, title string, css string) Renderer {
127	// configure the rendering engine
128	closeTag := htmlClose
129	if flags&HTML_USE_XHTML != 0 {
130		closeTag = xhtmlClose
131	}
132
133	return &Html{
134		flags:    flags,
135		closeTag: closeTag,
136		title:    title,
137		css:      css,
138
139		headerCount:  0,
140		currentLevel: 0,
141		toc:          new(bytes.Buffer),
142
143		smartypants: smartypants(flags),
144	}
145}
146
// escapeSingleChar maps one byte to its HTML entity.
//
// It returns the entity and true for '"', '&', '<' and '>', and ("", false)
// for every other byte. attrEscape calls it once per input byte, so it is
// kept deliberately small.
func escapeSingleChar(char byte) (string, bool) {
	switch char {
	case '"':
		return "&quot;", true
	case '&':
		return "&amp;", true
	case '<':
		return "&lt;", true
	case '>':
		return "&gt;", true
	default:
		return "", false
	}
}
166
167func attrEscape(out *bytes.Buffer, src []byte) {
168	org := 0
169	for i, ch := range src {
170		if entity, ok := escapeSingleChar(ch); ok {
171			if i > org {
172				// copy all the normal characters since the last escape
173				out.Write(src[org:i])
174			}
175			org = i + 1
176			out.WriteString(entity)
177		}
178	}
179	if org < len(src) {
180		out.Write(src[org:])
181	}
182}
183
184func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) {
185	end := 0
186	for _, rang := range skipRanges {
187		attrEscape(out, src[end:rang[0]])
188		out.Write(src[rang[0]:rang[1]])
189		end = rang[1]
190	}
191	attrEscape(out, src[end:])
192}
193
// GetFlags returns the HTML_* option bits this renderer was created with.
func (options *Html) GetFlags() int {
	return options.flags
}
197
198func (options *Html) Header(out *bytes.Buffer, text func() bool, level int, id string) {
199	marker := out.Len()
200	doubleSpace(out)
201
202	if id != "" {
203		out.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, id))
204	} else if options.flags&HTML_TOC != 0 {
205		// headerCount is incremented in htmlTocHeader
206		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
207	} else {
208		out.WriteString(fmt.Sprintf("<h%d>", level))
209	}
210
211	tocMarker := out.Len()
212	if !text() {
213		out.Truncate(marker)
214		return
215	}
216
217	// are we building a table of contents?
218	if options.flags&HTML_TOC != 0 {
219		options.TocHeader(out.Bytes()[tocMarker:], level)
220	}
221
222	out.WriteString(fmt.Sprintf("</h%d>\n", level))
223}
224
// BlockHtml writes a raw HTML block to out unchanged, followed by a
// newline. Nothing is written when HTML_SKIP_HTML is set.
func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
	if options.flags&HTML_SKIP_HTML != 0 {
		return
	}

	doubleSpace(out)
	out.Write(text)
	out.WriteByte('\n')
}
234
// HRule writes a horizontal rule, terminated with the renderer's closeTag
// (HTML or XHTML style).
func (options *Html) HRule(out *bytes.Buffer) {
	doubleSpace(out)
	out.WriteString("<hr")
	out.WriteString(options.closeTag)
}
240
241func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
242	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
243		options.BlockCodeGithub(out, text, lang)
244	} else {
245		options.BlockCodeNormal(out, text, lang)
246	}
247}
248
249func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
250	doubleSpace(out)
251
252	// parse out the language names/classes
253	count := 0
254	for _, elt := range strings.Fields(lang) {
255		if elt[0] == '.' {
256			elt = elt[1:]
257		}
258		if len(elt) == 0 {
259			continue
260		}
261		if count == 0 {
262			out.WriteString("<pre><code class=\"")
263		} else {
264			out.WriteByte(' ')
265		}
266		attrEscape(out, []byte(elt))
267		count++
268	}
269
270	if count == 0 {
271		out.WriteString("<pre><code>")
272	} else {
273		out.WriteString("\">")
274	}
275
276	attrEscape(out, text)
277	out.WriteString("</code></pre>\n")
278}
279
280// GitHub style code block:
281//
282//              <pre lang="LANG"><code>
283//              ...
284//              </code></pre>
285//
286// Unlike other parsers, we store the language identifier in the <pre>,
287// and don't let the user generate custom classes.
288//
289// The language identifier in the <pre> block gets postprocessed and all
290// the code inside gets syntax highlighted with Pygments. This is much safer
291// than letting the user specify a CSS class for highlighting.
292//
293// Note that we only generate HTML for the first specifier.
294// E.g.
295//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
296func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
297	doubleSpace(out)
298
299	// parse out the language name
300	count := 0
301	for _, elt := range strings.Fields(lang) {
302		if elt[0] == '.' {
303			elt = elt[1:]
304		}
305		if len(elt) == 0 {
306			continue
307		}
308		out.WriteString("<pre lang=\"")
309		attrEscape(out, []byte(elt))
310		out.WriteString("\"><code>")
311		count++
312		break
313	}
314
315	if count == 0 {
316		out.WriteString("<pre><code>")
317	}
318
319	attrEscape(out, text)
320	out.WriteString("</code></pre>\n")
321}
322
// BlockQuote wraps text in a <blockquote> element. text is written
// verbatim, without escaping.
func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
	doubleSpace(out)
	out.WriteString("<blockquote>\n")
	out.Write(text)
	out.WriteString("</blockquote>\n")
}
329
// Table writes a <table> with header inside <thead> and body inside
// <tbody>. Both are written verbatim. columnData is not used by this
// renderer.
func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
	doubleSpace(out)
	out.WriteString("<table>\n<thead>\n")
	out.Write(header)
	out.WriteString("</thead>\n\n<tbody>\n")
	out.Write(body)
	out.WriteString("</tbody>\n</table>\n")
}
338
// TableRow wraps text, the row's cells written verbatim, in a <tr>
// element.
func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
	doubleSpace(out)
	out.WriteString("<tr>\n")
	out.Write(text)
	out.WriteString("\n</tr>\n")
}
345
346func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
347	doubleSpace(out)
348	switch align {
349	case TABLE_ALIGNMENT_LEFT:
350		out.WriteString("<th align=\"left\">")
351	case TABLE_ALIGNMENT_RIGHT:
352		out.WriteString("<th align=\"right\">")
353	case TABLE_ALIGNMENT_CENTER:
354		out.WriteString("<th align=\"center\">")
355	default:
356		out.WriteString("<th>")
357	}
358
359	out.Write(text)
360	out.WriteString("</th>")
361}
362
363func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
364	doubleSpace(out)
365	switch align {
366	case TABLE_ALIGNMENT_LEFT:
367		out.WriteString("<td align=\"left\">")
368	case TABLE_ALIGNMENT_RIGHT:
369		out.WriteString("<td align=\"right\">")
370	case TABLE_ALIGNMENT_CENTER:
371		out.WriteString("<td align=\"center\">")
372	default:
373		out.WriteString("<td>")
374	}
375
376	out.Write(text)
377	out.WriteString("</td>")
378}
379
// Footnotes writes the footnote section: a <div class="footnotes">
// containing a horizontal rule followed by an ordered list whose items are
// produced by the text callback.
func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
	out.WriteString("<div class=\"footnotes\">\n")
	options.HRule(out)
	options.List(out, text, LIST_TYPE_ORDERED)
	out.WriteString("</div>\n")
}
386
387func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
388	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
389		doubleSpace(out)
390	}
391	out.WriteString(`<li id="fn:`)
392	out.Write(slugify(name))
393	out.WriteString(`">`)
394	out.Write(text)
395	out.WriteString("</li>\n")
396}
397
398func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
399	marker := out.Len()
400	doubleSpace(out)
401
402	if flags&LIST_TYPE_ORDERED != 0 {
403		out.WriteString("<ol>")
404	} else {
405		out.WriteString("<ul>")
406	}
407	if !text() {
408		out.Truncate(marker)
409		return
410	}
411	if flags&LIST_TYPE_ORDERED != 0 {
412		out.WriteString("</ol>\n")
413	} else {
414		out.WriteString("</ul>\n")
415	}
416}
417
418func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
419	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
420		doubleSpace(out)
421	}
422	out.WriteString("<li>")
423	out.Write(text)
424	out.WriteString("</li>\n")
425}
426
// Paragraph writes a <p> element whose contents are produced by the text
// callback. If text returns false, everything written for the paragraph is
// discarded and out is restored to its length on entry.
func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
	marker := out.Len()
	doubleSpace(out)

	out.WriteString("<p>")
	if !text() {
		// nothing was rendered: drop the opening tag and the spacing too
		out.Truncate(marker)
		return
	}
	out.WriteString("</p>\n")
}
438
439func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
440	skipRanges := htmlEntity.FindAllIndex(link, -1)
441	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
442		// mark it but don't link it if it is not a safe link: no smartypants
443		out.WriteString("<tt>")
444		entityEscapeWithSkip(out, link, skipRanges)
445		out.WriteString("</tt>")
446		return
447	}
448
449	out.WriteString("<a href=\"")
450	if kind == LINK_TYPE_EMAIL {
451		out.WriteString("mailto:")
452	}
453	entityEscapeWithSkip(out, link, skipRanges)
454	out.WriteString("\">")
455
456	// Pretty print: if we get an email address as
457	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
458	// want to print the `mailto:` prefix
459	switch {
460	case bytes.HasPrefix(link, []byte("mailto://")):
461		attrEscape(out, link[len("mailto://"):])
462	case bytes.HasPrefix(link, []byte("mailto:")):
463		attrEscape(out, link[len("mailto:"):])
464	default:
465		entityEscapeWithSkip(out, link, skipRanges)
466	}
467
468	out.WriteString("</a>")
469}
470
// CodeSpan writes text, HTML-escaped, inside a <code> element.
func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
	out.WriteString("<code>")
	attrEscape(out, text)
	out.WriteString("</code>")
}
476
// DoubleEmphasis writes text, verbatim, inside a <strong> element.
func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
	out.WriteString("<strong>")
	out.Write(text)
	out.WriteString("</strong>")
}
482
// Emphasis writes text, verbatim, inside an <em> element. Nothing is
// written when text is empty.
func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
	if len(text) == 0 {
		return
	}
	out.WriteString("<em>")
	out.Write(text)
	out.WriteString("</em>")
}
491
492func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
493	if options.flags&HTML_SKIP_IMAGES != 0 {
494		return
495	}
496
497	out.WriteString("<img src=\"")
498	attrEscape(out, link)
499	out.WriteString("\" alt=\"")
500	if len(alt) > 0 {
501		attrEscape(out, alt)
502	}
503	if len(title) > 0 {
504		out.WriteString("\" title=\"")
505		attrEscape(out, title)
506	}
507
508	out.WriteByte('"')
509	out.WriteString(options.closeTag)
510	return
511}
512
// LineBreak writes a <br> tag, terminated with the renderer's closeTag
// (HTML or XHTML style).
func (options *Html) LineBreak(out *bytes.Buffer) {
	out.WriteString("<br")
	out.WriteString(options.closeTag)
}
517
518func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
519	if options.flags&HTML_SKIP_LINKS != 0 {
520		// write the link text out but don't link it, just mark it with typewriter font
521		out.WriteString("<tt>")
522		attrEscape(out, content)
523		out.WriteString("</tt>")
524		return
525	}
526
527	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
528		// write the link text out but don't link it, just mark it with typewriter font
529		out.WriteString("<tt>")
530		attrEscape(out, content)
531		out.WriteString("</tt>")
532		return
533	}
534
535	out.WriteString("<a href=\"")
536	attrEscape(out, link)
537	if len(title) > 0 {
538		out.WriteString("\" title=\"")
539		attrEscape(out, title)
540	}
541	if options.flags&HTML_NOFOLLOW_LINKS != 0 {
542		out.WriteString("\" rel=\"nofollow")
543	}
544	out.WriteString("\">")
545	out.Write(content)
546	out.WriteString("</a>")
547	return
548}
549
550func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
551	if options.flags&HTML_SKIP_HTML != 0 {
552		return
553	}
554	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
555		return
556	}
557	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
558		return
559	}
560	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
561		return
562	}
563	out.Write(text)
564}
565
// TripleEmphasis writes text, verbatim, inside nested <strong><em>
// elements.
func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
	out.WriteString("<strong><em>")
	out.Write(text)
	out.WriteString("</em></strong>")
}
571
// StrikeThrough writes text, verbatim, inside a <del> element.
func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
	out.WriteString("<del>")
	out.Write(text)
	out.WriteString("</del>")
}
577
// FootnoteRef writes a superscript reference to a footnote. The <sup>
// element gets the id "fnref:SLUG" and contains a link to "#fn:SLUG", the
// id written by FootnoteItem, where SLUG is slugify(ref). The link text is
// the decimal form of id.
func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
	slug := slugify(ref)
	out.WriteString(`<sup class="footnote-ref" id="fnref:`)
	out.Write(slug)
	out.WriteString(`"><a rel="footnote" href="#fn:`)
	out.Write(slug)
	out.WriteString(`">`)
	out.WriteString(strconv.Itoa(id))
	out.WriteString(`</a></sup>`)
}
588
// Entity writes an HTML entity to out unchanged (no escaping is applied).
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
592
593func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
594	if options.flags&HTML_USE_SMARTYPANTS != 0 {
595		options.Smartypants(out, text)
596	} else {
597		attrEscape(out, text)
598	}
599}
600
// Smartypants HTML-escapes text and then applies the renderer's smart
// punctuation callbacks to the escaped bytes, writing the result to out.
//
// options.smartypants is indexed by byte value. Bytes with no callback are
// copied through in runs; a byte with a callback hands control to that
// callback, which writes its own output.
func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
	smrt := smartypantsData{false, false}

	// first do normal entity escaping
	var escaped bytes.Buffer
	attrEscape(&escaped, text)
	text = escaped.Bytes()

	// mark is the start of the run of bytes not yet copied to out
	mark := 0
	for i := 0; i < len(text); i++ {
		if action := options.smartypants[text[i]]; action != nil {
			// flush the ordinary bytes that precede this one
			if i > mark {
				out.Write(text[mark:i])
			}

			// the callback receives the preceding byte, or 0 at the start
			previousChar := byte(0)
			if i > 0 {
				previousChar = text[i-1]
			}
			// the callback's result is added to i; presumably it is the
			// number of additional bytes it consumed — confirm against the
			// smartypants callbacks
			i += action(out, &smrt, previousChar, text[i:])
			mark = i + 1
		}
	}

	// copy whatever follows the last byte handled by a callback
	if mark < len(text) {
		out.Write(text[mark:])
	}
}
629
630func (options *Html) DocumentHeader(out *bytes.Buffer) {
631	if options.flags&HTML_COMPLETE_PAGE == 0 {
632		return
633	}
634
635	ending := ""
636	if options.flags&HTML_USE_XHTML != 0 {
637		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
638		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
639		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
640		ending = " /"
641	} else {
642		out.WriteString("<!DOCTYPE html>\n")
643		out.WriteString("<html>\n")
644	}
645	out.WriteString("<head>\n")
646	out.WriteString("  <title>")
647	options.NormalText(out, []byte(options.title))
648	out.WriteString("</title>\n")
649	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
650	out.WriteString(VERSION)
651	out.WriteString("\"")
652	out.WriteString(ending)
653	out.WriteString(">\n")
654	out.WriteString("  <meta charset=\"utf-8\"")
655	out.WriteString(ending)
656	out.WriteString(">\n")
657	if options.css != "" {
658		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
659		attrEscape(out, []byte(options.css))
660		out.WriteString("\"")
661		out.WriteString(ending)
662		out.WriteString(">\n")
663	}
664	out.WriteString("</head>\n")
665	out.WriteString("<body>\n")
666
667	options.tocMarker = out.Len()
668}
669
670func (options *Html) DocumentFooter(out *bytes.Buffer) {
671	// finalize and insert the table of contents
672	if options.flags&HTML_TOC != 0 {
673		options.TocFinalize()
674
675		// now we have to insert the table of contents into the document
676		var temp bytes.Buffer
677
678		// start by making a copy of everything after the document header
679		temp.Write(out.Bytes()[options.tocMarker:])
680
681		// now clear the copied material from the main output buffer
682		out.Truncate(options.tocMarker)
683
684		// corner case spacing issue
685		if options.flags&HTML_COMPLETE_PAGE != 0 {
686			out.WriteByte('\n')
687		}
688
689		// insert the table of contents
690		out.WriteString("<nav>\n")
691		out.Write(options.toc.Bytes())
692		out.WriteString("</nav>\n")
693
694		// corner case spacing issue
695		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
696			out.WriteByte('\n')
697		}
698
699		// write out everything that came after it
700		if options.flags&HTML_OMIT_CONTENTS == 0 {
701			out.Write(temp.Bytes())
702		}
703	}
704
705	if options.flags&HTML_COMPLETE_PAGE != 0 {
706		out.WriteString("\n</body>\n")
707		out.WriteString("</html>\n")
708	}
709
710}
711
// TocHeader appends one table-of-contents entry for a heading of the given
// level to options.toc. text is the heading's already-rendered contents
// and is copied verbatim as the link text.
//
// The entry links to "#toc_N", where N is options.headerCount before the
// call; headerCount is incremented as part of writing the entry.
func (options *Html) TocHeader(text []byte, level int) {
	// going deeper: open nested <ul> lists until the depth matches level
	for level > options.currentLevel {
		switch {
		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
			// this sublist can nest underneath a header
			// (re-open the previous item by dropping its closing tag)
			size := options.toc.Len()
			options.toc.Truncate(size - len("</li>\n"))

		case options.currentLevel > 0:
			// already inside a list with no item to nest under: open one
			options.toc.WriteString("<li>")
		}
		if options.toc.Len() > 0 {
			options.toc.WriteByte('\n')
		}
		options.toc.WriteString("<ul>\n")
		options.currentLevel++
	}

	// going shallower: close lists (and, below the top level, the item
	// that contains each of them) until the depth matches level
	for level < options.currentLevel {
		options.toc.WriteString("</ul>")
		if options.currentLevel > 1 {
			options.toc.WriteString("</li>\n")
		}
		options.currentLevel--
	}

	options.toc.WriteString("<li><a href=\"#toc_")
	options.toc.WriteString(strconv.Itoa(options.headerCount))
	options.toc.WriteString("\">")
	options.headerCount++

	options.toc.Write(text)

	options.toc.WriteString("</a></li>\n")
}
747
748func (options *Html) TocFinalize() {
749	for options.currentLevel > 1 {
750		options.toc.WriteString("</ul></li>\n")
751		options.currentLevel--
752	}
753
754	if options.currentLevel > 0 {
755		options.toc.WriteString("</ul>\n")
756	}
757}
758
// isHtmlTag reports whether findHtmlTagPos recognizes tag as an HTML tag
// named tagname; the position that findHtmlTagPos also returns is
// discarded.
func isHtmlTag(tag []byte, tagname string) bool {
	found, _ := findHtmlTagPos(tag, tagname)
	return found
}
763
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of
// char in html at or after start that is not inside single, double or
// backtick quotes (quoted text might be JavaScript).
//
// Each quote kind is tracked independently and simply toggles on every
// occurrence. When no unquoted occurrence exists the function returns
// start, so a result equal to start is ambiguous: it means either "found
// at start" or "not found".
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var inSingle, inDouble, inGrave bool

	for pos := start; pos < len(html); pos++ {
		current := html[pos]
		if current == char && !(inSingle || inDouble || inGrave) {
			return pos
		}

		switch current {
		case '\'':
			inSingle = !inSingle
		case '"':
			inDouble = !inDouble
		case '`':
			inGrave = !inGrave
		}
	}

	return start
}
786
787func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
788	i := 0
789	if i < len(tag) && tag[0] != '<' {
790		return false, -1
791	}
792	i++
793	i = skipSpace(tag, i)
794
795	if i < len(tag) && tag[i] == '/' {
796		i++
797	}
798
799	i = skipSpace(tag, i)
800	j := 0
801	for ; i < len(tag); i, j = i+1, j+1 {
802		if j >= len(tagname) {
803			break
804		}
805
806		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
807			return false, -1
808		}
809	}
810
811	if i == len(tag) {
812		return false, -1
813	}
814
815	rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
816	if rightAngle > i {
817		return true, rightAngle
818	}
819
820	return false, -1
821}
822
823func sanitizeHtml(html []byte) []byte {
824	var result []byte
825	for string(html) != "" {
826		skip, tag, rest := findHtmlTag(html)
827		html = rest
828		result = append(result, skip...)
829		result = append(result, sanitizeTag(tag)...)
830	}
831	return append(result, []byte("\n")...)
832}
833
834func sanitizeTag(tag []byte) []byte {
835	if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
836		return tag
837	}
838	return []byte("")
839}
840
// skipUntilChar returns the index of the first occurrence of char in text
// at or after start, or len(text) when there is none. A start at or beyond
// len(text) is returned unchanged.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
848
849func findHtmlTag(html []byte) (skip, tag, rest []byte) {
850	start := skipUntilChar(html, 0, '<')
851	rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
852	if rightAngle > start {
853		skip = html[0:start]
854		tag = html[start : rightAngle+1]
855		rest = html[rightAngle+1:]
856		return
857	}
858
859	return []byte(""), []byte(""), []byte("")
860}
861
862func skipSpace(tag []byte, i int) int {
863	for i < len(tag) && isspace(tag[i]) {
864		i++
865	}
866	return i
867}
868
// doubleSpace appends a newline to out unless out is still empty. Block
// renderers call it first so that consecutive blocks are separated without
// the output starting with a blank line.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}