icy does git — grayfriday (cd3fa08cb15a5c07b87d967cea59653e63948a77): html.go

html.go (view raw)
  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"regexp"
 22	"strconv"
 23	"strings"
 24)
 25
// Html renderer configuration options.
//
// Each option occupies its own bit (1 << iota), so several options can be
// ORed together and passed as the flags argument of HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_NOFOLLOW_LINKS                       // only link with rel="nofollow"
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
 44
var (
	// tags lists the element names that are joined into the tagWhitelist
	// pattern below; only attribute-less opening/closing forms of these
	// elements match that pattern.
	tags = []string{
		"b",
		"blockquote",
		"code",
		"del",
		"dd",
		"dl",
		"dt",
		"em",
		"h1",
		"h2",
		"h3",
		"h4",
		"h5",
		"h6",
		"i",
		"kbd",
		"li",
		"ol",
		"p",
		"pre",
		"s",
		"sup",
		"sub",
		"strong",
		"strike",
		"ul",
	}
	// urlRe is a regexp fragment (not a compiled regexp) matching an
	// http/https/ftp URL or a path starting with '/'. It is spliced into
	// anchorClean and imgClean. tagWhitelist, anchorClean and imgClean are
	// anchored with ^...$ and are matched against a single tag by sanitizeTag.
	urlRe        = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
	anchorClean  = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
	imgClean     = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
	// htmlEntity finds entities inside auto-linked text so that AutoLink can
	// copy them through without escaping their '&' a second time.
	// TODO: improve this regexp to catch all possible entities:
	htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
)
 81
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output length recorded at the end of DocumentHeader; DocumentFooter inserts the TOC at this offset
	headerCount  int           // number of headers recorded by TocHeader so far; also used for the "toc_N" ids
	currentLevel int           // current <ul> nesting depth inside toc
	toc          *bytes.Buffer // accumulated table of contents markup

	// smartypants is indexed by input byte in Smartypants; a non-nil entry
	// is the callback invoked for that byte.
	smartypants *smartypantsRenderer
}
 99
// Closing sequences for singleton tags such as <br> and <hr>. HtmlRenderer
// selects xhtmlClose when HTML_USE_XHTML is set and htmlClose otherwise.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
104
105// HtmlRenderer creates and configures an Html object, which
106// satisfies the Renderer interface.
107//
108// flags is a set of HTML_* options ORed together.
109// title is the title of the document, and css is a URL for the document's
110// stylesheet.
111// title and css are only used when HTML_COMPLETE_PAGE is selected.
112func HtmlRenderer(flags int, title string, css string) Renderer {
113	// configure the rendering engine
114	closeTag := htmlClose
115	if flags&HTML_USE_XHTML != 0 {
116		closeTag = xhtmlClose
117	}
118
119	return &Html{
120		flags:    flags,
121		closeTag: closeTag,
122		title:    title,
123		css:      css,
124
125		headerCount:  0,
126		currentLevel: 0,
127		toc:          new(bytes.Buffer),
128
129		smartypants: smartypants(flags),
130	}
131}
132
// escapeSingleChar returns the HTML entity for char and true when char is one
// of '"', '&', '<' or '>'. For any other byte it returns "" and false.
//
// This sits on the hot path: attrEscape is the single largest CPU user in
// normal use. A map-based lookup was tried and gave a ~3x slowdown, so keep
// this as straight-line comparisons.
func escapeSingleChar(char byte) (string, bool) {
	switch char {
	case '"':
		return "&quot;", true
	case '&':
		return "&amp;", true
	case '<':
		return "&lt;", true
	case '>':
		return "&gt;", true
	default:
		return "", false
	}
}
152
153func attrEscape(out *bytes.Buffer, src []byte) {
154	org := 0
155	for i, ch := range src {
156		if entity, ok := escapeSingleChar(ch); ok {
157			if i > org {
158				// copy all the normal characters since the last escape
159				out.Write(src[org:i])
160			}
161			org = i + 1
162			out.WriteString(entity)
163		}
164	}
165	if org < len(src) {
166		out.Write(src[org:])
167	}
168}
169
170func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) {
171	end := 0
172	for _, rang := range skipRanges {
173		attrEscape(out, src[end:rang[0]])
174		out.Write(src[rang[0]:rang[1]])
175		end = rang[1]
176	}
177	attrEscape(out, src[end:])
178}
179
180func (options *Html) GetFlags() int {
181	return options.flags
182}
183
184func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
185	marker := out.Len()
186	doubleSpace(out)
187
188	if options.flags&HTML_TOC != 0 {
189		// headerCount is incremented in htmlTocHeader
190		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
191	} else {
192		out.WriteString(fmt.Sprintf("<h%d>", level))
193	}
194
195	tocMarker := out.Len()
196	if !text() {
197		out.Truncate(marker)
198		return
199	}
200
201	// are we building a table of contents?
202	if options.flags&HTML_TOC != 0 {
203		options.TocHeader(out.Bytes()[tocMarker:], level)
204	}
205
206	out.WriteString(fmt.Sprintf("</h%d>\n", level))
207}
208
209func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
210	if options.flags&HTML_SKIP_HTML != 0 {
211		return
212	}
213
214	doubleSpace(out)
215	out.Write(text)
216	out.WriteByte('\n')
217}
218
219func (options *Html) HRule(out *bytes.Buffer) {
220	doubleSpace(out)
221	out.WriteString("<hr")
222	out.WriteString(options.closeTag)
223}
224
225func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
226	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
227		options.BlockCodeGithub(out, text, lang)
228	} else {
229		options.BlockCodeNormal(out, text, lang)
230	}
231}
232
233func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
234	doubleSpace(out)
235
236	// parse out the language names/classes
237	count := 0
238	for _, elt := range strings.Fields(lang) {
239		if elt[0] == '.' {
240			elt = elt[1:]
241		}
242		if len(elt) == 0 {
243			continue
244		}
245		if count == 0 {
246			out.WriteString("<pre><code class=\"")
247		} else {
248			out.WriteByte(' ')
249		}
250		attrEscape(out, []byte(elt))
251		count++
252	}
253
254	if count == 0 {
255		out.WriteString("<pre><code>")
256	} else {
257		out.WriteString("\">")
258	}
259
260	attrEscape(out, text)
261	out.WriteString("</code></pre>\n")
262}
263
264// GitHub style code block:
265//
266//              <pre lang="LANG"><code>
267//              ...
268//              </code></pre>
269//
270// Unlike other parsers, we store the language identifier in the <pre>,
271// and don't let the user generate custom classes.
272//
273// The language identifier in the <pre> block gets postprocessed and all
274// the code inside gets syntax highlighted with Pygments. This is much safer
275// than letting the user specify a CSS class for highlighting.
276//
277// Note that we only generate HTML for the first specifier.
278// E.g.
279//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
280func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
281	doubleSpace(out)
282
283	// parse out the language name
284	count := 0
285	for _, elt := range strings.Fields(lang) {
286		if elt[0] == '.' {
287			elt = elt[1:]
288		}
289		if len(elt) == 0 {
290			continue
291		}
292		out.WriteString("<pre lang=\"")
293		attrEscape(out, []byte(elt))
294		out.WriteString("\"><code>")
295		count++
296		break
297	}
298
299	if count == 0 {
300		out.WriteString("<pre><code>")
301	}
302
303	attrEscape(out, text)
304	out.WriteString("</code></pre>\n")
305}
306
307func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
308	doubleSpace(out)
309	out.WriteString("<blockquote>\n")
310	out.Write(text)
311	out.WriteString("</blockquote>\n")
312}
313
314func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
315	doubleSpace(out)
316	out.WriteString("<table>\n<thead>\n")
317	out.Write(header)
318	out.WriteString("</thead>\n\n<tbody>\n")
319	out.Write(body)
320	out.WriteString("</tbody>\n</table>\n")
321}
322
323func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
324	doubleSpace(out)
325	out.WriteString("<tr>\n")
326	out.Write(text)
327	out.WriteString("\n</tr>\n")
328}
329
330func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
331	doubleSpace(out)
332	switch align {
333	case TABLE_ALIGNMENT_LEFT:
334		out.WriteString("<th align=\"left\">")
335	case TABLE_ALIGNMENT_RIGHT:
336		out.WriteString("<th align=\"right\">")
337	case TABLE_ALIGNMENT_CENTER:
338		out.WriteString("<th align=\"center\">")
339	default:
340		out.WriteString("<th>")
341	}
342
343	out.Write(text)
344	out.WriteString("</th>")
345}
346
347func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
348	doubleSpace(out)
349	switch align {
350	case TABLE_ALIGNMENT_LEFT:
351		out.WriteString("<td align=\"left\">")
352	case TABLE_ALIGNMENT_RIGHT:
353		out.WriteString("<td align=\"right\">")
354	case TABLE_ALIGNMENT_CENTER:
355		out.WriteString("<td align=\"center\">")
356	default:
357		out.WriteString("<td>")
358	}
359
360	out.Write(text)
361	out.WriteString("</td>")
362}
363
364func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
365	out.WriteString("<div class=\"footnotes\">\n")
366	options.HRule(out)
367	options.List(out, text, LIST_TYPE_ORDERED)
368	out.WriteString("</div>\n")
369}
370
371func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
372	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
373		doubleSpace(out)
374	}
375	out.WriteString(`<li id="fn:`)
376	out.Write(slugify(name))
377	out.WriteString(`">`)
378	out.Write(text)
379	out.WriteString("</li>\n")
380}
381
382func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
383	marker := out.Len()
384	doubleSpace(out)
385
386	if flags&LIST_TYPE_ORDERED != 0 {
387		out.WriteString("<ol>")
388	} else {
389		out.WriteString("<ul>")
390	}
391	if !text() {
392		out.Truncate(marker)
393		return
394	}
395	if flags&LIST_TYPE_ORDERED != 0 {
396		out.WriteString("</ol>\n")
397	} else {
398		out.WriteString("</ul>\n")
399	}
400}
401
402func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
403	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
404		doubleSpace(out)
405	}
406	out.WriteString("<li>")
407	out.Write(text)
408	out.WriteString("</li>\n")
409}
410
411func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
412	marker := out.Len()
413	doubleSpace(out)
414
415	out.WriteString("<p>")
416	if !text() {
417		out.Truncate(marker)
418		return
419	}
420	out.WriteString("</p>\n")
421}
422
423func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
424	skipRanges := htmlEntity.FindAllIndex(link, -1)
425	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
426		// mark it but don't link it if it is not a safe link: no smartypants
427		out.WriteString("<tt>")
428		entityEscapeWithSkip(out, link, skipRanges)
429		out.WriteString("</tt>")
430		return
431	}
432
433	out.WriteString("<a href=\"")
434	if kind == LINK_TYPE_EMAIL {
435		out.WriteString("mailto:")
436	}
437	entityEscapeWithSkip(out, link, skipRanges)
438	out.WriteString("\">")
439
440	// Pretty print: if we get an email address as
441	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
442	// want to print the `mailto:` prefix
443	switch {
444	case bytes.HasPrefix(link, []byte("mailto://")):
445		attrEscape(out, link[len("mailto://"):])
446	case bytes.HasPrefix(link, []byte("mailto:")):
447		attrEscape(out, link[len("mailto:"):])
448	default:
449		entityEscapeWithSkip(out, link, skipRanges)
450	}
451
452	out.WriteString("</a>")
453}
454
455func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
456	out.WriteString("<code>")
457	attrEscape(out, text)
458	out.WriteString("</code>")
459}
460
461func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
462	out.WriteString("<strong>")
463	out.Write(text)
464	out.WriteString("</strong>")
465}
466
467func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
468	if len(text) == 0 {
469		return
470	}
471	out.WriteString("<em>")
472	out.Write(text)
473	out.WriteString("</em>")
474}
475
476func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
477	if options.flags&HTML_SKIP_IMAGES != 0 {
478		return
479	}
480
481	out.WriteString("<img src=\"")
482	attrEscape(out, link)
483	out.WriteString("\" alt=\"")
484	if len(alt) > 0 {
485		attrEscape(out, alt)
486	}
487	if len(title) > 0 {
488		out.WriteString("\" title=\"")
489		attrEscape(out, title)
490	}
491
492	out.WriteByte('"')
493	out.WriteString(options.closeTag)
494	return
495}
496
497func (options *Html) LineBreak(out *bytes.Buffer) {
498	out.WriteString("<br")
499	out.WriteString(options.closeTag)
500}
501
502func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
503	if options.flags&HTML_SKIP_LINKS != 0 {
504		// write the link text out but don't link it, just mark it with typewriter font
505		out.WriteString("<tt>")
506		attrEscape(out, content)
507		out.WriteString("</tt>")
508		return
509	}
510
511	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
512		// write the link text out but don't link it, just mark it with typewriter font
513		out.WriteString("<tt>")
514		attrEscape(out, content)
515		out.WriteString("</tt>")
516		return
517	}
518
519	out.WriteString("<a href=\"")
520	attrEscape(out, link)
521	if len(title) > 0 {
522		out.WriteString("\" title=\"")
523		attrEscape(out, title)
524	}
525	if options.flags&HTML_NOFOLLOW_LINKS != 0 {
526		out.WriteString("\" rel=\"nofollow")
527	}
528	out.WriteString("\">")
529	out.Write(content)
530	out.WriteString("</a>")
531	return
532}
533
534func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
535	if options.flags&HTML_SKIP_HTML != 0 {
536		return
537	}
538	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
539		return
540	}
541	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
542		return
543	}
544	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
545		return
546	}
547	out.Write(text)
548}
549
550func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
551	out.WriteString("<strong><em>")
552	out.Write(text)
553	out.WriteString("</em></strong>")
554}
555
556func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
557	out.WriteString("<del>")
558	out.Write(text)
559	out.WriteString("</del>")
560}
561
562func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
563	slug := slugify(ref)
564	out.WriteString(`<sup class="footnote-ref" id="fnref:`)
565	out.Write(slug)
566	out.WriteString(`"><a rel="footnote" href="#fn:`)
567	out.Write(slug)
568	out.WriteString(`">`)
569	out.WriteString(strconv.Itoa(id))
570	out.WriteString(`</a></sup>`)
571}
572
573func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
574	out.Write(entity)
575}
576
577func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
578	if options.flags&HTML_USE_SMARTYPANTS != 0 {
579		options.Smartypants(out, text)
580	} else {
581		attrEscape(out, text)
582	}
583}
584
585func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
586	smrt := smartypantsData{false, false}
587
588	// first do normal entity escaping
589	var escaped bytes.Buffer
590	attrEscape(&escaped, text)
591	text = escaped.Bytes()
592
593	mark := 0
594	for i := 0; i < len(text); i++ {
595		if action := options.smartypants[text[i]]; action != nil {
596			if i > mark {
597				out.Write(text[mark:i])
598			}
599
600			previousChar := byte(0)
601			if i > 0 {
602				previousChar = text[i-1]
603			}
604			i += action(out, &smrt, previousChar, text[i:])
605			mark = i + 1
606		}
607	}
608
609	if mark < len(text) {
610		out.Write(text[mark:])
611	}
612}
613
614func (options *Html) DocumentHeader(out *bytes.Buffer) {
615	if options.flags&HTML_COMPLETE_PAGE == 0 {
616		return
617	}
618
619	ending := ""
620	if options.flags&HTML_USE_XHTML != 0 {
621		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
622		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
623		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
624		ending = " /"
625	} else {
626		out.WriteString("<!DOCTYPE html>\n")
627		out.WriteString("<html>\n")
628	}
629	out.WriteString("<head>\n")
630	out.WriteString("  <title>")
631	options.NormalText(out, []byte(options.title))
632	out.WriteString("</title>\n")
633	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
634	out.WriteString(VERSION)
635	out.WriteString("\"")
636	out.WriteString(ending)
637	out.WriteString(">\n")
638	out.WriteString("  <meta charset=\"utf-8\"")
639	out.WriteString(ending)
640	out.WriteString(">\n")
641	if options.css != "" {
642		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
643		attrEscape(out, []byte(options.css))
644		out.WriteString("\"")
645		out.WriteString(ending)
646		out.WriteString(">\n")
647	}
648	out.WriteString("</head>\n")
649	out.WriteString("<body>\n")
650
651	options.tocMarker = out.Len()
652}
653
654func (options *Html) DocumentFooter(out *bytes.Buffer) {
655	// finalize and insert the table of contents
656	if options.flags&HTML_TOC != 0 {
657		options.TocFinalize()
658
659		// now we have to insert the table of contents into the document
660		var temp bytes.Buffer
661
662		// start by making a copy of everything after the document header
663		temp.Write(out.Bytes()[options.tocMarker:])
664
665		// now clear the copied material from the main output buffer
666		out.Truncate(options.tocMarker)
667
668		// corner case spacing issue
669		if options.flags&HTML_COMPLETE_PAGE != 0 {
670			out.WriteByte('\n')
671		}
672
673		// insert the table of contents
674		out.WriteString("<nav>\n")
675		out.Write(options.toc.Bytes())
676		out.WriteString("</nav>\n")
677
678		// corner case spacing issue
679		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
680			out.WriteByte('\n')
681		}
682
683		// write out everything that came after it
684		if options.flags&HTML_OMIT_CONTENTS == 0 {
685			out.Write(temp.Bytes())
686		}
687	}
688
689	if options.flags&HTML_COMPLETE_PAGE != 0 {
690		out.WriteString("\n</body>\n")
691		out.WriteString("</html>\n")
692	}
693
694}
695
696func (options *Html) TocHeader(text []byte, level int) {
697	for level > options.currentLevel {
698		switch {
699		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
700			// this sublist can nest underneath a header
701			size := options.toc.Len()
702			options.toc.Truncate(size - len("</li>\n"))
703
704		case options.currentLevel > 0:
705			options.toc.WriteString("<li>")
706		}
707		if options.toc.Len() > 0 {
708			options.toc.WriteByte('\n')
709		}
710		options.toc.WriteString("<ul>\n")
711		options.currentLevel++
712	}
713
714	for level < options.currentLevel {
715		options.toc.WriteString("</ul>")
716		if options.currentLevel > 1 {
717			options.toc.WriteString("</li>\n")
718		}
719		options.currentLevel--
720	}
721
722	options.toc.WriteString("<li><a href=\"#toc_")
723	options.toc.WriteString(strconv.Itoa(options.headerCount))
724	options.toc.WriteString("\">")
725	options.headerCount++
726
727	options.toc.Write(text)
728
729	options.toc.WriteString("</a></li>\n")
730}
731
732func (options *Html) TocFinalize() {
733	for options.currentLevel > 1 {
734		options.toc.WriteString("</ul></li>\n")
735		options.currentLevel--
736	}
737
738	if options.currentLevel > 0 {
739		options.toc.WriteString("</ul>\n")
740	}
741}
742
743func isHtmlTag(tag []byte, tagname string) bool {
744	found, _ := findHtmlTagPos(tag, tagname)
745	return found
746}
747
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of
// char in html at or after start that is not inside a quoted string. The
// quoted content might be JavaScript, so single ('), double (") and grave
// (`) quotes are all honoured.
//
// A quoted string runs from its opening quote to the next occurrence of the
// same quote character; other quote characters in between are ordinary
// content. This keeps an apostrophe inside a double-quoted attribute value
// (title="it's") from being mistaken for the start of a new string.
//
// When char is not found outside quotes the function returns start, so a
// caller must be able to tell that apart from a match located at start
// itself.
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var open byte // the quote character currently open, or 0 outside quotes
	for i := start; i < len(html); i++ {
		c := html[i]
		switch {
		case open != 0:
			// inside a string only the matching quote is significant
			if c == open {
				open = 0
			}
		case c == char:
			return i
		case c == '\'' || c == '"' || c == '`':
			open = c
		}
	}
	return start
}
770
771func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
772	i := 0
773	if i < len(tag) && tag[0] != '<' {
774		return false, -1
775	}
776	i++
777	i = skipSpace(tag, i)
778
779	if i < len(tag) && tag[i] == '/' {
780		i++
781	}
782
783	i = skipSpace(tag, i)
784	j := 0
785	for ; i < len(tag); i, j = i+1, j+1 {
786		if j >= len(tagname) {
787			break
788		}
789
790		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
791			return false, -1
792		}
793	}
794
795	if i == len(tag) {
796		return false, -1
797	}
798
799	rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
800	if rightAngle > i {
801		return true, rightAngle
802	}
803
804	return false, -1
805}
806
807func sanitizeHtml(html []byte) []byte {
808	var result []byte
809	for string(html) != "" {
810		skip, tag, rest := findHtmlTag(html)
811		html = rest
812		result = append(result, skip...)
813		result = append(result, sanitizeTag(tag)...)
814	}
815	return append(result, []byte("\n")...)
816}
817
818func sanitizeTag(tag []byte) []byte {
819	if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
820		return tag
821	} else {
822		return []byte("")
823	}
824}
825
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. When there is none it returns len(text), or start itself
// if start is already beyond the end of text.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
833
834func findHtmlTag(html []byte) (skip, tag, rest []byte) {
835	start := skipUntilChar(html, 0, '<')
836	rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
837	if rightAngle > start {
838		skip = html[0:start]
839		tag = html[start : rightAngle+1]
840		rest = html[rightAngle+1:]
841		return
842	}
843
844	return []byte(""), []byte(""), []byte("")
845}
846
847func skipSpace(tag []byte, i int) int {
848	for i < len(tag) && isspace(tag[i]) {
849		i++
850	}
851	return i
852}
853
// doubleSpace appends a newline to out unless out is still empty, so that
// block-level output is separated from whatever was written before it
// without a leading blank line at the start of the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}
all repos — grayfriday @ cd3fa08cb15a5c07b87d967cea59653e63948a77

blackfriday fork with a few changes