html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "regexp"
22 "strconv"
23 "strings"
24)
25
// Html renderer configuration options.
//
// Every option is a distinct bit, so options are combined by ORing them
// together before handing the result to HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // drop raw HTML blocks and inline tags
	HTML_SKIP_STYLE                           // drop inline <style> tags
	HTML_SKIP_IMAGES                          // do not emit images
	HTML_SKIP_LINKS                           // do not emit links
	HTML_SANITIZE_OUTPUT                      // strip everything from the output that is not known to be safe
	HTML_SAFELINK                             // only create links for trusted protocols
	HTML_TOC                                  // build a table of contents
	HTML_OMIT_CONTENTS                        // leave out the main contents (standalone table of contents)
	HTML_COMPLETE_PAGE                        // emit a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // render fenced code using the GitHub rules
	HTML_USE_XHTML                            // emit XHTML rather than HTML
	HTML_USE_SMARTYPANTS                      // turn on smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // turn on smart fractions (needs HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // turn on LaTeX-style dashes (needs HTML_USE_SMARTYPANTS)
)
43
var (
	// tags lists the element names that tagWhitelist accepts as plain
	// opening or closing tags (no attributes).
	tags = []string{
		"b", "blockquote", "code", "del", "dd", "dl", "dt", "em",
		"h1", "h2", "h3", "h4", "h5", "h6",
		"i", "kbd", "li", "ol", "p", "pre",
		"s", "sup", "sub", "strong", "strike", "ul",
	}

	// urlRe is the regular expression fragment for an acceptable URL: an
	// http, https or ftp address, or a path starting with a slash.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// tagWhitelist matches a whole attribute-free tag from tags, or <br>/<hr>.
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)

	// anchorClean matches a whole <a> tag with an href accepted by urlRe and
	// an optional title, or a closing </a>.
	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)

	// imgClean matches a whole <img> tag with a src accepted by urlRe and
	// optional width, height, alt and title attributes, in that order.
	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
78
79// Html is a type that implements the Renderer interface for HTML output.
80//
81// Do not create this directly, instead use the HtmlRenderer function.
82type Html struct {
83 flags int // HTML_* options
84 closeTag string // how to end singleton tags: either " />\n" or ">\n"
85 title string // document title
86 css string // optional css file url (used with HTML_COMPLETE_PAGE)
87
88 // table of contents data
89 tocMarker int
90 headerCount int
91 currentLevel int
92 toc *bytes.Buffer
93
94 smartypants *smartypantsRenderer
95}
96
// Terminators written after a singleton tag such as <hr or <br.
const (
	xhtmlClose = " />\n" // self-closing form, used with HTML_USE_XHTML
	htmlClose  = ">\n"   // plain HTML form
)
101
102// HtmlRenderer creates and configures an Html object, which
103// satisfies the Renderer interface.
104//
105// flags is a set of HTML_* options ORed together.
106// title is the title of the document, and css is a URL for the document's
107// stylesheet.
108// title and css are only used when HTML_COMPLETE_PAGE is selected.
109func HtmlRenderer(flags int, title string, css string) Renderer {
110 // configure the rendering engine
111 closeTag := htmlClose
112 if flags&HTML_USE_XHTML != 0 {
113 closeTag = xhtmlClose
114 }
115
116 return &Html{
117 flags: flags,
118 closeTag: closeTag,
119 title: title,
120 css: css,
121
122 headerCount: 0,
123 currentLevel: 0,
124 toc: new(bytes.Buffer),
125
126 smartypants: smartypants(flags),
127 }
128}
129
// attrEscape writes src to out, replacing the characters that are
// significant in HTML text and attribute values with entities:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// All other bytes are copied through unchanged. Runs of ordinary bytes are
// written with a single Write call, because this function is called for
// nearly all text that is rendered.
func attrEscape(out *bytes.Buffer, src []byte) {
	// org is the start of the run of ordinary bytes not yet written.
	org := 0
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// flush the ordinary bytes seen since the last escape
		if i > org {
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
175
176func (options *Html) GetFlags() int {
177 return options.flags
178}
179
180func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
181 marker := out.Len()
182 doubleSpace(out)
183
184 if options.flags&HTML_TOC != 0 {
185 // headerCount is incremented in htmlTocHeader
186 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
187 } else {
188 out.WriteString(fmt.Sprintf("<h%d>", level))
189 }
190
191 tocMarker := out.Len()
192 if !text() {
193 out.Truncate(marker)
194 return
195 }
196
197 // are we building a table of contents?
198 if options.flags&HTML_TOC != 0 {
199 options.TocHeader(out.Bytes()[tocMarker:], level)
200 }
201
202 out.WriteString(fmt.Sprintf("</h%d>\n", level))
203}
204
205func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
206 if options.flags&HTML_SKIP_HTML != 0 {
207 return
208 }
209
210 doubleSpace(out)
211 out.Write(text)
212 out.WriteByte('\n')
213}
214
215func (options *Html) HRule(out *bytes.Buffer) {
216 doubleSpace(out)
217 out.WriteString("<hr")
218 out.WriteString(options.closeTag)
219}
220
221func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
222 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
223 options.BlockCodeGithub(out, text, lang)
224 } else {
225 options.BlockCodeNormal(out, text, lang)
226 }
227}
228
229func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
230 doubleSpace(out)
231
232 // parse out the language names/classes
233 count := 0
234 for _, elt := range strings.Fields(lang) {
235 if elt[0] == '.' {
236 elt = elt[1:]
237 }
238 if len(elt) == 0 {
239 continue
240 }
241 if count == 0 {
242 out.WriteString("<pre><code class=\"")
243 } else {
244 out.WriteByte(' ')
245 }
246 attrEscape(out, []byte(elt))
247 count++
248 }
249
250 if count == 0 {
251 out.WriteString("<pre><code>")
252 } else {
253 out.WriteString("\">")
254 }
255
256 attrEscape(out, text)
257 out.WriteString("</code></pre>\n")
258}
259
260// GitHub style code block:
261//
262// <pre lang="LANG"><code>
263// ...
264// </code></pre>
265//
266// Unlike other parsers, we store the language identifier in the <pre>,
267// and don't let the user generate custom classes.
268//
269// The language identifier in the <pre> block gets postprocessed and all
270// the code inside gets syntax highlighted with Pygments. This is much safer
271// than letting the user specify a CSS class for highlighting.
272//
273// Note that we only generate HTML for the first specifier.
274// E.g.
275// ~~~~ {.python .numbered} => <pre lang="python"><code>
276func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
277 doubleSpace(out)
278
279 // parse out the language name
280 count := 0
281 for _, elt := range strings.Fields(lang) {
282 if elt[0] == '.' {
283 elt = elt[1:]
284 }
285 if len(elt) == 0 {
286 continue
287 }
288 out.WriteString("<pre lang=\"")
289 attrEscape(out, []byte(elt))
290 out.WriteString("\"><code>")
291 count++
292 break
293 }
294
295 if count == 0 {
296 out.WriteString("<pre><code>")
297 }
298
299 attrEscape(out, text)
300 out.WriteString("</code></pre>\n")
301}
302
303func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
304 doubleSpace(out)
305 out.WriteString("<blockquote>\n")
306 out.Write(text)
307 out.WriteString("</blockquote>\n")
308}
309
310func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
311 doubleSpace(out)
312 out.WriteString("<table>\n<thead>\n")
313 out.Write(header)
314 out.WriteString("</thead>\n\n<tbody>\n")
315 out.Write(body)
316 out.WriteString("</tbody>\n</table>\n")
317}
318
319func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
320 doubleSpace(out)
321 out.WriteString("<tr>\n")
322 out.Write(text)
323 out.WriteString("\n</tr>\n")
324}
325
326func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
327 doubleSpace(out)
328 switch align {
329 case TABLE_ALIGNMENT_LEFT:
330 out.WriteString("<th align=\"left\">")
331 case TABLE_ALIGNMENT_RIGHT:
332 out.WriteString("<th align=\"right\">")
333 case TABLE_ALIGNMENT_CENTER:
334 out.WriteString("<th align=\"center\">")
335 default:
336 out.WriteString("<th>")
337 }
338
339 out.Write(text)
340 out.WriteString("</th>")
341}
342
343func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
344 doubleSpace(out)
345 switch align {
346 case TABLE_ALIGNMENT_LEFT:
347 out.WriteString("<td align=\"left\">")
348 case TABLE_ALIGNMENT_RIGHT:
349 out.WriteString("<td align=\"right\">")
350 case TABLE_ALIGNMENT_CENTER:
351 out.WriteString("<td align=\"center\">")
352 default:
353 out.WriteString("<td>")
354 }
355
356 out.Write(text)
357 out.WriteString("</td>")
358}
359
360func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
361 out.WriteString("<div class=\"footnotes\">\n")
362 options.HRule(out)
363 options.List(out, text, LIST_TYPE_ORDERED)
364 out.WriteString("</div>\n")
365}
366
367func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
368 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
369 doubleSpace(out)
370 }
371 out.WriteString(`<li id="fn:`)
372 out.Write(slugify(name))
373 out.WriteString(`">`)
374 out.Write(text)
375 out.WriteString("</li>\n")
376}
377
378func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
379 marker := out.Len()
380 doubleSpace(out)
381
382 if flags&LIST_TYPE_ORDERED != 0 {
383 out.WriteString("<ol>")
384 } else {
385 out.WriteString("<ul>")
386 }
387 if !text() {
388 out.Truncate(marker)
389 return
390 }
391 if flags&LIST_TYPE_ORDERED != 0 {
392 out.WriteString("</ol>\n")
393 } else {
394 out.WriteString("</ul>\n")
395 }
396}
397
398func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
399 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
400 doubleSpace(out)
401 }
402 out.WriteString("<li>")
403 out.Write(text)
404 out.WriteString("</li>\n")
405}
406
407func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
408 marker := out.Len()
409 doubleSpace(out)
410
411 out.WriteString("<p>")
412 if !text() {
413 out.Truncate(marker)
414 return
415 }
416 out.WriteString("</p>\n")
417}
418
419func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
420 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
421 // mark it but don't link it if it is not a safe link: no smartypants
422 out.WriteString("<tt>")
423 attrEscape(out, link)
424 out.WriteString("</tt>")
425 return
426 }
427
428 out.WriteString("<a href=\"")
429 if kind == LINK_TYPE_EMAIL {
430 out.WriteString("mailto:")
431 }
432 attrEscape(out, link)
433 out.WriteString("\">")
434
435 // Pretty print: if we get an email address as
436 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
437 // want to print the `mailto:` prefix
438 switch {
439 case bytes.HasPrefix(link, []byte("mailto://")):
440 attrEscape(out, link[len("mailto://"):])
441 case bytes.HasPrefix(link, []byte("mailto:")):
442 attrEscape(out, link[len("mailto:"):])
443 default:
444 attrEscape(out, link)
445 }
446
447 out.WriteString("</a>")
448}
449
450func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
451 out.WriteString("<code>")
452 attrEscape(out, text)
453 out.WriteString("</code>")
454}
455
456func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
457 out.WriteString("<strong>")
458 out.Write(text)
459 out.WriteString("</strong>")
460}
461
462func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
463 if len(text) == 0 {
464 return
465 }
466 out.WriteString("<em>")
467 out.Write(text)
468 out.WriteString("</em>")
469}
470
471func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
472 if options.flags&HTML_SKIP_IMAGES != 0 {
473 return
474 }
475
476 out.WriteString("<img src=\"")
477 attrEscape(out, link)
478 out.WriteString("\" alt=\"")
479 if len(alt) > 0 {
480 attrEscape(out, alt)
481 }
482 if len(title) > 0 {
483 out.WriteString("\" title=\"")
484 attrEscape(out, title)
485 }
486
487 out.WriteByte('"')
488 out.WriteString(options.closeTag)
489 return
490}
491
492func (options *Html) LineBreak(out *bytes.Buffer) {
493 out.WriteString("<br")
494 out.WriteString(options.closeTag)
495}
496
497func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
498 if options.flags&HTML_SKIP_LINKS != 0 {
499 // write the link text out but don't link it, just mark it with typewriter font
500 out.WriteString("<tt>")
501 attrEscape(out, content)
502 out.WriteString("</tt>")
503 return
504 }
505
506 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
507 // write the link text out but don't link it, just mark it with typewriter font
508 out.WriteString("<tt>")
509 attrEscape(out, content)
510 out.WriteString("</tt>")
511 return
512 }
513
514 out.WriteString("<a href=\"")
515 attrEscape(out, link)
516 if len(title) > 0 {
517 out.WriteString("\" title=\"")
518 attrEscape(out, title)
519 }
520 out.WriteString("\">")
521 out.Write(content)
522 out.WriteString("</a>")
523 return
524}
525
526func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
527 if options.flags&HTML_SKIP_HTML != 0 {
528 return
529 }
530 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
531 return
532 }
533 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
534 return
535 }
536 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
537 return
538 }
539 out.Write(text)
540}
541
542func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
543 out.WriteString("<strong><em>")
544 out.Write(text)
545 out.WriteString("</em></strong>")
546}
547
548func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
549 out.WriteString("<del>")
550 out.Write(text)
551 out.WriteString("</del>")
552}
553
554func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
555 slug := slugify(ref)
556 out.WriteString(`<sup class="footnote-ref" id="fnref:`)
557 out.Write(slug)
558 out.WriteString(`"><a rel="footnote" href="#fn:`)
559 out.Write(slug)
560 out.WriteString(`">`)
561 out.WriteString(strconv.Itoa(id))
562 out.WriteString(`</a></sup>`)
563}
564
565func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
566 out.Write(entity)
567}
568
569func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
570 if options.flags&HTML_USE_SMARTYPANTS != 0 {
571 options.Smartypants(out, text)
572 } else {
573 attrEscape(out, text)
574 }
575}
576
577func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
578 smrt := smartypantsData{false, false}
579
580 // first do normal entity escaping
581 var escaped bytes.Buffer
582 attrEscape(&escaped, text)
583 text = escaped.Bytes()
584
585 mark := 0
586 for i := 0; i < len(text); i++ {
587 if action := options.smartypants[text[i]]; action != nil {
588 if i > mark {
589 out.Write(text[mark:i])
590 }
591
592 previousChar := byte(0)
593 if i > 0 {
594 previousChar = text[i-1]
595 }
596 i += action(out, &smrt, previousChar, text[i:])
597 mark = i + 1
598 }
599 }
600
601 if mark < len(text) {
602 out.Write(text[mark:])
603 }
604}
605
606func (options *Html) DocumentHeader(out *bytes.Buffer) {
607 if options.flags&HTML_COMPLETE_PAGE == 0 {
608 return
609 }
610
611 ending := ""
612 if options.flags&HTML_USE_XHTML != 0 {
613 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
614 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
615 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
616 ending = " /"
617 } else {
618 out.WriteString("<!DOCTYPE html>\n")
619 out.WriteString("<html>\n")
620 }
621 out.WriteString("<head>\n")
622 out.WriteString(" <title>")
623 options.NormalText(out, []byte(options.title))
624 out.WriteString("</title>\n")
625 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
626 out.WriteString(VERSION)
627 out.WriteString("\"")
628 out.WriteString(ending)
629 out.WriteString(">\n")
630 out.WriteString(" <meta charset=\"utf-8\"")
631 out.WriteString(ending)
632 out.WriteString(">\n")
633 if options.css != "" {
634 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
635 attrEscape(out, []byte(options.css))
636 out.WriteString("\"")
637 out.WriteString(ending)
638 out.WriteString(">\n")
639 }
640 out.WriteString("</head>\n")
641 out.WriteString("<body>\n")
642
643 options.tocMarker = out.Len()
644}
645
646func (options *Html) DocumentFooter(out *bytes.Buffer) {
647 // finalize and insert the table of contents
648 if options.flags&HTML_TOC != 0 {
649 options.TocFinalize()
650
651 // now we have to insert the table of contents into the document
652 var temp bytes.Buffer
653
654 // start by making a copy of everything after the document header
655 temp.Write(out.Bytes()[options.tocMarker:])
656
657 // now clear the copied material from the main output buffer
658 out.Truncate(options.tocMarker)
659
660 // corner case spacing issue
661 if options.flags&HTML_COMPLETE_PAGE != 0 {
662 out.WriteByte('\n')
663 }
664
665 // insert the table of contents
666 out.WriteString("<nav>\n")
667 out.Write(options.toc.Bytes())
668 out.WriteString("</nav>\n")
669
670 // corner case spacing issue
671 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
672 out.WriteByte('\n')
673 }
674
675 // write out everything that came after it
676 if options.flags&HTML_OMIT_CONTENTS == 0 {
677 out.Write(temp.Bytes())
678 }
679 }
680
681 if options.flags&HTML_COMPLETE_PAGE != 0 {
682 out.WriteString("\n</body>\n")
683 out.WriteString("</html>\n")
684 }
685
686}
687
688func (options *Html) TocHeader(text []byte, level int) {
689 for level > options.currentLevel {
690 switch {
691 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
692 // this sublist can nest underneath a header
693 size := options.toc.Len()
694 options.toc.Truncate(size - len("</li>\n"))
695
696 case options.currentLevel > 0:
697 options.toc.WriteString("<li>")
698 }
699 if options.toc.Len() > 0 {
700 options.toc.WriteByte('\n')
701 }
702 options.toc.WriteString("<ul>\n")
703 options.currentLevel++
704 }
705
706 for level < options.currentLevel {
707 options.toc.WriteString("</ul>")
708 if options.currentLevel > 1 {
709 options.toc.WriteString("</li>\n")
710 }
711 options.currentLevel--
712 }
713
714 options.toc.WriteString("<li><a href=\"#toc_")
715 options.toc.WriteString(strconv.Itoa(options.headerCount))
716 options.toc.WriteString("\">")
717 options.headerCount++
718
719 options.toc.Write(text)
720
721 options.toc.WriteString("</a></li>\n")
722}
723
724func (options *Html) TocFinalize() {
725 for options.currentLevel > 1 {
726 options.toc.WriteString("</ul></li>\n")
727 options.currentLevel--
728 }
729
730 if options.currentLevel > 0 {
731 options.toc.WriteString("</ul>\n")
732 }
733}
734
735func isHtmlTag(tag []byte, tagname string) bool {
736 found, _ := findHtmlTagPos(tag, tagname)
737 return found
738}
739
// skipUntilCharIgnoreQuotes searches html from index start for char,
// ignoring occurrences inside single, double or grave quotes (the quoted
// text might be JavaScript). It returns the index of the first unquoted
// occurrence, or start when there is none.
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var single, double, grave bool

	for pos := start; pos < len(html); pos++ {
		c := html[pos]

		if c == char && !(single || double || grave) {
			return pos
		}

		// each quote character toggles its own quoting state
		switch c {
		case '\'':
			single = !single
		case '"':
			double = !double
		case '`':
			grave = !grave
		}
	}

	return start
}
762
763func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
764 i := 0
765 if i < len(tag) && tag[0] != '<' {
766 return false, -1
767 }
768 i++
769 i = skipSpace(tag, i)
770
771 if i < len(tag) && tag[i] == '/' {
772 i++
773 }
774
775 i = skipSpace(tag, i)
776 j := 0
777 for ; i < len(tag); i, j = i+1, j+1 {
778 if j >= len(tagname) {
779 break
780 }
781
782 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
783 return false, -1
784 }
785 }
786
787 if i == len(tag) {
788 return false, -1
789 }
790
791 rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
792 if rightAngle > i {
793 return true, rightAngle
794 }
795
796 return false, -1
797}
798
799func sanitizeHtml(html []byte) []byte {
800 var result []byte
801 for string(html) != "" {
802 skip, tag, rest := findHtmlTag(html)
803 html = rest
804 result = append(result, skip...)
805 result = append(result, sanitizeTag(tag)...)
806 }
807 return append(result, []byte("\n")...)
808}
809
810func sanitizeTag(tag []byte) []byte {
811 if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
812 return tag
813 } else {
814 return []byte("")
815 }
816}
817
// skipUntilChar returns the index of the first occurrence of char in text
// at or after start. If there is none it returns len(text), or start
// itself when start is already past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
825
826func findHtmlTag(html []byte) (skip, tag, rest []byte) {
827 start := skipUntilChar(html, 0, '<')
828 rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
829 if rightAngle > start {
830 skip = html[0:start]
831 tag = html[start : rightAngle+1]
832 rest = html[rightAngle+1:]
833 return
834 }
835
836 return []byte(""), []byte(""), []byte("")
837}
838
839func skipSpace(tag []byte, i int) int {
840 for i < len(tag) && isspace(tag[i]) {
841 i++
842 }
843 return i
844}
845
// doubleSpace appends a newline to out, unless out is still empty.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}