html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "regexp"
22 "strconv"
23 "strings"
24)
25
// Html renderer configuration options.
//
// Each option is a distinct bit, so any combination can be ORed together
// and passed to HtmlRenderer as a single int.
const (
	// HTML_SKIP_HTML skips preformatted HTML blocks.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE skips embedded <style> elements.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES skips embedded images.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS skips all links.
	HTML_SKIP_LINKS
	// HTML_SANITIZE_OUTPUT strips output of everything that's not known to be safe.
	HTML_SANITIZE_OUTPUT
	// HTML_SAFELINK only links to trusted protocols.
	HTML_SAFELINK
	// HTML_TOC generates a table of contents.
	HTML_TOC
	// HTML_OMIT_CONTENTS skips the main contents (for a standalone table of contents).
	HTML_OMIT_CONTENTS
	// HTML_COMPLETE_PAGE generates a complete HTML page.
	HTML_COMPLETE_PAGE
	// HTML_GITHUB_BLOCKCODE uses github fenced code rendering rules.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML generates XHTML output instead of HTML.
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS enables smart punctuation substitutions.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS enables smart fractions (with HTML_USE_SMARTYPANTS).
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES enables LaTeX-style dashes (with HTML_USE_SMARTYPANTS).
	HTML_SMARTYPANTS_LATEX_DASHES
)
43
// Data used when deciding which individual HTML tags are allowed through
// by sanitizeTag.
var (
	// tags lists the element names that are accepted as bare opening or
	// closing tags (no attributes) by tagWhitelist.
	tags = []string{
		"b", "blockquote", "code", "del", "dd", "dl", "dt", "em",
		"h1", "h2", "h3", "h4", "h5", "h6",
		"i", "kbd", "li", "ol", "p", "pre",
		"s", "sup", "sub", "strong", "strike", "ul",
	}

	// urlRe is the pattern fragment for an acceptable URL: either an
	// http, https or ftp URL, or a path starting with a slash.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// tagWhitelist matches a whole tag that is either an attribute-free
	// opening/closing tag from the tags list, or a <br>/<hr> element.
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)

	// anchorClean matches an <a> tag carrying only an href (matching urlRe)
	// and an optional title, or a closing </a>.
	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)

	// imgClean matches an <img> tag whose src matches urlRe, followed by
	// optional width, height, alt and title attributes in that order.
	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
78
79// Html is a type that implements the Renderer interface for HTML output.
80//
81// Do not create this directly, instead use the HtmlRenderer function.
82type Html struct {
83 flags int // HTML_* options
84 closeTag string // how to end singleton tags: either " />\n" or ">\n"
85 title string // document title
86 css string // optional css file url (used with HTML_COMPLETE_PAGE)
87
88 // table of contents data
89 tocMarker int
90 headerCount int
91 currentLevel int
92 toc *bytes.Buffer
93
94 smartypants *smartypantsRenderer
95}
96
// xhtmlClose terminates a singleton tag in XHTML output.
const xhtmlClose = " />\n"

// htmlClose terminates a singleton tag in plain HTML output.
const htmlClose = ">\n"
101
102// HtmlRenderer creates and configures an Html object, which
103// satisfies the Renderer interface.
104//
105// flags is a set of HTML_* options ORed together.
106// title is the title of the document, and css is a URL for the document's
107// stylesheet.
108// title and css are only used when HTML_COMPLETE_PAGE is selected.
109func HtmlRenderer(flags int, title string, css string) Renderer {
110 // configure the rendering engine
111 closeTag := htmlClose
112 if flags&HTML_USE_XHTML != 0 {
113 closeTag = xhtmlClose
114 }
115
116 return &Html{
117 flags: flags,
118 closeTag: closeTag,
119 title: title,
120 css: css,
121
122 headerCount: 0,
123 currentLevel: 0,
124 toc: new(bytes.Buffer),
125
126 smartypants: smartypants(flags),
127 }
128}
129
// escapeSingleChar returns the HTML entity for char and true when char is
// one of the four characters that must be escaped (", &, < and >).
// For any other byte it returns "" and false.
//
// Using if statements is a bit faster than a switch statement. As the
// compiler improves, this should become unnecessary; it is only worthwhile
// because attrEscape is the single largest CPU user in normal use.
// Also tried using a map, but that gave a ~3x slowdown.
func escapeSingleChar(char byte) (string, bool) {
	if char == '"' {
		return "&quot;", true
	}
	if char == '&' {
		return "&amp;", true
	}
	if char == '<' {
		return "&lt;", true
	}
	if char == '>' {
		return "&gt;", true
	}
	return "", false
}
149
150func attrEscape(out *bytes.Buffer, src []byte) {
151 org := 0
152 for i, ch := range src {
153 if entity, ok := escapeSingleChar(ch); ok {
154 if i > org {
155 // copy all the normal characters since the last escape
156 out.Write(src[org:i])
157 }
158 org = i + 1
159 out.WriteString(entity)
160 }
161 }
162 if org < len(src) {
163 out.Write(src[org:])
164 }
165}
166
167func (options *Html) GetFlags() int {
168 return options.flags
169}
170
171func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
172 marker := out.Len()
173 doubleSpace(out)
174
175 if options.flags&HTML_TOC != 0 {
176 // headerCount is incremented in htmlTocHeader
177 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
178 } else {
179 out.WriteString(fmt.Sprintf("<h%d>", level))
180 }
181
182 tocMarker := out.Len()
183 if !text() {
184 out.Truncate(marker)
185 return
186 }
187
188 // are we building a table of contents?
189 if options.flags&HTML_TOC != 0 {
190 options.TocHeader(out.Bytes()[tocMarker:], level)
191 }
192
193 out.WriteString(fmt.Sprintf("</h%d>\n", level))
194}
195
196func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
197 if options.flags&HTML_SKIP_HTML != 0 {
198 return
199 }
200
201 doubleSpace(out)
202 out.Write(text)
203 out.WriteByte('\n')
204}
205
206func (options *Html) HRule(out *bytes.Buffer) {
207 doubleSpace(out)
208 out.WriteString("<hr")
209 out.WriteString(options.closeTag)
210}
211
212func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
213 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
214 options.BlockCodeGithub(out, text, lang)
215 } else {
216 options.BlockCodeNormal(out, text, lang)
217 }
218}
219
220func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
221 doubleSpace(out)
222
223 // parse out the language names/classes
224 count := 0
225 for _, elt := range strings.Fields(lang) {
226 if elt[0] == '.' {
227 elt = elt[1:]
228 }
229 if len(elt) == 0 {
230 continue
231 }
232 if count == 0 {
233 out.WriteString("<pre><code class=\"")
234 } else {
235 out.WriteByte(' ')
236 }
237 attrEscape(out, []byte(elt))
238 count++
239 }
240
241 if count == 0 {
242 out.WriteString("<pre><code>")
243 } else {
244 out.WriteString("\">")
245 }
246
247 attrEscape(out, text)
248 out.WriteString("</code></pre>\n")
249}
250
251// GitHub style code block:
252//
253// <pre lang="LANG"><code>
254// ...
255// </code></pre>
256//
257// Unlike other parsers, we store the language identifier in the <pre>,
258// and don't let the user generate custom classes.
259//
260// The language identifier in the <pre> block gets postprocessed and all
261// the code inside gets syntax highlighted with Pygments. This is much safer
262// than letting the user specify a CSS class for highlighting.
263//
264// Note that we only generate HTML for the first specifier.
265// E.g.
266// ~~~~ {.python .numbered} => <pre lang="python"><code>
267func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
268 doubleSpace(out)
269
270 // parse out the language name
271 count := 0
272 for _, elt := range strings.Fields(lang) {
273 if elt[0] == '.' {
274 elt = elt[1:]
275 }
276 if len(elt) == 0 {
277 continue
278 }
279 out.WriteString("<pre lang=\"")
280 attrEscape(out, []byte(elt))
281 out.WriteString("\"><code>")
282 count++
283 break
284 }
285
286 if count == 0 {
287 out.WriteString("<pre><code>")
288 }
289
290 attrEscape(out, text)
291 out.WriteString("</code></pre>\n")
292}
293
294func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
295 doubleSpace(out)
296 out.WriteString("<blockquote>\n")
297 out.Write(text)
298 out.WriteString("</blockquote>\n")
299}
300
301func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
302 doubleSpace(out)
303 out.WriteString("<table>\n<thead>\n")
304 out.Write(header)
305 out.WriteString("</thead>\n\n<tbody>\n")
306 out.Write(body)
307 out.WriteString("</tbody>\n</table>\n")
308}
309
310func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
311 doubleSpace(out)
312 out.WriteString("<tr>\n")
313 out.Write(text)
314 out.WriteString("\n</tr>\n")
315}
316
317func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
318 doubleSpace(out)
319 switch align {
320 case TABLE_ALIGNMENT_LEFT:
321 out.WriteString("<th align=\"left\">")
322 case TABLE_ALIGNMENT_RIGHT:
323 out.WriteString("<th align=\"right\">")
324 case TABLE_ALIGNMENT_CENTER:
325 out.WriteString("<th align=\"center\">")
326 default:
327 out.WriteString("<th>")
328 }
329
330 out.Write(text)
331 out.WriteString("</th>")
332}
333
334func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
335 doubleSpace(out)
336 switch align {
337 case TABLE_ALIGNMENT_LEFT:
338 out.WriteString("<td align=\"left\">")
339 case TABLE_ALIGNMENT_RIGHT:
340 out.WriteString("<td align=\"right\">")
341 case TABLE_ALIGNMENT_CENTER:
342 out.WriteString("<td align=\"center\">")
343 default:
344 out.WriteString("<td>")
345 }
346
347 out.Write(text)
348 out.WriteString("</td>")
349}
350
351func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
352 out.WriteString("<div class=\"footnotes\">\n")
353 options.HRule(out)
354 options.List(out, text, LIST_TYPE_ORDERED)
355 out.WriteString("</div>\n")
356}
357
358func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
359 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
360 doubleSpace(out)
361 }
362 out.WriteString(`<li id="fn:`)
363 out.Write(slugify(name))
364 out.WriteString(`">`)
365 out.Write(text)
366 out.WriteString("</li>\n")
367}
368
369func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
370 marker := out.Len()
371 doubleSpace(out)
372
373 if flags&LIST_TYPE_ORDERED != 0 {
374 out.WriteString("<ol>")
375 } else {
376 out.WriteString("<ul>")
377 }
378 if !text() {
379 out.Truncate(marker)
380 return
381 }
382 if flags&LIST_TYPE_ORDERED != 0 {
383 out.WriteString("</ol>\n")
384 } else {
385 out.WriteString("</ul>\n")
386 }
387}
388
389func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
390 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
391 doubleSpace(out)
392 }
393 out.WriteString("<li>")
394 out.Write(text)
395 out.WriteString("</li>\n")
396}
397
398func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
399 marker := out.Len()
400 doubleSpace(out)
401
402 out.WriteString("<p>")
403 if !text() {
404 out.Truncate(marker)
405 return
406 }
407 out.WriteString("</p>\n")
408}
409
410func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
411 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
412 // mark it but don't link it if it is not a safe link: no smartypants
413 out.WriteString("<tt>")
414 attrEscape(out, link)
415 out.WriteString("</tt>")
416 return
417 }
418
419 out.WriteString("<a href=\"")
420 if kind == LINK_TYPE_EMAIL {
421 out.WriteString("mailto:")
422 }
423 attrEscape(out, link)
424 out.WriteString("\">")
425
426 // Pretty print: if we get an email address as
427 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
428 // want to print the `mailto:` prefix
429 switch {
430 case bytes.HasPrefix(link, []byte("mailto://")):
431 attrEscape(out, link[len("mailto://"):])
432 case bytes.HasPrefix(link, []byte("mailto:")):
433 attrEscape(out, link[len("mailto:"):])
434 default:
435 attrEscape(out, link)
436 }
437
438 out.WriteString("</a>")
439}
440
441func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
442 out.WriteString("<code>")
443 attrEscape(out, text)
444 out.WriteString("</code>")
445}
446
447func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
448 out.WriteString("<strong>")
449 out.Write(text)
450 out.WriteString("</strong>")
451}
452
453func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
454 if len(text) == 0 {
455 return
456 }
457 out.WriteString("<em>")
458 out.Write(text)
459 out.WriteString("</em>")
460}
461
462func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
463 if options.flags&HTML_SKIP_IMAGES != 0 {
464 return
465 }
466
467 out.WriteString("<img src=\"")
468 attrEscape(out, link)
469 out.WriteString("\" alt=\"")
470 if len(alt) > 0 {
471 attrEscape(out, alt)
472 }
473 if len(title) > 0 {
474 out.WriteString("\" title=\"")
475 attrEscape(out, title)
476 }
477
478 out.WriteByte('"')
479 out.WriteString(options.closeTag)
480 return
481}
482
483func (options *Html) LineBreak(out *bytes.Buffer) {
484 out.WriteString("<br")
485 out.WriteString(options.closeTag)
486}
487
488func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
489 if options.flags&HTML_SKIP_LINKS != 0 {
490 // write the link text out but don't link it, just mark it with typewriter font
491 out.WriteString("<tt>")
492 attrEscape(out, content)
493 out.WriteString("</tt>")
494 return
495 }
496
497 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
498 // write the link text out but don't link it, just mark it with typewriter font
499 out.WriteString("<tt>")
500 attrEscape(out, content)
501 out.WriteString("</tt>")
502 return
503 }
504
505 out.WriteString("<a href=\"")
506 attrEscape(out, link)
507 if len(title) > 0 {
508 out.WriteString("\" title=\"")
509 attrEscape(out, title)
510 }
511 out.WriteString("\">")
512 out.Write(content)
513 out.WriteString("</a>")
514 return
515}
516
517func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
518 if options.flags&HTML_SKIP_HTML != 0 {
519 return
520 }
521 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
522 return
523 }
524 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
525 return
526 }
527 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
528 return
529 }
530 out.Write(text)
531}
532
533func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
534 out.WriteString("<strong><em>")
535 out.Write(text)
536 out.WriteString("</em></strong>")
537}
538
539func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
540 out.WriteString("<del>")
541 out.Write(text)
542 out.WriteString("</del>")
543}
544
545func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
546 slug := slugify(ref)
547 out.WriteString(`<sup class="footnote-ref" id="fnref:`)
548 out.Write(slug)
549 out.WriteString(`"><a rel="footnote" href="#fn:`)
550 out.Write(slug)
551 out.WriteString(`">`)
552 out.WriteString(strconv.Itoa(id))
553 out.WriteString(`</a></sup>`)
554}
555
556func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
557 out.Write(entity)
558}
559
560func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
561 if options.flags&HTML_USE_SMARTYPANTS != 0 {
562 options.Smartypants(out, text)
563 } else {
564 attrEscape(out, text)
565 }
566}
567
568func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
569 smrt := smartypantsData{false, false}
570
571 // first do normal entity escaping
572 var escaped bytes.Buffer
573 attrEscape(&escaped, text)
574 text = escaped.Bytes()
575
576 mark := 0
577 for i := 0; i < len(text); i++ {
578 if action := options.smartypants[text[i]]; action != nil {
579 if i > mark {
580 out.Write(text[mark:i])
581 }
582
583 previousChar := byte(0)
584 if i > 0 {
585 previousChar = text[i-1]
586 }
587 i += action(out, &smrt, previousChar, text[i:])
588 mark = i + 1
589 }
590 }
591
592 if mark < len(text) {
593 out.Write(text[mark:])
594 }
595}
596
597func (options *Html) DocumentHeader(out *bytes.Buffer) {
598 if options.flags&HTML_COMPLETE_PAGE == 0 {
599 return
600 }
601
602 ending := ""
603 if options.flags&HTML_USE_XHTML != 0 {
604 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
605 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
606 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
607 ending = " /"
608 } else {
609 out.WriteString("<!DOCTYPE html>\n")
610 out.WriteString("<html>\n")
611 }
612 out.WriteString("<head>\n")
613 out.WriteString(" <title>")
614 options.NormalText(out, []byte(options.title))
615 out.WriteString("</title>\n")
616 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
617 out.WriteString(VERSION)
618 out.WriteString("\"")
619 out.WriteString(ending)
620 out.WriteString(">\n")
621 out.WriteString(" <meta charset=\"utf-8\"")
622 out.WriteString(ending)
623 out.WriteString(">\n")
624 if options.css != "" {
625 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
626 attrEscape(out, []byte(options.css))
627 out.WriteString("\"")
628 out.WriteString(ending)
629 out.WriteString(">\n")
630 }
631 out.WriteString("</head>\n")
632 out.WriteString("<body>\n")
633
634 options.tocMarker = out.Len()
635}
636
637func (options *Html) DocumentFooter(out *bytes.Buffer) {
638 // finalize and insert the table of contents
639 if options.flags&HTML_TOC != 0 {
640 options.TocFinalize()
641
642 // now we have to insert the table of contents into the document
643 var temp bytes.Buffer
644
645 // start by making a copy of everything after the document header
646 temp.Write(out.Bytes()[options.tocMarker:])
647
648 // now clear the copied material from the main output buffer
649 out.Truncate(options.tocMarker)
650
651 // corner case spacing issue
652 if options.flags&HTML_COMPLETE_PAGE != 0 {
653 out.WriteByte('\n')
654 }
655
656 // insert the table of contents
657 out.WriteString("<nav>\n")
658 out.Write(options.toc.Bytes())
659 out.WriteString("</nav>\n")
660
661 // corner case spacing issue
662 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
663 out.WriteByte('\n')
664 }
665
666 // write out everything that came after it
667 if options.flags&HTML_OMIT_CONTENTS == 0 {
668 out.Write(temp.Bytes())
669 }
670 }
671
672 if options.flags&HTML_COMPLETE_PAGE != 0 {
673 out.WriteString("\n</body>\n")
674 out.WriteString("</html>\n")
675 }
676
677}
678
679func (options *Html) TocHeader(text []byte, level int) {
680 for level > options.currentLevel {
681 switch {
682 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
683 // this sublist can nest underneath a header
684 size := options.toc.Len()
685 options.toc.Truncate(size - len("</li>\n"))
686
687 case options.currentLevel > 0:
688 options.toc.WriteString("<li>")
689 }
690 if options.toc.Len() > 0 {
691 options.toc.WriteByte('\n')
692 }
693 options.toc.WriteString("<ul>\n")
694 options.currentLevel++
695 }
696
697 for level < options.currentLevel {
698 options.toc.WriteString("</ul>")
699 if options.currentLevel > 1 {
700 options.toc.WriteString("</li>\n")
701 }
702 options.currentLevel--
703 }
704
705 options.toc.WriteString("<li><a href=\"#toc_")
706 options.toc.WriteString(strconv.Itoa(options.headerCount))
707 options.toc.WriteString("\">")
708 options.headerCount++
709
710 options.toc.Write(text)
711
712 options.toc.WriteString("</a></li>\n")
713}
714
715func (options *Html) TocFinalize() {
716 for options.currentLevel > 1 {
717 options.toc.WriteString("</ul></li>\n")
718 options.currentLevel--
719 }
720
721 if options.currentLevel > 0 {
722 options.toc.WriteString("</ul>\n")
723 }
724}
725
726func isHtmlTag(tag []byte, tagname string) bool {
727 found, _ := findHtmlTagPos(tag, tagname)
728 return found
729}
730
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of
// char in html at or after start that is not inside single, double or
// grave quotes (quoted text might be JavaScript). Quote state is tracked
// from start onwards only.
//
// When no such occurrence exists it returns start, so a result equal to
// start is ambiguous between "found at start" and "not found".
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var inSingle, inDouble, inGrave bool

	for pos := start; pos < len(html); pos++ {
		c := html[pos]
		if c == char && !(inSingle || inDouble || inGrave) {
			return pos
		}

		// each quote character toggles its own state independently
		switch c {
		case '\'':
			inSingle = !inSingle
		case '"':
			inDouble = !inDouble
		case '`':
			inGrave = !inGrave
		}
	}

	return start
}
753
754func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
755 i := 0
756 if i < len(tag) && tag[0] != '<' {
757 return false, -1
758 }
759 i++
760 i = skipSpace(tag, i)
761
762 if i < len(tag) && tag[i] == '/' {
763 i++
764 }
765
766 i = skipSpace(tag, i)
767 j := 0
768 for ; i < len(tag); i, j = i+1, j+1 {
769 if j >= len(tagname) {
770 break
771 }
772
773 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
774 return false, -1
775 }
776 }
777
778 if i == len(tag) {
779 return false, -1
780 }
781
782 rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
783 if rightAngle > i {
784 return true, rightAngle
785 }
786
787 return false, -1
788}
789
790func sanitizeHtml(html []byte) []byte {
791 var result []byte
792 for string(html) != "" {
793 skip, tag, rest := findHtmlTag(html)
794 html = rest
795 result = append(result, skip...)
796 result = append(result, sanitizeTag(tag)...)
797 }
798 return append(result, []byte("\n")...)
799}
800
801func sanitizeTag(tag []byte) []byte {
802 if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
803 return tag
804 } else {
805 return []byte("")
806 }
807}
808
// skipUntilChar returns the index of the first occurrence of char in text
// at or after start, or len(text) when there is none. A start that is
// already at or past len(text) is returned unchanged.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
816
817func findHtmlTag(html []byte) (skip, tag, rest []byte) {
818 start := skipUntilChar(html, 0, '<')
819 rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
820 if rightAngle > start {
821 skip = html[0:start]
822 tag = html[start : rightAngle+1]
823 rest = html[rightAngle+1:]
824 return
825 }
826
827 return []byte(""), []byte(""), []byte("")
828}
829
830func skipSpace(tag []byte, i int) int {
831 for i < len(tag) && isspace(tag[i]) {
832 i++
833 }
834 return i
835}
836
837func doubleSpace(out *bytes.Buffer) {
838 if out.Len() > 0 {
839 out.WriteByte('\n')
840 }
841}