html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "regexp"
22 "strconv"
23 "strings"
24)
25
// Html renderer configuration options.
//
// Every option is defined with 1 << iota and therefore occupies its own bit;
// options are combined by ORing them together into a single flags value
// (see HtmlRenderer).
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_NOFOLLOW_LINKS                       // only link with rel="nofollow"
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
44
// Patterns used by sanitizeTag and AutoLink. All regular expressions are
// compiled once at package initialization.
var (
	// tags lists the element names that tagWhitelist accepts as a bare
	// opening or closing tag (no attributes).
	tags = []string{
		"b",
		"blockquote",
		"code",
		"del",
		"dd",
		"dl",
		"dt",
		"em",
		"h1",
		"h2",
		"h3",
		"h4",
		"h5",
		"h6",
		"i",
		"kbd",
		"li",
		"ol",
		"p",
		"pre",
		"s",
		"sup",
		"sub",
		"strong",
		"strike",
		"ul",
	}
	// urlRe is a pattern fragment (not compiled on its own) matching an
	// http, https or ftp URL, or a path starting with "/". It is spliced
	// into anchorClean and imgClean.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// tagWhitelist matches exactly one attribute-free opening or closing tag
	// whose name is in tags, or a <br>/<hr> with an optional space and slash.
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
	// anchorClean matches <a href="URL"> with an optional title attribute,
	// or the closing </a>.
	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
	// imgClean matches <img src="URL"> followed by optional width, height
	// (one to three digits each), alt and title attributes, in that order.
	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
	// htmlEntity matches "&", two to five lowercase letters, then ";".
	// TODO: improve this regexp to catch all possible entities:
	htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
)
81
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // offset in the output buffer recorded by DocumentHeader; DocumentFooter inserts the TOC there
	headerCount  int           // number of TOC entries written so far; also used as the toc_N anchor suffix
	currentLevel int           // current <ul> nesting depth inside toc
	toc          *bytes.Buffer // accumulated table-of-contents markup

	// smartypants is indexed by byte value in Smartypants to find the
	// substitution callback for that byte (nil when there is none).
	smartypants *smartypantsRenderer
}
99
// Endings appended after a singleton tag such as "<hr" or "<br".
// HtmlRenderer selects one of them as Html.closeTag.
const (
	xhtmlClose = " />\n" // used when HTML_USE_XHTML is set
	htmlClose  = ">\n"   // used otherwise
)
104
105// HtmlRenderer creates and configures an Html object, which
106// satisfies the Renderer interface.
107//
108// flags is a set of HTML_* options ORed together.
109// title is the title of the document, and css is a URL for the document's
110// stylesheet.
111// title and css are only used when HTML_COMPLETE_PAGE is selected.
112func HtmlRenderer(flags int, title string, css string) Renderer {
113 // configure the rendering engine
114 closeTag := htmlClose
115 if flags&HTML_USE_XHTML != 0 {
116 closeTag = xhtmlClose
117 }
118
119 return &Html{
120 flags: flags,
121 closeTag: closeTag,
122 title: title,
123 css: css,
124
125 headerCount: 0,
126 currentLevel: 0,
127 toc: new(bytes.Buffer),
128
129 smartypants: smartypants(flags),
130 }
131}
132
// escapeSingleChar returns the HTML entity that must replace char in escaped
// output, together with true, when char is one of '"', '&', '<' or '>'.
// For every other byte it returns "" and false.
//
// Using if statements is a bit faster than a switch statement. As the compiler
// improves, this should become unnecessary; it is only worthwhile because
// attrEscape is the single largest CPU user in normal use.
// Also tried using a map, but that gave a ~3x slowdown.
func escapeSingleChar(char byte) (string, bool) {
	if char == '"' {
		return "&quot;", true
	}
	if char == '&' {
		return "&amp;", true
	}
	if char == '<' {
		return "&lt;", true
	}
	if char == '>' {
		return "&gt;", true
	}
	return "", false
}
152
153func attrEscape(out *bytes.Buffer, src []byte) {
154 org := 0
155 for i, ch := range src {
156 if entity, ok := escapeSingleChar(ch); ok {
157 if i > org {
158 // copy all the normal characters since the last escape
159 out.Write(src[org:i])
160 }
161 org = i + 1
162 out.WriteString(entity)
163 }
164 }
165 if org < len(src) {
166 out.Write(src[org:])
167 }
168}
169
170func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) {
171 end := 0
172 for _, rang := range skipRanges {
173 attrEscape(out, src[end:rang[0]])
174 out.Write(src[rang[0]:rang[1]])
175 end = rang[1]
176 }
177 attrEscape(out, src[end:])
178}
179
// GetFlags returns the HTML_* option flags this renderer was configured with.
func (options *Html) GetFlags() int {
	return options.flags
}
183
184func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
185 marker := out.Len()
186 doubleSpace(out)
187
188 if options.flags&HTML_TOC != 0 {
189 // headerCount is incremented in htmlTocHeader
190 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
191 } else {
192 out.WriteString(fmt.Sprintf("<h%d>", level))
193 }
194
195 tocMarker := out.Len()
196 if !text() {
197 out.Truncate(marker)
198 return
199 }
200
201 // are we building a table of contents?
202 if options.flags&HTML_TOC != 0 {
203 options.TocHeader(out.Bytes()[tocMarker:], level)
204 }
205
206 out.WriteString(fmt.Sprintf("</h%d>\n", level))
207}
208
209func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
210 if options.flags&HTML_SKIP_HTML != 0 {
211 return
212 }
213
214 doubleSpace(out)
215 out.Write(text)
216 out.WriteByte('\n')
217}
218
219func (options *Html) HRule(out *bytes.Buffer) {
220 doubleSpace(out)
221 out.WriteString("<hr")
222 out.WriteString(options.closeTag)
223}
224
225func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
226 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
227 options.BlockCodeGithub(out, text, lang)
228 } else {
229 options.BlockCodeNormal(out, text, lang)
230 }
231}
232
233func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
234 doubleSpace(out)
235
236 // parse out the language names/classes
237 count := 0
238 for _, elt := range strings.Fields(lang) {
239 if elt[0] == '.' {
240 elt = elt[1:]
241 }
242 if len(elt) == 0 {
243 continue
244 }
245 if count == 0 {
246 out.WriteString("<pre><code class=\"")
247 } else {
248 out.WriteByte(' ')
249 }
250 attrEscape(out, []byte(elt))
251 count++
252 }
253
254 if count == 0 {
255 out.WriteString("<pre><code>")
256 } else {
257 out.WriteString("\">")
258 }
259
260 attrEscape(out, text)
261 out.WriteString("</code></pre>\n")
262}
263
264// GitHub style code block:
265//
266// <pre lang="LANG"><code>
267// ...
268// </code></pre>
269//
270// Unlike other parsers, we store the language identifier in the <pre>,
271// and don't let the user generate custom classes.
272//
273// The language identifier in the <pre> block gets postprocessed and all
274// the code inside gets syntax highlighted with Pygments. This is much safer
275// than letting the user specify a CSS class for highlighting.
276//
277// Note that we only generate HTML for the first specifier.
278// E.g.
279// ~~~~ {.python .numbered} => <pre lang="python"><code>
280func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
281 doubleSpace(out)
282
283 // parse out the language name
284 count := 0
285 for _, elt := range strings.Fields(lang) {
286 if elt[0] == '.' {
287 elt = elt[1:]
288 }
289 if len(elt) == 0 {
290 continue
291 }
292 out.WriteString("<pre lang=\"")
293 attrEscape(out, []byte(elt))
294 out.WriteString("\"><code>")
295 count++
296 break
297 }
298
299 if count == 0 {
300 out.WriteString("<pre><code>")
301 }
302
303 attrEscape(out, text)
304 out.WriteString("</code></pre>\n")
305}
306
307func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
308 doubleSpace(out)
309 out.WriteString("<blockquote>\n")
310 out.Write(text)
311 out.WriteString("</blockquote>\n")
312}
313
314func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
315 doubleSpace(out)
316 out.WriteString("<table>\n<thead>\n")
317 out.Write(header)
318 out.WriteString("</thead>\n\n<tbody>\n")
319 out.Write(body)
320 out.WriteString("</tbody>\n</table>\n")
321}
322
323func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
324 doubleSpace(out)
325 out.WriteString("<tr>\n")
326 out.Write(text)
327 out.WriteString("\n</tr>\n")
328}
329
330func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
331 doubleSpace(out)
332 switch align {
333 case TABLE_ALIGNMENT_LEFT:
334 out.WriteString("<th align=\"left\">")
335 case TABLE_ALIGNMENT_RIGHT:
336 out.WriteString("<th align=\"right\">")
337 case TABLE_ALIGNMENT_CENTER:
338 out.WriteString("<th align=\"center\">")
339 default:
340 out.WriteString("<th>")
341 }
342
343 out.Write(text)
344 out.WriteString("</th>")
345}
346
347func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
348 doubleSpace(out)
349 switch align {
350 case TABLE_ALIGNMENT_LEFT:
351 out.WriteString("<td align=\"left\">")
352 case TABLE_ALIGNMENT_RIGHT:
353 out.WriteString("<td align=\"right\">")
354 case TABLE_ALIGNMENT_CENTER:
355 out.WriteString("<td align=\"center\">")
356 default:
357 out.WriteString("<td>")
358 }
359
360 out.Write(text)
361 out.WriteString("</td>")
362}
363
364func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
365 out.WriteString("<div class=\"footnotes\">\n")
366 options.HRule(out)
367 options.List(out, text, LIST_TYPE_ORDERED)
368 out.WriteString("</div>\n")
369}
370
371func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
372 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
373 doubleSpace(out)
374 }
375 out.WriteString(`<li id="fn:`)
376 out.Write(slugify(name))
377 out.WriteString(`">`)
378 out.Write(text)
379 out.WriteString("</li>\n")
380}
381
382func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
383 marker := out.Len()
384 doubleSpace(out)
385
386 if flags&LIST_TYPE_ORDERED != 0 {
387 out.WriteString("<ol>")
388 } else {
389 out.WriteString("<ul>")
390 }
391 if !text() {
392 out.Truncate(marker)
393 return
394 }
395 if flags&LIST_TYPE_ORDERED != 0 {
396 out.WriteString("</ol>\n")
397 } else {
398 out.WriteString("</ul>\n")
399 }
400}
401
402func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
403 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
404 doubleSpace(out)
405 }
406 out.WriteString("<li>")
407 out.Write(text)
408 out.WriteString("</li>\n")
409}
410
411func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
412 marker := out.Len()
413 doubleSpace(out)
414
415 out.WriteString("<p>")
416 if !text() {
417 out.Truncate(marker)
418 return
419 }
420 out.WriteString("</p>\n")
421}
422
423func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
424 skipRanges := htmlEntity.FindAllIndex(link, -1)
425 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
426 // mark it but don't link it if it is not a safe link: no smartypants
427 out.WriteString("<tt>")
428 entityEscapeWithSkip(out, link, skipRanges)
429 out.WriteString("</tt>")
430 return
431 }
432
433 out.WriteString("<a href=\"")
434 if kind == LINK_TYPE_EMAIL {
435 out.WriteString("mailto:")
436 }
437 entityEscapeWithSkip(out, link, skipRanges)
438 out.WriteString("\">")
439
440 // Pretty print: if we get an email address as
441 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
442 // want to print the `mailto:` prefix
443 switch {
444 case bytes.HasPrefix(link, []byte("mailto://")):
445 attrEscape(out, link[len("mailto://"):])
446 case bytes.HasPrefix(link, []byte("mailto:")):
447 attrEscape(out, link[len("mailto:"):])
448 default:
449 entityEscapeWithSkip(out, link, skipRanges)
450 }
451
452 out.WriteString("</a>")
453}
454
455func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
456 out.WriteString("<code>")
457 attrEscape(out, text)
458 out.WriteString("</code>")
459}
460
461func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
462 out.WriteString("<strong>")
463 out.Write(text)
464 out.WriteString("</strong>")
465}
466
467func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
468 if len(text) == 0 {
469 return
470 }
471 out.WriteString("<em>")
472 out.Write(text)
473 out.WriteString("</em>")
474}
475
476func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
477 if options.flags&HTML_SKIP_IMAGES != 0 {
478 return
479 }
480
481 out.WriteString("<img src=\"")
482 attrEscape(out, link)
483 out.WriteString("\" alt=\"")
484 if len(alt) > 0 {
485 attrEscape(out, alt)
486 }
487 if len(title) > 0 {
488 out.WriteString("\" title=\"")
489 attrEscape(out, title)
490 }
491
492 out.WriteByte('"')
493 out.WriteString(options.closeTag)
494 return
495}
496
497func (options *Html) LineBreak(out *bytes.Buffer) {
498 out.WriteString("<br")
499 out.WriteString(options.closeTag)
500}
501
502func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
503 if options.flags&HTML_SKIP_LINKS != 0 {
504 // write the link text out but don't link it, just mark it with typewriter font
505 out.WriteString("<tt>")
506 attrEscape(out, content)
507 out.WriteString("</tt>")
508 return
509 }
510
511 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
512 // write the link text out but don't link it, just mark it with typewriter font
513 out.WriteString("<tt>")
514 attrEscape(out, content)
515 out.WriteString("</tt>")
516 return
517 }
518
519 out.WriteString("<a href=\"")
520 attrEscape(out, link)
521 if len(title) > 0 {
522 out.WriteString("\" title=\"")
523 attrEscape(out, title)
524 }
525 if options.flags&HTML_NOFOLLOW_LINKS != 0 {
526 out.WriteString("\" rel=\"nofollow")
527 }
528 out.WriteString("\">")
529 out.Write(content)
530 out.WriteString("</a>")
531 return
532}
533
534func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
535 if options.flags&HTML_SKIP_HTML != 0 {
536 return
537 }
538 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
539 return
540 }
541 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
542 return
543 }
544 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
545 return
546 }
547 out.Write(text)
548}
549
550func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
551 out.WriteString("<strong><em>")
552 out.Write(text)
553 out.WriteString("</em></strong>")
554}
555
556func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
557 out.WriteString("<del>")
558 out.Write(text)
559 out.WriteString("</del>")
560}
561
562func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
563 slug := slugify(ref)
564 out.WriteString(`<sup class="footnote-ref" id="fnref:`)
565 out.Write(slug)
566 out.WriteString(`"><a rel="footnote" href="#fn:`)
567 out.Write(slug)
568 out.WriteString(`">`)
569 out.WriteString(strconv.Itoa(id))
570 out.WriteString(`</a></sup>`)
571}
572
// Entity copies entity to out unchanged; no escaping is applied.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
576
577func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
578 if options.flags&HTML_USE_SMARTYPANTS != 0 {
579 options.Smartypants(out, text)
580 } else {
581 attrEscape(out, text)
582 }
583}
584
585func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
586 smrt := smartypantsData{false, false}
587
588 // first do normal entity escaping
589 var escaped bytes.Buffer
590 attrEscape(&escaped, text)
591 text = escaped.Bytes()
592
593 mark := 0
594 for i := 0; i < len(text); i++ {
595 if action := options.smartypants[text[i]]; action != nil {
596 if i > mark {
597 out.Write(text[mark:i])
598 }
599
600 previousChar := byte(0)
601 if i > 0 {
602 previousChar = text[i-1]
603 }
604 i += action(out, &smrt, previousChar, text[i:])
605 mark = i + 1
606 }
607 }
608
609 if mark < len(text) {
610 out.Write(text[mark:])
611 }
612}
613
614func (options *Html) DocumentHeader(out *bytes.Buffer) {
615 if options.flags&HTML_COMPLETE_PAGE == 0 {
616 return
617 }
618
619 ending := ""
620 if options.flags&HTML_USE_XHTML != 0 {
621 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
622 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
623 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
624 ending = " /"
625 } else {
626 out.WriteString("<!DOCTYPE html>\n")
627 out.WriteString("<html>\n")
628 }
629 out.WriteString("<head>\n")
630 out.WriteString(" <title>")
631 options.NormalText(out, []byte(options.title))
632 out.WriteString("</title>\n")
633 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
634 out.WriteString(VERSION)
635 out.WriteString("\"")
636 out.WriteString(ending)
637 out.WriteString(">\n")
638 out.WriteString(" <meta charset=\"utf-8\"")
639 out.WriteString(ending)
640 out.WriteString(">\n")
641 if options.css != "" {
642 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
643 attrEscape(out, []byte(options.css))
644 out.WriteString("\"")
645 out.WriteString(ending)
646 out.WriteString(">\n")
647 }
648 out.WriteString("</head>\n")
649 out.WriteString("<body>\n")
650
651 options.tocMarker = out.Len()
652}
653
654func (options *Html) DocumentFooter(out *bytes.Buffer) {
655 // finalize and insert the table of contents
656 if options.flags&HTML_TOC != 0 {
657 options.TocFinalize()
658
659 // now we have to insert the table of contents into the document
660 var temp bytes.Buffer
661
662 // start by making a copy of everything after the document header
663 temp.Write(out.Bytes()[options.tocMarker:])
664
665 // now clear the copied material from the main output buffer
666 out.Truncate(options.tocMarker)
667
668 // corner case spacing issue
669 if options.flags&HTML_COMPLETE_PAGE != 0 {
670 out.WriteByte('\n')
671 }
672
673 // insert the table of contents
674 out.WriteString("<nav>\n")
675 out.Write(options.toc.Bytes())
676 out.WriteString("</nav>\n")
677
678 // corner case spacing issue
679 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
680 out.WriteByte('\n')
681 }
682
683 // write out everything that came after it
684 if options.flags&HTML_OMIT_CONTENTS == 0 {
685 out.Write(temp.Bytes())
686 }
687 }
688
689 if options.flags&HTML_COMPLETE_PAGE != 0 {
690 out.WriteString("\n</body>\n")
691 out.WriteString("</html>\n")
692 }
693
694}
695
696func (options *Html) TocHeader(text []byte, level int) {
697 for level > options.currentLevel {
698 switch {
699 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
700 // this sublist can nest underneath a header
701 size := options.toc.Len()
702 options.toc.Truncate(size - len("</li>\n"))
703
704 case options.currentLevel > 0:
705 options.toc.WriteString("<li>")
706 }
707 if options.toc.Len() > 0 {
708 options.toc.WriteByte('\n')
709 }
710 options.toc.WriteString("<ul>\n")
711 options.currentLevel++
712 }
713
714 for level < options.currentLevel {
715 options.toc.WriteString("</ul>")
716 if options.currentLevel > 1 {
717 options.toc.WriteString("</li>\n")
718 }
719 options.currentLevel--
720 }
721
722 options.toc.WriteString("<li><a href=\"#toc_")
723 options.toc.WriteString(strconv.Itoa(options.headerCount))
724 options.toc.WriteString("\">")
725 options.headerCount++
726
727 options.toc.Write(text)
728
729 options.toc.WriteString("</a></li>\n")
730}
731
732func (options *Html) TocFinalize() {
733 for options.currentLevel > 1 {
734 options.toc.WriteString("</ul></li>\n")
735 options.currentLevel--
736 }
737
738 if options.currentLevel > 0 {
739 options.toc.WriteString("</ul>\n")
740 }
741}
742
743func isHtmlTag(tag []byte, tagname string) bool {
744 found, _ := findHtmlTagPos(tag, tagname)
745 return found
746}
747
748// Look for a character, but ignore it when it's in any kind of quotes, it
749// might be JavaScript
750func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
751 inSingleQuote := false
752 inDoubleQuote := false
753 inGraveQuote := false
754 i := start
755 for i < len(html) {
756 switch {
757 case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
758 return i
759 case html[i] == '\'':
760 inSingleQuote = !inSingleQuote
761 case html[i] == '"':
762 inDoubleQuote = !inDoubleQuote
763 case html[i] == '`':
764 inGraveQuote = !inGraveQuote
765 }
766 i++
767 }
768 return start
769}
770
771func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
772 i := 0
773 if i < len(tag) && tag[0] != '<' {
774 return false, -1
775 }
776 i++
777 i = skipSpace(tag, i)
778
779 if i < len(tag) && tag[i] == '/' {
780 i++
781 }
782
783 i = skipSpace(tag, i)
784 j := 0
785 for ; i < len(tag); i, j = i+1, j+1 {
786 if j >= len(tagname) {
787 break
788 }
789
790 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
791 return false, -1
792 }
793 }
794
795 if i == len(tag) {
796 return false, -1
797 }
798
799 rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
800 if rightAngle > i {
801 return true, rightAngle
802 }
803
804 return false, -1
805}
806
807func sanitizeHtml(html []byte) []byte {
808 var result []byte
809 for string(html) != "" {
810 skip, tag, rest := findHtmlTag(html)
811 html = rest
812 result = append(result, skip...)
813 result = append(result, sanitizeTag(tag)...)
814 }
815 return append(result, []byte("\n")...)
816}
817
818func sanitizeTag(tag []byte) []byte {
819 if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
820 return tag
821 }
822 return []byte("")
823}
824
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. When there is none it returns len(text), or start itself if
// start is already past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
832
833func findHtmlTag(html []byte) (skip, tag, rest []byte) {
834 start := skipUntilChar(html, 0, '<')
835 rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
836 if rightAngle > start {
837 skip = html[0:start]
838 tag = html[start : rightAngle+1]
839 rest = html[rightAngle+1:]
840 return
841 }
842
843 return []byte(""), []byte(""), []byte("")
844}
845
846func skipSpace(tag []byte, i int) int {
847 for i < len(tag) && isspace(tag[i]) {
848 i++
849 }
850 return i
851}
852
// doubleSpace appends a newline to out unless out is still empty, so that
// block elements are separated without a leading blank line at the start of
// the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}