html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Html renderer configuration options.
//
// Each option is a distinct bit, so options can be ORed together and
// passed as the flags argument of HtmlRenderer.
const (
	// skip preformatted HTML blocks
	HTML_SKIP_HTML = 1 << iota
	// skip embedded <style> elements
	HTML_SKIP_STYLE
	// skip embedded images
	HTML_SKIP_IMAGES
	// skip all links
	HTML_SKIP_LINKS
	// skip embedded <script> elements
	HTML_SKIP_SCRIPT
	// only link to trusted protocols
	HTML_SAFELINK
	// generate a table of contents
	HTML_TOC
	// skip the main contents (for a standalone table of contents)
	HTML_OMIT_CONTENTS
	// generate a complete HTML page
	HTML_COMPLETE_PAGE
	// use github fenced code rendering rules
	HTML_GITHUB_BLOCKCODE
	// generate XHTML output instead of HTML
	HTML_USE_XHTML
	// enable smart punctuation substitutions
	HTML_USE_SMARTYPANTS
	// enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_FRACTIONS
	// enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES
)
42
43// Html is a type that implements the Renderer interface for HTML output.
44//
45// Do not create this directly, instead use the HtmlRenderer function.
46type Html struct {
47 flags int // HTML_* options
48 closeTag string // how to end singleton tags: either " />\n" or ">\n"
49 title string // document title
50 css string // optional css file url (used with HTML_COMPLETE_PAGE)
51
52 // table of contents data
53 tocMarker int
54 headerCount int
55 currentLevel int
56 toc *bytes.Buffer
57
58 smartypants *smartypantsRenderer
59}
60
// Endings for singleton tags such as <hr> and <br>; HtmlRenderer picks one
// of them depending on HTML_USE_XHTML.
const (
	// XHTML self-closing form
	xhtmlClose = " />\n"
	// plain HTML form
	htmlClose = ">\n"
)
65
66// HtmlRenderer creates and configures an Html object, which
67// satisfies the Renderer interface.
68//
69// flags is a set of HTML_* options ORed together.
70// title is the title of the document, and css is a URL for the document's
71// stylesheet.
72// title and css are only used when HTML_COMPLETE_PAGE is selected.
73func HtmlRenderer(flags int, title string, css string) Renderer {
74 // configure the rendering engine
75 closeTag := htmlClose
76 if flags&HTML_USE_XHTML != 0 {
77 closeTag = xhtmlClose
78 }
79
80 return &Html{
81 flags: flags,
82 closeTag: closeTag,
83 title: title,
84 css: css,
85
86 headerCount: 0,
87 currentLevel: 0,
88 toc: new(bytes.Buffer),
89
90 smartypants: smartypants(flags),
91 }
92}
93
// attrEscape appends src to out, replacing the characters that are
// significant in HTML text and attribute values with character references:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// All other bytes are copied through unchanged. Runs of ordinary bytes are
// written with a single Write call rather than byte by byte.
func attrEscape(out *bytes.Buffer, src []byte) {
	// org is the start of the run of ordinary bytes not yet copied to out
	org := 0
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		if i > org {
			// copy all the normal characters since the last escape
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
139
140func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
141 marker := out.Len()
142 doubleSpace(out)
143
144 if options.flags&HTML_TOC != 0 {
145 // headerCount is incremented in htmlTocHeader
146 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
147 } else {
148 out.WriteString(fmt.Sprintf("<h%d>", level))
149 }
150
151 tocMarker := out.Len()
152 if !text() {
153 out.Truncate(marker)
154 return
155 }
156
157 // are we building a table of contents?
158 if options.flags&HTML_TOC != 0 {
159 options.TocHeader(out.Bytes()[tocMarker:], level)
160 }
161
162 out.WriteString(fmt.Sprintf("</h%d>\n", level))
163}
164
165func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
166 if options.flags&HTML_SKIP_HTML != 0 {
167 return
168 }
169
170 doubleSpace(out)
171 if options.flags&HTML_SKIP_SCRIPT != 0 {
172 out.Write(stripTag(string(text), "script", "p"))
173 } else {
174 out.Write(text)
175 }
176 out.WriteByte('\n')
177}
178
179func stripTag(text, tag, newTag string) []byte {
180 closeNewTag := fmt.Sprintf("</%s>", newTag)
181 i := 0
182 for i < len(text) && text[i] != '<' {
183 i++
184 }
185 if i == len(text) {
186 return []byte(text)
187 }
188 found, end := findHtmlTagPos([]byte(text[i:]), tag)
189 closeTag := fmt.Sprintf("</%s>", tag)
190 noOpen := text
191 if found {
192 noOpen = text[0:i+1] + newTag + text[end:]
193 }
194 return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
195}
196
197func (options *Html) HRule(out *bytes.Buffer) {
198 doubleSpace(out)
199 out.WriteString("<hr")
200 out.WriteString(options.closeTag)
201}
202
203func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
204 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
205 options.BlockCodeGithub(out, text, lang)
206 } else {
207 options.BlockCodeNormal(out, text, lang)
208 }
209}
210
211func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
212 doubleSpace(out)
213
214 // parse out the language names/classes
215 count := 0
216 for _, elt := range strings.Fields(lang) {
217 if elt[0] == '.' {
218 elt = elt[1:]
219 }
220 if len(elt) == 0 {
221 continue
222 }
223 if count == 0 {
224 out.WriteString("<pre><code class=\"")
225 } else {
226 out.WriteByte(' ')
227 }
228 attrEscape(out, []byte(elt))
229 count++
230 }
231
232 if count == 0 {
233 out.WriteString("<pre><code>")
234 } else {
235 out.WriteString("\">")
236 }
237
238 attrEscape(out, text)
239 out.WriteString("</code></pre>\n")
240}
241
242// GitHub style code block:
243//
244// <pre lang="LANG"><code>
245// ...
246// </code></pre>
247//
248// Unlike other parsers, we store the language identifier in the <pre>,
249// and don't let the user generate custom classes.
250//
251// The language identifier in the <pre> block gets postprocessed and all
252// the code inside gets syntax highlighted with Pygments. This is much safer
253// than letting the user specify a CSS class for highlighting.
254//
255// Note that we only generate HTML for the first specifier.
256// E.g.
257// ~~~~ {.python .numbered} => <pre lang="python"><code>
258func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
259 doubleSpace(out)
260
261 // parse out the language name
262 count := 0
263 for _, elt := range strings.Fields(lang) {
264 if elt[0] == '.' {
265 elt = elt[1:]
266 }
267 if len(elt) == 0 {
268 continue
269 }
270 out.WriteString("<pre lang=\"")
271 attrEscape(out, []byte(elt))
272 out.WriteString("\"><code>")
273 count++
274 break
275 }
276
277 if count == 0 {
278 out.WriteString("<pre><code>")
279 }
280
281 attrEscape(out, text)
282 out.WriteString("</code></pre>\n")
283}
284
285func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
286 doubleSpace(out)
287 out.WriteString("<blockquote>\n")
288 out.Write(text)
289 out.WriteString("</blockquote>\n")
290}
291
292func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
293 doubleSpace(out)
294 out.WriteString("<table>\n<thead>\n")
295 out.Write(header)
296 out.WriteString("</thead>\n\n<tbody>\n")
297 out.Write(body)
298 out.WriteString("</tbody>\n</table>\n")
299}
300
301func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
302 doubleSpace(out)
303 out.WriteString("<tr>\n")
304 out.Write(text)
305 out.WriteString("\n</tr>\n")
306}
307
308func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
309 doubleSpace(out)
310 switch align {
311 case TABLE_ALIGNMENT_LEFT:
312 out.WriteString("<th align=\"left\">")
313 case TABLE_ALIGNMENT_RIGHT:
314 out.WriteString("<th align=\"right\">")
315 case TABLE_ALIGNMENT_CENTER:
316 out.WriteString("<th align=\"center\">")
317 default:
318 out.WriteString("<th>")
319 }
320
321 out.Write(text)
322 out.WriteString("</th>")
323}
324
325func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
326 doubleSpace(out)
327 switch align {
328 case TABLE_ALIGNMENT_LEFT:
329 out.WriteString("<td align=\"left\">")
330 case TABLE_ALIGNMENT_RIGHT:
331 out.WriteString("<td align=\"right\">")
332 case TABLE_ALIGNMENT_CENTER:
333 out.WriteString("<td align=\"center\">")
334 default:
335 out.WriteString("<td>")
336 }
337
338 out.Write(text)
339 out.WriteString("</td>")
340}
341
342func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
343 out.WriteString("<div class=\"footnotes\">\n")
344 options.HRule(out)
345 options.List(out, text, LIST_TYPE_ORDERED)
346 out.WriteString("</div>\n")
347}
348
349func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
350 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
351 doubleSpace(out)
352 }
353 out.WriteString(`<li id="fn:`)
354 out.Write(slugify(name))
355 out.WriteString(`">`)
356 out.Write(text)
357 out.WriteString("</li>\n")
358}
359
360func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
361 marker := out.Len()
362 doubleSpace(out)
363
364 if flags&LIST_TYPE_ORDERED != 0 {
365 out.WriteString("<ol>")
366 } else {
367 out.WriteString("<ul>")
368 }
369 if !text() {
370 out.Truncate(marker)
371 return
372 }
373 if flags&LIST_TYPE_ORDERED != 0 {
374 out.WriteString("</ol>\n")
375 } else {
376 out.WriteString("</ul>\n")
377 }
378}
379
380func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
381 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
382 doubleSpace(out)
383 }
384 out.WriteString("<li>")
385 out.Write(text)
386 out.WriteString("</li>\n")
387}
388
389func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
390 marker := out.Len()
391 doubleSpace(out)
392
393 out.WriteString("<p>")
394 if !text() {
395 out.Truncate(marker)
396 return
397 }
398 out.WriteString("</p>\n")
399}
400
401func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
402 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
403 // mark it but don't link it if it is not a safe link: no smartypants
404 out.WriteString("<tt>")
405 attrEscape(out, link)
406 out.WriteString("</tt>")
407 return
408 }
409
410 out.WriteString("<a href=\"")
411 if kind == LINK_TYPE_EMAIL {
412 out.WriteString("mailto:")
413 }
414 attrEscape(out, link)
415 out.WriteString("\">")
416
417 // Pretty print: if we get an email address as
418 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
419 // want to print the `mailto:` prefix
420 switch {
421 case bytes.HasPrefix(link, []byte("mailto://")):
422 attrEscape(out, link[len("mailto://"):])
423 case bytes.HasPrefix(link, []byte("mailto:")):
424 attrEscape(out, link[len("mailto:"):])
425 default:
426 attrEscape(out, link)
427 }
428
429 out.WriteString("</a>")
430}
431
432func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
433 out.WriteString("<code>")
434 attrEscape(out, text)
435 out.WriteString("</code>")
436}
437
438func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
439 out.WriteString("<strong>")
440 out.Write(text)
441 out.WriteString("</strong>")
442}
443
444func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
445 if len(text) == 0 {
446 return
447 }
448 out.WriteString("<em>")
449 out.Write(text)
450 out.WriteString("</em>")
451}
452
453func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
454 if options.flags&HTML_SKIP_IMAGES != 0 {
455 return
456 }
457
458 out.WriteString("<img src=\"")
459 attrEscape(out, link)
460 out.WriteString("\" alt=\"")
461 if len(alt) > 0 {
462 attrEscape(out, alt)
463 }
464 if len(title) > 0 {
465 out.WriteString("\" title=\"")
466 attrEscape(out, title)
467 }
468
469 out.WriteByte('"')
470 out.WriteString(options.closeTag)
471 return
472}
473
474func (options *Html) LineBreak(out *bytes.Buffer) {
475 out.WriteString("<br")
476 out.WriteString(options.closeTag)
477}
478
479func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
480 if options.flags&HTML_SKIP_LINKS != 0 {
481 // write the link text out but don't link it, just mark it with typewriter font
482 out.WriteString("<tt>")
483 attrEscape(out, content)
484 out.WriteString("</tt>")
485 return
486 }
487
488 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
489 // write the link text out but don't link it, just mark it with typewriter font
490 out.WriteString("<tt>")
491 attrEscape(out, content)
492 out.WriteString("</tt>")
493 return
494 }
495
496 out.WriteString("<a href=\"")
497 attrEscape(out, link)
498 if len(title) > 0 {
499 out.WriteString("\" title=\"")
500 attrEscape(out, title)
501 }
502 out.WriteString("\">")
503 out.Write(content)
504 out.WriteString("</a>")
505 return
506}
507
508func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
509 if options.flags&HTML_SKIP_HTML != 0 {
510 return
511 }
512 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
513 return
514 }
515 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
516 return
517 }
518 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
519 return
520 }
521 if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
522 return
523 }
524 out.Write(text)
525}
526
527func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
528 out.WriteString("<strong><em>")
529 out.Write(text)
530 out.WriteString("</em></strong>")
531}
532
533func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
534 out.WriteString("<del>")
535 out.Write(text)
536 out.WriteString("</del>")
537}
538
539func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
540 slug := slugify(ref)
541 out.WriteString(`<sup class="footnote-ref" id="fnref:`)
542 out.Write(slug)
543 out.WriteString(`"><a rel="footnote" href="#fn:`)
544 out.Write(slug)
545 out.WriteString(`">`)
546 out.WriteString(strconv.Itoa(id))
547 out.WriteString(`</a></sup>`)
548}
549
550func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
551 out.Write(entity)
552}
553
554func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
555 if options.flags&HTML_USE_SMARTYPANTS != 0 {
556 options.Smartypants(out, text)
557 } else {
558 attrEscape(out, text)
559 }
560}
561
562func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
563 smrt := smartypantsData{false, false}
564
565 // first do normal entity escaping
566 var escaped bytes.Buffer
567 attrEscape(&escaped, text)
568 text = escaped.Bytes()
569
570 mark := 0
571 for i := 0; i < len(text); i++ {
572 if action := options.smartypants[text[i]]; action != nil {
573 if i > mark {
574 out.Write(text[mark:i])
575 }
576
577 previousChar := byte(0)
578 if i > 0 {
579 previousChar = text[i-1]
580 }
581 i += action(out, &smrt, previousChar, text[i:])
582 mark = i + 1
583 }
584 }
585
586 if mark < len(text) {
587 out.Write(text[mark:])
588 }
589}
590
591func (options *Html) DocumentHeader(out *bytes.Buffer) {
592 if options.flags&HTML_COMPLETE_PAGE == 0 {
593 return
594 }
595
596 ending := ""
597 if options.flags&HTML_USE_XHTML != 0 {
598 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
599 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
600 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
601 ending = " /"
602 } else {
603 out.WriteString("<!DOCTYPE html>\n")
604 out.WriteString("<html>\n")
605 }
606 out.WriteString("<head>\n")
607 out.WriteString(" <title>")
608 options.NormalText(out, []byte(options.title))
609 out.WriteString("</title>\n")
610 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
611 out.WriteString(VERSION)
612 out.WriteString("\"")
613 out.WriteString(ending)
614 out.WriteString(">\n")
615 out.WriteString(" <meta charset=\"utf-8\"")
616 out.WriteString(ending)
617 out.WriteString(">\n")
618 if options.css != "" {
619 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
620 attrEscape(out, []byte(options.css))
621 out.WriteString("\"")
622 out.WriteString(ending)
623 out.WriteString(">\n")
624 }
625 out.WriteString("</head>\n")
626 out.WriteString("<body>\n")
627
628 options.tocMarker = out.Len()
629}
630
631func (options *Html) DocumentFooter(out *bytes.Buffer) {
632 // finalize and insert the table of contents
633 if options.flags&HTML_TOC != 0 {
634 options.TocFinalize()
635
636 // now we have to insert the table of contents into the document
637 var temp bytes.Buffer
638
639 // start by making a copy of everything after the document header
640 temp.Write(out.Bytes()[options.tocMarker:])
641
642 // now clear the copied material from the main output buffer
643 out.Truncate(options.tocMarker)
644
645 // corner case spacing issue
646 if options.flags&HTML_COMPLETE_PAGE != 0 {
647 out.WriteByte('\n')
648 }
649
650 // insert the table of contents
651 out.WriteString("<nav>\n")
652 out.Write(options.toc.Bytes())
653 out.WriteString("</nav>\n")
654
655 // corner case spacing issue
656 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
657 out.WriteByte('\n')
658 }
659
660 // write out everything that came after it
661 if options.flags&HTML_OMIT_CONTENTS == 0 {
662 out.Write(temp.Bytes())
663 }
664 }
665
666 if options.flags&HTML_COMPLETE_PAGE != 0 {
667 out.WriteString("\n</body>\n")
668 out.WriteString("</html>\n")
669 }
670
671}
672
673func (options *Html) TocHeader(text []byte, level int) {
674 for level > options.currentLevel {
675 switch {
676 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
677 // this sublist can nest underneath a header
678 size := options.toc.Len()
679 options.toc.Truncate(size - len("</li>\n"))
680
681 case options.currentLevel > 0:
682 options.toc.WriteString("<li>")
683 }
684 if options.toc.Len() > 0 {
685 options.toc.WriteByte('\n')
686 }
687 options.toc.WriteString("<ul>\n")
688 options.currentLevel++
689 }
690
691 for level < options.currentLevel {
692 options.toc.WriteString("</ul>")
693 if options.currentLevel > 1 {
694 options.toc.WriteString("</li>\n")
695 }
696 options.currentLevel--
697 }
698
699 options.toc.WriteString("<li><a href=\"#toc_")
700 options.toc.WriteString(strconv.Itoa(options.headerCount))
701 options.toc.WriteString("\">")
702 options.headerCount++
703
704 options.toc.Write(text)
705
706 options.toc.WriteString("</a></li>\n")
707}
708
709func (options *Html) TocFinalize() {
710 for options.currentLevel > 1 {
711 options.toc.WriteString("</ul></li>\n")
712 options.currentLevel--
713 }
714
715 if options.currentLevel > 0 {
716 options.toc.WriteString("</ul>\n")
717 }
718}
719
720func isHtmlTag(tag []byte, tagname string) bool {
721 found, _ := findHtmlTagPos(tag, tagname)
722 return found
723}
724
725func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
726 i := 0
727 if i < len(tag) && tag[0] != '<' {
728 return false, -1
729 }
730 i++
731 i = skipSpace(tag, i)
732
733 if i < len(tag) && tag[i] == '/' {
734 i++
735 }
736
737 i = skipSpace(tag, i)
738 j := 0
739 for ; i < len(tag); i, j = i+1, j+1 {
740 if j >= len(tagname) {
741 break
742 }
743
744 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
745 return false, -1
746 }
747 }
748
749 if i == len(tag) {
750 return false, -1
751 }
752
753 // Now look for closing '>', but ignore it when it's in any kind of quotes,
754 // it might be JavaScript
755 inSingleQuote := false
756 inDoubleQuote := false
757 inGraveQuote := false
758 for i < len(tag) {
759 switch {
760 case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
761 return true, i
762 case tag[i] == '\'':
763 inSingleQuote = !inSingleQuote
764 case tag[i] == '"':
765 inDoubleQuote = !inDoubleQuote
766 case tag[i] == '`':
767 inGraveQuote = !inGraveQuote
768 }
769 i++
770 }
771
772 return false, -1
773}
774
775func skipSpace(tag []byte, i int) int {
776 for i < len(tag) && isspace(tag[i]) {
777 i++
778 }
779 return i
780}
781
// doubleSpace writes a newline to out unless out is still empty, so that
// block elements are separated from whatever was written before them.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}
786}