html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Configuration options for the HTML renderer. Each option is a distinct bit;
// combine them with bitwise OR and pass the result to HtmlRenderer.
const (
	// HTML_SKIP_HTML skips preformatted HTML blocks.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE skips embedded <style> elements.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES skips embedded images.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS skips all links.
	HTML_SKIP_LINKS
	// HTML_SKIP_SCRIPT skips embedded <script> elements.
	HTML_SKIP_SCRIPT
	// HTML_SAFELINK restricts links to trusted protocols.
	HTML_SAFELINK
	// HTML_TOC generates a table of contents.
	HTML_TOC
	// HTML_OMIT_CONTENTS skips the main contents (for a standalone table of contents).
	HTML_OMIT_CONTENTS
	// HTML_COMPLETE_PAGE generates a complete HTML page.
	HTML_COMPLETE_PAGE
	// HTML_GITHUB_BLOCKCODE uses GitHub fenced code rendering rules.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML generates XHTML output instead of HTML.
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS enables smart punctuation substitutions.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS enables smart fractions (with HTML_USE_SMARTYPANTS).
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES enables LaTeX-style dashes (with HTML_USE_SMARTYPANTS).
	HTML_SMARTYPANTS_LATEX_DASHES
)
42
43// Html is a type that implements the Renderer interface for HTML output.
44//
45// Do not create this directly, instead use the HtmlRenderer function.
46type Html struct {
47 flags int // HTML_* options
48 closeTag string // how to end singleton tags: either " />\n" or ">\n"
49 title string // document title
50 css string // optional css file url (used with HTML_COMPLETE_PAGE)
51
52 // table of contents data
53 tocMarker int
54 headerCount int
55 currentLevel int
56 toc *bytes.Buffer
57
58 smartypants *smartypantsRenderer
59}
60
// Terminators written after the name of a singleton tag such as <hr or <br.
const (
	htmlClose  = ">\n"   // plain HTML form
	xhtmlClose = " />\n" // XHTML self-closing form
)
65
66// HtmlRenderer creates and configures an Html object, which
67// satisfies the Renderer interface.
68//
69// flags is a set of HTML_* options ORed together.
70// title is the title of the document, and css is a URL for the document's
71// stylesheet.
72// title and css are only used when HTML_COMPLETE_PAGE is selected.
73func HtmlRenderer(flags int, title string, css string) Renderer {
74 // configure the rendering engine
75 closeTag := htmlClose
76 if flags&HTML_USE_XHTML != 0 {
77 closeTag = xhtmlClose
78 }
79
80 return &Html{
81 flags: flags,
82 closeTag: closeTag,
83 title: title,
84 css: css,
85
86 headerCount: 0,
87 currentLevel: 0,
88 toc: new(bytes.Buffer),
89
90 smartypants: smartypants(flags),
91 }
92}
93
// attrEscape writes src to out with the characters that are special in HTML
// text and attribute values replaced by entity references:
//
//	"  becomes  &quot;
//	&  becomes  &amp;
//	<  becomes  &lt;
//	>  becomes  &gt;
//
// Every other byte is copied through unchanged. Runs of ordinary bytes are
// written in a single call rather than byte by byte.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// copy all the normal characters since the last escape, then the entity
		out.Write(src[org:i])
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the final escaped character
	out.Write(src[org:])
}
139
140func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
141 marker := out.Len()
142 doubleSpace(out)
143
144 if options.flags&HTML_TOC != 0 {
145 // headerCount is incremented in htmlTocHeader
146 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
147 } else {
148 out.WriteString(fmt.Sprintf("<h%d>", level))
149 }
150
151 tocMarker := out.Len()
152 if !text() {
153 out.Truncate(marker)
154 return
155 }
156
157 // are we building a table of contents?
158 if options.flags&HTML_TOC != 0 {
159 options.TocHeader(out.Bytes()[tocMarker:], level)
160 }
161
162 out.WriteString(fmt.Sprintf("</h%d>\n", level))
163}
164
165func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
166 if options.flags&HTML_SKIP_HTML != 0 {
167 return
168 }
169
170 doubleSpace(out)
171 if options.flags&HTML_SKIP_SCRIPT != 0 {
172 out.Write(stripTag(string(text), "script", "p"))
173 } else {
174 out.Write(text)
175 }
176 out.WriteByte('\n')
177}
178
// stripTag replaces every literal "<tag>" in text with "<newTag>" and every
// literal "</tag>" with "</newTag>".
//
// This is a trivial implementation for the simplest possible case: only the
// exact spelling is matched, so tags with attributes or different letter case
// are left alone.
func stripTag(text, tag, newTag string) []byte {
	// opening form first, then closing form
	for _, format := range []string{"<%s>", "</%s>"} {
		text = strings.Replace(text, fmt.Sprintf(format, tag), fmt.Sprintf(format, newTag), -1)
	}
	return []byte(text)
}
188
189func (options *Html) HRule(out *bytes.Buffer) {
190 doubleSpace(out)
191 out.WriteString("<hr")
192 out.WriteString(options.closeTag)
193}
194
195func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
196 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
197 options.BlockCodeGithub(out, text, lang)
198 } else {
199 options.BlockCodeNormal(out, text, lang)
200 }
201}
202
203func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
204 doubleSpace(out)
205
206 // parse out the language names/classes
207 count := 0
208 for _, elt := range strings.Fields(lang) {
209 if elt[0] == '.' {
210 elt = elt[1:]
211 }
212 if len(elt) == 0 {
213 continue
214 }
215 if count == 0 {
216 out.WriteString("<pre><code class=\"")
217 } else {
218 out.WriteByte(' ')
219 }
220 attrEscape(out, []byte(elt))
221 count++
222 }
223
224 if count == 0 {
225 out.WriteString("<pre><code>")
226 } else {
227 out.WriteString("\">")
228 }
229
230 attrEscape(out, text)
231 out.WriteString("</code></pre>\n")
232}
233
234// GitHub style code block:
235//
236// <pre lang="LANG"><code>
237// ...
238// </code></pre>
239//
240// Unlike other parsers, we store the language identifier in the <pre>,
241// and don't let the user generate custom classes.
242//
243// The language identifier in the <pre> block gets postprocessed and all
244// the code inside gets syntax highlighted with Pygments. This is much safer
245// than letting the user specify a CSS class for highlighting.
246//
247// Note that we only generate HTML for the first specifier.
248// E.g.
249// ~~~~ {.python .numbered} => <pre lang="python"><code>
250func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
251 doubleSpace(out)
252
253 // parse out the language name
254 count := 0
255 for _, elt := range strings.Fields(lang) {
256 if elt[0] == '.' {
257 elt = elt[1:]
258 }
259 if len(elt) == 0 {
260 continue
261 }
262 out.WriteString("<pre lang=\"")
263 attrEscape(out, []byte(elt))
264 out.WriteString("\"><code>")
265 count++
266 break
267 }
268
269 if count == 0 {
270 out.WriteString("<pre><code>")
271 }
272
273 attrEscape(out, text)
274 out.WriteString("</code></pre>\n")
275}
276
277func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
278 doubleSpace(out)
279 out.WriteString("<blockquote>\n")
280 out.Write(text)
281 out.WriteString("</blockquote>\n")
282}
283
284func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
285 doubleSpace(out)
286 out.WriteString("<table>\n<thead>\n")
287 out.Write(header)
288 out.WriteString("</thead>\n\n<tbody>\n")
289 out.Write(body)
290 out.WriteString("</tbody>\n</table>\n")
291}
292
293func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
294 doubleSpace(out)
295 out.WriteString("<tr>\n")
296 out.Write(text)
297 out.WriteString("\n</tr>\n")
298}
299
300func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
301 doubleSpace(out)
302 switch align {
303 case TABLE_ALIGNMENT_LEFT:
304 out.WriteString("<td align=\"left\">")
305 case TABLE_ALIGNMENT_RIGHT:
306 out.WriteString("<td align=\"right\">")
307 case TABLE_ALIGNMENT_CENTER:
308 out.WriteString("<td align=\"center\">")
309 default:
310 out.WriteString("<td>")
311 }
312
313 out.Write(text)
314 out.WriteString("</td>")
315}
316
317func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
318 marker := out.Len()
319 doubleSpace(out)
320
321 if flags&LIST_TYPE_ORDERED != 0 {
322 out.WriteString("<ol>")
323 } else {
324 out.WriteString("<ul>")
325 }
326 if !text() {
327 out.Truncate(marker)
328 return
329 }
330 if flags&LIST_TYPE_ORDERED != 0 {
331 out.WriteString("</ol>\n")
332 } else {
333 out.WriteString("</ul>\n")
334 }
335}
336
337func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
338 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
339 doubleSpace(out)
340 }
341 out.WriteString("<li>")
342 out.Write(text)
343 out.WriteString("</li>\n")
344}
345
346func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
347 marker := out.Len()
348 doubleSpace(out)
349
350 out.WriteString("<p>")
351 if !text() {
352 out.Truncate(marker)
353 return
354 }
355 out.WriteString("</p>\n")
356}
357
358func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
359 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
360 // mark it but don't link it if it is not a safe link: no smartypants
361 out.WriteString("<tt>")
362 attrEscape(out, link)
363 out.WriteString("</tt>")
364 return
365 }
366
367 out.WriteString("<a href=\"")
368 if kind == LINK_TYPE_EMAIL {
369 out.WriteString("mailto:")
370 }
371 attrEscape(out, link)
372 out.WriteString("\">")
373
374 // Pretty print: if we get an email address as
375 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
376 // want to print the `mailto:` prefix
377 switch {
378 case bytes.HasPrefix(link, []byte("mailto://")):
379 attrEscape(out, link[len("mailto://"):])
380 case bytes.HasPrefix(link, []byte("mailto:")):
381 attrEscape(out, link[len("mailto:"):])
382 default:
383 attrEscape(out, link)
384 }
385
386 out.WriteString("</a>")
387}
388
389func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
390 out.WriteString("<code>")
391 attrEscape(out, text)
392 out.WriteString("</code>")
393}
394
395func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
396 out.WriteString("<strong>")
397 out.Write(text)
398 out.WriteString("</strong>")
399}
400
401func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
402 if len(text) == 0 {
403 return
404 }
405 out.WriteString("<em>")
406 out.Write(text)
407 out.WriteString("</em>")
408}
409
410func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
411 if options.flags&HTML_SKIP_IMAGES != 0 {
412 return
413 }
414
415 out.WriteString("<img src=\"")
416 attrEscape(out, link)
417 out.WriteString("\" alt=\"")
418 if len(alt) > 0 {
419 attrEscape(out, alt)
420 }
421 if len(title) > 0 {
422 out.WriteString("\" title=\"")
423 attrEscape(out, title)
424 }
425
426 out.WriteByte('"')
427 out.WriteString(options.closeTag)
428 return
429}
430
431func (options *Html) LineBreak(out *bytes.Buffer) {
432 out.WriteString("<br")
433 out.WriteString(options.closeTag)
434}
435
436func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
437 if options.flags&HTML_SKIP_LINKS != 0 {
438 // write the link text out but don't link it, just mark it with typewriter font
439 out.WriteString("<tt>")
440 attrEscape(out, content)
441 out.WriteString("</tt>")
442 return
443 }
444
445 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
446 // write the link text out but don't link it, just mark it with typewriter font
447 out.WriteString("<tt>")
448 attrEscape(out, content)
449 out.WriteString("</tt>")
450 return
451 }
452
453 out.WriteString("<a href=\"")
454 attrEscape(out, link)
455 if len(title) > 0 {
456 out.WriteString("\" title=\"")
457 attrEscape(out, title)
458 }
459 out.WriteString("\">")
460 out.Write(content)
461 out.WriteString("</a>")
462 return
463}
464
465func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
466 if options.flags&HTML_SKIP_HTML != 0 {
467 return
468 }
469 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
470 return
471 }
472 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
473 return
474 }
475 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
476 return
477 }
478 if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
479 return
480 }
481 out.Write(text)
482}
483
484func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
485 out.WriteString("<strong><em>")
486 out.Write(text)
487 out.WriteString("</em></strong>")
488}
489
490func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
491 out.WriteString("<del>")
492 out.Write(text)
493 out.WriteString("</del>")
494}
495
496func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
497 out.Write(entity)
498}
499
500func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
501 if options.flags&HTML_USE_SMARTYPANTS != 0 {
502 options.Smartypants(out, text)
503 } else {
504 attrEscape(out, text)
505 }
506}
507
508func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
509 smrt := smartypantsData{false, false}
510
511 // first do normal entity escaping
512 var escaped bytes.Buffer
513 attrEscape(&escaped, text)
514 text = escaped.Bytes()
515
516 mark := 0
517 for i := 0; i < len(text); i++ {
518 if action := options.smartypants[text[i]]; action != nil {
519 if i > mark {
520 out.Write(text[mark:i])
521 }
522
523 previousChar := byte(0)
524 if i > 0 {
525 previousChar = text[i-1]
526 }
527 i += action(out, &smrt, previousChar, text[i:])
528 mark = i + 1
529 }
530 }
531
532 if mark < len(text) {
533 out.Write(text[mark:])
534 }
535}
536
537func (options *Html) DocumentHeader(out *bytes.Buffer) {
538 if options.flags&HTML_COMPLETE_PAGE == 0 {
539 return
540 }
541
542 ending := ""
543 if options.flags&HTML_USE_XHTML != 0 {
544 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
545 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
546 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
547 ending = " /"
548 } else {
549 out.WriteString("<!DOCTYPE html>\n")
550 out.WriteString("<html>\n")
551 }
552 out.WriteString("<head>\n")
553 out.WriteString(" <title>")
554 options.NormalText(out, []byte(options.title))
555 out.WriteString("</title>\n")
556 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
557 out.WriteString(VERSION)
558 out.WriteString("\"")
559 out.WriteString(ending)
560 out.WriteString(">\n")
561 out.WriteString(" <meta charset=\"utf-8\"")
562 out.WriteString(ending)
563 out.WriteString(">\n")
564 if options.css != "" {
565 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
566 attrEscape(out, []byte(options.css))
567 out.WriteString("\"")
568 out.WriteString(ending)
569 out.WriteString(">\n")
570 }
571 out.WriteString("</head>\n")
572 out.WriteString("<body>\n")
573
574 options.tocMarker = out.Len()
575}
576
577func (options *Html) DocumentFooter(out *bytes.Buffer) {
578 // finalize and insert the table of contents
579 if options.flags&HTML_TOC != 0 {
580 options.TocFinalize()
581
582 // now we have to insert the table of contents into the document
583 var temp bytes.Buffer
584
585 // start by making a copy of everything after the document header
586 temp.Write(out.Bytes()[options.tocMarker:])
587
588 // now clear the copied material from the main output buffer
589 out.Truncate(options.tocMarker)
590
591 // corner case spacing issue
592 if options.flags&HTML_COMPLETE_PAGE != 0 {
593 out.WriteByte('\n')
594 }
595
596 // insert the table of contents
597 out.WriteString("<nav>\n")
598 out.Write(options.toc.Bytes())
599 out.WriteString("</nav>\n")
600
601 // corner case spacing issue
602 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
603 out.WriteByte('\n')
604 }
605
606 // write out everything that came after it
607 if options.flags&HTML_OMIT_CONTENTS == 0 {
608 out.Write(temp.Bytes())
609 }
610 }
611
612 if options.flags&HTML_COMPLETE_PAGE != 0 {
613 out.WriteString("\n</body>\n")
614 out.WriteString("</html>\n")
615 }
616
617}
618
619func (options *Html) TocHeader(text []byte, level int) {
620 for level > options.currentLevel {
621 switch {
622 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
623 // this sublist can nest underneath a header
624 size := options.toc.Len()
625 options.toc.Truncate(size - len("</li>\n"))
626
627 case options.currentLevel > 0:
628 options.toc.WriteString("<li>")
629 }
630 if options.toc.Len() > 0 {
631 options.toc.WriteByte('\n')
632 }
633 options.toc.WriteString("<ul>\n")
634 options.currentLevel++
635 }
636
637 for level < options.currentLevel {
638 options.toc.WriteString("</ul>")
639 if options.currentLevel > 1 {
640 options.toc.WriteString("</li>\n")
641 }
642 options.currentLevel--
643 }
644
645 options.toc.WriteString("<li><a href=\"#toc_")
646 options.toc.WriteString(strconv.Itoa(options.headerCount))
647 options.toc.WriteString("\">")
648 options.headerCount++
649
650 options.toc.Write(text)
651
652 options.toc.WriteString("</a></li>\n")
653}
654
655func (options *Html) TocFinalize() {
656 for options.currentLevel > 1 {
657 options.toc.WriteString("</ul></li>\n")
658 options.currentLevel--
659 }
660
661 if options.currentLevel > 0 {
662 options.toc.WriteString("</ul>\n")
663 }
664}
665
666func isHtmlTag(tag []byte, tagname string) bool {
667 i := 0
668 if i < len(tag) && tag[0] != '<' {
669 return false
670 }
671 i++
672 i = skipSpace(tag, i)
673
674 if i < len(tag) && tag[i] == '/' {
675 i++
676 }
677
678 i = skipSpace(tag, i)
679 j := 0
680 for ; i < len(tag); i, j = i+1, j+1 {
681 if j >= len(tagname) {
682 break
683 }
684
685 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
686 return false
687 }
688 }
689
690 if i == len(tag) {
691 return false
692 }
693
694 return isspace(tag[i]) || tag[i] == '>'
695}
696
697func skipSpace(tag []byte, i int) int {
698 for i < len(tag) && isspace(tag[i]) {
699 i++
700 }
701 return i
702}
703
// doubleSpace appends a newline to out unless out is still empty, so that
// block elements are separated without the document starting with a newline.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}