html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Html renderer configuration options.
//
// Every constant occupies its own bit, so options are combined with a
// bitwise OR and handed to HtmlRenderer as its flags argument.
const (
	// HTML_SKIP_HTML skips preformatted HTML blocks.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE skips embedded <style> elements.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES skips embedded images.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS skips all links.
	HTML_SKIP_LINKS
	// HTML_SKIP_SCRIPT skips embedded <script> elements.
	HTML_SKIP_SCRIPT

	// HTML_SAFELINK only links to trusted protocols.
	HTML_SAFELINK

	// HTML_TOC generates a table of contents.
	HTML_TOC
	// HTML_OMIT_CONTENTS skips the main contents (for a standalone table
	// of contents).
	HTML_OMIT_CONTENTS

	// HTML_COMPLETE_PAGE generates a complete HTML page.
	HTML_COMPLETE_PAGE
	// HTML_GITHUB_BLOCKCODE uses github fenced code rendering rules.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML generates XHTML output instead of HTML.
	HTML_USE_XHTML

	// HTML_USE_SMARTYPANTS enables smart punctuation substitutions.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS enables smart fractions (with
	// HTML_USE_SMARTYPANTS).
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES enables LaTeX-style dashes (with
	// HTML_USE_SMARTYPANTS).
	HTML_SMARTYPANTS_LATEX_DASHES
)
42
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data

	// tocMarker is the output offset recorded at the end of DocumentHeader;
	// DocumentFooter inserts the table of contents at this offset.
	tocMarker int
	// headerCount numbers the "toc_N" anchors; it is incremented by
	// TocHeader each time an entry is added.
	headerCount int
	// currentLevel is the nesting depth of the <ul> lists currently open
	// in toc.
	currentLevel int
	// toc accumulates the table of contents markup.
	toc *bytes.Buffer

	// smartypants is indexed by input byte in Smartypants to find the
	// substitution callback (nil when the byte needs no handling).
	smartypants *smartypantsRenderer
}
60
// Terminators for singleton tags such as <hr> and <br>; HtmlRenderer picks
// one of them depending on HTML_USE_XHTML.
const (
	htmlClose  = ">\n"   // plain HTML
	xhtmlClose = " />\n" // XHTML (self-closing)
)
65
66// HtmlRenderer creates and configures an Html object, which
67// satisfies the Renderer interface.
68//
69// flags is a set of HTML_* options ORed together.
70// title is the title of the document, and css is a URL for the document's
71// stylesheet.
72// title and css are only used when HTML_COMPLETE_PAGE is selected.
73func HtmlRenderer(flags int, title string, css string) Renderer {
74 // configure the rendering engine
75 closeTag := htmlClose
76 if flags&HTML_USE_XHTML != 0 {
77 closeTag = xhtmlClose
78 }
79
80 return &Html{
81 flags: flags,
82 closeTag: closeTag,
83 title: title,
84 css: css,
85
86 headerCount: 0,
87 currentLevel: 0,
88 toc: new(bytes.Buffer),
89
90 smartypants: smartypants(flags),
91 }
92}
93
// attrEscape writes src to out, replacing the four characters that are
// significant in HTML text and attribute values with character references:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// All other bytes are copied through unchanged. Runs of ordinary bytes are
// written with a single Write call rather than byte by byte.
func attrEscape(out *bytes.Buffer, src []byte) {
	// org is the start of the pending run of bytes that need no escaping
	org := 0
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		if i > org {
			// copy all the normal characters since the last escape
			out.Write(src[org:i])
		}
		org = i + 1
		out.WriteString(entity)
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
139
140func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
141 marker := out.Len()
142 doubleSpace(out)
143
144 if options.flags&HTML_TOC != 0 {
145 // headerCount is incremented in htmlTocHeader
146 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
147 } else {
148 out.WriteString(fmt.Sprintf("<h%d>", level))
149 }
150
151 tocMarker := out.Len()
152 if !text() {
153 out.Truncate(marker)
154 return
155 }
156
157 // are we building a table of contents?
158 if options.flags&HTML_TOC != 0 {
159 options.TocHeader(out.Bytes()[tocMarker:], level)
160 }
161
162 out.WriteString(fmt.Sprintf("</h%d>\n", level))
163}
164
// BlockHtml writes a block of raw HTML to out, unescaped and followed by a
// newline. Nothing is written when HTML_SKIP_HTML is set.
func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
	if options.flags&HTML_SKIP_HTML != 0 {
		return
	}

	doubleSpace(out)
	out.Write(text)
	out.WriteByte('\n')
}
174
175func (options *Html) HRule(out *bytes.Buffer) {
176 doubleSpace(out)
177 out.WriteString("<hr")
178 out.WriteString(options.closeTag)
179}
180
181func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
182 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
183 options.BlockCodeGithub(out, text, lang)
184 } else {
185 options.BlockCodeNormal(out, text, lang)
186 }
187}
188
189func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
190 doubleSpace(out)
191
192 // parse out the language names/classes
193 count := 0
194 for _, elt := range strings.Fields(lang) {
195 if elt[0] == '.' {
196 elt = elt[1:]
197 }
198 if len(elt) == 0 {
199 continue
200 }
201 if count == 0 {
202 out.WriteString("<pre><code class=\"")
203 } else {
204 out.WriteByte(' ')
205 }
206 attrEscape(out, []byte(elt))
207 count++
208 }
209
210 if count == 0 {
211 out.WriteString("<pre><code>")
212 } else {
213 out.WriteString("\">")
214 }
215
216 attrEscape(out, text)
217 out.WriteString("</code></pre>\n")
218}
219
220// GitHub style code block:
221//
222// <pre lang="LANG"><code>
223// ...
224// </code></pre>
225//
226// Unlike other parsers, we store the language identifier in the <pre>,
227// and don't let the user generate custom classes.
228//
229// The language identifier in the <pre> block gets postprocessed and all
230// the code inside gets syntax highlighted with Pygments. This is much safer
231// than letting the user specify a CSS class for highlighting.
232//
233// Note that we only generate HTML for the first specifier.
234// E.g.
235// ~~~~ {.python .numbered} => <pre lang="python"><code>
236func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
237 doubleSpace(out)
238
239 // parse out the language name
240 count := 0
241 for _, elt := range strings.Fields(lang) {
242 if elt[0] == '.' {
243 elt = elt[1:]
244 }
245 if len(elt) == 0 {
246 continue
247 }
248 out.WriteString("<pre lang=\"")
249 attrEscape(out, []byte(elt))
250 out.WriteString("\"><code>")
251 count++
252 break
253 }
254
255 if count == 0 {
256 out.WriteString("<pre><code>")
257 }
258
259 attrEscape(out, text)
260 out.WriteString("</code></pre>\n")
261}
262
// BlockQuote writes text to out wrapped in a <blockquote> element.
// text is written as-is, without escaping.
func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
	doubleSpace(out)
	out.WriteString("<blockquote>\n")
	out.Write(text)
	out.WriteString("</blockquote>\n")
}
269
// Table writes a <table> whose <thead> contains header and whose <tbody>
// contains body. Both are written as-is, without escaping.
// columnData is not used by this renderer.
func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
	doubleSpace(out)
	out.WriteString("<table>\n<thead>\n")
	out.Write(header)
	out.WriteString("</thead>\n\n<tbody>\n")
	out.Write(body)
	out.WriteString("</tbody>\n</table>\n")
}
278
// TableRow writes text to out wrapped in a <tr> element.
// text is written as-is, without escaping.
func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
	doubleSpace(out)
	out.WriteString("<tr>\n")
	out.Write(text)
	out.WriteString("\n</tr>\n")
}
285
286func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
287 doubleSpace(out)
288 switch align {
289 case TABLE_ALIGNMENT_LEFT:
290 out.WriteString("<td align=\"left\">")
291 case TABLE_ALIGNMENT_RIGHT:
292 out.WriteString("<td align=\"right\">")
293 case TABLE_ALIGNMENT_CENTER:
294 out.WriteString("<td align=\"center\">")
295 default:
296 out.WriteString("<td>")
297 }
298
299 out.Write(text)
300 out.WriteString("</td>")
301}
302
303func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
304 marker := out.Len()
305 doubleSpace(out)
306
307 if flags&LIST_TYPE_ORDERED != 0 {
308 out.WriteString("<ol>")
309 } else {
310 out.WriteString("<ul>")
311 }
312 if !text() {
313 out.Truncate(marker)
314 return
315 }
316 if flags&LIST_TYPE_ORDERED != 0 {
317 out.WriteString("</ol>\n")
318 } else {
319 out.WriteString("</ul>\n")
320 }
321}
322
323func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
324 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
325 doubleSpace(out)
326 }
327 out.WriteString("<li>")
328 out.Write(text)
329 out.WriteString("</li>\n")
330}
331
// Paragraph writes a <p> element.
//
// text is invoked after the opening tag has been written; if it returns
// false, out is truncated back to its length on entry so that nothing is
// emitted for the paragraph.
func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
	// remember where we started so the paragraph can be rolled back
	marker := out.Len()
	doubleSpace(out)

	out.WriteString("<p>")
	if !text() {
		out.Truncate(marker)
		return
	}
	out.WriteString("</p>\n")
}
343
344func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
345 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
346 // mark it but don't link it if it is not a safe link: no smartypants
347 out.WriteString("<tt>")
348 attrEscape(out, link)
349 out.WriteString("</tt>")
350 return
351 }
352
353 out.WriteString("<a href=\"")
354 if kind == LINK_TYPE_EMAIL {
355 out.WriteString("mailto:")
356 }
357 attrEscape(out, link)
358 out.WriteString("\">")
359
360 // Pretty print: if we get an email address as
361 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
362 // want to print the `mailto:` prefix
363 switch {
364 case bytes.HasPrefix(link, []byte("mailto://")):
365 attrEscape(out, link[len("mailto://"):])
366 case bytes.HasPrefix(link, []byte("mailto:")):
367 attrEscape(out, link[len("mailto:"):])
368 default:
369 attrEscape(out, link)
370 }
371
372 out.WriteString("</a>")
373}
374
// CodeSpan writes text to out inside a <code> element, escaping it with
// attrEscape.
func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
	out.WriteString("<code>")
	attrEscape(out, text)
	out.WriteString("</code>")
}
380
// DoubleEmphasis writes text to out inside a <strong> element.
// text is written as-is, without escaping.
func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
	out.WriteString("<strong>")
	out.Write(text)
	out.WriteString("</strong>")
}
386
// Emphasis writes text to out inside an <em> element.
// text is written as-is, without escaping; nothing at all is written when
// text is empty.
func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
	if len(text) == 0 {
		return
	}
	out.WriteString("<em>")
	out.Write(text)
	out.WriteString("</em>")
}
395
396func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
397 if options.flags&HTML_SKIP_IMAGES != 0 {
398 return
399 }
400
401 out.WriteString("<img src=\"")
402 attrEscape(out, link)
403 out.WriteString("\" alt=\"")
404 if len(alt) > 0 {
405 attrEscape(out, alt)
406 }
407 if len(title) > 0 {
408 out.WriteString("\" title=\"")
409 attrEscape(out, title)
410 }
411
412 out.WriteByte('"')
413 out.WriteString(options.closeTag)
414 return
415}
416
417func (options *Html) LineBreak(out *bytes.Buffer) {
418 out.WriteString("<br")
419 out.WriteString(options.closeTag)
420}
421
422func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
423 if options.flags&HTML_SKIP_LINKS != 0 {
424 // write the link text out but don't link it, just mark it with typewriter font
425 out.WriteString("<tt>")
426 attrEscape(out, content)
427 out.WriteString("</tt>")
428 return
429 }
430
431 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
432 // write the link text out but don't link it, just mark it with typewriter font
433 out.WriteString("<tt>")
434 attrEscape(out, content)
435 out.WriteString("</tt>")
436 return
437 }
438
439 out.WriteString("<a href=\"")
440 attrEscape(out, link)
441 if len(title) > 0 {
442 out.WriteString("\" title=\"")
443 attrEscape(out, title)
444 }
445 out.WriteString("\">")
446 out.Write(content)
447 out.WriteString("</a>")
448 return
449}
450
451func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
452 if options.flags&HTML_SKIP_HTML != 0 {
453 return
454 }
455 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
456 return
457 }
458 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
459 return
460 }
461 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
462 return
463 }
464 if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
465 return
466 }
467 out.Write(text)
468}
469
// TripleEmphasis writes text to out inside nested <strong><em> elements.
// text is written as-is, without escaping.
func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
	out.WriteString("<strong><em>")
	out.Write(text)
	out.WriteString("</em></strong>")
}
475
// StrikeThrough writes text to out inside a <del> element.
// text is written as-is, without escaping.
func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
	out.WriteString("<del>")
	out.Write(text)
	out.WriteString("</del>")
}
481
// Entity writes entity to out unchanged.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
485
486func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
487 if options.flags&HTML_USE_SMARTYPANTS != 0 {
488 options.Smartypants(out, text)
489 } else {
490 attrEscape(out, text)
491 }
492}
493
494func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
495 smrt := smartypantsData{false, false}
496
497 // first do normal entity escaping
498 var escaped bytes.Buffer
499 attrEscape(&escaped, text)
500 text = escaped.Bytes()
501
502 mark := 0
503 for i := 0; i < len(text); i++ {
504 if action := options.smartypants[text[i]]; action != nil {
505 if i > mark {
506 out.Write(text[mark:i])
507 }
508
509 previousChar := byte(0)
510 if i > 0 {
511 previousChar = text[i-1]
512 }
513 i += action(out, &smrt, previousChar, text[i:])
514 mark = i + 1
515 }
516 }
517
518 if mark < len(text) {
519 out.Write(text[mark:])
520 }
521}
522
523func (options *Html) DocumentHeader(out *bytes.Buffer) {
524 if options.flags&HTML_COMPLETE_PAGE == 0 {
525 return
526 }
527
528 ending := ""
529 if options.flags&HTML_USE_XHTML != 0 {
530 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
531 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
532 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
533 ending = " /"
534 } else {
535 out.WriteString("<!DOCTYPE html>\n")
536 out.WriteString("<html>\n")
537 }
538 out.WriteString("<head>\n")
539 out.WriteString(" <title>")
540 options.NormalText(out, []byte(options.title))
541 out.WriteString("</title>\n")
542 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
543 out.WriteString(VERSION)
544 out.WriteString("\"")
545 out.WriteString(ending)
546 out.WriteString(">\n")
547 out.WriteString(" <meta charset=\"utf-8\"")
548 out.WriteString(ending)
549 out.WriteString(">\n")
550 if options.css != "" {
551 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
552 attrEscape(out, []byte(options.css))
553 out.WriteString("\"")
554 out.WriteString(ending)
555 out.WriteString(">\n")
556 }
557 out.WriteString("</head>\n")
558 out.WriteString("<body>\n")
559
560 options.tocMarker = out.Len()
561}
562
563func (options *Html) DocumentFooter(out *bytes.Buffer) {
564 // finalize and insert the table of contents
565 if options.flags&HTML_TOC != 0 {
566 options.TocFinalize()
567
568 // now we have to insert the table of contents into the document
569 var temp bytes.Buffer
570
571 // start by making a copy of everything after the document header
572 temp.Write(out.Bytes()[options.tocMarker:])
573
574 // now clear the copied material from the main output buffer
575 out.Truncate(options.tocMarker)
576
577 // corner case spacing issue
578 if options.flags&HTML_COMPLETE_PAGE != 0 {
579 out.WriteByte('\n')
580 }
581
582 // insert the table of contents
583 out.WriteString("<nav>\n")
584 out.Write(options.toc.Bytes())
585 out.WriteString("</nav>\n")
586
587 // corner case spacing issue
588 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
589 out.WriteByte('\n')
590 }
591
592 // write out everything that came after it
593 if options.flags&HTML_OMIT_CONTENTS == 0 {
594 out.Write(temp.Bytes())
595 }
596 }
597
598 if options.flags&HTML_COMPLETE_PAGE != 0 {
599 out.WriteString("\n</body>\n")
600 out.WriteString("</html>\n")
601 }
602
603}
604
605func (options *Html) TocHeader(text []byte, level int) {
606 for level > options.currentLevel {
607 switch {
608 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
609 // this sublist can nest underneath a header
610 size := options.toc.Len()
611 options.toc.Truncate(size - len("</li>\n"))
612
613 case options.currentLevel > 0:
614 options.toc.WriteString("<li>")
615 }
616 if options.toc.Len() > 0 {
617 options.toc.WriteByte('\n')
618 }
619 options.toc.WriteString("<ul>\n")
620 options.currentLevel++
621 }
622
623 for level < options.currentLevel {
624 options.toc.WriteString("</ul>")
625 if options.currentLevel > 1 {
626 options.toc.WriteString("</li>\n")
627 }
628 options.currentLevel--
629 }
630
631 options.toc.WriteString("<li><a href=\"#toc_")
632 options.toc.WriteString(strconv.Itoa(options.headerCount))
633 options.toc.WriteString("\">")
634 options.headerCount++
635
636 options.toc.Write(text)
637
638 options.toc.WriteString("</a></li>\n")
639}
640
641func (options *Html) TocFinalize() {
642 for options.currentLevel > 1 {
643 options.toc.WriteString("</ul></li>\n")
644 options.currentLevel--
645 }
646
647 if options.currentLevel > 0 {
648 options.toc.WriteString("</ul>\n")
649 }
650}
651
652func isHtmlTag(tag []byte, tagname string) bool {
653 i := 0
654 if i < len(tag) && tag[0] != '<' {
655 return false
656 }
657 i++
658 i = skipSpace(tag, i)
659
660 if i < len(tag) && tag[i] == '/' {
661 i++
662 }
663
664 i = skipSpace(tag, i)
665 j := 0
666 for ; i < len(tag); i, j = i+1, j+1 {
667 if j >= len(tagname) {
668 break
669 }
670
671 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
672 return false
673 }
674 }
675
676 if i == len(tag) {
677 return false
678 }
679
680 return isspace(tag[i]) || tag[i] == '>'
681}
682
683func skipSpace(tag []byte, i int) int {
684 for i < len(tag) && isspace(tag[i]) {
685 i++
686 }
687 return i
688}
689
// doubleSpace appends a newline to out unless out is still empty, so that
// block-level elements are separated from whatever precedes them without
// putting a blank line at the very start of the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}