html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Html renderer configuration options.
//
// Every option occupies its own bit (the values are generated with
// 1 << iota), so any combination can be ORed together and passed as the
// flags argument of HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
41
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded at the end of DocumentHeader; DocumentFooter inserts the table of contents there
	headerCount  int           // number used for the next header's toc_N anchor; incremented by TocHeader
	currentLevel int           // how many <ul> lists are currently open in toc
	toc          *bytes.Buffer // table of contents markup accumulated by TocHeader and closed by TocFinalize

	// smart punctuation handlers; Smartypants indexes this by input byte
	smartypants *smartypantsRenderer
}
59
// Endings written after singleton tags such as <hr>, <br> and <img>.
// HtmlRenderer stores xhtmlClose in Html.closeTag when HTML_USE_XHTML is set
// and htmlClose otherwise. Both endings include the trailing newline.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
64
65// HtmlRenderer creates and configures an Html object, which
66// satisfies the Renderer interface.
67//
68// flags is a set of HTML_* options ORed together.
69// title is the title of the document, and css is a URL for the document's
70// stylesheet.
71// title and css are only used when HTML_COMPLETE_PAGE is selected.
72func HtmlRenderer(flags int, title string, css string) Renderer {
73 // configure the rendering engine
74 closeTag := htmlClose
75 if flags&HTML_USE_XHTML != 0 {
76 closeTag = xhtmlClose
77 }
78
79 return &Html{
80 flags: flags,
81 closeTag: closeTag,
82 title: title,
83 css: css,
84
85 headerCount: 0,
86 currentLevel: 0,
87 toc: new(bytes.Buffer),
88
89 smartypants: smartypants(flags),
90 }
91}
92
// attrEscape writes src to out, replacing the four HTML-significant
// characters with character entity references:
//
//	"  becomes  &quot;
//	&  becomes  &amp;
//	<  becomes  &lt;
//	>  becomes  &gt;
//
// Every other byte is copied through unchanged, so the result is safe to
// place both in element content and inside a double-quoted attribute value.
// An empty src writes nothing.
//
// Runs of ordinary bytes are flushed with a single Write instead of byte by
// byte, because this function is called for nearly all rendered text.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		if i > org {
			// copy all the normal characters since the last escape
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
138
139func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
140 marker := out.Len()
141 doubleSpace(out)
142
143 if options.flags&HTML_TOC != 0 {
144 // headerCount is incremented in htmlTocHeader
145 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
146 } else {
147 out.WriteString(fmt.Sprintf("<h%d>", level))
148 }
149
150 tocMarker := out.Len()
151 if !text() {
152 out.Truncate(marker)
153 return
154 }
155
156 // are we building a table of contents?
157 if options.flags&HTML_TOC != 0 {
158 options.TocHeader(out.Bytes()[tocMarker:], level)
159 }
160
161 out.WriteString(fmt.Sprintf("</h%d>\n", level))
162}
163
164func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
165 if options.flags&HTML_SKIP_HTML != 0 {
166 return
167 }
168
169 doubleSpace(out)
170 out.Write(text)
171 out.WriteByte('\n')
172}
173
174func (options *Html) HRule(out *bytes.Buffer) {
175 doubleSpace(out)
176 out.WriteString("<hr")
177 out.WriteString(options.closeTag)
178}
179
180func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
181 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
182 options.BlockCodeGithub(out, text, lang)
183 } else {
184 options.BlockCodeNormal(out, text, lang)
185 }
186}
187
188func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
189 doubleSpace(out)
190
191 // parse out the language names/classes
192 count := 0
193 for _, elt := range strings.Fields(lang) {
194 if elt[0] == '.' {
195 elt = elt[1:]
196 }
197 if len(elt) == 0 {
198 continue
199 }
200 if count == 0 {
201 out.WriteString("<pre><code class=\"")
202 } else {
203 out.WriteByte(' ')
204 }
205 attrEscape(out, []byte(elt))
206 count++
207 }
208
209 if count == 0 {
210 out.WriteString("<pre><code>")
211 } else {
212 out.WriteString("\">")
213 }
214
215 attrEscape(out, text)
216 out.WriteString("</code></pre>\n")
217}
218
219/*
220 * GitHub style code block:
221 *
222 * <pre lang="LANG"><code>
223 * ...
224 * </pre></code>
225 *
226 * Unlike other parsers, we store the language identifier in the <pre>,
227 * and don't let the user generate custom classes.
228 *
229 * The language identifier in the <pre> block gets postprocessed and all
230 * the code inside gets syntax highlighted with Pygments. This is much safer
231 * than letting the user specify a CSS class for highlighting.
232 *
233 * Note that we only generate HTML for the first specifier.
234 * E.g.
235 * ~~~~ {.python .numbered} => <pre lang="python"><code>
236 */
237func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
238 doubleSpace(out)
239
240 // parse out the language name
241 count := 0
242 for _, elt := range strings.Fields(lang) {
243 if elt[0] == '.' {
244 elt = elt[1:]
245 }
246 if len(elt) == 0 {
247 continue
248 }
249 out.WriteString("<pre lang=\"")
250 attrEscape(out, []byte(elt))
251 out.WriteString("\"><code>")
252 count++
253 break
254 }
255
256 if count == 0 {
257 out.WriteString("<pre><code>")
258 }
259
260 attrEscape(out, text)
261 out.WriteString("</code></pre>\n")
262}
263
264func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
265 doubleSpace(out)
266 out.WriteString("<blockquote>\n")
267 out.Write(text)
268 out.WriteString("</blockquote>\n")
269}
270
271func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
272 doubleSpace(out)
273 out.WriteString("<table>\n<thead>\n")
274 out.Write(header)
275 out.WriteString("</thead>\n\n<tbody>\n")
276 out.Write(body)
277 out.WriteString("</tbody>\n</table>\n")
278}
279
280func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
281 doubleSpace(out)
282 out.WriteString("<tr>\n")
283 out.Write(text)
284 out.WriteString("\n</tr>\n")
285}
286
287func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
288 doubleSpace(out)
289 switch align {
290 case TABLE_ALIGNMENT_LEFT:
291 out.WriteString("<td align=\"left\">")
292 case TABLE_ALIGNMENT_RIGHT:
293 out.WriteString("<td align=\"right\">")
294 case TABLE_ALIGNMENT_CENTER:
295 out.WriteString("<td align=\"center\">")
296 default:
297 out.WriteString("<td>")
298 }
299
300 out.Write(text)
301 out.WriteString("</td>")
302}
303
304func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
305 marker := out.Len()
306 doubleSpace(out)
307
308 if flags&LIST_TYPE_ORDERED != 0 {
309 out.WriteString("<ol>")
310 } else {
311 out.WriteString("<ul>")
312 }
313 if !text() {
314 out.Truncate(marker)
315 return
316 }
317 if flags&LIST_TYPE_ORDERED != 0 {
318 out.WriteString("</ol>\n")
319 } else {
320 out.WriteString("</ul>\n")
321 }
322}
323
324func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
325 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
326 doubleSpace(out)
327 }
328 out.WriteString("<li>")
329 out.Write(text)
330 out.WriteString("</li>\n")
331}
332
333func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
334 marker := out.Len()
335 doubleSpace(out)
336
337 out.WriteString("<p>")
338 if !text() {
339 out.Truncate(marker)
340 return
341 }
342 out.WriteString("</p>\n")
343}
344
345func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
346 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
347 // mark it but don't link it if it is not a safe link: no smartypants
348 out.WriteString("<tt>")
349 attrEscape(out, link)
350 out.WriteString("</tt>")
351 return
352 }
353
354 out.WriteString("<a href=\"")
355 if kind == LINK_TYPE_EMAIL {
356 out.WriteString("mailto:")
357 }
358 attrEscape(out, link)
359 out.WriteString("\">")
360
361 // Pretty print: if we get an email address as
362 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
363 // want to print the `mailto:` prefix
364 switch {
365 case bytes.HasPrefix(link, []byte("mailto://")):
366 attrEscape(out, link[len("mailto://"):])
367 case bytes.HasPrefix(link, []byte("mailto:")):
368 attrEscape(out, link[len("mailto:"):])
369 default:
370 attrEscape(out, link)
371 }
372
373 out.WriteString("</a>")
374}
375
376func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
377 out.WriteString("<code>")
378 attrEscape(out, text)
379 out.WriteString("</code>")
380}
381
382func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
383 out.WriteString("<strong>")
384 out.Write(text)
385 out.WriteString("</strong>")
386}
387
388func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
389 if len(text) == 0 {
390 return
391 }
392 out.WriteString("<em>")
393 out.Write(text)
394 out.WriteString("</em>")
395}
396
397func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
398 if options.flags&HTML_SKIP_IMAGES != 0 {
399 return
400 }
401
402 out.WriteString("<img src=\"")
403 attrEscape(out, link)
404 out.WriteString("\" alt=\"")
405 if len(alt) > 0 {
406 attrEscape(out, alt)
407 }
408 if len(title) > 0 {
409 out.WriteString("\" title=\"")
410 attrEscape(out, title)
411 }
412
413 out.WriteByte('"')
414 out.WriteString(options.closeTag)
415 return
416}
417
418func (options *Html) LineBreak(out *bytes.Buffer) {
419 out.WriteString("<br")
420 out.WriteString(options.closeTag)
421}
422
423func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
424 if options.flags&HTML_SKIP_LINKS != 0 {
425 // write the link text out but don't link it, just mark it with typewriter font
426 out.WriteString("<tt>")
427 attrEscape(out, content)
428 out.WriteString("</tt>")
429 return
430 }
431
432 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
433 // write the link text out but don't link it, just mark it with typewriter font
434 out.WriteString("<tt>")
435 attrEscape(out, content)
436 out.WriteString("</tt>")
437 return
438 }
439
440 out.WriteString("<a href=\"")
441 attrEscape(out, link)
442 if len(title) > 0 {
443 out.WriteString("\" title=\"")
444 attrEscape(out, title)
445 }
446 out.WriteString("\">")
447 out.Write(content)
448 out.WriteString("</a>")
449 return
450}
451
452func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
453 if options.flags&HTML_SKIP_HTML != 0 {
454 return
455 }
456 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
457 return
458 }
459 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
460 return
461 }
462 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
463 return
464 }
465 out.Write(text)
466}
467
468func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
469 out.WriteString("<strong><em>")
470 out.Write(text)
471 out.WriteString("</em></strong>")
472}
473
474func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
475 out.WriteString("<del>")
476 out.Write(text)
477 out.WriteString("</del>")
478}
479
// Entity writes entity to out unchanged; no escaping is applied.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
483
484func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
485 if options.flags&HTML_USE_SMARTYPANTS != 0 {
486 options.Smartypants(out, text)
487 } else {
488 attrEscape(out, text)
489 }
490}
491
492func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
493 smrt := smartypantsData{false, false}
494
495 // first do normal entity escaping
496 var escaped bytes.Buffer
497 attrEscape(&escaped, text)
498 text = escaped.Bytes()
499
500 mark := 0
501 for i := 0; i < len(text); i++ {
502 if action := options.smartypants[text[i]]; action != nil {
503 if i > mark {
504 out.Write(text[mark:i])
505 }
506
507 previousChar := byte(0)
508 if i > 0 {
509 previousChar = text[i-1]
510 }
511 i += action(out, &smrt, previousChar, text[i:])
512 mark = i + 1
513 }
514 }
515
516 if mark < len(text) {
517 out.Write(text[mark:])
518 }
519}
520
521func (options *Html) DocumentHeader(out *bytes.Buffer) {
522 if options.flags&HTML_COMPLETE_PAGE == 0 {
523 return
524 }
525
526 ending := ""
527 if options.flags&HTML_USE_XHTML != 0 {
528 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
529 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
530 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
531 ending = " /"
532 } else {
533 out.WriteString("<!DOCTYPE html>\n")
534 out.WriteString("<html>\n")
535 }
536 out.WriteString("<head>\n")
537 out.WriteString(" <title>")
538 options.NormalText(out, []byte(options.title))
539 out.WriteString("</title>\n")
540 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
541 out.WriteString(VERSION)
542 out.WriteString("\"")
543 out.WriteString(ending)
544 out.WriteString(">\n")
545 out.WriteString(" <meta charset=\"utf-8\"")
546 out.WriteString(ending)
547 out.WriteString(">\n")
548 if options.css != "" {
549 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
550 attrEscape(out, []byte(options.css))
551 out.WriteString("\"")
552 out.WriteString(ending)
553 out.WriteString(">\n")
554 }
555 out.WriteString("</head>\n")
556 out.WriteString("<body>\n")
557
558 options.tocMarker = out.Len()
559}
560
561func (options *Html) DocumentFooter(out *bytes.Buffer) {
562 // finalize and insert the table of contents
563 if options.flags&HTML_TOC != 0 {
564 options.TocFinalize()
565
566 // now we have to insert the table of contents into the document
567 var temp bytes.Buffer
568
569 // start by making a copy of everything after the document header
570 temp.Write(out.Bytes()[options.tocMarker:])
571
572 // now clear the copied material from the main output buffer
573 out.Truncate(options.tocMarker)
574
575 // corner case spacing issue
576 if options.flags&HTML_COMPLETE_PAGE != 0 {
577 out.WriteByte('\n')
578 }
579
580 // insert the table of contents
581 out.WriteString("<nav>\n")
582 out.Write(options.toc.Bytes())
583 out.WriteString("</nav>\n")
584
585 // corner case spacing issue
586 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
587 out.WriteByte('\n')
588 }
589
590 // write out everything that came after it
591 if options.flags&HTML_OMIT_CONTENTS == 0 {
592 out.Write(temp.Bytes())
593 }
594 }
595
596 if options.flags&HTML_COMPLETE_PAGE != 0 {
597 out.WriteString("\n</body>\n")
598 out.WriteString("</html>\n")
599 }
600
601}
602
// TocHeader appends one entry to the table of contents buffer.
//
// text is the rendered header content, written verbatim as the link text;
// level is the header level. The nested <ul> lists in options.toc are opened
// or closed until currentLevel equals level, then an entry linking to
// #toc_N is written, where N is the current headerCount. headerCount is
// incremented afterwards.
func (options *Html) TocHeader(text []byte, level int) {
	// the header is deeper than the current list: open one <ul> per level
	for level > options.currentLevel {
		switch {
		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
			// this sublist can nest underneath a header
			// (reopen the previous item by removing its closing tag)
			size := options.toc.Len()
			options.toc.Truncate(size - len("</li>\n"))

		case options.currentLevel > 0:
			// no previous item to nest under: open an item with no link
			options.toc.WriteString("<li>")
		}
		if options.toc.Len() > 0 {
			options.toc.WriteByte('\n')
		}
		options.toc.WriteString("<ul>\n")
		options.currentLevel++
	}

	// the header is shallower than the current list: close one <ul> per
	// level, plus the <li> that contains it unless it is the outermost list
	for level < options.currentLevel {
		options.toc.WriteString("</ul>")
		if options.currentLevel > 1 {
			options.toc.WriteString("</li>\n")
		}
		options.currentLevel--
	}

	// the entry itself; its target is the id that Header wrote for this header
	options.toc.WriteString("<li><a href=\"#toc_")
	options.toc.WriteString(strconv.Itoa(options.headerCount))
	options.toc.WriteString("\">")
	options.headerCount++

	options.toc.Write(text)

	options.toc.WriteString("</a></li>\n")
}
638
639func (options *Html) TocFinalize() {
640 for options.currentLevel > 1 {
641 options.toc.WriteString("</ul></li>\n")
642 options.currentLevel--
643 }
644
645 if options.currentLevel > 0 {
646 options.toc.WriteString("</ul>\n")
647 }
648}
649
650func isHtmlTag(tag []byte, tagname string) bool {
651 i := 0
652 if i < len(tag) && tag[0] != '<' {
653 return false
654 }
655 i++
656 for i < len(tag) && isspace(tag[i]) {
657 i++
658 }
659
660 if i < len(tag) && tag[i] == '/' {
661 i++
662 }
663
664 for i < len(tag) && isspace(tag[i]) {
665 i++
666 }
667
668 j := i
669 for ; i < len(tag); i, j = i+1, j+1 {
670 if j >= len(tagname) {
671 break
672 }
673
674 if tag[i] != tagname[j] {
675 return false
676 }
677 }
678
679 if i == len(tag) {
680 return false
681 }
682
683 return isspace(tag[i]) || tag[i] == '>'
684}
685
// doubleSpace appends a newline to out unless out is still empty. Block
// renderers call it first so that blocks are separated by a blank line
// without the output starting with one.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}