html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Html renderer configuration options.
//
// Each option is a distinct bit (1 << iota), so several options can be
// combined with bitwise OR and passed as the flags argument of HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
41
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded at the end of DocumentHeader; the table of contents is inserted there
	headerCount  int           // number of headers recorded so far; also the numeric suffix of the next toc_N anchor
	currentLevel int           // current <ul> nesting depth inside toc
	toc          *bytes.Buffer // table of contents markup accumulated by TocHeader

	smartypants *smartypantsRenderer // smart punctuation callbacks, looked up by input byte in Smartypants
}
59
// Terminators for singleton tags such as <hr> and <br>. HtmlRenderer stores
// xhtmlClose in Html.closeTag when HTML_USE_XHTML is set, htmlClose otherwise.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
64
65// HtmlRenderer creates and configures an Html object, which
66// satisfies the Renderer interface.
67//
68// flags is a set of HTML_* options ORed together.
69// title is the title of the document, and css is a URL for the document's
70// stylesheet.
71// title and css are only used when HTML_COMPLETE_PAGE is selected.
72func HtmlRenderer(flags int, title string, css string) Renderer {
73 // configure the rendering engine
74 closeTag := htmlClose
75 if flags&HTML_USE_XHTML != 0 {
76 closeTag = xhtmlClose
77 }
78
79 return &Html{
80 flags: flags,
81 closeTag: closeTag,
82 title: title,
83 css: css,
84
85 headerCount: 0,
86 currentLevel: 0,
87 toc: new(bytes.Buffer),
88
89 smartypants: smartypants(flags),
90 }
91}
92
// attrEscape writes src to out, replacing the four characters that are
// significant in HTML text and in double-quoted attribute values with their
// entity references:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// Every other byte is copied through unchanged. Runs of ordinary bytes are
// written with a single Write call rather than byte by byte.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		if i > org {
			// copy all the normal characters since the last escape
			out.Write(src[org:i])
		}
		org = i + 1
		out.WriteString(entity)
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
138
139func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
140 marker := out.Len()
141 doubleSpace(out)
142
143 if options.flags&HTML_TOC != 0 {
144 // headerCount is incremented in htmlTocHeader
145 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
146 } else {
147 out.WriteString(fmt.Sprintf("<h%d>", level))
148 }
149
150 tocMarker := out.Len()
151 if !text() {
152 out.Truncate(marker)
153 return
154 }
155
156 // are we building a table of contents?
157 if options.flags&HTML_TOC != 0 {
158 options.TocHeader(out.Bytes()[tocMarker:], level)
159 }
160
161 out.WriteString(fmt.Sprintf("</h%d>\n", level))
162}
163
164func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
165 if options.flags&HTML_SKIP_HTML != 0 {
166 return
167 }
168
169 doubleSpace(out)
170 out.Write(text)
171 out.WriteByte('\n')
172}
173
174func (options *Html) HRule(out *bytes.Buffer) {
175 doubleSpace(out)
176 out.WriteString("<hr")
177 out.WriteString(options.closeTag)
178}
179
180func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
181 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
182 options.BlockCodeGithub(out, text, lang)
183 } else {
184 options.BlockCodeNormal(out, text, lang)
185 }
186}
187
188func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
189 doubleSpace(out)
190
191 // parse out the language names/classes
192 count := 0
193 for _, elt := range strings.Fields(lang) {
194 if elt[0] == '.' {
195 elt = elt[1:]
196 }
197 if len(elt) == 0 {
198 continue
199 }
200 if count == 0 {
201 out.WriteString("<pre><code class=\"")
202 } else {
203 out.WriteByte(' ')
204 }
205 attrEscape(out, []byte(elt))
206 count++
207 }
208
209 if count == 0 {
210 out.WriteString("<pre><code>")
211 } else {
212 out.WriteString("\">")
213 }
214
215 attrEscape(out, text)
216 out.WriteString("</code></pre>\n")
217}
218
219/*
220 * GitHub style code block:
221 *
222 * <pre lang="LANG"><code>
223 * ...
224 * </pre></code>
225 *
226 * Unlike other parsers, we store the language identifier in the <pre>,
227 * and don't let the user generate custom classes.
228 *
229 * The language identifier in the <pre> block gets postprocessed and all
230 * the code inside gets syntax highlighted with Pygments. This is much safer
231 * than letting the user specify a CSS class for highlighting.
232 *
233 * Note that we only generate HTML for the first specifier.
234 * E.g.
235 * ~~~~ {.python .numbered} => <pre lang="python"><code>
236 */
237func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
238 doubleSpace(out)
239
240 // parse out the language name
241 count := 0
242 for _, elt := range strings.Fields(lang) {
243 if elt[0] == '.' {
244 elt = elt[1:]
245 }
246 if len(elt) == 0 {
247 continue
248 }
249 out.WriteString("<pre lang=\"")
250 attrEscape(out, []byte(elt))
251 out.WriteString("\"><code>")
252 count++
253 break
254 }
255
256 if count == 0 {
257 out.WriteString("<pre><code>")
258 }
259
260 attrEscape(out, text)
261 out.WriteString("</code></pre>\n")
262}
263
264
265func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
266 doubleSpace(out)
267 out.WriteString("<blockquote>\n")
268 out.Write(text)
269 out.WriteString("</blockquote>\n")
270}
271
272func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
273 doubleSpace(out)
274 out.WriteString("<table>\n<thead>\n")
275 out.Write(header)
276 out.WriteString("</thead>\n\n<tbody>\n")
277 out.Write(body)
278 out.WriteString("</tbody>\n</table>\n")
279}
280
281func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
282 doubleSpace(out)
283 out.WriteString("<tr>\n")
284 out.Write(text)
285 out.WriteString("\n</tr>\n")
286}
287
288func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
289 doubleSpace(out)
290 switch align {
291 case TABLE_ALIGNMENT_LEFT:
292 out.WriteString("<td align=\"left\">")
293 case TABLE_ALIGNMENT_RIGHT:
294 out.WriteString("<td align=\"right\">")
295 case TABLE_ALIGNMENT_CENTER:
296 out.WriteString("<td align=\"center\">")
297 default:
298 out.WriteString("<td>")
299 }
300
301 out.Write(text)
302 out.WriteString("</td>")
303}
304
305func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
306 marker := out.Len()
307 doubleSpace(out)
308
309 if flags&LIST_TYPE_ORDERED != 0 {
310 out.WriteString("<ol>")
311 } else {
312 out.WriteString("<ul>")
313 }
314 if !text() {
315 out.Truncate(marker)
316 return
317 }
318 if flags&LIST_TYPE_ORDERED != 0 {
319 out.WriteString("</ol>\n")
320 } else {
321 out.WriteString("</ul>\n")
322 }
323}
324
325func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
326 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
327 doubleSpace(out)
328 }
329 out.WriteString("<li>")
330 out.Write(text)
331 out.WriteString("</li>\n")
332}
333
334func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
335 marker := out.Len()
336 doubleSpace(out)
337
338 out.WriteString("<p>")
339 if !text() {
340 out.Truncate(marker)
341 return
342 }
343 out.WriteString("</p>\n")
344}
345
346func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
347 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
348 // mark it but don't link it if it is not a safe link: no smartypants
349 out.WriteString("<tt>")
350 attrEscape(out, link)
351 out.WriteString("</tt>")
352 return
353 }
354
355 out.WriteString("<a href=\"")
356 if kind == LINK_TYPE_EMAIL {
357 out.WriteString("mailto:")
358 }
359 attrEscape(out, link)
360 out.WriteString("\">")
361
362 // Pretty print: if we get an email address as
363 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
364 // want to print the `mailto:` prefix
365 switch {
366 case bytes.HasPrefix(link, []byte("mailto://")):
367 attrEscape(out, link[len("mailto://"):])
368 case bytes.HasPrefix(link, []byte("mailto:")):
369 attrEscape(out, link[len("mailto:"):])
370 default:
371 attrEscape(out, link)
372 }
373
374 out.WriteString("</a>")
375}
376
377func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
378 out.WriteString("<code>")
379 attrEscape(out, text)
380 out.WriteString("</code>")
381}
382
383func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
384 out.WriteString("<strong>")
385 out.Write(text)
386 out.WriteString("</strong>")
387}
388
389func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
390 if len(text) == 0 {
391 return
392 }
393 out.WriteString("<em>")
394 out.Write(text)
395 out.WriteString("</em>")
396}
397
398func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
399 if options.flags&HTML_SKIP_IMAGES != 0 {
400 return
401 }
402
403 out.WriteString("<img src=\"")
404 attrEscape(out, link)
405 out.WriteString("\" alt=\"")
406 if len(alt) > 0 {
407 attrEscape(out, alt)
408 }
409 if len(title) > 0 {
410 out.WriteString("\" title=\"")
411 attrEscape(out, title)
412 }
413
414 out.WriteByte('"')
415 out.WriteString(options.closeTag)
416 return
417}
418
419func (options *Html) LineBreak(out *bytes.Buffer) {
420 out.WriteString("<br")
421 out.WriteString(options.closeTag)
422}
423
424func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
425 if options.flags&HTML_SKIP_LINKS != 0 {
426 // write the link text out but don't link it, just mark it with typewriter font
427 out.WriteString("<tt>")
428 attrEscape(out, content)
429 out.WriteString("</tt>")
430 return
431 }
432
433 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
434 // write the link text out but don't link it, just mark it with typewriter font
435 out.WriteString("<tt>")
436 attrEscape(out, content)
437 out.WriteString("</tt>")
438 return
439 }
440
441 out.WriteString("<a href=\"")
442 attrEscape(out, link)
443 if len(title) > 0 {
444 out.WriteString("\" title=\"")
445 attrEscape(out, title)
446 }
447 out.WriteString("\">")
448 out.Write(content)
449 out.WriteString("</a>")
450 return
451}
452
453func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
454 if options.flags&HTML_SKIP_HTML != 0 {
455 return
456 }
457 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
458 return
459 }
460 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
461 return
462 }
463 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
464 return
465 }
466 out.Write(text)
467}
468
469func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
470 out.WriteString("<strong><em>")
471 out.Write(text)
472 out.WriteString("</em></strong>")
473}
474
475func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
476 out.WriteString("<del>")
477 out.Write(text)
478 out.WriteString("</del>")
479}
480
481func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
482 out.Write(entity)
483}
484
485func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
486 if options.flags&HTML_USE_SMARTYPANTS != 0 {
487 options.Smartypants(out, text)
488 } else {
489 attrEscape(out, text)
490 }
491}
492
493func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
494 smrt := smartypantsData{false, false}
495
496 // first do normal entity escaping
497 var escaped bytes.Buffer
498 attrEscape(&escaped, text)
499 text = escaped.Bytes()
500
501 mark := 0
502 for i := 0; i < len(text); i++ {
503 if action := options.smartypants[text[i]]; action != nil {
504 if i > mark {
505 out.Write(text[mark:i])
506 }
507
508 previousChar := byte(0)
509 if i > 0 {
510 previousChar = text[i-1]
511 }
512 i += action(out, &smrt, previousChar, text[i:])
513 mark = i + 1
514 }
515 }
516
517 if mark < len(text) {
518 out.Write(text[mark:])
519 }
520}
521
522func (options *Html) DocumentHeader(out *bytes.Buffer) {
523 if options.flags&HTML_COMPLETE_PAGE == 0 {
524 return
525 }
526
527 ending := ""
528 if options.flags&HTML_USE_XHTML != 0 {
529 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
530 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
531 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
532 ending = " /"
533 } else {
534 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
535 out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
536 out.WriteString("<html>\n")
537 }
538 out.WriteString("<head>\n")
539 out.WriteString(" <title>")
540 options.NormalText(out, []byte(options.title))
541 out.WriteString("</title>\n")
542 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
543 out.WriteString(VERSION)
544 out.WriteString("\"")
545 out.WriteString(ending)
546 out.WriteString(">\n")
547 out.WriteString(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
548 out.WriteString(ending)
549 out.WriteString(">\n")
550 if options.css != "" {
551 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
552 attrEscape(out, []byte(options.css))
553 out.WriteString("\"")
554 out.WriteString(ending)
555 out.WriteString(">\n")
556 }
557 out.WriteString("</head>\n")
558 out.WriteString("<body>\n")
559
560 options.tocMarker = out.Len()
561}
562
563func (options *Html) DocumentFooter(out *bytes.Buffer) {
564 // finalize and insert the table of contents
565 if options.flags&HTML_TOC != 0 {
566 options.TocFinalize()
567
568 // now we have to insert the table of contents into the document
569 var temp bytes.Buffer
570
571 // start by making a copy of everything after the document header
572 temp.Write(out.Bytes()[options.tocMarker:])
573
574 // now clear the copied material from the main output buffer
575 out.Truncate(options.tocMarker)
576
577 // corner case spacing issue
578 if options.flags&HTML_COMPLETE_PAGE != 0 {
579 out.WriteByte('\n')
580 }
581
582 // insert the table of contents
583 out.Write(options.toc.Bytes())
584
585 // corner case spacing issue
586 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
587 out.WriteByte('\n')
588 }
589
590 // write out everything that came after it
591 if options.flags&HTML_OMIT_CONTENTS == 0 {
592 out.Write(temp.Bytes())
593 }
594 }
595
596 if options.flags&HTML_COMPLETE_PAGE != 0 {
597 out.WriteString("\n</body>\n")
598 out.WriteString("</html>\n")
599 }
600
601}
602
603func (options *Html) TocHeader(text []byte, level int) {
604 for level > options.currentLevel {
605 switch {
606 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
607 // this sublist can nest underneath a header
608 size := options.toc.Len()
609 options.toc.Truncate(size - len("</li>\n"))
610
611 case options.currentLevel > 0:
612 options.toc.WriteString("<li>")
613 }
614 if options.toc.Len() > 0 {
615 options.toc.WriteByte('\n')
616 }
617 options.toc.WriteString("<ul>\n")
618 options.currentLevel++
619 }
620
621 for level < options.currentLevel {
622 options.toc.WriteString("</ul>")
623 if options.currentLevel > 1 {
624 options.toc.WriteString("</li>\n")
625 }
626 options.currentLevel--
627 }
628
629 options.toc.WriteString("<li><a href=\"#toc_")
630 options.toc.WriteString(strconv.Itoa(options.headerCount))
631 options.toc.WriteString("\">")
632 options.headerCount++
633
634 options.toc.Write(text)
635
636 options.toc.WriteString("</a></li>\n")
637}
638
639func (options *Html) TocFinalize() {
640 for options.currentLevel > 1 {
641 options.toc.WriteString("</ul></li>\n")
642 options.currentLevel--
643 }
644
645 if options.currentLevel > 0 {
646 options.toc.WriteString("</ul>\n")
647 }
648}
649
650func isHtmlTag(tag []byte, tagname string) bool {
651 i := 0
652 if i < len(tag) && tag[0] != '<' {
653 return false
654 }
655 i++
656 for i < len(tag) && isspace(tag[i]) {
657 i++
658 }
659
660 if i < len(tag) && tag[i] == '/' {
661 i++
662 }
663
664 for i < len(tag) && isspace(tag[i]) {
665 i++
666 }
667
668 j := i
669 for ; i < len(tag); i, j = i+1, j+1 {
670 if j >= len(tagname) {
671 break
672 }
673
674 if tag[i] != tagname[j] {
675 return false
676 }
677 }
678
679 if i == len(tag) {
680 return false
681 }
682
683 return isspace(tag[i]) || tag[i] == '>'
684}
685
// doubleSpace writes a newline to out unless out is still empty, so that
// block-level elements are separated from whatever precedes them without
// the output starting with a blank line.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}