html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Html renderer configuration options.
//
// Each option is a distinct bit (1 << iota), so several options can be ORed
// together into the flags value passed to HtmlRenderer. The order of the
// declarations determines the bit values and must not be changed.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
41
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded by DocumentHeader; DocumentFooter inserts the table of contents there
	headerCount  int           // headers added to the table of contents so far; also numbers the toc_N anchors
	currentLevel int           // current nesting depth of the table of contents list
	toc          *bytes.Buffer // table of contents markup accumulated by TocHeader

	// smart punctuation callbacks, looked up by input byte in Smartypants
	smartypants *smartypantsRenderer
}
59
// Endings for singleton tags such as <hr>, <br> and <img>. HtmlRenderer
// selects xhtmlClose when HTML_USE_XHTML is set and htmlClose otherwise.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
64
65// HtmlRenderer creates and configures an Html object, which
66// satisfies the Renderer interface.
67//
68// flags is a set of HTML_* options ORed together.
69// title is the title of the document, and css is a URL for the document's
70// stylesheet.
71// title and css are only used when HTML_COMPLETE_PAGE is selected.
72func HtmlRenderer(flags int, title string, css string) Renderer {
73 // configure the rendering engine
74 closeTag := htmlClose
75 if flags&HTML_USE_XHTML != 0 {
76 closeTag = xhtmlClose
77 }
78
79 return &Html{
80 flags: flags,
81 closeTag: closeTag,
82 title: title,
83 css: css,
84
85 headerCount: 0,
86 currentLevel: 0,
87 toc: new(bytes.Buffer),
88
89 smartypants: smartypants(flags),
90 }
91}
92
// attrEscape appends src to out, replacing the four characters that are
// significant in HTML text and attribute values with character references:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// All other bytes are copied through unchanged. Runs of ordinary bytes are
// written with a single Write call, since this function sits on the hot path
// of the renderer.
func attrEscape(out *bytes.Buffer, src []byte) {
	// org is the start of the pending run of bytes that need no escaping
	org := 0
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// flush the ordinary bytes seen since the last escape
		if i > org {
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
138
139func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
140 marker := out.Len()
141 doubleSpace(out)
142
143 if options.flags&HTML_TOC != 0 {
144 // headerCount is incremented in htmlTocHeader
145 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
146 } else {
147 out.WriteString(fmt.Sprintf("<h%d>", level))
148 }
149
150 tocMarker := out.Len()
151 if !text() {
152 out.Truncate(marker)
153 return
154 }
155
156 // are we building a table of contents?
157 if options.flags&HTML_TOC != 0 {
158 options.TocHeader(out.Bytes()[tocMarker:], level)
159 }
160
161 out.WriteString(fmt.Sprintf("</h%d>\n", level))
162}
163
164func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
165 if options.flags&HTML_SKIP_HTML != 0 {
166 return
167 }
168
169 doubleSpace(out)
170 out.Write(text)
171 out.WriteByte('\n')
172}
173
174func (options *Html) HRule(out *bytes.Buffer) {
175 doubleSpace(out)
176 out.WriteString("<hr")
177 out.WriteString(options.closeTag)
178}
179
180func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
181 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
182 options.BlockCodeGithub(out, text, lang)
183 } else {
184 options.BlockCodeNormal(out, text, lang)
185 }
186}
187
188func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
189 doubleSpace(out)
190
191 // parse out the language names/classes
192 count := 0
193 for _, elt := range strings.Fields(lang) {
194 if elt[0] == '.' {
195 elt = elt[1:]
196 }
197 if len(elt) == 0 {
198 continue
199 }
200 if count == 0 {
201 out.WriteString("<pre><code class=\"")
202 } else {
203 out.WriteByte(' ')
204 }
205 attrEscape(out, []byte(elt))
206 count++
207 }
208
209 if count == 0 {
210 out.WriteString("<pre><code>")
211 } else {
212 out.WriteString("\">")
213 }
214
215 attrEscape(out, text)
216 out.WriteString("</code></pre>\n")
217}
218
219// GitHub style code block:
220//
221// <pre lang="LANG"><code>
222// ...
223// </code></pre>
224//
225// Unlike other parsers, we store the language identifier in the <pre>,
226// and don't let the user generate custom classes.
227//
228// The language identifier in the <pre> block gets postprocessed and all
229// the code inside gets syntax highlighted with Pygments. This is much safer
230// than letting the user specify a CSS class for highlighting.
231//
232// Note that we only generate HTML for the first specifier.
233// E.g.
234// ~~~~ {.python .numbered} => <pre lang="python"><code>
235func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
236 doubleSpace(out)
237
238 // parse out the language name
239 count := 0
240 for _, elt := range strings.Fields(lang) {
241 if elt[0] == '.' {
242 elt = elt[1:]
243 }
244 if len(elt) == 0 {
245 continue
246 }
247 out.WriteString("<pre lang=\"")
248 attrEscape(out, []byte(elt))
249 out.WriteString("\"><code>")
250 count++
251 break
252 }
253
254 if count == 0 {
255 out.WriteString("<pre><code>")
256 }
257
258 attrEscape(out, text)
259 out.WriteString("</code></pre>\n")
260}
261
262func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
263 doubleSpace(out)
264 out.WriteString("<blockquote>\n")
265 out.Write(text)
266 out.WriteString("</blockquote>\n")
267}
268
269func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
270 doubleSpace(out)
271 out.WriteString("<table>\n<thead>\n")
272 out.Write(header)
273 out.WriteString("</thead>\n\n<tbody>\n")
274 out.Write(body)
275 out.WriteString("</tbody>\n</table>\n")
276}
277
278func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
279 doubleSpace(out)
280 out.WriteString("<tr>\n")
281 out.Write(text)
282 out.WriteString("\n</tr>\n")
283}
284
285func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
286 doubleSpace(out)
287 switch align {
288 case TABLE_ALIGNMENT_LEFT:
289 out.WriteString("<td align=\"left\">")
290 case TABLE_ALIGNMENT_RIGHT:
291 out.WriteString("<td align=\"right\">")
292 case TABLE_ALIGNMENT_CENTER:
293 out.WriteString("<td align=\"center\">")
294 default:
295 out.WriteString("<td>")
296 }
297
298 out.Write(text)
299 out.WriteString("</td>")
300}
301
302func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
303 marker := out.Len()
304 doubleSpace(out)
305
306 if flags&LIST_TYPE_ORDERED != 0 {
307 out.WriteString("<ol>")
308 } else {
309 out.WriteString("<ul>")
310 }
311 if !text() {
312 out.Truncate(marker)
313 return
314 }
315 if flags&LIST_TYPE_ORDERED != 0 {
316 out.WriteString("</ol>\n")
317 } else {
318 out.WriteString("</ul>\n")
319 }
320}
321
322func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
323 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
324 doubleSpace(out)
325 }
326 out.WriteString("<li>")
327 out.Write(text)
328 out.WriteString("</li>\n")
329}
330
331func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
332 marker := out.Len()
333 doubleSpace(out)
334
335 out.WriteString("<p>")
336 if !text() {
337 out.Truncate(marker)
338 return
339 }
340 out.WriteString("</p>\n")
341}
342
343func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
344 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
345 // mark it but don't link it if it is not a safe link: no smartypants
346 out.WriteString("<tt>")
347 attrEscape(out, link)
348 out.WriteString("</tt>")
349 return
350 }
351
352 out.WriteString("<a href=\"")
353 if kind == LINK_TYPE_EMAIL {
354 out.WriteString("mailto:")
355 }
356 attrEscape(out, link)
357 out.WriteString("\">")
358
359 // Pretty print: if we get an email address as
360 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
361 // want to print the `mailto:` prefix
362 switch {
363 case bytes.HasPrefix(link, []byte("mailto://")):
364 attrEscape(out, link[len("mailto://"):])
365 case bytes.HasPrefix(link, []byte("mailto:")):
366 attrEscape(out, link[len("mailto:"):])
367 default:
368 attrEscape(out, link)
369 }
370
371 out.WriteString("</a>")
372}
373
374func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
375 out.WriteString("<code>")
376 attrEscape(out, text)
377 out.WriteString("</code>")
378}
379
380func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
381 out.WriteString("<strong>")
382 out.Write(text)
383 out.WriteString("</strong>")
384}
385
386func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
387 if len(text) == 0 {
388 return
389 }
390 out.WriteString("<em>")
391 out.Write(text)
392 out.WriteString("</em>")
393}
394
395func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
396 if options.flags&HTML_SKIP_IMAGES != 0 {
397 return
398 }
399
400 out.WriteString("<img src=\"")
401 attrEscape(out, link)
402 out.WriteString("\" alt=\"")
403 if len(alt) > 0 {
404 attrEscape(out, alt)
405 }
406 if len(title) > 0 {
407 out.WriteString("\" title=\"")
408 attrEscape(out, title)
409 }
410
411 out.WriteByte('"')
412 out.WriteString(options.closeTag)
413 return
414}
415
416func (options *Html) LineBreak(out *bytes.Buffer) {
417 out.WriteString("<br")
418 out.WriteString(options.closeTag)
419}
420
421func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
422 if options.flags&HTML_SKIP_LINKS != 0 {
423 // write the link text out but don't link it, just mark it with typewriter font
424 out.WriteString("<tt>")
425 attrEscape(out, content)
426 out.WriteString("</tt>")
427 return
428 }
429
430 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
431 // write the link text out but don't link it, just mark it with typewriter font
432 out.WriteString("<tt>")
433 attrEscape(out, content)
434 out.WriteString("</tt>")
435 return
436 }
437
438 out.WriteString("<a href=\"")
439 attrEscape(out, link)
440 if len(title) > 0 {
441 out.WriteString("\" title=\"")
442 attrEscape(out, title)
443 }
444 out.WriteString("\">")
445 out.Write(content)
446 out.WriteString("</a>")
447 return
448}
449
450func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
451 if options.flags&HTML_SKIP_HTML != 0 {
452 return
453 }
454 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
455 return
456 }
457 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
458 return
459 }
460 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
461 return
462 }
463 out.Write(text)
464}
465
466func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
467 out.WriteString("<strong><em>")
468 out.Write(text)
469 out.WriteString("</em></strong>")
470}
471
472func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
473 out.WriteString("<del>")
474 out.Write(text)
475 out.WriteString("</del>")
476}
477
478func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
479 out.Write(entity)
480}
481
482func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
483 if options.flags&HTML_USE_SMARTYPANTS != 0 {
484 options.Smartypants(out, text)
485 } else {
486 attrEscape(out, text)
487 }
488}
489
490func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
491 smrt := smartypantsData{false, false}
492
493 // first do normal entity escaping
494 var escaped bytes.Buffer
495 attrEscape(&escaped, text)
496 text = escaped.Bytes()
497
498 mark := 0
499 for i := 0; i < len(text); i++ {
500 if action := options.smartypants[text[i]]; action != nil {
501 if i > mark {
502 out.Write(text[mark:i])
503 }
504
505 previousChar := byte(0)
506 if i > 0 {
507 previousChar = text[i-1]
508 }
509 i += action(out, &smrt, previousChar, text[i:])
510 mark = i + 1
511 }
512 }
513
514 if mark < len(text) {
515 out.Write(text[mark:])
516 }
517}
518
519func (options *Html) DocumentHeader(out *bytes.Buffer) {
520 if options.flags&HTML_COMPLETE_PAGE == 0 {
521 return
522 }
523
524 ending := ""
525 if options.flags&HTML_USE_XHTML != 0 {
526 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
527 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
528 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
529 ending = " /"
530 } else {
531 out.WriteString("<!DOCTYPE html>\n")
532 out.WriteString("<html>\n")
533 }
534 out.WriteString("<head>\n")
535 out.WriteString(" <title>")
536 options.NormalText(out, []byte(options.title))
537 out.WriteString("</title>\n")
538 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
539 out.WriteString(VERSION)
540 out.WriteString("\"")
541 out.WriteString(ending)
542 out.WriteString(">\n")
543 out.WriteString(" <meta charset=\"utf-8\"")
544 out.WriteString(ending)
545 out.WriteString(">\n")
546 if options.css != "" {
547 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
548 attrEscape(out, []byte(options.css))
549 out.WriteString("\"")
550 out.WriteString(ending)
551 out.WriteString(">\n")
552 }
553 out.WriteString("</head>\n")
554 out.WriteString("<body>\n")
555
556 options.tocMarker = out.Len()
557}
558
559func (options *Html) DocumentFooter(out *bytes.Buffer) {
560 // finalize and insert the table of contents
561 if options.flags&HTML_TOC != 0 {
562 options.TocFinalize()
563
564 // now we have to insert the table of contents into the document
565 var temp bytes.Buffer
566
567 // start by making a copy of everything after the document header
568 temp.Write(out.Bytes()[options.tocMarker:])
569
570 // now clear the copied material from the main output buffer
571 out.Truncate(options.tocMarker)
572
573 // corner case spacing issue
574 if options.flags&HTML_COMPLETE_PAGE != 0 {
575 out.WriteByte('\n')
576 }
577
578 // insert the table of contents
579 out.WriteString("<nav>\n")
580 out.Write(options.toc.Bytes())
581 out.WriteString("</nav>\n")
582
583 // corner case spacing issue
584 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
585 out.WriteByte('\n')
586 }
587
588 // write out everything that came after it
589 if options.flags&HTML_OMIT_CONTENTS == 0 {
590 out.Write(temp.Bytes())
591 }
592 }
593
594 if options.flags&HTML_COMPLETE_PAGE != 0 {
595 out.WriteString("\n</body>\n")
596 out.WriteString("</html>\n")
597 }
598
599}
600
601func (options *Html) TocHeader(text []byte, level int) {
602 for level > options.currentLevel {
603 switch {
604 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
605 // this sublist can nest underneath a header
606 size := options.toc.Len()
607 options.toc.Truncate(size - len("</li>\n"))
608
609 case options.currentLevel > 0:
610 options.toc.WriteString("<li>")
611 }
612 if options.toc.Len() > 0 {
613 options.toc.WriteByte('\n')
614 }
615 options.toc.WriteString("<ul>\n")
616 options.currentLevel++
617 }
618
619 for level < options.currentLevel {
620 options.toc.WriteString("</ul>")
621 if options.currentLevel > 1 {
622 options.toc.WriteString("</li>\n")
623 }
624 options.currentLevel--
625 }
626
627 options.toc.WriteString("<li><a href=\"#toc_")
628 options.toc.WriteString(strconv.Itoa(options.headerCount))
629 options.toc.WriteString("\">")
630 options.headerCount++
631
632 options.toc.Write(text)
633
634 options.toc.WriteString("</a></li>\n")
635}
636
637func (options *Html) TocFinalize() {
638 for options.currentLevel > 1 {
639 options.toc.WriteString("</ul></li>\n")
640 options.currentLevel--
641 }
642
643 if options.currentLevel > 0 {
644 options.toc.WriteString("</ul>\n")
645 }
646}
647
648func isHtmlTag(tag []byte, tagname string) bool {
649 i := 0
650 if i < len(tag) && tag[0] != '<' {
651 return false
652 }
653 i++
654 for i < len(tag) && isspace(tag[i]) {
655 i++
656 }
657
658 if i < len(tag) && tag[i] == '/' {
659 i++
660 }
661
662 for i < len(tag) && isspace(tag[i]) {
663 i++
664 }
665
666 j := i
667 for ; i < len(tag); i, j = i+1, j+1 {
668 if j >= len(tagname) {
669 break
670 }
671
672 if tag[i] != tagname[j] {
673 return false
674 }
675 }
676
677 if i == len(tag) {
678 return false
679 }
680
681 return isspace(tag[i]) || tag[i] == '>'
682}
683
// doubleSpace appends a newline to out unless out is still empty, so that
// block-level elements are separated without a leading blank line.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}