html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Option flags for the HTML renderer.
//
// Every constant occupies its own bit, so any combination of them can be
// ORed together and handed to HtmlRenderer as its flags argument.
const (
	// Elements that can be left out of the output.
	HTML_SKIP_HTML   = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE              // skip embedded <style> elements
	HTML_SKIP_IMAGES             // skip embedded images
	HTML_SKIP_LINKS              // skip all links
	HTML_SKIP_SCRIPT             // skip embedded <script> elements

	// Link safety.
	HTML_SAFELINK // only link to trusted protocols

	// Document structure.
	HTML_TOC           // generate a table of contents
	HTML_OMIT_CONTENTS // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE // generate a complete HTML page

	// Output dialect.
	HTML_GITHUB_BLOCKCODE // use github fenced code rendering rules
	HTML_USE_XHTML        // generate XHTML output instead of HTML

	// Smart punctuation.
	HTML_USE_SMARTYPANTS          // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS    // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
42
43// Html is a type that implements the Renderer interface for HTML output.
44//
45// Do not create this directly, instead use the HtmlRenderer function.
46type Html struct {
47 flags int // HTML_* options
48 closeTag string // how to end singleton tags: either " />\n" or ">\n"
49 title string // document title
50 css string // optional css file url (used with HTML_COMPLETE_PAGE)
51
52 // table of contents data
53 tocMarker int
54 headerCount int
55 currentLevel int
56 toc *bytes.Buffer
57
58 smartypants *smartypantsRenderer
59}
60
// Terminators written after singleton tags such as <hr and <br.
const (
	htmlClose  = ">\n"   // plain HTML form
	xhtmlClose = " />\n" // XHTML self-closing form
)
65
66// HtmlRenderer creates and configures an Html object, which
67// satisfies the Renderer interface.
68//
69// flags is a set of HTML_* options ORed together.
70// title is the title of the document, and css is a URL for the document's
71// stylesheet.
72// title and css are only used when HTML_COMPLETE_PAGE is selected.
73func HtmlRenderer(flags int, title string, css string) Renderer {
74 // configure the rendering engine
75 closeTag := htmlClose
76 if flags&HTML_USE_XHTML != 0 {
77 closeTag = xhtmlClose
78 }
79
80 return &Html{
81 flags: flags,
82 closeTag: closeTag,
83 title: title,
84 css: css,
85
86 headerCount: 0,
87 currentLevel: 0,
88 toc: new(bytes.Buffer),
89
90 smartypants: smartypants(flags),
91 }
92}
93
// attrEscape copies src to out, replacing the four characters that are
// special in HTML text and attribute values with entity references:
//
//	"  becomes  &quot;
//	&  becomes  &amp;
//	<  becomes  &lt;
//	>  becomes  &gt;
//
// Every other byte is written unchanged. Nothing is written for empty input.
func attrEscape(out *bytes.Buffer, src []byte) {
	// org is the start of the run of ordinary bytes not yet written to out.
	org := 0
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			// ordinary byte: leave it in the pending run
			continue
		}

		// flush the ordinary bytes seen since the last escape, then the entity
		if i > org {
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
139
140func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
141 marker := out.Len()
142 doubleSpace(out)
143
144 if options.flags&HTML_TOC != 0 {
145 // headerCount is incremented in htmlTocHeader
146 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
147 } else {
148 out.WriteString(fmt.Sprintf("<h%d>", level))
149 }
150
151 tocMarker := out.Len()
152 if !text() {
153 out.Truncate(marker)
154 return
155 }
156
157 // are we building a table of contents?
158 if options.flags&HTML_TOC != 0 {
159 options.TocHeader(out.Bytes()[tocMarker:], level)
160 }
161
162 out.WriteString(fmt.Sprintf("</h%d>\n", level))
163}
164
165func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
166 if options.flags&HTML_SKIP_HTML != 0 {
167 return
168 }
169
170 doubleSpace(out)
171 if options.flags&HTML_SKIP_SCRIPT != 0 {
172 out.Write(stripTag(string(text), "script", "p"))
173 } else {
174 out.Write(text)
175 }
176 out.WriteByte('\n')
177}
178
179func stripTag(text, tag, newTag string) []byte {
180 closeNewTag := fmt.Sprintf("</%s>", newTag)
181 i := 0
182 for i < len(text) && text[i] != '<' {
183 i++
184 }
185 if i == len(text) {
186 return []byte(text)
187 }
188 found, end := findHtmlTagPos([]byte(text[i:]), tag)
189 closeTag := fmt.Sprintf("</%s>", tag)
190 noOpen := text
191 if found {
192 noOpen = text[0:i+1] + newTag + text[end:]
193 }
194 return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
195}
196
197func (options *Html) HRule(out *bytes.Buffer) {
198 doubleSpace(out)
199 out.WriteString("<hr")
200 out.WriteString(options.closeTag)
201}
202
203func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
204 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
205 options.BlockCodeGithub(out, text, lang)
206 } else {
207 options.BlockCodeNormal(out, text, lang)
208 }
209}
210
211func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
212 doubleSpace(out)
213
214 // parse out the language names/classes
215 count := 0
216 for _, elt := range strings.Fields(lang) {
217 if elt[0] == '.' {
218 elt = elt[1:]
219 }
220 if len(elt) == 0 {
221 continue
222 }
223 if count == 0 {
224 out.WriteString("<pre><code class=\"")
225 } else {
226 out.WriteByte(' ')
227 }
228 attrEscape(out, []byte(elt))
229 count++
230 }
231
232 if count == 0 {
233 out.WriteString("<pre><code>")
234 } else {
235 out.WriteString("\">")
236 }
237
238 attrEscape(out, text)
239 out.WriteString("</code></pre>\n")
240}
241
242// GitHub style code block:
243//
244// <pre lang="LANG"><code>
245// ...
246// </code></pre>
247//
248// Unlike other parsers, we store the language identifier in the <pre>,
249// and don't let the user generate custom classes.
250//
251// The language identifier in the <pre> block gets postprocessed and all
252// the code inside gets syntax highlighted with Pygments. This is much safer
253// than letting the user specify a CSS class for highlighting.
254//
255// Note that we only generate HTML for the first specifier.
256// E.g.
257// ~~~~ {.python .numbered} => <pre lang="python"><code>
258func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
259 doubleSpace(out)
260
261 // parse out the language name
262 count := 0
263 for _, elt := range strings.Fields(lang) {
264 if elt[0] == '.' {
265 elt = elt[1:]
266 }
267 if len(elt) == 0 {
268 continue
269 }
270 out.WriteString("<pre lang=\"")
271 attrEscape(out, []byte(elt))
272 out.WriteString("\"><code>")
273 count++
274 break
275 }
276
277 if count == 0 {
278 out.WriteString("<pre><code>")
279 }
280
281 attrEscape(out, text)
282 out.WriteString("</code></pre>\n")
283}
284
285func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
286 doubleSpace(out)
287 out.WriteString("<blockquote>\n")
288 out.Write(text)
289 out.WriteString("</blockquote>\n")
290}
291
292func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
293 doubleSpace(out)
294 out.WriteString("<table>\n<thead>\n")
295 out.Write(header)
296 out.WriteString("</thead>\n\n<tbody>\n")
297 out.Write(body)
298 out.WriteString("</tbody>\n</table>\n")
299}
300
301func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
302 doubleSpace(out)
303 out.WriteString("<tr>\n")
304 out.Write(text)
305 out.WriteString("\n</tr>\n")
306}
307
308func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
309 doubleSpace(out)
310 switch align {
311 case TABLE_ALIGNMENT_LEFT:
312 out.WriteString("<td align=\"left\">")
313 case TABLE_ALIGNMENT_RIGHT:
314 out.WriteString("<td align=\"right\">")
315 case TABLE_ALIGNMENT_CENTER:
316 out.WriteString("<td align=\"center\">")
317 default:
318 out.WriteString("<td>")
319 }
320
321 out.Write(text)
322 out.WriteString("</td>")
323}
324
325func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
326 out.WriteString("<div class=\"footnotes\">\n")
327 options.HRule(out)
328 options.List(out, text, LIST_TYPE_ORDERED)
329 out.WriteString("</div>\n")
330}
331
332func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
333 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
334 doubleSpace(out)
335 }
336 out.WriteString(`<li id="fn:`)
337 out.Write(slugify(name))
338 out.WriteString(`">`)
339 out.Write(text)
340 out.WriteString("</li>\n")
341}
342
343func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
344 marker := out.Len()
345 doubleSpace(out)
346
347 if flags&LIST_TYPE_ORDERED != 0 {
348 out.WriteString("<ol>")
349 } else {
350 out.WriteString("<ul>")
351 }
352 if !text() {
353 out.Truncate(marker)
354 return
355 }
356 if flags&LIST_TYPE_ORDERED != 0 {
357 out.WriteString("</ol>\n")
358 } else {
359 out.WriteString("</ul>\n")
360 }
361}
362
363func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
364 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
365 doubleSpace(out)
366 }
367 out.WriteString("<li>")
368 out.Write(text)
369 out.WriteString("</li>\n")
370}
371
372func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
373 marker := out.Len()
374 doubleSpace(out)
375
376 out.WriteString("<p>")
377 if !text() {
378 out.Truncate(marker)
379 return
380 }
381 out.WriteString("</p>\n")
382}
383
384func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
385 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
386 // mark it but don't link it if it is not a safe link: no smartypants
387 out.WriteString("<tt>")
388 attrEscape(out, link)
389 out.WriteString("</tt>")
390 return
391 }
392
393 out.WriteString("<a href=\"")
394 if kind == LINK_TYPE_EMAIL {
395 out.WriteString("mailto:")
396 }
397 attrEscape(out, link)
398 out.WriteString("\">")
399
400 // Pretty print: if we get an email address as
401 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
402 // want to print the `mailto:` prefix
403 switch {
404 case bytes.HasPrefix(link, []byte("mailto://")):
405 attrEscape(out, link[len("mailto://"):])
406 case bytes.HasPrefix(link, []byte("mailto:")):
407 attrEscape(out, link[len("mailto:"):])
408 default:
409 attrEscape(out, link)
410 }
411
412 out.WriteString("</a>")
413}
414
415func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
416 out.WriteString("<code>")
417 attrEscape(out, text)
418 out.WriteString("</code>")
419}
420
421func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
422 out.WriteString("<strong>")
423 out.Write(text)
424 out.WriteString("</strong>")
425}
426
427func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
428 if len(text) == 0 {
429 return
430 }
431 out.WriteString("<em>")
432 out.Write(text)
433 out.WriteString("</em>")
434}
435
436func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
437 if options.flags&HTML_SKIP_IMAGES != 0 {
438 return
439 }
440
441 out.WriteString("<img src=\"")
442 attrEscape(out, link)
443 out.WriteString("\" alt=\"")
444 if len(alt) > 0 {
445 attrEscape(out, alt)
446 }
447 if len(title) > 0 {
448 out.WriteString("\" title=\"")
449 attrEscape(out, title)
450 }
451
452 out.WriteByte('"')
453 out.WriteString(options.closeTag)
454 return
455}
456
457func (options *Html) LineBreak(out *bytes.Buffer) {
458 out.WriteString("<br")
459 out.WriteString(options.closeTag)
460}
461
462func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
463 if options.flags&HTML_SKIP_LINKS != 0 {
464 // write the link text out but don't link it, just mark it with typewriter font
465 out.WriteString("<tt>")
466 attrEscape(out, content)
467 out.WriteString("</tt>")
468 return
469 }
470
471 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
472 // write the link text out but don't link it, just mark it with typewriter font
473 out.WriteString("<tt>")
474 attrEscape(out, content)
475 out.WriteString("</tt>")
476 return
477 }
478
479 out.WriteString("<a href=\"")
480 attrEscape(out, link)
481 if len(title) > 0 {
482 out.WriteString("\" title=\"")
483 attrEscape(out, title)
484 }
485 out.WriteString("\">")
486 out.Write(content)
487 out.WriteString("</a>")
488 return
489}
490
491func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
492 if options.flags&HTML_SKIP_HTML != 0 {
493 return
494 }
495 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
496 return
497 }
498 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
499 return
500 }
501 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
502 return
503 }
504 if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
505 return
506 }
507 out.Write(text)
508}
509
510func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
511 out.WriteString("<strong><em>")
512 out.Write(text)
513 out.WriteString("</em></strong>")
514}
515
516func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
517 out.WriteString("<del>")
518 out.Write(text)
519 out.WriteString("</del>")
520}
521
522func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
523 slug := slugify(ref)
524 out.WriteString(`<sup class="footnote-ref" id="fnref:`)
525 out.Write(slug)
526 out.WriteString(`"><a rel="footnote" href="#fn:`)
527 out.Write(slug)
528 out.WriteString(`">`)
529 out.WriteString(strconv.Itoa(id))
530 out.WriteString(`</a></sup>`)
531}
532
533func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
534 out.Write(entity)
535}
536
537func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
538 if options.flags&HTML_USE_SMARTYPANTS != 0 {
539 options.Smartypants(out, text)
540 } else {
541 attrEscape(out, text)
542 }
543}
544
545func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
546 smrt := smartypantsData{false, false}
547
548 // first do normal entity escaping
549 var escaped bytes.Buffer
550 attrEscape(&escaped, text)
551 text = escaped.Bytes()
552
553 mark := 0
554 for i := 0; i < len(text); i++ {
555 if action := options.smartypants[text[i]]; action != nil {
556 if i > mark {
557 out.Write(text[mark:i])
558 }
559
560 previousChar := byte(0)
561 if i > 0 {
562 previousChar = text[i-1]
563 }
564 i += action(out, &smrt, previousChar, text[i:])
565 mark = i + 1
566 }
567 }
568
569 if mark < len(text) {
570 out.Write(text[mark:])
571 }
572}
573
574func (options *Html) DocumentHeader(out *bytes.Buffer) {
575 if options.flags&HTML_COMPLETE_PAGE == 0 {
576 return
577 }
578
579 ending := ""
580 if options.flags&HTML_USE_XHTML != 0 {
581 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
582 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
583 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
584 ending = " /"
585 } else {
586 out.WriteString("<!DOCTYPE html>\n")
587 out.WriteString("<html>\n")
588 }
589 out.WriteString("<head>\n")
590 out.WriteString(" <title>")
591 options.NormalText(out, []byte(options.title))
592 out.WriteString("</title>\n")
593 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
594 out.WriteString(VERSION)
595 out.WriteString("\"")
596 out.WriteString(ending)
597 out.WriteString(">\n")
598 out.WriteString(" <meta charset=\"utf-8\"")
599 out.WriteString(ending)
600 out.WriteString(">\n")
601 if options.css != "" {
602 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
603 attrEscape(out, []byte(options.css))
604 out.WriteString("\"")
605 out.WriteString(ending)
606 out.WriteString(">\n")
607 }
608 out.WriteString("</head>\n")
609 out.WriteString("<body>\n")
610
611 options.tocMarker = out.Len()
612}
613
614func (options *Html) DocumentFooter(out *bytes.Buffer) {
615 // finalize and insert the table of contents
616 if options.flags&HTML_TOC != 0 {
617 options.TocFinalize()
618
619 // now we have to insert the table of contents into the document
620 var temp bytes.Buffer
621
622 // start by making a copy of everything after the document header
623 temp.Write(out.Bytes()[options.tocMarker:])
624
625 // now clear the copied material from the main output buffer
626 out.Truncate(options.tocMarker)
627
628 // corner case spacing issue
629 if options.flags&HTML_COMPLETE_PAGE != 0 {
630 out.WriteByte('\n')
631 }
632
633 // insert the table of contents
634 out.WriteString("<nav>\n")
635 out.Write(options.toc.Bytes())
636 out.WriteString("</nav>\n")
637
638 // corner case spacing issue
639 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
640 out.WriteByte('\n')
641 }
642
643 // write out everything that came after it
644 if options.flags&HTML_OMIT_CONTENTS == 0 {
645 out.Write(temp.Bytes())
646 }
647 }
648
649 if options.flags&HTML_COMPLETE_PAGE != 0 {
650 out.WriteString("\n</body>\n")
651 out.WriteString("</html>\n")
652 }
653
654}
655
656func (options *Html) TocHeader(text []byte, level int) {
657 for level > options.currentLevel {
658 switch {
659 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
660 // this sublist can nest underneath a header
661 size := options.toc.Len()
662 options.toc.Truncate(size - len("</li>\n"))
663
664 case options.currentLevel > 0:
665 options.toc.WriteString("<li>")
666 }
667 if options.toc.Len() > 0 {
668 options.toc.WriteByte('\n')
669 }
670 options.toc.WriteString("<ul>\n")
671 options.currentLevel++
672 }
673
674 for level < options.currentLevel {
675 options.toc.WriteString("</ul>")
676 if options.currentLevel > 1 {
677 options.toc.WriteString("</li>\n")
678 }
679 options.currentLevel--
680 }
681
682 options.toc.WriteString("<li><a href=\"#toc_")
683 options.toc.WriteString(strconv.Itoa(options.headerCount))
684 options.toc.WriteString("\">")
685 options.headerCount++
686
687 options.toc.Write(text)
688
689 options.toc.WriteString("</a></li>\n")
690}
691
692func (options *Html) TocFinalize() {
693 for options.currentLevel > 1 {
694 options.toc.WriteString("</ul></li>\n")
695 options.currentLevel--
696 }
697
698 if options.currentLevel > 0 {
699 options.toc.WriteString("</ul>\n")
700 }
701}
702
703func isHtmlTag(tag []byte, tagname string) bool {
704 found, _ := findHtmlTagPos(tag, tagname)
705 return found
706}
707
708func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
709 i := 0
710 if i < len(tag) && tag[0] != '<' {
711 return false, -1
712 }
713 i++
714 i = skipSpace(tag, i)
715
716 if i < len(tag) && tag[i] == '/' {
717 i++
718 }
719
720 i = skipSpace(tag, i)
721 j := 0
722 for ; i < len(tag); i, j = i+1, j+1 {
723 if j >= len(tagname) {
724 break
725 }
726
727 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
728 return false, -1
729 }
730 }
731
732 if i == len(tag) {
733 return false, -1
734 }
735
736 // Now look for closing '>', but ignore it when it's in any kind of quotes,
737 // it might be JavaScript
738 inSingleQuote := false
739 inDoubleQuote := false
740 inGraveQuote := false
741 for i < len(tag) {
742 switch {
743 case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
744 return true, i
745 case tag[i] == '\'':
746 inSingleQuote = !inSingleQuote
747 case tag[i] == '"':
748 inDoubleQuote = !inDoubleQuote
749 case tag[i] == '`':
750 inGraveQuote = !inGraveQuote
751 }
752 i++
753 }
754
755 return false, -1
756}
757
758func skipSpace(tag []byte, i int) int {
759 for i < len(tag) && isspace(tag[i]) {
760 i++
761 }
762 return i
763}
764
// doubleSpace appends a newline to out unless out is still empty. Block
// renderers call it before writing so that consecutive blocks are separated
// without a leading newline at the very start of the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}