html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22)
23
// Option flags for the HTML renderer. Each flag occupies its own bit, so
// several can be OR-ed together and passed as the flags argument of
// HtmlRenderer.
const (
	// skip raw HTML: BlockHtml and RawHtmlTag emit nothing
	HTML_SKIP_HTML = 1 << iota
	// drop raw <style> tags in RawHtmlTag
	HTML_SKIP_STYLE
	// do not render images; raw <img> tags are dropped as well
	HTML_SKIP_IMAGES
	// do not render links; raw <a> tags are dropped as well
	HTML_SKIP_LINKS
	// only render links for which isSafeLink reports true
	HTML_SAFELINK
	// build a table of contents and insert it in DocumentFooter
	HTML_TOC
	// with HTML_TOC: emit only the table of contents, not the document body
	HTML_OMIT_CONTENTS
	// wrap the output in a complete page (doctype, <head>, <body>)
	HTML_COMPLETE_PAGE
	// render code blocks with BlockCodeGithub instead of BlockCodeNormal
	HTML_GITHUB_BLOCKCODE
	// emit XHTML: self-closing singleton tags and an XHTML doctype
	HTML_USE_XHTML
	// run normal text through the Smartypants substitutions
	HTML_USE_SMARTYPANTS
	// not read in this file; passed to Smartypants(flags) by HtmlRenderer
	// (presumably enables fraction substitution -- confirm in smartypants code)
	HTML_SMARTYPANTS_FRACTIONS
	// not read in this file; passed to Smartypants(flags) by HtmlRenderer
	// (presumably selects LaTeX-style dash handling -- confirm in smartypants code)
	HTML_SMARTYPANTS_LATEX_DASHES
)
39
// Html is the HTML rendering backend. Its methods write the markup for one
// markdown construct at a time into a caller-supplied buffer. Values are
// created by HtmlRenderer.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded by DocumentHeader; the table of contents is inserted here
	headerCount  int           // index used for the next "toc_N" anchor; incremented by TocHeader
	currentLevel int           // current <ul> nesting depth of the table of contents
	toc          *bytes.Buffer // table of contents markup accumulated by TocHeader

	// per-byte substitution callbacks, looked up by input byte in Smartypants
	smartypants *SmartypantsRenderer
}
54
// Terminators for singleton tags such as <hr>, <br> and <img>. HtmlRenderer
// stores one of them in Html.closeTag depending on HTML_USE_XHTML.
const (
	// self-closing form, used when HTML_USE_XHTML is set
	xhtmlClose = " />\n"
	// plain form, used otherwise
	htmlClose = ">\n"
)
59
60func HtmlRenderer(flags int, title string, css string) Renderer {
61 // configure the rendering engine
62 closeTag := htmlClose
63 if flags&HTML_USE_XHTML != 0 {
64 closeTag = xhtmlClose
65 }
66
67 return &Html{
68 flags: flags,
69 closeTag: closeTag,
70 title: title,
71 css: css,
72
73 headerCount: 0,
74 currentLevel: 0,
75 toc: new(bytes.Buffer),
76
77 smartypants: Smartypants(flags),
78 }
79}
80
// attrEscape writes src to out, replacing the characters that are significant
// in HTML text and attribute values with entity references:
//
//	"  becomes  &quot;
//	&  becomes  &amp;
//	<  becomes  &lt;
//	>  becomes  &gt;
//
// Every other byte is copied through unchanged. Runs of ordinary bytes are
// written with a single Write call instead of byte by byte, because this
// function is called for nearly all rendered text.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		if i > org {
			// copy all the normal characters since the last escape
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
126
127func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
128 marker := out.Len()
129
130 if marker > 0 {
131 out.WriteByte('\n')
132 }
133
134 if options.flags&HTML_TOC != 0 {
135 // headerCount is incremented in htmlTocHeader
136 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
137 } else {
138 out.WriteString(fmt.Sprintf("<h%d>", level))
139 }
140
141 tocMarker := out.Len()
142 if !text() {
143 out.Truncate(marker)
144 return
145 }
146
147 // are we building a table of contents?
148 if options.flags&HTML_TOC != 0 {
149 options.TocHeader(out.Bytes()[tocMarker:], level)
150 }
151
152 out.WriteString(fmt.Sprintf("</h%d>\n", level))
153}
154
155func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
156 if options.flags&HTML_SKIP_HTML != 0 {
157 return
158 }
159
160 sz := len(text)
161 for sz > 0 && text[sz-1] == '\n' {
162 sz--
163 }
164 org := 0
165 for org < sz && text[org] == '\n' {
166 org++
167 }
168 if org >= sz {
169 return
170 }
171 if out.Len() > 0 {
172 out.WriteByte('\n')
173 }
174 out.Write(text[org:sz])
175 out.WriteByte('\n')
176}
177
178func (options *Html) HRule(out *bytes.Buffer) {
179 if out.Len() > 0 {
180 out.WriteByte('\n')
181 }
182 out.WriteString("<hr")
183 out.WriteString(options.closeTag)
184}
185
186func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
187 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
188 options.BlockCodeGithub(out, text, lang)
189 } else {
190 options.BlockCodeNormal(out, text, lang)
191 }
192}
193
194func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
195 if out.Len() > 0 {
196 out.WriteByte('\n')
197 }
198
199 if lang != "" {
200 out.WriteString("<pre><code class=\"")
201
202 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
203 for i < len(lang) && isspace(lang[i]) {
204 i++
205 }
206
207 if i < len(lang) {
208 org := i
209 for i < len(lang) && !isspace(lang[i]) {
210 i++
211 }
212
213 if lang[org] == '.' {
214 org++
215 }
216
217 if cls > 0 {
218 out.WriteByte(' ')
219 }
220 attrEscape(out, []byte(lang[org:]))
221 }
222 }
223
224 out.WriteString("\">")
225 } else {
226 out.WriteString("<pre><code>")
227 }
228
229 if len(text) > 0 {
230 attrEscape(out, text)
231 }
232
233 out.WriteString("</code></pre>\n")
234}
235
236/*
237 * GitHub style code block:
238 *
239 * <pre lang="LANG"><code>
240 * ...
241 * </pre></code>
242 *
243 * Unlike other parsers, we store the language identifier in the <pre>,
244 * and don't let the user generate custom classes.
245 *
246 * The language identifier in the <pre> block gets postprocessed and all
247 * the code inside gets syntax highlighted with Pygments. This is much safer
248 * than letting the user specify a CSS class for highlighting.
249 *
250 * Note that we only generate HTML for the first specifier.
251 * E.g.
252 * ~~~~ {.python .numbered} => <pre lang="python"><code>
253 */
254func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
255 if out.Len() > 0 {
256 out.WriteByte('\n')
257 }
258
259 if len(lang) > 0 {
260 out.WriteString("<pre lang=\"")
261
262 i := 0
263 for i < len(lang) && !isspace(lang[i]) {
264 i++
265 }
266
267 if lang[0] == '.' {
268 attrEscape(out, []byte(lang[1:i]))
269 } else {
270 attrEscape(out, []byte(lang[:i]))
271 }
272
273 out.WriteString("\"><code>")
274 } else {
275 out.WriteString("<pre><code>")
276 }
277
278 if len(text) > 0 {
279 attrEscape(out, text)
280 }
281
282 out.WriteString("</code></pre>\n")
283}
284
285
286func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
287 out.WriteString("<blockquote>\n")
288 out.Write(text)
289 out.WriteString("</blockquote>")
290}
291
292func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
293 if out.Len() > 0 {
294 out.WriteByte('\n')
295 }
296 out.WriteString("<table><thead>\n")
297 out.Write(header)
298 out.WriteString("\n</thead><tbody>\n")
299 out.Write(body)
300 out.WriteString("\n</tbody></table>")
301}
302
303func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
304 if out.Len() > 0 {
305 out.WriteByte('\n')
306 }
307 out.WriteString("<tr>\n")
308 out.Write(text)
309 out.WriteString("\n</tr>")
310}
311
312func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
313 if out.Len() > 0 {
314 out.WriteByte('\n')
315 }
316 switch align {
317 case TABLE_ALIGNMENT_LEFT:
318 out.WriteString("<td align=\"left\">")
319 case TABLE_ALIGNMENT_RIGHT:
320 out.WriteString("<td align=\"right\">")
321 case TABLE_ALIGNMENT_CENTER:
322 out.WriteString("<td align=\"center\">")
323 default:
324 out.WriteString("<td>")
325 }
326
327 out.Write(text)
328 out.WriteString("</td>")
329}
330
331func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
332 marker := out.Len()
333
334 if marker > 0 {
335 out.WriteByte('\n')
336 }
337 if flags&LIST_TYPE_ORDERED != 0 {
338 out.WriteString("<ol>\n")
339 } else {
340 out.WriteString("<ul>\n")
341 }
342 if !text() {
343 out.Truncate(marker)
344 return
345 }
346 if flags&LIST_TYPE_ORDERED != 0 {
347 out.WriteString("</ol>\n")
348 } else {
349 out.WriteString("</ul>\n")
350 }
351}
352
353func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
354 out.WriteString("<li>")
355 size := len(text)
356 for size > 0 && text[size-1] == '\n' {
357 size--
358 }
359 out.Write(text[:size])
360 out.WriteString("</li>\n")
361}
362
363func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
364 marker := out.Len()
365 if marker > 0 {
366 out.WriteByte('\n')
367 }
368
369 out.WriteString("<p>")
370 if !text() {
371 out.Truncate(marker)
372 return
373 }
374 out.WriteString("</p>\n")
375}
376
377func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) bool {
378 if len(link) == 0 {
379 return false
380 }
381 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
382 return false
383 }
384
385 out.WriteString("<a href=\"")
386 if kind == LINK_TYPE_EMAIL {
387 out.WriteString("mailto:")
388 }
389 attrEscape(out, link)
390 out.WriteString("\">")
391
392 /*
393 * Pretty print: if we get an email address as
394 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
395 * want to print the `mailto:` prefix
396 */
397 switch {
398 case bytes.HasPrefix(link, []byte("mailto://")):
399 attrEscape(out, link[9:])
400 case bytes.HasPrefix(link, []byte("mailto:")):
401 attrEscape(out, link[7:])
402 default:
403 attrEscape(out, link)
404 }
405
406 out.WriteString("</a>")
407
408 return true
409}
410
411func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) bool {
412 out.WriteString("<code>")
413 attrEscape(out, text)
414 out.WriteString("</code>")
415 return true
416}
417
418func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) bool {
419 if len(text) == 0 {
420 return false
421 }
422 out.WriteString("<strong>")
423 out.Write(text)
424 out.WriteString("</strong>")
425 return true
426}
427
428func (options *Html) Emphasis(out *bytes.Buffer, text []byte) bool {
429 if len(text) == 0 {
430 return false
431 }
432 out.WriteString("<em>")
433 out.Write(text)
434 out.WriteString("</em>")
435 return true
436}
437
438func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) bool {
439 if options.flags&HTML_SKIP_IMAGES != 0 {
440 return false
441 }
442
443 if len(link) == 0 {
444 return false
445 }
446 out.WriteString("<img src=\"")
447 attrEscape(out, link)
448 out.WriteString("\" alt=\"")
449 if len(alt) > 0 {
450 attrEscape(out, alt)
451 }
452 if len(title) > 0 {
453 out.WriteString("\" title=\"")
454 attrEscape(out, title)
455 }
456
457 out.WriteByte('"')
458 out.WriteString(options.closeTag)
459 return true
460}
461
462func (options *Html) LineBreak(out *bytes.Buffer) bool {
463 out.WriteString("<br")
464 out.WriteString(options.closeTag)
465 return true
466}
467
468func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) bool {
469 if options.flags&HTML_SKIP_LINKS != 0 {
470 return false
471 }
472
473 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
474 return false
475 }
476
477 out.WriteString("<a href=\"")
478 attrEscape(out, link)
479 if len(title) > 0 {
480 out.WriteString("\" title=\"")
481 attrEscape(out, title)
482 }
483 out.WriteString("\">")
484 out.Write(content)
485 out.WriteString("</a>")
486 return true
487}
488
489func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) bool {
490 if options.flags&HTML_SKIP_HTML != 0 {
491 return true
492 }
493 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
494 return true
495 }
496 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
497 return true
498 }
499 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
500 return true
501 }
502 out.Write(text)
503 return true
504}
505
506func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) bool {
507 if len(text) == 0 {
508 return false
509 }
510 out.WriteString("<strong><em>")
511 out.Write(text)
512 out.WriteString("</em></strong>")
513 return true
514}
515
516func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) bool {
517 if len(text) == 0 {
518 return false
519 }
520 out.WriteString("<del>")
521 out.Write(text)
522 out.WriteString("</del>")
523 return true
524}
525
526func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
527 out.Write(entity)
528}
529
530func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
531 if options.flags&HTML_USE_SMARTYPANTS != 0 {
532 options.Smartypants(out, text)
533 } else {
534 attrEscape(out, text)
535 }
536}
537
538func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
539 smrt := smartypantsData{false, false}
540
541 // first do normal entity escaping
542 var escaped bytes.Buffer
543 attrEscape(&escaped, text)
544 text = escaped.Bytes()
545
546 mark := 0
547 for i := 0; i < len(text); i++ {
548 if action := options.smartypants[text[i]]; action != nil {
549 if i > mark {
550 out.Write(text[mark:i])
551 }
552
553 previousChar := byte(0)
554 if i > 0 {
555 previousChar = text[i-1]
556 }
557 i += action(out, &smrt, previousChar, text[i:])
558 mark = i + 1
559 }
560 }
561
562 if mark < len(text) {
563 out.Write(text[mark:])
564 }
565}
566
567func (options *Html) DocumentHeader(out *bytes.Buffer) {
568 if options.flags&HTML_COMPLETE_PAGE == 0 {
569 return
570 }
571
572 ending := ""
573 if options.flags&HTML_USE_XHTML != 0 {
574 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
575 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
576 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
577 ending = " /"
578 } else {
579 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
580 out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
581 out.WriteString("<html>\n")
582 }
583 out.WriteString("<head>\n")
584 out.WriteString(" <title>")
585 options.NormalText(out, []byte(options.title))
586 out.WriteString("</title>\n")
587 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
588 out.WriteString(VERSION)
589 out.WriteString("\"")
590 out.WriteString(ending)
591 out.WriteString(">\n")
592 out.WriteString(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
593 out.WriteString(ending)
594 out.WriteString(">\n")
595 if options.css != "" {
596 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
597 attrEscape(out, []byte(options.css))
598 out.WriteString("\"")
599 out.WriteString(ending)
600 out.WriteString(">\n")
601 }
602 out.WriteString("</head>\n")
603 out.WriteString("<body>\n")
604
605 options.tocMarker = out.Len()
606}
607
608func (options *Html) DocumentFooter(out *bytes.Buffer) {
609 // finalize and insert the table of contents
610 if options.flags&HTML_TOC != 0 {
611 options.TocFinalize()
612
613 // now we have to insert the table of contents into the document
614 var temp bytes.Buffer
615
616 // start by making a copy of everything after the document header
617 temp.Write(out.Bytes()[options.tocMarker:])
618
619 // now clear the copied material from the main output buffer
620 out.Truncate(options.tocMarker)
621
622 // insert the table of contents
623 out.Write(options.toc.Bytes())
624
625 // write out everything that came after it
626 if options.flags&HTML_OMIT_CONTENTS == 0 {
627 out.Write(temp.Bytes())
628 }
629 }
630
631 if options.flags&HTML_COMPLETE_PAGE != 0 {
632 out.WriteString("\n</body>\n")
633 out.WriteString("</html>\n")
634 }
635
636}
637
638func (options *Html) TocHeader(text []byte, level int) {
639 for level > options.currentLevel {
640 switch {
641 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
642 size := options.toc.Len()
643 options.toc.Truncate(size - len("</li>\n"))
644
645 case options.currentLevel > 0:
646 options.toc.WriteString("<li>")
647 }
648 options.toc.WriteString("\n<ul>\n")
649 options.currentLevel++
650 }
651
652 for level < options.currentLevel {
653 options.toc.WriteString("</ul>")
654 if options.currentLevel > 1 {
655 options.toc.WriteString("</li>\n")
656 }
657 options.currentLevel--
658 }
659
660 options.toc.WriteString("<li><a href=\"#toc_")
661 options.toc.WriteString(strconv.Itoa(options.headerCount))
662 options.toc.WriteString("\">")
663 options.headerCount++
664
665 options.toc.Write(text)
666
667 options.toc.WriteString("</a></li>\n")
668}
669
670func (options *Html) TocFinalize() {
671 for options.currentLevel > 1 {
672 options.toc.WriteString("</ul></li>\n")
673 options.currentLevel--
674 }
675
676 if options.currentLevel > 0 {
677 options.toc.WriteString("</ul>\n")
678 }
679}
680
681func isHtmlTag(tag []byte, tagname string) bool {
682 i := 0
683 if i < len(tag) && tag[0] != '<' {
684 return false
685 }
686 i++
687 for i < len(tag) && isspace(tag[i]) {
688 i++
689 }
690
691 if i < len(tag) && tag[i] == '/' {
692 i++
693 }
694
695 for i < len(tag) && isspace(tag[i]) {
696 i++
697 }
698
699 j := i
700 for ; i < len(tag); i, j = i+1, j+1 {
701 if j >= len(tagname) {
702 break
703 }
704
705 if tag[i] != tagname[j] {
706 return false
707 }
708 }
709
710 if i == len(tag) {
711 return false
712 }
713
714 return isspace(tag[i]) || tag[i] == '>'
715}