html.go (view raw)
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
9
//
//
// HTML rendering backend
//
//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22)
23
// Configuration flags for the HTML renderer. Each flag occupies its own bit,
// so flags are combined with bitwise OR and passed to HtmlRenderer.
const (
	// HTML_SKIP_HTML drops raw HTML blocks and inline raw tags.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE drops inline raw <style> tags.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES makes htmlImage decline to render and drops inline
	// raw <img> tags.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS makes htmlLink decline to render and drops inline raw
	// <a> tags.
	HTML_SKIP_LINKS
	// HTML_SAFELINK restricts links to those accepted by isSafeLink.
	HTML_SAFELINK
	// HTML_TOC collects headers into a table of contents that is inserted
	// by htmlDocumentFooter.
	HTML_TOC
	// HTML_OMIT_CONTENTS, together with HTML_TOC, outputs the table of
	// contents without the document body.
	HTML_OMIT_CONTENTS
	// HTML_COMPLETE_PAGE wraps the output in a complete page (doctype,
	// <head>, <body>).
	HTML_COMPLETE_PAGE
	// HTML_GITHUB_BLOCKCODE renders code blocks as <pre lang="..."><code>.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML ends singleton tags with " />" and selects the XHTML
	// doctype.
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS routes normal text through htmlSmartypants.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS is not interpreted in this file; the whole
	// flag set is handed to Smartypants.
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES is not interpreted in this file; the
	// whole flag set is handed to Smartypants.
	HTML_SMARTYPANTS_LATEX_DASHES
)
39
40type htmlOptions struct {
41 flags int // HTML_* options
42 closeTag string // how to end singleton tags: either " />\n" or ">\n"
43 title string // document title
44 css string // optional css file url (used with HTML_COMPLETE_PAGE)
45
46 // table of contents data
47 tocMarker int
48 headerCount int
49 currentLevel int
50 toc *bytes.Buffer
51
52 smartypants *SmartypantsRenderer
53}
54
// Terminators for singleton tags such as <hr> and <br>. HtmlRenderer selects
// xhtmlClose when HTML_USE_XHTML is set and htmlClose otherwise.
var (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
57
58func HtmlRenderer(flags int, title string, css string) *Renderer {
59 // configure the rendering engine
60 r := new(Renderer)
61
62 r.BlockCode = htmlBlockCode
63 r.BlockQuote = htmlBlockQuote
64 r.BlockHtml = htmlBlockHtml
65 r.Header = htmlHeader
66 r.HRule = htmlHRule
67 r.List = htmlList
68 r.ListItem = htmlListItem
69 r.Paragraph = htmlParagraph
70 r.Table = htmlTable
71 r.TableRow = htmlTableRow
72 r.TableCell = htmlTableCell
73
74 r.AutoLink = htmlAutoLink
75 r.CodeSpan = htmlCodeSpan
76 r.DoubleEmphasis = htmlDoubleEmphasis
77 r.Emphasis = htmlEmphasis
78 r.Image = htmlImage
79 r.LineBreak = htmlLineBreak
80 r.Link = htmlLink
81 r.RawHtmlTag = htmlRawTag
82 r.TripleEmphasis = htmlTripleEmphasis
83 r.StrikeThrough = htmlStrikeThrough
84
85 r.Entity = htmlEntity
86 r.NormalText = htmlNormalText
87
88 r.DocumentHeader = htmlDocumentHeader
89 r.DocumentFooter = htmlDocumentFooter
90
91 closeTag := htmlClose
92 if flags&HTML_USE_XHTML != 0 {
93 closeTag = xhtmlClose
94 }
95
96 r.Opaque = &htmlOptions{
97 flags: flags,
98 closeTag: closeTag,
99 title: title,
100 css: css,
101
102 headerCount: 0,
103 currentLevel: 0,
104 toc: new(bytes.Buffer),
105
106 smartypants: Smartypants(flags),
107 }
108 return r
109}
110
// attrEscape writes src to out, replacing the characters that are special in
// HTML text and attribute values with their entities:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// All other bytes are copied through unchanged.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// copy all the normal characters since the last escape, then the
		// entity that replaces the special character
		out.Write(src[org:i])
		out.WriteString(entity)
		org = i + 1
	}
	out.Write(src[org:])
}
156
157func htmlHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
158 options := opaque.(*htmlOptions)
159 marker := out.Len()
160
161 if marker > 0 {
162 out.WriteByte('\n')
163 }
164
165 if options.flags&HTML_TOC != 0 {
166 // headerCount is incremented in htmlTocHeader
167 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
168 } else {
169 out.WriteString(fmt.Sprintf("<h%d>", level))
170 }
171
172 tocMarker := out.Len()
173 if !text() {
174 out.Truncate(marker)
175 return
176 }
177
178 // are we building a table of contents?
179 if options.flags&HTML_TOC != 0 {
180 htmlTocHeader(out.Bytes()[tocMarker:], level, opaque)
181 }
182
183 out.WriteString(fmt.Sprintf("</h%d>\n", level))
184}
185
186func htmlBlockHtml(out *bytes.Buffer, text []byte, opaque interface{}) {
187 options := opaque.(*htmlOptions)
188 if options.flags&HTML_SKIP_HTML != 0 {
189 return
190 }
191
192 sz := len(text)
193 for sz > 0 && text[sz-1] == '\n' {
194 sz--
195 }
196 org := 0
197 for org < sz && text[org] == '\n' {
198 org++
199 }
200 if org >= sz {
201 return
202 }
203 if out.Len() > 0 {
204 out.WriteByte('\n')
205 }
206 out.Write(text[org:sz])
207 out.WriteByte('\n')
208}
209
210func htmlHRule(out *bytes.Buffer, opaque interface{}) {
211 options := opaque.(*htmlOptions)
212
213 if out.Len() > 0 {
214 out.WriteByte('\n')
215 }
216 out.WriteString("<hr")
217 out.WriteString(options.closeTag)
218}
219
220func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
221 options := opaque.(*htmlOptions)
222 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
223 htmlBlockCodeGithub(out, text, lang, opaque)
224 } else {
225 htmlBlockCodeNormal(out, text, lang, opaque)
226 }
227}
228
229func htmlBlockCodeNormal(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
230 if out.Len() > 0 {
231 out.WriteByte('\n')
232 }
233
234 if lang != "" {
235 out.WriteString("<pre><code class=\"")
236
237 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
238 for i < len(lang) && isspace(lang[i]) {
239 i++
240 }
241
242 if i < len(lang) {
243 org := i
244 for i < len(lang) && !isspace(lang[i]) {
245 i++
246 }
247
248 if lang[org] == '.' {
249 org++
250 }
251
252 if cls > 0 {
253 out.WriteByte(' ')
254 }
255 attrEscape(out, []byte(lang[org:]))
256 }
257 }
258
259 out.WriteString("\">")
260 } else {
261 out.WriteString("<pre><code>")
262 }
263
264 if len(text) > 0 {
265 attrEscape(out, text)
266 }
267
268 out.WriteString("</code></pre>\n")
269}
270
271/*
272 * GitHub style code block:
273 *
274 * <pre lang="LANG"><code>
275 * ...
276 * </pre></code>
277 *
278 * Unlike other parsers, we store the language identifier in the <pre>,
279 * and don't let the user generate custom classes.
280 *
281 * The language identifier in the <pre> block gets postprocessed and all
282 * the code inside gets syntax highlighted with Pygments. This is much safer
283 * than letting the user specify a CSS class for highlighting.
284 *
285 * Note that we only generate HTML for the first specifier.
286 * E.g.
287 * ~~~~ {.python .numbered} => <pre lang="python"><code>
288 */
289func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
290 if out.Len() > 0 {
291 out.WriteByte('\n')
292 }
293
294 if len(lang) > 0 {
295 out.WriteString("<pre lang=\"")
296
297 i := 0
298 for i < len(lang) && !isspace(lang[i]) {
299 i++
300 }
301
302 if lang[0] == '.' {
303 attrEscape(out, []byte(lang[1:i]))
304 } else {
305 attrEscape(out, []byte(lang[:i]))
306 }
307
308 out.WriteString("\"><code>")
309 } else {
310 out.WriteString("<pre><code>")
311 }
312
313 if len(text) > 0 {
314 attrEscape(out, text)
315 }
316
317 out.WriteString("</code></pre>\n")
318}
319
320
// htmlBlockQuote wraps the already rendered text in <blockquote> tags.
// Unlike most block callbacks in this file it writes no separating newline
// in front of the opening tag and no newline after the closing tag.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	out.WriteString("<blockquote>\n")
	out.Write(text)
	out.WriteString("</blockquote>")
}
326
// htmlTable writes a table whose <thead> holds the already rendered header
// and whose <tbody> holds the already rendered body. columnData is not used
// by this renderer.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	if out.Len() > 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<table><thead>\n")
	out.Write(header)
	out.WriteString("\n</thead><tbody>\n")
	out.Write(body)
	out.WriteString("\n</tbody></table>")
}
337
// htmlTableRow wraps the already rendered cells of one row in <tr> tags,
// preceded by a newline when out is not empty.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	if out.Len() > 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<tr>\n")
	out.Write(text)
	out.WriteString("\n</tr>")
}
346
347func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
348 if out.Len() > 0 {
349 out.WriteByte('\n')
350 }
351 switch align {
352 case TABLE_ALIGNMENT_LEFT:
353 out.WriteString("<td align=\"left\">")
354 case TABLE_ALIGNMENT_RIGHT:
355 out.WriteString("<td align=\"right\">")
356 case TABLE_ALIGNMENT_CENTER:
357 out.WriteString("<td align=\"center\">")
358 default:
359 out.WriteString("<td>")
360 }
361
362 out.Write(text)
363 out.WriteString("</td>")
364}
365
366func htmlList(out *bytes.Buffer, text func() bool, flags int, opaque interface{}) {
367 marker := out.Len()
368
369 if marker > 0 {
370 out.WriteByte('\n')
371 }
372 if flags&LIST_TYPE_ORDERED != 0 {
373 out.WriteString("<ol>\n")
374 } else {
375 out.WriteString("<ul>\n")
376 }
377 if !text() {
378 out.Truncate(marker)
379 return
380 }
381 if flags&LIST_TYPE_ORDERED != 0 {
382 out.WriteString("</ol>\n")
383 } else {
384 out.WriteString("</ul>\n")
385 }
386}
387
// htmlListItem wraps the already rendered item text in <li> tags, dropping
// any trailing newlines from the text first. flags is not used.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	out.WriteString("<li>")
	out.Write(bytes.TrimRight(text, "\n"))
	out.WriteString("</li>\n")
}
397
// htmlParagraph renders a paragraph as <p>...</p>.
//
// text renders the paragraph's content into out and reports whether it
// succeeded; when it reports false, everything this call wrote is discarded.
func htmlParagraph(out *bytes.Buffer, text func() bool, opaque interface{}) {
	marker := out.Len()

	// separate the paragraph from preceding output
	if marker > 0 {
		out.WriteByte('\n')
	}

	out.WriteString("<p>")
	if !text() {
		out.Truncate(marker)
		return
	}
	out.WriteString("</p>\n")
}
411
412func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) bool {
413 options := opaque.(*htmlOptions)
414
415 if len(link) == 0 {
416 return false
417 }
418 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
419 return false
420 }
421
422 out.WriteString("<a href=\"")
423 if kind == LINK_TYPE_EMAIL {
424 out.WriteString("mailto:")
425 }
426 attrEscape(out, link)
427 out.WriteString("\">")
428
429 /*
430 * Pretty print: if we get an email address as
431 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
432 * want to print the `mailto:` prefix
433 */
434 switch {
435 case bytes.HasPrefix(link, []byte("mailto://")):
436 attrEscape(out, link[9:])
437 case bytes.HasPrefix(link, []byte("mailto:")):
438 attrEscape(out, link[7:])
439 default:
440 attrEscape(out, link)
441 }
442
443 out.WriteString("</a>")
444
445 return true
446}
447
448func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) bool {
449 out.WriteString("<code>")
450 attrEscape(out, text)
451 out.WriteString("</code>")
452 return true
453}
454
// htmlDoubleEmphasis wraps the already rendered text in <strong> tags.
// It returns false without writing anything when text is empty.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) == 0 {
		return false
	}
	out.WriteString("<strong>")
	out.Write(text)
	out.WriteString("</strong>")
	return true
}
464
// htmlEmphasis wraps the already rendered text in <em> tags.
// It returns false without writing anything when text is empty.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) == 0 {
		return false
	}
	out.WriteString("<em>")
	out.Write(text)
	out.WriteString("</em>")
	return true
}
474
475func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) bool {
476 options := opaque.(*htmlOptions)
477 if options.flags&HTML_SKIP_IMAGES != 0 {
478 return false
479 }
480
481 if len(link) == 0 {
482 return false
483 }
484 out.WriteString("<img src=\"")
485 attrEscape(out, link)
486 out.WriteString("\" alt=\"")
487 if len(alt) > 0 {
488 attrEscape(out, alt)
489 }
490 if len(title) > 0 {
491 out.WriteString("\" title=\"")
492 attrEscape(out, title)
493 }
494
495 out.WriteByte('"')
496 out.WriteString(options.closeTag)
497 return true
498}
499
500func htmlLineBreak(out *bytes.Buffer, opaque interface{}) bool {
501 options := opaque.(*htmlOptions)
502 out.WriteString("<br")
503 out.WriteString(options.closeTag)
504 return true
505}
506
507func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) bool {
508 options := opaque.(*htmlOptions)
509 if options.flags&HTML_SKIP_LINKS != 0 {
510 return false
511 }
512
513 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
514 return false
515 }
516
517 out.WriteString("<a href=\"")
518 attrEscape(out, link)
519 if len(title) > 0 {
520 out.WriteString("\" title=\"")
521 attrEscape(out, title)
522 }
523 out.WriteString("\">")
524 out.Write(content)
525 out.WriteString("</a>")
526 return true
527}
528
529func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) bool {
530 options := opaque.(*htmlOptions)
531 if options.flags&HTML_SKIP_HTML != 0 {
532 return true
533 }
534 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
535 return true
536 }
537 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
538 return true
539 }
540 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
541 return true
542 }
543 out.Write(text)
544 return true
545}
546
// htmlTripleEmphasis wraps the already rendered text in <strong><em> tags.
// It returns false without writing anything when text is empty.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) == 0 {
		return false
	}
	out.WriteString("<strong><em>")
	out.Write(text)
	out.WriteString("</em></strong>")
	return true
}
556
// htmlStrikeThrough wraps the already rendered text in <del> tags.
// It returns false without writing anything when text is empty.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) == 0 {
		return false
	}
	out.WriteString("<del>")
	out.Write(text)
	out.WriteString("</del>")
	return true
}
566
// htmlEntity copies entity to out unchanged; no escaping is applied.
func htmlEntity(out *bytes.Buffer, entity []byte, opaque interface{}) {
	out.Write(entity)
}
570
571func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
572 options := opaque.(*htmlOptions)
573 if options.flags&HTML_USE_SMARTYPANTS != 0 {
574 htmlSmartypants(out, text, opaque)
575 } else {
576 attrEscape(out, text)
577 }
578}
579
580func htmlDocumentHeader(out *bytes.Buffer, opaque interface{}) {
581 options := opaque.(*htmlOptions)
582 if options.flags&HTML_COMPLETE_PAGE == 0 {
583 return
584 }
585
586 ending := ""
587 if options.flags&HTML_USE_XHTML != 0 {
588 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
589 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
590 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
591 ending = " /"
592 } else {
593 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
594 out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
595 out.WriteString("<html>\n")
596 }
597 out.WriteString("<head>\n")
598 out.WriteString(" <title>")
599 htmlNormalText(out, []byte(options.title), opaque)
600 out.WriteString("</title>\n")
601 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
602 out.WriteString(VERSION)
603 out.WriteString("\"")
604 out.WriteString(ending)
605 out.WriteString(">\n")
606 out.WriteString(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
607 out.WriteString(ending)
608 out.WriteString(">\n")
609 if options.css != "" {
610 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
611 attrEscape(out, []byte(options.css))
612 out.WriteString("\"")
613 out.WriteString(ending)
614 out.WriteString(">\n")
615 }
616 out.WriteString("</head>\n")
617 out.WriteString("<body>\n")
618
619 options.tocMarker = out.Len()
620}
621
622func htmlDocumentFooter(out *bytes.Buffer, opaque interface{}) {
623 options := opaque.(*htmlOptions)
624
625 // finalize and insert the table of contents
626 if options.flags&HTML_TOC != 0 {
627 htmlTocFinalize(opaque)
628
629 // now we have to insert the table of contents into the document
630 var temp bytes.Buffer
631
632 // start by making a copy of everything after the document header
633 temp.Write(out.Bytes()[options.tocMarker:])
634
635 // now clear the copied material from the main output buffer
636 out.Truncate(options.tocMarker)
637
638 // insert the table of contents
639 out.Write(options.toc.Bytes())
640
641 // write out everything that came after it
642 if options.flags&HTML_OMIT_CONTENTS == 0 {
643 out.Write(temp.Bytes())
644 }
645 }
646
647 if options.flags&HTML_COMPLETE_PAGE != 0 {
648 out.WriteString("\n</body>\n")
649 out.WriteString("</html>\n")
650 }
651
652}
653
654func htmlTocHeader(text []byte, level int, opaque interface{}) {
655 options := opaque.(*htmlOptions)
656
657 for level > options.currentLevel {
658 switch {
659 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
660 size := options.toc.Len()
661 options.toc.Truncate(size - len("</li>\n"))
662
663 case options.currentLevel > 0:
664 options.toc.WriteString("<li>")
665 }
666 options.toc.WriteString("\n<ul>\n")
667 options.currentLevel++
668 }
669
670 for level < options.currentLevel {
671 options.toc.WriteString("</ul>")
672 if options.currentLevel > 1 {
673 options.toc.WriteString("</li>\n")
674 }
675 options.currentLevel--
676 }
677
678 options.toc.WriteString("<li><a href=\"#toc_")
679 options.toc.WriteString(strconv.Itoa(options.headerCount))
680 options.toc.WriteString("\">")
681 options.headerCount++
682
683 options.toc.Write(text)
684
685 options.toc.WriteString("</a></li>\n")
686}
687
688func htmlTocFinalize(opaque interface{}) {
689 options := opaque.(*htmlOptions)
690 for options.currentLevel > 1 {
691 options.toc.WriteString("</ul></li>\n")
692 options.currentLevel--
693 }
694
695 if options.currentLevel > 0 {
696 options.toc.WriteString("</ul>\n")
697 }
698}
699
700func isHtmlTag(tag []byte, tagname string) bool {
701 i := 0
702 if i < len(tag) && tag[0] != '<' {
703 return false
704 }
705 i++
706 for i < len(tag) && isspace(tag[i]) {
707 i++
708 }
709
710 if i < len(tag) && tag[i] == '/' {
711 i++
712 }
713
714 for i < len(tag) && isspace(tag[i]) {
715 i++
716 }
717
718 j := i
719 for ; i < len(tag); i, j = i+1, j+1 {
720 if j >= len(tagname) {
721 break
722 }
723
724 if tag[i] != tagname[j] {
725 return false
726 }
727 }
728
729 if i == len(tag) {
730 return false
731 }
732
733 return isspace(tag[i]) || tag[i] == '>'
734}