html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22)
23
// Flags that configure the HTML renderer. Each one is a distinct bit; OR
// together the ones you want and pass the result to HtmlRenderer or
// HtmlTocRenderer.
const (
	// HTML_SKIP_HTML: htmlBlockHtml emits nothing and htmlRawTag drops every
	// inline tag.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE: htmlRawTag drops tags recognised as "style".
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES: htmlImage declines to render and htmlRawTag drops
	// tags recognised as "img".
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS: htmlLink declines to render and htmlRawTag drops tags
	// recognised as "a".
	HTML_SKIP_LINKS
	// HTML_SAFELINK: links and non-email autolinks are rendered only when
	// isSafeLink accepts the target.
	HTML_SAFELINK
	// HTML_TOC: htmlHeader adds id="toc_N" to each header. HtmlTocRenderer
	// always sets this flag on its own options.
	HTML_TOC
	// HTML_GITHUB_BLOCKCODE: code blocks use the <pre lang="..."> layout of
	// htmlBlockCodeGithub instead of <code class="...">.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML: singleton tags are closed with " />" instead of ">".
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS: htmlNormalText routes text through
	// htmlSmartypants instead of plain escaping.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS is not read in this file; presumably it is
	// interpreted by Smartypants(flags) -- confirm in the smartypants code.
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES is not read in this file; presumably it
	// is interpreted by Smartypants(flags) -- confirm in the smartypants code.
	HTML_SMARTYPANTS_LATEX_DASHES
)
37
// htmlOptions is the per-renderer state. The constructors store a pointer to
// it in Renderer.Opaque, and the html* callbacks recover it by type-asserting
// their opaque argument.
type htmlOptions struct {
	// bitwise OR of the HTML_* flags
	flags int
	// how to end singleton tags: either " />\n" or ">\n"
	closeTag string
	// bookkeeping shared by htmlHeader, htmlTocHeader and htmlTocFinalize
	tocData struct {
		// next N to use for a toc_N anchor; bumped once per header callback
		headerCount int
		// number of <ul> lists currently open in the table of contents
		currentLevel int
	}
	// set from Smartypants(flags) by HtmlRenderer; HtmlTocRenderer leaves it nil
	smartypants *SmartypantsRenderer
}
47
// Terminators for singleton tags such as <hr> and <br>. HtmlRenderer stores
// one of them in htmlOptions.closeTag depending on HTML_USE_XHTML.
var (
	// xhtmlClose self-closes the tag, XHTML style.
	xhtmlClose = " />\n"
	// htmlClose ends the tag, plain HTML style.
	htmlClose = ">\n"
)
50
51func HtmlRenderer(flags int) *Renderer {
52 // configure the rendering engine
53 r := new(Renderer)
54 r.BlockCode = htmlBlockCode
55 r.BlockQuote = htmlBlockQuote
56 r.BlockHtml = htmlBlockHtml
57 r.Header = htmlHeader
58 r.HRule = htmlHRule
59 r.List = htmlList
60 r.ListItem = htmlListItem
61 r.Paragraph = htmlParagraph
62 r.Table = htmlTable
63 r.TableRow = htmlTableRow
64 r.TableCell = htmlTableCell
65
66 r.AutoLink = htmlAutoLink
67 r.CodeSpan = htmlCodeSpan
68 r.DoubleEmphasis = htmlDoubleEmphasis
69 r.Emphasis = htmlEmphasis
70 r.Image = htmlImage
71 r.LineBreak = htmlLineBreak
72 r.Link = htmlLink
73 r.RawHtmlTag = htmlRawTag
74 r.TripleEmphasis = htmlTripleEmphasis
75 r.StrikeThrough = htmlStrikeThrough
76 r.NormalText = htmlNormalText
77
78 closeTag := htmlClose
79 if flags&HTML_USE_XHTML != 0 {
80 closeTag = xhtmlClose
81 }
82 r.Opaque = &htmlOptions{flags: flags, closeTag: closeTag, smartypants: Smartypants(flags)}
83 return r
84}
85
86func HtmlTocRenderer(flags int) *Renderer {
87 // configure the rendering engine
88 r := new(Renderer)
89 r.Header = htmlTocHeader
90
91 r.CodeSpan = htmlCodeSpan
92 r.DoubleEmphasis = htmlDoubleEmphasis
93 r.Emphasis = htmlEmphasis
94 r.TripleEmphasis = htmlTripleEmphasis
95 r.StrikeThrough = htmlStrikeThrough
96
97 r.DocumentFooter = htmlTocFinalize
98
99 closeTag := ">\n"
100 if flags&HTML_USE_XHTML != 0 {
101 closeTag = " />\n"
102 }
103 r.Opaque = &htmlOptions{flags: flags | HTML_TOC, closeTag: closeTag}
104 return r
105}
106
// attrEscape writes src to out, replacing the four characters that are
// special in HTML text and in double-quoted attribute values with character
// references:
//
//	"  becomes  &quot;
//	&  becomes  &amp;
//	<  becomes  &lt;
//	>  becomes  &gt;
//
// Every other byte is copied through untouched. The scan is byte-wise, and
// since the four special characters are single-byte ASCII, multi-byte UTF-8
// sequences pass through intact.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// copy all the normal characters since the last escape in one
		// write, then emit the replacement
		out.Write(src[org:i])
		out.WriteString(entity)
		org = i + 1
	}
	// trailing run after the last escaped character (possibly empty)
	out.Write(src[org:])
}
152
153func htmlHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
154 options := opaque.(*htmlOptions)
155 marker := out.Len()
156
157 if marker > 0 {
158 out.WriteByte('\n')
159 }
160
161 if options.flags&HTML_TOC != 0 {
162 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.tocData.headerCount))
163 options.tocData.headerCount++
164 } else {
165 out.WriteString(fmt.Sprintf("<h%d>", level))
166 }
167
168 if !text() {
169 out.Truncate(marker)
170 return
171 }
172 out.WriteString(fmt.Sprintf("</h%d>\n", level))
173}
174
175func htmlBlockHtml(out *bytes.Buffer, text []byte, opaque interface{}) {
176 options := opaque.(*htmlOptions)
177 if options.flags&HTML_SKIP_HTML != 0 {
178 return
179 }
180
181 sz := len(text)
182 for sz > 0 && text[sz-1] == '\n' {
183 sz--
184 }
185 org := 0
186 for org < sz && text[org] == '\n' {
187 org++
188 }
189 if org >= sz {
190 return
191 }
192 if out.Len() > 0 {
193 out.WriteByte('\n')
194 }
195 out.Write(text[org:sz])
196 out.WriteByte('\n')
197}
198
199func htmlHRule(out *bytes.Buffer, opaque interface{}) {
200 options := opaque.(*htmlOptions)
201
202 if out.Len() > 0 {
203 out.WriteByte('\n')
204 }
205 out.WriteString("<hr")
206 out.WriteString(options.closeTag)
207}
208
209func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
210 options := opaque.(*htmlOptions)
211 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
212 htmlBlockCodeGithub(out, text, lang, opaque)
213 } else {
214 htmlBlockCodeNormal(out, text, lang, opaque)
215 }
216}
217
218func htmlBlockCodeNormal(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
219 if out.Len() > 0 {
220 out.WriteByte('\n')
221 }
222
223 if lang != "" {
224 out.WriteString("<pre><code class=\"")
225
226 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
227 for i < len(lang) && isspace(lang[i]) {
228 i++
229 }
230
231 if i < len(lang) {
232 org := i
233 for i < len(lang) && !isspace(lang[i]) {
234 i++
235 }
236
237 if lang[org] == '.' {
238 org++
239 }
240
241 if cls > 0 {
242 out.WriteByte(' ')
243 }
244 attrEscape(out, []byte(lang[org:]))
245 }
246 }
247
248 out.WriteString("\">")
249 } else {
250 out.WriteString("<pre><code>")
251 }
252
253 if len(text) > 0 {
254 attrEscape(out, text)
255 }
256
257 out.WriteString("</code></pre>\n")
258}
259
260/*
261 * GitHub style code block:
262 *
263 * <pre lang="LANG"><code>
264 * ...
265 * </pre></code>
266 *
267 * Unlike other parsers, we store the language identifier in the <pre>,
268 * and don't let the user generate custom classes.
269 *
270 * The language identifier in the <pre> block gets postprocessed and all
271 * the code inside gets syntax highlighted with Pygments. This is much safer
272 * than letting the user specify a CSS class for highlighting.
273 *
274 * Note that we only generate HTML for the first specifier.
275 * E.g.
276 * ~~~~ {.python .numbered} => <pre lang="python"><code>
277 */
278func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
279 if out.Len() > 0 {
280 out.WriteByte('\n')
281 }
282
283 if len(lang) > 0 {
284 out.WriteString("<pre lang=\"")
285
286 i := 0
287 for i < len(lang) && !isspace(lang[i]) {
288 i++
289 }
290
291 if lang[0] == '.' {
292 attrEscape(out, []byte(lang[1:i]))
293 } else {
294 attrEscape(out, []byte(lang[:i]))
295 }
296
297 out.WriteString("\"><code>")
298 } else {
299 out.WriteString("<pre><code>")
300 }
301
302 if len(text) > 0 {
303 attrEscape(out, text)
304 }
305
306 out.WriteString("</code></pre>\n")
307}
308
309
// htmlBlockQuote wraps the already rendered text in a <blockquote> element.
// Unlike most block callbacks in this file it writes no separating newline
// before the element and no newline after the closing tag.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
315
// htmlTable assembles a table from its already rendered header and body
// rows: <table><thead>, header, </thead><tbody>, body, </tbody></table>.
// A newline is written first when out already holds content. columnData is
// not used by this renderer.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	const (
		tableOpen  = "<table><thead>\n"
		headToBody = "\n</thead><tbody>\n"
		tableClose = "\n</tbody></table>"
	)

	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	out.WriteString(tableOpen)
	out.Write(header)
	out.WriteString(headToBody)
	out.Write(body)
	out.WriteString(tableClose)
}
326
// htmlTableRow wraps the already rendered cells in <tr>...</tr>, writing a
// newline first when out already holds content.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		rowOpen  = "<tr>\n"
		rowClose = "\n</tr>"
	)

	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	out.WriteString(rowOpen)
	out.Write(text)
	out.WriteString(rowClose)
}
335
336func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
337 if out.Len() > 0 {
338 out.WriteByte('\n')
339 }
340 switch align {
341 case TABLE_ALIGNMENT_LEFT:
342 out.WriteString("<td align=\"left\">")
343 case TABLE_ALIGNMENT_RIGHT:
344 out.WriteString("<td align=\"right\">")
345 case TABLE_ALIGNMENT_CENTER:
346 out.WriteString("<td align=\"center\">")
347 default:
348 out.WriteString("<td>")
349 }
350
351 out.Write(text)
352 out.WriteString("</td>")
353}
354
355func htmlList(out *bytes.Buffer, text func() bool, flags int, opaque interface{}) {
356 marker := out.Len()
357
358 if marker > 0 {
359 out.WriteByte('\n')
360 }
361 if flags&LIST_TYPE_ORDERED != 0 {
362 out.WriteString("<ol>\n")
363 } else {
364 out.WriteString("<ul>\n")
365 }
366 if !text() {
367 out.Truncate(marker)
368 return
369 }
370 if flags&LIST_TYPE_ORDERED != 0 {
371 out.WriteString("</ol>\n")
372 } else {
373 out.WriteString("</ul>\n")
374 }
375}
376
// htmlListItem wraps the already rendered item text in <li>...</li> followed
// by a newline. Trailing newlines of text are dropped so the closing tag sits
// directly after the content.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	item := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(item)
	out.WriteString("</li>\n")
}
386
// htmlParagraph renders a paragraph as <p>...</p> followed by a newline.
// text is invoked after the opening tag has been written; when it reports
// false, everything written since entry is discarded. A newline is written
// first when out already holds content.
func htmlParagraph(out *bytes.Buffer, text func() bool, opaque interface{}) {
	start := out.Len()
	if start > 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<p>")

	if text() {
		out.WriteString("</p>\n")
		return
	}

	// nothing usable was rendered: undo the opening tag as well
	out.Truncate(start)
}
400
401func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) bool {
402 options := opaque.(*htmlOptions)
403
404 if len(link) == 0 {
405 return false
406 }
407 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
408 return false
409 }
410
411 out.WriteString("<a href=\"")
412 if kind == LINK_TYPE_EMAIL {
413 out.WriteString("mailto:")
414 }
415 attrEscape(out, link)
416 out.WriteString("\">")
417
418 /*
419 * Pretty print: if we get an email address as
420 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
421 * want to print the `mailto:` prefix
422 */
423 switch {
424 case bytes.HasPrefix(link, []byte("mailto://")):
425 attrEscape(out, link[9:])
426 case bytes.HasPrefix(link, []byte("mailto:")):
427 attrEscape(out, link[7:])
428 default:
429 attrEscape(out, link)
430 }
431
432 out.WriteString("</a>")
433
434 return true
435}
436
437func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) bool {
438 out.WriteString("<code>")
439 attrEscape(out, text)
440 out.WriteString("</code>")
441 return true
442}
443
// htmlDoubleEmphasis wraps the already rendered text in <strong>...</strong>.
// It writes nothing and reports false when text is empty.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	hasContent := len(text) > 0
	if hasContent {
		out.WriteString("<strong>")
		out.Write(text)
		out.WriteString("</strong>")
	}
	return hasContent
}
453
// htmlEmphasis wraps the already rendered text in <em>...</em>. It writes
// nothing and reports false when text is empty.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	hasContent := len(text) > 0
	if hasContent {
		out.WriteString("<em>")
		out.Write(text)
		out.WriteString("</em>")
	}
	return hasContent
}
463
464func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) bool {
465 options := opaque.(*htmlOptions)
466 if options.flags&HTML_SKIP_IMAGES != 0 {
467 return false
468 }
469
470 if len(link) == 0 {
471 return false
472 }
473 out.WriteString("<img src=\"")
474 attrEscape(out, link)
475 out.WriteString("\" alt=\"")
476 if len(alt) > 0 {
477 attrEscape(out, alt)
478 }
479 if len(title) > 0 {
480 out.WriteString("\" title=\"")
481 attrEscape(out, title)
482 }
483
484 out.WriteByte('"')
485 out.WriteString(options.closeTag)
486 return true
487}
488
489func htmlLineBreak(out *bytes.Buffer, opaque interface{}) bool {
490 options := opaque.(*htmlOptions)
491 out.WriteString("<br")
492 out.WriteString(options.closeTag)
493 return true
494}
495
496func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) bool {
497 options := opaque.(*htmlOptions)
498 if options.flags&HTML_SKIP_LINKS != 0 {
499 return false
500 }
501
502 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
503 return false
504 }
505
506 out.WriteString("<a href=\"")
507 attrEscape(out, link)
508 if len(title) > 0 {
509 out.WriteString("\" title=\"")
510 attrEscape(out, title)
511 }
512 out.WriteString("\">")
513 out.Write(content)
514 out.WriteString("</a>")
515 return true
516}
517
518func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) bool {
519 options := opaque.(*htmlOptions)
520 if options.flags&HTML_SKIP_HTML != 0 {
521 return true
522 }
523 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
524 return true
525 }
526 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
527 return true
528 }
529 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
530 return true
531 }
532 out.Write(text)
533 return true
534}
535
// htmlTripleEmphasis wraps the already rendered text in
// <strong><em>...</em></strong>. It writes nothing and reports false when
// text is empty.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	hasContent := len(text) > 0
	if hasContent {
		out.WriteString("<strong><em>")
		out.Write(text)
		out.WriteString("</em></strong>")
	}
	return hasContent
}
545
// htmlStrikeThrough wraps the already rendered text in <del>...</del>. It
// writes nothing and reports false when text is empty.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	hasContent := len(text) > 0
	if hasContent {
		out.WriteString("<del>")
		out.Write(text)
		out.WriteString("</del>")
	}
	return hasContent
}
555
556func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
557 options := opaque.(*htmlOptions)
558 if options.flags&HTML_USE_SMARTYPANTS != 0 {
559 htmlSmartypants(out, text, opaque)
560 } else {
561 attrEscape(out, text)
562 }
563}
564
565func htmlTocHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
566 options := opaque.(*htmlOptions)
567 marker := out.Len()
568
569 for level > options.tocData.currentLevel {
570 if options.tocData.currentLevel > 0 {
571 out.WriteString("<li>")
572 }
573 out.WriteString("<ul>\n")
574 options.tocData.currentLevel++
575 }
576
577 for level < options.tocData.currentLevel {
578 out.WriteString("</ul>")
579 if options.tocData.currentLevel > 1 {
580 out.WriteString("</li>\n")
581 }
582 options.tocData.currentLevel--
583 }
584
585 out.WriteString("<li><a href=\"#toc_")
586 out.WriteString(strconv.Itoa(options.tocData.headerCount))
587 out.WriteString("\">")
588 options.tocData.headerCount++
589
590 if !text() {
591 out.Truncate(marker)
592 return
593 }
594 out.WriteString("</a></li>\n")
595}
596
597func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
598 options := opaque.(*htmlOptions)
599 for options.tocData.currentLevel > 1 {
600 out.WriteString("</ul></li>\n")
601 options.tocData.currentLevel--
602 }
603
604 if options.tocData.currentLevel > 0 {
605 out.WriteString("</ul>\n")
606 }
607}
608
609func isHtmlTag(tag []byte, tagname string) bool {
610 i := 0
611 if i < len(tag) && tag[0] != '<' {
612 return false
613 }
614 i++
615 for i < len(tag) && isspace(tag[i]) {
616 i++
617 }
618
619 if i < len(tag) && tag[i] == '/' {
620 i++
621 }
622
623 for i < len(tag) && isspace(tag[i]) {
624 i++
625 }
626
627 j := i
628 for ; i < len(tag); i, j = i+1, j+1 {
629 if j >= len(tagname) {
630 break
631 }
632
633 if tag[i] != tagname[j] {
634 return false
635 }
636 }
637
638 if i == len(tag) {
639 return false
640 }
641
642 return isspace(tag[i]) || tag[i] == '>'
643}