html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Licensed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22)
23
// Html renderer configuration flags. Each constant is a distinct bit
// (1 << iota), so they are meant to be OR-ed together and passed as the
// flags argument of HtmlRenderer or HtmlTocRenderer.
const (
	// HTML_SKIP_HTML: HtmlRenderer leaves the BlockHtml callback unset and
	// htmlRawTag writes nothing for inline tags.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE: htmlRawTag drops inline tags recognized as "style".
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES: the Image callback is left unset and htmlRawTag
	// drops inline tags recognized as "img".
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS: the Link callback is left unset and htmlRawTag
	// drops inline tags recognized as "a".
	HTML_SKIP_LINKS
	// HTML_SAFELINK: htmlLink and htmlAutoLink refuse targets for which
	// isSafeLink reports false (email autolinks are exempt).
	HTML_SAFELINK
	// HTML_TOC: htmlHeader adds id="toc_N" to each header; HtmlTocRenderer
	// always turns this bit on.
	HTML_TOC
	// HTML_GITHUB_BLOCKCODE: code blocks are rendered by
	// htmlBlockCodeGithub instead of htmlBlockCode.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML: singleton tags are closed with " />\n" rather than ">\n".
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS: normal text is routed through htmlSmartypants,
	// using a renderer built by Smartypants(flags).
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS is not read in this file; it is passed
	// through to Smartypants(flags).
	// NOTE(review): presumably enables fraction substitution — confirm there.
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES is not read in this file; it is passed
	// through to Smartypants(flags).
	// NOTE(review): presumably selects LaTeX-style dash handling — confirm there.
	HTML_SMARTYPANTS_LATEX_DASHES
)
37
// htmlOptions is the per-renderer state stored in Renderer.Opaque and
// handed back to every html* callback through its opaque argument.
type htmlOptions struct {
	// flags is the OR-ed set of HTML_* options given to the constructor.
	flags int
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	// tocData tracks table-of-contents progress across header callbacks.
	tocData struct {
		// headerCount is the number used for the next toc_N anchor/link;
		// it is incremented each time a header or TOC entry is started.
		headerCount int
		// currentLevel is the number of <ul> lists htmlTocHeader currently
		// has open; htmlTocFinalize closes the remaining ones.
		currentLevel int
	}
	// smartypants is set by HtmlRenderer only when HTML_USE_SMARTYPANTS is
	// requested; otherwise it stays nil.
	smartypants *SmartypantsRenderer
}
47
// Terminators appended after the name/attributes of singleton tags such as
// <hr>, <br> and <img>. The XHTML form self-closes the tag; both forms end
// with a newline.
var (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
50
51func HtmlRenderer(flags int) *Renderer {
52 // configure the rendering engine
53 r := new(Renderer)
54 if flags&HTML_GITHUB_BLOCKCODE == 0 {
55 r.BlockCode = htmlBlockCode
56 } else {
57 r.BlockCode = htmlBlockCodeGithub
58 }
59 r.BlockQuote = htmlBlockQuote
60 if flags&HTML_SKIP_HTML == 0 {
61 r.BlockHtml = htmlRawBlock
62 }
63 r.Header = htmlHeader
64 r.HRule = htmlHRule
65 r.List = htmlList
66 r.ListItem = htmlListItem
67 r.Paragraph = htmlParagraph
68 r.Table = htmlTable
69 r.TableRow = htmlTableRow
70 r.TableCell = htmlTableCell
71
72 r.AutoLink = htmlAutoLink
73 r.CodeSpan = htmlCodeSpan
74 r.DoubleEmphasis = htmlDoubleEmphasis
75 r.Emphasis = htmlEmphasis
76 if flags&HTML_SKIP_IMAGES == 0 {
77 r.Image = htmlImage
78 }
79 r.LineBreak = htmlLineBreak
80 if flags&HTML_SKIP_LINKS == 0 {
81 r.Link = htmlLink
82 }
83 r.RawHtmlTag = htmlRawTag
84 r.TripleEmphasis = htmlTripleEmphasis
85 r.StrikeThrough = htmlStrikeThrough
86
87 var cb *SmartypantsRenderer
88 if flags&HTML_USE_SMARTYPANTS == 0 {
89 r.NormalText = htmlNormalText
90 } else {
91 cb = Smartypants(flags)
92 r.NormalText = htmlSmartypants
93 }
94
95 closeTag := htmlClose
96 if flags&HTML_USE_XHTML != 0 {
97 closeTag = xhtmlClose
98 }
99 r.Opaque = &htmlOptions{flags: flags, closeTag: closeTag, smartypants: cb}
100 return r
101}
102
103func HtmlTocRenderer(flags int) *Renderer {
104 // configure the rendering engine
105 r := new(Renderer)
106 r.Header = htmlTocHeader
107
108 r.CodeSpan = htmlCodeSpan
109 r.DoubleEmphasis = htmlDoubleEmphasis
110 r.Emphasis = htmlEmphasis
111 r.TripleEmphasis = htmlTripleEmphasis
112 r.StrikeThrough = htmlStrikeThrough
113
114 r.DocumentFooter = htmlTocFinalize
115
116 closeTag := ">\n"
117 if flags&HTML_USE_XHTML != 0 {
118 closeTag = " />\n"
119 }
120 r.Opaque = &htmlOptions{flags: flags | HTML_TOC, closeTag: closeTag}
121 return r
122}
123
// attrEscape writes src to out, replacing the four characters that are
// unsafe in HTML text and double-quoted attribute values with their
// entities:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// Runs of ordinary bytes between two special characters are copied with a
// single Write rather than byte by byte.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// flush all the normal characters since the last escape
		if i > org {
			out.Write(src[org:i])
		}
		org = i + 1
		out.WriteString(entity)
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
169
170func htmlHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
171 options := opaque.(*htmlOptions)
172 marker := out.Len()
173
174 if marker > 0 {
175 out.WriteByte('\n')
176 }
177
178 if options.flags&HTML_TOC != 0 {
179 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.tocData.headerCount))
180 options.tocData.headerCount++
181 } else {
182 out.WriteString(fmt.Sprintf("<h%d>", level))
183 }
184
185 if !text() {
186 out.Truncate(marker)
187 return
188 }
189 out.WriteString(fmt.Sprintf("</h%d>\n", level))
190}
191
// htmlRawBlock copies a raw HTML block to out after stripping its leading
// and trailing newlines. The block is preceded by a newline when out
// already has content and is always followed by one. A block consisting
// only of newlines (or nothing) produces no output.
func htmlRawBlock(out *bytes.Buffer, text []byte, opaque interface{}) {
	block := bytes.Trim(text, "\n")
	if len(block) == 0 {
		return
	}

	if out.Len() != 0 {
		out.WriteByte('\n')
	}
	out.Write(block)
	out.WriteByte('\n')
}
210
211func htmlHRule(out *bytes.Buffer, opaque interface{}) {
212 options := opaque.(*htmlOptions)
213
214 if out.Len() > 0 {
215 out.WriteByte('\n')
216 }
217 out.WriteString("<hr")
218 out.WriteString(options.closeTag)
219}
220
221func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
222 if out.Len() > 0 {
223 out.WriteByte('\n')
224 }
225
226 if lang != "" {
227 out.WriteString("<pre><code class=\"")
228
229 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
230 for i < len(lang) && isspace(lang[i]) {
231 i++
232 }
233
234 if i < len(lang) {
235 org := i
236 for i < len(lang) && !isspace(lang[i]) {
237 i++
238 }
239
240 if lang[org] == '.' {
241 org++
242 }
243
244 if cls > 0 {
245 out.WriteByte(' ')
246 }
247 attrEscape(out, []byte(lang[org:]))
248 }
249 }
250
251 out.WriteString("\">")
252 } else {
253 out.WriteString("<pre><code>")
254 }
255
256 if len(text) > 0 {
257 attrEscape(out, text)
258 }
259
260 out.WriteString("</code></pre>\n")
261}
262
263/*
264 * GitHub style code block:
265 *
266 * <pre lang="LANG"><code>
267 * ...
268 * </pre></code>
269 *
270 * Unlike other parsers, we store the language identifier in the <pre>,
271 * and don't let the user generate custom classes.
272 *
273 * The language identifier in the <pre> block gets postprocessed and all
274 * the code inside gets syntax highlighted with Pygments. This is much safer
275 * than letting the user specify a CSS class for highlighting.
276 *
277 * Note that we only generate HTML for the first specifier.
278 * E.g.
279 * ~~~~ {.python .numbered} => <pre lang="python"><code>
280 */
281func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
282 if out.Len() > 0 {
283 out.WriteByte('\n')
284 }
285
286 if len(lang) > 0 {
287 out.WriteString("<pre lang=\"")
288
289 i := 0
290 for i < len(lang) && !isspace(lang[i]) {
291 i++
292 }
293
294 if lang[0] == '.' {
295 attrEscape(out, []byte(lang[1:i]))
296 } else {
297 attrEscape(out, []byte(lang[:i]))
298 }
299
300 out.WriteString("\"><code>")
301 } else {
302 out.WriteString("<pre><code>")
303 }
304
305 if len(text) > 0 {
306 attrEscape(out, text)
307 }
308
309 out.WriteString("</code></pre>\n")
310}
311
312
// htmlBlockQuote wraps already-rendered text in a <blockquote> element.
// A newline follows the opening tag; nothing is written before the
// opening tag or after the closing one.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		opener = "<blockquote>\n"
		closer = "</blockquote>"
	)

	out.WriteString(opener)
	out.Write(text)
	out.WriteString(closer)
}
318
// htmlTable assembles a table from its already-rendered header and body
// rows: header goes inside <thead>, body inside <tbody>. A newline is
// written first when out is not empty. columnData is not used here.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	// head section
	out.WriteString("<table><thead>\n")
	out.Write(header)

	// body section
	out.WriteString("\n</thead><tbody>\n")
	out.Write(body)

	out.WriteString("\n</tbody></table>")
}
329
// htmlTableRow wraps the already-rendered cells of one row in <tr>...</tr>,
// writing a newline first when out is not empty.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	out.WriteString("<tr>\n")
	out.Write(text)
	out.WriteString("\n</tr>")
}
338
339func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
340 if out.Len() > 0 {
341 out.WriteByte('\n')
342 }
343 switch align {
344 case TABLE_ALIGNMENT_LEFT:
345 out.WriteString("<td align=\"left\">")
346 case TABLE_ALIGNMENT_RIGHT:
347 out.WriteString("<td align=\"right\">")
348 case TABLE_ALIGNMENT_CENTER:
349 out.WriteString("<td align=\"center\">")
350 default:
351 out.WriteString("<td>")
352 }
353
354 out.Write(text)
355 out.WriteString("</td>")
356}
357
358func htmlList(out *bytes.Buffer, text func() bool, flags int, opaque interface{}) {
359 marker := out.Len()
360
361 if marker > 0 {
362 out.WriteByte('\n')
363 }
364 if flags&LIST_TYPE_ORDERED != 0 {
365 out.WriteString("<ol>\n")
366 } else {
367 out.WriteString("<ul>\n")
368 }
369 if !text() {
370 out.Truncate(marker)
371 return
372 }
373 if flags&LIST_TYPE_ORDERED != 0 {
374 out.WriteString("</ol>\n")
375 } else {
376 out.WriteString("</ul>\n")
377 }
378}
379
// htmlListItem wraps an already-rendered item in <li>...</li> followed by
// a newline. Trailing newlines of text are dropped so the closing tag
// sits directly after the content. flags is not used here.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	body := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(body)
	out.WriteString("</li>\n")
}
389
// htmlParagraph renders a paragraph as <p>...</p> followed by a newline,
// writing a newline first when out is not empty. text is invoked to write
// the content; if it reports false, everything this call wrote (including
// whatever text itself wrote) is discarded.
func htmlParagraph(out *bytes.Buffer, text func() bool, opaque interface{}) {
	start := out.Len()
	if start != 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<p>")

	if text() {
		out.WriteString("</p>\n")
		return
	}

	// nothing renderable: roll the buffer back to where it was
	out.Truncate(start)
}
403
404func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
405 options := opaque.(*htmlOptions)
406
407 if len(link) == 0 {
408 return 0
409 }
410 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
411 return 0
412 }
413
414 out.WriteString("<a href=\"")
415 if kind == LINK_TYPE_EMAIL {
416 out.WriteString("mailto:")
417 }
418 attrEscape(out, link)
419 out.WriteString("\">")
420
421 /*
422 * Pretty print: if we get an email address as
423 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
424 * want to print the `mailto:` prefix
425 */
426 switch {
427 case bytes.HasPrefix(link, []byte("mailto://")):
428 attrEscape(out, link[9:])
429 case bytes.HasPrefix(link, []byte("mailto:")):
430 attrEscape(out, link[7:])
431 default:
432 attrEscape(out, link)
433 }
434
435 out.WriteString("</a>")
436
437 return 1
438}
439
440func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) int {
441 out.WriteString("<code>")
442 attrEscape(out, text)
443 out.WriteString("</code>")
444 return 1
445}
446
// htmlDoubleEmphasis wraps already-rendered text in <strong>...</strong>
// and returns 1. Empty text writes nothing and returns 0.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) != 0 {
		out.WriteString("<strong>")
		out.Write(text)
		out.WriteString("</strong>")
		return 1
	}
	return 0
}
456
// htmlEmphasis wraps already-rendered text in <em>...</em> and returns 1.
// Empty text writes nothing and returns 0.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) != 0 {
		out.WriteString("<em>")
		out.Write(text)
		out.WriteString("</em>")
		return 1
	}
	return 0
}
466
467func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
468 options := opaque.(*htmlOptions)
469 if len(link) == 0 {
470 return 0
471 }
472 out.WriteString("<img src=\"")
473 attrEscape(out, link)
474 out.WriteString("\" alt=\"")
475 if len(alt) > 0 {
476 attrEscape(out, alt)
477 }
478 if len(title) > 0 {
479 out.WriteString("\" title=\"")
480 attrEscape(out, title)
481 }
482
483 out.WriteByte('"')
484 out.WriteString(options.closeTag)
485 return 1
486}
487
488func htmlLineBreak(out *bytes.Buffer, opaque interface{}) int {
489 options := opaque.(*htmlOptions)
490 out.WriteString("<br")
491 out.WriteString(options.closeTag)
492 return 1
493}
494
495func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
496 options := opaque.(*htmlOptions)
497
498 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
499 return 0
500 }
501
502 out.WriteString("<a href=\"")
503 attrEscape(out, link)
504 if len(title) > 0 {
505 out.WriteString("\" title=\"")
506 attrEscape(out, title)
507 }
508 out.WriteString("\">")
509 out.Write(content)
510 out.WriteString("</a>")
511 return 1
512}
513
514func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) int {
515 options := opaque.(*htmlOptions)
516 if options.flags&HTML_SKIP_HTML != 0 {
517 return 1
518 }
519 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
520 return 1
521 }
522 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
523 return 1
524 }
525 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
526 return 1
527 }
528 out.Write(text)
529 return 1
530}
531
// htmlTripleEmphasis wraps already-rendered text in
// <strong><em>...</em></strong> and returns 1. Empty text writes nothing
// and returns 0.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) != 0 {
		out.WriteString("<strong><em>")
		out.Write(text)
		out.WriteString("</em></strong>")
		return 1
	}
	return 0
}
541
// htmlStrikeThrough wraps already-rendered text in <del>...</del> and
// returns 1. Empty text writes nothing and returns 0.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) != 0 {
		out.WriteString("<del>")
		out.Write(text)
		out.WriteString("</del>")
		return 1
	}
	return 0
}
551
// htmlNormalText is the NormalText callback installed by HtmlRenderer when
// smartypants is off: it writes text to out through attrEscape.
func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
	attrEscape(out, text)
}
555
556func htmlTocHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
557 options := opaque.(*htmlOptions)
558 marker := out.Len()
559
560 for level > options.tocData.currentLevel {
561 if options.tocData.currentLevel > 0 {
562 out.WriteString("<li>")
563 }
564 out.WriteString("<ul>\n")
565 options.tocData.currentLevel++
566 }
567
568 for level < options.tocData.currentLevel {
569 out.WriteString("</ul>")
570 if options.tocData.currentLevel > 1 {
571 out.WriteString("</li>\n")
572 }
573 options.tocData.currentLevel--
574 }
575
576 out.WriteString("<li><a href=\"#toc_")
577 out.WriteString(strconv.Itoa(options.tocData.headerCount))
578 out.WriteString("\">")
579 options.tocData.headerCount++
580
581 if !text() {
582 out.Truncate(marker)
583 return
584 }
585 out.WriteString("</a></li>\n")
586}
587
588func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
589 options := opaque.(*htmlOptions)
590 for options.tocData.currentLevel > 1 {
591 out.WriteString("</ul></li>\n")
592 options.tocData.currentLevel--
593 }
594
595 if options.tocData.currentLevel > 0 {
596 out.WriteString("</ul>\n")
597 }
598}
599
600func isHtmlTag(tag []byte, tagname string) bool {
601 i := 0
602 if i < len(tag) && tag[0] != '<' {
603 return false
604 }
605 i++
606 for i < len(tag) && isspace(tag[i]) {
607 i++
608 }
609
610 if i < len(tag) && tag[i] == '/' {
611 i++
612 }
613
614 for i < len(tag) && isspace(tag[i]) {
615 i++
616 }
617
618 tag_i := i
619 for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
620 if tag_i >= len(tagname) {
621 break
622 }
623
624 if tag[i] != tagname[tag_i] {
625 return false
626 }
627 }
628
629 if i == len(tag) {
630 return false
631 }
632
633 return isspace(tag[i]) || tag[i] == '>'
634}