html.go (view raw)
1//
2// Black Friday Markdown Processor
3// Originally based on http://github.com/tanoku/upskirt
4// by Russ Ross <russ@russross.com>
5//
6
7//
8//
9// HTML rendering backend
10//
11//
12
13package blackfriday
14
15import (
16 "bytes"
17 "fmt"
18 "strconv"
19)
20
21//
22//
23// HTML rendering
24//
25//
26
// Rendering flags for the HTML backend. Every flag owns one bit, so several
// can be OR-ed together and passed to HtmlRenderer or HtmlTocRenderer.
const (
	// HTML_SKIP_HTML leaves the raw-HTML block callback unset and makes
	// rndr_raw_html_tag drop inline tags.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE makes rndr_raw_html_tag drop inline style tags.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES leaves the image callback unset and drops inline img tags.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS leaves the link callback unset and drops inline a tags.
	HTML_SKIP_LINKS
	// HTML_EXPAND_TABS is not consulted anywhere in this file.
	// NOTE(review): presumably handled by the parser; confirm.
	HTML_EXPAND_TABS
	// HTML_SAFELINK makes link renderers refuse targets rejected by is_safe_link.
	HTML_SAFELINK
	// HTML_TOC makes rndr_header emit id="toc_N" attributes.
	HTML_TOC
	// HTML_HARD_WRAP turns newlines inside a paragraph into line-break tags.
	HTML_HARD_WRAP
	// HTML_GITHUB_BLOCKCODE selects rndr_blockcode_github for code blocks.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML ends singleton tags with xhtml_close instead of html_close.
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS routes normal text through rndr_smartypants.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS is only forwarded to Smartypants(flags) here.
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES is only forwarded to Smartypants(flags) here.
	HTML_SMARTYPANTS_LATEX_DASHES
)
42
43type htmlOptions struct {
44 Flags int
45 close_tag string // how to end singleton tags: usually " />\n", possibly ">\n"
46 toc_data struct {
47 header_count int
48 current_level int
49 }
50 smartypants *SmartypantsRenderer
51}
52
// Terminators appended after the name of a singleton tag such as br or hr.
var (
	xhtml_close = " />\n"
	html_close  = ">\n"
)
55
56func HtmlRenderer(flags int) *Renderer {
57 // configure the rendering engine
58 r := new(Renderer)
59 if flags&HTML_GITHUB_BLOCKCODE == 0 {
60 r.blockcode = rndr_blockcode
61 } else {
62 r.blockcode = rndr_blockcode_github
63 }
64 r.blockquote = rndr_blockquote
65 if flags&HTML_SKIP_HTML == 0 {
66 r.blockhtml = rndr_raw_block
67 }
68 r.header = rndr_header
69 r.hrule = rndr_hrule
70 r.list = rndr_list
71 r.listitem = rndr_listitem
72 r.paragraph = rndr_paragraph
73 r.table = rndr_table
74 r.table_row = rndr_tablerow
75 r.table_cell = rndr_tablecell
76
77 r.autolink = rndr_autolink
78 r.codespan = rndr_codespan
79 r.double_emphasis = rndr_double_emphasis
80 r.emphasis = rndr_emphasis
81 if flags&HTML_SKIP_IMAGES == 0 {
82 r.image = rndr_image
83 }
84 r.linebreak = rndr_linebreak
85 if flags&HTML_SKIP_LINKS == 0 {
86 r.link = rndr_link
87 }
88 r.raw_html_tag = rndr_raw_html_tag
89 r.triple_emphasis = rndr_triple_emphasis
90 r.strikethrough = rndr_strikethrough
91
92 var cb *SmartypantsRenderer
93 if flags&HTML_USE_SMARTYPANTS == 0 {
94 r.normal_text = rndr_normal_text
95 } else {
96 cb = Smartypants(flags)
97 r.normal_text = rndr_smartypants
98 }
99
100 close_tag := html_close
101 if flags&HTML_USE_XHTML != 0 {
102 close_tag = xhtml_close
103 }
104 r.opaque = &htmlOptions{Flags: flags, close_tag: close_tag, smartypants: cb}
105 return r
106}
107
108func HtmlTocRenderer(flags int) *Renderer {
109 // configure the rendering engine
110 r := new(Renderer)
111 r.header = rndr_toc_header
112
113 r.codespan = rndr_codespan
114 r.double_emphasis = rndr_double_emphasis
115 r.emphasis = rndr_emphasis
116 r.triple_emphasis = rndr_triple_emphasis
117 r.strikethrough = rndr_strikethrough
118
119 r.doc_footer = rndr_toc_finalize
120
121 close_tag := ">\n"
122 if flags&HTML_USE_XHTML != 0 {
123 close_tag = " />\n"
124 }
125 r.opaque = &htmlOptions{Flags: flags | HTML_TOC, close_tag: close_tag}
126 return r
127}
128
// attr_escape copies src to ob, replacing the four characters that are
// significant in HTML text and double-quoted attribute values with
// character references:
//
//	<  ->  &lt;
//	>  ->  &gt;
//	&  ->  &amp;
//	"  ->  &quot;
//
// Every other byte is written unchanged. An empty or nil src writes nothing.
func attr_escape(ob *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, c := range src {
		var entity string
		switch c {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// Flush the literal run in one write, then the replacement.
		ob.Write(src[org:i])
		ob.WriteString(entity)
		org = i + 1
	}
	ob.Write(src[org:])
}
156
// unescape_text copies src to ob with backslash escapes removed: each
// backslash is dropped and the byte after it is copied verbatim. A backslash
// that is the very last byte of src has nothing to escape and is dropped.
func unescape_text(ob *bytes.Buffer, src []byte) {
	for rest := src; len(rest) > 0; {
		slash := bytes.IndexByte(rest, '\\')
		if slash < 0 {
			// No more escapes: the remainder is literal.
			ob.Write(rest)
			return
		}

		ob.Write(rest[:slash])
		if slash+1 >= len(rest) {
			return
		}

		ob.WriteByte(rest[slash+1])
		rest = rest[slash+2:]
	}
}
177
178func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
179 options := opaque.(*htmlOptions)
180
181 if ob.Len() > 0 {
182 ob.WriteByte('\n')
183 }
184
185 if options.Flags&HTML_TOC != 0 {
186 ob.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
187 options.toc_data.header_count++
188 } else {
189 ob.WriteString(fmt.Sprintf("<h%d>", level))
190 }
191
192 ob.Write(text)
193 ob.WriteString(fmt.Sprintf("</h%d>\n", level))
194}
195
// rndr_raw_block writes a raw HTML block with its leading and trailing
// newlines stripped, followed by exactly one newline. A block made up only
// of newlines writes nothing. A newline separates the block from any output
// already in ob.
func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) {
	html := bytes.Trim(text, "\n")
	if len(html) == 0 {
		return
	}

	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}
	ob.Write(html)
	ob.WriteByte('\n')
}
214
215func rndr_hrule(ob *bytes.Buffer, opaque interface{}) {
216 options := opaque.(*htmlOptions)
217
218 if ob.Len() > 0 {
219 ob.WriteByte('\n')
220 }
221 ob.WriteString("<hr")
222 ob.WriteString(options.close_tag)
223}
224
225func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
226 if ob.Len() > 0 {
227 ob.WriteByte('\n')
228 }
229
230 if lang != "" {
231 ob.WriteString("<pre><code class=\"")
232
233 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
234 for i < len(lang) && isspace(lang[i]) {
235 i++
236 }
237
238 if i < len(lang) {
239 org := i
240 for i < len(lang) && !isspace(lang[i]) {
241 i++
242 }
243
244 if lang[org] == '.' {
245 org++
246 }
247
248 if cls > 0 {
249 ob.WriteByte(' ')
250 }
251 attr_escape(ob, []byte(lang[org:]))
252 }
253 }
254
255 ob.WriteString("\">")
256 } else {
257 ob.WriteString("<pre><code>")
258 }
259
260 if len(text) > 0 {
261 attr_escape(ob, text)
262 }
263
264 ob.WriteString("</code></pre>\n")
265}
266
267/*
268 * GitHub style code block:
269 *
270 * <pre lang="LANG"><code>
271 * ...
272 * </pre></code>
273 *
274 * Unlike other parsers, we store the language identifier in the <pre>,
275 * and don't let the user generate custom classes.
276 *
277 * The language identifier in the <pre> block gets postprocessed and all
278 * the code inside gets syntax highlighted with Pygments. This is much safer
279 * than letting the user specify a CSS class for highlighting.
280 *
281 * Note that we only generate HTML for the first specifier.
282 * E.g.
283 * ~~~~ {.python .numbered} => <pre lang="python"><code>
284 */
285func rndr_blockcode_github(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
286 if ob.Len() > 0 {
287 ob.WriteByte('\n')
288 }
289
290 if len(lang) > 0 {
291 ob.WriteString("<pre lang=\"")
292
293 i := 0
294 for i < len(lang) && !isspace(lang[i]) {
295 i++
296 }
297
298 if lang[0] == '.' {
299 attr_escape(ob, []byte(lang[1:i]))
300 } else {
301 attr_escape(ob, []byte(lang[:i]))
302 }
303
304 ob.WriteString("\"><code>")
305 } else {
306 ob.WriteString("<pre><code>")
307 }
308
309 if len(text) > 0 {
310 attr_escape(ob, text)
311 }
312
313 ob.WriteString("</code></pre>\n")
314}
315
316
// rndr_blockquote wraps the already-rendered text in a <blockquote>
// element. No separating newline is written before the opening tag and none
// after the closing tag.
func rndr_blockquote(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		open = "<blockquote>\n"
		shut = "</blockquote>"
	)

	ob.WriteString(open)
	ob.Write(text)
	ob.WriteString(shut)
}
322
// rndr_table writes a table from its already-rendered header and body rows:
// header goes inside <thead>, body inside <tbody>. A newline separates the
// table from any output already in ob.
func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	sections := []struct {
		markup string
		rows   []byte
	}{
		{"<table><thead>\n", header},
		{"\n</thead><tbody>\n", body},
	}
	for _, s := range sections {
		ob.WriteString(s.markup)
		ob.Write(s.rows)
	}
	ob.WriteString("\n</tbody></table>")
}
333
// rndr_tablerow wraps the already-rendered cells in text in a <tr> element.
// A newline separates the row from any output already in ob.
func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		open = "<tr>\n"
		shut = "\n</tr>"
	)

	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}
	ob.WriteString(open)
	ob.Write(text)
	ob.WriteString(shut)
}
342
343func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) {
344 if ob.Len() > 0 {
345 ob.WriteByte('\n')
346 }
347 switch align {
348 case TABLE_ALIGNMENT_LEFT:
349 ob.WriteString("<td align=\"left\">")
350 case TABLE_ALIGNMENT_RIGHT:
351 ob.WriteString("<td align=\"right\">")
352 case TABLE_ALIGNMENT_CENTER:
353 ob.WriteString("<td align=\"center\">")
354 default:
355 ob.WriteString("<td>")
356 }
357
358 ob.Write(text)
359 ob.WriteString("</td>")
360}
361
362func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
363 if ob.Len() > 0 {
364 ob.WriteByte('\n')
365 }
366 if flags&LIST_TYPE_ORDERED != 0 {
367 ob.WriteString("<ol>\n")
368 } else {
369 ob.WriteString("<ul>\n")
370 }
371 ob.Write(text)
372 if flags&LIST_TYPE_ORDERED != 0 {
373 ob.WriteString("</ol>\n")
374 } else {
375 ob.WriteString("</ul>\n")
376 }
377}
378
// rndr_listitem writes one <li> element containing text with its trailing
// newlines removed, followed by a newline. flags is not consulted.
func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	item := bytes.TrimRight(text, "\n")

	ob.WriteString("<li>")
	ob.Write(item)
	ob.WriteString("</li>\n")
}
388
389func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) {
390 options := opaque.(*htmlOptions)
391 i := 0
392
393 if ob.Len() > 0 {
394 ob.WriteByte('\n')
395 }
396
397 if len(text) == 0 {
398 return
399 }
400
401 for i < len(text) && isspace(text[i]) {
402 i++
403 }
404
405 if i == len(text) {
406 return
407 }
408
409 ob.WriteString("<p>")
410 if options.Flags&HTML_HARD_WRAP != 0 {
411 for i < len(text) {
412 org := i
413 for i < len(text) && text[i] != '\n' {
414 i++
415 }
416
417 if i > org {
418 ob.Write(text[org:i])
419 }
420
421 if i >= len(text) {
422 break
423 }
424
425 ob.WriteString("<br>")
426 ob.WriteString(options.close_tag)
427 i++
428 }
429 } else {
430 ob.Write(text[i:])
431 }
432 ob.WriteString("</p>\n")
433}
434
435func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
436 options := opaque.(*htmlOptions)
437
438 if len(link) == 0 {
439 return 0
440 }
441 if options.Flags&HTML_SAFELINK != 0 && !is_safe_link(link) && kind != LINK_TYPE_EMAIL {
442 return 0
443 }
444
445 ob.WriteString("<a href=\"")
446 if kind == LINK_TYPE_EMAIL {
447 ob.WriteString("mailto:")
448 }
449 ob.Write(link)
450 ob.WriteString("\">")
451
452 /*
453 * Pretty print: if we get an email address as
454 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
455 * want to print the `mailto:` prefix
456 */
457 if bytes.HasPrefix(link, []byte("mailto:")) {
458 attr_escape(ob, link[7:])
459 } else {
460 attr_escape(ob, link)
461 }
462
463 ob.WriteString("</a>")
464
465 return 1
466}
467
468func rndr_codespan(ob *bytes.Buffer, text []byte, opaque interface{}) int {
469 ob.WriteString("<code>")
470 attr_escape(ob, text)
471 ob.WriteString("</code>")
472 return 1
473}
474
// rndr_double_emphasis wraps the already-rendered text in <strong>.
// It returns 0 and writes nothing when text is empty, 1 otherwise.
func rndr_double_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		open = "<strong>"
		shut = "</strong>"
	)

	if len(text) == 0 {
		return 0
	}
	ob.WriteString(open)
	ob.Write(text)
	ob.WriteString(shut)
	return 1
}
484
// rndr_emphasis wraps the already-rendered text in <em>.
// It returns 0 and writes nothing when text is empty, 1 otherwise.
func rndr_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		open = "<em>"
		shut = "</em>"
	)

	if len(text) == 0 {
		return 0
	}
	ob.WriteString(open)
	ob.Write(text)
	ob.WriteString(shut)
	return 1
}
494
495func rndr_image(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
496 options := opaque.(*htmlOptions)
497 if len(link) == 0 {
498 return 0
499 }
500 ob.WriteString("<img src=\"")
501 attr_escape(ob, link)
502 ob.WriteString("\" alt=\"")
503 if len(alt) > 0 {
504 attr_escape(ob, alt)
505 }
506 if len(title) > 0 {
507 ob.WriteString("\" title=\"")
508 attr_escape(ob, title)
509 }
510
511 ob.WriteByte('"')
512 ob.WriteString(options.close_tag)
513 return 1
514}
515
516func rndr_linebreak(ob *bytes.Buffer, opaque interface{}) int {
517 options := opaque.(*htmlOptions)
518 ob.WriteString("<br")
519 ob.WriteString(options.close_tag)
520 return 1
521}
522
523func rndr_link(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
524 options := opaque.(*htmlOptions)
525
526 if options.Flags&HTML_SAFELINK != 0 && !is_safe_link(link) {
527 return 0
528 }
529
530 ob.WriteString("<a href=\"")
531 if len(link) > 0 {
532 ob.Write(link)
533 }
534 if len(title) > 0 {
535 ob.WriteString("\" title=\"")
536 attr_escape(ob, title)
537 }
538 ob.WriteString("\">")
539 if len(content) > 0 {
540 ob.Write(content)
541 }
542 ob.WriteString("</a>")
543 return 1
544}
545
546func rndr_raw_html_tag(ob *bytes.Buffer, text []byte, opaque interface{}) int {
547 options := opaque.(*htmlOptions)
548 if options.Flags&HTML_SKIP_HTML != 0 {
549 return 1
550 }
551 if options.Flags&HTML_SKIP_STYLE != 0 && is_html_tag(text, "style") {
552 return 1
553 }
554 if options.Flags&HTML_SKIP_LINKS != 0 && is_html_tag(text, "a") {
555 return 1
556 }
557 if options.Flags&HTML_SKIP_IMAGES != 0 && is_html_tag(text, "img") {
558 return 1
559 }
560 ob.Write(text)
561 return 1
562}
563
// rndr_triple_emphasis wraps the already-rendered text in <strong><em>.
// It returns 0 and writes nothing when text is empty, 1 otherwise.
func rndr_triple_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		open = "<strong><em>"
		shut = "</em></strong>"
	)

	if len(text) == 0 {
		return 0
	}
	ob.WriteString(open)
	ob.Write(text)
	ob.WriteString(shut)
	return 1
}
573
// rndr_strikethrough wraps the already-rendered text in <del>.
// It returns 0 and writes nothing when text is empty, 1 otherwise.
func rndr_strikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		open = "<del>"
		shut = "</del>"
	)

	if len(text) == 0 {
		return 0
	}
	ob.WriteString(open)
	ob.Write(text)
	ob.WriteString(shut)
	return 1
}
583
584func rndr_normal_text(ob *bytes.Buffer, text []byte, opaque interface{}) {
585 attr_escape(ob, text)
586}
587
588func rndr_toc_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
589 options := opaque.(*htmlOptions)
590 for level > options.toc_data.current_level {
591 if options.toc_data.current_level > 0 {
592 ob.WriteString("<li>")
593 }
594 ob.WriteString("<ul>\n")
595 options.toc_data.current_level++
596 }
597
598 for level < options.toc_data.current_level {
599 ob.WriteString("</ul>")
600 if options.toc_data.current_level > 1 {
601 ob.WriteString("</li>\n")
602 }
603 options.toc_data.current_level--
604 }
605
606 ob.WriteString("<li><a href=\"#toc_")
607 ob.WriteString(strconv.Itoa(options.toc_data.header_count))
608 ob.WriteString("\">")
609 options.toc_data.header_count++
610
611 if len(text) > 0 {
612 ob.Write(text)
613 }
614 ob.WriteString("</a></li>\n")
615}
616
617func rndr_toc_finalize(ob *bytes.Buffer, opaque interface{}) {
618 options := opaque.(*htmlOptions)
619 for options.toc_data.current_level > 1 {
620 ob.WriteString("</ul></li>\n")
621 options.toc_data.current_level--
622 }
623
624 if options.toc_data.current_level > 0 {
625 ob.WriteString("</ul>\n")
626 }
627}
628
629func is_html_tag(tag []byte, tagname string) bool {
630 i := 0
631 if i < len(tag) && tag[0] != '<' {
632 return false
633 }
634 i++
635 for i < len(tag) && isspace(tag[i]) {
636 i++
637 }
638
639 if i < len(tag) && tag[i] == '/' {
640 i++
641 }
642
643 for i < len(tag) && isspace(tag[i]) {
644 i++
645 }
646
647 tag_i := i
648 for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
649 if tag_i >= len(tagname) {
650 break
651 }
652
653 if tag[i] != tagname[tag_i] {
654 return false
655 }
656 }
657
658 if i == len(tag) {
659 return false
660 }
661
662 return isspace(tag[i]) || tag[i] == '>'
663}