html.go (view raw)
1//
2// Black Friday Markdown Processor
3// Originally based on http://github.com/tanoku/upskirt
4// by Russ Ross <russ@russross.com>
5//
6
7//
8//
9// HTML rendering backend
10//
11//
12
13package blackfriday
14
15import (
16 "bytes"
17 "fmt"
18 "strconv"
19)
20
// Option bits for the HTML renderer. Every constant occupies its own bit, so
// options are combined with bitwise OR and tested with bitwise AND.
const (
	// HTML_SKIP_HTML suppresses raw HTML blocks and raw inline tags.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE suppresses raw inline <style> tags.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES disables image rendering and suppresses raw <img> tags.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS disables link rendering and suppresses raw <a> tags.
	HTML_SKIP_LINKS
	// HTML_EXPAND_TABS is not consulted by the callbacks in this file;
	// presumably it is handled by the parser (TODO confirm).
	HTML_EXPAND_TABS
	// HTML_SAFELINK rejects links and autolinks that isSafeLink does not accept.
	HTML_SAFELINK
	// HTML_TOC makes headers carry sequential id="toc_N" anchors.
	HTML_TOC
	// HTML_HARD_WRAP turns every newline inside a paragraph into a line break.
	HTML_HARD_WRAP
	// HTML_GITHUB_BLOCKCODE selects the <pre lang="..."> code block style.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML closes singleton tags with " />" instead of ">".
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS routes normal text through the Smartypants renderer.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS is forwarded to Smartypants; its effect is
	// defined there (presumably fraction substitution).
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES is forwarded to Smartypants; its effect
	// is defined there (presumably LaTeX-style dash handling).
	HTML_SMARTYPANTS_LATEX_DASHES
)
36
37type htmlOptions struct {
38 flags int
39 close_tag string // how to end singleton tags: usually " />\n", possibly ">\n"
40 toc_data struct {
41 header_count int
42 current_level int
43 }
44 smartypants *SmartypantsRenderer
45}
46
// Terminators appended to singleton tags such as <br and <hr.
var (
	xhtml_close = " />\n" // XHTML self-closing form
	html_close  = ">\n"   // plain HTML form
)
49
50func HtmlRenderer(flags int) *Renderer {
51 // configure the rendering engine
52 r := new(Renderer)
53 if flags&HTML_GITHUB_BLOCKCODE == 0 {
54 r.blockcode = htmlBlockcode
55 } else {
56 r.blockcode = htmlBlockcodeGithub
57 }
58 r.blockquote = htmlBlockquote
59 if flags&HTML_SKIP_HTML == 0 {
60 r.blockhtml = htmlRawBlock
61 }
62 r.header = htmlHeader
63 r.hrule = htmlHrule
64 r.list = htmlList
65 r.listitem = htmlListitem
66 r.paragraph = htmlParagraph
67 r.table = htmlTable
68 r.tableRow = htmlTableRow
69 r.tableCell = htmlTableCell
70
71 r.autolink = htmlAutolink
72 r.codespan = htmlCodespan
73 r.doubleEmphasis = htmlDoubleEmphasis
74 r.emphasis = htmlEmphasis
75 if flags&HTML_SKIP_IMAGES == 0 {
76 r.image = htmlImage
77 }
78 r.linebreak = htmlLinebreak
79 if flags&HTML_SKIP_LINKS == 0 {
80 r.link = htmlLink
81 }
82 r.rawHtmlTag = htmlRawTag
83 r.tripleEmphasis = htmlTripleEmphasis
84 r.strikethrough = htmlStrikethrough
85
86 var cb *SmartypantsRenderer
87 if flags&HTML_USE_SMARTYPANTS == 0 {
88 r.normalText = htmlNormalText
89 } else {
90 cb = Smartypants(flags)
91 r.normalText = htmlSmartypants
92 }
93
94 close_tag := html_close
95 if flags&HTML_USE_XHTML != 0 {
96 close_tag = xhtml_close
97 }
98 r.opaque = &htmlOptions{flags: flags, close_tag: close_tag, smartypants: cb}
99 return r
100}
101
102func HtmlTocRenderer(flags int) *Renderer {
103 // configure the rendering engine
104 r := new(Renderer)
105 r.header = htmlTocHeader
106
107 r.codespan = htmlCodespan
108 r.doubleEmphasis = htmlDoubleEmphasis
109 r.emphasis = htmlEmphasis
110 r.tripleEmphasis = htmlTripleEmphasis
111 r.strikethrough = htmlStrikethrough
112
113 r.documentFooter = htmlTocFinalize
114
115 close_tag := ">\n"
116 if flags&HTML_USE_XHTML != 0 {
117 close_tag = " />\n"
118 }
119 r.opaque = &htmlOptions{flags: flags | HTML_TOC, close_tag: close_tag}
120 return r
121}
122
// attrEscape writes src to out with the four characters that are unsafe in
// HTML text and in double-quoted attribute values replaced by entity
// references: < becomes &lt;, > becomes &gt;, & becomes &amp; and " becomes
// &quot;. Every other byte is copied through unchanged.
func attrEscape(out *bytes.Buffer, src []byte) {
	start := 0 // first byte of the pending run of ordinary characters
	for i, c := range src {
		var entity string
		switch c {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// flush the ordinary run, then emit the replacement
		out.Write(src[start:i])
		out.WriteString(entity)
		start = i + 1
	}
	out.Write(src[start:])
}
150
151func htmlHeader(out *bytes.Buffer, text []byte, level int, opaque interface{}) {
152 options := opaque.(*htmlOptions)
153
154 if out.Len() > 0 {
155 out.WriteByte('\n')
156 }
157
158 if options.flags&HTML_TOC != 0 {
159 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
160 options.toc_data.header_count++
161 } else {
162 out.WriteString(fmt.Sprintf("<h%d>", level))
163 }
164
165 out.Write(text)
166 out.WriteString(fmt.Sprintf("</h%d>\n", level))
167}
168
// htmlRawBlock copies a raw HTML block to out with leading and trailing
// newlines stripped and exactly one newline appended. A block that contains
// nothing but newlines writes nothing. A separating newline is written first
// when out already has content.
func htmlRawBlock(out *bytes.Buffer, text []byte, opaque interface{}) {
	block := bytes.Trim(text, "\n")
	if len(block) == 0 {
		return
	}

	if out.Len() != 0 {
		out.WriteByte('\n')
	}
	out.Write(block)
	out.WriteByte('\n')
}
187
188func htmlHrule(out *bytes.Buffer, opaque interface{}) {
189 options := opaque.(*htmlOptions)
190
191 if out.Len() > 0 {
192 out.WriteByte('\n')
193 }
194 out.WriteString("<hr")
195 out.WriteString(options.close_tag)
196}
197
198func htmlBlockcode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
199 if out.Len() > 0 {
200 out.WriteByte('\n')
201 }
202
203 if lang != "" {
204 out.WriteString("<pre><code class=\"")
205
206 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
207 for i < len(lang) && isspace(lang[i]) {
208 i++
209 }
210
211 if i < len(lang) {
212 org := i
213 for i < len(lang) && !isspace(lang[i]) {
214 i++
215 }
216
217 if lang[org] == '.' {
218 org++
219 }
220
221 if cls > 0 {
222 out.WriteByte(' ')
223 }
224 attrEscape(out, []byte(lang[org:]))
225 }
226 }
227
228 out.WriteString("\">")
229 } else {
230 out.WriteString("<pre><code>")
231 }
232
233 if len(text) > 0 {
234 attrEscape(out, text)
235 }
236
237 out.WriteString("</code></pre>\n")
238}
239
240/*
241 * GitHub style code block:
242 *
243 * <pre lang="LANG"><code>
244 * ...
245 * </pre></code>
246 *
247 * Unlike other parsers, we store the language identifier in the <pre>,
248 * and don't let the user generate custom classes.
249 *
250 * The language identifier in the <pre> block gets postprocessed and all
251 * the code inside gets syntax highlighted with Pygments. This is much safer
252 * than letting the user specify a CSS class for highlighting.
253 *
254 * Note that we only generate HTML for the first specifier.
255 * E.g.
256 * ~~~~ {.python .numbered} => <pre lang="python"><code>
257 */
258func htmlBlockcodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
259 if out.Len() > 0 {
260 out.WriteByte('\n')
261 }
262
263 if len(lang) > 0 {
264 out.WriteString("<pre lang=\"")
265
266 i := 0
267 for i < len(lang) && !isspace(lang[i]) {
268 i++
269 }
270
271 if lang[0] == '.' {
272 attrEscape(out, []byte(lang[1:i]))
273 } else {
274 attrEscape(out, []byte(lang[:i]))
275 }
276
277 out.WriteString("\"><code>")
278 } else {
279 out.WriteString("<pre><code>")
280 }
281
282 if len(text) > 0 {
283 attrEscape(out, text)
284 }
285
286 out.WriteString("</code></pre>\n")
287}
288
289
// htmlBlockquote wraps the already-rendered text in a <blockquote> element.
// The opening tag is followed by a newline; nothing is written after the
// closing tag.
func htmlBlockquote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
295
// htmlTable renders a table from its already-rendered header and body rows:
// header goes inside <thead>, body inside <tbody>. A newline is written first
// when out already has content. columnData is not used here.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	// head section
	out.WriteString("<table><thead>\n")
	out.Write(header)

	// body section
	out.WriteString("\n</thead><tbody>\n")
	out.Write(body)

	out.WriteString("\n</tbody></table>")
}
306
// htmlTableRow wraps the already-rendered cells in text in a <tr> element,
// writing a newline first when out already has content.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	const (
		openTag  = "<tr>\n"
		closeTag = "\n</tr>"
	)
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
315
316func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
317 if out.Len() > 0 {
318 out.WriteByte('\n')
319 }
320 switch align {
321 case TABLE_ALIGNMENT_LEFT:
322 out.WriteString("<td align=\"left\">")
323 case TABLE_ALIGNMENT_RIGHT:
324 out.WriteString("<td align=\"right\">")
325 case TABLE_ALIGNMENT_CENTER:
326 out.WriteString("<td align=\"center\">")
327 default:
328 out.WriteString("<td>")
329 }
330
331 out.Write(text)
332 out.WriteString("</td>")
333}
334
335func htmlList(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
336 if out.Len() > 0 {
337 out.WriteByte('\n')
338 }
339 if flags&LIST_TYPE_ORDERED != 0 {
340 out.WriteString("<ol>\n")
341 } else {
342 out.WriteString("<ul>\n")
343 }
344 out.Write(text)
345 if flags&LIST_TYPE_ORDERED != 0 {
346 out.WriteString("</ol>\n")
347 } else {
348 out.WriteString("</ul>\n")
349 }
350}
351
// htmlListitem renders one list item as <li>text</li> followed by a newline.
// Trailing newlines in text are dropped so the closing tag follows the
// content directly. flags is not used here.
func htmlListitem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	out.WriteString("<li>")
	out.Write(bytes.TrimRight(text, "\n"))
	out.WriteString("</li>\n")
}
361
362func htmlParagraph(out *bytes.Buffer, text []byte, opaque interface{}) {
363 options := opaque.(*htmlOptions)
364 i := 0
365
366 if out.Len() > 0 {
367 out.WriteByte('\n')
368 }
369
370 if len(text) == 0 {
371 return
372 }
373
374 for i < len(text) && isspace(text[i]) {
375 i++
376 }
377
378 if i == len(text) {
379 return
380 }
381
382 out.WriteString("<p>")
383 if options.flags&HTML_HARD_WRAP != 0 {
384 for i < len(text) {
385 org := i
386 for i < len(text) && text[i] != '\n' {
387 i++
388 }
389
390 if i > org {
391 out.Write(text[org:i])
392 }
393
394 if i >= len(text) {
395 break
396 }
397
398 out.WriteString("<br>")
399 out.WriteString(options.close_tag)
400 i++
401 }
402 } else {
403 out.Write(text[i:])
404 }
405 out.WriteString("</p>\n")
406}
407
408func htmlAutolink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
409 options := opaque.(*htmlOptions)
410
411 if len(link) == 0 {
412 return 0
413 }
414 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
415 return 0
416 }
417
418 out.WriteString("<a href=\"")
419 if kind == LINK_TYPE_EMAIL {
420 out.WriteString("mailto:")
421 }
422 out.Write(link)
423 out.WriteString("\">")
424
425 /*
426 * Pretty print: if we get an email address as
427 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
428 * want to print the `mailto:` prefix
429 */
430 if bytes.HasPrefix(link, []byte("mailto:")) {
431 attrEscape(out, link[7:])
432 } else {
433 attrEscape(out, link)
434 }
435
436 out.WriteString("</a>")
437
438 return 1
439}
440
441func htmlCodespan(out *bytes.Buffer, text []byte, opaque interface{}) int {
442 out.WriteString("<code>")
443 attrEscape(out, text)
444 out.WriteString("</code>")
445 return 1
446}
447
// htmlDoubleEmphasis wraps the already-rendered text in <strong> and returns
// 1. Empty text writes nothing and returns 0.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<strong>"
		closeTag = "</strong>"
	)

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
457
// htmlEmphasis wraps the already-rendered text in <em> and returns 1. Empty
// text writes nothing and returns 0.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<em>"
		closeTag = "</em>"
	)

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
467
468func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
469 options := opaque.(*htmlOptions)
470 if len(link) == 0 {
471 return 0
472 }
473 out.WriteString("<img src=\"")
474 attrEscape(out, link)
475 out.WriteString("\" alt=\"")
476 if len(alt) > 0 {
477 attrEscape(out, alt)
478 }
479 if len(title) > 0 {
480 out.WriteString("\" title=\"")
481 attrEscape(out, title)
482 }
483
484 out.WriteByte('"')
485 out.WriteString(options.close_tag)
486 return 1
487}
488
489func htmlLinebreak(out *bytes.Buffer, opaque interface{}) int {
490 options := opaque.(*htmlOptions)
491 out.WriteString("<br")
492 out.WriteString(options.close_tag)
493 return 1
494}
495
496func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
497 options := opaque.(*htmlOptions)
498
499 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
500 return 0
501 }
502
503 out.WriteString("<a href=\"")
504 if len(link) > 0 {
505 out.Write(link)
506 }
507 if len(title) > 0 {
508 out.WriteString("\" title=\"")
509 attrEscape(out, title)
510 }
511 out.WriteString("\">")
512 if len(content) > 0 {
513 out.Write(content)
514 }
515 out.WriteString("</a>")
516 return 1
517}
518
519func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) int {
520 options := opaque.(*htmlOptions)
521 if options.flags&HTML_SKIP_HTML != 0 {
522 return 1
523 }
524 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
525 return 1
526 }
527 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
528 return 1
529 }
530 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
531 return 1
532 }
533 out.Write(text)
534 return 1
535}
536
// htmlTripleEmphasis wraps the already-rendered text in <strong><em> and
// returns 1. Empty text writes nothing and returns 0.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<strong><em>"
		closeTag = "</em></strong>"
	)

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
546
// htmlStrikethrough wraps the already-rendered text in <del> and returns 1.
// Empty text writes nothing and returns 0.
func htmlStrikethrough(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<del>"
		closeTag = "</del>"
	)

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
556
557func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
558 attrEscape(out, text)
559}
560
561func htmlTocHeader(out *bytes.Buffer, text []byte, level int, opaque interface{}) {
562 options := opaque.(*htmlOptions)
563 for level > options.toc_data.current_level {
564 if options.toc_data.current_level > 0 {
565 out.WriteString("<li>")
566 }
567 out.WriteString("<ul>\n")
568 options.toc_data.current_level++
569 }
570
571 for level < options.toc_data.current_level {
572 out.WriteString("</ul>")
573 if options.toc_data.current_level > 1 {
574 out.WriteString("</li>\n")
575 }
576 options.toc_data.current_level--
577 }
578
579 out.WriteString("<li><a href=\"#toc_")
580 out.WriteString(strconv.Itoa(options.toc_data.header_count))
581 out.WriteString("\">")
582 options.toc_data.header_count++
583
584 if len(text) > 0 {
585 out.Write(text)
586 }
587 out.WriteString("</a></li>\n")
588}
589
590func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
591 options := opaque.(*htmlOptions)
592 for options.toc_data.current_level > 1 {
593 out.WriteString("</ul></li>\n")
594 options.toc_data.current_level--
595 }
596
597 if options.toc_data.current_level > 0 {
598 out.WriteString("</ul>\n")
599 }
600}
601
602func isHtmlTag(tag []byte, tagname string) bool {
603 i := 0
604 if i < len(tag) && tag[0] != '<' {
605 return false
606 }
607 i++
608 for i < len(tag) && isspace(tag[i]) {
609 i++
610 }
611
612 if i < len(tag) && tag[i] == '/' {
613 i++
614 }
615
616 for i < len(tag) && isspace(tag[i]) {
617 i++
618 }
619
620 tag_i := i
621 for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
622 if tag_i >= len(tagname) {
623 break
624 }
625
626 if tag[i] != tagname[tag_i] {
627 return false
628 }
629 }
630
631 if i == len(tag) {
632 return false
633 }
634
635 return isspace(tag[i]) || tag[i] == '>'
636}