html.go (view raw)
1//
2// Black Friday Markdown Processor
3// Originally based on http://github.com/tanoku/upskirt
4// by Russ Ross <russ@russross.com>
5//
6
7//
8//
9// HTML rendering backend
10//
11//
12
13package blackfriday
14
15import (
16 "bytes"
17 "fmt"
18 "strconv"
19)
20
// Rendering flags for the HTML backend. Each flag is a distinct bit, so
// several can be OR-ed together and passed to HtmlRenderer or HtmlTocRenderer.
const (
	// HTML_SKIP_HTML: HtmlRenderer installs no raw HTML block callback and
	// htmlRawTag writes nothing for inline tags.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE: htmlRawTag drops inline <style> tags.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES: no Image callback is installed and htmlRawTag drops
	// inline <img> tags.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS: no Link callback is installed and htmlRawTag drops
	// inline <a> tags.
	HTML_SKIP_LINKS
	// HTML_EXPAND_TABS: not referenced in this file.
	// NOTE(review): presumably consumed elsewhere in the package - confirm.
	HTML_EXPAND_TABS
	// HTML_SAFELINK: htmlLink and htmlAutoLink refuse links that isSafeLink
	// rejects (e-mail autolinks are exempt).
	HTML_SAFELINK
	// HTML_TOC: htmlHeader gives every header an id="toc_N" attribute;
	// HtmlTocRenderer always sets this flag.
	HTML_TOC
	// HTML_HARD_WRAP: htmlParagraph emits a line break tag at every newline
	// inside a paragraph.
	HTML_HARD_WRAP
	// HTML_GITHUB_BLOCKCODE: code blocks are rendered by htmlBlockCodeGithub
	// instead of htmlBlockCode.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML: singleton tags are closed with " />\n" instead of ">\n".
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS: normal text is rendered by htmlSmartypants using
	// the renderer returned by Smartypants(flags).
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS: passed through to Smartypants(flags).
	// NOTE(review): exact effect is defined outside this file - confirm.
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES: passed through to Smartypants(flags).
	// NOTE(review): exact effect is defined outside this file - confirm.
	HTML_SMARTYPANTS_LATEX_DASHES
)
36
// htmlOptions is the per-renderer state that HtmlRenderer and HtmlTocRenderer
// store in Renderer.Opaque; the HTML callbacks recover it from their opaque
// argument.
type htmlOptions struct {
	// flags is the OR-ed set of HTML_* rendering flags.
	flags int
	// closeTag is how singleton tags are ended: " />\n" when HTML_USE_XHTML
	// is set, ">\n" otherwise.
	closeTag string
	// tocData is the running table-of-contents state.
	tocData struct {
		// headerCount is the number used for the next toc_N anchor.
		headerCount int
		// currentLevel is the nesting depth of the <ul> lists currently open
		// in the table of contents.
		currentLevel int
	}
	// smartypants is set by HtmlRenderer only when HTML_USE_SMARTYPANTS is
	// requested; it stays nil otherwise.
	smartypants *SmartypantsRenderer
}
46
// Terminators appended to singleton tags such as <hr, <br and <img.
var (
	xhtmlClose = " />\n" // XHTML style, selected by HTML_USE_XHTML
	htmlClose  = ">\n"   // plain HTML style, the default
)
49
50func HtmlRenderer(flags int) *Renderer {
51 // configure the rendering engine
52 r := new(Renderer)
53 if flags&HTML_GITHUB_BLOCKCODE == 0 {
54 r.BlockCode = htmlBlockCode
55 } else {
56 r.BlockCode = htmlBlockCodeGithub
57 }
58 r.BlockQuote = htmlBlockQuote
59 if flags&HTML_SKIP_HTML == 0 {
60 r.BlockHtml = htmlRawBlock
61 }
62 r.Header = htmlHeader
63 r.HRule = htmlHRule
64 r.List = htmlList
65 r.ListItem = htmlListItem
66 r.Paragraph = htmlParagraph
67 r.Table = htmlTable
68 r.TableRow = htmlTableRow
69 r.TableCell = htmlTableCell
70
71 r.AutoLink = htmlAutoLink
72 r.CodeSpan = htmlCodeSpan
73 r.DoubleEmphasis = htmlDoubleEmphasis
74 r.Emphasis = htmlEmphasis
75 if flags&HTML_SKIP_IMAGES == 0 {
76 r.Image = htmlImage
77 }
78 r.LineBreak = htmlLineBreak
79 if flags&HTML_SKIP_LINKS == 0 {
80 r.Link = htmlLink
81 }
82 r.RawHtmlTag = htmlRawTag
83 r.TripleEmphasis = htmlTripleEmphasis
84 r.StrikeThrough = htmlStrikeThrough
85
86 var cb *SmartypantsRenderer
87 if flags&HTML_USE_SMARTYPANTS == 0 {
88 r.NormalText = htmlNormalText
89 } else {
90 cb = Smartypants(flags)
91 r.NormalText = htmlSmartypants
92 }
93
94 closeTag := htmlClose
95 if flags&HTML_USE_XHTML != 0 {
96 closeTag = xhtmlClose
97 }
98 r.Opaque = &htmlOptions{flags: flags, closeTag: closeTag, smartypants: cb}
99 return r
100}
101
102func HtmlTocRenderer(flags int) *Renderer {
103 // configure the rendering engine
104 r := new(Renderer)
105 r.Header = htmlTocHeader
106
107 r.CodeSpan = htmlCodeSpan
108 r.DoubleEmphasis = htmlDoubleEmphasis
109 r.Emphasis = htmlEmphasis
110 r.TripleEmphasis = htmlTripleEmphasis
111 r.StrikeThrough = htmlStrikeThrough
112
113 r.DocumentFooter = htmlTocFinalize
114
115 closeTag := ">\n"
116 if flags&HTML_USE_XHTML != 0 {
117 closeTag = " />\n"
118 }
119 r.Opaque = &htmlOptions{flags: flags | HTML_TOC, closeTag: closeTag}
120 return r
121}
122
// attrEscape writes src to out, replacing the characters that are unsafe in
// HTML text and in double-quoted attribute values with character references:
//
//	<  becomes  &lt;
//	>  becomes  &gt;
//	&  becomes  &amp;
//	"  becomes  &quot;
//
// Every other byte is copied through unchanged, in runs, so multi-byte UTF-8
// sequences are never split.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of ordinary bytes
	for i, ch := range src {
		var entity string
		switch ch {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// flush the ordinary bytes seen since the last escape
		if i > org {
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// trailing run after the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
150
151func htmlHeader(out *bytes.Buffer, text []byte, level int, opaque interface{}) {
152 options := opaque.(*htmlOptions)
153
154 if out.Len() > 0 {
155 out.WriteByte('\n')
156 }
157
158 if options.flags&HTML_TOC != 0 {
159 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.tocData.headerCount))
160 options.tocData.headerCount++
161 } else {
162 out.WriteString(fmt.Sprintf("<h%d>", level))
163 }
164
165 out.Write(text)
166 out.WriteString(fmt.Sprintf("</h%d>\n", level))
167}
168
// htmlRawBlock copies a block of raw HTML to out unmodified, except that
// leading and trailing newlines are stripped and exactly one newline is
// written after it. A block consisting only of newlines produces no output.
// A newline separates the block from any output already in out.
func htmlRawBlock(out *bytes.Buffer, text []byte, opaque interface{}) {
	block := bytes.Trim(text, "\n")
	if len(block) == 0 {
		return
	}

	if out.Len() != 0 {
		out.WriteByte('\n')
	}
	out.Write(block)
	out.WriteByte('\n')
}
187
188func htmlHRule(out *bytes.Buffer, opaque interface{}) {
189 options := opaque.(*htmlOptions)
190
191 if out.Len() > 0 {
192 out.WriteByte('\n')
193 }
194 out.WriteString("<hr")
195 out.WriteString(options.closeTag)
196}
197
198func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
199 if out.Len() > 0 {
200 out.WriteByte('\n')
201 }
202
203 if lang != "" {
204 out.WriteString("<pre><code class=\"")
205
206 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
207 for i < len(lang) && isspace(lang[i]) {
208 i++
209 }
210
211 if i < len(lang) {
212 org := i
213 for i < len(lang) && !isspace(lang[i]) {
214 i++
215 }
216
217 if lang[org] == '.' {
218 org++
219 }
220
221 if cls > 0 {
222 out.WriteByte(' ')
223 }
224 attrEscape(out, []byte(lang[org:]))
225 }
226 }
227
228 out.WriteString("\">")
229 } else {
230 out.WriteString("<pre><code>")
231 }
232
233 if len(text) > 0 {
234 attrEscape(out, text)
235 }
236
237 out.WriteString("</code></pre>\n")
238}
239
240/*
241 * GitHub style code block:
242 *
243 * <pre lang="LANG"><code>
244 * ...
245 * </pre></code>
246 *
247 * Unlike other parsers, we store the language identifier in the <pre>,
248 * and don't let the user generate custom classes.
249 *
250 * The language identifier in the <pre> block gets postprocessed and all
251 * the code inside gets syntax highlighted with Pygments. This is much safer
252 * than letting the user specify a CSS class for highlighting.
253 *
254 * Note that we only generate HTML for the first specifier.
255 * E.g.
256 * ~~~~ {.python .numbered} => <pre lang="python"><code>
257 */
258func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
259 if out.Len() > 0 {
260 out.WriteByte('\n')
261 }
262
263 if len(lang) > 0 {
264 out.WriteString("<pre lang=\"")
265
266 i := 0
267 for i < len(lang) && !isspace(lang[i]) {
268 i++
269 }
270
271 if lang[0] == '.' {
272 attrEscape(out, []byte(lang[1:i]))
273 } else {
274 attrEscape(out, []byte(lang[:i]))
275 }
276
277 out.WriteString("\"><code>")
278 } else {
279 out.WriteString("<pre><code>")
280 }
281
282 if len(text) > 0 {
283 attrEscape(out, text)
284 }
285
286 out.WriteString("</code></pre>\n")
287}
288
289
// htmlBlockQuote wraps the already-rendered text in a <blockquote> element.
// No separator is written before the opening tag and no newline after the
// closing tag.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
295
// htmlTable assembles a table from its already-rendered header and body
// rows, placing them in <thead> and <tbody> respectively. columnData is not
// used. A newline separates the table from any output already in out.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	// header section
	out.WriteString("<table><thead>\n")
	out.Write(header)

	// body section
	out.WriteString("\n</thead><tbody>\n")
	out.Write(body)

	out.WriteString("\n</tbody></table>")
}
306
// htmlTableRow wraps the already-rendered cells in text in a <tr> element.
// A newline separates the row from any output already in out.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<tr>\n"
		closeTag = "\n</tr>"
	)

	if out.Len() != 0 {
		out.WriteByte('\n')
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
315
316func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
317 if out.Len() > 0 {
318 out.WriteByte('\n')
319 }
320 switch align {
321 case TABLE_ALIGNMENT_LEFT:
322 out.WriteString("<td align=\"left\">")
323 case TABLE_ALIGNMENT_RIGHT:
324 out.WriteString("<td align=\"right\">")
325 case TABLE_ALIGNMENT_CENTER:
326 out.WriteString("<td align=\"center\">")
327 default:
328 out.WriteString("<td>")
329 }
330
331 out.Write(text)
332 out.WriteString("</td>")
333}
334
335func htmlList(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
336 if out.Len() > 0 {
337 out.WriteByte('\n')
338 }
339 if flags&LIST_TYPE_ORDERED != 0 {
340 out.WriteString("<ol>\n")
341 } else {
342 out.WriteString("<ul>\n")
343 }
344 out.Write(text)
345 if flags&LIST_TYPE_ORDERED != 0 {
346 out.WriteString("</ol>\n")
347 } else {
348 out.WriteString("</ul>\n")
349 }
350}
351
// htmlListItem wraps the already-rendered item content in <li>...</li>,
// dropping any trailing newlines from the content so the closing tag follows
// it directly. flags is not used.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	item := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(item)
	out.WriteString("</li>\n")
}
361
362func htmlParagraph(out *bytes.Buffer, text []byte, opaque interface{}) {
363 options := opaque.(*htmlOptions)
364 i := 0
365
366 if out.Len() > 0 {
367 out.WriteByte('\n')
368 }
369
370 if len(text) == 0 {
371 return
372 }
373
374 for i < len(text) && isspace(text[i]) {
375 i++
376 }
377
378 if i == len(text) {
379 return
380 }
381
382 out.WriteString("<p>")
383 if options.flags&HTML_HARD_WRAP != 0 {
384 for i < len(text) {
385 org := i
386 for i < len(text) && text[i] != '\n' {
387 i++
388 }
389
390 if i > org {
391 out.Write(text[org:i])
392 }
393
394 if i >= len(text) {
395 break
396 }
397
398 out.WriteString("<br>")
399 out.WriteString(options.closeTag)
400 i++
401 }
402 } else {
403 out.Write(text[i:])
404 }
405 out.WriteString("</p>\n")
406}
407
408func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
409 options := opaque.(*htmlOptions)
410
411 if len(link) == 0 {
412 return 0
413 }
414 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
415 return 0
416 }
417
418 out.WriteString("<a href=\"")
419 if kind == LINK_TYPE_EMAIL {
420 out.WriteString("mailto:")
421 }
422 out.Write(link)
423 out.WriteString("\">")
424
425 /*
426 * Pretty print: if we get an email address as
427 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
428 * want to print the `mailto:` prefix
429 */
430 switch {
431 case bytes.HasPrefix(link, []byte("mailto://")):
432 attrEscape(out, link[9:])
433 case bytes.HasPrefix(link, []byte("mailto:")):
434 attrEscape(out, link[7:])
435 default:
436 attrEscape(out, link)
437 }
438
439 out.WriteString("</a>")
440
441 return 1
442}
443
444func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) int {
445 out.WriteString("<code>")
446 attrEscape(out, text)
447 out.WriteString("</code>")
448 return 1
449}
450
// htmlDoubleEmphasis renders doubly emphasised text as
// <strong>text</strong>. It returns 0 and writes nothing when text is empty,
// 1 otherwise. text is written as-is.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<strong>"
		closeTag = "</strong>"
	)

	if len(text) == 0 {
		return 0
	}

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
460
// htmlEmphasis renders emphasised text as <em>text</em>. It returns 0 and
// writes nothing when text is empty, 1 otherwise. text is written as-is.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<em>"
		closeTag = "</em>"
	)

	if len(text) == 0 {
		return 0
	}

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
470
471func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
472 options := opaque.(*htmlOptions)
473 if len(link) == 0 {
474 return 0
475 }
476 out.WriteString("<img src=\"")
477 attrEscape(out, link)
478 out.WriteString("\" alt=\"")
479 if len(alt) > 0 {
480 attrEscape(out, alt)
481 }
482 if len(title) > 0 {
483 out.WriteString("\" title=\"")
484 attrEscape(out, title)
485 }
486
487 out.WriteByte('"')
488 out.WriteString(options.closeTag)
489 return 1
490}
491
492func htmlLineBreak(out *bytes.Buffer, opaque interface{}) int {
493 options := opaque.(*htmlOptions)
494 out.WriteString("<br")
495 out.WriteString(options.closeTag)
496 return 1
497}
498
499func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
500 options := opaque.(*htmlOptions)
501
502 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
503 return 0
504 }
505
506 out.WriteString("<a href=\"")
507 if len(link) > 0 {
508 out.Write(link)
509 }
510 if len(title) > 0 {
511 out.WriteString("\" title=\"")
512 attrEscape(out, title)
513 }
514 out.WriteString("\">")
515 if len(content) > 0 {
516 out.Write(content)
517 }
518 out.WriteString("</a>")
519 return 1
520}
521
522func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) int {
523 options := opaque.(*htmlOptions)
524 if options.flags&HTML_SKIP_HTML != 0 {
525 return 1
526 }
527 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
528 return 1
529 }
530 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
531 return 1
532 }
533 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
534 return 1
535 }
536 out.Write(text)
537 return 1
538}
539
// htmlTripleEmphasis renders triply emphasised text as
// <strong><em>text</em></strong>. It returns 0 and writes nothing when text
// is empty, 1 otherwise. text is written as-is.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<strong><em>"
		closeTag = "</em></strong>"
	)

	if len(text) == 0 {
		return 0
	}

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
549
// htmlStrikeThrough renders struck-through text as <del>text</del>. It
// returns 0 and writes nothing when text is empty, 1 otherwise. text is
// written as-is.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<del>"
		closeTag = "</del>"
	)

	if len(text) == 0 {
		return 0
	}

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
559
// htmlNormalText is the NormalText callback HtmlRenderer installs when
// HTML_USE_SMARTYPANTS is not set. It hands text to attrEscape, which writes
// it to out; opaque is not used.
func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
	attrEscape(out, text)
}
563
564func htmlTocHeader(out *bytes.Buffer, text []byte, level int, opaque interface{}) {
565 options := opaque.(*htmlOptions)
566 for level > options.tocData.currentLevel {
567 if options.tocData.currentLevel > 0 {
568 out.WriteString("<li>")
569 }
570 out.WriteString("<ul>\n")
571 options.tocData.currentLevel++
572 }
573
574 for level < options.tocData.currentLevel {
575 out.WriteString("</ul>")
576 if options.tocData.currentLevel > 1 {
577 out.WriteString("</li>\n")
578 }
579 options.tocData.currentLevel--
580 }
581
582 out.WriteString("<li><a href=\"#toc_")
583 out.WriteString(strconv.Itoa(options.tocData.headerCount))
584 out.WriteString("\">")
585 options.tocData.headerCount++
586
587 if len(text) > 0 {
588 out.Write(text)
589 }
590 out.WriteString("</a></li>\n")
591}
592
593func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
594 options := opaque.(*htmlOptions)
595 for options.tocData.currentLevel > 1 {
596 out.WriteString("</ul></li>\n")
597 options.tocData.currentLevel--
598 }
599
600 if options.tocData.currentLevel > 0 {
601 out.WriteString("</ul>\n")
602 }
603}
604
605func isHtmlTag(tag []byte, tagname string) bool {
606 i := 0
607 if i < len(tag) && tag[0] != '<' {
608 return false
609 }
610 i++
611 for i < len(tag) && isspace(tag[i]) {
612 i++
613 }
614
615 if i < len(tag) && tag[i] == '/' {
616 i++
617 }
618
619 for i < len(tag) && isspace(tag[i]) {
620 i++
621 }
622
623 tag_i := i
624 for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
625 if tag_i >= len(tagname) {
626 break
627 }
628
629 if tag[i] != tagname[tag_i] {
630 return false
631 }
632 }
633
634 if i == len(tag) {
635 return false
636 }
637
638 return isspace(tag[i]) || tag[i] == '>'
639}