html.go (view raw)
//
// Black Friday Markdown Processor
// Originally based on http://github.com/tanoku/upskirt
// by Russ Ross <russ@russross.com>
//

//
//
// HTML rendering backend
//
//
12
13package blackfriday
14
15import (
16 "bytes"
17 "fmt"
18 "strconv"
19)
20
// Bit flags accepted by HtmlRenderer and HtmlTocRenderer. Combine them with
// bitwise OR.
const (
	// HTML_SKIP_HTML drops raw HTML blocks and inline raw HTML tags.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE drops inline raw <style> tags.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES disables image rendering and drops raw <img> tags.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS disables link rendering and drops raw <a> tags.
	HTML_SKIP_LINKS
	// HTML_SAFELINK rejects links that isSafeLink does not accept.
	HTML_SAFELINK
	// HTML_TOC makes headers carry an id="toc_N" anchor.
	HTML_TOC
	// HTML_GITHUB_BLOCKCODE selects the <pre lang="..."> code block style.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML closes singleton tags with " />" instead of ">".
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS routes normal text through the smartypants renderer.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS is forwarded to Smartypants; presumably it
	// enables fraction substitution (not visible in this file).
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES is forwarded to Smartypants; presumably it
	// selects LaTeX-style dash handling (not visible in this file).
	HTML_SMARTYPANTS_LATEX_DASHES
)
34
35type htmlOptions struct {
36 flags int
37 closeTag string // how to end singleton tags: either " />\n" or ">\n"
38 tocData struct {
39 headerCount int
40 currentLevel int
41 }
42 smartypants *SmartypantsRenderer
43}
44
// Terminators for singleton tags such as <hr>, <br> and <img>.
var (
	xhtmlClose = " />\n" // XHTML self-closing form
	htmlClose  = ">\n"   // plain HTML form
)
47
48func HtmlRenderer(flags int) *Renderer {
49 // configure the rendering engine
50 r := new(Renderer)
51 if flags&HTML_GITHUB_BLOCKCODE == 0 {
52 r.BlockCode = htmlBlockCode
53 } else {
54 r.BlockCode = htmlBlockCodeGithub
55 }
56 r.BlockQuote = htmlBlockQuote
57 if flags&HTML_SKIP_HTML == 0 {
58 r.BlockHtml = htmlRawBlock
59 }
60 r.Header = htmlHeader
61 r.HRule = htmlHRule
62 r.List = htmlList
63 r.ListItem = htmlListItem
64 r.Paragraph = htmlParagraph
65 r.Table = htmlTable
66 r.TableRow = htmlTableRow
67 r.TableCell = htmlTableCell
68
69 r.AutoLink = htmlAutoLink
70 r.CodeSpan = htmlCodeSpan
71 r.DoubleEmphasis = htmlDoubleEmphasis
72 r.Emphasis = htmlEmphasis
73 if flags&HTML_SKIP_IMAGES == 0 {
74 r.Image = htmlImage
75 }
76 r.LineBreak = htmlLineBreak
77 if flags&HTML_SKIP_LINKS == 0 {
78 r.Link = htmlLink
79 }
80 r.RawHtmlTag = htmlRawTag
81 r.TripleEmphasis = htmlTripleEmphasis
82 r.StrikeThrough = htmlStrikeThrough
83
84 var cb *SmartypantsRenderer
85 if flags&HTML_USE_SMARTYPANTS == 0 {
86 r.NormalText = htmlNormalText
87 } else {
88 cb = Smartypants(flags)
89 r.NormalText = htmlSmartypants
90 }
91
92 closeTag := htmlClose
93 if flags&HTML_USE_XHTML != 0 {
94 closeTag = xhtmlClose
95 }
96 r.Opaque = &htmlOptions{flags: flags, closeTag: closeTag, smartypants: cb}
97 return r
98}
99
100func HtmlTocRenderer(flags int) *Renderer {
101 // configure the rendering engine
102 r := new(Renderer)
103 r.Header = htmlTocHeader
104
105 r.CodeSpan = htmlCodeSpan
106 r.DoubleEmphasis = htmlDoubleEmphasis
107 r.Emphasis = htmlEmphasis
108 r.TripleEmphasis = htmlTripleEmphasis
109 r.StrikeThrough = htmlStrikeThrough
110
111 r.DocumentFooter = htmlTocFinalize
112
113 closeTag := ">\n"
114 if flags&HTML_USE_XHTML != 0 {
115 closeTag = " />\n"
116 }
117 r.Opaque = &htmlOptions{flags: flags | HTML_TOC, closeTag: closeTag}
118 return r
119}
120
// attrEscape appends src to out, replacing the four characters that are
// significant in HTML text and attribute values with their entities:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// Runs of ordinary bytes between two escapes are copied with a single Write.
func attrEscape(out *bytes.Buffer, src []byte) {
	org := 0 // start of the pending run of bytes that need no escaping
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// flush the normal characters seen since the last escape
		if i > org {
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}
	if org < len(src) {
		out.Write(src[org:])
	}
}
166
167func htmlHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
168 options := opaque.(*htmlOptions)
169 marker := out.Len()
170
171 if marker > 0 {
172 out.WriteByte('\n')
173 }
174
175 if options.flags&HTML_TOC != 0 {
176 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.tocData.headerCount))
177 options.tocData.headerCount++
178 } else {
179 out.WriteString(fmt.Sprintf("<h%d>", level))
180 }
181
182 if !text() {
183 out.Truncate(marker)
184 return
185 }
186 out.WriteString(fmt.Sprintf("</h%d>\n", level))
187}
188
// htmlRawBlock copies a raw HTML block to out with leading and trailing
// newlines stripped, followed by exactly one newline. A block consisting
// only of newlines (or nothing) produces no output.
func htmlRawBlock(out *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	// separate from any preceding output
	if out.Len() > 0 {
		out.WriteByte('\n')
	}
	out.Write(body)
	out.WriteByte('\n')
}
207
208func htmlHRule(out *bytes.Buffer, opaque interface{}) {
209 options := opaque.(*htmlOptions)
210
211 if out.Len() > 0 {
212 out.WriteByte('\n')
213 }
214 out.WriteString("<hr")
215 out.WriteString(options.closeTag)
216}
217
218func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
219 if out.Len() > 0 {
220 out.WriteByte('\n')
221 }
222
223 if lang != "" {
224 out.WriteString("<pre><code class=\"")
225
226 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
227 for i < len(lang) && isspace(lang[i]) {
228 i++
229 }
230
231 if i < len(lang) {
232 org := i
233 for i < len(lang) && !isspace(lang[i]) {
234 i++
235 }
236
237 if lang[org] == '.' {
238 org++
239 }
240
241 if cls > 0 {
242 out.WriteByte(' ')
243 }
244 attrEscape(out, []byte(lang[org:]))
245 }
246 }
247
248 out.WriteString("\">")
249 } else {
250 out.WriteString("<pre><code>")
251 }
252
253 if len(text) > 0 {
254 attrEscape(out, text)
255 }
256
257 out.WriteString("</code></pre>\n")
258}
259
260/*
261 * GitHub style code block:
262 *
263 * <pre lang="LANG"><code>
264 * ...
265 * </pre></code>
266 *
267 * Unlike other parsers, we store the language identifier in the <pre>,
268 * and don't let the user generate custom classes.
269 *
270 * The language identifier in the <pre> block gets postprocessed and all
271 * the code inside gets syntax highlighted with Pygments. This is much safer
272 * than letting the user specify a CSS class for highlighting.
273 *
274 * Note that we only generate HTML for the first specifier.
275 * E.g.
276 * ~~~~ {.python .numbered} => <pre lang="python"><code>
277 */
278func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
279 if out.Len() > 0 {
280 out.WriteByte('\n')
281 }
282
283 if len(lang) > 0 {
284 out.WriteString("<pre lang=\"")
285
286 i := 0
287 for i < len(lang) && !isspace(lang[i]) {
288 i++
289 }
290
291 if lang[0] == '.' {
292 attrEscape(out, []byte(lang[1:i]))
293 } else {
294 attrEscape(out, []byte(lang[:i]))
295 }
296
297 out.WriteString("\"><code>")
298 } else {
299 out.WriteString("<pre><code>")
300 }
301
302 if len(text) > 0 {
303 attrEscape(out, text)
304 }
305
306 out.WriteString("</code></pre>\n")
307}
308
309
// htmlBlockQuote wraps already-rendered text in a <blockquote> element.
// Unlike most block callbacks here it writes no separating newline before
// the element and no newline after the closing tag.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
315
// htmlTable assembles a table from its pre-rendered header and body rows,
// placing them in <thead> and <tbody> respectively. columnData is not used
// by this renderer. A newline is written first when out is non-empty.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	// header section
	out.WriteString("<table><thead>\n")
	out.Write(header)

	// body section
	out.WriteString("\n</thead><tbody>\n")
	out.Write(body)

	out.WriteString("\n</tbody></table>")
}
326
// htmlTableRow wraps the pre-rendered cells of one row in <tr>...</tr>,
// preceded by a newline when out is non-empty.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	out.WriteString("<tr>\n")
	out.Write(text)
	out.WriteString("\n</tr>")
}
335
336func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
337 if out.Len() > 0 {
338 out.WriteByte('\n')
339 }
340 switch align {
341 case TABLE_ALIGNMENT_LEFT:
342 out.WriteString("<td align=\"left\">")
343 case TABLE_ALIGNMENT_RIGHT:
344 out.WriteString("<td align=\"right\">")
345 case TABLE_ALIGNMENT_CENTER:
346 out.WriteString("<td align=\"center\">")
347 default:
348 out.WriteString("<td>")
349 }
350
351 out.Write(text)
352 out.WriteString("</td>")
353}
354
355func htmlList(out *bytes.Buffer, text func() bool, flags int, opaque interface{}) {
356 marker := out.Len()
357
358 if marker > 0 {
359 out.WriteByte('\n')
360 }
361 if flags&LIST_TYPE_ORDERED != 0 {
362 out.WriteString("<ol>\n")
363 } else {
364 out.WriteString("<ul>\n")
365 }
366 if !text() {
367 out.Truncate(marker)
368 return
369 }
370 if flags&LIST_TYPE_ORDERED != 0 {
371 out.WriteString("</ol>\n")
372 } else {
373 out.WriteString("</ul>\n")
374 }
375}
376
// htmlListItem wraps pre-rendered item text in <li>...</li>, dropping any
// trailing newlines from the text so the closing tag follows it directly.
// flags is not used by this renderer.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	body := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(body)
	out.WriteString("</li>\n")
}
386
// htmlParagraph renders a paragraph as <p>...</p>. text is invoked to write
// the contents; if it reports false, everything this call wrote (including
// the separating newline) is discarded.
func htmlParagraph(out *bytes.Buffer, text func() bool, opaque interface{}) {
	start := out.Len()

	// separate from any preceding output
	if start > 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<p>")

	if text() {
		out.WriteString("</p>\n")
		return
	}

	out.Truncate(start)
}
400
401func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
402 options := opaque.(*htmlOptions)
403
404 if len(link) == 0 {
405 return 0
406 }
407 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
408 return 0
409 }
410
411 out.WriteString("<a href=\"")
412 if kind == LINK_TYPE_EMAIL {
413 out.WriteString("mailto:")
414 }
415 attrEscape(out, link)
416 out.WriteString("\">")
417
418 /*
419 * Pretty print: if we get an email address as
420 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
421 * want to print the `mailto:` prefix
422 */
423 switch {
424 case bytes.HasPrefix(link, []byte("mailto://")):
425 attrEscape(out, link[9:])
426 case bytes.HasPrefix(link, []byte("mailto:")):
427 attrEscape(out, link[7:])
428 default:
429 attrEscape(out, link)
430 }
431
432 out.WriteString("</a>")
433
434 return 1
435}
436
437func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) int {
438 out.WriteString("<code>")
439 attrEscape(out, text)
440 out.WriteString("</code>")
441 return 1
442}
443
// htmlDoubleEmphasis wraps pre-rendered text in <strong>...</strong>.
// It writes nothing and returns 0 for empty text; otherwise it returns 1.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const openTag, closeTag = "<strong>", "</strong>"

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
453
// htmlEmphasis wraps pre-rendered text in <em>...</em>.
// It writes nothing and returns 0 for empty text; otherwise it returns 1.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const openTag, closeTag = "<em>", "</em>"

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
463
464func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
465 options := opaque.(*htmlOptions)
466 if len(link) == 0 {
467 return 0
468 }
469 out.WriteString("<img src=\"")
470 attrEscape(out, link)
471 out.WriteString("\" alt=\"")
472 if len(alt) > 0 {
473 attrEscape(out, alt)
474 }
475 if len(title) > 0 {
476 out.WriteString("\" title=\"")
477 attrEscape(out, title)
478 }
479
480 out.WriteByte('"')
481 out.WriteString(options.closeTag)
482 return 1
483}
484
485func htmlLineBreak(out *bytes.Buffer, opaque interface{}) int {
486 options := opaque.(*htmlOptions)
487 out.WriteString("<br")
488 out.WriteString(options.closeTag)
489 return 1
490}
491
492func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
493 options := opaque.(*htmlOptions)
494
495 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
496 return 0
497 }
498
499 out.WriteString("<a href=\"")
500 attrEscape(out, link)
501 if len(title) > 0 {
502 out.WriteString("\" title=\"")
503 attrEscape(out, title)
504 }
505 out.WriteString("\">")
506 out.Write(content)
507 out.WriteString("</a>")
508 return 1
509}
510
511func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) int {
512 options := opaque.(*htmlOptions)
513 if options.flags&HTML_SKIP_HTML != 0 {
514 return 1
515 }
516 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
517 return 1
518 }
519 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
520 return 1
521 }
522 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
523 return 1
524 }
525 out.Write(text)
526 return 1
527}
528
// htmlTripleEmphasis wraps pre-rendered text in <strong><em>...</em></strong>.
// It writes nothing and returns 0 for empty text; otherwise it returns 1.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const openTag, closeTag = "<strong><em>", "</em></strong>"

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
538
// htmlStrikeThrough wraps pre-rendered text in <del>...</del>.
// It writes nothing and returns 0 for empty text; otherwise it returns 1.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const openTag, closeTag = "<del>", "</del>"

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
548
549func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
550 attrEscape(out, text)
551}
552
553func htmlTocHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
554 options := opaque.(*htmlOptions)
555 marker := out.Len()
556
557 for level > options.tocData.currentLevel {
558 if options.tocData.currentLevel > 0 {
559 out.WriteString("<li>")
560 }
561 out.WriteString("<ul>\n")
562 options.tocData.currentLevel++
563 }
564
565 for level < options.tocData.currentLevel {
566 out.WriteString("</ul>")
567 if options.tocData.currentLevel > 1 {
568 out.WriteString("</li>\n")
569 }
570 options.tocData.currentLevel--
571 }
572
573 out.WriteString("<li><a href=\"#toc_")
574 out.WriteString(strconv.Itoa(options.tocData.headerCount))
575 out.WriteString("\">")
576 options.tocData.headerCount++
577
578 if !text() {
579 out.Truncate(marker)
580 return
581 }
582 out.WriteString("</a></li>\n")
583}
584
585func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
586 options := opaque.(*htmlOptions)
587 for options.tocData.currentLevel > 1 {
588 out.WriteString("</ul></li>\n")
589 options.tocData.currentLevel--
590 }
591
592 if options.tocData.currentLevel > 0 {
593 out.WriteString("</ul>\n")
594 }
595}
596
597func isHtmlTag(tag []byte, tagname string) bool {
598 i := 0
599 if i < len(tag) && tag[0] != '<' {
600 return false
601 }
602 i++
603 for i < len(tag) && isspace(tag[i]) {
604 i++
605 }
606
607 if i < len(tag) && tag[i] == '/' {
608 i++
609 }
610
611 for i < len(tag) && isspace(tag[i]) {
612 i++
613 }
614
615 tag_i := i
616 for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
617 if tag_i >= len(tagname) {
618 break
619 }
620
621 if tag[i] != tagname[tag_i] {
622 return false
623 }
624 }
625
626 if i == len(tag) {
627 return false
628 }
629
630 return isspace(tag[i]) || tag[i] == '>'
631}