html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Rendering option flags for the HTML backend. Each constant is a distinct
// bit so that several options can be OR-ed together into Html.flags.
const (
	// HTML_SKIP_HTML drops raw HTML blocks and inline raw HTML tags.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE drops inline raw <style> tags.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES suppresses images and inline raw <img> tags.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS renders link text without a hyperlink and drops
	// inline raw <a> tags.
	HTML_SKIP_LINKS
	// HTML_SAFELINK only emits hyperlinks whose target passes isSafeLink.
	HTML_SAFELINK
	// HTML_TOC generates a table of contents from the document headers.
	HTML_TOC
	// HTML_OMIT_CONTENTS, together with HTML_TOC, leaves the document
	// body out of the output after the table of contents.
	HTML_OMIT_CONTENTS
	// HTML_COMPLETE_PAGE wraps the output in a full HTML page.
	HTML_COMPLETE_PAGE
	// HTML_GITHUB_BLOCKCODE renders code blocks as <pre lang="...">.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML closes singleton tags XHTML-style and selects the
	// XHTML doctype for complete pages.
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS routes normal text through Smartypants.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS is not read in this file; presumably it
	// is consumed by Smartypants(flags) -- confirm there.
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES is not read in this file; presumably
	// it is consumed by Smartypants(flags) -- confirm there.
	HTML_SMARTYPANTS_LATEX_DASHES
)
40
41type Html struct {
42 flags int // HTML_* options
43 closeTag string // how to end singleton tags: either " />\n" or ">\n"
44 title string // document title
45 css string // optional css file url (used with HTML_COMPLETE_PAGE)
46
47 // table of contents data
48 tocMarker int
49 headerCount int
50 currentLevel int
51 toc *bytes.Buffer
52
53 smartypants *SmartypantsRenderer
54}
55
// Terminators for singleton (void) tags; HtmlRenderer picks one of them
// depending on whether HTML_USE_XHTML is set.
const (
	// xhtmlClose self-closes the tag, XHTML style.
	xhtmlClose = " />\n"
	// htmlClose ends the tag, plain HTML style.
	htmlClose = ">\n"
)
60
61func HtmlRenderer(flags int, title string, css string) Renderer {
62 // configure the rendering engine
63 closeTag := htmlClose
64 if flags&HTML_USE_XHTML != 0 {
65 closeTag = xhtmlClose
66 }
67
68 return &Html{
69 flags: flags,
70 closeTag: closeTag,
71 title: title,
72 css: css,
73
74 headerCount: 0,
75 currentLevel: 0,
76 toc: new(bytes.Buffer),
77
78 smartypants: Smartypants(flags),
79 }
80}
81
// attrEscape writes src to out, replacing the characters that are special
// in HTML text and attribute values with their entity references:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// All other bytes are copied through unchanged. Runs of ordinary bytes are
// written with a single Write call rather than byte by byte, because this
// function sits on the hot path of the renderer.
func attrEscape(out *bytes.Buffer, src []byte) {
	// org is the start of the pending run of bytes that need no escaping.
	org := 0
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// Flush the ordinary bytes seen since the last escape.
		if i > org {
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// Flush whatever follows the final escaped character.
	if org < len(src) {
		out.Write(src[org:])
	}
}
127
128func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
129 marker := out.Len()
130 doubleSpace(out)
131
132 if options.flags&HTML_TOC != 0 {
133 // headerCount is incremented in htmlTocHeader
134 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
135 } else {
136 out.WriteString(fmt.Sprintf("<h%d>", level))
137 }
138
139 tocMarker := out.Len()
140 if !text() {
141 out.Truncate(marker)
142 return
143 }
144
145 // are we building a table of contents?
146 if options.flags&HTML_TOC != 0 {
147 options.TocHeader(out.Bytes()[tocMarker:], level)
148 }
149
150 out.WriteString(fmt.Sprintf("</h%d>\n", level))
151}
152
153func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
154 if options.flags&HTML_SKIP_HTML != 0 {
155 return
156 }
157
158 doubleSpace(out)
159 out.Write(text)
160 out.WriteByte('\n')
161}
162
163func (options *Html) HRule(out *bytes.Buffer) {
164 doubleSpace(out)
165 out.WriteString("<hr")
166 out.WriteString(options.closeTag)
167}
168
169func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
170 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
171 options.BlockCodeGithub(out, text, lang)
172 } else {
173 options.BlockCodeNormal(out, text, lang)
174 }
175}
176
177func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
178 doubleSpace(out)
179
180 // parse out the language names/classes
181 count := 0
182 for _, elt := range strings.Fields(lang) {
183 if elt[0] == '.' {
184 elt = elt[1:]
185 }
186 if len(elt) == 0 {
187 continue
188 }
189 if count == 0 {
190 out.WriteString("<pre><code class=\"")
191 } else {
192 out.WriteByte(' ')
193 }
194 attrEscape(out, []byte(elt))
195 count++
196 }
197
198 if count == 0 {
199 out.WriteString("<pre><code>")
200 } else {
201 out.WriteString("\">")
202 }
203
204 attrEscape(out, text)
205 out.WriteString("</code></pre>\n")
206}
207
208/*
209 * GitHub style code block:
210 *
211 * <pre lang="LANG"><code>
212 * ...
213 * </pre></code>
214 *
215 * Unlike other parsers, we store the language identifier in the <pre>,
216 * and don't let the user generate custom classes.
217 *
218 * The language identifier in the <pre> block gets postprocessed and all
219 * the code inside gets syntax highlighted with Pygments. This is much safer
220 * than letting the user specify a CSS class for highlighting.
221 *
222 * Note that we only generate HTML for the first specifier.
223 * E.g.
224 * ~~~~ {.python .numbered} => <pre lang="python"><code>
225 */
226func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
227 doubleSpace(out)
228
229 // parse out the language name
230 count := 0
231 for _, elt := range strings.Fields(lang) {
232 if elt[0] == '.' {
233 elt = elt[1:]
234 }
235 if len(elt) == 0 {
236 continue
237 }
238 out.WriteString("<pre lang=\"")
239 attrEscape(out, []byte(elt))
240 out.WriteString("\"><code>")
241 count++
242 break
243 }
244
245 if count == 0 {
246 out.WriteString("<pre><code>")
247 }
248
249 attrEscape(out, text)
250 out.WriteString("</code></pre>\n")
251}
252
253
254func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
255 doubleSpace(out)
256 out.WriteString("<blockquote>\n")
257 out.Write(text)
258 out.WriteString("</blockquote>\n")
259}
260
261func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
262 doubleSpace(out)
263 out.WriteString("<table>\n<thead>\n")
264 out.Write(header)
265 out.WriteString("</thead>\n\n<tbody>\n")
266 out.Write(body)
267 out.WriteString("</tbody>\n</table>\n")
268}
269
270func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
271 doubleSpace(out)
272 out.WriteString("<tr>\n")
273 out.Write(text)
274 out.WriteString("\n</tr>\n")
275}
276
277func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
278 doubleSpace(out)
279 switch align {
280 case TABLE_ALIGNMENT_LEFT:
281 out.WriteString("<td align=\"left\">")
282 case TABLE_ALIGNMENT_RIGHT:
283 out.WriteString("<td align=\"right\">")
284 case TABLE_ALIGNMENT_CENTER:
285 out.WriteString("<td align=\"center\">")
286 default:
287 out.WriteString("<td>")
288 }
289
290 out.Write(text)
291 out.WriteString("</td>")
292}
293
294func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
295 marker := out.Len()
296 doubleSpace(out)
297
298 if flags&LIST_TYPE_ORDERED != 0 {
299 out.WriteString("<ol>")
300 } else {
301 out.WriteString("<ul>")
302 }
303 if !text() {
304 out.Truncate(marker)
305 return
306 }
307 if flags&LIST_TYPE_ORDERED != 0 {
308 out.WriteString("</ol>\n")
309 } else {
310 out.WriteString("</ul>\n")
311 }
312}
313
314func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
315 if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
316 doubleSpace(out)
317 }
318 out.WriteString("<li>")
319 out.Write(text)
320 out.WriteString("</li>\n")
321}
322
323func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
324 marker := out.Len()
325 doubleSpace(out)
326
327 out.WriteString("<p>")
328 if !text() {
329 out.Truncate(marker)
330 return
331 }
332 out.WriteString("</p>\n")
333}
334
335func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
336 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
337 // mark it but don't link it if it is not a safe link: no smartypants
338 out.WriteString("<tt>")
339 attrEscape(out, link)
340 out.WriteString("</tt>")
341 return
342 }
343
344 out.WriteString("<a href=\"")
345 if kind == LINK_TYPE_EMAIL {
346 out.WriteString("mailto:")
347 }
348 attrEscape(out, link)
349 out.WriteString("\">")
350
351 // Pretty print: if we get an email address as
352 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
353 // want to print the `mailto:` prefix
354 switch {
355 case bytes.HasPrefix(link, []byte("mailto://")):
356 attrEscape(out, link[len("mailto://"):])
357 case bytes.HasPrefix(link, []byte("mailto:")):
358 attrEscape(out, link[len("mailto:"):])
359 default:
360 attrEscape(out, link)
361 }
362
363 out.WriteString("</a>")
364}
365
366func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
367 out.WriteString("<code>")
368 attrEscape(out, text)
369 out.WriteString("</code>")
370}
371
372func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
373 out.WriteString("<strong>")
374 out.Write(text)
375 out.WriteString("</strong>")
376}
377
378func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
379 if len(text) == 0 {
380 return
381 }
382 out.WriteString("<em>")
383 out.Write(text)
384 out.WriteString("</em>")
385}
386
387func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
388 if options.flags&HTML_SKIP_IMAGES != 0 {
389 return
390 }
391
392 out.WriteString("<img src=\"")
393 attrEscape(out, link)
394 out.WriteString("\" alt=\"")
395 if len(alt) > 0 {
396 attrEscape(out, alt)
397 }
398 if len(title) > 0 {
399 out.WriteString("\" title=\"")
400 attrEscape(out, title)
401 }
402
403 out.WriteByte('"')
404 out.WriteString(options.closeTag)
405 return
406}
407
408func (options *Html) LineBreak(out *bytes.Buffer) {
409 out.WriteString("<br")
410 out.WriteString(options.closeTag)
411}
412
413func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
414 if options.flags&HTML_SKIP_LINKS != 0 {
415 // write the link text out but don't link it, just mark it with typewriter font
416 out.WriteString("<tt>")
417 attrEscape(out, content)
418 out.WriteString("</tt>")
419 return
420 }
421
422 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
423 // write the link text out but don't link it, just mark it with typewriter font
424 out.WriteString("<tt>")
425 attrEscape(out, content)
426 out.WriteString("</tt>")
427 return
428 }
429
430 out.WriteString("<a href=\"")
431 attrEscape(out, link)
432 if len(title) > 0 {
433 out.WriteString("\" title=\"")
434 attrEscape(out, title)
435 }
436 out.WriteString("\">")
437 out.Write(content)
438 out.WriteString("</a>")
439 return
440}
441
442func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
443 if options.flags&HTML_SKIP_HTML != 0 {
444 return
445 }
446 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
447 return
448 }
449 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
450 return
451 }
452 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
453 return
454 }
455 out.Write(text)
456}
457
458func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
459 out.WriteString("<strong><em>")
460 out.Write(text)
461 out.WriteString("</em></strong>")
462}
463
464func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
465 out.WriteString("<del>")
466 out.Write(text)
467 out.WriteString("</del>")
468}
469
470func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
471 out.Write(entity)
472}
473
474func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
475 if options.flags&HTML_USE_SMARTYPANTS != 0 {
476 options.Smartypants(out, text)
477 } else {
478 attrEscape(out, text)
479 }
480}
481
482func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
483 smrt := smartypantsData{false, false}
484
485 // first do normal entity escaping
486 var escaped bytes.Buffer
487 attrEscape(&escaped, text)
488 text = escaped.Bytes()
489
490 mark := 0
491 for i := 0; i < len(text); i++ {
492 if action := options.smartypants[text[i]]; action != nil {
493 if i > mark {
494 out.Write(text[mark:i])
495 }
496
497 previousChar := byte(0)
498 if i > 0 {
499 previousChar = text[i-1]
500 }
501 i += action(out, &smrt, previousChar, text[i:])
502 mark = i + 1
503 }
504 }
505
506 if mark < len(text) {
507 out.Write(text[mark:])
508 }
509}
510
511func (options *Html) DocumentHeader(out *bytes.Buffer) {
512 if options.flags&HTML_COMPLETE_PAGE == 0 {
513 return
514 }
515
516 ending := ""
517 if options.flags&HTML_USE_XHTML != 0 {
518 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
519 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
520 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
521 ending = " /"
522 } else {
523 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
524 out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
525 out.WriteString("<html>\n")
526 }
527 out.WriteString("<head>\n")
528 out.WriteString(" <title>")
529 options.NormalText(out, []byte(options.title))
530 out.WriteString("</title>\n")
531 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
532 out.WriteString(VERSION)
533 out.WriteString("\"")
534 out.WriteString(ending)
535 out.WriteString(">\n")
536 out.WriteString(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
537 out.WriteString(ending)
538 out.WriteString(">\n")
539 if options.css != "" {
540 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
541 attrEscape(out, []byte(options.css))
542 out.WriteString("\"")
543 out.WriteString(ending)
544 out.WriteString(">\n")
545 }
546 out.WriteString("</head>\n")
547 out.WriteString("<body>\n")
548
549 options.tocMarker = out.Len()
550}
551
552func (options *Html) DocumentFooter(out *bytes.Buffer) {
553 // finalize and insert the table of contents
554 if options.flags&HTML_TOC != 0 {
555 options.TocFinalize()
556
557 // now we have to insert the table of contents into the document
558 var temp bytes.Buffer
559
560 // start by making a copy of everything after the document header
561 temp.Write(out.Bytes()[options.tocMarker:])
562
563 // now clear the copied material from the main output buffer
564 out.Truncate(options.tocMarker)
565
566 // corner case spacing issue
567 if options.flags&HTML_COMPLETE_PAGE != 0 {
568 out.WriteByte('\n')
569 }
570
571 // insert the table of contents
572 out.Write(options.toc.Bytes())
573
574 // corner case spacing issue
575 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
576 out.WriteByte('\n')
577 }
578
579 // write out everything that came after it
580 if options.flags&HTML_OMIT_CONTENTS == 0 {
581 out.Write(temp.Bytes())
582 }
583 }
584
585 if options.flags&HTML_COMPLETE_PAGE != 0 {
586 out.WriteString("\n</body>\n")
587 out.WriteString("</html>\n")
588 }
589
590}
591
592func (options *Html) TocHeader(text []byte, level int) {
593 for level > options.currentLevel {
594 switch {
595 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
596 // this sublist can nest underneath a header
597 size := options.toc.Len()
598 options.toc.Truncate(size - len("</li>\n"))
599
600 case options.currentLevel > 0:
601 options.toc.WriteString("<li>")
602 }
603 if options.toc.Len() > 0 {
604 options.toc.WriteByte('\n')
605 }
606 options.toc.WriteString("<ul>\n")
607 options.currentLevel++
608 }
609
610 for level < options.currentLevel {
611 options.toc.WriteString("</ul>")
612 if options.currentLevel > 1 {
613 options.toc.WriteString("</li>\n")
614 }
615 options.currentLevel--
616 }
617
618 options.toc.WriteString("<li><a href=\"#toc_")
619 options.toc.WriteString(strconv.Itoa(options.headerCount))
620 options.toc.WriteString("\">")
621 options.headerCount++
622
623 options.toc.Write(text)
624
625 options.toc.WriteString("</a></li>\n")
626}
627
628func (options *Html) TocFinalize() {
629 for options.currentLevel > 1 {
630 options.toc.WriteString("</ul></li>\n")
631 options.currentLevel--
632 }
633
634 if options.currentLevel > 0 {
635 options.toc.WriteString("</ul>\n")
636 }
637}
638
639func isHtmlTag(tag []byte, tagname string) bool {
640 i := 0
641 if i < len(tag) && tag[0] != '<' {
642 return false
643 }
644 i++
645 for i < len(tag) && isspace(tag[i]) {
646 i++
647 }
648
649 if i < len(tag) && tag[i] == '/' {
650 i++
651 }
652
653 for i < len(tag) && isspace(tag[i]) {
654 i++
655 }
656
657 j := i
658 for ; i < len(tag); i, j = i+1, j+1 {
659 if j >= len(tagname) {
660 break
661 }
662
663 if tag[i] != tagname[j] {
664 return false
665 }
666 }
667
668 if i == len(tag) {
669 return false
670 }
671
672 return isspace(tag[i]) || tag[i] == '>'
673}
674
// doubleSpace appends a newline to out unless out is still empty, so that
// block-level output is separated from whatever precedes it.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}