html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22)
23
// HTML renderer configuration options. Each constant occupies its own bit
// (1 << iota), so options are combined with bitwise OR and passed as the
// flags argument of HtmlRenderer.
const (
	// BlockHtml and RawHtmlTag emit nothing
	HTML_SKIP_HTML = 1 << iota
	// RawHtmlTag drops tags recognized as "style" by isHtmlTag
	HTML_SKIP_STYLE
	// Image emits nothing; RawHtmlTag drops "img" tags
	HTML_SKIP_IMAGES
	// Link emits nothing; RawHtmlTag drops "a" tags
	HTML_SKIP_LINKS
	// Link and AutoLink drop targets rejected by isSafeLink
	// (AutoLink exempts email links)
	HTML_SAFELINK
	// headers get toc_N ids and DocumentFooter inserts a table of contents
	HTML_TOC
	// with HTML_TOC, DocumentFooter writes the table of contents only
	HTML_OMIT_CONTENTS
	// DocumentHeader/DocumentFooter wrap the output in a full page
	HTML_COMPLETE_PAGE
	// BlockCode uses BlockCodeGithub instead of BlockCodeNormal
	HTML_GITHUB_BLOCKCODE
	// singleton tags close with " />" and the XHTML doctype is used
	HTML_USE_XHTML
	// NormalText routes text through Smartypants
	HTML_USE_SMARTYPANTS
	// not read in this file; presumably consumed by Smartypants(flags) — confirm
	HTML_SMARTYPANTS_FRACTIONS
	// not read in this file; presumably consumed by Smartypants(flags) — confirm
	HTML_SMARTYPANTS_LATEX_DASHES
)
39
// Html holds the configuration and the running state of the HTML rendering
// backend. Values are created by HtmlRenderer.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output length recorded at the end of DocumentHeader; DocumentFooter inserts the TOC there
	headerCount  int           // number used for the next toc_N anchor; incremented by TocHeader
	currentLevel int           // depth of the <ul> nesting currently open in toc
	toc          *bytes.Buffer // table of contents markup accumulated by TocHeader/TocFinalize

	// per-byte action table consulted by Smartypants
	smartypants *SmartypantsRenderer
}
54
// Terminators for singleton tags such as <hr>, <br> and <img>. HtmlRenderer
// stores one of them in Html.closeTag depending on HTML_USE_XHTML. Both
// include the trailing newline.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
59
60func HtmlRenderer(flags int, title string, css string) Renderer {
61 // configure the rendering engine
62 closeTag := htmlClose
63 if flags&HTML_USE_XHTML != 0 {
64 closeTag = xhtmlClose
65 }
66
67 return &Html{
68 flags: flags,
69 closeTag: closeTag,
70 title: title,
71 css: css,
72
73 headerCount: 0,
74 currentLevel: 0,
75 toc: new(bytes.Buffer),
76
77 smartypants: Smartypants(flags),
78 }
79}
80
// attrEscape writes src to out, replacing the characters that are unsafe in
// HTML text and attribute values with character references:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// All other bytes are copied through unchanged. Runs of ordinary bytes are
// written with a single Write call rather than byte by byte.
func attrEscape(out *bytes.Buffer, src []byte) {
	// org marks the start of the pending run of bytes that need no escaping
	org := 0
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		if i > org {
			// copy all the normal characters since the last escape
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
126
127func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
128 marker := out.Len()
129
130 if marker > 0 {
131 out.WriteByte('\n')
132 }
133
134 if options.flags&HTML_TOC != 0 {
135 // headerCount is incremented in htmlTocHeader
136 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
137 } else {
138 out.WriteString(fmt.Sprintf("<h%d>", level))
139 }
140
141 tocMarker := out.Len()
142 if !text() {
143 out.Truncate(marker)
144 return
145 }
146
147 // are we building a table of contents?
148 if options.flags&HTML_TOC != 0 {
149 options.TocHeader(out.Bytes()[tocMarker:], level)
150 }
151
152 out.WriteString(fmt.Sprintf("</h%d>\n", level))
153}
154
155func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
156 if options.flags&HTML_SKIP_HTML != 0 {
157 return
158 }
159
160 sz := len(text)
161 for sz > 0 && text[sz-1] == '\n' {
162 sz--
163 }
164 org := 0
165 for org < sz && text[org] == '\n' {
166 org++
167 }
168 if org >= sz {
169 return
170 }
171 if out.Len() > 0 {
172 out.WriteByte('\n')
173 }
174 out.Write(text[org:sz])
175 out.WriteByte('\n')
176}
177
178func (options *Html) HRule(out *bytes.Buffer) {
179 if out.Len() > 0 {
180 out.WriteByte('\n')
181 }
182 out.WriteString("<hr")
183 out.WriteString(options.closeTag)
184}
185
186func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
187 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
188 options.BlockCodeGithub(out, text, lang)
189 } else {
190 options.BlockCodeNormal(out, text, lang)
191 }
192}
193
194func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
195 if out.Len() > 0 {
196 out.WriteByte('\n')
197 }
198
199 if lang != "" {
200 out.WriteString("<pre><code class=\"")
201
202 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
203 for i < len(lang) && isspace(lang[i]) {
204 i++
205 }
206
207 if i < len(lang) {
208 org := i
209 for i < len(lang) && !isspace(lang[i]) {
210 i++
211 }
212
213 if lang[org] == '.' {
214 org++
215 }
216
217 if cls > 0 {
218 out.WriteByte(' ')
219 }
220 attrEscape(out, []byte(lang[org:]))
221 }
222 }
223
224 out.WriteString("\">")
225 } else {
226 out.WriteString("<pre><code>")
227 }
228
229 if len(text) > 0 {
230 attrEscape(out, text)
231 }
232
233 out.WriteString("</code></pre>\n")
234}
235
236/*
237 * GitHub style code block:
238 *
239 * <pre lang="LANG"><code>
240 * ...
241 * </pre></code>
242 *
243 * Unlike other parsers, we store the language identifier in the <pre>,
244 * and don't let the user generate custom classes.
245 *
246 * The language identifier in the <pre> block gets postprocessed and all
247 * the code inside gets syntax highlighted with Pygments. This is much safer
248 * than letting the user specify a CSS class for highlighting.
249 *
250 * Note that we only generate HTML for the first specifier.
251 * E.g.
252 * ~~~~ {.python .numbered} => <pre lang="python"><code>
253 */
254func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
255 if out.Len() > 0 {
256 out.WriteByte('\n')
257 }
258
259 if len(lang) > 0 {
260 out.WriteString("<pre lang=\"")
261
262 i := 0
263 for i < len(lang) && !isspace(lang[i]) {
264 i++
265 }
266
267 if lang[0] == '.' {
268 attrEscape(out, []byte(lang[1:i]))
269 } else {
270 attrEscape(out, []byte(lang[:i]))
271 }
272
273 out.WriteString("\"><code>")
274 } else {
275 out.WriteString("<pre><code>")
276 }
277
278 if len(text) > 0 {
279 attrEscape(out, text)
280 }
281
282 out.WriteString("</code></pre>\n")
283}
284
285
286func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
287 out.WriteString("<blockquote>\n")
288 out.Write(text)
289 out.WriteString("</blockquote>")
290}
291
292func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
293 if out.Len() > 0 {
294 out.WriteByte('\n')
295 }
296 out.WriteString("<table><thead>\n")
297 out.Write(header)
298 out.WriteString("\n</thead><tbody>\n")
299 out.Write(body)
300 out.WriteString("\n</tbody></table>")
301}
302
303func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
304 if out.Len() > 0 {
305 out.WriteByte('\n')
306 }
307 out.WriteString("<tr>\n")
308 out.Write(text)
309 out.WriteString("\n</tr>")
310}
311
312func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
313 if out.Len() > 0 {
314 out.WriteByte('\n')
315 }
316 switch align {
317 case TABLE_ALIGNMENT_LEFT:
318 out.WriteString("<td align=\"left\">")
319 case TABLE_ALIGNMENT_RIGHT:
320 out.WriteString("<td align=\"right\">")
321 case TABLE_ALIGNMENT_CENTER:
322 out.WriteString("<td align=\"center\">")
323 default:
324 out.WriteString("<td>")
325 }
326
327 out.Write(text)
328 out.WriteString("</td>")
329}
330
331func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
332 marker := out.Len()
333
334 if marker > 0 {
335 out.WriteByte('\n')
336 }
337 if flags&LIST_TYPE_ORDERED != 0 {
338 out.WriteString("<ol>\n")
339 } else {
340 out.WriteString("<ul>\n")
341 }
342 if !text() {
343 out.Truncate(marker)
344 return
345 }
346 if flags&LIST_TYPE_ORDERED != 0 {
347 out.WriteString("</ol>\n")
348 } else {
349 out.WriteString("</ul>\n")
350 }
351}
352
353func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
354 out.WriteString("<li>")
355 size := len(text)
356 for size > 0 && text[size-1] == '\n' {
357 size--
358 }
359 out.Write(text[:size])
360 out.WriteString("</li>\n")
361}
362
363func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
364 marker := out.Len()
365 if marker > 0 {
366 out.WriteByte('\n')
367 }
368
369 out.WriteString("<p>")
370 if !text() {
371 out.Truncate(marker)
372 return
373 }
374 out.WriteString("</p>\n")
375}
376
377func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
378 if len(link) == 0 {
379 return
380 }
381 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
382 return
383 }
384
385 out.WriteString("<a href=\"")
386 if kind == LINK_TYPE_EMAIL {
387 out.WriteString("mailto:")
388 }
389 attrEscape(out, link)
390 out.WriteString("\">")
391
392 /*
393 * Pretty print: if we get an email address as
394 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
395 * want to print the `mailto:` prefix
396 */
397 switch {
398 case bytes.HasPrefix(link, []byte("mailto://")):
399 attrEscape(out, link[9:])
400 case bytes.HasPrefix(link, []byte("mailto:")):
401 attrEscape(out, link[7:])
402 default:
403 attrEscape(out, link)
404 }
405
406 out.WriteString("</a>")
407}
408
409func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
410 out.WriteString("<code>")
411 attrEscape(out, text)
412 out.WriteString("</code>")
413}
414
415func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
416 if len(text) == 0 {
417 return
418 }
419 out.WriteString("<strong>")
420 out.Write(text)
421 out.WriteString("</strong>")
422}
423
424func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
425 if len(text) == 0 {
426 return
427 }
428 out.WriteString("<em>")
429 out.Write(text)
430 out.WriteString("</em>")
431}
432
433func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
434 if options.flags&HTML_SKIP_IMAGES != 0 {
435 return
436 }
437
438 if len(link) == 0 {
439 return
440 }
441 out.WriteString("<img src=\"")
442 attrEscape(out, link)
443 out.WriteString("\" alt=\"")
444 if len(alt) > 0 {
445 attrEscape(out, alt)
446 }
447 if len(title) > 0 {
448 out.WriteString("\" title=\"")
449 attrEscape(out, title)
450 }
451
452 out.WriteByte('"')
453 out.WriteString(options.closeTag)
454 return
455}
456
457func (options *Html) LineBreak(out *bytes.Buffer) {
458 out.WriteString("<br")
459 out.WriteString(options.closeTag)
460}
461
462func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
463 if options.flags&HTML_SKIP_LINKS != 0 {
464 return
465 }
466
467 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
468 return
469 }
470
471 out.WriteString("<a href=\"")
472 attrEscape(out, link)
473 if len(title) > 0 {
474 out.WriteString("\" title=\"")
475 attrEscape(out, title)
476 }
477 out.WriteString("\">")
478 out.Write(content)
479 out.WriteString("</a>")
480 return
481}
482
483func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
484 if options.flags&HTML_SKIP_HTML != 0 {
485 return
486 }
487 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
488 return
489 }
490 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
491 return
492 }
493 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
494 return
495 }
496 out.Write(text)
497}
498
499func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
500 if len(text) == 0 {
501 return
502 }
503 out.WriteString("<strong><em>")
504 out.Write(text)
505 out.WriteString("</em></strong>")
506}
507
508func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
509 if len(text) == 0 {
510 return
511 }
512 out.WriteString("<del>")
513 out.Write(text)
514 out.WriteString("</del>")
515}
516
// Entity copies entity to out verbatim, without any escaping.
// NOTE(review): presumably entity is an HTML character reference already
// validated by the parser — confirm against the caller.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
520
521func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
522 if options.flags&HTML_USE_SMARTYPANTS != 0 {
523 options.Smartypants(out, text)
524 } else {
525 attrEscape(out, text)
526 }
527}
528
// Smartypants writes text to out after escaping it and running it through
// the renderer's smartypants action table.
//
// The text is first escaped with attrEscape into a scratch buffer. Each byte
// of the escaped text is then looked up in options.smartypants. When an
// action is registered for the byte, the pending run of ordinary bytes is
// flushed and the action is called with the output buffer, the state shared
// across this call, the preceding byte (0 at the start of the text) and the
// remaining text. The action's result is added to the loop index;
// presumably it is the number of additional bytes the action consumed —
// confirm against the action implementations.
func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
	smrt := smartypantsData{false, false}

	// first do normal entity escaping
	var escaped bytes.Buffer
	attrEscape(&escaped, text)
	text = escaped.Bytes()

	// mark is the start of the run of bytes not yet copied to out
	mark := 0
	for i := 0; i < len(text); i++ {
		if action := options.smartypants[text[i]]; action != nil {
			if i > mark {
				out.Write(text[mark:i])
			}

			previousChar := byte(0)
			if i > 0 {
				previousChar = text[i-1]
			}
			i += action(out, &smrt, previousChar, text[i:])
			// the byte at i has been handled by the action
			mark = i + 1
		}
	}

	// flush whatever follows the last action
	if mark < len(text) {
		out.Write(text[mark:])
	}
}
557
558func (options *Html) DocumentHeader(out *bytes.Buffer) {
559 if options.flags&HTML_COMPLETE_PAGE == 0 {
560 return
561 }
562
563 ending := ""
564 if options.flags&HTML_USE_XHTML != 0 {
565 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
566 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
567 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
568 ending = " /"
569 } else {
570 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
571 out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
572 out.WriteString("<html>\n")
573 }
574 out.WriteString("<head>\n")
575 out.WriteString(" <title>")
576 options.NormalText(out, []byte(options.title))
577 out.WriteString("</title>\n")
578 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
579 out.WriteString(VERSION)
580 out.WriteString("\"")
581 out.WriteString(ending)
582 out.WriteString(">\n")
583 out.WriteString(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
584 out.WriteString(ending)
585 out.WriteString(">\n")
586 if options.css != "" {
587 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
588 attrEscape(out, []byte(options.css))
589 out.WriteString("\"")
590 out.WriteString(ending)
591 out.WriteString(">\n")
592 }
593 out.WriteString("</head>\n")
594 out.WriteString("<body>\n")
595
596 options.tocMarker = out.Len()
597}
598
599func (options *Html) DocumentFooter(out *bytes.Buffer) {
600 // finalize and insert the table of contents
601 if options.flags&HTML_TOC != 0 {
602 options.TocFinalize()
603
604 // now we have to insert the table of contents into the document
605 var temp bytes.Buffer
606
607 // start by making a copy of everything after the document header
608 temp.Write(out.Bytes()[options.tocMarker:])
609
610 // now clear the copied material from the main output buffer
611 out.Truncate(options.tocMarker)
612
613 // corner case spacing issue
614 if options.flags&HTML_COMPLETE_PAGE != 0 {
615 out.WriteByte('\n')
616 }
617
618 // insert the table of contents
619 out.Write(options.toc.Bytes())
620
621 // corner case spacing issue
622 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
623 out.WriteByte('\n')
624 }
625
626 // write out everything that came after it
627 if options.flags&HTML_OMIT_CONTENTS == 0 {
628 out.Write(temp.Bytes())
629 }
630 }
631
632 if options.flags&HTML_COMPLETE_PAGE != 0 {
633 out.WriteString("\n</body>\n")
634 out.WriteString("</html>\n")
635 }
636
637}
638
// TocHeader appends an entry for one header to the table of contents.
//
// text is the rendered header content and level is the header level.
// Nested <ul> lists are opened until the current list depth reaches level,
// and closed while the depth is greater than level. The entry links to
// "#toc_N", where N is the current headerCount; headerCount is incremented
// afterwards.
func (options *Html) TocHeader(text []byte, level int) {
	// open sublists until the list depth matches the header level
	for level > options.currentLevel {
		switch {
		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
			// this sublist can nest underneath a header:
			// reopen the previous item by removing its closing tag
			size := options.toc.Len()
			options.toc.Truncate(size - len("</li>\n"))

		case options.currentLevel > 0:
			// no finished item to nest under: open an empty one
			options.toc.WriteString("<li>")
		}
		if options.toc.Len() > 0 {
			options.toc.WriteByte('\n')
		}
		options.toc.WriteString("<ul>\n")
		options.currentLevel++
	}

	// close sublists that are deeper than the header level
	for level < options.currentLevel {
		options.toc.WriteString("</ul>")
		if options.currentLevel > 1 {
			// a nested list also ends the item that contains it
			options.toc.WriteString("</li>\n")
		}
		options.currentLevel--
	}

	options.toc.WriteString("<li><a href=\"#toc_")
	options.toc.WriteString(strconv.Itoa(options.headerCount))
	options.toc.WriteString("\">")
	options.headerCount++

	options.toc.Write(text)

	options.toc.WriteString("</a></li>\n")
}
674
675func (options *Html) TocFinalize() {
676 for options.currentLevel > 1 {
677 options.toc.WriteString("</ul></li>\n")
678 options.currentLevel--
679 }
680
681 if options.currentLevel > 0 {
682 options.toc.WriteString("</ul>\n")
683 }
684}
685
686func isHtmlTag(tag []byte, tagname string) bool {
687 i := 0
688 if i < len(tag) && tag[0] != '<' {
689 return false
690 }
691 i++
692 for i < len(tag) && isspace(tag[i]) {
693 i++
694 }
695
696 if i < len(tag) && tag[i] == '/' {
697 i++
698 }
699
700 for i < len(tag) && isspace(tag[i]) {
701 i++
702 }
703
704 j := i
705 for ; i < len(tag); i, j = i+1, j+1 {
706 if j >= len(tagname) {
707 break
708 }
709
710 if tag[i] != tagname[j] {
711 return false
712 }
713 }
714
715 if i == len(tag) {
716 return false
717 }
718
719 return isspace(tag[i]) || tag[i] == '>'
720}