html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "strconv"
22 "strings"
23)
24
// Rendering option flags for the HTML backend. Each constant is a distinct
// bit (1 << iota), so they can be OR-ed together and passed to HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // drop raw HTML blocks and inline HTML tags
	HTML_SKIP_STYLE                           // drop inline <style> tags
	HTML_SKIP_IMAGES                          // emit nothing for images; drop inline <img> tags
	HTML_SKIP_LINKS                           // render link text in <tt> instead of linking; drop inline <a> tags
	HTML_SAFELINK                             // only create links for targets accepted by isSafeLink
	HTML_TOC                                  // give headers toc_N ids and insert a table of contents
	HTML_OMIT_CONTENTS                        // with HTML_TOC: output the table of contents without the document body
	HTML_COMPLETE_PAGE                        // wrap the output in a full page (doctype, <head>, <body>)
	HTML_GITHUB_BLOCKCODE                     // render code blocks as <pre lang="..."><code>
	HTML_USE_XHTML                            // close singleton tags with " />" and use the XHTML doctype
	HTML_USE_SMARTYPANTS                      // run normal text through the Smartypants pass
	HTML_SMARTYPANTS_FRACTIONS                // not read in this file; passed to Smartypants(flags) — presumably enables fraction substitution
	HTML_SMARTYPANTS_LATEX_DASHES             // not read in this file; passed to Smartypants(flags) — presumably enables LaTeX-style dashes
)
40
// Html is the HTML rendering backend. It carries the rendering options and
// the state accumulated while building an optional table of contents.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded at the end of DocumentHeader; the TOC is inserted there
	headerCount  int           // number used for the next toc_N anchor; incremented in TocHeader
	currentLevel int           // current <ul> nesting depth inside toc
	toc          *bytes.Buffer // table-of-contents markup accumulated by TocHeader

	smartypants *SmartypantsRenderer // per-byte callbacks used by the Smartypants method
}
55
// Terminators for singleton tags such as <hr> and <br>; HtmlRenderer picks
// one of them based on HTML_USE_XHTML.
const (
	xhtmlClose = " />\n" // XHTML self-closing form
	htmlClose  = ">\n"   // plain HTML form
)
60
61func HtmlRenderer(flags int, title string, css string) Renderer {
62 // configure the rendering engine
63 closeTag := htmlClose
64 if flags&HTML_USE_XHTML != 0 {
65 closeTag = xhtmlClose
66 }
67
68 return &Html{
69 flags: flags,
70 closeTag: closeTag,
71 title: title,
72 css: css,
73
74 headerCount: 0,
75 currentLevel: 0,
76 toc: new(bytes.Buffer),
77
78 smartypants: Smartypants(flags),
79 }
80}
81
// attrEscape writes src to out, replacing the characters that are significant
// in HTML text and attribute values with entity references:
//
//	"  ->  &quot;
//	&  ->  &amp;
//	<  ->  &lt;
//	>  ->  &gt;
//
// All other bytes are copied through unchanged. Runs of ordinary bytes are
// written with a single Write call, since this function is on the hot path of
// normal rendering.
func attrEscape(out *bytes.Buffer, src []byte) {
	// org marks the start of the pending run of bytes that need no escaping
	org := 0
	for i, ch := range src {
		var entity string
		switch ch {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		if i > org {
			// copy all the normal characters since the last escape
			out.Write(src[org:i])
		}
		out.WriteString(entity)
		org = i + 1
	}

	// flush whatever follows the last escaped character
	if org < len(src) {
		out.Write(src[org:])
	}
}
127
128func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
129 marker := out.Len()
130 doubleSpace(out)
131
132 if options.flags&HTML_TOC != 0 {
133 // headerCount is incremented in htmlTocHeader
134 out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
135 } else {
136 out.WriteString(fmt.Sprintf("<h%d>", level))
137 }
138
139 tocMarker := out.Len()
140 if !text() {
141 out.Truncate(marker)
142 return
143 }
144
145 // are we building a table of contents?
146 if options.flags&HTML_TOC != 0 {
147 options.TocHeader(out.Bytes()[tocMarker:], level)
148 }
149
150 out.WriteString(fmt.Sprintf("</h%d>\n", level))
151}
152
153func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
154 if options.flags&HTML_SKIP_HTML != 0 {
155 return
156 }
157
158 doubleSpace(out)
159 out.Write(text)
160 out.WriteByte('\n')
161}
162
163func (options *Html) HRule(out *bytes.Buffer) {
164 doubleSpace(out)
165 out.WriteString("<hr")
166 out.WriteString(options.closeTag)
167}
168
169func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
170 if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
171 options.BlockCodeGithub(out, text, lang)
172 } else {
173 options.BlockCodeNormal(out, text, lang)
174 }
175}
176
177func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
178 doubleSpace(out)
179
180 // parse out the language names/classes
181 count := 0
182 for _, elt := range strings.Fields(lang) {
183 if elt[0] == '.' {
184 elt = elt[1:]
185 }
186 if len(elt) == 0 {
187 continue
188 }
189 if count == 0 {
190 out.WriteString("<pre><code class=\"")
191 } else {
192 out.WriteByte(' ')
193 }
194 attrEscape(out, []byte(elt))
195 count++
196 }
197
198 if count == 0 {
199 out.WriteString("<pre><code>")
200 } else {
201 out.WriteString("\">")
202 }
203
204 attrEscape(out, text)
205 out.WriteString("</code></pre>\n")
206}
207
208/*
209 * GitHub style code block:
210 *
211 * <pre lang="LANG"><code>
212 * ...
213 * </pre></code>
214 *
215 * Unlike other parsers, we store the language identifier in the <pre>,
216 * and don't let the user generate custom classes.
217 *
218 * The language identifier in the <pre> block gets postprocessed and all
219 * the code inside gets syntax highlighted with Pygments. This is much safer
220 * than letting the user specify a CSS class for highlighting.
221 *
222 * Note that we only generate HTML for the first specifier.
223 * E.g.
224 * ~~~~ {.python .numbered} => <pre lang="python"><code>
225 */
226func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
227 doubleSpace(out)
228
229 // parse out the language name
230 count := 0
231 for _, elt := range strings.Fields(lang) {
232 if elt[0] == '.' {
233 elt = elt[1:]
234 }
235 if len(elt) == 0 {
236 continue
237 }
238 out.WriteString("<pre lang=\"")
239 attrEscape(out, []byte(elt))
240 out.WriteString("\"><code>")
241 count++
242 break
243 }
244
245 if count == 0 {
246 out.WriteString("<pre><code>")
247 }
248
249 attrEscape(out, text)
250 out.WriteString("</code></pre>\n")
251}
252
253
254func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
255 out.WriteString("<blockquote>\n")
256 out.Write(text)
257 out.WriteString("</blockquote>")
258}
259
260func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
261 doubleSpace(out)
262 out.WriteString("<table>\n<thead>\n")
263 out.Write(header)
264 out.WriteString("\n</thead>\n<tbody>\n")
265 out.Write(body)
266 out.WriteString("\n</tbody>\n</table>")
267}
268
269func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
270 doubleSpace(out)
271 out.WriteString("<tr>\n")
272 out.Write(text)
273 out.WriteString("\n</tr>")
274}
275
276func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
277 doubleSpace(out)
278 switch align {
279 case TABLE_ALIGNMENT_LEFT:
280 out.WriteString("<td align=\"left\">")
281 case TABLE_ALIGNMENT_RIGHT:
282 out.WriteString("<td align=\"right\">")
283 case TABLE_ALIGNMENT_CENTER:
284 out.WriteString("<td align=\"center\">")
285 default:
286 out.WriteString("<td>")
287 }
288
289 out.Write(text)
290 out.WriteString("</td>")
291}
292
293func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
294 marker := out.Len()
295 doubleSpace(out)
296
297 if flags&LIST_TYPE_ORDERED != 0 {
298 out.WriteString("<ol>\n")
299 } else {
300 out.WriteString("<ul>\n")
301 }
302 if !text() {
303 out.Truncate(marker)
304 return
305 }
306 if flags&LIST_TYPE_ORDERED != 0 {
307 out.WriteString("</ol>\n")
308 } else {
309 out.WriteString("</ul>\n")
310 }
311}
312
313func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
314 out.WriteString("<li>")
315 out.Write(text)
316 out.WriteString("</li>\n")
317}
318
319func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
320 marker := out.Len()
321 doubleSpace(out)
322
323 out.WriteString("<p>")
324 if !text() {
325 out.Truncate(marker)
326 return
327 }
328 out.WriteString("</p>\n")
329}
330
331func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
332 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
333 // mark it but don't link it if it is not a safe link: no smartypants
334 out.WriteString("<tt>")
335 attrEscape(out, link)
336 out.WriteString("</tt>")
337 return
338 }
339
340 out.WriteString("<a href=\"")
341 if kind == LINK_TYPE_EMAIL {
342 out.WriteString("mailto:")
343 }
344 attrEscape(out, link)
345 out.WriteString("\">")
346
347 // Pretty print: if we get an email address as
348 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
349 // want to print the `mailto:` prefix
350 switch {
351 case bytes.HasPrefix(link, []byte("mailto://")):
352 attrEscape(out, link[len("mailto://"):])
353 case bytes.HasPrefix(link, []byte("mailto:")):
354 attrEscape(out, link[len("mailto:"):])
355 default:
356 attrEscape(out, link)
357 }
358
359 out.WriteString("</a>")
360}
361
362func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
363 out.WriteString("<code>")
364 attrEscape(out, text)
365 out.WriteString("</code>")
366}
367
368func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
369 out.WriteString("<strong>")
370 out.Write(text)
371 out.WriteString("</strong>")
372}
373
374func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
375 if len(text) == 0 {
376 return
377 }
378 out.WriteString("<em>")
379 out.Write(text)
380 out.WriteString("</em>")
381}
382
383func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
384 if options.flags&HTML_SKIP_IMAGES != 0 {
385 return
386 }
387
388 out.WriteString("<img src=\"")
389 attrEscape(out, link)
390 out.WriteString("\" alt=\"")
391 if len(alt) > 0 {
392 attrEscape(out, alt)
393 }
394 if len(title) > 0 {
395 out.WriteString("\" title=\"")
396 attrEscape(out, title)
397 }
398
399 out.WriteByte('"')
400 out.WriteString(options.closeTag)
401 return
402}
403
404func (options *Html) LineBreak(out *bytes.Buffer) {
405 out.WriteString("<br")
406 out.WriteString(options.closeTag)
407}
408
409func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
410 if options.flags&HTML_SKIP_LINKS != 0 {
411 // write the link text out but don't link it, just mark it with typewriter font
412 out.WriteString("<tt>")
413 attrEscape(out, content)
414 out.WriteString("</tt>")
415 return
416 }
417
418 if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
419 // write the link text out but don't link it, just mark it with typewriter font
420 out.WriteString("<tt>")
421 attrEscape(out, content)
422 out.WriteString("</tt>")
423 return
424 }
425
426 out.WriteString("<a href=\"")
427 attrEscape(out, link)
428 if len(title) > 0 {
429 out.WriteString("\" title=\"")
430 attrEscape(out, title)
431 }
432 out.WriteString("\">")
433 out.Write(content)
434 out.WriteString("</a>")
435 return
436}
437
438func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
439 if options.flags&HTML_SKIP_HTML != 0 {
440 return
441 }
442 if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
443 return
444 }
445 if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
446 return
447 }
448 if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
449 return
450 }
451 out.Write(text)
452}
453
454func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
455 out.WriteString("<strong><em>")
456 out.Write(text)
457 out.WriteString("</em></strong>")
458}
459
460func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
461 out.WriteString("<del>")
462 out.Write(text)
463 out.WriteString("</del>")
464}
465
466func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
467 out.Write(entity)
468}
469
470func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
471 if options.flags&HTML_USE_SMARTYPANTS != 0 {
472 options.Smartypants(out, text)
473 } else {
474 attrEscape(out, text)
475 }
476}
477
478func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
479 smrt := smartypantsData{false, false}
480
481 // first do normal entity escaping
482 var escaped bytes.Buffer
483 attrEscape(&escaped, text)
484 text = escaped.Bytes()
485
486 mark := 0
487 for i := 0; i < len(text); i++ {
488 if action := options.smartypants[text[i]]; action != nil {
489 if i > mark {
490 out.Write(text[mark:i])
491 }
492
493 previousChar := byte(0)
494 if i > 0 {
495 previousChar = text[i-1]
496 }
497 i += action(out, &smrt, previousChar, text[i:])
498 mark = i + 1
499 }
500 }
501
502 if mark < len(text) {
503 out.Write(text[mark:])
504 }
505}
506
507func (options *Html) DocumentHeader(out *bytes.Buffer) {
508 if options.flags&HTML_COMPLETE_PAGE == 0 {
509 return
510 }
511
512 ending := ""
513 if options.flags&HTML_USE_XHTML != 0 {
514 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
515 out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
516 out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
517 ending = " /"
518 } else {
519 out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
520 out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
521 out.WriteString("<html>\n")
522 }
523 out.WriteString("<head>\n")
524 out.WriteString(" <title>")
525 options.NormalText(out, []byte(options.title))
526 out.WriteString("</title>\n")
527 out.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
528 out.WriteString(VERSION)
529 out.WriteString("\"")
530 out.WriteString(ending)
531 out.WriteString(">\n")
532 out.WriteString(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
533 out.WriteString(ending)
534 out.WriteString(">\n")
535 if options.css != "" {
536 out.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
537 attrEscape(out, []byte(options.css))
538 out.WriteString("\"")
539 out.WriteString(ending)
540 out.WriteString(">\n")
541 }
542 out.WriteString("</head>\n")
543 out.WriteString("<body>\n")
544
545 options.tocMarker = out.Len()
546}
547
548func (options *Html) DocumentFooter(out *bytes.Buffer) {
549 // finalize and insert the table of contents
550 if options.flags&HTML_TOC != 0 {
551 options.TocFinalize()
552
553 // now we have to insert the table of contents into the document
554 var temp bytes.Buffer
555
556 // start by making a copy of everything after the document header
557 temp.Write(out.Bytes()[options.tocMarker:])
558
559 // now clear the copied material from the main output buffer
560 out.Truncate(options.tocMarker)
561
562 // corner case spacing issue
563 if options.flags&HTML_COMPLETE_PAGE != 0 {
564 out.WriteByte('\n')
565 }
566
567 // insert the table of contents
568 out.Write(options.toc.Bytes())
569
570 // corner case spacing issue
571 if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
572 out.WriteByte('\n')
573 }
574
575 // write out everything that came after it
576 if options.flags&HTML_OMIT_CONTENTS == 0 {
577 out.Write(temp.Bytes())
578 }
579 }
580
581 if options.flags&HTML_COMPLETE_PAGE != 0 {
582 out.WriteString("\n</body>\n")
583 out.WriteString("</html>\n")
584 }
585
586}
587
588func (options *Html) TocHeader(text []byte, level int) {
589 for level > options.currentLevel {
590 switch {
591 case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
592 // this sublist can nest underneath a header
593 size := options.toc.Len()
594 options.toc.Truncate(size - len("</li>\n"))
595
596 case options.currentLevel > 0:
597 options.toc.WriteString("<li>")
598 }
599 if options.toc.Len() > 0 {
600 options.toc.WriteByte('\n')
601 }
602 options.toc.WriteString("<ul>\n")
603 options.currentLevel++
604 }
605
606 for level < options.currentLevel {
607 options.toc.WriteString("</ul>")
608 if options.currentLevel > 1 {
609 options.toc.WriteString("</li>\n")
610 }
611 options.currentLevel--
612 }
613
614 options.toc.WriteString("<li><a href=\"#toc_")
615 options.toc.WriteString(strconv.Itoa(options.headerCount))
616 options.toc.WriteString("\">")
617 options.headerCount++
618
619 options.toc.Write(text)
620
621 options.toc.WriteString("</a></li>\n")
622}
623
624func (options *Html) TocFinalize() {
625 for options.currentLevel > 1 {
626 options.toc.WriteString("</ul></li>\n")
627 options.currentLevel--
628 }
629
630 if options.currentLevel > 0 {
631 options.toc.WriteString("</ul>\n")
632 }
633}
634
635func isHtmlTag(tag []byte, tagname string) bool {
636 i := 0
637 if i < len(tag) && tag[0] != '<' {
638 return false
639 }
640 i++
641 for i < len(tag) && isspace(tag[i]) {
642 i++
643 }
644
645 if i < len(tag) && tag[i] == '/' {
646 i++
647 }
648
649 for i < len(tag) && isspace(tag[i]) {
650 i++
651 }
652
653 j := i
654 for ; i < len(tag); i, j = i+1, j+1 {
655 if j >= len(tagname) {
656 break
657 }
658
659 if tag[i] != tagname[j] {
660 return false
661 }
662 }
663
664 if i == len(tag) {
665 return false
666 }
667
668 return isspace(tag[i]) || tag[i] == '>'
669}
670
// doubleSpace writes a newline to out unless out is still empty, so that
// block-level elements are separated without a leading blank line at the
// start of the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}