html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "html"
22 "io"
23 "regexp"
24 "strconv"
25 "strings"
26)
27
// HTMLFlags is a bit set of options that control the HTML renderer.
28type HTMLFlags int
29
30// HTML renderer configuration options.
// HTMLFlagsNone occupies iota value 0, so SkipHTML is 1<<1 and each flag
// after it is the next higher bit.
31const (
32 HTMLFlagsNone HTMLFlags = 0
33 SkipHTML HTMLFlags = 1 << iota // Skip preformatted HTML blocks
34 SkipStyle // Skip embedded <style> elements
35 SkipImages // Skip embedded images
36 SkipLinks // Skip all links
37 Safelink // Only link to trusted protocols
38 NofollowLinks // Only link with rel="nofollow"
39 NoreferrerLinks // Only link with rel="noreferrer"
40 HrefTargetBlank // Add a blank target
41 TOC // Generate a table of contents
42 OmitContents // Skip the main contents (for a standalone table of contents)
43 CompletePage // Generate a complete HTML page
44 UseXHTML // Generate XHTML output instead of HTML
45 FootnoteReturnLinks // Generate a link at the end of a footnote to return to the source
46
// Regular-expression fragments; they are concatenated below into HTMLTag,
// which matches one open tag, close tag, comment, processing instruction,
// declaration or CDATA section.
47 TagName = "[A-Za-z][A-Za-z0-9-]*"
48 AttributeName = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
49 UnquotedValue = "[^\"'=<>`\\x00-\\x20]+"
50 SingleQuotedValue = "'[^']*'"
51 DoubleQuotedValue = "\"[^\"]*\""
52 AttributeValue = "(?:" + UnquotedValue + "|" + SingleQuotedValue + "|" + DoubleQuotedValue + ")"
53 AttributeValueSpec = "(?:" + "\\s*=" + "\\s*" + AttributeValue + ")"
54 Attribute = "(?:" + "\\s+" + AttributeName + AttributeValueSpec + "?)"
55 OpenTag = "<" + TagName + Attribute + "*" + "\\s*/?>"
56 CloseTag = "</" + TagName + "\\s*[>]"
57 HTMLComment = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
58 ProcessingInstruction = "[<][?].*?[?][>]"
59 Declaration = "<![A-Z]+" + "\\s+[^>]*>"
60 CDATA = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"
61 HTMLTag = "(?:" + OpenTag + "|" + CloseTag + "|" + HTMLComment + "|" +
62 ProcessingInstruction + "|" + Declaration + "|" + CDATA + ")"
63)
64
65var (
// htmlEntity matches named entities made of two to five lowercase
// letters, such as &amp; or &quot;.
66 // TODO: improve this regexp to catch all possible entities:
67 htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
// reHtmlTag matches, case-insensitively, a single HTMLTag anchored at the
// start of the input.
68 reHtmlTag = regexp.MustCompile("(?i)^" + HTMLTag)
69)
70
// HTMLRendererParameters holds the optional settings accepted by
// HTMLRendererWithParameters.
71type HTMLRendererParameters struct {
72 // Prepend this text to each relative URL.
73 AbsolutePrefix string
74 // Add this text to each footnote anchor, to ensure uniqueness.
75 FootnoteAnchorPrefix string
76 // Show this text inside the <a> tag for a footnote return link, if the
77 // FootnoteReturnLinks flag is enabled. If blank, the string
78 // <sup>[return]</sup> is used.
79 FootnoteReturnLinkContents string
80 // If set, add this text to the front of each Header ID, to ensure
81 // uniqueness.
82 HeaderIDPrefix string
83 // If set, add this text to the back of each Header ID, to ensure uniqueness.
84 HeaderIDSuffix string
85}
86
87// HTML is a type that implements the Renderer interface for HTML output.
88//
89// Do not create this directly, instead use the HTMLRenderer function.
90type HTML struct {
91 flags HTMLFlags
92 closeTag string // how to end singleton tags: either " />" or ">"
93 title string // document title
94 css string // optional css file url (used with CompletePage)
95
96 parameters HTMLRendererParameters
97
98 // table of contents data
99 tocMarker int // output length recorded by DocumentHeader; DocumentFooter inserts the TOC at this offset
100 headerCount int // number of TOC entries written so far; also used to build "toc_N" ids
101 currentLevel int // nesting depth of the <ul> lists currently open in toc
102 toc *bytes.Buffer
103
104 // Track header IDs to prevent ID collision in a single generation.
105 headerIDs map[string]int
106
107 w HTMLWriter // buffer that the rendering methods write to
108 lastOutputLen int // length of the text passed to the most recent out call; cr emits a newline only when it is > 0
109 disableTags int // while > 0, out strips a leading HTML tag from the text it writes
110
111 extensions Extensions // This gives Smartypants renderer access to flags
112}
113
// Closing sequences for singleton tags such as <hr> and <br>; the XHTML form
// is selected when the UseXHTML flag is set.
114const (
115 xhtmlClose = " />"
116 htmlClose = ">"
117)
118
119// HTMLRenderer creates and configures an HTML object, which
120// satisfies the Renderer interface.
121//
122// flags is a set of HTMLFlags ORed together.
123// title is the title of the document, and css is a URL for the document's
124// stylesheet.
125// title and css are only used when HTML_COMPLETE_PAGE is selected.
126func HTMLRenderer(flags HTMLFlags, extensions Extensions, title string, css string) Renderer {
127 return HTMLRendererWithParameters(flags, extensions, title, css, HTMLRendererParameters{})
128}
129
// HTMLWriter accumulates rendered output in an in-memory buffer.
type HTMLWriter struct {
	output bytes.Buffer
}

// Write appends p to the buffered output.
func (w *HTMLWriter) Write(p []byte) (n int, err error) {
	n, err = w.output.Write(p)
	return n, err
}

// WriteString appends s to the buffered output.
func (w *HTMLWriter) WriteString(s string) (n int, err error) {
	n, err = w.output.WriteString(s)
	return n, err
}

// WriteByte appends the single byte b to the buffered output.
func (w *HTMLWriter) WriteByte(b byte) error {
	err := w.output.WriteByte(b)
	return err
}

// Newline appends a single '\n' to the output. Many block-level rendering
// methods call it before writing their opening tag.
//
// NOTE(review): an earlier comment said the newline is written only "if the
// output is not pristine", but the code writes it unconditionally — confirm
// which behavior is intended.
func (w *HTMLWriter) Newline() {
	w.output.WriteByte('\n')
}
151
152func (r *HTML) Write(b []byte) (int, error) {
153 return r.w.Write(b)
154}
155
156func HTMLRendererWithParameters(flags HTMLFlags, extensions Extensions, title string,
157 css string, renderParameters HTMLRendererParameters) Renderer {
158 // configure the rendering engine
159 closeTag := htmlClose
160 if flags&UseXHTML != 0 {
161 closeTag = xhtmlClose
162 }
163
164 if renderParameters.FootnoteReturnLinkContents == "" {
165 renderParameters.FootnoteReturnLinkContents = `<sup>[return]</sup>`
166 }
167
168 var writer HTMLWriter
169 return &HTML{
170 flags: flags,
171 extensions: extensions,
172 closeTag: closeTag,
173 title: title,
174 css: css,
175 parameters: renderParameters,
176
177 headerCount: 0,
178 currentLevel: 0,
179 toc: new(bytes.Buffer),
180
181 headerIDs: make(map[string]int),
182
183 w: writer,
184 }
185}
186
// escapeSingleChar returns the HTML entity for char and true when char is
// one of '"', '&', '<' or '>'; for every other byte it returns "" and false.
//
// The chain of if statements is deliberate: the original author measured it
// as slightly faster than a switch (and roughly 3x faster than a map lookup),
// which matters because attrEscape is the largest CPU user in normal use.
func escapeSingleChar(char byte) (string, bool) {
	if char == '"' {
		return "&quot;", true
	}
	if char == '&' {
		return "&amp;", true
	}
	if char == '<' {
		return "&lt;", true
	}
	if char == '>' {
		return "&gt;", true
	}
	return "", false
}
206
207func (r *HTML) attrEscape(src []byte) {
208 org := 0
209 for i, ch := range src {
210 if entity, ok := escapeSingleChar(ch); ok {
211 if i > org {
212 // copy all the normal characters since the last escape
213 r.w.Write(src[org:i])
214 }
215 org = i + 1
216 r.w.WriteString(entity)
217 }
218 }
219 if org < len(src) {
220 r.w.Write(src[org:])
221 }
222}
223
// attrEscape2 returns src escaped for use in HTML text or attribute values.
//
// The input is first unescaped so that entities already present in src are
// not escaped a second time. html.EscapeString emits the numeric references
// &#34; and &#39; for quotes; these are rewritten to &quot; and a literal
// apostrophe respectively.
func attrEscape2(src []byte) []byte {
	unescaped := html.UnescapeString(string(src))
	escaped := []byte(html.EscapeString(unescaped))
	escaped = bytes.Replace(escaped, []byte("&#34;"), []byte("&quot;"), -1)
	return bytes.Replace(escaped, []byte("&#39;"), []byte{'\''}, -1)
}
230
231func (r *HTML) entityEscapeWithSkip(src []byte, skipRanges [][]int) {
232 end := 0
233 for _, rang := range skipRanges {
234 r.attrEscape(src[end:rang[0]])
235 r.w.Write(src[rang[0]:rang[1]])
236 end = rang[1]
237 }
238 r.attrEscape(src[end:])
239}
240
241func (r *HTML) TitleBlock(text []byte) {
242 text = bytes.TrimPrefix(text, []byte("% "))
243 text = bytes.Replace(text, []byte("\n% "), []byte("\n"), -1)
244 r.w.WriteString("<h1 class=\"title\">")
245 r.w.Write(text)
246 r.w.WriteString("\n</h1>")
247}
248
249func (r *HTML) BeginHeader(level int, id string) {
250 r.w.Newline()
251
252 if id == "" && r.flags&TOC != 0 {
253 id = fmt.Sprintf("toc_%d", r.headerCount)
254 }
255
256 if id != "" {
257 id = r.ensureUniqueHeaderID(id)
258
259 if r.parameters.HeaderIDPrefix != "" {
260 id = r.parameters.HeaderIDPrefix + id
261 }
262
263 if r.parameters.HeaderIDSuffix != "" {
264 id = id + r.parameters.HeaderIDSuffix
265 }
266
267 r.w.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, id))
268 } else {
269 r.w.WriteString(fmt.Sprintf("<h%d>", level))
270 }
271}
272
273func (r *HTML) EndHeader(level int, id string, header []byte) {
274 // are we building a table of contents?
275 if r.flags&TOC != 0 {
276 r.TocHeaderWithAnchor(header, level, id)
277 }
278
279 r.w.WriteString(fmt.Sprintf("</h%d>\n", level))
280}
281
282func (r *HTML) BlockHtml(text []byte) {
283 if r.flags&SkipHTML != 0 {
284 return
285 }
286
287 r.w.Newline()
288 r.w.Write(text)
289 r.w.WriteByte('\n')
290}
291
292func (r *HTML) HRule() {
293 r.w.Newline()
294 r.w.WriteString("<hr")
295 r.w.WriteString(r.closeTag)
296 r.w.WriteByte('\n')
297}
298
299func (r *HTML) BlockCode(text []byte, lang string) {
300 r.w.Newline()
301
302 // parse out the language names/classes
303 count := 0
304 for _, elt := range strings.Fields(lang) {
305 if elt[0] == '.' {
306 elt = elt[1:]
307 }
308 if len(elt) == 0 {
309 continue
310 }
311 if count == 0 {
312 r.w.WriteString("<pre><code class=\"language-")
313 } else {
314 r.w.WriteByte(' ')
315 }
316 r.attrEscape([]byte(elt))
317 count++
318 }
319
320 if count == 0 {
321 r.w.WriteString("<pre><code>")
322 } else {
323 r.w.WriteString("\">")
324 }
325
326 r.attrEscape(text)
327 r.w.WriteString("</code></pre>\n")
328}
329
330func (r *HTML) BlockQuote(text []byte) {
331 r.w.Newline()
332 r.w.WriteString("<blockquote>\n")
333 r.w.Write(text)
334 r.w.WriteString("</blockquote>\n")
335}
336
337func (r *HTML) Table(header []byte, body []byte, columnData []CellAlignFlags) {
338 r.w.Newline()
339 r.w.WriteString("<table>\n<thead>\n")
340 r.w.Write(header)
341 r.w.WriteString("</thead>\n\n<tbody>\n")
342 r.w.Write(body)
343 r.w.WriteString("</tbody>\n</table>\n")
344}
345
346func (r *HTML) TableRow(text []byte) {
347 r.w.Newline()
348 r.w.WriteString("<tr>\n")
349 r.w.Write(text)
350 r.w.WriteString("\n</tr>\n")
351}
352
// leadingNewline appends '\n' to out unless out is still empty.
func leadingNewline(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}
358
359func (r *HTML) TableHeaderCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
360 leadingNewline(out)
361 switch align {
362 case TableAlignmentLeft:
363 out.WriteString("<th align=\"left\">")
364 case TableAlignmentRight:
365 out.WriteString("<th align=\"right\">")
366 case TableAlignmentCenter:
367 out.WriteString("<th align=\"center\">")
368 default:
369 out.WriteString("<th>")
370 }
371
372 out.Write(text)
373 out.WriteString("</th>")
374}
375
376func (r *HTML) TableCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
377 leadingNewline(out)
378 switch align {
379 case TableAlignmentLeft:
380 out.WriteString("<td align=\"left\">")
381 case TableAlignmentRight:
382 out.WriteString("<td align=\"right\">")
383 case TableAlignmentCenter:
384 out.WriteString("<td align=\"center\">")
385 default:
386 out.WriteString("<td>")
387 }
388
389 out.Write(text)
390 out.WriteString("</td>")
391}
392
393func (r *HTML) BeginFootnotes() {
394 r.w.WriteString("<div class=\"footnotes\">\n")
395 r.HRule()
396 r.BeginList(ListTypeOrdered)
397}
398
399func (r *HTML) EndFootnotes() {
400 r.EndList(ListTypeOrdered)
401 r.w.WriteString("</div>\n")
402}
403
404func (r *HTML) FootnoteItem(name, text []byte, flags ListType) {
405 if flags&ListItemContainsBlock != 0 || flags&ListItemBeginningOfList != 0 {
406 r.w.Newline()
407 }
408 slug := slugify(name)
409 r.w.WriteString(`<li id="`)
410 r.w.WriteString(`fn:`)
411 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
412 r.w.Write(slug)
413 r.w.WriteString(`">`)
414 r.w.Write(text)
415 if r.flags&FootnoteReturnLinks != 0 {
416 r.w.WriteString(` <a class="footnote-return" href="#`)
417 r.w.WriteString(`fnref:`)
418 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
419 r.w.Write(slug)
420 r.w.WriteString(`">`)
421 r.w.WriteString(r.parameters.FootnoteReturnLinkContents)
422 r.w.WriteString(`</a>`)
423 }
424 r.w.WriteString("</li>\n")
425}
426
427func (r *HTML) BeginList(flags ListType) {
428 r.w.Newline()
429
430 if flags&ListTypeDefinition != 0 {
431 r.w.WriteString("<dl>")
432 } else if flags&ListTypeOrdered != 0 {
433 r.w.WriteString("<ol>")
434 } else {
435 r.w.WriteString("<ul>")
436 }
437}
438
439func (r *HTML) EndList(flags ListType) {
440 if flags&ListTypeDefinition != 0 {
441 r.w.WriteString("</dl>\n")
442 } else if flags&ListTypeOrdered != 0 {
443 r.w.WriteString("</ol>\n")
444 } else {
445 r.w.WriteString("</ul>\n")
446 }
447}
448
449func (r *HTML) ListItem(text []byte, flags ListType) {
450 if (flags&ListItemContainsBlock != 0 && flags&ListTypeDefinition == 0) ||
451 flags&ListItemBeginningOfList != 0 {
452 r.w.Newline()
453 }
454 if flags&ListTypeTerm != 0 {
455 r.w.WriteString("<dt>")
456 } else if flags&ListTypeDefinition != 0 {
457 r.w.WriteString("<dd>")
458 } else {
459 r.w.WriteString("<li>")
460 }
461 r.w.Write(text)
462 if flags&ListTypeTerm != 0 {
463 r.w.WriteString("</dt>\n")
464 } else if flags&ListTypeDefinition != 0 {
465 r.w.WriteString("</dd>\n")
466 } else {
467 r.w.WriteString("</li>\n")
468 }
469}
470
471func (r *HTML) BeginParagraph() {
472 r.w.Newline()
473 r.w.WriteString("<p>")
474}
475
476func (r *HTML) EndParagraph() {
477 r.w.WriteString("</p>\n")
478}
479
480func (r *HTML) AutoLink(link []byte, kind LinkType) {
481 skipRanges := htmlEntity.FindAllIndex(link, -1)
482 if r.flags&Safelink != 0 && !isSafeLink(link) && kind != LinkTypeEmail {
483 // mark it but don't link it if it is not a safe link: no smartypants
484 r.w.WriteString("<tt>")
485 r.entityEscapeWithSkip(link, skipRanges)
486 r.w.WriteString("</tt>")
487 return
488 }
489
490 r.w.WriteString("<a href=\"")
491 if kind == LinkTypeEmail {
492 r.w.WriteString("mailto:")
493 } else {
494 r.maybeWriteAbsolutePrefix(link)
495 }
496
497 r.entityEscapeWithSkip(link, skipRanges)
498
499 var relAttrs []string
500 if r.flags&NofollowLinks != 0 && !isRelativeLink(link) {
501 relAttrs = append(relAttrs, "nofollow")
502 }
503 if r.flags&NoreferrerLinks != 0 && !isRelativeLink(link) {
504 relAttrs = append(relAttrs, "noreferrer")
505 }
506 if len(relAttrs) > 0 {
507 r.w.WriteString(fmt.Sprintf("\" rel=\"%s", strings.Join(relAttrs, " ")))
508 }
509
510 // blank target only add to external link
511 if r.flags&HrefTargetBlank != 0 && !isRelativeLink(link) {
512 r.w.WriteString("\" target=\"_blank")
513 }
514
515 r.w.WriteString("\">")
516
517 // Pretty print: if we get an email address as
518 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
519 // want to print the `mailto:` prefix
520 switch {
521 case bytes.HasPrefix(link, []byte("mailto://")):
522 r.attrEscape(link[len("mailto://"):])
523 case bytes.HasPrefix(link, []byte("mailto:")):
524 r.attrEscape(link[len("mailto:"):])
525 default:
526 r.entityEscapeWithSkip(link, skipRanges)
527 }
528
529 r.w.WriteString("</a>")
530}
531
532func (r *HTML) CodeSpan(text []byte) {
533 r.w.WriteString("<code>")
534 r.attrEscape(text)
535 r.w.WriteString("</code>")
536}
537
538func (r *HTML) DoubleEmphasis(text []byte) {
539 r.w.WriteString("<strong>")
540 r.w.Write(text)
541 r.w.WriteString("</strong>")
542}
543
544func (r *HTML) Emphasis(text []byte) {
545 if len(text) == 0 {
546 return
547 }
548 r.w.WriteString("<em>")
549 r.w.Write(text)
550 r.w.WriteString("</em>")
551}
552
553func (r *HTML) maybeWriteAbsolutePrefix(link []byte) {
554 if r.parameters.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' {
555 r.w.WriteString(r.parameters.AbsolutePrefix)
556 if link[0] != '/' {
557 r.w.WriteByte('/')
558 }
559 }
560}
561
562func (r *HTML) Image(link []byte, title []byte, alt []byte) {
563 if r.flags&SkipImages != 0 {
564 return
565 }
566
567 r.w.WriteString("<img src=\"")
568 r.maybeWriteAbsolutePrefix(link)
569 r.attrEscape(link)
570 r.w.WriteString("\" alt=\"")
571 if len(alt) > 0 {
572 r.attrEscape(alt)
573 }
574 if len(title) > 0 {
575 r.w.WriteString("\" title=\"")
576 r.attrEscape(title)
577 }
578
579 r.w.WriteByte('"')
580 r.w.WriteString(r.closeTag)
581}
582
583func (r *HTML) LineBreak() {
584 r.w.WriteString("<br")
585 r.w.WriteString(r.closeTag)
586 r.w.WriteByte('\n')
587}
588
589func (r *HTML) Link(link []byte, title []byte, content []byte) {
590 if r.flags&SkipLinks != 0 {
591 // write the link text out but don't link it, just mark it with typewriter font
592 r.w.WriteString("<tt>")
593 r.attrEscape(content)
594 r.w.WriteString("</tt>")
595 return
596 }
597
598 if r.flags&Safelink != 0 && !isSafeLink(link) {
599 // write the link text out but don't link it, just mark it with typewriter font
600 r.w.WriteString("<tt>")
601 r.attrEscape(content)
602 r.w.WriteString("</tt>")
603 return
604 }
605
606 r.w.WriteString("<a href=\"")
607 r.maybeWriteAbsolutePrefix(link)
608 r.attrEscape(link)
609 if len(title) > 0 {
610 r.w.WriteString("\" title=\"")
611 r.attrEscape(title)
612 }
613 var relAttrs []string
614 if r.flags&NofollowLinks != 0 && !isRelativeLink(link) {
615 relAttrs = append(relAttrs, "nofollow")
616 }
617 if r.flags&NoreferrerLinks != 0 && !isRelativeLink(link) {
618 relAttrs = append(relAttrs, "noreferrer")
619 }
620 if len(relAttrs) > 0 {
621 r.w.WriteString(fmt.Sprintf("\" rel=\"%s", strings.Join(relAttrs, " ")))
622 }
623
624 // blank target only add to external link
625 if r.flags&HrefTargetBlank != 0 && !isRelativeLink(link) {
626 r.w.WriteString("\" target=\"_blank")
627 }
628
629 r.w.WriteString("\">")
630 r.w.Write(content)
631 r.w.WriteString("</a>")
632 return
633}
634
635func (r *HTML) RawHtmlTag(text []byte) {
636 if r.flags&SkipHTML != 0 {
637 return
638 }
639 if r.flags&SkipStyle != 0 && isHtmlTag(text, "style") {
640 return
641 }
642 if r.flags&SkipLinks != 0 && isHtmlTag(text, "a") {
643 return
644 }
645 if r.flags&SkipImages != 0 && isHtmlTag(text, "img") {
646 return
647 }
648 r.w.Write(text)
649}
650
651func (r *HTML) TripleEmphasis(text []byte) {
652 r.w.WriteString("<strong><em>")
653 r.w.Write(text)
654 r.w.WriteString("</em></strong>")
655}
656
657func (r *HTML) StrikeThrough(text []byte) {
658 r.w.WriteString("<del>")
659 r.w.Write(text)
660 r.w.WriteString("</del>")
661}
662
663func (r *HTML) FootnoteRef(ref []byte, id int) {
664 slug := slugify(ref)
665 r.w.WriteString(`<sup class="footnote-ref" id="`)
666 r.w.WriteString(`fnref:`)
667 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
668 r.w.Write(slug)
669 r.w.WriteString(`"><a rel="footnote" href="#`)
670 r.w.WriteString(`fn:`)
671 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
672 r.w.Write(slug)
673 r.w.WriteString(`">`)
674 r.w.WriteString(strconv.Itoa(id))
675 r.w.WriteString(`</a></sup>`)
676}
677
678func (r *HTML) Entity(entity []byte) {
679 r.w.Write(entity)
680}
681
682func (r *HTML) NormalText(text []byte) {
683 if r.extensions&Smartypants != 0 {
684 r.Smartypants(text)
685 } else {
686 r.attrEscape(text)
687 }
688}
689
690func (r *HTML) Smartypants(text []byte) {
691 r.w.Write(NewSmartypantsRenderer(r.extensions).Process(text))
692}
693
694func (r *HTML) DocumentHeader() {
695 if r.flags&CompletePage == 0 {
696 return
697 }
698
699 ending := ""
700 if r.flags&UseXHTML != 0 {
701 r.w.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
702 r.w.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
703 r.w.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
704 ending = " /"
705 } else {
706 r.w.WriteString("<!DOCTYPE html>\n")
707 r.w.WriteString("<html>\n")
708 }
709 r.w.WriteString("<head>\n")
710 r.w.WriteString(" <title>")
711 r.NormalText([]byte(r.title))
712 r.w.WriteString("</title>\n")
713 r.w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
714 r.w.WriteString(VERSION)
715 r.w.WriteString("\"")
716 r.w.WriteString(ending)
717 r.w.WriteString(">\n")
718 r.w.WriteString(" <meta charset=\"utf-8\"")
719 r.w.WriteString(ending)
720 r.w.WriteString(">\n")
721 if r.css != "" {
722 r.w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
723 r.attrEscape([]byte(r.css))
724 r.w.WriteString("\"")
725 r.w.WriteString(ending)
726 r.w.WriteString(">\n")
727 }
728 r.w.WriteString("</head>\n")
729 r.w.WriteString("<body>\n")
730
731 r.tocMarker = r.w.output.Len() // XXX
732}
733
734func (r *HTML) DocumentFooter() {
735 // finalize and insert the table of contents
736 if r.flags&TOC != 0 {
737 r.TocFinalize()
738
739 // now we have to insert the table of contents into the document
740 var temp bytes.Buffer
741
742 // start by making a copy of everything after the document header
743 temp.Write(r.w.output.Bytes()[r.tocMarker:])
744
745 // now clear the copied material from the main output buffer
746 r.w.output.Truncate(r.tocMarker)
747
748 // corner case spacing issue
749 if r.flags&CompletePage != 0 {
750 r.w.WriteByte('\n')
751 }
752
753 // insert the table of contents
754 r.w.WriteString("<nav>\n")
755 r.w.Write(r.toc.Bytes())
756 r.w.WriteString("</nav>\n")
757
758 // corner case spacing issue
759 if r.flags&CompletePage == 0 && r.flags&OmitContents == 0 {
760 r.w.WriteByte('\n')
761 }
762
763 // write out everything that came after it
764 if r.flags&OmitContents == 0 {
765 r.w.Write(temp.Bytes())
766 }
767 }
768
769 if r.flags&CompletePage != 0 {
770 r.w.WriteString("\n</body>\n")
771 r.w.WriteString("</html>\n")
772 }
773
774}
775
776func (r *HTML) TocHeaderWithAnchor(text []byte, level int, anchor string) {
777 for level > r.currentLevel {
778 switch {
779 case bytes.HasSuffix(r.toc.Bytes(), []byte("</li>\n")):
780 // this sublist can nest underneath a header
781 size := r.toc.Len()
782 r.toc.Truncate(size - len("</li>\n"))
783
784 case r.currentLevel > 0:
785 r.toc.WriteString("<li>")
786 }
787 if r.toc.Len() > 0 {
788 r.toc.WriteByte('\n')
789 }
790 r.toc.WriteString("<ul>\n")
791 r.currentLevel++
792 }
793
794 for level < r.currentLevel {
795 r.toc.WriteString("</ul>")
796 if r.currentLevel > 1 {
797 r.toc.WriteString("</li>\n")
798 }
799 r.currentLevel--
800 }
801
802 r.toc.WriteString("<li><a href=\"#")
803 if anchor != "" {
804 r.toc.WriteString(anchor)
805 } else {
806 r.toc.WriteString("toc_")
807 r.toc.WriteString(strconv.Itoa(r.headerCount))
808 }
809 r.toc.WriteString("\">")
810 r.headerCount++
811
812 r.toc.Write(text)
813
814 r.toc.WriteString("</a></li>\n")
815}
816
817func (r *HTML) TocHeader(text []byte, level int) {
818 r.TocHeaderWithAnchor(text, level, "")
819}
820
821func (r *HTML) TocFinalize() {
822 for r.currentLevel > 1 {
823 r.toc.WriteString("</ul></li>\n")
824 r.currentLevel--
825 }
826
827 if r.currentLevel > 0 {
828 r.toc.WriteString("</ul>\n")
829 }
830}
831
832func isHtmlTag(tag []byte, tagname string) bool {
833 found, _ := findHtmlTagPos(tag, tagname)
834 return found
835}
836
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of
// char at or after start that is not inside single, double or backtick
// quotes (quoted text might be JavaScript). Quote state is tracked from
// start onward.
//
// When no such occurrence exists, start is returned; callers cannot tell
// that apart from a match located exactly at start.
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var inSingle, inDouble, inGrave bool
	for pos := start; pos < len(html); pos++ {
		c := html[pos]
		if c == char && !(inSingle || inDouble || inGrave) {
			return pos
		}
		switch c {
		case '\'':
			inSingle = !inSingle
		case '"':
			inDouble = !inDouble
		case '`':
			inGrave = !inGrave
		}
	}
	return start
}
859
860func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
861 i := 0
862 if i < len(tag) && tag[0] != '<' {
863 return false, -1
864 }
865 i++
866 i = skipSpace(tag, i)
867
868 if i < len(tag) && tag[i] == '/' {
869 i++
870 }
871
872 i = skipSpace(tag, i)
873 j := 0
874 for ; i < len(tag); i, j = i+1, j+1 {
875 if j >= len(tagname) {
876 break
877 }
878
879 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
880 return false, -1
881 }
882 }
883
884 if i == len(tag) {
885 return false, -1
886 }
887
888 rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
889 if rightAngle > i {
890 return true, rightAngle
891 }
892
893 return false, -1
894}
895
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. When there is none it returns len(text), or start itself
// if start is already at or past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
903
904func skipSpace(tag []byte, i int) int {
905 for i < len(tag) && isspace(tag[i]) {
906 i++
907 }
908 return i
909}
910
// skipChar returns the index of the first byte at or after start that
// differs from char, or len(data) when only char remains. A start at or past
// the end is returned unchanged.
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
918
// isRelativeLink reports whether link is a fragment or a path relative to
// the current site or directory. It returns true for links that
//
//   - begin with '#' (a fragment),
//   - begin with a single '/' (a leading "//" may be a protocol-relative
//     link and is not treated as relative),
//   - begin with "./" or "../".
//
// An empty link is reported as not relative, so callers may safely inspect
// link[0] after a true result.
func isRelativeLink(link []byte) (yes bool) {
	if len(link) == 0 {
		return false
	}

	switch link[0] {
	case '#':
		return true
	case '/':
		// the root "/" alone, or "/path" but not "//host"
		return len(link) == 1 || link[1] != '/'
	}

	// current directory "./" or parent directory "../"
	return bytes.HasPrefix(link, []byte("./")) || bytes.HasPrefix(link, []byte("../"))
}
947
948func (r *HTML) ensureUniqueHeaderID(id string) string {
949 for count, found := r.headerIDs[id]; found; count, found = r.headerIDs[id] {
950 tmp := fmt.Sprintf("%s-%d", id, count+1)
951
952 if _, tmpFound := r.headerIDs[tmp]; !tmpFound {
953 r.headerIDs[id] = count + 1
954 id = tmp
955 } else {
956 id = id + "-1"
957 }
958 }
959
960 if _, found := r.headerIDs[id]; !found {
961 r.headerIDs[id] = 0
962 }
963
964 return id
965}
966
967func (r *HTML) addAbsPrefix(link []byte) []byte {
968 if r.parameters.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' {
969 newDest := r.parameters.AbsolutePrefix
970 if link[0] != '/' {
971 newDest += "/"
972 }
973 newDest += string(link)
974 return []byte(newDest)
975 }
976 return link
977}
978
979func appendLinkAttrs(attrs []string, flags HTMLFlags, link []byte) []string {
980 if isRelativeLink(link) {
981 return attrs
982 }
983 val := []string{}
984 if flags&NofollowLinks != 0 {
985 val = append(val, "nofollow")
986 }
987 if flags&NoreferrerLinks != 0 {
988 val = append(val, "noreferrer")
989 }
990 if flags&HrefTargetBlank != 0 {
991 attrs = append(attrs, "target=\"_blank\"")
992 }
993 if len(val) == 0 {
994 return attrs
995 }
996 attr := fmt.Sprintf("rel=%q", strings.Join(val, " "))
997 return append(attrs, attr)
998}
999
// isMailto reports whether link starts with the exact, lower-case prefix
// "mailto:".
func isMailto(link []byte) bool {
	const scheme = "mailto:"
	return len(link) >= len(scheme) && string(link[:len(scheme)]) == scheme
}
1003
1004func isSmartypantable(node *Node) bool {
1005 pt := node.Parent.Type
1006 return pt != Link && pt != CodeBlock && pt != Code
1007}
1008
// appendLanguageAttr appends a class="language-<word>" attribute to attrs,
// where <word> is the first whitespace-separated word of the code block's
// info string. attrs is returned unchanged when info contains no word.
//
// The word is HTML-escaped because the info string comes straight from the
// document and ends up inside a double-quoted attribute value.
func appendLanguageAttr(attrs []string, info []byte) []string {
	words := bytes.Fields(info)
	if len(words) == 0 {
		return attrs
	}
	language := html.EscapeString(string(words[0]))
	return append(attrs, fmt.Sprintf("class=\"language-%s\"", language))
}
1016
// tag builds the text of an HTML tag: "<name", the attributes (if any)
// separated by single spaces, " /" when selfClosing is set, and ">". For a
// closing tag, pass a name that starts with '/'.
func tag(name string, attrs []string, selfClosing bool) []byte {
	var buf bytes.Buffer
	buf.WriteByte('<')
	buf.WriteString(name)
	if len(attrs) > 0 {
		buf.WriteByte(' ')
		buf.WriteString(strings.Join(attrs, " "))
	}
	if selfClosing {
		buf.WriteString(" /")
	}
	buf.WriteByte('>')
	return buf.Bytes()
}
1027
1028func footnoteRef(prefix string, node *Node) []byte {
1029 urlFrag := prefix + string(slugify(node.Destination))
1030 anchor := fmt.Sprintf(`<a rel="footnote" href="#fn:%s">%d</a>`, urlFrag, node.NoteID)
1031 return []byte(fmt.Sprintf(`<sup class="footnote-ref" id="fnref:%s">%s</sup>`, urlFrag, anchor))
1032}
1033
// footnoteItem returns the opening <li> tag of a footnote, with the id
// "fn:<prefix><slug>".
func footnoteItem(prefix string, slug []byte) []byte {
	opening := `<li id="fn:` + prefix + string(slug) + `">`
	return []byte(opening)
}
1037
// footnoteReturnLink returns the link placed at the end of a footnote that
// points back to "#fnref:<prefix><slug>". returnLink is used, unescaped, as
// the link's content.
func footnoteReturnLink(prefix, returnLink string, slug []byte) []byte {
	link := ` <a class="footnote-return" href="#fnref:` + prefix + string(slug) + `">` +
		returnLink + `</a>`
	return []byte(link)
}
1042
1043func itemOpenCR(node *Node) bool {
1044 if node.Prev == nil {
1045 return false
1046 }
1047 ld := node.Parent.ListData
1048 return !ld.Tight && ld.ListFlags&ListTypeDefinition == 0
1049}
1050
1051func skipParagraphTags(node *Node) bool {
1052 grandparent := node.Parent.Parent
1053 if grandparent == nil || grandparent.Type != List {
1054 return false
1055 }
1056 tightOrTerm := grandparent.Tight || node.Parent.ListFlags&ListTypeTerm != 0
1057 return grandparent.Type == List && tightOrTerm
1058}
1059
1060func cellAlignment(align CellAlignFlags) string {
1061 switch align {
1062 case TableAlignmentLeft:
1063 return "left"
1064 case TableAlignmentRight:
1065 return "right"
1066 case TableAlignmentCenter:
1067 return "center"
1068 default:
1069 return ""
1070 }
1071}
1072
1073func esc(text []byte, preserveEntities bool) []byte {
1074 return attrEscape2(text)
1075}
1076
// escCode returns text escaped for use inside a code element. Unlike
// attrEscape2, the input is not unescaped first, so an entity that appears
// in the source is shown literally (its '&' becomes &amp;).
//
// html.EscapeString emits the numeric references &#34; and &#39; for
// quotes; these are rewritten to &quot; and a literal apostrophe
// respectively. The preserveEntities argument is currently ignored.
func escCode(text []byte, preserveEntities bool) []byte {
	escaped := []byte(html.EscapeString(string(text)))
	escaped = bytes.Replace(escaped, []byte("&#34;"), []byte("&quot;"), -1)
	return bytes.Replace(escaped, []byte("&#39;"), []byte{'\''}, -1)
}
1082
1083func (r *HTML) out(w io.Writer, text []byte) {
1084 if r.disableTags > 0 {
1085 w.Write(reHtmlTag.ReplaceAll(text, []byte{}))
1086 } else {
1087 w.Write(text)
1088 }
1089 r.lastOutputLen = len(text)
1090}
1091
1092func (r *HTML) cr(w io.Writer) {
1093 if r.lastOutputLen > 0 {
1094 r.out(w, []byte{'\n'})
1095 }
1096}
1097
1098func (r *HTML) RenderNode(w io.Writer, node *Node, entering bool) {
1099 attrs := []string{}
1100 switch node.Type {
1101 case Text:
1102 r.out(w, node.Literal)
1103 break
1104 case Softbreak:
1105 r.out(w, []byte("\n"))
1106 // TODO: make it configurable via out(renderer.softbreak)
1107 case Hardbreak:
1108 r.out(w, tag("br", nil, true))
1109 r.cr(w)
1110 case Emph:
1111 if entering {
1112 r.out(w, tag("em", nil, false))
1113 } else {
1114 r.out(w, tag("/em", nil, false))
1115 }
1116 break
1117 case Strong:
1118 if entering {
1119 r.out(w, tag("strong", nil, false))
1120 } else {
1121 r.out(w, tag("/strong", nil, false))
1122 }
1123 break
1124 case Del:
1125 if entering {
1126 r.out(w, tag("del", nil, false))
1127 } else {
1128 r.out(w, tag("/del", nil, false))
1129 }
1130 case HTMLSpan:
1131 //if options.safe {
1132 // out(w, "<!-- raw HTML omitted -->")
1133 //} else {
1134 r.out(w, node.Literal)
1135 //}
1136 case Link:
1137 // mark it but don't link it if it is not a safe link: no smartypants
1138 dest := node.LinkData.Destination
1139 if r.flags&Safelink != 0 && !isSafeLink(dest) && !isMailto(dest) {
1140 if entering {
1141 r.out(w, tag("tt", nil, false))
1142 } else {
1143 r.out(w, tag("/tt", nil, false))
1144 }
1145 } else {
1146 if entering {
1147 dest = r.addAbsPrefix(dest)
1148 //if (!(options.safe && potentiallyUnsafe(node.destination))) {
1149 attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest, true)))
1150 //}
1151 if node.NoteID != 0 {
1152 r.out(w, footnoteRef(r.parameters.FootnoteAnchorPrefix, node))
1153 break
1154 }
1155 attrs = appendLinkAttrs(attrs, r.flags, dest)
1156 if len(node.LinkData.Title) > 0 {
1157 attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title, true)))
1158 }
1159 r.out(w, tag("a", attrs, false))
1160 } else {
1161 if node.NoteID != 0 {
1162 break
1163 }
1164 r.out(w, tag("/a", nil, false))
1165 }
1166 }
1167 case Image:
1168 if entering {
1169 dest := node.LinkData.Destination
1170 dest = r.addAbsPrefix(dest)
1171 if r.disableTags == 0 {
1172 //if options.safe && potentiallyUnsafe(dest) {
1173 //out(w, `<img src="" alt="`)
1174 //} else {
1175 r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest, true))))
1176 //}
1177 }
1178 r.disableTags++
1179 } else {
1180 r.disableTags--
1181 if r.disableTags == 0 {
1182 if node.LinkData.Title != nil {
1183 r.out(w, []byte(`" title="`))
1184 r.out(w, esc(node.LinkData.Title, true))
1185 }
1186 r.out(w, []byte(`" />`))
1187 }
1188 }
1189 case Code:
1190 r.out(w, tag("code", nil, false))
1191 r.out(w, escCode(node.Literal, false))
1192 r.out(w, tag("/code", nil, false))
1193 case Document:
1194 break
1195 case Paragraph:
1196 if skipParagraphTags(node) {
1197 break
1198 }
1199 if entering {
1200 // TODO: untangle this clusterfuck about when the newlines need
1201 // to be added and when not.
1202 if node.Prev != nil {
1203 t := node.Prev.Type
1204 if t == HTMLBlock || t == List || t == Paragraph || t == Header || t == CodeBlock || t == BlockQuote || t == HorizontalRule {
1205 r.cr(w)
1206 }
1207 }
1208 if node.Parent.Type == BlockQuote && node.Prev == nil {
1209 r.cr(w)
1210 }
1211 r.out(w, tag("p", attrs, false))
1212 } else {
1213 r.out(w, tag("/p", attrs, false))
1214 if !(node.Parent.Type == Item && node.Next == nil) {
1215 r.cr(w)
1216 }
1217 }
1218 break
1219 case BlockQuote:
1220 if entering {
1221 r.cr(w)
1222 r.out(w, tag("blockquote", attrs, false))
1223 } else {
1224 r.out(w, tag("/blockquote", nil, false))
1225 r.cr(w)
1226 }
1227 break
1228 case HTMLBlock:
1229 r.cr(w)
1230 r.out(w, node.Literal)
1231 r.cr(w)
1232 case Header:
1233 tagname := fmt.Sprintf("h%d", node.Level)
1234 if entering {
1235 if node.IsTitleblock {
1236 attrs = append(attrs, `class="title"`)
1237 }
1238 if node.HeaderID != "" {
1239 id := r.ensureUniqueHeaderID(node.HeaderID)
1240 if r.parameters.HeaderIDPrefix != "" {
1241 id = r.parameters.HeaderIDPrefix + id
1242 }
1243 if r.parameters.HeaderIDSuffix != "" {
1244 id = id + r.parameters.HeaderIDSuffix
1245 }
1246 attrs = append(attrs, fmt.Sprintf(`id="%s"`, id))
1247 }
1248 r.cr(w)
1249 r.out(w, tag(tagname, attrs, false))
1250 } else {
1251 r.out(w, tag("/"+tagname, nil, false))
1252 if !(node.Parent.Type == Item && node.Next == nil) {
1253 r.cr(w)
1254 }
1255 }
1256 break
1257 case HorizontalRule:
1258 r.cr(w)
1259 r.out(w, tag("hr", attrs, r.flags&UseXHTML != 0))
1260 r.cr(w)
1261 break
1262 case List:
1263 tagName := "ul"
1264 if node.ListFlags&ListTypeOrdered != 0 {
1265 tagName = "ol"
1266 }
1267 if node.ListFlags&ListTypeDefinition != 0 {
1268 tagName = "dl"
1269 }
1270 if entering {
1271 // var start = node.listStart;
1272 // if (start !== null && start !== 1) {
1273 // attrs.push(['start', start.toString()]);
1274 // }
1275 r.cr(w)
1276 if node.Parent.Type == Item && node.Parent.Parent.Tight {
1277 r.cr(w)
1278 }
1279 r.out(w, tag(tagName, attrs, false))
1280 r.cr(w)
1281 } else {
1282 r.out(w, tag("/"+tagName, nil, false))
1283 //cr(w)
1284 //if node.parent.Type != Item {
1285 // cr(w)
1286 //}
1287 if node.Parent.Type == Item && node.Next != nil {
1288 r.cr(w)
1289 }
1290 if node.Parent.Type == Document || node.Parent.Type == BlockQuote {
1291 r.cr(w)
1292 }
1293 }
1294 case Item:
1295 tagName := "li"
1296 if node.ListFlags&ListTypeDefinition != 0 {
1297 tagName = "dd"
1298 }
1299 if node.ListFlags&ListTypeTerm != 0 {
1300 tagName = "dt"
1301 }
1302 if entering {
1303 if itemOpenCR(node) {
1304 r.cr(w)
1305 }
1306 if node.ListData.RefLink != nil {
1307 slug := slugify(node.ListData.RefLink)
1308 r.out(w, footnoteItem(r.parameters.FootnoteAnchorPrefix, slug))
1309 break
1310 }
1311 r.out(w, tag(tagName, nil, false))
1312 } else {
1313 if node.ListData.RefLink != nil {
1314 slug := slugify(node.ListData.RefLink)
1315 if r.flags&FootnoteReturnLinks != 0 {
1316 r.out(w, footnoteReturnLink(r.parameters.FootnoteAnchorPrefix, r.parameters.FootnoteReturnLinkContents, slug))
1317 }
1318 }
1319 r.out(w, tag("/"+tagName, nil, false))
1320 r.cr(w)
1321 }
1322 case CodeBlock:
1323 attrs = appendLanguageAttr(attrs, node.Info)
1324 r.cr(w)
1325 r.out(w, tag("pre", nil, false))
1326 r.out(w, tag("code", attrs, false))
1327 r.out(w, escCode(node.Literal, false))
1328 r.out(w, tag("/code", nil, false))
1329 r.out(w, tag("/pre", nil, false))
1330 if node.Parent.Type != Item {
1331 r.cr(w)
1332 }
1333 case Table:
1334 if entering {
1335 r.cr(w)
1336 r.out(w, tag("table", nil, false))
1337 } else {
1338 r.out(w, tag("/table", nil, false))
1339 r.cr(w)
1340 }
1341 case TableCell:
1342 tagName := "td"
1343 if node.IsHeader {
1344 tagName = "th"
1345 }
1346 if entering {
1347 align := cellAlignment(node.Align)
1348 if align != "" {
1349 attrs = append(attrs, fmt.Sprintf(`align="%s"`, align))
1350 }
1351 if node.Prev == nil {
1352 r.cr(w)
1353 }
1354 r.out(w, tag(tagName, attrs, false))
1355 } else {
1356 r.out(w, tag("/"+tagName, nil, false))
1357 r.cr(w)
1358 }
1359 case TableHead:
1360 if entering {
1361 r.cr(w)
1362 r.out(w, tag("thead", nil, false))
1363 } else {
1364 r.out(w, tag("/thead", nil, false))
1365 r.cr(w)
1366 }
1367 case TableBody:
1368 if entering {
1369 r.cr(w)
1370 r.out(w, tag("tbody", nil, false))
1371 // XXX: this is to adhere to a rather silly test. Should fix test.
1372 if node.FirstChild == nil {
1373 r.cr(w)
1374 }
1375 } else {
1376 r.out(w, tag("/tbody", nil, false))
1377 r.cr(w)
1378 }
1379 case TableRow:
1380 if entering {
1381 r.cr(w)
1382 r.out(w, tag("tr", nil, false))
1383 } else {
1384 r.out(w, tag("/tr", nil, false))
1385 r.cr(w)
1386 }
1387 default:
1388 panic("Unknown node type " + node.Type.String())
1389 }
1390}
1391
1392func (r *HTML) Render(ast *Node) []byte {
1393 //println("render_Blackfriday")
1394 //dump(ast)
1395 // Run Smartypants if it's enabled or simply escape text if not
1396 sr := NewSmartypantsRenderer(r.extensions)
1397 ast.Walk(func(node *Node, entering bool) {
1398 if node.Type == Text {
1399 if r.extensions&Smartypants != 0 {
1400 node.Literal = sr.Process(node.Literal)
1401 } else {
1402 node.Literal = esc(node.Literal, false)
1403 }
1404 }
1405 })
1406 var buff bytes.Buffer
1407 ast.Walk(func(node *Node, entering bool) {
1408 r.RenderNode(&buff, node, entering)
1409 })
1410 return buff.Bytes()
1411}