html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "html"
22 "io"
23 "regexp"
24 "strconv"
25 "strings"
26)
27
// HTMLFlags is a bit set of options that control the HTML renderer.
// Individual flags are ORed together.
type HTMLFlags int

// HTML renderer configuration options.
//
// HTMLFlagsNone occupies iota 0, so the first real flag (SkipHTML) is 1<<1
// and every following flag is the next higher bit.
const (
	HTMLFlagsNone       HTMLFlags = 0
	SkipHTML            HTMLFlags = 1 << iota // Skip preformatted HTML blocks
	SkipStyle                                 // Skip embedded <style> elements
	SkipImages                                // Skip embedded images
	SkipLinks                                 // Skip all links
	Safelink                                  // Only link to trusted protocols
	NofollowLinks                             // Only link with rel="nofollow"
	NoreferrerLinks                           // Only link with rel="noreferrer"
	HrefTargetBlank                           // Add a blank target
	CompletePage                              // Generate a complete HTML page
	UseXHTML                                  // Generate XHTML output instead of HTML
	FootnoteReturnLinks                       // Generate a link at the end of a footnote to return to the source

	// Regular expression fragments. They are concatenated into HTMLTag,
	// which matches an open tag, a close tag, a comment, a processing
	// instruction, a declaration or a CDATA section.
	TagName               = "[A-Za-z][A-Za-z0-9-]*"
	AttributeName         = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
	UnquotedValue         = "[^\"'=<>`\\x00-\\x20]+"
	SingleQuotedValue     = "'[^']*'"
	DoubleQuotedValue     = "\"[^\"]*\""
	AttributeValue        = "(?:" + UnquotedValue + "|" + SingleQuotedValue + "|" + DoubleQuotedValue + ")"
	AttributeValueSpec    = "(?:" + "\\s*=" + "\\s*" + AttributeValue + ")"
	Attribute             = "(?:" + "\\s+" + AttributeName + AttributeValueSpec + "?)"
	OpenTag               = "<" + TagName + Attribute + "*" + "\\s*/?>"
	CloseTag              = "</" + TagName + "\\s*[>]"
	HTMLComment           = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
	ProcessingInstruction = "[<][?].*?[?][>]"
	Declaration           = "<![A-Z]+" + "\\s+[^>]*>"
	CDATA                 = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"
	HTMLTag               = "(?:" + OpenTag + "|" + CloseTag + "|" + HTMLComment + "|" +
		ProcessingInstruction + "|" + Declaration + "|" + CDATA + ")"
)
62
var (
	// htmlEntity matches a named entity made of two to five lowercase
	// letters, such as &amp; or &nbsp;.
	// TODO: improve this regexp to catch all possible entities:
	htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
	// reHtmlTag matches, case-insensitively, a single HTMLTag anchored at
	// the start of the input.
	reHtmlTag = regexp.MustCompile("(?i)^" + HTMLTag)
)
68
// HTMLRendererParameters holds the optional string settings of the HTML
// renderer. The zero value is usable; HTMLRendererWithParameters fills in a
// default for FootnoteReturnLinkContents when it is blank.
type HTMLRendererParameters struct {
	// Prepend this text to each relative URL.
	AbsolutePrefix string
	// Add this text to each footnote anchor, to ensure uniqueness.
	FootnoteAnchorPrefix string
	// Show this text inside the <a> tag for a footnote return link, if the
	// FootnoteReturnLinks flag is enabled. If blank, the string
	// <sup>[return]</sup> is used.
	FootnoteReturnLinkContents string
	// If set, add this text to the front of each Header ID, to ensure
	// uniqueness.
	HeaderIDPrefix string
	// If set, add this text to the back of each Header ID, to ensure uniqueness.
	HeaderIDSuffix string
}
84
// HTML is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HTMLRenderer function.
type HTML struct {
	flags    HTMLFlags
	closeTag string // how to end singleton tags: either " />" or ">"
	title    string // document title
	css      string // optional css file url (used with CompletePage)

	parameters HTMLRendererParameters

	// table of contents data
	tocMarker    int // output length recorded at the end of DocumentHeader; the TOC is inserted there
	headerCount  int // number of headers added to the TOC so far; also used for generated "toc_N" ids
	currentLevel int // current nesting depth of <ul> elements in toc
	toc          *bytes.Buffer

	// Track header IDs to prevent ID collision in a single generation.
	headerIDs map[string]int

	w             HTMLWriter
	lastOutputLen int // length of the text most recently passed to out; read by cr
	disableTags   int // when > 0, out strips a leading HTML tag from the text it writes

	extensions Extensions // This gives Smartypants renderer access to flags
}
111
// Endings for singleton tags; HTMLRendererWithParameters picks xhtmlClose
// when the UseXHTML flag is set and htmlClose otherwise.
const (
	xhtmlClose = " />"
	htmlClose  = ">"
)
116
117// HTMLRenderer creates and configures an HTML object, which
118// satisfies the Renderer interface.
119//
120// flags is a set of HTMLFlags ORed together.
121// title is the title of the document, and css is a URL for the document's
122// stylesheet.
123// title and css are only used when HTML_COMPLETE_PAGE is selected.
124func HTMLRenderer(flags HTMLFlags, extensions Extensions, title string, css string) Renderer {
125 return HTMLRendererWithParameters(flags, extensions, title, css, HTMLRendererParameters{})
126}
127
// HTMLWriter accumulates rendered output in an in-memory buffer.
type HTMLWriter struct {
	output bytes.Buffer
}

// Write appends p to the output buffer.
func (w *HTMLWriter) Write(p []byte) (n int, err error) {
	return w.output.Write(p)
}

// WriteString appends s to the output buffer.
func (w *HTMLWriter) WriteString(s string) (n int, err error) {
	return w.output.WriteString(s)
}

// WriteByte appends the single byte b to the output buffer.
func (w *HTMLWriter) WriteByte(b byte) error {
	return w.output.WriteByte(b)
}

// Newline writes out a newline if the output is not pristine, i.e. if
// something has already been written. It is used at the beginning of every
// block-level rendering func so that blocks are separated without emitting a
// stray blank line at the very start of the document.
func (w *HTMLWriter) Newline() {
	if w.output.Len() == 0 {
		return
	}
	// bytes.Buffer.WriteByte always returns a nil error.
	w.output.WriteByte('\n')
}
149
150func (r *HTML) Write(b []byte) (int, error) {
151 return r.w.Write(b)
152}
153
154func HTMLRendererWithParameters(flags HTMLFlags, extensions Extensions, title string,
155 css string, renderParameters HTMLRendererParameters) Renderer {
156 // configure the rendering engine
157 closeTag := htmlClose
158 if flags&UseXHTML != 0 {
159 closeTag = xhtmlClose
160 }
161
162 if renderParameters.FootnoteReturnLinkContents == "" {
163 renderParameters.FootnoteReturnLinkContents = `<sup>[return]</sup>`
164 }
165
166 var writer HTMLWriter
167 return &HTML{
168 flags: flags,
169 extensions: extensions,
170 closeTag: closeTag,
171 title: title,
172 css: css,
173 parameters: renderParameters,
174
175 headerCount: 0,
176 currentLevel: 0,
177 toc: new(bytes.Buffer),
178
179 headerIDs: make(map[string]int),
180
181 w: writer,
182 }
183}
184
// escapeSingleChar returns the HTML entity for char and true when char is one
// of the four characters that must be escaped (", &, <, >). For any other
// byte it returns "" and false.
//
// Using if statements is a bit faster than a switch statement. As the
// compiler improves, this should be unnecessary; it is only worthwhile
// because attrEscape is the single largest CPU user in normal use.
// Also tried using map, but that gave a ~3x slowdown.
func escapeSingleChar(char byte) (string, bool) {
	if char == '"' {
		return "&quot;", true
	}
	if char == '&' {
		return "&amp;", true
	}
	if char == '<' {
		return "&lt;", true
	}
	if char == '>' {
		return "&gt;", true
	}
	return "", false
}
204
205func (r *HTML) attrEscape(src []byte) {
206 org := 0
207 for i, ch := range src {
208 if entity, ok := escapeSingleChar(ch); ok {
209 if i > org {
210 // copy all the normal characters since the last escape
211 r.w.Write(src[org:i])
212 }
213 org = i + 1
214 r.w.WriteString(entity)
215 }
216 }
217 if org < len(src) {
218 r.w.Write(src[org:])
219 }
220}
221
// attrEscape2 returns src with HTML entities normalized: the input is first
// unescaped and then escaped again, so already-escaped text is not escaped
// twice. html.EscapeString emits the numeric forms &#34; and &#39;; the
// former is rewritten to &quot; and the latter is turned back into a plain
// apostrophe.
func attrEscape2(src []byte) []byte {
	unescaped := html.UnescapeString(string(src))
	escaped := []byte(html.EscapeString(unescaped))
	escaped = bytes.Replace(escaped, []byte("&#34;"), []byte("&quot;"), -1)
	return bytes.Replace(escaped, []byte("&#39;"), []byte{'\''}, -1)
}
228
229func (r *HTML) entityEscapeWithSkip(src []byte, skipRanges [][]int) {
230 end := 0
231 for _, rang := range skipRanges {
232 r.attrEscape(src[end:rang[0]])
233 r.w.Write(src[rang[0]:rang[1]])
234 end = rang[1]
235 }
236 r.attrEscape(src[end:])
237}
238
239func (r *HTML) TitleBlock(text []byte) {
240 text = bytes.TrimPrefix(text, []byte("% "))
241 text = bytes.Replace(text, []byte("\n% "), []byte("\n"), -1)
242 r.w.WriteString("<h1 class=\"title\">")
243 r.w.Write(text)
244 r.w.WriteString("\n</h1>")
245}
246
247func (r *HTML) BeginHeader(level int, id string) {
248 r.w.Newline()
249
250 if id == "" && r.extensions&TOC != 0 {
251 id = fmt.Sprintf("toc_%d", r.headerCount)
252 }
253
254 if id != "" {
255 id = r.ensureUniqueHeaderID(id)
256
257 if r.parameters.HeaderIDPrefix != "" {
258 id = r.parameters.HeaderIDPrefix + id
259 }
260
261 if r.parameters.HeaderIDSuffix != "" {
262 id = id + r.parameters.HeaderIDSuffix
263 }
264
265 r.w.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, id))
266 } else {
267 r.w.WriteString(fmt.Sprintf("<h%d>", level))
268 }
269}
270
271func (r *HTML) EndHeader(level int, id string, header []byte) {
272 // are we building a table of contents?
273 if r.extensions&TOC != 0 {
274 r.TocHeaderWithAnchor(header, level, id)
275 }
276
277 r.w.WriteString(fmt.Sprintf("</h%d>\n", level))
278}
279
280func (r *HTML) BlockHtml(text []byte) {
281 if r.flags&SkipHTML != 0 {
282 return
283 }
284
285 r.w.Newline()
286 r.w.Write(text)
287 r.w.WriteByte('\n')
288}
289
290func (r *HTML) HRule() {
291 r.w.Newline()
292 r.w.WriteString("<hr")
293 r.w.WriteString(r.closeTag)
294 r.w.WriteByte('\n')
295}
296
297func (r *HTML) BlockCode(text []byte, lang string) {
298 r.w.Newline()
299
300 // parse out the language names/classes
301 count := 0
302 for _, elt := range strings.Fields(lang) {
303 if elt[0] == '.' {
304 elt = elt[1:]
305 }
306 if len(elt) == 0 {
307 continue
308 }
309 if count == 0 {
310 r.w.WriteString("<pre><code class=\"language-")
311 } else {
312 r.w.WriteByte(' ')
313 }
314 r.attrEscape([]byte(elt))
315 count++
316 }
317
318 if count == 0 {
319 r.w.WriteString("<pre><code>")
320 } else {
321 r.w.WriteString("\">")
322 }
323
324 r.attrEscape(text)
325 r.w.WriteString("</code></pre>\n")
326}
327
328func (r *HTML) BlockQuote(text []byte) {
329 r.w.Newline()
330 r.w.WriteString("<blockquote>\n")
331 r.w.Write(text)
332 r.w.WriteString("</blockquote>\n")
333}
334
335func (r *HTML) Table(header []byte, body []byte, columnData []CellAlignFlags) {
336 r.w.Newline()
337 r.w.WriteString("<table>\n<thead>\n")
338 r.w.Write(header)
339 r.w.WriteString("</thead>\n\n<tbody>\n")
340 r.w.Write(body)
341 r.w.WriteString("</tbody>\n</table>\n")
342}
343
344func (r *HTML) TableRow(text []byte) {
345 r.w.Newline()
346 r.w.WriteString("<tr>\n")
347 r.w.Write(text)
348 r.w.WriteString("\n</tr>\n")
349}
350
// leadingNewline appends a newline to out unless out is still empty.
func leadingNewline(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}
356
357func (r *HTML) TableHeaderCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
358 leadingNewline(out)
359 switch align {
360 case TableAlignmentLeft:
361 out.WriteString("<th align=\"left\">")
362 case TableAlignmentRight:
363 out.WriteString("<th align=\"right\">")
364 case TableAlignmentCenter:
365 out.WriteString("<th align=\"center\">")
366 default:
367 out.WriteString("<th>")
368 }
369
370 out.Write(text)
371 out.WriteString("</th>")
372}
373
374func (r *HTML) TableCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
375 leadingNewline(out)
376 switch align {
377 case TableAlignmentLeft:
378 out.WriteString("<td align=\"left\">")
379 case TableAlignmentRight:
380 out.WriteString("<td align=\"right\">")
381 case TableAlignmentCenter:
382 out.WriteString("<td align=\"center\">")
383 default:
384 out.WriteString("<td>")
385 }
386
387 out.Write(text)
388 out.WriteString("</td>")
389}
390
391func (r *HTML) BeginFootnotes() {
392 r.w.WriteString("<div class=\"footnotes\">\n")
393 r.HRule()
394 r.BeginList(ListTypeOrdered)
395}
396
397func (r *HTML) EndFootnotes() {
398 r.EndList(ListTypeOrdered)
399 r.w.WriteString("</div>\n")
400}
401
402func (r *HTML) FootnoteItem(name, text []byte, flags ListType) {
403 if flags&ListItemContainsBlock != 0 || flags&ListItemBeginningOfList != 0 {
404 r.w.Newline()
405 }
406 slug := slugify(name)
407 r.w.WriteString(`<li id="`)
408 r.w.WriteString(`fn:`)
409 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
410 r.w.Write(slug)
411 r.w.WriteString(`">`)
412 r.w.Write(text)
413 if r.flags&FootnoteReturnLinks != 0 {
414 r.w.WriteString(` <a class="footnote-return" href="#`)
415 r.w.WriteString(`fnref:`)
416 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
417 r.w.Write(slug)
418 r.w.WriteString(`">`)
419 r.w.WriteString(r.parameters.FootnoteReturnLinkContents)
420 r.w.WriteString(`</a>`)
421 }
422 r.w.WriteString("</li>\n")
423}
424
425func (r *HTML) BeginList(flags ListType) {
426 r.w.Newline()
427
428 if flags&ListTypeDefinition != 0 {
429 r.w.WriteString("<dl>")
430 } else if flags&ListTypeOrdered != 0 {
431 r.w.WriteString("<ol>")
432 } else {
433 r.w.WriteString("<ul>")
434 }
435}
436
437func (r *HTML) EndList(flags ListType) {
438 if flags&ListTypeDefinition != 0 {
439 r.w.WriteString("</dl>\n")
440 } else if flags&ListTypeOrdered != 0 {
441 r.w.WriteString("</ol>\n")
442 } else {
443 r.w.WriteString("</ul>\n")
444 }
445}
446
447func (r *HTML) ListItem(text []byte, flags ListType) {
448 if (flags&ListItemContainsBlock != 0 && flags&ListTypeDefinition == 0) ||
449 flags&ListItemBeginningOfList != 0 {
450 r.w.Newline()
451 }
452 if flags&ListTypeTerm != 0 {
453 r.w.WriteString("<dt>")
454 } else if flags&ListTypeDefinition != 0 {
455 r.w.WriteString("<dd>")
456 } else {
457 r.w.WriteString("<li>")
458 }
459 r.w.Write(text)
460 if flags&ListTypeTerm != 0 {
461 r.w.WriteString("</dt>\n")
462 } else if flags&ListTypeDefinition != 0 {
463 r.w.WriteString("</dd>\n")
464 } else {
465 r.w.WriteString("</li>\n")
466 }
467}
468
469func (r *HTML) BeginParagraph() {
470 r.w.Newline()
471 r.w.WriteString("<p>")
472}
473
474func (r *HTML) EndParagraph() {
475 r.w.WriteString("</p>\n")
476}
477
478func (r *HTML) AutoLink(link []byte, kind LinkType) {
479 skipRanges := htmlEntity.FindAllIndex(link, -1)
480 if r.flags&Safelink != 0 && !isSafeLink(link) && kind != LinkTypeEmail {
481 // mark it but don't link it if it is not a safe link: no smartypants
482 r.w.WriteString("<tt>")
483 r.entityEscapeWithSkip(link, skipRanges)
484 r.w.WriteString("</tt>")
485 return
486 }
487
488 r.w.WriteString("<a href=\"")
489 if kind == LinkTypeEmail {
490 r.w.WriteString("mailto:")
491 } else {
492 r.maybeWriteAbsolutePrefix(link)
493 }
494
495 r.entityEscapeWithSkip(link, skipRanges)
496
497 var relAttrs []string
498 if r.flags&NofollowLinks != 0 && !isRelativeLink(link) {
499 relAttrs = append(relAttrs, "nofollow")
500 }
501 if r.flags&NoreferrerLinks != 0 && !isRelativeLink(link) {
502 relAttrs = append(relAttrs, "noreferrer")
503 }
504 if len(relAttrs) > 0 {
505 r.w.WriteString(fmt.Sprintf("\" rel=\"%s", strings.Join(relAttrs, " ")))
506 }
507
508 // blank target only add to external link
509 if r.flags&HrefTargetBlank != 0 && !isRelativeLink(link) {
510 r.w.WriteString("\" target=\"_blank")
511 }
512
513 r.w.WriteString("\">")
514
515 // Pretty print: if we get an email address as
516 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
517 // want to print the `mailto:` prefix
518 switch {
519 case bytes.HasPrefix(link, []byte("mailto://")):
520 r.attrEscape(link[len("mailto://"):])
521 case bytes.HasPrefix(link, []byte("mailto:")):
522 r.attrEscape(link[len("mailto:"):])
523 default:
524 r.entityEscapeWithSkip(link, skipRanges)
525 }
526
527 r.w.WriteString("</a>")
528}
529
530func (r *HTML) CodeSpan(text []byte) {
531 r.w.WriteString("<code>")
532 r.attrEscape(text)
533 r.w.WriteString("</code>")
534}
535
536func (r *HTML) DoubleEmphasis(text []byte) {
537 r.w.WriteString("<strong>")
538 r.w.Write(text)
539 r.w.WriteString("</strong>")
540}
541
542func (r *HTML) Emphasis(text []byte) {
543 if len(text) == 0 {
544 return
545 }
546 r.w.WriteString("<em>")
547 r.w.Write(text)
548 r.w.WriteString("</em>")
549}
550
551func (r *HTML) maybeWriteAbsolutePrefix(link []byte) {
552 if r.parameters.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' {
553 r.w.WriteString(r.parameters.AbsolutePrefix)
554 if link[0] != '/' {
555 r.w.WriteByte('/')
556 }
557 }
558}
559
560func (r *HTML) Image(link []byte, title []byte, alt []byte) {
561 if r.flags&SkipImages != 0 {
562 return
563 }
564
565 r.w.WriteString("<img src=\"")
566 r.maybeWriteAbsolutePrefix(link)
567 r.attrEscape(link)
568 r.w.WriteString("\" alt=\"")
569 if len(alt) > 0 {
570 r.attrEscape(alt)
571 }
572 if len(title) > 0 {
573 r.w.WriteString("\" title=\"")
574 r.attrEscape(title)
575 }
576
577 r.w.WriteByte('"')
578 r.w.WriteString(r.closeTag)
579}
580
581func (r *HTML) LineBreak() {
582 r.w.WriteString("<br")
583 r.w.WriteString(r.closeTag)
584 r.w.WriteByte('\n')
585}
586
587func (r *HTML) Link(link []byte, title []byte, content []byte) {
588 if r.flags&SkipLinks != 0 {
589 // write the link text out but don't link it, just mark it with typewriter font
590 r.w.WriteString("<tt>")
591 r.attrEscape(content)
592 r.w.WriteString("</tt>")
593 return
594 }
595
596 if r.flags&Safelink != 0 && !isSafeLink(link) {
597 // write the link text out but don't link it, just mark it with typewriter font
598 r.w.WriteString("<tt>")
599 r.attrEscape(content)
600 r.w.WriteString("</tt>")
601 return
602 }
603
604 r.w.WriteString("<a href=\"")
605 r.maybeWriteAbsolutePrefix(link)
606 r.attrEscape(link)
607 if len(title) > 0 {
608 r.w.WriteString("\" title=\"")
609 r.attrEscape(title)
610 }
611 var relAttrs []string
612 if r.flags&NofollowLinks != 0 && !isRelativeLink(link) {
613 relAttrs = append(relAttrs, "nofollow")
614 }
615 if r.flags&NoreferrerLinks != 0 && !isRelativeLink(link) {
616 relAttrs = append(relAttrs, "noreferrer")
617 }
618 if len(relAttrs) > 0 {
619 r.w.WriteString(fmt.Sprintf("\" rel=\"%s", strings.Join(relAttrs, " ")))
620 }
621
622 // blank target only add to external link
623 if r.flags&HrefTargetBlank != 0 && !isRelativeLink(link) {
624 r.w.WriteString("\" target=\"_blank")
625 }
626
627 r.w.WriteString("\">")
628 r.w.Write(content)
629 r.w.WriteString("</a>")
630 return
631}
632
633func (r *HTML) RawHtmlTag(text []byte) {
634 if r.flags&SkipHTML != 0 {
635 return
636 }
637 if r.flags&SkipStyle != 0 && isHtmlTag(text, "style") {
638 return
639 }
640 if r.flags&SkipLinks != 0 && isHtmlTag(text, "a") {
641 return
642 }
643 if r.flags&SkipImages != 0 && isHtmlTag(text, "img") {
644 return
645 }
646 r.w.Write(text)
647}
648
649func (r *HTML) TripleEmphasis(text []byte) {
650 r.w.WriteString("<strong><em>")
651 r.w.Write(text)
652 r.w.WriteString("</em></strong>")
653}
654
655func (r *HTML) StrikeThrough(text []byte) {
656 r.w.WriteString("<del>")
657 r.w.Write(text)
658 r.w.WriteString("</del>")
659}
660
661func (r *HTML) FootnoteRef(ref []byte, id int) {
662 slug := slugify(ref)
663 r.w.WriteString(`<sup class="footnote-ref" id="`)
664 r.w.WriteString(`fnref:`)
665 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
666 r.w.Write(slug)
667 r.w.WriteString(`"><a rel="footnote" href="#`)
668 r.w.WriteString(`fn:`)
669 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
670 r.w.Write(slug)
671 r.w.WriteString(`">`)
672 r.w.WriteString(strconv.Itoa(id))
673 r.w.WriteString(`</a></sup>`)
674}
675
676func (r *HTML) Entity(entity []byte) {
677 r.w.Write(entity)
678}
679
680func (r *HTML) NormalText(text []byte) {
681 if r.extensions&Smartypants != 0 {
682 r.Smartypants(text)
683 } else {
684 r.attrEscape(text)
685 }
686}
687
688func (r *HTML) Smartypants(text []byte) {
689 r.w.Write(NewSmartypantsRenderer(r.extensions).Process(text))
690}
691
692func (r *HTML) DocumentHeader() {
693 if r.flags&CompletePage == 0 {
694 return
695 }
696
697 ending := ""
698 if r.flags&UseXHTML != 0 {
699 r.w.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
700 r.w.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
701 r.w.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
702 ending = " /"
703 } else {
704 r.w.WriteString("<!DOCTYPE html>\n")
705 r.w.WriteString("<html>\n")
706 }
707 r.w.WriteString("<head>\n")
708 r.w.WriteString(" <title>")
709 r.NormalText([]byte(r.title))
710 r.w.WriteString("</title>\n")
711 r.w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
712 r.w.WriteString(VERSION)
713 r.w.WriteString("\"")
714 r.w.WriteString(ending)
715 r.w.WriteString(">\n")
716 r.w.WriteString(" <meta charset=\"utf-8\"")
717 r.w.WriteString(ending)
718 r.w.WriteString(">\n")
719 if r.css != "" {
720 r.w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
721 r.attrEscape([]byte(r.css))
722 r.w.WriteString("\"")
723 r.w.WriteString(ending)
724 r.w.WriteString(">\n")
725 }
726 r.w.WriteString("</head>\n")
727 r.w.WriteString("<body>\n")
728
729 r.tocMarker = r.w.output.Len() // XXX
730}
731
732func (r *HTML) DocumentFooter() {
733 // finalize and insert the table of contents
734 if r.extensions&TOC != 0 {
735 r.TocFinalize()
736
737 // now we have to insert the table of contents into the document
738 var temp bytes.Buffer
739
740 // start by making a copy of everything after the document header
741 temp.Write(r.w.output.Bytes()[r.tocMarker:])
742
743 // now clear the copied material from the main output buffer
744 r.w.output.Truncate(r.tocMarker)
745
746 // corner case spacing issue
747 if r.flags&CompletePage != 0 {
748 r.w.WriteByte('\n')
749 }
750
751 // insert the table of contents
752 r.w.WriteString("<nav>\n")
753 r.w.Write(r.toc.Bytes())
754 r.w.WriteString("</nav>\n")
755
756 // corner case spacing issue
757 if r.flags&CompletePage == 0 && r.extensions&OmitContents == 0 {
758 r.w.WriteByte('\n')
759 }
760
761 // write out everything that came after it
762 if r.extensions&OmitContents == 0 {
763 r.w.Write(temp.Bytes())
764 }
765 }
766
767 if r.flags&CompletePage != 0 {
768 r.w.WriteString("\n</body>\n")
769 r.w.WriteString("</html>\n")
770 }
771
772}
773
774func (r *HTML) TocHeaderWithAnchor(text []byte, level int, anchor string) {
775 for level > r.currentLevel {
776 switch {
777 case bytes.HasSuffix(r.toc.Bytes(), []byte("</li>\n")):
778 // this sublist can nest underneath a header
779 size := r.toc.Len()
780 r.toc.Truncate(size - len("</li>\n"))
781
782 case r.currentLevel > 0:
783 r.toc.WriteString("<li>")
784 }
785 if r.toc.Len() > 0 {
786 r.toc.WriteByte('\n')
787 }
788 r.toc.WriteString("<ul>\n")
789 r.currentLevel++
790 }
791
792 for level < r.currentLevel {
793 r.toc.WriteString("</ul>")
794 if r.currentLevel > 1 {
795 r.toc.WriteString("</li>\n")
796 }
797 r.currentLevel--
798 }
799
800 r.toc.WriteString("<li><a href=\"#")
801 if anchor != "" {
802 r.toc.WriteString(anchor)
803 } else {
804 r.toc.WriteString("toc_")
805 r.toc.WriteString(strconv.Itoa(r.headerCount))
806 }
807 r.toc.WriteString("\">")
808 r.headerCount++
809
810 r.toc.Write(text)
811
812 r.toc.WriteString("</a></li>\n")
813}
814
815func (r *HTML) TocHeader(text []byte, level int) {
816 r.TocHeaderWithAnchor(text, level, "")
817}
818
819func (r *HTML) TocFinalize() {
820 for r.currentLevel > 1 {
821 r.toc.WriteString("</ul></li>\n")
822 r.currentLevel--
823 }
824
825 if r.currentLevel > 0 {
826 r.toc.WriteString("</ul>\n")
827 }
828}
829
830func isHtmlTag(tag []byte, tagname string) bool {
831 found, _ := findHtmlTagPos(tag, tagname)
832 return found
833}
834
// skipUntilCharIgnoreQuotes looks for char in html, starting at index start,
// and ignores occurrences inside any kind of quotes ('...', "..." or `...`),
// since quoted content might be JavaScript.
//
// It returns the index of the first unquoted occurrence, or start when there
// is none. A match located exactly at start is therefore indistinguishable
// from "not found"; callers must account for that.
//
// Only one quote can be open at a time: other quote characters appearing
// inside an open quote are ordinary content, so a value such as "it's" does
// not leave a dangling single quote that would hide the rest of the input.
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var open byte // the quote character currently open, or 0 outside quotes
	for i := start; i < len(html); i++ {
		c := html[i]
		switch {
		case open != 0:
			if c == open {
				open = 0
			}
		case c == char:
			return i
		case c == '\'' || c == '"' || c == '`':
			open = c
		}
	}
	return start
}
857
858func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
859 i := 0
860 if i < len(tag) && tag[0] != '<' {
861 return false, -1
862 }
863 i++
864 i = skipSpace(tag, i)
865
866 if i < len(tag) && tag[i] == '/' {
867 i++
868 }
869
870 i = skipSpace(tag, i)
871 j := 0
872 for ; i < len(tag); i, j = i+1, j+1 {
873 if j >= len(tagname) {
874 break
875 }
876
877 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
878 return false, -1
879 }
880 }
881
882 if i == len(tag) {
883 return false, -1
884 }
885
886 rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
887 if rightAngle > i {
888 return true, rightAngle
889 }
890
891 return false, -1
892}
893
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. When char does not occur it returns len(text), or start
// itself if start is already at or past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
901
902func skipSpace(tag []byte, i int) int {
903 for i < len(tag) && isspace(tag[i]) {
904 i++
905 }
906 return i
907}
908
// skipChar returns the index of the first byte at or after start that is
// not equal to char, i.e. it steps over a run of char. It returns start
// unchanged when start is at or past the end of data.
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
916
// isRelativeLink reports whether link is relative to the current document or
// site. That is the case for:
//
//   - fragment links, which begin with '#'
//   - the root "/" and any path beginning with a single '/' (a leading "//"
//     may be a protocol-relative link and is not counted)
//   - paths beginning with "./" or "../"
//
// An empty link is not relative.
func isRelativeLink(link []byte) (yes bool) {
	if len(link) == 0 {
		return false
	}

	switch link[0] {
	case '#':
		return true
	case '/':
		return len(link) == 1 || link[1] != '/'
	}

	return bytes.HasPrefix(link, []byte("./")) || bytes.HasPrefix(link, []byte("../"))
}
945
946func (r *HTML) ensureUniqueHeaderID(id string) string {
947 for count, found := r.headerIDs[id]; found; count, found = r.headerIDs[id] {
948 tmp := fmt.Sprintf("%s-%d", id, count+1)
949
950 if _, tmpFound := r.headerIDs[tmp]; !tmpFound {
951 r.headerIDs[id] = count + 1
952 id = tmp
953 } else {
954 id = id + "-1"
955 }
956 }
957
958 if _, found := r.headerIDs[id]; !found {
959 r.headerIDs[id] = 0
960 }
961
962 return id
963}
964
965func (r *HTML) addAbsPrefix(link []byte) []byte {
966 if r.parameters.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' {
967 newDest := r.parameters.AbsolutePrefix
968 if link[0] != '/' {
969 newDest += "/"
970 }
971 newDest += string(link)
972 return []byte(newDest)
973 }
974 return link
975}
976
977func appendLinkAttrs(attrs []string, flags HTMLFlags, link []byte) []string {
978 if isRelativeLink(link) {
979 return attrs
980 }
981 val := []string{}
982 if flags&NofollowLinks != 0 {
983 val = append(val, "nofollow")
984 }
985 if flags&NoreferrerLinks != 0 {
986 val = append(val, "noreferrer")
987 }
988 if flags&HrefTargetBlank != 0 {
989 attrs = append(attrs, "target=\"_blank\"")
990 }
991 if len(val) == 0 {
992 return attrs
993 }
994 attr := fmt.Sprintf("rel=%q", strings.Join(val, " "))
995 return append(attrs, attr)
996}
997
// isMailto reports whether link starts with the "mailto:" scheme.
func isMailto(link []byte) bool {
	const scheme = "mailto:"
	if len(link) < len(scheme) {
		return false
	}
	return bytes.Equal(link[:len(scheme)], []byte(scheme))
}
1001
1002func needSkipLink(flags HTMLFlags, dest []byte) bool {
1003 if flags&SkipLinks != 0 {
1004 return true
1005 }
1006 return flags&Safelink != 0 && !isSafeLink(dest) && !isMailto(dest)
1007}
1008
1009func isSmartypantable(node *Node) bool {
1010 pt := node.Parent.Type
1011 return pt != Link && pt != CodeBlock && pt != Code
1012}
1013
// appendLanguageAttr appends a class="language-X" attribute to attrs, where
// X is the first whitespace-separated word of the code block's info string,
// and returns the extended slice. Anything after the first word is ignored.
// attrs is returned unchanged when info holds no word at all.
func appendLanguageAttr(attrs []string, info []byte) []string {
	words := bytes.Fields(info)
	if len(words) == 0 {
		return attrs
	}
	return append(attrs, fmt.Sprintf("class=\"language-%s\"", words[0]))
}
1021
// tag builds the text of an HTML tag: '<', the name, the attributes (if any)
// separated by single spaces, " /" when selfClosing is set, and '>'.
// A closing tag is produced by passing a name that starts with '/'.
func tag(name string, attrs []string, selfClosing bool) []byte {
	buf := append([]byte{'<'}, name...)
	if len(attrs) > 0 {
		buf = append(buf, ' ')
		buf = append(buf, strings.Join(attrs, " ")...)
	}
	if selfClosing {
		buf = append(buf, " /"...)
	}
	return append(buf, '>')
}
1032
1033func footnoteRef(prefix string, node *Node) []byte {
1034 urlFrag := prefix + string(slugify(node.Destination))
1035 anchor := fmt.Sprintf(`<a rel="footnote" href="#fn:%s">%d</a>`, urlFrag, node.NoteID)
1036 return []byte(fmt.Sprintf(`<sup class="footnote-ref" id="fnref:%s">%s</sup>`, urlFrag, anchor))
1037}
1038
// footnoteItem builds the opening <li> tag of a footnote, with the id
// "fn:" followed by prefix and slug.
func footnoteItem(prefix string, slug []byte) []byte {
	anchor := prefix + string(slug)
	return []byte(fmt.Sprintf("<li id=\"fn:%s\">", anchor))
}
1042
// footnoteReturnLink builds the link that leads from a footnote back to its
// reference: an <a class="footnote-return"> pointing at "#fnref:" followed
// by prefix and slug, with returnLink as its content. The markup starts with
// a single space.
func footnoteReturnLink(prefix, returnLink string, slug []byte) []byte {
	target := prefix + string(slug)
	markup := fmt.Sprintf(" <a class=\"footnote-return\" href=\"#fnref:%s\">%s</a>", target, returnLink)
	return []byte(markup)
}
1047
1048func itemOpenCR(node *Node) bool {
1049 if node.Prev == nil {
1050 return false
1051 }
1052 ld := node.Parent.ListData
1053 return !ld.Tight && ld.ListFlags&ListTypeDefinition == 0
1054}
1055
1056func skipParagraphTags(node *Node) bool {
1057 grandparent := node.Parent.Parent
1058 if grandparent == nil || grandparent.Type != List {
1059 return false
1060 }
1061 tightOrTerm := grandparent.Tight || node.Parent.ListFlags&ListTypeTerm != 0
1062 return grandparent.Type == List && tightOrTerm
1063}
1064
1065func cellAlignment(align CellAlignFlags) string {
1066 switch align {
1067 case TableAlignmentLeft:
1068 return "left"
1069 case TableAlignmentRight:
1070 return "right"
1071 case TableAlignmentCenter:
1072 return "center"
1073 default:
1074 return ""
1075 }
1076}
1077
1078func esc(text []byte, preserveEntities bool) []byte {
1079 return attrEscape2(text)
1080}
1081
// escCode returns text HTML-escaped for use inside code elements. Unlike
// attrEscape2 it does not unescape first, so an entity in the input is shown
// literally (its '&' becomes &amp;). html.EscapeString emits the numeric
// forms &#34; and &#39;; the former is rewritten to &quot; and the latter is
// turned back into a plain apostrophe. The preserveEntities argument is
// currently ignored.
func escCode(text []byte, preserveEntities bool) []byte {
	escaped := []byte(html.EscapeString(string(text)))
	escaped = bytes.Replace(escaped, []byte("&#34;"), []byte("&quot;"), -1)
	return bytes.Replace(escaped, []byte("&#39;"), []byte{'\''}, -1)
}
1087
1088func (r *HTML) out(w io.Writer, text []byte) {
1089 if r.disableTags > 0 {
1090 w.Write(reHtmlTag.ReplaceAll(text, []byte{}))
1091 } else {
1092 w.Write(text)
1093 }
1094 r.lastOutputLen = len(text)
1095}
1096
1097func (r *HTML) cr(w io.Writer) {
1098 if r.lastOutputLen > 0 {
1099 r.out(w, []byte{'\n'})
1100 }
1101}
1102
1103func (r *HTML) RenderNode(w io.Writer, node *Node, entering bool) {
1104 attrs := []string{}
1105 switch node.Type {
1106 case Text:
1107 r.out(w, node.Literal)
1108 break
1109 case Softbreak:
1110 r.out(w, []byte("\n"))
1111 // TODO: make it configurable via out(renderer.softbreak)
1112 case Hardbreak:
1113 r.out(w, tag("br", nil, true))
1114 r.cr(w)
1115 case Emph:
1116 if entering {
1117 r.out(w, tag("em", nil, false))
1118 } else {
1119 r.out(w, tag("/em", nil, false))
1120 }
1121 break
1122 case Strong:
1123 if entering {
1124 r.out(w, tag("strong", nil, false))
1125 } else {
1126 r.out(w, tag("/strong", nil, false))
1127 }
1128 break
1129 case Del:
1130 if entering {
1131 r.out(w, tag("del", nil, false))
1132 } else {
1133 r.out(w, tag("/del", nil, false))
1134 }
1135 case HTMLSpan:
1136 //if options.safe {
1137 // out(w, "<!-- raw HTML omitted -->")
1138 //} else {
1139 r.out(w, node.Literal)
1140 //}
1141 case Link:
1142 // mark it but don't link it if it is not a safe link: no smartypants
1143 dest := node.LinkData.Destination
1144 if needSkipLink(r.flags, dest) {
1145 if entering {
1146 r.out(w, tag("tt", nil, false))
1147 } else {
1148 r.out(w, tag("/tt", nil, false))
1149 }
1150 } else {
1151 if entering {
1152 dest = r.addAbsPrefix(dest)
1153 //if (!(options.safe && potentiallyUnsafe(node.destination))) {
1154 attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest, true)))
1155 //}
1156 if node.NoteID != 0 {
1157 r.out(w, footnoteRef(r.parameters.FootnoteAnchorPrefix, node))
1158 break
1159 }
1160 attrs = appendLinkAttrs(attrs, r.flags, dest)
1161 if len(node.LinkData.Title) > 0 {
1162 attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title, true)))
1163 }
1164 r.out(w, tag("a", attrs, false))
1165 } else {
1166 if node.NoteID != 0 {
1167 break
1168 }
1169 r.out(w, tag("/a", nil, false))
1170 }
1171 }
1172 case Image:
1173 if entering {
1174 dest := node.LinkData.Destination
1175 dest = r.addAbsPrefix(dest)
1176 if r.disableTags == 0 {
1177 //if options.safe && potentiallyUnsafe(dest) {
1178 //out(w, `<img src="" alt="`)
1179 //} else {
1180 r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest, true))))
1181 //}
1182 }
1183 r.disableTags++
1184 } else {
1185 r.disableTags--
1186 if r.disableTags == 0 {
1187 if node.LinkData.Title != nil {
1188 r.out(w, []byte(`" title="`))
1189 r.out(w, esc(node.LinkData.Title, true))
1190 }
1191 r.out(w, []byte(`" />`))
1192 }
1193 }
1194 case Code:
1195 r.out(w, tag("code", nil, false))
1196 r.out(w, escCode(node.Literal, false))
1197 r.out(w, tag("/code", nil, false))
1198 case Document:
1199 break
1200 case Paragraph:
1201 if skipParagraphTags(node) {
1202 break
1203 }
1204 if entering {
1205 // TODO: untangle this clusterfuck about when the newlines need
1206 // to be added and when not.
1207 if node.Prev != nil {
1208 t := node.Prev.Type
1209 if t == HTMLBlock || t == List || t == Paragraph || t == Header || t == CodeBlock || t == BlockQuote || t == HorizontalRule {
1210 r.cr(w)
1211 }
1212 }
1213 if node.Parent.Type == BlockQuote && node.Prev == nil {
1214 r.cr(w)
1215 }
1216 r.out(w, tag("p", attrs, false))
1217 } else {
1218 r.out(w, tag("/p", attrs, false))
1219 if !(node.Parent.Type == Item && node.Next == nil) {
1220 r.cr(w)
1221 }
1222 }
1223 break
1224 case BlockQuote:
1225 if entering {
1226 r.cr(w)
1227 r.out(w, tag("blockquote", attrs, false))
1228 } else {
1229 r.out(w, tag("/blockquote", nil, false))
1230 r.cr(w)
1231 }
1232 break
1233 case HTMLBlock:
1234 r.cr(w)
1235 r.out(w, node.Literal)
1236 r.cr(w)
1237 case Header:
1238 tagname := fmt.Sprintf("h%d", node.Level)
1239 if entering {
1240 if node.IsTitleblock {
1241 attrs = append(attrs, `class="title"`)
1242 }
1243 if node.HeaderID != "" {
1244 id := r.ensureUniqueHeaderID(node.HeaderID)
1245 if r.parameters.HeaderIDPrefix != "" {
1246 id = r.parameters.HeaderIDPrefix + id
1247 }
1248 if r.parameters.HeaderIDSuffix != "" {
1249 id = id + r.parameters.HeaderIDSuffix
1250 }
1251 attrs = append(attrs, fmt.Sprintf(`id="%s"`, id))
1252 }
1253 r.cr(w)
1254 r.out(w, tag(tagname, attrs, false))
1255 } else {
1256 r.out(w, tag("/"+tagname, nil, false))
1257 if !(node.Parent.Type == Item && node.Next == nil) {
1258 r.cr(w)
1259 }
1260 }
1261 break
1262 case HorizontalRule:
1263 r.cr(w)
1264 r.out(w, tag("hr", attrs, r.flags&UseXHTML != 0))
1265 r.cr(w)
1266 break
1267 case List:
1268 tagName := "ul"
1269 if node.ListFlags&ListTypeOrdered != 0 {
1270 tagName = "ol"
1271 }
1272 if node.ListFlags&ListTypeDefinition != 0 {
1273 tagName = "dl"
1274 }
1275 if entering {
1276 // var start = node.listStart;
1277 // if (start !== null && start !== 1) {
1278 // attrs.push(['start', start.toString()]);
1279 // }
1280 r.cr(w)
1281 if node.Parent.Type == Item && node.Parent.Parent.Tight {
1282 r.cr(w)
1283 }
1284 r.out(w, tag(tagName, attrs, false))
1285 r.cr(w)
1286 } else {
1287 r.out(w, tag("/"+tagName, nil, false))
1288 //cr(w)
1289 //if node.parent.Type != Item {
1290 // cr(w)
1291 //}
1292 if node.Parent.Type == Item && node.Next != nil {
1293 r.cr(w)
1294 }
1295 if node.Parent.Type == Document || node.Parent.Type == BlockQuote {
1296 r.cr(w)
1297 }
1298 }
1299 case Item:
1300 tagName := "li"
1301 if node.ListFlags&ListTypeDefinition != 0 {
1302 tagName = "dd"
1303 }
1304 if node.ListFlags&ListTypeTerm != 0 {
1305 tagName = "dt"
1306 }
1307 if entering {
1308 if itemOpenCR(node) {
1309 r.cr(w)
1310 }
1311 if node.ListData.RefLink != nil {
1312 slug := slugify(node.ListData.RefLink)
1313 r.out(w, footnoteItem(r.parameters.FootnoteAnchorPrefix, slug))
1314 break
1315 }
1316 r.out(w, tag(tagName, nil, false))
1317 } else {
1318 if node.ListData.RefLink != nil {
1319 slug := slugify(node.ListData.RefLink)
1320 if r.flags&FootnoteReturnLinks != 0 {
1321 r.out(w, footnoteReturnLink(r.parameters.FootnoteAnchorPrefix, r.parameters.FootnoteReturnLinkContents, slug))
1322 }
1323 }
1324 r.out(w, tag("/"+tagName, nil, false))
1325 r.cr(w)
1326 }
1327 case CodeBlock:
1328 attrs = appendLanguageAttr(attrs, node.Info)
1329 r.cr(w)
1330 r.out(w, tag("pre", nil, false))
1331 r.out(w, tag("code", attrs, false))
1332 r.out(w, escCode(node.Literal, false))
1333 r.out(w, tag("/code", nil, false))
1334 r.out(w, tag("/pre", nil, false))
1335 if node.Parent.Type != Item {
1336 r.cr(w)
1337 }
1338 case Table:
1339 if entering {
1340 r.cr(w)
1341 r.out(w, tag("table", nil, false))
1342 } else {
1343 r.out(w, tag("/table", nil, false))
1344 r.cr(w)
1345 }
1346 case TableCell:
1347 tagName := "td"
1348 if node.IsHeader {
1349 tagName = "th"
1350 }
1351 if entering {
1352 align := cellAlignment(node.Align)
1353 if align != "" {
1354 attrs = append(attrs, fmt.Sprintf(`align="%s"`, align))
1355 }
1356 if node.Prev == nil {
1357 r.cr(w)
1358 }
1359 r.out(w, tag(tagName, attrs, false))
1360 } else {
1361 r.out(w, tag("/"+tagName, nil, false))
1362 r.cr(w)
1363 }
1364 case TableHead:
1365 if entering {
1366 r.cr(w)
1367 r.out(w, tag("thead", nil, false))
1368 } else {
1369 r.out(w, tag("/thead", nil, false))
1370 r.cr(w)
1371 }
1372 case TableBody:
1373 if entering {
1374 r.cr(w)
1375 r.out(w, tag("tbody", nil, false))
1376 // XXX: this is to adhere to a rather silly test. Should fix test.
1377 if node.FirstChild == nil {
1378 r.cr(w)
1379 }
1380 } else {
1381 r.out(w, tag("/tbody", nil, false))
1382 r.cr(w)
1383 }
1384 case TableRow:
1385 if entering {
1386 r.cr(w)
1387 r.out(w, tag("tr", nil, false))
1388 } else {
1389 r.out(w, tag("/tr", nil, false))
1390 r.cr(w)
1391 }
1392 default:
1393 panic("Unknown node type " + node.Type.String())
1394 }
1395}
1396
1397func (r *HTML) writeDocumentHeader(w *bytes.Buffer, sr *SPRenderer) {
1398 if r.flags&CompletePage == 0 {
1399 return
1400 }
1401 ending := ""
1402 if r.flags&UseXHTML != 0 {
1403 w.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
1404 w.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
1405 w.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
1406 ending = " /"
1407 } else {
1408 w.WriteString("<!DOCTYPE html>\n")
1409 w.WriteString("<html>\n")
1410 }
1411 w.WriteString("<head>\n")
1412 w.WriteString(" <title>")
1413 if r.extensions&Smartypants != 0 {
1414 w.Write(sr.Process([]byte(r.title)))
1415 } else {
1416 w.Write(esc([]byte(r.title), false))
1417 }
1418 w.WriteString("</title>\n")
1419 w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
1420 w.WriteString(VERSION)
1421 w.WriteString("\"")
1422 w.WriteString(ending)
1423 w.WriteString(">\n")
1424 w.WriteString(" <meta charset=\"utf-8\"")
1425 w.WriteString(ending)
1426 w.WriteString(">\n")
1427 if r.css != "" {
1428 w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
1429 r.attrEscape([]byte(r.css))
1430 w.WriteString("\"")
1431 w.WriteString(ending)
1432 w.WriteString(">\n")
1433 }
1434 w.WriteString("</head>\n")
1435 w.WriteString("<body>\n\n")
1436}
1437
1438func (r *HTML) writeDocumentFooter(w *bytes.Buffer) {
1439 if r.flags&CompletePage == 0 {
1440 return
1441 }
1442 w.WriteString("\n</body>\n</html>\n")
1443}
1444
1445func (r *HTML) Render(ast *Node) []byte {
1446 //println("render_Blackfriday")
1447 //dump(ast)
1448 // Run Smartypants if it's enabled or simply escape text if not
1449 sr := NewSmartypantsRenderer(r.extensions)
1450 ast.Walk(func(node *Node, entering bool) {
1451 if node.Type == Text {
1452 if r.extensions&Smartypants != 0 {
1453 node.Literal = sr.Process(node.Literal)
1454 } else {
1455 node.Literal = esc(node.Literal, false)
1456 }
1457 }
1458 })
1459 var buff bytes.Buffer
1460 r.writeDocumentHeader(&buff, sr)
1461 ast.Walk(func(node *Node, entering bool) {
1462 r.RenderNode(&buff, node, entering)
1463 })
1464 r.writeDocumentFooter(&buff)
1465 return buff.Bytes()
1466}