html.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// HTML rendering backend
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20 "fmt"
21 "html"
22 "io"
23 "regexp"
24 "strconv"
25 "strings"
26)
27
// HTMLFlags is a bit set of options that configure the HTML renderer.
type HTMLFlags int

// HTML renderer configuration options.
//
// The flag values are produced by 1 << iota. HTMLFlagsNone occupies the first
// slot of this const block (iota == 0), so SkipHTML is 1 << 1, SkipStyle is
// 1 << 2, and so on; bit 0 is not assigned to any flag.
//
// The string constants that follow are regular-expression fragments. They are
// concatenated into HTMLTag, which matches one raw HTML construct: an open
// tag, a closing tag, a comment, a processing instruction, a declaration or a
// CDATA section.
const (
	HTMLFlagsNone       HTMLFlags = 0
	SkipHTML            HTMLFlags = 1 << iota // Skip preformatted HTML blocks
	SkipStyle                                 // Skip embedded <style> elements
	SkipImages                                // Skip embedded images
	SkipLinks                                 // Skip all links
	Safelink                                  // Only link to trusted protocols
	NofollowLinks                             // Only link with rel="nofollow"
	NoreferrerLinks                           // Only link with rel="noreferrer"
	HrefTargetBlank                           // Add a blank target
	CompletePage                              // Generate a complete HTML page
	UseXHTML                                  // Generate XHTML output instead of HTML
	FootnoteReturnLinks                       // Generate a link at the end of a footnote to return to the source

	TagName               = "[A-Za-z][A-Za-z0-9-]*"
	AttributeName         = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
	UnquotedValue         = "[^\"'=<>`\\x00-\\x20]+"
	SingleQuotedValue     = "'[^']*'"
	DoubleQuotedValue     = "\"[^\"]*\""
	AttributeValue        = "(?:" + UnquotedValue + "|" + SingleQuotedValue + "|" + DoubleQuotedValue + ")"
	AttributeValueSpec    = "(?:" + "\\s*=" + "\\s*" + AttributeValue + ")"
	Attribute             = "(?:" + "\\s+" + AttributeName + AttributeValueSpec + "?)"
	OpenTag               = "<" + TagName + Attribute + "*" + "\\s*/?>"
	CloseTag              = "</" + TagName + "\\s*[>]"
	HTMLComment           = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
	ProcessingInstruction = "[<][?].*?[?][>]"
	Declaration           = "<![A-Z]+" + "\\s+[^>]*>"
	CDATA                 = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"
	HTMLTag               = "(?:" + OpenTag + "|" + CloseTag + "|" + HTMLComment + "|" +
		ProcessingInstruction + "|" + Declaration + "|" + CDATA + ")"
)
62
var (
	// htmlEntity matches a simple named character reference: '&', two to
	// five lowercase ASCII letters, then ';'.
	// TODO: improve this regexp to catch all possible entities:
	htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
	// reHtmlTag matches, case-insensitively, a single HTMLTag construct
	// anchored at the start of the input.
	reHtmlTag = regexp.MustCompile("(?i)^" + HTMLTag)
)
68
// HTMLRendererParameters carries the optional string settings accepted by
// HTMLRendererWithParameters.
type HTMLRendererParameters struct {
	// Prepend this text to each relative URL.
	AbsolutePrefix string
	// Add this text to each footnote anchor, to ensure uniqueness.
	FootnoteAnchorPrefix string
	// Show this text inside the <a> tag for a footnote return link, if the
	// FootnoteReturnLinks flag is enabled. If blank, the string
	// <sup>[return]</sup> is used.
	FootnoteReturnLinkContents string
	// If set, add this text to the front of each Header ID, to ensure
	// uniqueness.
	HeaderIDPrefix string
	// If set, add this text to the back of each Header ID, to ensure uniqueness.
	HeaderIDSuffix string
}
84
// HTML is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HTMLRenderer function.
type HTML struct {
	flags    HTMLFlags // renderer options, a set of HTMLFlags ORed together
	closeTag string    // how to end singleton tags: either " />" or ">"
	title    string    // document title
	css      string    // optional css file url (used with the CompletePage flag)

	// string settings supplied through HTMLRendererWithParameters
	parameters HTMLRendererParameters

	// table of contents data
	tocMarker    int           // output length recorded by DocumentHeader; DocumentFooter inserts the TOC at this offset
	headerCount  int           // number of TOC entries written so far; also used to build "toc_N" anchors
	currentLevel int           // current nesting depth of the <ul> lists in toc
	toc          *bytes.Buffer // accumulated TOC markup

	// Track header IDs to prevent ID collision in a single generation.
	headerIDs map[string]int

	w             HTMLWriter // buffer that receives the rendered output
	lastOutputLen int        // length of the text most recently passed to out; cr emits a newline only when it is non-zero
	disableTags   int        // while > 0, out strips HTML tags from the text it writes

	extensions Extensions // This gives Smartypants renderer access to flags
}
111
// Terminators for singleton tags such as <hr> and <br>.
// HTMLRendererWithParameters selects xhtmlClose when the UseXHTML flag is set
// and htmlClose otherwise.
const (
	xhtmlClose = " />"
	htmlClose  = ">"
)
116
117// HTMLRenderer creates and configures an HTML object, which
118// satisfies the Renderer interface.
119//
120// flags is a set of HTMLFlags ORed together.
121// title is the title of the document, and css is a URL for the document's
122// stylesheet.
123// title and css are only used when HTML_COMPLETE_PAGE is selected.
124func HTMLRenderer(flags HTMLFlags, extensions Extensions, title string, css string) Renderer {
125 return HTMLRendererWithParameters(flags, extensions, title, css, HTMLRendererParameters{})
126}
127
// HTMLWriter accumulates rendered output in an in-memory buffer.
type HTMLWriter struct {
	output bytes.Buffer
}

// Write appends p to the buffer and reports what the buffer reports.
func (w *HTMLWriter) Write(p []byte) (int, error) {
	return w.output.Write(p)
}

// WriteString appends s to the buffer and reports what the buffer reports.
func (w *HTMLWriter) WriteString(s string) (int, error) {
	return w.output.WriteString(s)
}

// WriteByte appends the single byte b to the buffer.
func (w *HTMLWriter) WriteByte(b byte) error {
	return w.output.WriteByte(b)
}

// Newline appends a single '\n' to the buffer. It does so unconditionally,
// whether or not anything has been written before.
func (w *HTMLWriter) Newline() {
	w.output.WriteByte('\n')
}
149
150func (r *HTML) Write(b []byte) (int, error) {
151 return r.w.Write(b)
152}
153
154func HTMLRendererWithParameters(flags HTMLFlags, extensions Extensions, title string,
155 css string, renderParameters HTMLRendererParameters) Renderer {
156 // configure the rendering engine
157 closeTag := htmlClose
158 if flags&UseXHTML != 0 {
159 closeTag = xhtmlClose
160 }
161
162 if renderParameters.FootnoteReturnLinkContents == "" {
163 renderParameters.FootnoteReturnLinkContents = `<sup>[return]</sup>`
164 }
165
166 var writer HTMLWriter
167 return &HTML{
168 flags: flags,
169 extensions: extensions,
170 closeTag: closeTag,
171 title: title,
172 css: css,
173 parameters: renderParameters,
174
175 headerCount: 0,
176 currentLevel: 0,
177 toc: new(bytes.Buffer),
178
179 headerIDs: make(map[string]int),
180
181 w: writer,
182 }
183}
184
// escapeSingleChar returns the HTML entity for char and true when char is one
// of '"', '&', '<' or '>'. For every other byte it returns "" and false.
//
// Using if statements is a bit faster than a switch statement. As the compiler
// improves, this should become unnecessary; it is only worthwhile because
// attrEscape is the single largest CPU user in normal use.
// Also tried using map, but that gave a ~3x slowdown.
func escapeSingleChar(char byte) (string, bool) {
	if char == '"' {
		return "&quot;", true
	}
	if char == '&' {
		return "&amp;", true
	}
	if char == '<' {
		return "&lt;", true
	}
	if char == '>' {
		return "&gt;", true
	}
	return "", false
}
204
205func (r *HTML) attrEscape(src []byte) {
206 org := 0
207 for i, ch := range src {
208 if entity, ok := escapeSingleChar(ch); ok {
209 if i > org {
210 // copy all the normal characters since the last escape
211 r.w.Write(src[org:i])
212 }
213 org = i + 1
214 r.w.WriteString(entity)
215 }
216 }
217 if org < len(src) {
218 r.w.Write(src[org:])
219 }
220}
221
// attrEscape2 returns src with any existing entities unescaped and the result
// HTML-escaped again, so text that is already escaped is not escaped twice.
//
// html.EscapeString emits the numeric forms &#34; and &#39;. The double quote
// is rewritten to &quot; and the apostrophe is turned back into a literal '.
func attrEscape2(src []byte) []byte {
	unesc := html.UnescapeString(string(src))
	escaped := []byte(html.EscapeString(unesc))
	escaped = bytes.Replace(escaped, []byte("&#34;"), []byte("&quot;"), -1)
	return bytes.Replace(escaped, []byte("&#39;"), []byte{'\''}, -1)
}
228
229func (r *HTML) entityEscapeWithSkip(src []byte, skipRanges [][]int) {
230 end := 0
231 for _, rang := range skipRanges {
232 r.attrEscape(src[end:rang[0]])
233 r.w.Write(src[rang[0]:rang[1]])
234 end = rang[1]
235 }
236 r.attrEscape(src[end:])
237}
238
239func (r *HTML) TitleBlock(text []byte) {
240 text = bytes.TrimPrefix(text, []byte("% "))
241 text = bytes.Replace(text, []byte("\n% "), []byte("\n"), -1)
242 r.w.WriteString("<h1 class=\"title\">")
243 r.w.Write(text)
244 r.w.WriteString("\n</h1>")
245}
246
247func (r *HTML) BeginHeader(level int, id string) {
248 r.w.Newline()
249
250 if id == "" && r.extensions&TOC != 0 {
251 id = fmt.Sprintf("toc_%d", r.headerCount)
252 }
253
254 if id != "" {
255 id = r.ensureUniqueHeaderID(id)
256
257 if r.parameters.HeaderIDPrefix != "" {
258 id = r.parameters.HeaderIDPrefix + id
259 }
260
261 if r.parameters.HeaderIDSuffix != "" {
262 id = id + r.parameters.HeaderIDSuffix
263 }
264
265 r.w.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, id))
266 } else {
267 r.w.WriteString(fmt.Sprintf("<h%d>", level))
268 }
269}
270
271func (r *HTML) EndHeader(level int, id string, header []byte) {
272 // are we building a table of contents?
273 if r.extensions&TOC != 0 {
274 r.TocHeaderWithAnchor(header, level, id)
275 }
276
277 r.w.WriteString(fmt.Sprintf("</h%d>\n", level))
278}
279
280func (r *HTML) BlockHtml(text []byte) {
281 if r.flags&SkipHTML != 0 {
282 return
283 }
284
285 r.w.Newline()
286 r.w.Write(text)
287 r.w.WriteByte('\n')
288}
289
290func (r *HTML) HRule() {
291 r.w.Newline()
292 r.w.WriteString("<hr")
293 r.w.WriteString(r.closeTag)
294 r.w.WriteByte('\n')
295}
296
297func (r *HTML) BlockCode(text []byte, lang string) {
298 r.w.Newline()
299
300 // parse out the language names/classes
301 count := 0
302 for _, elt := range strings.Fields(lang) {
303 if elt[0] == '.' {
304 elt = elt[1:]
305 }
306 if len(elt) == 0 {
307 continue
308 }
309 if count == 0 {
310 r.w.WriteString("<pre><code class=\"language-")
311 } else {
312 r.w.WriteByte(' ')
313 }
314 r.attrEscape([]byte(elt))
315 count++
316 }
317
318 if count == 0 {
319 r.w.WriteString("<pre><code>")
320 } else {
321 r.w.WriteString("\">")
322 }
323
324 r.attrEscape(text)
325 r.w.WriteString("</code></pre>\n")
326}
327
328func (r *HTML) BlockQuote(text []byte) {
329 r.w.Newline()
330 r.w.WriteString("<blockquote>\n")
331 r.w.Write(text)
332 r.w.WriteString("</blockquote>\n")
333}
334
335func (r *HTML) Table(header []byte, body []byte, columnData []CellAlignFlags) {
336 r.w.Newline()
337 r.w.WriteString("<table>\n<thead>\n")
338 r.w.Write(header)
339 r.w.WriteString("</thead>\n\n<tbody>\n")
340 r.w.Write(body)
341 r.w.WriteString("</tbody>\n</table>\n")
342}
343
344func (r *HTML) TableRow(text []byte) {
345 r.w.Newline()
346 r.w.WriteString("<tr>\n")
347 r.w.Write(text)
348 r.w.WriteString("\n</tr>\n")
349}
350
// leadingNewline appends '\n' to out unless out is still empty.
func leadingNewline(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}
356
357func (r *HTML) TableHeaderCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
358 leadingNewline(out)
359 switch align {
360 case TableAlignmentLeft:
361 out.WriteString("<th align=\"left\">")
362 case TableAlignmentRight:
363 out.WriteString("<th align=\"right\">")
364 case TableAlignmentCenter:
365 out.WriteString("<th align=\"center\">")
366 default:
367 out.WriteString("<th>")
368 }
369
370 out.Write(text)
371 out.WriteString("</th>")
372}
373
374func (r *HTML) TableCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
375 leadingNewline(out)
376 switch align {
377 case TableAlignmentLeft:
378 out.WriteString("<td align=\"left\">")
379 case TableAlignmentRight:
380 out.WriteString("<td align=\"right\">")
381 case TableAlignmentCenter:
382 out.WriteString("<td align=\"center\">")
383 default:
384 out.WriteString("<td>")
385 }
386
387 out.Write(text)
388 out.WriteString("</td>")
389}
390
391func (r *HTML) BeginFootnotes() {
392 r.w.WriteString("<div class=\"footnotes\">\n")
393 r.HRule()
394 r.BeginList(ListTypeOrdered)
395}
396
397func (r *HTML) EndFootnotes() {
398 r.EndList(ListTypeOrdered)
399 r.w.WriteString("</div>\n")
400}
401
402func (r *HTML) FootnoteItem(name, text []byte, flags ListType) {
403 if flags&ListItemContainsBlock != 0 || flags&ListItemBeginningOfList != 0 {
404 r.w.Newline()
405 }
406 slug := slugify(name)
407 r.w.WriteString(`<li id="`)
408 r.w.WriteString(`fn:`)
409 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
410 r.w.Write(slug)
411 r.w.WriteString(`">`)
412 r.w.Write(text)
413 if r.flags&FootnoteReturnLinks != 0 {
414 r.w.WriteString(` <a class="footnote-return" href="#`)
415 r.w.WriteString(`fnref:`)
416 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
417 r.w.Write(slug)
418 r.w.WriteString(`">`)
419 r.w.WriteString(r.parameters.FootnoteReturnLinkContents)
420 r.w.WriteString(`</a>`)
421 }
422 r.w.WriteString("</li>\n")
423}
424
425func (r *HTML) BeginList(flags ListType) {
426 r.w.Newline()
427
428 if flags&ListTypeDefinition != 0 {
429 r.w.WriteString("<dl>")
430 } else if flags&ListTypeOrdered != 0 {
431 r.w.WriteString("<ol>")
432 } else {
433 r.w.WriteString("<ul>")
434 }
435}
436
437func (r *HTML) EndList(flags ListType) {
438 if flags&ListTypeDefinition != 0 {
439 r.w.WriteString("</dl>\n")
440 } else if flags&ListTypeOrdered != 0 {
441 r.w.WriteString("</ol>\n")
442 } else {
443 r.w.WriteString("</ul>\n")
444 }
445}
446
447func (r *HTML) ListItem(text []byte, flags ListType) {
448 if (flags&ListItemContainsBlock != 0 && flags&ListTypeDefinition == 0) ||
449 flags&ListItemBeginningOfList != 0 {
450 r.w.Newline()
451 }
452 if flags&ListTypeTerm != 0 {
453 r.w.WriteString("<dt>")
454 } else if flags&ListTypeDefinition != 0 {
455 r.w.WriteString("<dd>")
456 } else {
457 r.w.WriteString("<li>")
458 }
459 r.w.Write(text)
460 if flags&ListTypeTerm != 0 {
461 r.w.WriteString("</dt>\n")
462 } else if flags&ListTypeDefinition != 0 {
463 r.w.WriteString("</dd>\n")
464 } else {
465 r.w.WriteString("</li>\n")
466 }
467}
468
469func (r *HTML) BeginParagraph() {
470 r.w.Newline()
471 r.w.WriteString("<p>")
472}
473
474func (r *HTML) EndParagraph() {
475 r.w.WriteString("</p>\n")
476}
477
478func (r *HTML) AutoLink(link []byte, kind LinkType) {
479 skipRanges := htmlEntity.FindAllIndex(link, -1)
480 if r.flags&Safelink != 0 && !isSafeLink(link) && kind != LinkTypeEmail {
481 // mark it but don't link it if it is not a safe link: no smartypants
482 r.w.WriteString("<tt>")
483 r.entityEscapeWithSkip(link, skipRanges)
484 r.w.WriteString("</tt>")
485 return
486 }
487
488 r.w.WriteString("<a href=\"")
489 if kind == LinkTypeEmail {
490 r.w.WriteString("mailto:")
491 } else {
492 r.maybeWriteAbsolutePrefix(link)
493 }
494
495 r.entityEscapeWithSkip(link, skipRanges)
496
497 var relAttrs []string
498 if r.flags&NofollowLinks != 0 && !isRelativeLink(link) {
499 relAttrs = append(relAttrs, "nofollow")
500 }
501 if r.flags&NoreferrerLinks != 0 && !isRelativeLink(link) {
502 relAttrs = append(relAttrs, "noreferrer")
503 }
504 if len(relAttrs) > 0 {
505 r.w.WriteString(fmt.Sprintf("\" rel=\"%s", strings.Join(relAttrs, " ")))
506 }
507
508 // blank target only add to external link
509 if r.flags&HrefTargetBlank != 0 && !isRelativeLink(link) {
510 r.w.WriteString("\" target=\"_blank")
511 }
512
513 r.w.WriteString("\">")
514
515 // Pretty print: if we get an email address as
516 // an actual URI, e.g. `mailto:foo@bar.com`, we don't
517 // want to print the `mailto:` prefix
518 switch {
519 case bytes.HasPrefix(link, []byte("mailto://")):
520 r.attrEscape(link[len("mailto://"):])
521 case bytes.HasPrefix(link, []byte("mailto:")):
522 r.attrEscape(link[len("mailto:"):])
523 default:
524 r.entityEscapeWithSkip(link, skipRanges)
525 }
526
527 r.w.WriteString("</a>")
528}
529
530func (r *HTML) CodeSpan(text []byte) {
531 r.w.WriteString("<code>")
532 r.attrEscape(text)
533 r.w.WriteString("</code>")
534}
535
536func (r *HTML) DoubleEmphasis(text []byte) {
537 r.w.WriteString("<strong>")
538 r.w.Write(text)
539 r.w.WriteString("</strong>")
540}
541
542func (r *HTML) Emphasis(text []byte) {
543 if len(text) == 0 {
544 return
545 }
546 r.w.WriteString("<em>")
547 r.w.Write(text)
548 r.w.WriteString("</em>")
549}
550
551func (r *HTML) maybeWriteAbsolutePrefix(link []byte) {
552 if r.parameters.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' {
553 r.w.WriteString(r.parameters.AbsolutePrefix)
554 if link[0] != '/' {
555 r.w.WriteByte('/')
556 }
557 }
558}
559
560func (r *HTML) Image(link []byte, title []byte, alt []byte) {
561 if r.flags&SkipImages != 0 {
562 return
563 }
564
565 r.w.WriteString("<img src=\"")
566 r.maybeWriteAbsolutePrefix(link)
567 r.attrEscape(link)
568 r.w.WriteString("\" alt=\"")
569 if len(alt) > 0 {
570 r.attrEscape(alt)
571 }
572 if len(title) > 0 {
573 r.w.WriteString("\" title=\"")
574 r.attrEscape(title)
575 }
576
577 r.w.WriteByte('"')
578 r.w.WriteString(r.closeTag)
579}
580
581func (r *HTML) LineBreak() {
582 r.w.WriteString("<br")
583 r.w.WriteString(r.closeTag)
584 r.w.WriteByte('\n')
585}
586
587func (r *HTML) Link(link []byte, title []byte, content []byte) {
588 if r.flags&SkipLinks != 0 {
589 // write the link text out but don't link it, just mark it with typewriter font
590 r.w.WriteString("<tt>")
591 r.attrEscape(content)
592 r.w.WriteString("</tt>")
593 return
594 }
595
596 if r.flags&Safelink != 0 && !isSafeLink(link) {
597 // write the link text out but don't link it, just mark it with typewriter font
598 r.w.WriteString("<tt>")
599 r.attrEscape(content)
600 r.w.WriteString("</tt>")
601 return
602 }
603
604 r.w.WriteString("<a href=\"")
605 r.maybeWriteAbsolutePrefix(link)
606 r.attrEscape(link)
607 if len(title) > 0 {
608 r.w.WriteString("\" title=\"")
609 r.attrEscape(title)
610 }
611 var relAttrs []string
612 if r.flags&NofollowLinks != 0 && !isRelativeLink(link) {
613 relAttrs = append(relAttrs, "nofollow")
614 }
615 if r.flags&NoreferrerLinks != 0 && !isRelativeLink(link) {
616 relAttrs = append(relAttrs, "noreferrer")
617 }
618 if len(relAttrs) > 0 {
619 r.w.WriteString(fmt.Sprintf("\" rel=\"%s", strings.Join(relAttrs, " ")))
620 }
621
622 // blank target only add to external link
623 if r.flags&HrefTargetBlank != 0 && !isRelativeLink(link) {
624 r.w.WriteString("\" target=\"_blank")
625 }
626
627 r.w.WriteString("\">")
628 r.w.Write(content)
629 r.w.WriteString("</a>")
630 return
631}
632
633func (r *HTML) RawHtmlTag(text []byte) {
634 if r.flags&SkipHTML != 0 {
635 return
636 }
637 if r.flags&SkipStyle != 0 && isHtmlTag(text, "style") {
638 return
639 }
640 if r.flags&SkipLinks != 0 && isHtmlTag(text, "a") {
641 return
642 }
643 if r.flags&SkipImages != 0 && isHtmlTag(text, "img") {
644 return
645 }
646 r.w.Write(text)
647}
648
649func (r *HTML) TripleEmphasis(text []byte) {
650 r.w.WriteString("<strong><em>")
651 r.w.Write(text)
652 r.w.WriteString("</em></strong>")
653}
654
655func (r *HTML) StrikeThrough(text []byte) {
656 r.w.WriteString("<del>")
657 r.w.Write(text)
658 r.w.WriteString("</del>")
659}
660
661func (r *HTML) FootnoteRef(ref []byte, id int) {
662 slug := slugify(ref)
663 r.w.WriteString(`<sup class="footnote-ref" id="`)
664 r.w.WriteString(`fnref:`)
665 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
666 r.w.Write(slug)
667 r.w.WriteString(`"><a rel="footnote" href="#`)
668 r.w.WriteString(`fn:`)
669 r.w.WriteString(r.parameters.FootnoteAnchorPrefix)
670 r.w.Write(slug)
671 r.w.WriteString(`">`)
672 r.w.WriteString(strconv.Itoa(id))
673 r.w.WriteString(`</a></sup>`)
674}
675
676func (r *HTML) Entity(entity []byte) {
677 r.w.Write(entity)
678}
679
680func (r *HTML) NormalText(text []byte) {
681 if r.extensions&Smartypants != 0 {
682 r.Smartypants(text)
683 } else {
684 r.attrEscape(text)
685 }
686}
687
688func (r *HTML) Smartypants(text []byte) {
689 r.w.Write(NewSmartypantsRenderer(r.extensions).Process(text))
690}
691
692func (r *HTML) DocumentHeader() {
693 if r.flags&CompletePage == 0 {
694 return
695 }
696
697 ending := ""
698 if r.flags&UseXHTML != 0 {
699 r.w.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
700 r.w.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
701 r.w.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
702 ending = " /"
703 } else {
704 r.w.WriteString("<!DOCTYPE html>\n")
705 r.w.WriteString("<html>\n")
706 }
707 r.w.WriteString("<head>\n")
708 r.w.WriteString(" <title>")
709 r.NormalText([]byte(r.title))
710 r.w.WriteString("</title>\n")
711 r.w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
712 r.w.WriteString(VERSION)
713 r.w.WriteString("\"")
714 r.w.WriteString(ending)
715 r.w.WriteString(">\n")
716 r.w.WriteString(" <meta charset=\"utf-8\"")
717 r.w.WriteString(ending)
718 r.w.WriteString(">\n")
719 if r.css != "" {
720 r.w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
721 r.attrEscape([]byte(r.css))
722 r.w.WriteString("\"")
723 r.w.WriteString(ending)
724 r.w.WriteString(">\n")
725 }
726 r.w.WriteString("</head>\n")
727 r.w.WriteString("<body>\n")
728
729 r.tocMarker = r.w.output.Len() // XXX
730}
731
732func (r *HTML) DocumentFooter() {
733 // finalize and insert the table of contents
734 if r.extensions&TOC != 0 {
735 r.TocFinalize()
736
737 // now we have to insert the table of contents into the document
738 var temp bytes.Buffer
739
740 // start by making a copy of everything after the document header
741 temp.Write(r.w.output.Bytes()[r.tocMarker:])
742
743 // now clear the copied material from the main output buffer
744 r.w.output.Truncate(r.tocMarker)
745
746 // corner case spacing issue
747 if r.flags&CompletePage != 0 {
748 r.w.WriteByte('\n')
749 }
750
751 // insert the table of contents
752 r.w.WriteString("<nav>\n")
753 r.w.Write(r.toc.Bytes())
754 r.w.WriteString("</nav>\n")
755
756 // corner case spacing issue
757 if r.flags&CompletePage == 0 && r.extensions&OmitContents == 0 {
758 r.w.WriteByte('\n')
759 }
760
761 // write out everything that came after it
762 if r.extensions&OmitContents == 0 {
763 r.w.Write(temp.Bytes())
764 }
765 }
766
767 if r.flags&CompletePage != 0 {
768 r.w.WriteString("\n</body>\n")
769 r.w.WriteString("</html>\n")
770 }
771
772}
773
774func (r *HTML) TocHeaderWithAnchor(text []byte, level int, anchor string) {
775 for level > r.currentLevel {
776 switch {
777 case bytes.HasSuffix(r.toc.Bytes(), []byte("</li>\n")):
778 // this sublist can nest underneath a header
779 size := r.toc.Len()
780 r.toc.Truncate(size - len("</li>\n"))
781
782 case r.currentLevel > 0:
783 r.toc.WriteString("<li>")
784 }
785 if r.toc.Len() > 0 {
786 r.toc.WriteByte('\n')
787 }
788 r.toc.WriteString("<ul>\n")
789 r.currentLevel++
790 }
791
792 for level < r.currentLevel {
793 r.toc.WriteString("</ul>")
794 if r.currentLevel > 1 {
795 r.toc.WriteString("</li>\n")
796 }
797 r.currentLevel--
798 }
799
800 r.toc.WriteString("<li><a href=\"#")
801 if anchor != "" {
802 r.toc.WriteString(anchor)
803 } else {
804 r.toc.WriteString("toc_")
805 r.toc.WriteString(strconv.Itoa(r.headerCount))
806 }
807 r.toc.WriteString("\">")
808 r.headerCount++
809
810 r.toc.Write(text)
811
812 r.toc.WriteString("</a></li>\n")
813}
814
815func (r *HTML) TocHeader(text []byte, level int) {
816 r.TocHeaderWithAnchor(text, level, "")
817}
818
819func (r *HTML) TocFinalize() {
820 for r.currentLevel > 1 {
821 r.toc.WriteString("</ul></li>\n")
822 r.currentLevel--
823 }
824
825 if r.currentLevel > 0 {
826 r.toc.WriteString("</ul>\n")
827 }
828}
829
830func isHtmlTag(tag []byte, tagname string) bool {
831 found, _ := findHtmlTagPos(tag, tagname)
832 return found
833}
834
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of char
// at or after start that is not inside a quoted section. A quoted section is
// opened by ', " or ` and closed by the same character; the text inside might
// be JavaScript. If no such occurrence exists, start is returned.
//
// Only the quote character that opened a section can close it, so the other
// two quote characters are ordinary text inside it. This lets a value such as
// title="it's" be skipped correctly.
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var quote byte // the quote character currently open, or 0 outside quotes
	for i := start; i < len(html); i++ {
		c := html[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == char:
			return i
		case c == '\'' || c == '"' || c == '`':
			quote = c
		}
	}
	return start
}
857
858func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
859 i := 0
860 if i < len(tag) && tag[0] != '<' {
861 return false, -1
862 }
863 i++
864 i = skipSpace(tag, i)
865
866 if i < len(tag) && tag[i] == '/' {
867 i++
868 }
869
870 i = skipSpace(tag, i)
871 j := 0
872 for ; i < len(tag); i, j = i+1, j+1 {
873 if j >= len(tagname) {
874 break
875 }
876
877 if strings.ToLower(string(tag[i]))[0] != tagname[j] {
878 return false, -1
879 }
880 }
881
882 if i == len(tag) {
883 return false, -1
884 }
885
886 rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
887 if rightAngle > i {
888 return true, rightAngle
889 }
890
891 return false, -1
892}
893
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. If there is none it returns len(text), or start itself when
// start is already at or past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
901
902func skipSpace(tag []byte, i int) int {
903 for i < len(tag) && isspace(tag[i]) {
904 i++
905 }
906 return i
907}
908
// skipChar returns the index of the first byte at or after start that differs
// from char, or the end position if every remaining byte equals char.
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
916
// isRelativeLink reports whether link is relative to the current document or
// site. That covers a fragment ("#..."), a root-relative path ("/" or "/x",
// but not "//host", which may be a protocol-relative link), and a path that
// starts with "./" or "../".
//
// An empty link is reported as not relative rather than causing an
// out-of-range panic.
func isRelativeLink(link []byte) (yes bool) {
	switch {
	case len(link) == 0:
		return false
	case link[0] == '#':
		// a fragment within the current document
		return true
	case link[0] == '/':
		// only the root "/", or "/path" but not "//host"
		return len(link) == 1 || link[1] != '/'
	}

	// current directory "./" or parent directory "../"
	return bytes.HasPrefix(link, []byte("./")) || bytes.HasPrefix(link, []byte("../"))
}
945
946func (r *HTML) ensureUniqueHeaderID(id string) string {
947 for count, found := r.headerIDs[id]; found; count, found = r.headerIDs[id] {
948 tmp := fmt.Sprintf("%s-%d", id, count+1)
949
950 if _, tmpFound := r.headerIDs[tmp]; !tmpFound {
951 r.headerIDs[id] = count + 1
952 id = tmp
953 } else {
954 id = id + "-1"
955 }
956 }
957
958 if _, found := r.headerIDs[id]; !found {
959 r.headerIDs[id] = 0
960 }
961
962 return id
963}
964
965func (r *HTML) addAbsPrefix(link []byte) []byte {
966 if r.parameters.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' {
967 newDest := r.parameters.AbsolutePrefix
968 if link[0] != '/' {
969 newDest += "/"
970 }
971 newDest += string(link)
972 return []byte(newDest)
973 }
974 return link
975}
976
977func appendLinkAttrs(attrs []string, flags HTMLFlags, link []byte) []string {
978 if isRelativeLink(link) {
979 return attrs
980 }
981 val := []string{}
982 if flags&NofollowLinks != 0 {
983 val = append(val, "nofollow")
984 }
985 if flags&NoreferrerLinks != 0 {
986 val = append(val, "noreferrer")
987 }
988 if flags&HrefTargetBlank != 0 {
989 attrs = append(attrs, "target=\"_blank\"")
990 }
991 if len(val) == 0 {
992 return attrs
993 }
994 attr := fmt.Sprintf("rel=%q", strings.Join(val, " "))
995 return append(attrs, attr)
996}
997
// isMailto reports whether link begins with the exact, case-sensitive prefix
// "mailto:".
func isMailto(link []byte) bool {
	const scheme = "mailto:"
	if len(link) < len(scheme) {
		return false
	}
	return bytes.Equal(link[:len(scheme)], []byte(scheme))
}
1001
1002func isSmartypantable(node *Node) bool {
1003 pt := node.Parent.Type
1004 return pt != Link && pt != CodeBlock && pt != Code
1005}
1006
// appendLanguageAttr appends a class="language-<word>" attribute to attrs,
// where <word> is the part of info before the first tab or space. attrs is
// returned unchanged when that word is empty.
//
// The word is cut at the first tab or space, whichever comes first. Splitting
// on the two-byte sequence "\t " would let the whole info string through for
// input such as "go linenos".
func appendLanguageAttr(attrs []string, info []byte) []string {
	end := bytes.IndexAny(info, "\t ")
	if end < 0 {
		end = len(info)
	}
	if end == 0 {
		return attrs
	}
	return append(attrs, fmt.Sprintf("class=\"language-%s\"", info[:end]))
}
1014
// tag builds "<name attr1 attr2 ...>" as a byte slice. Attributes are joined
// with single spaces and omitted entirely when attrs is empty. With
// selfClosing the tag ends in " />" instead of ">".
func tag(name string, attrs []string, selfClosing bool) []byte {
	opening := "<" + name
	if len(attrs) > 0 {
		opening += " " + strings.Join(attrs, " ")
	}

	ending := ">"
	if selfClosing {
		ending = " />"
	}
	return []byte(opening + ending)
}
1025
1026func footnoteRef(prefix string, node *Node) []byte {
1027 urlFrag := prefix + string(slugify(node.Destination))
1028 anchor := fmt.Sprintf(`<a rel="footnote" href="#fn:%s">%d</a>`, urlFrag, node.NoteID)
1029 return []byte(fmt.Sprintf(`<sup class="footnote-ref" id="fnref:%s">%s</sup>`, urlFrag, anchor))
1030}
1031
// footnoteItem builds the opening <li> tag of a footnote, with id
// "fn:<prefix><slug>".
func footnoteItem(prefix string, slug []byte) []byte {
	anchor := prefix + string(slug)
	return []byte(fmt.Sprintf(`<li id="fn:%s">`, anchor))
}
1035
// footnoteReturnLink builds the link placed at the end of a footnote: a
// space, then an <a class="footnote-return"> that points at
// "#fnref:<prefix><slug>" and contains returnLink.
func footnoteReturnLink(prefix, returnLink string, slug []byte) []byte {
	anchor := prefix + string(slug)
	link := fmt.Sprintf(` <a class="footnote-return" href="#fnref:%s">%s</a>`, anchor, returnLink)
	return []byte(link)
}
1040
1041func itemOpenCR(node *Node) bool {
1042 if node.Prev == nil {
1043 return false
1044 }
1045 ld := node.Parent.ListData
1046 return !ld.Tight && ld.ListFlags&ListTypeDefinition == 0
1047}
1048
1049func skipParagraphTags(node *Node) bool {
1050 grandparent := node.Parent.Parent
1051 if grandparent == nil || grandparent.Type != List {
1052 return false
1053 }
1054 tightOrTerm := grandparent.Tight || node.Parent.ListFlags&ListTypeTerm != 0
1055 return grandparent.Type == List && tightOrTerm
1056}
1057
1058func cellAlignment(align CellAlignFlags) string {
1059 switch align {
1060 case TableAlignmentLeft:
1061 return "left"
1062 case TableAlignmentRight:
1063 return "right"
1064 case TableAlignmentCenter:
1065 return "center"
1066 default:
1067 return ""
1068 }
1069}
1070
1071func esc(text []byte, preserveEntities bool) []byte {
1072 return attrEscape2(text)
1073}
1074
// escCode returns text HTML-escaped for use inside a code element. Unlike
// attrEscape2, existing entities are not unescaped first, so an '&' in the
// input always becomes "&amp;". The preserveEntities argument is accepted but
// not consulted.
//
// html.EscapeString emits the numeric forms &#34; and &#39;. The double quote
// is rewritten to &quot; and the apostrophe is turned back into a literal '.
func escCode(text []byte, preserveEntities bool) []byte {
	escaped := []byte(html.EscapeString(string(text)))
	escaped = bytes.Replace(escaped, []byte("&#34;"), []byte("&quot;"), -1)
	return bytes.Replace(escaped, []byte("&#39;"), []byte{'\''}, -1)
}
1080
1081func (r *HTML) out(w io.Writer, text []byte) {
1082 if r.disableTags > 0 {
1083 w.Write(reHtmlTag.ReplaceAll(text, []byte{}))
1084 } else {
1085 w.Write(text)
1086 }
1087 r.lastOutputLen = len(text)
1088}
1089
1090func (r *HTML) cr(w io.Writer) {
1091 if r.lastOutputLen > 0 {
1092 r.out(w, []byte{'\n'})
1093 }
1094}
1095
1096func (r *HTML) RenderNode(w io.Writer, node *Node, entering bool) {
1097 attrs := []string{}
1098 switch node.Type {
1099 case Text:
1100 r.out(w, node.Literal)
1101 break
1102 case Softbreak:
1103 r.out(w, []byte("\n"))
1104 // TODO: make it configurable via out(renderer.softbreak)
1105 case Hardbreak:
1106 r.out(w, tag("br", nil, true))
1107 r.cr(w)
1108 case Emph:
1109 if entering {
1110 r.out(w, tag("em", nil, false))
1111 } else {
1112 r.out(w, tag("/em", nil, false))
1113 }
1114 break
1115 case Strong:
1116 if entering {
1117 r.out(w, tag("strong", nil, false))
1118 } else {
1119 r.out(w, tag("/strong", nil, false))
1120 }
1121 break
1122 case Del:
1123 if entering {
1124 r.out(w, tag("del", nil, false))
1125 } else {
1126 r.out(w, tag("/del", nil, false))
1127 }
1128 case HTMLSpan:
1129 //if options.safe {
1130 // out(w, "<!-- raw HTML omitted -->")
1131 //} else {
1132 r.out(w, node.Literal)
1133 //}
1134 case Link:
1135 // mark it but don't link it if it is not a safe link: no smartypants
1136 dest := node.LinkData.Destination
1137 if r.flags&Safelink != 0 && !isSafeLink(dest) && !isMailto(dest) {
1138 if entering {
1139 r.out(w, tag("tt", nil, false))
1140 } else {
1141 r.out(w, tag("/tt", nil, false))
1142 }
1143 } else {
1144 if entering {
1145 dest = r.addAbsPrefix(dest)
1146 //if (!(options.safe && potentiallyUnsafe(node.destination))) {
1147 attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest, true)))
1148 //}
1149 if node.NoteID != 0 {
1150 r.out(w, footnoteRef(r.parameters.FootnoteAnchorPrefix, node))
1151 break
1152 }
1153 attrs = appendLinkAttrs(attrs, r.flags, dest)
1154 if len(node.LinkData.Title) > 0 {
1155 attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title, true)))
1156 }
1157 r.out(w, tag("a", attrs, false))
1158 } else {
1159 if node.NoteID != 0 {
1160 break
1161 }
1162 r.out(w, tag("/a", nil, false))
1163 }
1164 }
1165 case Image:
1166 if entering {
1167 dest := node.LinkData.Destination
1168 dest = r.addAbsPrefix(dest)
1169 if r.disableTags == 0 {
1170 //if options.safe && potentiallyUnsafe(dest) {
1171 //out(w, `<img src="" alt="`)
1172 //} else {
1173 r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest, true))))
1174 //}
1175 }
1176 r.disableTags++
1177 } else {
1178 r.disableTags--
1179 if r.disableTags == 0 {
1180 if node.LinkData.Title != nil {
1181 r.out(w, []byte(`" title="`))
1182 r.out(w, esc(node.LinkData.Title, true))
1183 }
1184 r.out(w, []byte(`" />`))
1185 }
1186 }
1187 case Code:
1188 r.out(w, tag("code", nil, false))
1189 r.out(w, escCode(node.Literal, false))
1190 r.out(w, tag("/code", nil, false))
1191 case Document:
1192 break
1193 case Paragraph:
1194 if skipParagraphTags(node) {
1195 break
1196 }
1197 if entering {
1198 // TODO: untangle this clusterfuck about when the newlines need
1199 // to be added and when not.
1200 if node.Prev != nil {
1201 t := node.Prev.Type
1202 if t == HTMLBlock || t == List || t == Paragraph || t == Header || t == CodeBlock || t == BlockQuote || t == HorizontalRule {
1203 r.cr(w)
1204 }
1205 }
1206 if node.Parent.Type == BlockQuote && node.Prev == nil {
1207 r.cr(w)
1208 }
1209 r.out(w, tag("p", attrs, false))
1210 } else {
1211 r.out(w, tag("/p", attrs, false))
1212 if !(node.Parent.Type == Item && node.Next == nil) {
1213 r.cr(w)
1214 }
1215 }
1216 break
1217 case BlockQuote:
1218 if entering {
1219 r.cr(w)
1220 r.out(w, tag("blockquote", attrs, false))
1221 } else {
1222 r.out(w, tag("/blockquote", nil, false))
1223 r.cr(w)
1224 }
1225 break
1226 case HTMLBlock:
1227 r.cr(w)
1228 r.out(w, node.Literal)
1229 r.cr(w)
1230 case Header:
1231 tagname := fmt.Sprintf("h%d", node.Level)
1232 if entering {
1233 if node.IsTitleblock {
1234 attrs = append(attrs, `class="title"`)
1235 }
1236 if node.HeaderID != "" {
1237 id := r.ensureUniqueHeaderID(node.HeaderID)
1238 if r.parameters.HeaderIDPrefix != "" {
1239 id = r.parameters.HeaderIDPrefix + id
1240 }
1241 if r.parameters.HeaderIDSuffix != "" {
1242 id = id + r.parameters.HeaderIDSuffix
1243 }
1244 attrs = append(attrs, fmt.Sprintf(`id="%s"`, id))
1245 }
1246 r.cr(w)
1247 r.out(w, tag(tagname, attrs, false))
1248 } else {
1249 r.out(w, tag("/"+tagname, nil, false))
1250 if !(node.Parent.Type == Item && node.Next == nil) {
1251 r.cr(w)
1252 }
1253 }
1254 break
1255 case HorizontalRule:
1256 r.cr(w)
1257 r.out(w, tag("hr", attrs, r.flags&UseXHTML != 0))
1258 r.cr(w)
1259 break
1260 case List:
1261 tagName := "ul"
1262 if node.ListFlags&ListTypeOrdered != 0 {
1263 tagName = "ol"
1264 }
1265 if node.ListFlags&ListTypeDefinition != 0 {
1266 tagName = "dl"
1267 }
1268 if entering {
1269 // var start = node.listStart;
1270 // if (start !== null && start !== 1) {
1271 // attrs.push(['start', start.toString()]);
1272 // }
1273 r.cr(w)
1274 if node.Parent.Type == Item && node.Parent.Parent.Tight {
1275 r.cr(w)
1276 }
1277 r.out(w, tag(tagName, attrs, false))
1278 r.cr(w)
1279 } else {
1280 r.out(w, tag("/"+tagName, nil, false))
1281 //cr(w)
1282 //if node.parent.Type != Item {
1283 // cr(w)
1284 //}
1285 if node.Parent.Type == Item && node.Next != nil {
1286 r.cr(w)
1287 }
1288 if node.Parent.Type == Document || node.Parent.Type == BlockQuote {
1289 r.cr(w)
1290 }
1291 }
1292 case Item:
1293 tagName := "li"
1294 if node.ListFlags&ListTypeDefinition != 0 {
1295 tagName = "dd"
1296 }
1297 if node.ListFlags&ListTypeTerm != 0 {
1298 tagName = "dt"
1299 }
1300 if entering {
1301 if itemOpenCR(node) {
1302 r.cr(w)
1303 }
1304 if node.ListData.RefLink != nil {
1305 slug := slugify(node.ListData.RefLink)
1306 r.out(w, footnoteItem(r.parameters.FootnoteAnchorPrefix, slug))
1307 break
1308 }
1309 r.out(w, tag(tagName, nil, false))
1310 } else {
1311 if node.ListData.RefLink != nil {
1312 slug := slugify(node.ListData.RefLink)
1313 if r.flags&FootnoteReturnLinks != 0 {
1314 r.out(w, footnoteReturnLink(r.parameters.FootnoteAnchorPrefix, r.parameters.FootnoteReturnLinkContents, slug))
1315 }
1316 }
1317 r.out(w, tag("/"+tagName, nil, false))
1318 r.cr(w)
1319 }
1320 case CodeBlock:
1321 attrs = appendLanguageAttr(attrs, node.Info)
1322 r.cr(w)
1323 r.out(w, tag("pre", nil, false))
1324 r.out(w, tag("code", attrs, false))
1325 r.out(w, escCode(node.Literal, false))
1326 r.out(w, tag("/code", nil, false))
1327 r.out(w, tag("/pre", nil, false))
1328 if node.Parent.Type != Item {
1329 r.cr(w)
1330 }
1331 case Table:
1332 if entering {
1333 r.cr(w)
1334 r.out(w, tag("table", nil, false))
1335 } else {
1336 r.out(w, tag("/table", nil, false))
1337 r.cr(w)
1338 }
1339 case TableCell:
1340 tagName := "td"
1341 if node.IsHeader {
1342 tagName = "th"
1343 }
1344 if entering {
1345 align := cellAlignment(node.Align)
1346 if align != "" {
1347 attrs = append(attrs, fmt.Sprintf(`align="%s"`, align))
1348 }
1349 if node.Prev == nil {
1350 r.cr(w)
1351 }
1352 r.out(w, tag(tagName, attrs, false))
1353 } else {
1354 r.out(w, tag("/"+tagName, nil, false))
1355 r.cr(w)
1356 }
1357 case TableHead:
1358 if entering {
1359 r.cr(w)
1360 r.out(w, tag("thead", nil, false))
1361 } else {
1362 r.out(w, tag("/thead", nil, false))
1363 r.cr(w)
1364 }
1365 case TableBody:
1366 if entering {
1367 r.cr(w)
1368 r.out(w, tag("tbody", nil, false))
1369 // XXX: this is to adhere to a rather silly test. Should fix test.
1370 if node.FirstChild == nil {
1371 r.cr(w)
1372 }
1373 } else {
1374 r.out(w, tag("/tbody", nil, false))
1375 r.cr(w)
1376 }
1377 case TableRow:
1378 if entering {
1379 r.cr(w)
1380 r.out(w, tag("tr", nil, false))
1381 } else {
1382 r.out(w, tag("/tr", nil, false))
1383 r.cr(w)
1384 }
1385 default:
1386 panic("Unknown node type " + node.Type.String())
1387 }
1388}
1389
1390func (r *HTML) writeDocumentHeader(w *bytes.Buffer, sr *SPRenderer) {
1391 if r.flags&CompletePage == 0 {
1392 return
1393 }
1394 ending := ""
1395 if r.flags&UseXHTML != 0 {
1396 w.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
1397 w.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
1398 w.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
1399 ending = " /"
1400 } else {
1401 w.WriteString("<!DOCTYPE html>\n")
1402 w.WriteString("<html>\n")
1403 }
1404 w.WriteString("<head>\n")
1405 w.WriteString(" <title>")
1406 if r.extensions&Smartypants != 0 {
1407 w.Write(sr.Process([]byte(r.title)))
1408 } else {
1409 w.Write(esc([]byte(r.title), false))
1410 }
1411 w.WriteString("</title>\n")
1412 w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
1413 w.WriteString(VERSION)
1414 w.WriteString("\"")
1415 w.WriteString(ending)
1416 w.WriteString(">\n")
1417 w.WriteString(" <meta charset=\"utf-8\"")
1418 w.WriteString(ending)
1419 w.WriteString(">\n")
1420 if r.css != "" {
1421 w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
1422 r.attrEscape([]byte(r.css))
1423 w.WriteString("\"")
1424 w.WriteString(ending)
1425 w.WriteString(">\n")
1426 }
1427 w.WriteString("</head>\n")
1428 w.WriteString("<body>\n\n")
1429}
1430
1431func (r *HTML) writeDocumentFooter(w *bytes.Buffer) {
1432 if r.flags&CompletePage == 0 {
1433 return
1434 }
1435 w.WriteString("\n</body>\n</html>\n")
1436}
1437
1438func (r *HTML) Render(ast *Node) []byte {
1439 //println("render_Blackfriday")
1440 //dump(ast)
1441 // Run Smartypants if it's enabled or simply escape text if not
1442 sr := NewSmartypantsRenderer(r.extensions)
1443 ast.Walk(func(node *Node, entering bool) {
1444 if node.Type == Text {
1445 if r.extensions&Smartypants != 0 {
1446 node.Literal = sr.Process(node.Literal)
1447 } else {
1448 node.Literal = esc(node.Literal, false)
1449 }
1450 }
1451 })
1452 var buff bytes.Buffer
1453 r.writeDocumentHeader(&buff, sr)
1454 ast.Walk(func(node *Node, entering bool) {
1455 r.RenderNode(&buff, node, entering)
1456 })
1457 r.writeDocumentFooter(&buff)
1458 return buff.Bytes()
1459}