all repos — grayfriday @ 375cae5dbea8edc3c30439522c2c72f5e0c2b566

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22	"strings"
 23)
 24
// HTML renderer configuration options.
//
// Each constant occupies its own bit (1 << iota), so several options can be
// ORed together into a single flags value and tested individually with a
// bitwise AND.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SKIP_SCRIPT                          // skip embedded <script> elements
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
 42
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded at the end of DocumentHeader; the TOC is inserted there
	headerCount  int           // index of the next header; used for the toc_N ids, incremented in TocHeader
	currentLevel int           // depth of <ul> lists currently open in toc
	toc          *bytes.Buffer // accumulates the table-of-contents markup

	smartypants *smartypantsRenderer // per-byte substitution callbacks consulted by Smartypants
}
 60
// Terminators written after singleton tags such as <hr>, <br> and <img>.
// HtmlRenderer picks xhtmlClose when HTML_USE_XHTML is set, htmlClose otherwise.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
 65
 66// HtmlRenderer creates and configures an Html object, which
 67// satisfies the Renderer interface.
 68//
 69// flags is a set of HTML_* options ORed together.
 70// title is the title of the document, and css is a URL for the document's
 71// stylesheet.
 72// title and css are only used when HTML_COMPLETE_PAGE is selected.
 73func HtmlRenderer(flags int, title string, css string) Renderer {
 74	// configure the rendering engine
 75	closeTag := htmlClose
 76	if flags&HTML_USE_XHTML != 0 {
 77		closeTag = xhtmlClose
 78	}
 79
 80	return &Html{
 81		flags:    flags,
 82		closeTag: closeTag,
 83		title:    title,
 84		css:      css,
 85
 86		headerCount:  0,
 87		currentLevel: 0,
 88		toc:          new(bytes.Buffer),
 89
 90		smartypants: smartypants(flags),
 91	}
 92}
 93
 94func attrEscape(out *bytes.Buffer, src []byte) {
 95	org := 0
 96	for i, ch := range src {
 97		// using if statements is a bit faster than a switch statement.
 98		// as the compiler improves, this should be unnecessary
 99		// this is only worthwhile because attrEscape is the single
100		// largest CPU user in normal use
101		if ch == '"' {
102			if i > org {
103				// copy all the normal characters since the last escape
104				out.Write(src[org:i])
105			}
106			org = i + 1
107			out.WriteString("&quot;")
108			continue
109		}
110		if ch == '&' {
111			if i > org {
112				out.Write(src[org:i])
113			}
114			org = i + 1
115			out.WriteString("&amp;")
116			continue
117		}
118		if ch == '<' {
119			if i > org {
120				out.Write(src[org:i])
121			}
122			org = i + 1
123			out.WriteString("&lt;")
124			continue
125		}
126		if ch == '>' {
127			if i > org {
128				out.Write(src[org:i])
129			}
130			org = i + 1
131			out.WriteString("&gt;")
132			continue
133		}
134	}
135	if org < len(src) {
136		out.Write(src[org:])
137	}
138}
139
140func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
141	marker := out.Len()
142	doubleSpace(out)
143
144	if options.flags&HTML_TOC != 0 {
145		// headerCount is incremented in htmlTocHeader
146		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
147	} else {
148		out.WriteString(fmt.Sprintf("<h%d>", level))
149	}
150
151	tocMarker := out.Len()
152	if !text() {
153		out.Truncate(marker)
154		return
155	}
156
157	// are we building a table of contents?
158	if options.flags&HTML_TOC != 0 {
159		options.TocHeader(out.Bytes()[tocMarker:], level)
160	}
161
162	out.WriteString(fmt.Sprintf("</h%d>\n", level))
163}
164
165func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
166	if options.flags&HTML_SKIP_HTML != 0 {
167		return
168	}
169
170	doubleSpace(out)
171	if options.flags&HTML_SKIP_SCRIPT != 0 {
172		out.Write(stripTag(string(text), "script", "p"))
173	} else {
174		out.Write(text)
175	}
176	out.WriteByte('\n')
177}
178
// stripTag replaces every literal "<tag>" with "<newTag>" and every literal
// "</tag>" with "</newTag>" in text and returns the result as bytes.
//
// This is a trivial implementation for the simplest possible case: matching
// is exact, so tags carrying attributes or written in a different case are
// left untouched.
func stripTag(text, tag, newTag string) []byte {
	replaced := strings.Replace(text, "<"+tag+">", "<"+newTag+">", -1)
	replaced = strings.Replace(replaced, "</"+tag+">", "</"+newTag+">", -1)
	return []byte(replaced)
}
188
189func (options *Html) HRule(out *bytes.Buffer) {
190	doubleSpace(out)
191	out.WriteString("<hr")
192	out.WriteString(options.closeTag)
193}
194
195func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
196	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
197		options.BlockCodeGithub(out, text, lang)
198	} else {
199		options.BlockCodeNormal(out, text, lang)
200	}
201}
202
203func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
204	doubleSpace(out)
205
206	// parse out the language names/classes
207	count := 0
208	for _, elt := range strings.Fields(lang) {
209		if elt[0] == '.' {
210			elt = elt[1:]
211		}
212		if len(elt) == 0 {
213			continue
214		}
215		if count == 0 {
216			out.WriteString("<pre><code class=\"")
217		} else {
218			out.WriteByte(' ')
219		}
220		attrEscape(out, []byte(elt))
221		count++
222	}
223
224	if count == 0 {
225		out.WriteString("<pre><code>")
226	} else {
227		out.WriteString("\">")
228	}
229
230	attrEscape(out, text)
231	out.WriteString("</code></pre>\n")
232}
233
234// GitHub style code block:
235//
236//              <pre lang="LANG"><code>
237//              ...
238//              </code></pre>
239//
240// Unlike other parsers, we store the language identifier in the <pre>,
241// and don't let the user generate custom classes.
242//
243// The language identifier in the <pre> block gets postprocessed and all
244// the code inside gets syntax highlighted with Pygments. This is much safer
245// than letting the user specify a CSS class for highlighting.
246//
247// Note that we only generate HTML for the first specifier.
248// E.g.
249//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
250func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
251	doubleSpace(out)
252
253	// parse out the language name
254	count := 0
255	for _, elt := range strings.Fields(lang) {
256		if elt[0] == '.' {
257			elt = elt[1:]
258		}
259		if len(elt) == 0 {
260			continue
261		}
262		out.WriteString("<pre lang=\"")
263		attrEscape(out, []byte(elt))
264		out.WriteString("\"><code>")
265		count++
266		break
267	}
268
269	if count == 0 {
270		out.WriteString("<pre><code>")
271	}
272
273	attrEscape(out, text)
274	out.WriteString("</code></pre>\n")
275}
276
277func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
278	doubleSpace(out)
279	out.WriteString("<blockquote>\n")
280	out.Write(text)
281	out.WriteString("</blockquote>\n")
282}
283
284func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
285	doubleSpace(out)
286	out.WriteString("<table>\n<thead>\n")
287	out.Write(header)
288	out.WriteString("</thead>\n\n<tbody>\n")
289	out.Write(body)
290	out.WriteString("</tbody>\n</table>\n")
291}
292
293func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
294	doubleSpace(out)
295	out.WriteString("<tr>\n")
296	out.Write(text)
297	out.WriteString("\n</tr>\n")
298}
299
300func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
301	doubleSpace(out)
302	switch align {
303	case TABLE_ALIGNMENT_LEFT:
304		out.WriteString("<td align=\"left\">")
305	case TABLE_ALIGNMENT_RIGHT:
306		out.WriteString("<td align=\"right\">")
307	case TABLE_ALIGNMENT_CENTER:
308		out.WriteString("<td align=\"center\">")
309	default:
310		out.WriteString("<td>")
311	}
312
313	out.Write(text)
314	out.WriteString("</td>")
315}
316
317func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
318	marker := out.Len()
319	doubleSpace(out)
320
321	if flags&LIST_TYPE_ORDERED != 0 {
322		out.WriteString("<ol>")
323	} else {
324		out.WriteString("<ul>")
325	}
326	if !text() {
327		out.Truncate(marker)
328		return
329	}
330	if flags&LIST_TYPE_ORDERED != 0 {
331		out.WriteString("</ol>\n")
332	} else {
333		out.WriteString("</ul>\n")
334	}
335}
336
337func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
338	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
339		doubleSpace(out)
340	}
341	out.WriteString("<li>")
342	out.Write(text)
343	out.WriteString("</li>\n")
344}
345
346func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
347	marker := out.Len()
348	doubleSpace(out)
349
350	out.WriteString("<p>")
351	if !text() {
352		out.Truncate(marker)
353		return
354	}
355	out.WriteString("</p>\n")
356}
357
358func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
359	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
360		// mark it but don't link it if it is not a safe link: no smartypants
361		out.WriteString("<tt>")
362		attrEscape(out, link)
363		out.WriteString("</tt>")
364		return
365	}
366
367	out.WriteString("<a href=\"")
368	if kind == LINK_TYPE_EMAIL {
369		out.WriteString("mailto:")
370	}
371	attrEscape(out, link)
372	out.WriteString("\">")
373
374	// Pretty print: if we get an email address as
375	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
376	// want to print the `mailto:` prefix
377	switch {
378	case bytes.HasPrefix(link, []byte("mailto://")):
379		attrEscape(out, link[len("mailto://"):])
380	case bytes.HasPrefix(link, []byte("mailto:")):
381		attrEscape(out, link[len("mailto:"):])
382	default:
383		attrEscape(out, link)
384	}
385
386	out.WriteString("</a>")
387}
388
389func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
390	out.WriteString("<code>")
391	attrEscape(out, text)
392	out.WriteString("</code>")
393}
394
395func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
396	out.WriteString("<strong>")
397	out.Write(text)
398	out.WriteString("</strong>")
399}
400
401func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
402	if len(text) == 0 {
403		return
404	}
405	out.WriteString("<em>")
406	out.Write(text)
407	out.WriteString("</em>")
408}
409
410func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
411	if options.flags&HTML_SKIP_IMAGES != 0 {
412		return
413	}
414
415	out.WriteString("<img src=\"")
416	attrEscape(out, link)
417	out.WriteString("\" alt=\"")
418	if len(alt) > 0 {
419		attrEscape(out, alt)
420	}
421	if len(title) > 0 {
422		out.WriteString("\" title=\"")
423		attrEscape(out, title)
424	}
425
426	out.WriteByte('"')
427	out.WriteString(options.closeTag)
428	return
429}
430
431func (options *Html) LineBreak(out *bytes.Buffer) {
432	out.WriteString("<br")
433	out.WriteString(options.closeTag)
434}
435
436func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
437	if options.flags&HTML_SKIP_LINKS != 0 {
438		// write the link text out but don't link it, just mark it with typewriter font
439		out.WriteString("<tt>")
440		attrEscape(out, content)
441		out.WriteString("</tt>")
442		return
443	}
444
445	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
446		// write the link text out but don't link it, just mark it with typewriter font
447		out.WriteString("<tt>")
448		attrEscape(out, content)
449		out.WriteString("</tt>")
450		return
451	}
452
453	out.WriteString("<a href=\"")
454	attrEscape(out, link)
455	if len(title) > 0 {
456		out.WriteString("\" title=\"")
457		attrEscape(out, title)
458	}
459	out.WriteString("\">")
460	out.Write(content)
461	out.WriteString("</a>")
462	return
463}
464
465func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
466	if options.flags&HTML_SKIP_HTML != 0 {
467		return
468	}
469	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
470		return
471	}
472	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
473		return
474	}
475	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
476		return
477	}
478	if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
479		return
480	}
481	out.Write(text)
482}
483
484func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
485	out.WriteString("<strong><em>")
486	out.Write(text)
487	out.WriteString("</em></strong>")
488}
489
490func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
491	out.WriteString("<del>")
492	out.Write(text)
493	out.WriteString("</del>")
494}
495
// Entity writes an HTML entity to out exactly as given, without escaping.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
499
500func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
501	if options.flags&HTML_USE_SMARTYPANTS != 0 {
502		options.Smartypants(out, text)
503	} else {
504		attrEscape(out, text)
505	}
506}
507
508func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
509	smrt := smartypantsData{false, false}
510
511	// first do normal entity escaping
512	var escaped bytes.Buffer
513	attrEscape(&escaped, text)
514	text = escaped.Bytes()
515
516	mark := 0
517	for i := 0; i < len(text); i++ {
518		if action := options.smartypants[text[i]]; action != nil {
519			if i > mark {
520				out.Write(text[mark:i])
521			}
522
523			previousChar := byte(0)
524			if i > 0 {
525				previousChar = text[i-1]
526			}
527			i += action(out, &smrt, previousChar, text[i:])
528			mark = i + 1
529		}
530	}
531
532	if mark < len(text) {
533		out.Write(text[mark:])
534	}
535}
536
537func (options *Html) DocumentHeader(out *bytes.Buffer) {
538	if options.flags&HTML_COMPLETE_PAGE == 0 {
539		return
540	}
541
542	ending := ""
543	if options.flags&HTML_USE_XHTML != 0 {
544		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
545		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
546		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
547		ending = " /"
548	} else {
549		out.WriteString("<!DOCTYPE html>\n")
550		out.WriteString("<html>\n")
551	}
552	out.WriteString("<head>\n")
553	out.WriteString("  <title>")
554	options.NormalText(out, []byte(options.title))
555	out.WriteString("</title>\n")
556	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
557	out.WriteString(VERSION)
558	out.WriteString("\"")
559	out.WriteString(ending)
560	out.WriteString(">\n")
561	out.WriteString("  <meta charset=\"utf-8\"")
562	out.WriteString(ending)
563	out.WriteString(">\n")
564	if options.css != "" {
565		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
566		attrEscape(out, []byte(options.css))
567		out.WriteString("\"")
568		out.WriteString(ending)
569		out.WriteString(">\n")
570	}
571	out.WriteString("</head>\n")
572	out.WriteString("<body>\n")
573
574	options.tocMarker = out.Len()
575}
576
577func (options *Html) DocumentFooter(out *bytes.Buffer) {
578	// finalize and insert the table of contents
579	if options.flags&HTML_TOC != 0 {
580		options.TocFinalize()
581
582		// now we have to insert the table of contents into the document
583		var temp bytes.Buffer
584
585		// start by making a copy of everything after the document header
586		temp.Write(out.Bytes()[options.tocMarker:])
587
588		// now clear the copied material from the main output buffer
589		out.Truncate(options.tocMarker)
590
591		// corner case spacing issue
592		if options.flags&HTML_COMPLETE_PAGE != 0 {
593			out.WriteByte('\n')
594		}
595
596		// insert the table of contents
597		out.WriteString("<nav>\n")
598		out.Write(options.toc.Bytes())
599		out.WriteString("</nav>\n")
600
601		// corner case spacing issue
602		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
603			out.WriteByte('\n')
604		}
605
606		// write out everything that came after it
607		if options.flags&HTML_OMIT_CONTENTS == 0 {
608			out.Write(temp.Bytes())
609		}
610	}
611
612	if options.flags&HTML_COMPLETE_PAGE != 0 {
613		out.WriteString("\n</body>\n")
614		out.WriteString("</html>\n")
615	}
616
617}
618
// TocHeader appends one entry to the table of contents buffer.
//
// text is the rendered header content and level its header level. Nested
// <ul> lists are opened or closed until the current nesting depth equals
// level, then an <li> linking to "#toc_N" is written, where N is the
// current headerCount. headerCount is incremented as part of this call.
func (options *Html) TocHeader(text []byte, level int) {
	// go deeper: open one <ul> per missing level
	for level > options.currentLevel {
		switch {
		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
			// this sublist can nest underneath a header
			// (drop the previous item's closing tag so the new list
			// ends up inside that item)
			size := options.toc.Len()
			options.toc.Truncate(size - len("</li>\n"))

		case options.currentLevel > 0:
			// no preceding item to nest under: open an empty one
			options.toc.WriteString("<li>")
		}
		if options.toc.Len() > 0 {
			options.toc.WriteByte('\n')
		}
		options.toc.WriteString("<ul>\n")
		options.currentLevel++
	}

	// go shallower: close one <ul> per surplus level; every list except
	// the outermost also closes the <li> that contains it
	for level < options.currentLevel {
		options.toc.WriteString("</ul>")
		if options.currentLevel > 1 {
			options.toc.WriteString("</li>\n")
		}
		options.currentLevel--
	}

	// the href matches the id that Header puts on the <hN> element
	options.toc.WriteString("<li><a href=\"#toc_")
	options.toc.WriteString(strconv.Itoa(options.headerCount))
	options.toc.WriteString("\">")
	options.headerCount++

	options.toc.Write(text)

	options.toc.WriteString("</a></li>\n")
}
654
655func (options *Html) TocFinalize() {
656	for options.currentLevel > 1 {
657		options.toc.WriteString("</ul></li>\n")
658		options.currentLevel--
659	}
660
661	if options.currentLevel > 0 {
662		options.toc.WriteString("</ul>\n")
663	}
664}
665
666func isHtmlTag(tag []byte, tagname string) bool {
667	i := 0
668	if i < len(tag) && tag[0] != '<' {
669		return false
670	}
671	i++
672	i = skipSpace(tag, i)
673
674	if i < len(tag) && tag[i] == '/' {
675		i++
676	}
677
678	i = skipSpace(tag, i)
679	j := 0
680	for ; i < len(tag); i, j = i+1, j+1 {
681		if j >= len(tagname) {
682			break
683		}
684
685		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
686			return false
687		}
688	}
689
690	if i == len(tag) {
691		return false
692	}
693
694	return isspace(tag[i]) || tag[i] == '>'
695}
696
697func skipSpace(tag []byte, i int) int {
698	for i < len(tag) && isspace(tag[i]) {
699		i++
700	}
701	return i
702}
703
// doubleSpace appends a newline to out unless out is still empty, so that
// block elements are separated without a leading newline in the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}