all repos — grayfriday @ c99557ec28d2958929d93f07b077a47de5444be8

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22	"strings"
 23)
 24
// HTML renderer configuration options.
//
// Each constant occupies its own bit (1 << iota), so any combination can be
// ORed together and passed as the flags argument of HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
 41
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data

	// tocMarker is the output-buffer offset recorded at the end of
	// DocumentHeader; DocumentFooter inserts the table of contents there.
	tocMarker int
	// headerCount is the number of entries added to the table of contents so
	// far; it supplies the N in the "toc_N" anchors.
	headerCount int
	// currentLevel is the nesting depth of the <ul> lists currently open in toc.
	currentLevel int
	// toc accumulates the table of contents markup while headers are rendered.
	toc *bytes.Buffer

	// smartypants is the table built by smartypants(flags); Smartypants
	// indexes it by input byte to find the substitution callback, if any.
	smartypants *smartypantsRenderer
}
 59
// Terminators for singleton tags such as <hr> and <br>. HtmlRenderer picks
// xhtmlClose when HTML_USE_XHTML is set and htmlClose otherwise.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
 64
 65// HtmlRenderer creates and configures an Html object, which
 66// satisfies the Renderer interface.
 67//
 68// flags is a set of HTML_* options ORed together.
 69// title is the title of the document, and css is a URL for the document's
 70// stylesheet.
 71// title and css are only used when HTML_COMPLETE_PAGE is selected.
 72func HtmlRenderer(flags int, title string, css string) Renderer {
 73	// configure the rendering engine
 74	closeTag := htmlClose
 75	if flags&HTML_USE_XHTML != 0 {
 76		closeTag = xhtmlClose
 77	}
 78
 79	return &Html{
 80		flags:    flags,
 81		closeTag: closeTag,
 82		title:    title,
 83		css:      css,
 84
 85		headerCount:  0,
 86		currentLevel: 0,
 87		toc:          new(bytes.Buffer),
 88
 89		smartypants: smartypants(flags),
 90	}
 91}
 92
 93func attrEscape(out *bytes.Buffer, src []byte) {
 94	org := 0
 95	for i, ch := range src {
 96		// using if statements is a bit faster than a switch statement.
 97		// as the compiler improves, this should be unnecessary
 98		// this is only worthwhile because attrEscape is the single
 99		// largest CPU user in normal use
100		if ch == '"' {
101			if i > org {
102				// copy all the normal characters since the last escape
103				out.Write(src[org:i])
104			}
105			org = i + 1
106			out.WriteString("&quot;")
107			continue
108		}
109		if ch == '&' {
110			if i > org {
111				out.Write(src[org:i])
112			}
113			org = i + 1
114			out.WriteString("&amp;")
115			continue
116		}
117		if ch == '<' {
118			if i > org {
119				out.Write(src[org:i])
120			}
121			org = i + 1
122			out.WriteString("&lt;")
123			continue
124		}
125		if ch == '>' {
126			if i > org {
127				out.Write(src[org:i])
128			}
129			org = i + 1
130			out.WriteString("&gt;")
131			continue
132		}
133	}
134	if org < len(src) {
135		out.Write(src[org:])
136	}
137}
138
139func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
140	marker := out.Len()
141	doubleSpace(out)
142
143	if options.flags&HTML_TOC != 0 {
144		// headerCount is incremented in htmlTocHeader
145		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
146	} else {
147		out.WriteString(fmt.Sprintf("<h%d>", level))
148	}
149
150	tocMarker := out.Len()
151	if !text() {
152		out.Truncate(marker)
153		return
154	}
155
156	// are we building a table of contents?
157	if options.flags&HTML_TOC != 0 {
158		options.TocHeader(out.Bytes()[tocMarker:], level)
159	}
160
161	out.WriteString(fmt.Sprintf("</h%d>\n", level))
162}
163
164func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
165	if options.flags&HTML_SKIP_HTML != 0 {
166		return
167	}
168
169	doubleSpace(out)
170	out.Write(text)
171	out.WriteByte('\n')
172}
173
174func (options *Html) HRule(out *bytes.Buffer) {
175	doubleSpace(out)
176	out.WriteString("<hr")
177	out.WriteString(options.closeTag)
178}
179
180func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
181	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
182		options.BlockCodeGithub(out, text, lang)
183	} else {
184		options.BlockCodeNormal(out, text, lang)
185	}
186}
187
188func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
189	doubleSpace(out)
190
191	// parse out the language names/classes
192	count := 0
193	for _, elt := range strings.Fields(lang) {
194		if elt[0] == '.' {
195			elt = elt[1:]
196		}
197		if len(elt) == 0 {
198			continue
199		}
200		if count == 0 {
201			out.WriteString("<pre><code class=\"")
202		} else {
203			out.WriteByte(' ')
204		}
205		attrEscape(out, []byte(elt))
206		count++
207	}
208
209	if count == 0 {
210		out.WriteString("<pre><code>")
211	} else {
212		out.WriteString("\">")
213	}
214
215	attrEscape(out, text)
216	out.WriteString("</code></pre>\n")
217}
218
219/*
220 * GitHub style code block:
221 *
222 *              <pre lang="LANG"><code>
223 *              ...
224 *              </pre></code>
225 *
226 * Unlike other parsers, we store the language identifier in the <pre>,
227 * and don't let the user generate custom classes.
228 *
229 * The language identifier in the <pre> block gets postprocessed and all
230 * the code inside gets syntax highlighted with Pygments. This is much safer
231 * than letting the user specify a CSS class for highlighting.
232 *
233 * Note that we only generate HTML for the first specifier.
234 * E.g.
235 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
236 */
237func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
238	doubleSpace(out)
239
240	// parse out the language name
241	count := 0
242	for _, elt := range strings.Fields(lang) {
243		if elt[0] == '.' {
244			elt = elt[1:]
245		}
246		if len(elt) == 0 {
247			continue
248		}
249		out.WriteString("<pre lang=\"")
250		attrEscape(out, []byte(elt))
251		out.WriteString("\"><code>")
252		count++
253		break
254	}
255
256	if count == 0 {
257		out.WriteString("<pre><code>")
258	}
259
260	attrEscape(out, text)
261	out.WriteString("</code></pre>\n")
262}
263
264
265func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
266	doubleSpace(out)
267	out.WriteString("<blockquote>\n")
268	out.Write(text)
269	out.WriteString("</blockquote>\n")
270}
271
272func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
273	doubleSpace(out)
274	out.WriteString("<table>\n<thead>\n")
275	out.Write(header)
276	out.WriteString("</thead>\n\n<tbody>\n")
277	out.Write(body)
278	out.WriteString("</tbody>\n</table>\n")
279}
280
281func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
282	doubleSpace(out)
283	out.WriteString("<tr>\n")
284	out.Write(text)
285	out.WriteString("\n</tr>\n")
286}
287
288func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
289	doubleSpace(out)
290	switch align {
291	case TABLE_ALIGNMENT_LEFT:
292		out.WriteString("<td align=\"left\">")
293	case TABLE_ALIGNMENT_RIGHT:
294		out.WriteString("<td align=\"right\">")
295	case TABLE_ALIGNMENT_CENTER:
296		out.WriteString("<td align=\"center\">")
297	default:
298		out.WriteString("<td>")
299	}
300
301	out.Write(text)
302	out.WriteString("</td>")
303}
304
305func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
306	marker := out.Len()
307	doubleSpace(out)
308
309	if flags&LIST_TYPE_ORDERED != 0 {
310		out.WriteString("<ol>")
311	} else {
312		out.WriteString("<ul>")
313	}
314	if !text() {
315		out.Truncate(marker)
316		return
317	}
318	if flags&LIST_TYPE_ORDERED != 0 {
319		out.WriteString("</ol>\n")
320	} else {
321		out.WriteString("</ul>\n")
322	}
323}
324
325func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
326	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
327		doubleSpace(out)
328	}
329	out.WriteString("<li>")
330	out.Write(text)
331	out.WriteString("</li>\n")
332}
333
334func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
335	marker := out.Len()
336	doubleSpace(out)
337
338	out.WriteString("<p>")
339	if !text() {
340		out.Truncate(marker)
341		return
342	}
343	out.WriteString("</p>\n")
344}
345
346func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
347	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
348		// mark it but don't link it if it is not a safe link: no smartypants
349		out.WriteString("<tt>")
350		attrEscape(out, link)
351		out.WriteString("</tt>")
352		return
353	}
354
355	out.WriteString("<a href=\"")
356	if kind == LINK_TYPE_EMAIL {
357		out.WriteString("mailto:")
358	}
359	attrEscape(out, link)
360	out.WriteString("\">")
361
362	// Pretty print: if we get an email address as
363	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
364	// want to print the `mailto:` prefix
365	switch {
366	case bytes.HasPrefix(link, []byte("mailto://")):
367		attrEscape(out, link[len("mailto://"):])
368	case bytes.HasPrefix(link, []byte("mailto:")):
369		attrEscape(out, link[len("mailto:"):])
370	default:
371		attrEscape(out, link)
372	}
373
374	out.WriteString("</a>")
375}
376
377func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
378	out.WriteString("<code>")
379	attrEscape(out, text)
380	out.WriteString("</code>")
381}
382
383func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
384	out.WriteString("<strong>")
385	out.Write(text)
386	out.WriteString("</strong>")
387}
388
389func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
390	if len(text) == 0 {
391		return
392	}
393	out.WriteString("<em>")
394	out.Write(text)
395	out.WriteString("</em>")
396}
397
398func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
399	if options.flags&HTML_SKIP_IMAGES != 0 {
400		return
401	}
402
403	out.WriteString("<img src=\"")
404	attrEscape(out, link)
405	out.WriteString("\" alt=\"")
406	if len(alt) > 0 {
407		attrEscape(out, alt)
408	}
409	if len(title) > 0 {
410		out.WriteString("\" title=\"")
411		attrEscape(out, title)
412	}
413
414	out.WriteByte('"')
415	out.WriteString(options.closeTag)
416	return
417}
418
419func (options *Html) LineBreak(out *bytes.Buffer) {
420	out.WriteString("<br")
421	out.WriteString(options.closeTag)
422}
423
424func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
425	if options.flags&HTML_SKIP_LINKS != 0 {
426		// write the link text out but don't link it, just mark it with typewriter font
427		out.WriteString("<tt>")
428		attrEscape(out, content)
429		out.WriteString("</tt>")
430		return
431	}
432
433	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
434		// write the link text out but don't link it, just mark it with typewriter font
435		out.WriteString("<tt>")
436		attrEscape(out, content)
437		out.WriteString("</tt>")
438		return
439	}
440
441	out.WriteString("<a href=\"")
442	attrEscape(out, link)
443	if len(title) > 0 {
444		out.WriteString("\" title=\"")
445		attrEscape(out, title)
446	}
447	out.WriteString("\">")
448	out.Write(content)
449	out.WriteString("</a>")
450	return
451}
452
453func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
454	if options.flags&HTML_SKIP_HTML != 0 {
455		return
456	}
457	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
458		return
459	}
460	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
461		return
462	}
463	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
464		return
465	}
466	out.Write(text)
467}
468
469func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
470	out.WriteString("<strong><em>")
471	out.Write(text)
472	out.WriteString("</em></strong>")
473}
474
475func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
476	out.WriteString("<del>")
477	out.Write(text)
478	out.WriteString("</del>")
479}
480
// Entity writes entity to out exactly as given, without escaping.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
484
485func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
486	if options.flags&HTML_USE_SMARTYPANTS != 0 {
487		options.Smartypants(out, text)
488	} else {
489		attrEscape(out, text)
490	}
491}
492
493func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
494	smrt := smartypantsData{false, false}
495
496	// first do normal entity escaping
497	var escaped bytes.Buffer
498	attrEscape(&escaped, text)
499	text = escaped.Bytes()
500
501	mark := 0
502	for i := 0; i < len(text); i++ {
503		if action := options.smartypants[text[i]]; action != nil {
504			if i > mark {
505				out.Write(text[mark:i])
506			}
507
508			previousChar := byte(0)
509			if i > 0 {
510				previousChar = text[i-1]
511			}
512			i += action(out, &smrt, previousChar, text[i:])
513			mark = i + 1
514		}
515	}
516
517	if mark < len(text) {
518		out.Write(text[mark:])
519	}
520}
521
522func (options *Html) DocumentHeader(out *bytes.Buffer) {
523	if options.flags&HTML_COMPLETE_PAGE == 0 {
524		return
525	}
526
527	ending := ""
528	if options.flags&HTML_USE_XHTML != 0 {
529		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
530		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
531		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
532		ending = " /"
533	} else {
534		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
535		out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
536		out.WriteString("<html>\n")
537	}
538	out.WriteString("<head>\n")
539	out.WriteString("  <title>")
540	options.NormalText(out, []byte(options.title))
541	out.WriteString("</title>\n")
542	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
543	out.WriteString(VERSION)
544	out.WriteString("\"")
545	out.WriteString(ending)
546	out.WriteString(">\n")
547	out.WriteString("  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
548	out.WriteString(ending)
549	out.WriteString(">\n")
550	if options.css != "" {
551		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
552		attrEscape(out, []byte(options.css))
553		out.WriteString("\"")
554		out.WriteString(ending)
555		out.WriteString(">\n")
556	}
557	out.WriteString("</head>\n")
558	out.WriteString("<body>\n")
559
560	options.tocMarker = out.Len()
561}
562
563func (options *Html) DocumentFooter(out *bytes.Buffer) {
564	// finalize and insert the table of contents
565	if options.flags&HTML_TOC != 0 {
566		options.TocFinalize()
567
568		// now we have to insert the table of contents into the document
569		var temp bytes.Buffer
570
571		// start by making a copy of everything after the document header
572		temp.Write(out.Bytes()[options.tocMarker:])
573
574		// now clear the copied material from the main output buffer
575		out.Truncate(options.tocMarker)
576
577		// corner case spacing issue
578		if options.flags&HTML_COMPLETE_PAGE != 0 {
579			out.WriteByte('\n')
580		}
581
582		// insert the table of contents
583		out.Write(options.toc.Bytes())
584
585		// corner case spacing issue
586		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
587			out.WriteByte('\n')
588		}
589
590		// write out everything that came after it
591		if options.flags&HTML_OMIT_CONTENTS == 0 {
592			out.Write(temp.Bytes())
593		}
594	}
595
596	if options.flags&HTML_COMPLETE_PAGE != 0 {
597		out.WriteString("\n</body>\n")
598		out.WriteString("</html>\n")
599	}
600
601}
602
603func (options *Html) TocHeader(text []byte, level int) {
604	for level > options.currentLevel {
605		switch {
606		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
607			// this sublist can nest underneath a header
608			size := options.toc.Len()
609			options.toc.Truncate(size - len("</li>\n"))
610
611		case options.currentLevel > 0:
612			options.toc.WriteString("<li>")
613		}
614		if options.toc.Len() > 0 {
615			options.toc.WriteByte('\n')
616		}
617		options.toc.WriteString("<ul>\n")
618		options.currentLevel++
619	}
620
621	for level < options.currentLevel {
622		options.toc.WriteString("</ul>")
623		if options.currentLevel > 1 {
624			options.toc.WriteString("</li>\n")
625		}
626		options.currentLevel--
627	}
628
629	options.toc.WriteString("<li><a href=\"#toc_")
630	options.toc.WriteString(strconv.Itoa(options.headerCount))
631	options.toc.WriteString("\">")
632	options.headerCount++
633
634	options.toc.Write(text)
635
636	options.toc.WriteString("</a></li>\n")
637}
638
639func (options *Html) TocFinalize() {
640	for options.currentLevel > 1 {
641		options.toc.WriteString("</ul></li>\n")
642		options.currentLevel--
643	}
644
645	if options.currentLevel > 0 {
646		options.toc.WriteString("</ul>\n")
647	}
648}
649
650func isHtmlTag(tag []byte, tagname string) bool {
651	i := 0
652	if i < len(tag) && tag[0] != '<' {
653		return false
654	}
655	i++
656	for i < len(tag) && isspace(tag[i]) {
657		i++
658	}
659
660	if i < len(tag) && tag[i] == '/' {
661		i++
662	}
663
664	for i < len(tag) && isspace(tag[i]) {
665		i++
666	}
667
668	j := i
669	for ; i < len(tag); i, j = i+1, j+1 {
670		if j >= len(tagname) {
671			break
672		}
673
674		if tag[i] != tagname[j] {
675			return false
676		}
677	}
678
679	if i == len(tag) {
680		return false
681	}
682
683	return isspace(tag[i]) || tag[i] == '>'
684}
685
// doubleSpace appends a newline to out unless out is empty. Block-level
// renderers call it before writing so that blocks are separated from
// whatever precedes them without a leading newline at the start of output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}