all repos — grayfriday @ fb923cdb7885cc75de960e11e527c6c0b1405923

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22	"strings"
 23)
 24
// HTML renderer configuration options.
//
// Each constant occupies its own bit (1 << iota), so any combination can be
// ORed together and passed as the flags argument of HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SKIP_SCRIPT                          // skip embedded <script> elements
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
 42
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // offset in the output buffer, recorded by DocumentHeader, where DocumentFooter inserts the table of contents
	headerCount  int           // number of headers recorded by TocHeader so far; also the N of the next "toc_N" anchor
	currentLevel int           // current nesting depth of the <ul> lists written to toc
	toc          *bytes.Buffer // table of contents markup accumulated by TocHeader and closed by TocFinalize

	smartypants *smartypantsRenderer // callbacks looked up by input byte in Smartypants
}
 60
// Terminators for singleton tags such as <hr>, <br> and <img>.
// HtmlRenderer picks xhtmlClose when HTML_USE_XHTML is set and htmlClose
// otherwise; both include the trailing newline.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
 65
 66// HtmlRenderer creates and configures an Html object, which
 67// satisfies the Renderer interface.
 68//
 69// flags is a set of HTML_* options ORed together.
 70// title is the title of the document, and css is a URL for the document's
 71// stylesheet.
 72// title and css are only used when HTML_COMPLETE_PAGE is selected.
 73func HtmlRenderer(flags int, title string, css string) Renderer {
 74	// configure the rendering engine
 75	closeTag := htmlClose
 76	if flags&HTML_USE_XHTML != 0 {
 77		closeTag = xhtmlClose
 78	}
 79
 80	return &Html{
 81		flags:    flags,
 82		closeTag: closeTag,
 83		title:    title,
 84		css:      css,
 85
 86		headerCount:  0,
 87		currentLevel: 0,
 88		toc:          new(bytes.Buffer),
 89
 90		smartypants: smartypants(flags),
 91	}
 92}
 93
 94func attrEscape(out *bytes.Buffer, src []byte) {
 95	org := 0
 96	for i, ch := range src {
 97		// using if statements is a bit faster than a switch statement.
 98		// as the compiler improves, this should be unnecessary
 99		// this is only worthwhile because attrEscape is the single
100		// largest CPU user in normal use
101		if ch == '"' {
102			if i > org {
103				// copy all the normal characters since the last escape
104				out.Write(src[org:i])
105			}
106			org = i + 1
107			out.WriteString("&quot;")
108			continue
109		}
110		if ch == '&' {
111			if i > org {
112				out.Write(src[org:i])
113			}
114			org = i + 1
115			out.WriteString("&amp;")
116			continue
117		}
118		if ch == '<' {
119			if i > org {
120				out.Write(src[org:i])
121			}
122			org = i + 1
123			out.WriteString("&lt;")
124			continue
125		}
126		if ch == '>' {
127			if i > org {
128				out.Write(src[org:i])
129			}
130			org = i + 1
131			out.WriteString("&gt;")
132			continue
133		}
134	}
135	if org < len(src) {
136		out.Write(src[org:])
137	}
138}
139
140func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
141	marker := out.Len()
142	doubleSpace(out)
143
144	if options.flags&HTML_TOC != 0 {
145		// headerCount is incremented in htmlTocHeader
146		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
147	} else {
148		out.WriteString(fmt.Sprintf("<h%d>", level))
149	}
150
151	tocMarker := out.Len()
152	if !text() {
153		out.Truncate(marker)
154		return
155	}
156
157	// are we building a table of contents?
158	if options.flags&HTML_TOC != 0 {
159		options.TocHeader(out.Bytes()[tocMarker:], level)
160	}
161
162	out.WriteString(fmt.Sprintf("</h%d>\n", level))
163}
164
165func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
166	if options.flags&HTML_SKIP_HTML != 0 {
167		return
168	}
169
170	doubleSpace(out)
171	out.Write(text)
172	out.WriteByte('\n')
173}
174
175func (options *Html) HRule(out *bytes.Buffer) {
176	doubleSpace(out)
177	out.WriteString("<hr")
178	out.WriteString(options.closeTag)
179}
180
181func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
182	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
183		options.BlockCodeGithub(out, text, lang)
184	} else {
185		options.BlockCodeNormal(out, text, lang)
186	}
187}
188
189func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
190	doubleSpace(out)
191
192	// parse out the language names/classes
193	count := 0
194	for _, elt := range strings.Fields(lang) {
195		if elt[0] == '.' {
196			elt = elt[1:]
197		}
198		if len(elt) == 0 {
199			continue
200		}
201		if count == 0 {
202			out.WriteString("<pre><code class=\"")
203		} else {
204			out.WriteByte(' ')
205		}
206		attrEscape(out, []byte(elt))
207		count++
208	}
209
210	if count == 0 {
211		out.WriteString("<pre><code>")
212	} else {
213		out.WriteString("\">")
214	}
215
216	attrEscape(out, text)
217	out.WriteString("</code></pre>\n")
218}
219
220// GitHub style code block:
221//
222//              <pre lang="LANG"><code>
223//              ...
224//              </code></pre>
225//
226// Unlike other parsers, we store the language identifier in the <pre>,
227// and don't let the user generate custom classes.
228//
229// The language identifier in the <pre> block gets postprocessed and all
230// the code inside gets syntax highlighted with Pygments. This is much safer
231// than letting the user specify a CSS class for highlighting.
232//
233// Note that we only generate HTML for the first specifier.
234// E.g.
235//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
236func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
237	doubleSpace(out)
238
239	// parse out the language name
240	count := 0
241	for _, elt := range strings.Fields(lang) {
242		if elt[0] == '.' {
243			elt = elt[1:]
244		}
245		if len(elt) == 0 {
246			continue
247		}
248		out.WriteString("<pre lang=\"")
249		attrEscape(out, []byte(elt))
250		out.WriteString("\"><code>")
251		count++
252		break
253	}
254
255	if count == 0 {
256		out.WriteString("<pre><code>")
257	}
258
259	attrEscape(out, text)
260	out.WriteString("</code></pre>\n")
261}
262
263func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
264	doubleSpace(out)
265	out.WriteString("<blockquote>\n")
266	out.Write(text)
267	out.WriteString("</blockquote>\n")
268}
269
270func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
271	doubleSpace(out)
272	out.WriteString("<table>\n<thead>\n")
273	out.Write(header)
274	out.WriteString("</thead>\n\n<tbody>\n")
275	out.Write(body)
276	out.WriteString("</tbody>\n</table>\n")
277}
278
279func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
280	doubleSpace(out)
281	out.WriteString("<tr>\n")
282	out.Write(text)
283	out.WriteString("\n</tr>\n")
284}
285
286func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
287	doubleSpace(out)
288	switch align {
289	case TABLE_ALIGNMENT_LEFT:
290		out.WriteString("<td align=\"left\">")
291	case TABLE_ALIGNMENT_RIGHT:
292		out.WriteString("<td align=\"right\">")
293	case TABLE_ALIGNMENT_CENTER:
294		out.WriteString("<td align=\"center\">")
295	default:
296		out.WriteString("<td>")
297	}
298
299	out.Write(text)
300	out.WriteString("</td>")
301}
302
303func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
304	marker := out.Len()
305	doubleSpace(out)
306
307	if flags&LIST_TYPE_ORDERED != 0 {
308		out.WriteString("<ol>")
309	} else {
310		out.WriteString("<ul>")
311	}
312	if !text() {
313		out.Truncate(marker)
314		return
315	}
316	if flags&LIST_TYPE_ORDERED != 0 {
317		out.WriteString("</ol>\n")
318	} else {
319		out.WriteString("</ul>\n")
320	}
321}
322
323func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
324	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
325		doubleSpace(out)
326	}
327	out.WriteString("<li>")
328	out.Write(text)
329	out.WriteString("</li>\n")
330}
331
332func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
333	marker := out.Len()
334	doubleSpace(out)
335
336	out.WriteString("<p>")
337	if !text() {
338		out.Truncate(marker)
339		return
340	}
341	out.WriteString("</p>\n")
342}
343
344func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
345	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
346		// mark it but don't link it if it is not a safe link: no smartypants
347		out.WriteString("<tt>")
348		attrEscape(out, link)
349		out.WriteString("</tt>")
350		return
351	}
352
353	out.WriteString("<a href=\"")
354	if kind == LINK_TYPE_EMAIL {
355		out.WriteString("mailto:")
356	}
357	attrEscape(out, link)
358	out.WriteString("\">")
359
360	// Pretty print: if we get an email address as
361	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
362	// want to print the `mailto:` prefix
363	switch {
364	case bytes.HasPrefix(link, []byte("mailto://")):
365		attrEscape(out, link[len("mailto://"):])
366	case bytes.HasPrefix(link, []byte("mailto:")):
367		attrEscape(out, link[len("mailto:"):])
368	default:
369		attrEscape(out, link)
370	}
371
372	out.WriteString("</a>")
373}
374
375func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
376	out.WriteString("<code>")
377	attrEscape(out, text)
378	out.WriteString("</code>")
379}
380
381func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
382	out.WriteString("<strong>")
383	out.Write(text)
384	out.WriteString("</strong>")
385}
386
387func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
388	if len(text) == 0 {
389		return
390	}
391	out.WriteString("<em>")
392	out.Write(text)
393	out.WriteString("</em>")
394}
395
396func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
397	if options.flags&HTML_SKIP_IMAGES != 0 {
398		return
399	}
400
401	out.WriteString("<img src=\"")
402	attrEscape(out, link)
403	out.WriteString("\" alt=\"")
404	if len(alt) > 0 {
405		attrEscape(out, alt)
406	}
407	if len(title) > 0 {
408		out.WriteString("\" title=\"")
409		attrEscape(out, title)
410	}
411
412	out.WriteByte('"')
413	out.WriteString(options.closeTag)
414	return
415}
416
417func (options *Html) LineBreak(out *bytes.Buffer) {
418	out.WriteString("<br")
419	out.WriteString(options.closeTag)
420}
421
422func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
423	if options.flags&HTML_SKIP_LINKS != 0 {
424		// write the link text out but don't link it, just mark it with typewriter font
425		out.WriteString("<tt>")
426		attrEscape(out, content)
427		out.WriteString("</tt>")
428		return
429	}
430
431	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
432		// write the link text out but don't link it, just mark it with typewriter font
433		out.WriteString("<tt>")
434		attrEscape(out, content)
435		out.WriteString("</tt>")
436		return
437	}
438
439	out.WriteString("<a href=\"")
440	attrEscape(out, link)
441	if len(title) > 0 {
442		out.WriteString("\" title=\"")
443		attrEscape(out, title)
444	}
445	out.WriteString("\">")
446	out.Write(content)
447	out.WriteString("</a>")
448	return
449}
450
451func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
452	if options.flags&HTML_SKIP_HTML != 0 {
453		return
454	}
455	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
456		return
457	}
458	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
459		return
460	}
461	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
462		return
463	}
464	if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
465		return
466	}
467	out.Write(text)
468}
469
470func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
471	out.WriteString("<strong><em>")
472	out.Write(text)
473	out.WriteString("</em></strong>")
474}
475
476func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
477	out.WriteString("<del>")
478	out.Write(text)
479	out.WriteString("</del>")
480}
481
482func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
483	out.Write(entity)
484}
485
486func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
487	if options.flags&HTML_USE_SMARTYPANTS != 0 {
488		options.Smartypants(out, text)
489	} else {
490		attrEscape(out, text)
491	}
492}
493
494func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
495	smrt := smartypantsData{false, false}
496
497	// first do normal entity escaping
498	var escaped bytes.Buffer
499	attrEscape(&escaped, text)
500	text = escaped.Bytes()
501
502	mark := 0
503	for i := 0; i < len(text); i++ {
504		if action := options.smartypants[text[i]]; action != nil {
505			if i > mark {
506				out.Write(text[mark:i])
507			}
508
509			previousChar := byte(0)
510			if i > 0 {
511				previousChar = text[i-1]
512			}
513			i += action(out, &smrt, previousChar, text[i:])
514			mark = i + 1
515		}
516	}
517
518	if mark < len(text) {
519		out.Write(text[mark:])
520	}
521}
522
523func (options *Html) DocumentHeader(out *bytes.Buffer) {
524	if options.flags&HTML_COMPLETE_PAGE == 0 {
525		return
526	}
527
528	ending := ""
529	if options.flags&HTML_USE_XHTML != 0 {
530		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
531		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
532		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
533		ending = " /"
534	} else {
535		out.WriteString("<!DOCTYPE html>\n")
536		out.WriteString("<html>\n")
537	}
538	out.WriteString("<head>\n")
539	out.WriteString("  <title>")
540	options.NormalText(out, []byte(options.title))
541	out.WriteString("</title>\n")
542	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
543	out.WriteString(VERSION)
544	out.WriteString("\"")
545	out.WriteString(ending)
546	out.WriteString(">\n")
547	out.WriteString("  <meta charset=\"utf-8\"")
548	out.WriteString(ending)
549	out.WriteString(">\n")
550	if options.css != "" {
551		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
552		attrEscape(out, []byte(options.css))
553		out.WriteString("\"")
554		out.WriteString(ending)
555		out.WriteString(">\n")
556	}
557	out.WriteString("</head>\n")
558	out.WriteString("<body>\n")
559
560	options.tocMarker = out.Len()
561}
562
563func (options *Html) DocumentFooter(out *bytes.Buffer) {
564	// finalize and insert the table of contents
565	if options.flags&HTML_TOC != 0 {
566		options.TocFinalize()
567
568		// now we have to insert the table of contents into the document
569		var temp bytes.Buffer
570
571		// start by making a copy of everything after the document header
572		temp.Write(out.Bytes()[options.tocMarker:])
573
574		// now clear the copied material from the main output buffer
575		out.Truncate(options.tocMarker)
576
577		// corner case spacing issue
578		if options.flags&HTML_COMPLETE_PAGE != 0 {
579			out.WriteByte('\n')
580		}
581
582		// insert the table of contents
583		out.WriteString("<nav>\n")
584		out.Write(options.toc.Bytes())
585		out.WriteString("</nav>\n")
586
587		// corner case spacing issue
588		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
589			out.WriteByte('\n')
590		}
591
592		// write out everything that came after it
593		if options.flags&HTML_OMIT_CONTENTS == 0 {
594			out.Write(temp.Bytes())
595		}
596	}
597
598	if options.flags&HTML_COMPLETE_PAGE != 0 {
599		out.WriteString("\n</body>\n")
600		out.WriteString("</html>\n")
601	}
602
603}
604
605func (options *Html) TocHeader(text []byte, level int) {
606	for level > options.currentLevel {
607		switch {
608		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
609			// this sublist can nest underneath a header
610			size := options.toc.Len()
611			options.toc.Truncate(size - len("</li>\n"))
612
613		case options.currentLevel > 0:
614			options.toc.WriteString("<li>")
615		}
616		if options.toc.Len() > 0 {
617			options.toc.WriteByte('\n')
618		}
619		options.toc.WriteString("<ul>\n")
620		options.currentLevel++
621	}
622
623	for level < options.currentLevel {
624		options.toc.WriteString("</ul>")
625		if options.currentLevel > 1 {
626			options.toc.WriteString("</li>\n")
627		}
628		options.currentLevel--
629	}
630
631	options.toc.WriteString("<li><a href=\"#toc_")
632	options.toc.WriteString(strconv.Itoa(options.headerCount))
633	options.toc.WriteString("\">")
634	options.headerCount++
635
636	options.toc.Write(text)
637
638	options.toc.WriteString("</a></li>\n")
639}
640
641func (options *Html) TocFinalize() {
642	for options.currentLevel > 1 {
643		options.toc.WriteString("</ul></li>\n")
644		options.currentLevel--
645	}
646
647	if options.currentLevel > 0 {
648		options.toc.WriteString("</ul>\n")
649	}
650}
651
652func isHtmlTag(tag []byte, tagname string) bool {
653	i := 0
654	if i < len(tag) && tag[0] != '<' {
655		return false
656	}
657	i++
658	i = skipSpace(tag, i)
659
660	if i < len(tag) && tag[i] == '/' {
661		i++
662	}
663
664	i = skipSpace(tag, i)
665	j := 0
666	for ; i < len(tag); i, j = i+1, j+1 {
667		if j >= len(tagname) {
668			break
669		}
670
671		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
672			return false
673		}
674	}
675
676	if i == len(tag) {
677		return false
678	}
679
680	return isspace(tag[i]) || tag[i] == '>'
681}
682
683func skipSpace(tag []byte, i int) int {
684	for i < len(tag) && isspace(tag[i]) {
685		i++
686	}
687	return i
688}
689
// doubleSpace appends a newline to out unless out is still empty, so that
// no blank line is produced at the very start of the output.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}