all repos — grayfriday @ 59358adea87fa5061b5c21be571a3480bf0217a9

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22	"strings"
 23)
 24
// Html renderer configuration options.
//
// Each option is a distinct bit (1 << iota), so several options are combined
// with bitwise OR and passed as the flags argument of HtmlRenderer.
const (
	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
	HTML_SKIP_STYLE                           // skip embedded <style> elements
	HTML_SKIP_IMAGES                          // skip embedded images
	HTML_SKIP_LINKS                           // skip all links
	HTML_SKIP_SCRIPT                          // skip embedded <script> elements
	HTML_SAFELINK                             // only link to trusted protocols
	HTML_TOC                                  // generate a table of contents
	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
	HTML_COMPLETE_PAGE                        // generate a complete HTML page
	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
	HTML_USE_XHTML                            // generate XHTML output instead of HTML
	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
 42
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded by DocumentHeader; DocumentFooter inserts the TOC there
	headerCount  int           // index used for the next toc_N anchor; incremented by TocHeader
	currentLevel int           // nesting depth of the <ul> lists currently open in toc
	toc          *bytes.Buffer // table of contents markup accumulated by TocHeader

	smartypants *smartypantsRenderer // indexed by input byte in Smartypants; built by smartypants(flags)
}
 60
// Endings appended after singleton tags such as <hr and <br.
// HtmlRenderer selects xhtmlClose when HTML_USE_XHTML is set and
// htmlClose otherwise.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
 65
 66// HtmlRenderer creates and configures an Html object, which
 67// satisfies the Renderer interface.
 68//
 69// flags is a set of HTML_* options ORed together.
 70// title is the title of the document, and css is a URL for the document's
 71// stylesheet.
 72// title and css are only used when HTML_COMPLETE_PAGE is selected.
 73func HtmlRenderer(flags int, title string, css string) Renderer {
 74	// configure the rendering engine
 75	closeTag := htmlClose
 76	if flags&HTML_USE_XHTML != 0 {
 77		closeTag = xhtmlClose
 78	}
 79
 80	return &Html{
 81		flags:    flags,
 82		closeTag: closeTag,
 83		title:    title,
 84		css:      css,
 85
 86		headerCount:  0,
 87		currentLevel: 0,
 88		toc:          new(bytes.Buffer),
 89
 90		smartypants: smartypants(flags),
 91	}
 92}
 93
 94func attrEscape(out *bytes.Buffer, src []byte) {
 95	org := 0
 96	for i, ch := range src {
 97		// using if statements is a bit faster than a switch statement.
 98		// as the compiler improves, this should be unnecessary
 99		// this is only worthwhile because attrEscape is the single
100		// largest CPU user in normal use
101		if ch == '"' {
102			if i > org {
103				// copy all the normal characters since the last escape
104				out.Write(src[org:i])
105			}
106			org = i + 1
107			out.WriteString("&quot;")
108			continue
109		}
110		if ch == '&' {
111			if i > org {
112				out.Write(src[org:i])
113			}
114			org = i + 1
115			out.WriteString("&amp;")
116			continue
117		}
118		if ch == '<' {
119			if i > org {
120				out.Write(src[org:i])
121			}
122			org = i + 1
123			out.WriteString("&lt;")
124			continue
125		}
126		if ch == '>' {
127			if i > org {
128				out.Write(src[org:i])
129			}
130			org = i + 1
131			out.WriteString("&gt;")
132			continue
133		}
134	}
135	if org < len(src) {
136		out.Write(src[org:])
137	}
138}
139
140func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
141	marker := out.Len()
142	doubleSpace(out)
143
144	if options.flags&HTML_TOC != 0 {
145		// headerCount is incremented in htmlTocHeader
146		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
147	} else {
148		out.WriteString(fmt.Sprintf("<h%d>", level))
149	}
150
151	tocMarker := out.Len()
152	if !text() {
153		out.Truncate(marker)
154		return
155	}
156
157	// are we building a table of contents?
158	if options.flags&HTML_TOC != 0 {
159		options.TocHeader(out.Bytes()[tocMarker:], level)
160	}
161
162	out.WriteString(fmt.Sprintf("</h%d>\n", level))
163}
164
165func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
166	if options.flags&HTML_SKIP_HTML != 0 {
167		return
168	}
169
170	doubleSpace(out)
171	if options.flags&HTML_SKIP_SCRIPT != 0 {
172		out.Write(stripTag(string(text), "script", "p"))
173	} else {
174		out.Write(text)
175	}
176	out.WriteByte('\n')
177}
178
179func stripTag(text, tag, newTag string) []byte {
180	closeNewTag := fmt.Sprintf("</%s>", newTag)
181	i := 0
182	for i < len(text) && text[i] != '<' {
183		i++
184	}
185	if i == len(text) {
186		return []byte(text)
187	}
188	found, end := findHtmlTagPos([]byte(text[i:]), tag)
189	closeTag := fmt.Sprintf("</%s>", tag)
190	noOpen := text
191	if found {
192		noOpen = text[0:i+1] + newTag + text[end:]
193	}
194	return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
195}
196
197func (options *Html) HRule(out *bytes.Buffer) {
198	doubleSpace(out)
199	out.WriteString("<hr")
200	out.WriteString(options.closeTag)
201}
202
203func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
204	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
205		options.BlockCodeGithub(out, text, lang)
206	} else {
207		options.BlockCodeNormal(out, text, lang)
208	}
209}
210
211func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
212	doubleSpace(out)
213
214	// parse out the language names/classes
215	count := 0
216	for _, elt := range strings.Fields(lang) {
217		if elt[0] == '.' {
218			elt = elt[1:]
219		}
220		if len(elt) == 0 {
221			continue
222		}
223		if count == 0 {
224			out.WriteString("<pre><code class=\"")
225		} else {
226			out.WriteByte(' ')
227		}
228		attrEscape(out, []byte(elt))
229		count++
230	}
231
232	if count == 0 {
233		out.WriteString("<pre><code>")
234	} else {
235		out.WriteString("\">")
236	}
237
238	attrEscape(out, text)
239	out.WriteString("</code></pre>\n")
240}
241
242// GitHub style code block:
243//
244//              <pre lang="LANG"><code>
245//              ...
246//              </code></pre>
247//
248// Unlike other parsers, we store the language identifier in the <pre>,
249// and don't let the user generate custom classes.
250//
251// The language identifier in the <pre> block gets postprocessed and all
252// the code inside gets syntax highlighted with Pygments. This is much safer
253// than letting the user specify a CSS class for highlighting.
254//
255// Note that we only generate HTML for the first specifier.
256// E.g.
257//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
258func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
259	doubleSpace(out)
260
261	// parse out the language name
262	count := 0
263	for _, elt := range strings.Fields(lang) {
264		if elt[0] == '.' {
265			elt = elt[1:]
266		}
267		if len(elt) == 0 {
268			continue
269		}
270		out.WriteString("<pre lang=\"")
271		attrEscape(out, []byte(elt))
272		out.WriteString("\"><code>")
273		count++
274		break
275	}
276
277	if count == 0 {
278		out.WriteString("<pre><code>")
279	}
280
281	attrEscape(out, text)
282	out.WriteString("</code></pre>\n")
283}
284
285func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
286	doubleSpace(out)
287	out.WriteString("<blockquote>\n")
288	out.Write(text)
289	out.WriteString("</blockquote>\n")
290}
291
292func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
293	doubleSpace(out)
294	out.WriteString("<table>\n<thead>\n")
295	out.Write(header)
296	out.WriteString("</thead>\n\n<tbody>\n")
297	out.Write(body)
298	out.WriteString("</tbody>\n</table>\n")
299}
300
301func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
302	doubleSpace(out)
303	out.WriteString("<tr>\n")
304	out.Write(text)
305	out.WriteString("\n</tr>\n")
306}
307
308func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
309	doubleSpace(out)
310	switch align {
311	case TABLE_ALIGNMENT_LEFT:
312		out.WriteString("<td align=\"left\">")
313	case TABLE_ALIGNMENT_RIGHT:
314		out.WriteString("<td align=\"right\">")
315	case TABLE_ALIGNMENT_CENTER:
316		out.WriteString("<td align=\"center\">")
317	default:
318		out.WriteString("<td>")
319	}
320
321	out.Write(text)
322	out.WriteString("</td>")
323}
324
325func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
326	out.WriteString("<div class=\"footnotes\">\n")
327	options.HRule(out)
328	options.List(out, text, LIST_TYPE_ORDERED)
329	out.WriteString("</div>\n")
330}
331
332func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
333	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
334		doubleSpace(out)
335	}
336	out.WriteString(`<li id="fn:`)
337	out.Write(slugify(name))
338	out.WriteString(`">`)
339	out.Write(text)
340	out.WriteString("</li>\n")
341}
342
343func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
344	marker := out.Len()
345	doubleSpace(out)
346
347	if flags&LIST_TYPE_ORDERED != 0 {
348		out.WriteString("<ol>")
349	} else {
350		out.WriteString("<ul>")
351	}
352	if !text() {
353		out.Truncate(marker)
354		return
355	}
356	if flags&LIST_TYPE_ORDERED != 0 {
357		out.WriteString("</ol>\n")
358	} else {
359		out.WriteString("</ul>\n")
360	}
361}
362
363func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
364	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
365		doubleSpace(out)
366	}
367	out.WriteString("<li>")
368	out.Write(text)
369	out.WriteString("</li>\n")
370}
371
372func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
373	marker := out.Len()
374	doubleSpace(out)
375
376	out.WriteString("<p>")
377	if !text() {
378		out.Truncate(marker)
379		return
380	}
381	out.WriteString("</p>\n")
382}
383
384func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
385	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
386		// mark it but don't link it if it is not a safe link: no smartypants
387		out.WriteString("<tt>")
388		attrEscape(out, link)
389		out.WriteString("</tt>")
390		return
391	}
392
393	out.WriteString("<a href=\"")
394	if kind == LINK_TYPE_EMAIL {
395		out.WriteString("mailto:")
396	}
397	attrEscape(out, link)
398	out.WriteString("\">")
399
400	// Pretty print: if we get an email address as
401	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
402	// want to print the `mailto:` prefix
403	switch {
404	case bytes.HasPrefix(link, []byte("mailto://")):
405		attrEscape(out, link[len("mailto://"):])
406	case bytes.HasPrefix(link, []byte("mailto:")):
407		attrEscape(out, link[len("mailto:"):])
408	default:
409		attrEscape(out, link)
410	}
411
412	out.WriteString("</a>")
413}
414
415func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
416	out.WriteString("<code>")
417	attrEscape(out, text)
418	out.WriteString("</code>")
419}
420
421func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
422	out.WriteString("<strong>")
423	out.Write(text)
424	out.WriteString("</strong>")
425}
426
427func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
428	if len(text) == 0 {
429		return
430	}
431	out.WriteString("<em>")
432	out.Write(text)
433	out.WriteString("</em>")
434}
435
436func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
437	if options.flags&HTML_SKIP_IMAGES != 0 {
438		return
439	}
440
441	out.WriteString("<img src=\"")
442	attrEscape(out, link)
443	out.WriteString("\" alt=\"")
444	if len(alt) > 0 {
445		attrEscape(out, alt)
446	}
447	if len(title) > 0 {
448		out.WriteString("\" title=\"")
449		attrEscape(out, title)
450	}
451
452	out.WriteByte('"')
453	out.WriteString(options.closeTag)
454	return
455}
456
457func (options *Html) LineBreak(out *bytes.Buffer) {
458	out.WriteString("<br")
459	out.WriteString(options.closeTag)
460}
461
462func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
463	if options.flags&HTML_SKIP_LINKS != 0 {
464		// write the link text out but don't link it, just mark it with typewriter font
465		out.WriteString("<tt>")
466		attrEscape(out, content)
467		out.WriteString("</tt>")
468		return
469	}
470
471	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
472		// write the link text out but don't link it, just mark it with typewriter font
473		out.WriteString("<tt>")
474		attrEscape(out, content)
475		out.WriteString("</tt>")
476		return
477	}
478
479	out.WriteString("<a href=\"")
480	attrEscape(out, link)
481	if len(title) > 0 {
482		out.WriteString("\" title=\"")
483		attrEscape(out, title)
484	}
485	out.WriteString("\">")
486	out.Write(content)
487	out.WriteString("</a>")
488	return
489}
490
491func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
492	if options.flags&HTML_SKIP_HTML != 0 {
493		return
494	}
495	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
496		return
497	}
498	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
499		return
500	}
501	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
502		return
503	}
504	if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
505		return
506	}
507	out.Write(text)
508}
509
510func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
511	out.WriteString("<strong><em>")
512	out.Write(text)
513	out.WriteString("</em></strong>")
514}
515
516func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
517	out.WriteString("<del>")
518	out.Write(text)
519	out.WriteString("</del>")
520}
521
522func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
523	slug := slugify(ref)
524	out.WriteString(`<sup class="footnote-ref" id="fnref:`)
525	out.Write(slug)
526	out.WriteString(`"><a rel="footnote" href="#fn:`)
527	out.Write(slug)
528	out.WriteString(`">`)
529	out.WriteString(strconv.Itoa(id))
530	out.WriteString(`</a></sup>`)
531}
532
// Entity writes entity to out verbatim, without any escaping.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
536
537func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
538	if options.flags&HTML_USE_SMARTYPANTS != 0 {
539		options.Smartypants(out, text)
540	} else {
541		attrEscape(out, text)
542	}
543}
544
545func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
546	smrt := smartypantsData{false, false}
547
548	// first do normal entity escaping
549	var escaped bytes.Buffer
550	attrEscape(&escaped, text)
551	text = escaped.Bytes()
552
553	mark := 0
554	for i := 0; i < len(text); i++ {
555		if action := options.smartypants[text[i]]; action != nil {
556			if i > mark {
557				out.Write(text[mark:i])
558			}
559
560			previousChar := byte(0)
561			if i > 0 {
562				previousChar = text[i-1]
563			}
564			i += action(out, &smrt, previousChar, text[i:])
565			mark = i + 1
566		}
567	}
568
569	if mark < len(text) {
570		out.Write(text[mark:])
571	}
572}
573
574func (options *Html) DocumentHeader(out *bytes.Buffer) {
575	if options.flags&HTML_COMPLETE_PAGE == 0 {
576		return
577	}
578
579	ending := ""
580	if options.flags&HTML_USE_XHTML != 0 {
581		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
582		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
583		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
584		ending = " /"
585	} else {
586		out.WriteString("<!DOCTYPE html>\n")
587		out.WriteString("<html>\n")
588	}
589	out.WriteString("<head>\n")
590	out.WriteString("  <title>")
591	options.NormalText(out, []byte(options.title))
592	out.WriteString("</title>\n")
593	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
594	out.WriteString(VERSION)
595	out.WriteString("\"")
596	out.WriteString(ending)
597	out.WriteString(">\n")
598	out.WriteString("  <meta charset=\"utf-8\"")
599	out.WriteString(ending)
600	out.WriteString(">\n")
601	if options.css != "" {
602		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
603		attrEscape(out, []byte(options.css))
604		out.WriteString("\"")
605		out.WriteString(ending)
606		out.WriteString(">\n")
607	}
608	out.WriteString("</head>\n")
609	out.WriteString("<body>\n")
610
611	options.tocMarker = out.Len()
612}
613
614func (options *Html) DocumentFooter(out *bytes.Buffer) {
615	// finalize and insert the table of contents
616	if options.flags&HTML_TOC != 0 {
617		options.TocFinalize()
618
619		// now we have to insert the table of contents into the document
620		var temp bytes.Buffer
621
622		// start by making a copy of everything after the document header
623		temp.Write(out.Bytes()[options.tocMarker:])
624
625		// now clear the copied material from the main output buffer
626		out.Truncate(options.tocMarker)
627
628		// corner case spacing issue
629		if options.flags&HTML_COMPLETE_PAGE != 0 {
630			out.WriteByte('\n')
631		}
632
633		// insert the table of contents
634		out.WriteString("<nav>\n")
635		out.Write(options.toc.Bytes())
636		out.WriteString("</nav>\n")
637
638		// corner case spacing issue
639		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
640			out.WriteByte('\n')
641		}
642
643		// write out everything that came after it
644		if options.flags&HTML_OMIT_CONTENTS == 0 {
645			out.Write(temp.Bytes())
646		}
647	}
648
649	if options.flags&HTML_COMPLETE_PAGE != 0 {
650		out.WriteString("\n</body>\n")
651		out.WriteString("</html>\n")
652	}
653
654}
655
656func (options *Html) TocHeader(text []byte, level int) {
657	for level > options.currentLevel {
658		switch {
659		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
660			// this sublist can nest underneath a header
661			size := options.toc.Len()
662			options.toc.Truncate(size - len("</li>\n"))
663
664		case options.currentLevel > 0:
665			options.toc.WriteString("<li>")
666		}
667		if options.toc.Len() > 0 {
668			options.toc.WriteByte('\n')
669		}
670		options.toc.WriteString("<ul>\n")
671		options.currentLevel++
672	}
673
674	for level < options.currentLevel {
675		options.toc.WriteString("</ul>")
676		if options.currentLevel > 1 {
677			options.toc.WriteString("</li>\n")
678		}
679		options.currentLevel--
680	}
681
682	options.toc.WriteString("<li><a href=\"#toc_")
683	options.toc.WriteString(strconv.Itoa(options.headerCount))
684	options.toc.WriteString("\">")
685	options.headerCount++
686
687	options.toc.Write(text)
688
689	options.toc.WriteString("</a></li>\n")
690}
691
692func (options *Html) TocFinalize() {
693	for options.currentLevel > 1 {
694		options.toc.WriteString("</ul></li>\n")
695		options.currentLevel--
696	}
697
698	if options.currentLevel > 0 {
699		options.toc.WriteString("</ul>\n")
700	}
701}
702
703func isHtmlTag(tag []byte, tagname string) bool {
704	found, _ := findHtmlTagPos(tag, tagname)
705	return found
706}
707
708func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
709	i := 0
710	if i < len(tag) && tag[0] != '<' {
711		return false, -1
712	}
713	i++
714	i = skipSpace(tag, i)
715
716	if i < len(tag) && tag[i] == '/' {
717		i++
718	}
719
720	i = skipSpace(tag, i)
721	j := 0
722	for ; i < len(tag); i, j = i+1, j+1 {
723		if j >= len(tagname) {
724			break
725		}
726
727		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
728			return false, -1
729		}
730	}
731
732	if i == len(tag) {
733		return false, -1
734	}
735
736	// Now look for closing '>', but ignore it when it's in any kind of quotes,
737	// it might be JavaScript
738	inSingleQuote := false
739	inDoubleQuote := false
740	inGraveQuote := false
741	for i < len(tag) {
742		switch {
743		case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
744			return true, i
745		case tag[i] == '\'':
746			inSingleQuote = !inSingleQuote
747		case tag[i] == '"':
748			inDoubleQuote = !inDoubleQuote
749		case tag[i] == '`':
750			inGraveQuote = !inGraveQuote
751		}
752		i++
753	}
754
755	return false, -1
756}
757
758func skipSpace(tag []byte, i int) int {
759	for i < len(tag) && isspace(tag[i]) {
760		i++
761	}
762	return i
763}
764
// doubleSpace appends a newline to out unless out is still empty.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}