all repos — grayfriday @ 793fee5451fe853b7bd45ef35eb06c04522eafad

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22)
 23
// Html renderer configuration flags. Each constant occupies its own bit, so
// flags can be OR-ed together and passed to HtmlRenderer.
const (
	// drop raw HTML blocks and raw inline HTML tags
	HTML_SKIP_HTML = 1 << iota
	// drop raw <style> tags
	HTML_SKIP_STYLE
	// do not render images; also drop raw <img> tags
	HTML_SKIP_IMAGES
	// do not render links; also drop raw <a> tags
	HTML_SKIP_LINKS
	// only render links that isSafeLink accepts
	HTML_SAFELINK
	// build a table of contents and insert it after the document header
	HTML_TOC
	// with HTML_TOC: emit only the table of contents, not the body
	HTML_OMIT_CONTENTS
	// wrap the output in a full document (doctype, <head>, <body>)
	HTML_COMPLETE_PAGE
	// render code blocks as <pre lang="..."><code> instead of using a class
	HTML_GITHUB_BLOCKCODE
	// close singleton tags with " />" and emit an XHTML doctype
	HTML_USE_XHTML
	// run normal text through htmlSmartypants instead of plain escaping
	HTML_USE_SMARTYPANTS
	// not read in this file; passed through to Smartypants(flags)
	// NOTE(review): presumably enables fraction substitution - confirm there
	HTML_SMARTYPANTS_FRACTIONS
	// not read in this file; passed through to Smartypants(flags)
	// NOTE(review): presumably enables LaTeX-style dash handling - confirm there
	HTML_SMARTYPANTS_LATEX_DASHES
)
 39
// htmlOptions is the per-renderer state stored in Renderer.Opaque by
// HtmlRenderer and recovered by every html* callback via a type assertion.
type htmlOptions struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset just past the document header; the TOC is inserted here
	headerCount  int           // index used for the next "toc_N" anchor; advanced by htmlTocHeader
	currentLevel int           // current nesting depth of <ul> lists in the TOC
	toc          *bytes.Buffer // accumulated TOC markup

	// created by Smartypants(flags); not used directly in this file
	smartypants *SmartypantsRenderer
}
 54
 55var xhtmlClose = " />\n"
 56var htmlClose = ">\n"
 57
 58func HtmlRenderer(flags int, title string, css string) *Renderer {
 59	// configure the rendering engine
 60	r := new(Renderer)
 61
 62	r.BlockCode = htmlBlockCode
 63	r.BlockQuote = htmlBlockQuote
 64	r.BlockHtml = htmlBlockHtml
 65	r.Header = htmlHeader
 66	r.HRule = htmlHRule
 67	r.List = htmlList
 68	r.ListItem = htmlListItem
 69	r.Paragraph = htmlParagraph
 70	r.Table = htmlTable
 71	r.TableRow = htmlTableRow
 72	r.TableCell = htmlTableCell
 73
 74	r.AutoLink = htmlAutoLink
 75	r.CodeSpan = htmlCodeSpan
 76	r.DoubleEmphasis = htmlDoubleEmphasis
 77	r.Emphasis = htmlEmphasis
 78	r.Image = htmlImage
 79	r.LineBreak = htmlLineBreak
 80	r.Link = htmlLink
 81	r.RawHtmlTag = htmlRawTag
 82	r.TripleEmphasis = htmlTripleEmphasis
 83	r.StrikeThrough = htmlStrikeThrough
 84
 85	r.Entity = htmlEntity
 86	r.NormalText = htmlNormalText
 87
 88	r.DocumentHeader = htmlDocumentHeader
 89	r.DocumentFooter = htmlDocumentFooter
 90
 91	closeTag := htmlClose
 92	if flags&HTML_USE_XHTML != 0 {
 93		closeTag = xhtmlClose
 94	}
 95
 96	r.Opaque = &htmlOptions{
 97		flags:    flags,
 98		closeTag: closeTag,
 99		title:    title,
100		css:      css,
101
102		headerCount:  0,
103		currentLevel: 0,
104		toc:          new(bytes.Buffer),
105
106		smartypants: Smartypants(flags),
107	}
108	return r
109}
110
// attrEscape appends src to out, replacing the bytes '"', '&', '<' and '>'
// with their HTML entities. All other bytes are copied through unchanged,
// in runs, so that ordinary text costs one Write per stretch between
// escapes.
func attrEscape(out *bytes.Buffer, src []byte) {
	runStart := 0 // first byte not yet written to out
	for pos, c := range src {
		var entity string
		switch c {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			// ordinary byte: stays part of the pending run
			continue
		}

		// flush the ordinary bytes seen since the previous escape
		if pos > runStart {
			out.Write(src[runStart:pos])
		}
		out.WriteString(entity)
		runStart = pos + 1
	}

	// flush whatever follows the last escape
	if runStart < len(src) {
		out.Write(src[runStart:])
	}
}
156
157func htmlHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
158	options := opaque.(*htmlOptions)
159	marker := out.Len()
160
161	if marker > 0 {
162		out.WriteByte('\n')
163	}
164
165	if options.flags&HTML_TOC != 0 {
166		// headerCount is incremented in htmlTocHeader
167		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
168	} else {
169		out.WriteString(fmt.Sprintf("<h%d>", level))
170	}
171
172	tocMarker := out.Len()
173	if !text() {
174		out.Truncate(marker)
175		return
176	}
177
178	// are we building a table of contents?
179	if options.flags&HTML_TOC != 0 {
180		htmlTocHeader(out.Bytes()[tocMarker:], level, opaque)
181	}
182
183	out.WriteString(fmt.Sprintf("</h%d>\n", level))
184}
185
186func htmlBlockHtml(out *bytes.Buffer, text []byte, opaque interface{}) {
187	options := opaque.(*htmlOptions)
188	if options.flags&HTML_SKIP_HTML != 0 {
189		return
190	}
191
192	sz := len(text)
193	for sz > 0 && text[sz-1] == '\n' {
194		sz--
195	}
196	org := 0
197	for org < sz && text[org] == '\n' {
198		org++
199	}
200	if org >= sz {
201		return
202	}
203	if out.Len() > 0 {
204		out.WriteByte('\n')
205	}
206	out.Write(text[org:sz])
207	out.WriteByte('\n')
208}
209
210func htmlHRule(out *bytes.Buffer, opaque interface{}) {
211	options := opaque.(*htmlOptions)
212
213	if out.Len() > 0 {
214		out.WriteByte('\n')
215	}
216	out.WriteString("<hr")
217	out.WriteString(options.closeTag)
218}
219
220func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
221	options := opaque.(*htmlOptions)
222	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
223		htmlBlockCodeGithub(out, text, lang, opaque)
224	} else {
225		htmlBlockCodeNormal(out, text, lang, opaque)
226	}
227}
228
229func htmlBlockCodeNormal(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
230	if out.Len() > 0 {
231		out.WriteByte('\n')
232	}
233
234	if lang != "" {
235		out.WriteString("<pre><code class=\"")
236
237		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
238			for i < len(lang) && isspace(lang[i]) {
239				i++
240			}
241
242			if i < len(lang) {
243				org := i
244				for i < len(lang) && !isspace(lang[i]) {
245					i++
246				}
247
248				if lang[org] == '.' {
249					org++
250				}
251
252				if cls > 0 {
253					out.WriteByte(' ')
254				}
255				attrEscape(out, []byte(lang[org:]))
256			}
257		}
258
259		out.WriteString("\">")
260	} else {
261		out.WriteString("<pre><code>")
262	}
263
264	if len(text) > 0 {
265		attrEscape(out, text)
266	}
267
268	out.WriteString("</code></pre>\n")
269}
270
271/*
272 * GitHub style code block:
273 *
274 *              <pre lang="LANG"><code>
275 *              ...
276 *              </pre></code>
277 *
278 * Unlike other parsers, we store the language identifier in the <pre>,
279 * and don't let the user generate custom classes.
280 *
281 * The language identifier in the <pre> block gets postprocessed and all
282 * the code inside gets syntax highlighted with Pygments. This is much safer
283 * than letting the user specify a CSS class for highlighting.
284 *
285 * Note that we only generate HTML for the first specifier.
286 * E.g.
287 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
288 */
289func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
290	if out.Len() > 0 {
291		out.WriteByte('\n')
292	}
293
294	if len(lang) > 0 {
295		out.WriteString("<pre lang=\"")
296
297		i := 0
298		for i < len(lang) && !isspace(lang[i]) {
299			i++
300		}
301
302		if lang[0] == '.' {
303			attrEscape(out, []byte(lang[1:i]))
304		} else {
305			attrEscape(out, []byte(lang[:i]))
306		}
307
308		out.WriteString("\"><code>")
309	} else {
310		out.WriteString("<pre><code>")
311	}
312
313	if len(text) > 0 {
314		attrEscape(out, text)
315	}
316
317	out.WriteString("</code></pre>\n")
318}
319
320
// htmlBlockQuote wraps the already-rendered text in a <blockquote> element.
// A newline follows the opening tag; none is added after the closing tag.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
326
// htmlTable writes a <table> whose <thead> holds the already-rendered header
// rows and whose <tbody> holds the already-rendered body rows. columnData is
// not used by this renderer.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	// separate from whatever was rendered before the table
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	out.WriteString("<table><thead>\n")
	out.Write(header)

	out.WriteString("\n</thead><tbody>\n")
	out.Write(body)

	out.WriteString("\n</tbody></table>")
}
337
// htmlTableRow wraps the already-rendered cells in a <tr> element, preceded
// by a newline when out already has content.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	const (
		openTag  = "<tr>\n"
		closeTag = "\n</tr>"
	)
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
346
347func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
348	if out.Len() > 0 {
349		out.WriteByte('\n')
350	}
351	switch align {
352	case TABLE_ALIGNMENT_LEFT:
353		out.WriteString("<td align=\"left\">")
354	case TABLE_ALIGNMENT_RIGHT:
355		out.WriteString("<td align=\"right\">")
356	case TABLE_ALIGNMENT_CENTER:
357		out.WriteString("<td align=\"center\">")
358	default:
359		out.WriteString("<td>")
360	}
361
362	out.Write(text)
363	out.WriteString("</td>")
364}
365
366func htmlList(out *bytes.Buffer, text func() bool, flags int, opaque interface{}) {
367	marker := out.Len()
368
369	if marker > 0 {
370		out.WriteByte('\n')
371	}
372	if flags&LIST_TYPE_ORDERED != 0 {
373		out.WriteString("<ol>\n")
374	} else {
375		out.WriteString("<ul>\n")
376	}
377	if !text() {
378		out.Truncate(marker)
379		return
380	}
381	if flags&LIST_TYPE_ORDERED != 0 {
382		out.WriteString("</ol>\n")
383	} else {
384		out.WriteString("</ul>\n")
385	}
386}
387
// htmlListItem wraps the already-rendered item in <li>...</li>, dropping any
// newlines at the end of text so the closing tag follows the contents
// directly.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	item := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(item)
	out.WriteString("</li>\n")
}
397
// htmlParagraph renders a paragraph as <p>...</p>.
//
// text is called to write the paragraph contents into out; if it reports
// failure, everything written for this paragraph is discarded.
func htmlParagraph(out *bytes.Buffer, text func() bool, opaque interface{}) {
	// remember where the paragraph begins so it can be rolled back
	start := out.Len()
	if start != 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<p>")

	if ok := text(); !ok {
		out.Truncate(start)
		return
	}

	out.WriteString("</p>\n")
}
411
412func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) bool {
413	options := opaque.(*htmlOptions)
414
415	if len(link) == 0 {
416		return false
417	}
418	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
419		return false
420	}
421
422	out.WriteString("<a href=\"")
423	if kind == LINK_TYPE_EMAIL {
424		out.WriteString("mailto:")
425	}
426	attrEscape(out, link)
427	out.WriteString("\">")
428
429	/*
430	 * Pretty print: if we get an email address as
431	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
432	 * want to print the `mailto:` prefix
433	 */
434	switch {
435	case bytes.HasPrefix(link, []byte("mailto://")):
436		attrEscape(out, link[9:])
437	case bytes.HasPrefix(link, []byte("mailto:")):
438		attrEscape(out, link[7:])
439	default:
440		attrEscape(out, link)
441	}
442
443	out.WriteString("</a>")
444
445	return true
446}
447
448func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) bool {
449	out.WriteString("<code>")
450	attrEscape(out, text)
451	out.WriteString("</code>")
452	return true
453}
454
// htmlDoubleEmphasis wraps the already-rendered text in <strong>. It writes
// nothing and returns false when text is empty.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) < 1 {
		return false
	}

	const (
		openTag  = "<strong>"
		closeTag = "</strong>"
	)
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return true
}
464
// htmlEmphasis wraps the already-rendered text in <em>. It writes nothing
// and returns false when text is empty.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) < 1 {
		return false
	}

	const (
		openTag  = "<em>"
		closeTag = "</em>"
	)
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return true
}
474
475func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) bool {
476	options := opaque.(*htmlOptions)
477	if options.flags&HTML_SKIP_IMAGES != 0 {
478		return false
479	}
480
481	if len(link) == 0 {
482		return false
483	}
484	out.WriteString("<img src=\"")
485	attrEscape(out, link)
486	out.WriteString("\" alt=\"")
487	if len(alt) > 0 {
488		attrEscape(out, alt)
489	}
490	if len(title) > 0 {
491		out.WriteString("\" title=\"")
492		attrEscape(out, title)
493	}
494
495	out.WriteByte('"')
496	out.WriteString(options.closeTag)
497	return true
498}
499
500func htmlLineBreak(out *bytes.Buffer, opaque interface{}) bool {
501	options := opaque.(*htmlOptions)
502	out.WriteString("<br")
503	out.WriteString(options.closeTag)
504	return true
505}
506
507func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) bool {
508	options := opaque.(*htmlOptions)
509	if options.flags&HTML_SKIP_LINKS != 0 {
510		return false
511	}
512
513	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
514		return false
515	}
516
517	out.WriteString("<a href=\"")
518	attrEscape(out, link)
519	if len(title) > 0 {
520		out.WriteString("\" title=\"")
521		attrEscape(out, title)
522	}
523	out.WriteString("\">")
524	out.Write(content)
525	out.WriteString("</a>")
526	return true
527}
528
529func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) bool {
530	options := opaque.(*htmlOptions)
531	if options.flags&HTML_SKIP_HTML != 0 {
532		return true
533	}
534	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
535		return true
536	}
537	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
538		return true
539	}
540	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
541		return true
542	}
543	out.Write(text)
544	return true
545}
546
// htmlTripleEmphasis wraps the already-rendered text in <strong><em>. It
// writes nothing and returns false when text is empty.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) < 1 {
		return false
	}

	const (
		openTags  = "<strong><em>"
		closeTags = "</em></strong>"
	)
	out.WriteString(openTags)
	out.Write(text)
	out.WriteString(closeTags)
	return true
}
556
// htmlStrikeThrough wraps the already-rendered text in <del>. It writes
// nothing and returns false when text is empty.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) < 1 {
		return false
	}

	const (
		openTag  = "<del>"
		closeTag = "</del>"
	)
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return true
}
566
// htmlEntity copies an entity to out unchanged; no escaping is applied.
func htmlEntity(out *bytes.Buffer, entity []byte, opaque interface{}) {
	// the entity is already valid HTML, so pass it straight through
	out.Write(entity)
}
570
571func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
572	options := opaque.(*htmlOptions)
573	if options.flags&HTML_USE_SMARTYPANTS != 0 {
574		htmlSmartypants(out, text, opaque)
575	} else {
576		attrEscape(out, text)
577	}
578}
579
580func htmlDocumentHeader(out *bytes.Buffer, opaque interface{}) {
581	options := opaque.(*htmlOptions)
582	if options.flags&HTML_COMPLETE_PAGE == 0 {
583		return
584	}
585
586	ending := ""
587	if options.flags&HTML_USE_XHTML != 0 {
588		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
589		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
590		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
591		ending = " /"
592	} else {
593		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
594		out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
595		out.WriteString("<html>\n")
596	}
597	out.WriteString("<head>\n")
598	out.WriteString("  <title>")
599	htmlNormalText(out, []byte(options.title), opaque)
600	out.WriteString("</title>\n")
601	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
602	out.WriteString(VERSION)
603	out.WriteString("\"")
604	out.WriteString(ending)
605	out.WriteString(">\n")
606	out.WriteString("  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
607	out.WriteString(ending)
608	out.WriteString(">\n")
609	if options.css != "" {
610		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
611		attrEscape(out, []byte(options.css))
612		out.WriteString("\"")
613		out.WriteString(ending)
614		out.WriteString(">\n")
615	}
616	out.WriteString("</head>\n")
617	out.WriteString("<body>\n")
618
619	options.tocMarker = out.Len()
620}
621
622func htmlDocumentFooter(out *bytes.Buffer, opaque interface{}) {
623	options := opaque.(*htmlOptions)
624
625	// finalize and insert the table of contents
626	if options.flags&HTML_TOC != 0 {
627		htmlTocFinalize(opaque)
628
629		// now we have to insert the table of contents into the document
630		var temp bytes.Buffer
631
632		// start by making a copy of everything after the document header
633		temp.Write(out.Bytes()[options.tocMarker:])
634
635		// now clear the copied material from the main output buffer
636		out.Truncate(options.tocMarker)
637
638		// insert the table of contents
639		out.Write(options.toc.Bytes())
640
641		// write out everything that came after it
642		if options.flags&HTML_OMIT_CONTENTS == 0 {
643			out.Write(temp.Bytes())
644		}
645	}
646
647	if options.flags&HTML_COMPLETE_PAGE != 0 {
648		out.WriteString("\n</body>\n")
649		out.WriteString("</html>\n")
650	}
651
652}
653
654func htmlTocHeader(text []byte, level int, opaque interface{}) {
655	options := opaque.(*htmlOptions)
656
657	for level > options.currentLevel {
658		switch {
659		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
660			size := options.toc.Len()
661			options.toc.Truncate(size - len("</li>\n"))
662
663		case options.currentLevel > 0:
664			options.toc.WriteString("<li>")
665		}
666		options.toc.WriteString("\n<ul>\n")
667		options.currentLevel++
668	}
669
670	for level < options.currentLevel {
671		options.toc.WriteString("</ul>")
672		if options.currentLevel > 1 {
673			options.toc.WriteString("</li>\n")
674		}
675		options.currentLevel--
676	}
677
678	options.toc.WriteString("<li><a href=\"#toc_")
679	options.toc.WriteString(strconv.Itoa(options.headerCount))
680	options.toc.WriteString("\">")
681	options.headerCount++
682
683	options.toc.Write(text)
684
685	options.toc.WriteString("</a></li>\n")
686}
687
688func htmlTocFinalize(opaque interface{}) {
689	options := opaque.(*htmlOptions)
690	for options.currentLevel > 1 {
691		options.toc.WriteString("</ul></li>\n")
692		options.currentLevel--
693	}
694
695	if options.currentLevel > 0 {
696		options.toc.WriteString("</ul>\n")
697	}
698}
699
700func isHtmlTag(tag []byte, tagname string) bool {
701	i := 0
702	if i < len(tag) && tag[0] != '<' {
703		return false
704	}
705	i++
706	for i < len(tag) && isspace(tag[i]) {
707		i++
708	}
709
710	if i < len(tag) && tag[i] == '/' {
711		i++
712	}
713
714	for i < len(tag) && isspace(tag[i]) {
715		i++
716	}
717
718	j := i
719	for ; i < len(tag); i, j = i+1, j+1 {
720		if j >= len(tagname) {
721			break
722		}
723
724		if tag[i] != tagname[j] {
725			return false
726		}
727	}
728
729	if i == len(tag) {
730		return false
731	}
732
733	return isspace(tag[i]) || tag[i] == '>'
734}