all repos — grayfriday @ 2a18706ca4952462e699c51b746e188970933d8d

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Licensed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22)
 23
 24const (
 25	HTML_SKIP_HTML = 1 << iota
 26	HTML_SKIP_STYLE
 27	HTML_SKIP_IMAGES
 28	HTML_SKIP_LINKS
 29	HTML_SAFELINK
 30	HTML_TOC
 31	HTML_GITHUB_BLOCKCODE
 32	HTML_USE_XHTML
 33	HTML_USE_SMARTYPANTS
 34	HTML_SMARTYPANTS_FRACTIONS
 35	HTML_SMARTYPANTS_LATEX_DASHES
 36)
 37
 38type htmlOptions struct {
 39	flags    int
 40	closeTag string // how to end singleton tags: either " />\n" or ">\n"
 41	tocData  struct {
 42		headerCount  int
 43		currentLevel int
 44	}
 45	smartypants *SmartypantsRenderer
 46}
 47
 48var xhtmlClose = " />\n"
 49var htmlClose = ">\n"
 50
 51func HtmlRenderer(flags int) *Renderer {
 52	// configure the rendering engine
 53	r := new(Renderer)
 54	if flags&HTML_GITHUB_BLOCKCODE == 0 {
 55		r.BlockCode = htmlBlockCode
 56	} else {
 57		r.BlockCode = htmlBlockCodeGithub
 58	}
 59	r.BlockQuote = htmlBlockQuote
 60	if flags&HTML_SKIP_HTML == 0 {
 61		r.BlockHtml = htmlRawBlock
 62	}
 63	r.Header = htmlHeader
 64	r.HRule = htmlHRule
 65	r.List = htmlList
 66	r.ListItem = htmlListItem
 67	r.Paragraph = htmlParagraph
 68	r.Table = htmlTable
 69	r.TableRow = htmlTableRow
 70	r.TableCell = htmlTableCell
 71
 72	r.AutoLink = htmlAutoLink
 73	r.CodeSpan = htmlCodeSpan
 74	r.DoubleEmphasis = htmlDoubleEmphasis
 75	r.Emphasis = htmlEmphasis
 76	if flags&HTML_SKIP_IMAGES == 0 {
 77		r.Image = htmlImage
 78	}
 79	r.LineBreak = htmlLineBreak
 80	if flags&HTML_SKIP_LINKS == 0 {
 81		r.Link = htmlLink
 82	}
 83	r.RawHtmlTag = htmlRawTag
 84	r.TripleEmphasis = htmlTripleEmphasis
 85	r.StrikeThrough = htmlStrikeThrough
 86
 87	var cb *SmartypantsRenderer
 88	if flags&HTML_USE_SMARTYPANTS == 0 {
 89		r.NormalText = htmlNormalText
 90	} else {
 91		cb = Smartypants(flags)
 92		r.NormalText = htmlSmartypants
 93	}
 94
 95	closeTag := htmlClose
 96	if flags&HTML_USE_XHTML != 0 {
 97		closeTag = xhtmlClose
 98	}
 99	r.Opaque = &htmlOptions{flags: flags, closeTag: closeTag, smartypants: cb}
100	return r
101}
102
103func HtmlTocRenderer(flags int) *Renderer {
104	// configure the rendering engine
105	r := new(Renderer)
106	r.Header = htmlTocHeader
107
108	r.CodeSpan = htmlCodeSpan
109	r.DoubleEmphasis = htmlDoubleEmphasis
110	r.Emphasis = htmlEmphasis
111	r.TripleEmphasis = htmlTripleEmphasis
112	r.StrikeThrough = htmlStrikeThrough
113
114	r.DocumentFooter = htmlTocFinalize
115
116	closeTag := ">\n"
117	if flags&HTML_USE_XHTML != 0 {
118		closeTag = " />\n"
119	}
120	r.Opaque = &htmlOptions{flags: flags | HTML_TOC, closeTag: closeTag}
121	return r
122}
123
// attrEscape appends src to out, replacing the four characters
// '"', '&', '<' and '>' with their HTML entities. Every other byte is
// copied through unchanged, in runs, so that clean stretches cost a single
// Write call.
func attrEscape(out *bytes.Buffer, src []byte) {
	pending := 0 // start of the run that has not been copied yet
	for pos, c := range src {
		var entity string
		switch c {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// flush the ordinary bytes seen since the previous escape
		if pos > pending {
			out.Write(src[pending:pos])
		}
		out.WriteString(entity)
		pending = pos + 1
	}

	// whatever follows the last escaped character
	if pending < len(src) {
		out.Write(src[pending:])
	}
}
169
170func htmlHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
171	options := opaque.(*htmlOptions)
172	marker := out.Len()
173
174	if marker > 0 {
175		out.WriteByte('\n')
176	}
177
178	if options.flags&HTML_TOC != 0 {
179		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.tocData.headerCount))
180		options.tocData.headerCount++
181	} else {
182		out.WriteString(fmt.Sprintf("<h%d>", level))
183	}
184
185	if !text() {
186		out.Truncate(marker)
187		return
188	}
189	out.WriteString(fmt.Sprintf("</h%d>\n", level))
190}
191
// htmlRawBlock copies a block of raw HTML to out with all leading and
// trailing newlines removed, followed by exactly one newline. A block that
// consists of newlines only (or is empty) produces no output. When out
// already holds data a newline is written first as a separator.
func htmlRawBlock(out *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if out.Len() > 0 {
		out.WriteByte('\n')
	}
	out.Write(body)
	out.WriteByte('\n')
}
210
211func htmlHRule(out *bytes.Buffer, opaque interface{}) {
212	options := opaque.(*htmlOptions)
213
214	if out.Len() > 0 {
215		out.WriteByte('\n')
216	}
217	out.WriteString("<hr")
218	out.WriteString(options.closeTag)
219}
220
221func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
222	if out.Len() > 0 {
223		out.WriteByte('\n')
224	}
225
226	if lang != "" {
227		out.WriteString("<pre><code class=\"")
228
229		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
230			for i < len(lang) && isspace(lang[i]) {
231				i++
232			}
233
234			if i < len(lang) {
235				org := i
236				for i < len(lang) && !isspace(lang[i]) {
237					i++
238				}
239
240				if lang[org] == '.' {
241					org++
242				}
243
244				if cls > 0 {
245					out.WriteByte(' ')
246				}
247				attrEscape(out, []byte(lang[org:]))
248			}
249		}
250
251		out.WriteString("\">")
252	} else {
253		out.WriteString("<pre><code>")
254	}
255
256	if len(text) > 0 {
257		attrEscape(out, text)
258	}
259
260	out.WriteString("</code></pre>\n")
261}
262
263/*
264 * GitHub style code block:
265 *
266 *              <pre lang="LANG"><code>
267 *              ...
268 *              </pre></code>
269 *
270 * Unlike other parsers, we store the language identifier in the <pre>,
271 * and don't let the user generate custom classes.
272 *
273 * The language identifier in the <pre> block gets postprocessed and all
274 * the code inside gets syntax highlighted with Pygments. This is much safer
275 * than letting the user specify a CSS class for highlighting.
276 *
277 * Note that we only generate HTML for the first specifier.
278 * E.g.
279 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
280 */
281func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
282	if out.Len() > 0 {
283		out.WriteByte('\n')
284	}
285
286	if len(lang) > 0 {
287		out.WriteString("<pre lang=\"")
288
289		i := 0
290		for i < len(lang) && !isspace(lang[i]) {
291			i++
292		}
293
294		if lang[0] == '.' {
295			attrEscape(out, []byte(lang[1:i]))
296		} else {
297			attrEscape(out, []byte(lang[:i]))
298		}
299
300		out.WriteString("\"><code>")
301	} else {
302		out.WriteString("<pre><code>")
303	}
304
305	if len(text) > 0 {
306		attrEscape(out, text)
307	}
308
309	out.WriteString("</code></pre>\n")
310}
311
312
// htmlBlockQuote wraps the already rendered text in a <blockquote>
// element. The opening tag is followed by a newline; nothing is written
// after the closing tag.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		opening = "<blockquote>\n"
		closing = "</blockquote>"
	)

	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
}
318
// htmlTable writes a <table> whose <thead> holds the rendered header and
// whose <tbody> holds the rendered body. columnData is not used by this
// renderer. A separating newline is written first when out is not empty.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	const (
		head   = "<table><thead>\n"
		middle = "\n</thead><tbody>\n"
		tail   = "\n</tbody></table>"
	)

	if out.Len() > 0 {
		out.WriteByte('\n')
	}

	out.WriteString(head)
	out.Write(header)
	out.WriteString(middle)
	out.Write(body)
	out.WriteString(tail)
}
329
// htmlTableRow wraps the rendered cells of one row in a <tr> element. A
// separating newline is written first when out is not empty.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		opening = "<tr>\n"
		closing = "\n</tr>"
	)

	if out.Len() > 0 {
		out.WriteByte('\n')
	}

	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
}
338
339func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
340	if out.Len() > 0 {
341		out.WriteByte('\n')
342	}
343	switch align {
344	case TABLE_ALIGNMENT_LEFT:
345		out.WriteString("<td align=\"left\">")
346	case TABLE_ALIGNMENT_RIGHT:
347		out.WriteString("<td align=\"right\">")
348	case TABLE_ALIGNMENT_CENTER:
349		out.WriteString("<td align=\"center\">")
350	default:
351		out.WriteString("<td>")
352	}
353
354	out.Write(text)
355	out.WriteString("</td>")
356}
357
358func htmlList(out *bytes.Buffer, text func() bool, flags int, opaque interface{}) {
359	marker := out.Len()
360
361	if marker > 0 {
362		out.WriteByte('\n')
363	}
364	if flags&LIST_TYPE_ORDERED != 0 {
365		out.WriteString("<ol>\n")
366	} else {
367		out.WriteString("<ul>\n")
368	}
369	if !text() {
370		out.Truncate(marker)
371		return
372	}
373	if flags&LIST_TYPE_ORDERED != 0 {
374		out.WriteString("</ol>\n")
375	} else {
376		out.WriteString("</ul>\n")
377	}
378}
379
// htmlListItem wraps the rendered item text in an <li> element. Trailing
// newlines of the text are dropped so that the closing tag follows the
// content directly.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	content := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(content)
	out.WriteString("</li>\n")
}
389
// htmlParagraph writes a <p> element. The paragraph body is produced by
// calling text; when text reports false everything written for this
// paragraph is discarded again.
func htmlParagraph(out *bytes.Buffer, text func() bool, opaque interface{}) {
	start := out.Len()
	if start > 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<p>")

	if text() {
		out.WriteString("</p>\n")
		return
	}

	// nothing was rendered: roll the buffer back
	out.Truncate(start)
}
403
404func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
405	options := opaque.(*htmlOptions)
406
407	if len(link) == 0 {
408		return 0
409	}
410	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
411		return 0
412	}
413
414	out.WriteString("<a href=\"")
415	if kind == LINK_TYPE_EMAIL {
416		out.WriteString("mailto:")
417	}
418	attrEscape(out, link)
419	out.WriteString("\">")
420
421	/*
422	 * Pretty print: if we get an email address as
423	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
424	 * want to print the `mailto:` prefix
425	 */
426	switch {
427	case bytes.HasPrefix(link, []byte("mailto://")):
428		attrEscape(out, link[9:])
429	case bytes.HasPrefix(link, []byte("mailto:")):
430		attrEscape(out, link[7:])
431	default:
432		attrEscape(out, link)
433	}
434
435	out.WriteString("</a>")
436
437	return 1
438}
439
440func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) int {
441	out.WriteString("<code>")
442	attrEscape(out, text)
443	out.WriteString("</code>")
444	return 1
445}
446
// htmlDoubleEmphasis wraps the rendered text in a <strong> element and
// returns 1. Empty text is rejected: nothing is written and 0 is returned.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const opening, closing = "<strong>", "</strong>"

	if len(text) == 0 {
		return 0
	}

	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
	return 1
}
456
// htmlEmphasis wraps the rendered text in an <em> element and returns 1.
// Empty text is rejected: nothing is written and 0 is returned.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const opening, closing = "<em>", "</em>"

	if len(text) == 0 {
		return 0
	}

	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
	return 1
}
466
467func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
468	options := opaque.(*htmlOptions)
469	if len(link) == 0 {
470		return 0
471	}
472	out.WriteString("<img src=\"")
473	attrEscape(out, link)
474	out.WriteString("\" alt=\"")
475	if len(alt) > 0 {
476		attrEscape(out, alt)
477	}
478	if len(title) > 0 {
479		out.WriteString("\" title=\"")
480		attrEscape(out, title)
481	}
482
483	out.WriteByte('"')
484	out.WriteString(options.closeTag)
485	return 1
486}
487
488func htmlLineBreak(out *bytes.Buffer, opaque interface{}) int {
489	options := opaque.(*htmlOptions)
490	out.WriteString("<br")
491	out.WriteString(options.closeTag)
492	return 1
493}
494
495func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
496	options := opaque.(*htmlOptions)
497
498	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
499		return 0
500	}
501
502	out.WriteString("<a href=\"")
503	attrEscape(out, link)
504	if len(title) > 0 {
505		out.WriteString("\" title=\"")
506		attrEscape(out, title)
507	}
508	out.WriteString("\">")
509	out.Write(content)
510	out.WriteString("</a>")
511	return 1
512}
513
514func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) int {
515	options := opaque.(*htmlOptions)
516	if options.flags&HTML_SKIP_HTML != 0 {
517		return 1
518	}
519	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
520		return 1
521	}
522	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
523		return 1
524	}
525	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
526		return 1
527	}
528	out.Write(text)
529	return 1
530}
531
// htmlTripleEmphasis wraps the rendered text in nested <strong> and <em>
// elements and returns 1. Empty text is rejected: nothing is written and 0
// is returned.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const opening, closing = "<strong><em>", "</em></strong>"

	if len(text) == 0 {
		return 0
	}

	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
	return 1
}
541
// htmlStrikeThrough wraps the rendered text in a <del> element and returns
// 1. Empty text is rejected: nothing is written and 0 is returned.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const opening, closing = "<del>", "</del>"

	if len(text) == 0 {
		return 0
	}

	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
	return 1
}
551
552func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
553	attrEscape(out, text)
554}
555
556func htmlTocHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
557	options := opaque.(*htmlOptions)
558	marker := out.Len()
559
560	for level > options.tocData.currentLevel {
561		if options.tocData.currentLevel > 0 {
562			out.WriteString("<li>")
563		}
564		out.WriteString("<ul>\n")
565		options.tocData.currentLevel++
566	}
567
568	for level < options.tocData.currentLevel {
569		out.WriteString("</ul>")
570		if options.tocData.currentLevel > 1 {
571			out.WriteString("</li>\n")
572		}
573		options.tocData.currentLevel--
574	}
575
576	out.WriteString("<li><a href=\"#toc_")
577	out.WriteString(strconv.Itoa(options.tocData.headerCount))
578	out.WriteString("\">")
579	options.tocData.headerCount++
580
581	if !text() {
582		out.Truncate(marker)
583		return
584	}
585	out.WriteString("</a></li>\n")
586}
587
588func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
589	options := opaque.(*htmlOptions)
590	for options.tocData.currentLevel > 1 {
591		out.WriteString("</ul></li>\n")
592		options.tocData.currentLevel--
593	}
594
595	if options.tocData.currentLevel > 0 {
596		out.WriteString("</ul>\n")
597	}
598}
599
600func isHtmlTag(tag []byte, tagname string) bool {
601	i := 0
602	if i < len(tag) && tag[0] != '<' {
603		return false
604	}
605	i++
606	for i < len(tag) && isspace(tag[i]) {
607		i++
608	}
609
610	if i < len(tag) && tag[i] == '/' {
611		i++
612	}
613
614	for i < len(tag) && isspace(tag[i]) {
615		i++
616	}
617
618	tag_i := i
619	for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
620		if tag_i >= len(tagname) {
621			break
622		}
623
624		if tag[i] != tagname[tag_i] {
625			return false
626		}
627	}
628
629	if i == len(tag) {
630		return false
631	}
632
633	return isspace(tag[i]) || tag[i] == '>'
634}