all repos — grayfriday @ b1a03182500bc04a7ef27c5685a8cbe5a5d48cbd

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22)
 23
// Bit flags that configure the HTML renderer. Combine them with bitwise OR
// and pass the result to HtmlRenderer or HtmlTocRenderer.
const (
	HTML_SKIP_HTML = 1 << iota // drop raw HTML blocks and inline raw tags
	HTML_SKIP_STYLE            // drop inline raw <style> tags
	HTML_SKIP_IMAGES           // do not render images; drop inline raw <img> tags
	HTML_SKIP_LINKS            // do not render links; drop inline raw <a> tags
	HTML_SAFELINK              // only render links that pass isSafeLink
	HTML_TOC                   // give each header an id="toc_N" anchor
	HTML_GITHUB_BLOCKCODE      // render code blocks as <pre lang="..."><code>
	HTML_USE_XHTML             // close singleton tags with " />" instead of ">"
	HTML_USE_SMARTYPANTS       // route normal text through htmlSmartypants
	HTML_SMARTYPANTS_FRACTIONS // not read in this file; presumably consumed by Smartypants(flags) — confirm
	HTML_SMARTYPANTS_LATEX_DASHES // not read in this file; presumably consumed by Smartypants(flags) — confirm
)
 37
// htmlOptions is the per-renderer state stored in Renderer.Opaque and
// handed back to every html* callback through its opaque argument.
type htmlOptions struct {
	flags    int    // bitwise OR of the HTML_* constants
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	// tocData tracks table-of-contents progress across header callbacks.
	tocData struct {
		headerCount  int // next N used for "toc_N" ids and hrefs
		currentLevel int // depth of <ul> nesting currently open in the TOC
	}
	smartypants *SmartypantsRenderer // built by Smartypants(flags) in HtmlRenderer; left nil by HtmlTocRenderer
}
 47
 48var xhtmlClose = " />\n"
 49var htmlClose = ">\n"
 50
 51func HtmlRenderer(flags int) *Renderer {
 52	// configure the rendering engine
 53	r := new(Renderer)
 54	r.BlockCode = htmlBlockCode
 55	r.BlockQuote = htmlBlockQuote
 56	r.BlockHtml = htmlBlockHtml
 57	r.Header = htmlHeader
 58	r.HRule = htmlHRule
 59	r.List = htmlList
 60	r.ListItem = htmlListItem
 61	r.Paragraph = htmlParagraph
 62	r.Table = htmlTable
 63	r.TableRow = htmlTableRow
 64	r.TableCell = htmlTableCell
 65
 66	r.AutoLink = htmlAutoLink
 67	r.CodeSpan = htmlCodeSpan
 68	r.DoubleEmphasis = htmlDoubleEmphasis
 69	r.Emphasis = htmlEmphasis
 70	r.Image = htmlImage
 71	r.LineBreak = htmlLineBreak
 72	r.Link = htmlLink
 73	r.RawHtmlTag = htmlRawTag
 74	r.TripleEmphasis = htmlTripleEmphasis
 75	r.StrikeThrough = htmlStrikeThrough
 76	r.NormalText = htmlNormalText
 77
 78	closeTag := htmlClose
 79	if flags&HTML_USE_XHTML != 0 {
 80		closeTag = xhtmlClose
 81	}
 82	r.Opaque = &htmlOptions{flags: flags, closeTag: closeTag, smartypants: Smartypants(flags)}
 83	return r
 84}
 85
 86func HtmlTocRenderer(flags int) *Renderer {
 87	// configure the rendering engine
 88	r := new(Renderer)
 89	r.Header = htmlTocHeader
 90
 91	r.CodeSpan = htmlCodeSpan
 92	r.DoubleEmphasis = htmlDoubleEmphasis
 93	r.Emphasis = htmlEmphasis
 94	r.TripleEmphasis = htmlTripleEmphasis
 95	r.StrikeThrough = htmlStrikeThrough
 96
 97	r.DocumentFooter = htmlTocFinalize
 98
 99	closeTag := ">\n"
100	if flags&HTML_USE_XHTML != 0 {
101		closeTag = " />\n"
102	}
103	r.Opaque = &htmlOptions{flags: flags | HTML_TOC, closeTag: closeTag}
104	return r
105}
106
// attrEscape appends src to out, replacing the characters ", &, < and >
// with their HTML entities. Runs of ordinary bytes between escapes are
// copied in a single Write.
func attrEscape(out *bytes.Buffer, src []byte) {
	pending := 0 // start of the not-yet-copied run of ordinary bytes
	for pos := 0; pos < len(src); pos++ {
		var entity string
		switch src[pos] {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// flush the ordinary bytes seen since the previous escape
		if pos > pending {
			out.Write(src[pending:pos])
		}
		pending = pos + 1
		out.WriteString(entity)
	}

	// whatever follows the last escape
	if pending < len(src) {
		out.Write(src[pending:])
	}
}
152
153func htmlHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
154	options := opaque.(*htmlOptions)
155	marker := out.Len()
156
157	if marker > 0 {
158		out.WriteByte('\n')
159	}
160
161	if options.flags&HTML_TOC != 0 {
162		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.tocData.headerCount))
163		options.tocData.headerCount++
164	} else {
165		out.WriteString(fmt.Sprintf("<h%d>", level))
166	}
167
168	if !text() {
169		out.Truncate(marker)
170		return
171	}
172	out.WriteString(fmt.Sprintf("</h%d>\n", level))
173}
174
175func htmlBlockHtml(out *bytes.Buffer, text []byte, opaque interface{}) {
176	options := opaque.(*htmlOptions)
177	if options.flags&HTML_SKIP_HTML != 0 {
178		return
179	}
180
181	sz := len(text)
182	for sz > 0 && text[sz-1] == '\n' {
183		sz--
184	}
185	org := 0
186	for org < sz && text[org] == '\n' {
187		org++
188	}
189	if org >= sz {
190		return
191	}
192	if out.Len() > 0 {
193		out.WriteByte('\n')
194	}
195	out.Write(text[org:sz])
196	out.WriteByte('\n')
197}
198
199func htmlHRule(out *bytes.Buffer, opaque interface{}) {
200	options := opaque.(*htmlOptions)
201
202	if out.Len() > 0 {
203		out.WriteByte('\n')
204	}
205	out.WriteString("<hr")
206	out.WriteString(options.closeTag)
207}
208
209func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
210	options := opaque.(*htmlOptions)
211	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
212		htmlBlockCodeGithub(out, text, lang, opaque)
213	} else {
214		htmlBlockCodeNormal(out, text, lang, opaque)
215	}
216}
217
218func htmlBlockCodeNormal(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
219	if out.Len() > 0 {
220		out.WriteByte('\n')
221	}
222
223	if lang != "" {
224		out.WriteString("<pre><code class=\"")
225
226		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
227			for i < len(lang) && isspace(lang[i]) {
228				i++
229			}
230
231			if i < len(lang) {
232				org := i
233				for i < len(lang) && !isspace(lang[i]) {
234					i++
235				}
236
237				if lang[org] == '.' {
238					org++
239				}
240
241				if cls > 0 {
242					out.WriteByte(' ')
243				}
244				attrEscape(out, []byte(lang[org:]))
245			}
246		}
247
248		out.WriteString("\">")
249	} else {
250		out.WriteString("<pre><code>")
251	}
252
253	if len(text) > 0 {
254		attrEscape(out, text)
255	}
256
257	out.WriteString("</code></pre>\n")
258}
259
260/*
261 * GitHub style code block:
262 *
263 *              <pre lang="LANG"><code>
264 *              ...
265 *              </pre></code>
266 *
267 * Unlike other parsers, we store the language identifier in the <pre>,
268 * and don't let the user generate custom classes.
269 *
270 * The language identifier in the <pre> block gets postprocessed and all
271 * the code inside gets syntax highlighted with Pygments. This is much safer
272 * than letting the user specify a CSS class for highlighting.
273 *
274 * Note that we only generate HTML for the first specifier.
275 * E.g.
276 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
277 */
278func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
279	if out.Len() > 0 {
280		out.WriteByte('\n')
281	}
282
283	if len(lang) > 0 {
284		out.WriteString("<pre lang=\"")
285
286		i := 0
287		for i < len(lang) && !isspace(lang[i]) {
288			i++
289		}
290
291		if lang[0] == '.' {
292			attrEscape(out, []byte(lang[1:i]))
293		} else {
294			attrEscape(out, []byte(lang[:i]))
295		}
296
297		out.WriteString("\"><code>")
298	} else {
299		out.WriteString("<pre><code>")
300	}
301
302	if len(text) > 0 {
303		attrEscape(out, text)
304	}
305
306	out.WriteString("</code></pre>\n")
307}
308
309
// htmlBlockQuote wraps the already-rendered text in a <blockquote> element.
// A newline follows the opening tag; none is added after the closing tag.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
315
// htmlTable assembles a table from its already-rendered header and body
// rows. columnData is not used by this renderer.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	const (
		head = "<table><thead>\n"
		mid  = "\n</thead><tbody>\n"
		tail = "\n</tbody></table>"
	)

	// separate from whatever was rendered before
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	out.WriteString(head)
	out.Write(header)
	out.WriteString(mid)
	out.Write(body)
	out.WriteString(tail)
}
326
// htmlTableRow wraps the already-rendered cells in a <tr> element.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<tr>\n"
		closeTag = "\n</tr>"
	)

	// separate from whatever was rendered before
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
335
336func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
337	if out.Len() > 0 {
338		out.WriteByte('\n')
339	}
340	switch align {
341	case TABLE_ALIGNMENT_LEFT:
342		out.WriteString("<td align=\"left\">")
343	case TABLE_ALIGNMENT_RIGHT:
344		out.WriteString("<td align=\"right\">")
345	case TABLE_ALIGNMENT_CENTER:
346		out.WriteString("<td align=\"center\">")
347	default:
348		out.WriteString("<td>")
349	}
350
351	out.Write(text)
352	out.WriteString("</td>")
353}
354
355func htmlList(out *bytes.Buffer, text func() bool, flags int, opaque interface{}) {
356	marker := out.Len()
357
358	if marker > 0 {
359		out.WriteByte('\n')
360	}
361	if flags&LIST_TYPE_ORDERED != 0 {
362		out.WriteString("<ol>\n")
363	} else {
364		out.WriteString("<ul>\n")
365	}
366	if !text() {
367		out.Truncate(marker)
368		return
369	}
370	if flags&LIST_TYPE_ORDERED != 0 {
371		out.WriteString("</ol>\n")
372	} else {
373		out.WriteString("</ul>\n")
374	}
375}
376
// htmlListItem wraps the already-rendered item text in <li>...</li>,
// dropping any trailing newlines from the text first. flags is not used.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	body := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(body)
	out.WriteString("</li>\n")
}
386
// htmlParagraph renders a paragraph as <p>...</p>. text writes the
// paragraph contents into out and reports success; on failure everything
// this call wrote is discarded.
func htmlParagraph(out *bytes.Buffer, text func() bool, opaque interface{}) {
	start := out.Len()

	// separate from whatever was rendered before
	if start != 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<p>")

	if text() {
		out.WriteString("</p>\n")
		return
	}

	// no content: roll back the separator and the opening tag
	out.Truncate(start)
}
400
401func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) bool {
402	options := opaque.(*htmlOptions)
403
404	if len(link) == 0 {
405		return false
406	}
407	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
408		return false
409	}
410
411	out.WriteString("<a href=\"")
412	if kind == LINK_TYPE_EMAIL {
413		out.WriteString("mailto:")
414	}
415	attrEscape(out, link)
416	out.WriteString("\">")
417
418	/*
419	 * Pretty print: if we get an email address as
420	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
421	 * want to print the `mailto:` prefix
422	 */
423	switch {
424	case bytes.HasPrefix(link, []byte("mailto://")):
425		attrEscape(out, link[9:])
426	case bytes.HasPrefix(link, []byte("mailto:")):
427		attrEscape(out, link[7:])
428	default:
429		attrEscape(out, link)
430	}
431
432	out.WriteString("</a>")
433
434	return true
435}
436
437func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) bool {
438	out.WriteString("<code>")
439	attrEscape(out, text)
440	out.WriteString("</code>")
441	return true
442}
443
// htmlDoubleEmphasis wraps the already-rendered text in <strong>.
// It returns false, writing nothing, when text is empty.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) > 0 {
		out.WriteString("<strong>")
		out.Write(text)
		out.WriteString("</strong>")
		return true
	}
	return false
}
453
// htmlEmphasis wraps the already-rendered text in <em>.
// It returns false, writing nothing, when text is empty.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) > 0 {
		out.WriteString("<em>")
		out.Write(text)
		out.WriteString("</em>")
		return true
	}
	return false
}
463
464func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) bool {
465	options := opaque.(*htmlOptions)
466	if options.flags&HTML_SKIP_IMAGES != 0 {
467		return false
468	}
469
470	if len(link) == 0 {
471		return false
472	}
473	out.WriteString("<img src=\"")
474	attrEscape(out, link)
475	out.WriteString("\" alt=\"")
476	if len(alt) > 0 {
477		attrEscape(out, alt)
478	}
479	if len(title) > 0 {
480		out.WriteString("\" title=\"")
481		attrEscape(out, title)
482	}
483
484	out.WriteByte('"')
485	out.WriteString(options.closeTag)
486	return true
487}
488
489func htmlLineBreak(out *bytes.Buffer, opaque interface{}) bool {
490	options := opaque.(*htmlOptions)
491	out.WriteString("<br")
492	out.WriteString(options.closeTag)
493	return true
494}
495
496func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) bool {
497	options := opaque.(*htmlOptions)
498	if options.flags&HTML_SKIP_LINKS != 0 {
499		return false
500	}
501
502	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
503		return false
504	}
505
506	out.WriteString("<a href=\"")
507	attrEscape(out, link)
508	if len(title) > 0 {
509		out.WriteString("\" title=\"")
510		attrEscape(out, title)
511	}
512	out.WriteString("\">")
513	out.Write(content)
514	out.WriteString("</a>")
515	return true
516}
517
518func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) bool {
519	options := opaque.(*htmlOptions)
520	if options.flags&HTML_SKIP_HTML != 0 {
521		return true
522	}
523	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
524		return true
525	}
526	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
527		return true
528	}
529	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
530		return true
531	}
532	out.Write(text)
533	return true
534}
535
// htmlTripleEmphasis wraps the already-rendered text in <strong><em>.
// It returns false, writing nothing, when text is empty.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) > 0 {
		out.WriteString("<strong><em>")
		out.Write(text)
		out.WriteString("</em></strong>")
		return true
	}
	return false
}
545
// htmlStrikeThrough wraps the already-rendered text in <del>.
// It returns false, writing nothing, when text is empty.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) bool {
	if len(text) > 0 {
		out.WriteString("<del>")
		out.Write(text)
		out.WriteString("</del>")
		return true
	}
	return false
}
555
556func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
557	options := opaque.(*htmlOptions)
558	if options.flags&HTML_USE_SMARTYPANTS != 0 {
559		htmlSmartypants(out, text, opaque)
560	} else {
561		attrEscape(out, text)
562	}
563}
564
565func htmlTocHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
566	options := opaque.(*htmlOptions)
567	marker := out.Len()
568
569	for level > options.tocData.currentLevel {
570		if options.tocData.currentLevel > 0 {
571			out.WriteString("<li>")
572		}
573		out.WriteString("<ul>\n")
574		options.tocData.currentLevel++
575	}
576
577	for level < options.tocData.currentLevel {
578		out.WriteString("</ul>")
579		if options.tocData.currentLevel > 1 {
580			out.WriteString("</li>\n")
581		}
582		options.tocData.currentLevel--
583	}
584
585	out.WriteString("<li><a href=\"#toc_")
586	out.WriteString(strconv.Itoa(options.tocData.headerCount))
587	out.WriteString("\">")
588	options.tocData.headerCount++
589
590	if !text() {
591		out.Truncate(marker)
592		return
593	}
594	out.WriteString("</a></li>\n")
595}
596
597func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
598	options := opaque.(*htmlOptions)
599	for options.tocData.currentLevel > 1 {
600		out.WriteString("</ul></li>\n")
601		options.tocData.currentLevel--
602	}
603
604	if options.tocData.currentLevel > 0 {
605		out.WriteString("</ul>\n")
606	}
607}
608
609func isHtmlTag(tag []byte, tagname string) bool {
610	i := 0
611	if i < len(tag) && tag[0] != '<' {
612		return false
613	}
614	i++
615	for i < len(tag) && isspace(tag[i]) {
616		i++
617	}
618
619	if i < len(tag) && tag[i] == '/' {
620		i++
621	}
622
623	for i < len(tag) && isspace(tag[i]) {
624		i++
625	}
626
627	j := i
628	for ; i < len(tag); i, j = i+1, j+1 {
629		if j >= len(tagname) {
630			break
631		}
632
633		if tag[i] != tagname[j] {
634			return false
635		}
636	}
637
638	if i == len(tag) {
639		return false
640	}
641
642	return isspace(tag[i]) || tag[i] == '>'
643}