all repos — grayfriday @ cd7b9521488b24c6f4ff98c55452879cf73b8e98

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Black Friday Markdown Processor
  3// Originally based on http://github.com/tanoku/upskirt
  4// by Russ Ross <russ@russross.com>
  5//
  6
  7//
  8//
  9// HTML rendering backend
 10//
 11//
 12
 13package blackfriday
 14
 15import (
 16	"bytes"
 17	"fmt"
 18	"strconv"
 19)
 20
 21const (
 22	HTML_SKIP_HTML = 1 << iota
 23	HTML_SKIP_STYLE
 24	HTML_SKIP_IMAGES
 25	HTML_SKIP_LINKS
 26	HTML_EXPAND_TABS
 27	HTML_SAFELINK
 28	HTML_TOC
 29	HTML_HARD_WRAP
 30	HTML_GITHUB_BLOCKCODE
 31	HTML_USE_XHTML
 32	HTML_USE_SMARTYPANTS
 33	HTML_SMARTYPANTS_FRACTIONS
 34	HTML_SMARTYPANTS_LATEX_DASHES
 35)
 36
 37type htmlOptions struct {
 38	flags     int
 39	close_tag string // how to end singleton tags: usually " />\n", possibly ">\n"
 40	toc_data  struct {
 41		header_count  int
 42		current_level int
 43	}
 44	smartypants *SmartypantsRenderer
 45}
 46
 47var xhtml_close = " />\n"
 48var html_close = ">\n"
 49
 50func HtmlRenderer(flags int) *Renderer {
 51	// configure the rendering engine
 52	r := new(Renderer)
 53	if flags&HTML_GITHUB_BLOCKCODE == 0 {
 54		r.blockcode = htmlBlockcode
 55	} else {
 56		r.blockcode = htmlBlockcodeGithub
 57	}
 58	r.blockquote = htmlBlockquote
 59	if flags&HTML_SKIP_HTML == 0 {
 60		r.blockhtml = htmlRawBlock
 61	}
 62	r.header = htmlHeader
 63	r.hrule = htmlHrule
 64	r.list = htmlList
 65	r.listitem = htmlListitem
 66	r.paragraph = htmlParagraph
 67	r.table = htmlTable
 68	r.tableRow = htmlTableRow
 69	r.tableCell = htmlTableCell
 70
 71	r.autolink = htmlAutolink
 72	r.codespan = htmlCodespan
 73	r.doubleEmphasis = htmlDoubleEmphasis
 74	r.emphasis = htmlEmphasis
 75	if flags&HTML_SKIP_IMAGES == 0 {
 76		r.image = htmlImage
 77	}
 78	r.linebreak = htmlLinebreak
 79	if flags&HTML_SKIP_LINKS == 0 {
 80		r.link = htmlLink
 81	}
 82	r.rawHtmlTag = htmlRawTag
 83	r.tripleEmphasis = htmlTripleEmphasis
 84	r.strikethrough = htmlStrikethrough
 85
 86	var cb *SmartypantsRenderer
 87	if flags&HTML_USE_SMARTYPANTS == 0 {
 88		r.normalText = htmlNormalText
 89	} else {
 90		cb = Smartypants(flags)
 91		r.normalText = htmlSmartypants
 92	}
 93
 94	close_tag := html_close
 95	if flags&HTML_USE_XHTML != 0 {
 96		close_tag = xhtml_close
 97	}
 98	r.opaque = &htmlOptions{flags: flags, close_tag: close_tag, smartypants: cb}
 99	return r
100}
101
102func HtmlTocRenderer(flags int) *Renderer {
103	// configure the rendering engine
104	r := new(Renderer)
105	r.header = htmlTocHeader
106
107	r.codespan = htmlCodespan
108	r.doubleEmphasis = htmlDoubleEmphasis
109	r.emphasis = htmlEmphasis
110	r.tripleEmphasis = htmlTripleEmphasis
111	r.strikethrough = htmlStrikethrough
112
113	r.documentFooter = htmlTocFinalize
114
115	close_tag := ">\n"
116	if flags&HTML_USE_XHTML != 0 {
117		close_tag = " />\n"
118	}
119	r.opaque = &htmlOptions{flags: flags | HTML_TOC, close_tag: close_tag}
120	return r
121}
122
// attrEscape appends src to out, replacing the characters <, >, & and "
// with their HTML entities. All other bytes are copied through unchanged.
func attrEscape(out *bytes.Buffer, src []byte) {
	start := 0 // beginning of the pending run of unescaped bytes
	for pos, ch := range src {
		var entity string
		switch ch {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// flush the literal run that precedes this character, then the entity
		if pos > start {
			out.Write(src[start:pos])
		}
		out.WriteString(entity)
		start = pos + 1
	}

	// trailing literal run
	if start < len(src) {
		out.Write(src[start:])
	}
}
150
151func htmlHeader(out *bytes.Buffer, text []byte, level int, opaque interface{}) {
152	options := opaque.(*htmlOptions)
153
154	if out.Len() > 0 {
155		out.WriteByte('\n')
156	}
157
158	if options.flags&HTML_TOC != 0 {
159		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
160		options.toc_data.header_count++
161	} else {
162		out.WriteString(fmt.Sprintf("<h%d>", level))
163	}
164
165	out.Write(text)
166	out.WriteString(fmt.Sprintf("</h%d>\n", level))
167}
168
// htmlRawBlock copies a raw HTML block to out verbatim, with leading and
// trailing newlines removed and a single newline appended. Nothing is
// written when the block consists only of newlines. A separating newline is
// written first when out already holds content.
func htmlRawBlock(out *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if out.Len() > 0 {
		out.WriteByte('\n')
	}
	out.Write(body)
	out.WriteByte('\n')
}
187
188func htmlHrule(out *bytes.Buffer, opaque interface{}) {
189	options := opaque.(*htmlOptions)
190
191	if out.Len() > 0 {
192		out.WriteByte('\n')
193	}
194	out.WriteString("<hr")
195	out.WriteString(options.close_tag)
196}
197
198func htmlBlockcode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
199	if out.Len() > 0 {
200		out.WriteByte('\n')
201	}
202
203	if lang != "" {
204		out.WriteString("<pre><code class=\"")
205
206		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
207			for i < len(lang) && isspace(lang[i]) {
208				i++
209			}
210
211			if i < len(lang) {
212				org := i
213				for i < len(lang) && !isspace(lang[i]) {
214					i++
215				}
216
217				if lang[org] == '.' {
218					org++
219				}
220
221				if cls > 0 {
222					out.WriteByte(' ')
223				}
224				attrEscape(out, []byte(lang[org:]))
225			}
226		}
227
228		out.WriteString("\">")
229	} else {
230		out.WriteString("<pre><code>")
231	}
232
233	if len(text) > 0 {
234		attrEscape(out, text)
235	}
236
237	out.WriteString("</code></pre>\n")
238}
239
240/*
241 * GitHub style code block:
242 *
243 *              <pre lang="LANG"><code>
244 *              ...
245 *              </pre></code>
246 *
247 * Unlike other parsers, we store the language identifier in the <pre>,
248 * and don't let the user generate custom classes.
249 *
250 * The language identifier in the <pre> block gets postprocessed and all
251 * the code inside gets syntax highlighted with Pygments. This is much safer
252 * than letting the user specify a CSS class for highlighting.
253 *
254 * Note that we only generate HTML for the first specifier.
255 * E.g.
256 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
257 */
258func htmlBlockcodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
259	if out.Len() > 0 {
260		out.WriteByte('\n')
261	}
262
263	if len(lang) > 0 {
264		out.WriteString("<pre lang=\"")
265
266		i := 0
267		for i < len(lang) && !isspace(lang[i]) {
268			i++
269		}
270
271		if lang[0] == '.' {
272			attrEscape(out, []byte(lang[1:i]))
273		} else {
274			attrEscape(out, []byte(lang[:i]))
275		}
276
277		out.WriteString("\"><code>")
278	} else {
279		out.WriteString("<pre><code>")
280	}
281
282	if len(text) > 0 {
283		attrEscape(out, text)
284	}
285
286	out.WriteString("</code></pre>\n")
287}
288
289
// htmlBlockquote wraps the already-rendered text in a <blockquote> element.
// The opening tag is followed by a newline; the closing tag is not.
func htmlBlockquote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		opening = "<blockquote>\n"
		closing = "</blockquote>"
	)
	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
}
295
// htmlTable writes a <table> made of the already-rendered header rows
// (inside <thead>) and body rows (inside <tbody>). columnData is not used.
// A newline is written first when out already holds content.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	sections := []struct {
		before  string
		content []byte
	}{
		{"<table><thead>\n", header},
		{"\n</thead><tbody>\n", body},
	}
	for _, s := range sections {
		out.WriteString(s.before)
		out.Write(s.content)
	}
	out.WriteString("\n</tbody></table>")
}
306
// htmlTableRow wraps the already-rendered cells in a <tr> element, preceded
// by a newline when out already holds content.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	const opening, closing = "<tr>\n", "\n</tr>"
	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
}
315
316func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
317	if out.Len() > 0 {
318		out.WriteByte('\n')
319	}
320	switch align {
321	case TABLE_ALIGNMENT_LEFT:
322		out.WriteString("<td align=\"left\">")
323	case TABLE_ALIGNMENT_RIGHT:
324		out.WriteString("<td align=\"right\">")
325	case TABLE_ALIGNMENT_CENTER:
326		out.WriteString("<td align=\"center\">")
327	default:
328		out.WriteString("<td>")
329	}
330
331	out.Write(text)
332	out.WriteString("</td>")
333}
334
335func htmlList(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
336	if out.Len() > 0 {
337		out.WriteByte('\n')
338	}
339	if flags&LIST_TYPE_ORDERED != 0 {
340		out.WriteString("<ol>\n")
341	} else {
342		out.WriteString("<ul>\n")
343	}
344	out.Write(text)
345	if flags&LIST_TYPE_ORDERED != 0 {
346		out.WriteString("</ol>\n")
347	} else {
348		out.WriteString("</ul>\n")
349	}
350}
351
// htmlListitem writes a single <li> element. Trailing newlines of the
// already-rendered text are dropped so the closing tag follows the content
// directly. flags is not used.
func htmlListitem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	body := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(body)
	out.WriteString("</li>\n")
}
361
362func htmlParagraph(out *bytes.Buffer, text []byte, opaque interface{}) {
363	options := opaque.(*htmlOptions)
364	i := 0
365
366	if out.Len() > 0 {
367		out.WriteByte('\n')
368	}
369
370	if len(text) == 0 {
371		return
372	}
373
374	for i < len(text) && isspace(text[i]) {
375		i++
376	}
377
378	if i == len(text) {
379		return
380	}
381
382	out.WriteString("<p>")
383	if options.flags&HTML_HARD_WRAP != 0 {
384		for i < len(text) {
385			org := i
386			for i < len(text) && text[i] != '\n' {
387				i++
388			}
389
390			if i > org {
391				out.Write(text[org:i])
392			}
393
394			if i >= len(text) {
395				break
396			}
397
398			out.WriteString("<br>")
399			out.WriteString(options.close_tag)
400			i++
401		}
402	} else {
403		out.Write(text[i:])
404	}
405	out.WriteString("</p>\n")
406}
407
408func htmlAutolink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
409	options := opaque.(*htmlOptions)
410
411	if len(link) == 0 {
412		return 0
413	}
414	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
415		return 0
416	}
417
418	out.WriteString("<a href=\"")
419	if kind == LINK_TYPE_EMAIL {
420		out.WriteString("mailto:")
421	}
422	out.Write(link)
423	out.WriteString("\">")
424
425	/*
426	 * Pretty print: if we get an email address as
427	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
428	 * want to print the `mailto:` prefix
429	 */
430	if bytes.HasPrefix(link, []byte("mailto:")) {
431		attrEscape(out, link[7:])
432	} else {
433		attrEscape(out, link)
434	}
435
436	out.WriteString("</a>")
437
438	return 1
439}
440
441func htmlCodespan(out *bytes.Buffer, text []byte, opaque interface{}) int {
442	out.WriteString("<code>")
443	attrEscape(out, text)
444	out.WriteString("</code>")
445	return 1
446}
447
// htmlDoubleEmphasis wraps the already-rendered text in <strong>. It writes
// nothing and returns 0 for empty text; otherwise it returns 1.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) < 1 {
		return 0
	}

	pieces := [][]byte{[]byte("<strong>"), text, []byte("</strong>")}
	for _, piece := range pieces {
		out.Write(piece)
	}
	return 1
}
457
// htmlEmphasis wraps the already-rendered text in <em>. It writes nothing
// and returns 0 for empty text; otherwise it returns 1.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const opening, closing = "<em>", "</em>"

	if len(text) == 0 {
		return 0
	}

	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
	return 1
}
467
468func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
469	options := opaque.(*htmlOptions)
470	if len(link) == 0 {
471		return 0
472	}
473	out.WriteString("<img src=\"")
474	attrEscape(out, link)
475	out.WriteString("\" alt=\"")
476	if len(alt) > 0 {
477		attrEscape(out, alt)
478	}
479	if len(title) > 0 {
480		out.WriteString("\" title=\"")
481		attrEscape(out, title)
482	}
483
484	out.WriteByte('"')
485	out.WriteString(options.close_tag)
486	return 1
487}
488
489func htmlLinebreak(out *bytes.Buffer, opaque interface{}) int {
490	options := opaque.(*htmlOptions)
491	out.WriteString("<br")
492	out.WriteString(options.close_tag)
493	return 1
494}
495
496func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
497	options := opaque.(*htmlOptions)
498
499	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
500		return 0
501	}
502
503	out.WriteString("<a href=\"")
504	if len(link) > 0 {
505		out.Write(link)
506	}
507	if len(title) > 0 {
508		out.WriteString("\" title=\"")
509		attrEscape(out, title)
510	}
511	out.WriteString("\">")
512	if len(content) > 0 {
513		out.Write(content)
514	}
515	out.WriteString("</a>")
516	return 1
517}
518
519func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) int {
520	options := opaque.(*htmlOptions)
521	if options.flags&HTML_SKIP_HTML != 0 {
522		return 1
523	}
524	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
525		return 1
526	}
527	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
528		return 1
529	}
530	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
531		return 1
532	}
533	out.Write(text)
534	return 1
535}
536
// htmlTripleEmphasis wraps the already-rendered text in <strong><em>. It
// writes nothing and returns 0 for empty text; otherwise it returns 1.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) < 1 {
		return 0
	}

	// string(text) is a byte-exact copy, so the output is unchanged
	out.WriteString("<strong><em>" + string(text) + "</em></strong>")
	return 1
}
546
// htmlStrikethrough wraps the already-rendered text in <del>. It writes
// nothing and returns 0 for empty text; otherwise it returns 1.
func htmlStrikethrough(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) == 0 {
		return 0
	}

	for _, piece := range [...][]byte{[]byte("<del>"), text, []byte("</del>")} {
		out.Write(piece)
	}
	return 1
}
556
557func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
558	attrEscape(out, text)
559}
560
561func htmlTocHeader(out *bytes.Buffer, text []byte, level int, opaque interface{}) {
562	options := opaque.(*htmlOptions)
563	for level > options.toc_data.current_level {
564		if options.toc_data.current_level > 0 {
565			out.WriteString("<li>")
566		}
567		out.WriteString("<ul>\n")
568		options.toc_data.current_level++
569	}
570
571	for level < options.toc_data.current_level {
572		out.WriteString("</ul>")
573		if options.toc_data.current_level > 1 {
574			out.WriteString("</li>\n")
575		}
576		options.toc_data.current_level--
577	}
578
579	out.WriteString("<li><a href=\"#toc_")
580	out.WriteString(strconv.Itoa(options.toc_data.header_count))
581	out.WriteString("\">")
582	options.toc_data.header_count++
583
584	if len(text) > 0 {
585		out.Write(text)
586	}
587	out.WriteString("</a></li>\n")
588}
589
590func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
591	options := opaque.(*htmlOptions)
592	for options.toc_data.current_level > 1 {
593		out.WriteString("</ul></li>\n")
594		options.toc_data.current_level--
595	}
596
597	if options.toc_data.current_level > 0 {
598		out.WriteString("</ul>\n")
599	}
600}
601
602func isHtmlTag(tag []byte, tagname string) bool {
603	i := 0
604	if i < len(tag) && tag[0] != '<' {
605		return false
606	}
607	i++
608	for i < len(tag) && isspace(tag[i]) {
609		i++
610	}
611
612	if i < len(tag) && tag[i] == '/' {
613		i++
614	}
615
616	for i < len(tag) && isspace(tag[i]) {
617		i++
618	}
619
620	tag_i := i
621	for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
622		if tag_i >= len(tagname) {
623			break
624		}
625
626		if tag[i] != tagname[tag_i] {
627			return false
628		}
629	}
630
631	if i == len(tag) {
632		return false
633	}
634
635	return isspace(tag[i]) || tag[i] == '>'
636}