all repos — grayfriday @ de40da7ad692f7fb0c81bbc68ad49d27217c49f7

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Black Friday Markdown Processor
  3// Originally based on http://github.com/tanoku/upskirt
  4// by Russ Ross <russ@russross.com>
  5//
  6
  7//
  8//
  9// HTML rendering backend
 10//
 11//
 12
 13package blackfriday
 14
 15import (
 16	"bytes"
 17	"fmt"
 18	"strconv"
 19)
 20
 21//
 22//
 23// HTML rendering
 24//
 25//
 26
// Option flags for the HTML renderers. Each constant is a distinct bit, so
// flags are combined with bitwise OR and passed to HtmlRenderer or
// HtmlTocRenderer.
const (
	// Do not render raw HTML: block-level HTML gets no callback and inline
	// raw tags are dropped.
	HTML_SKIP_HTML = 1 << iota
	// Drop inline raw tags recognised as <style>.
	HTML_SKIP_STYLE
	// Do not render Markdown images, and drop inline raw tags recognised as <img>.
	HTML_SKIP_IMAGES
	// Do not render Markdown links, and drop inline raw tags recognised as <a>.
	HTML_SKIP_LINKS
	// Not read anywhere in this file.
	// NOTE(review): presumably consumed by the parser to expand tabs - confirm.
	HTML_EXPAND_TABS
	// Refuse to render links and autolinks that is_safe_link rejects.
	HTML_SAFELINK
	// Give every header an id="toc_N" attribute; always set by HtmlTocRenderer.
	HTML_TOC
	// Emit a line break tag for every newline inside a paragraph.
	HTML_HARD_WRAP
	// Render code blocks with rndr_blockcode_github (<pre lang="...">).
	HTML_GITHUB_BLOCKCODE
	// Terminate singleton tags with " />" instead of ">".
	HTML_USE_XHTML
	// Route normal text through the Smartypants renderer.
	HTML_USE_SMARTYPANTS
	// Not read in this file; the whole flag word is handed to Smartypants().
	// NOTE(review): presumably enables fraction substitution - confirm.
	HTML_SMARTYPANTS_FRACTIONS
	// Not read in this file; the whole flag word is handed to Smartypants().
	// NOTE(review): presumably enables LaTeX-style dash handling - confirm.
	HTML_SMARTYPANTS_LATEX_DASHES
)
 42
// htmlOptions is the per-renderer state that HtmlRenderer and HtmlTocRenderer
// store in Renderer.opaque. Every rndr_* callback that needs configuration
// recovers it with a type assertion on its opaque argument.
type htmlOptions struct {
	// Flags is the bitwise OR of the HTML_* option flags.
	Flags     int
	close_tag string // terminator for singleton tags: ">\n", or " />\n" when HTML_USE_XHTML is set
	// toc_data is mutable state shared across header callbacks.
	toc_data  struct {
		// header_count is the index used for the next toc_N id / anchor.
		header_count  int
		// current_level is the <ul> nesting depth currently open in the TOC output.
		current_level int
	}
	// smartypants is set by HtmlRenderer only when HTML_USE_SMARTYPANTS is on.
	smartypants *SmartypantsRenderer
}
 52
 53var xhtml_close = " />\n"
 54var html_close = ">\n"
 55
 56func HtmlRenderer(flags int) *Renderer {
 57	// configure the rendering engine
 58	r := new(Renderer)
 59	if flags&HTML_GITHUB_BLOCKCODE == 0 {
 60		r.blockcode = rndr_blockcode
 61	} else {
 62		r.blockcode = rndr_blockcode_github
 63	}
 64	r.blockquote = rndr_blockquote
 65	if flags&HTML_SKIP_HTML == 0 {
 66		r.blockhtml = rndr_raw_block
 67	}
 68	r.header = rndr_header
 69	r.hrule = rndr_hrule
 70	r.list = rndr_list
 71	r.listitem = rndr_listitem
 72	r.paragraph = rndr_paragraph
 73	r.table = rndr_table
 74	r.table_row = rndr_tablerow
 75	r.table_cell = rndr_tablecell
 76
 77	r.autolink = rndr_autolink
 78	r.codespan = rndr_codespan
 79	r.double_emphasis = rndr_double_emphasis
 80	r.emphasis = rndr_emphasis
 81	if flags&HTML_SKIP_IMAGES == 0 {
 82		r.image = rndr_image
 83	}
 84	r.linebreak = rndr_linebreak
 85	if flags&HTML_SKIP_LINKS == 0 {
 86		r.link = rndr_link
 87	}
 88	r.raw_html_tag = rndr_raw_html_tag
 89	r.triple_emphasis = rndr_triple_emphasis
 90	r.strikethrough = rndr_strikethrough
 91
 92	var cb *SmartypantsRenderer
 93	if flags&HTML_USE_SMARTYPANTS == 0 {
 94		r.normal_text = rndr_normal_text
 95	} else {
 96		cb = Smartypants(flags)
 97		r.normal_text = rndr_smartypants
 98	}
 99
100	close_tag := html_close
101	if flags&HTML_USE_XHTML != 0 {
102		close_tag = xhtml_close
103	}
104	r.opaque = &htmlOptions{Flags: flags, close_tag: close_tag, smartypants: cb}
105	return r
106}
107
108func HtmlTocRenderer(flags int) *Renderer {
109	// configure the rendering engine
110	r := new(Renderer)
111	r.header = rndr_toc_header
112
113	r.codespan = rndr_codespan
114	r.double_emphasis = rndr_double_emphasis
115	r.emphasis = rndr_emphasis
116	r.triple_emphasis = rndr_triple_emphasis
117	r.strikethrough = rndr_strikethrough
118
119	r.doc_footer = rndr_toc_finalize
120
121	close_tag := ">\n"
122	if flags&HTML_USE_XHTML != 0 {
123		close_tag = " />\n"
124	}
125	r.opaque = &htmlOptions{Flags: flags | HTML_TOC, close_tag: close_tag}
126	return r
127}
128
// attr_escape appends src to ob, replacing the four HTML-significant bytes
// '<', '>', '&' and '"' with &lt;, &gt;, &amp; and &quot;. All other bytes are
// copied through unchanged.
func attr_escape(ob *bytes.Buffer, src []byte) {
	pending := 0 // start of the run of literal bytes not yet written
	for pos, c := range src {
		var entity string
		switch c {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// flush the literal run that precedes this special byte
		if pos > pending {
			ob.Write(src[pending:pos])
		}
		ob.WriteString(entity)
		pending = pos + 1
	}

	if pending < len(src) {
		ob.Write(src[pending:])
	}
}
156
// unescape_text appends src to ob with backslash escapes removed: each
// backslash is dropped and the byte following it is copied verbatim. A lone
// backslash at the very end of src is dropped.
func unescape_text(ob *bytes.Buffer, src []byte) {
	for rest := src; len(rest) > 0; {
		slash := bytes.IndexByte(rest, '\\')
		if slash < 0 {
			// no more escapes: the remainder is literal
			ob.Write(rest)
			return
		}

		ob.Write(rest[:slash])
		if slash+1 >= len(rest) {
			// trailing backslash with nothing to escape
			return
		}

		ob.WriteByte(rest[slash+1])
		rest = rest[slash+2:]
	}
}
177
178func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
179	options := opaque.(*htmlOptions)
180
181	if ob.Len() > 0 {
182		ob.WriteByte('\n')
183	}
184
185	if options.Flags&HTML_TOC != 0 {
186		ob.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
187		options.toc_data.header_count++
188	} else {
189		ob.WriteString(fmt.Sprintf("<h%d>", level))
190	}
191
192	ob.Write(text)
193	ob.WriteString(fmt.Sprintf("</h%d>\n", level))
194}
195
// rndr_raw_block copies a raw HTML block to ob with all leading and trailing
// newlines removed, followed by exactly one newline. Nothing is written when
// the block consists solely of newlines (or is empty); otherwise a separating
// newline is written first if ob already holds output. opaque is unused.
func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}
	ob.Write(body)
	ob.WriteByte('\n')
}
214
215func rndr_hrule(ob *bytes.Buffer, opaque interface{}) {
216	options := opaque.(*htmlOptions)
217
218	if ob.Len() > 0 {
219		ob.WriteByte('\n')
220	}
221	ob.WriteString("<hr")
222	ob.WriteString(options.close_tag)
223}
224
225func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
226	if ob.Len() > 0 {
227		ob.WriteByte('\n')
228	}
229
230	if lang != "" {
231		ob.WriteString("<pre><code class=\"")
232
233		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
234			for i < len(lang) && isspace(lang[i]) {
235				i++
236			}
237
238			if i < len(lang) {
239				org := i
240				for i < len(lang) && !isspace(lang[i]) {
241					i++
242				}
243
244				if lang[org] == '.' {
245					org++
246				}
247
248				if cls > 0 {
249					ob.WriteByte(' ')
250				}
251				attr_escape(ob, []byte(lang[org:]))
252			}
253		}
254
255		ob.WriteString("\">")
256	} else {
257		ob.WriteString("<pre><code>")
258	}
259
260	if len(text) > 0 {
261		attr_escape(ob, text)
262	}
263
264	ob.WriteString("</code></pre>\n")
265}
266
267/*
268 * GitHub style code block:
269 *
270 *              <pre lang="LANG"><code>
271 *              ...
272 *              </pre></code>
273 *
274 * Unlike other parsers, we store the language identifier in the <pre>,
275 * and don't let the user generate custom classes.
276 *
277 * The language identifier in the <pre> block gets postprocessed and all
278 * the code inside gets syntax highlighted with Pygments. This is much safer
279 * than letting the user specify a CSS class for highlighting.
280 *
281 * Note that we only generate HTML for the first specifier.
282 * E.g.
283 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
284 */
285func rndr_blockcode_github(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
286	if ob.Len() > 0 {
287		ob.WriteByte('\n')
288	}
289
290	if len(lang) > 0 {
291		ob.WriteString("<pre lang=\"")
292
293		i := 0
294		for i < len(lang) && !isspace(lang[i]) {
295			i++
296		}
297
298		if lang[0] == '.' {
299			attr_escape(ob, []byte(lang[1:i]))
300		} else {
301			attr_escape(ob, []byte(lang[:i]))
302		}
303
304		ob.WriteString("\"><code>")
305	} else {
306		ob.WriteString("<pre><code>")
307	}
308
309	if len(text) > 0 {
310		attr_escape(ob, text)
311	}
312
313	ob.WriteString("</code></pre>\n")
314}
315
316
// rndr_blockquote wraps the already-rendered text in a <blockquote> element.
// Unlike the other block callbacks it writes neither a leading separator nor
// a trailing newline. opaque is unused.
func rndr_blockquote(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		opening = "<blockquote>\n"
		closing = "</blockquote>"
	)

	ob.WriteString(opening)
	ob.Write(text)
	ob.WriteString(closing)
}
322
// rndr_table writes a <table> whose <thead> holds the rendered header rows
// and whose <tbody> holds the rendered body rows. A newline is written first
// when ob already holds output; no newline follows the closing tag.
// opaque is unused.
func rndr_table(ob *bytes.Buffer, header, body []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	// head section
	ob.WriteString("<table><thead>\n")
	ob.Write(header)

	// body section
	ob.WriteString("\n</thead><tbody>\n")
	ob.Write(body)

	ob.WriteString("\n</tbody></table>")
}
333
// rndr_tablerow wraps the rendered cells of one row in <tr>...</tr>, placing
// the cells on their own lines. A newline is written first when ob already
// holds output; no newline follows the closing tag. opaque is unused.
func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		opening = "<tr>\n"
		closing = "\n</tr>"
	)

	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}
	ob.WriteString(opening)
	ob.Write(text)
	ob.WriteString(closing)
}
342
343func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) {
344	if ob.Len() > 0 {
345		ob.WriteByte('\n')
346	}
347	switch align {
348	case TABLE_ALIGNMENT_LEFT:
349		ob.WriteString("<td align=\"left\">")
350	case TABLE_ALIGNMENT_RIGHT:
351		ob.WriteString("<td align=\"right\">")
352	case TABLE_ALIGNMENT_CENTER:
353		ob.WriteString("<td align=\"center\">")
354	default:
355		ob.WriteString("<td>")
356	}
357
358	ob.Write(text)
359	ob.WriteString("</td>")
360}
361
362func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
363	if ob.Len() > 0 {
364		ob.WriteByte('\n')
365	}
366	if flags&LIST_TYPE_ORDERED != 0 {
367		ob.WriteString("<ol>\n")
368	} else {
369		ob.WriteString("<ul>\n")
370	}
371	ob.Write(text)
372	if flags&LIST_TYPE_ORDERED != 0 {
373		ob.WriteString("</ol>\n")
374	} else {
375		ob.WriteString("</ul>\n")
376	}
377}
378
// rndr_listitem writes one <li> element. Trailing newlines are stripped from
// text so the closing tag follows the content directly. flags and opaque are
// unused.
func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	content := bytes.TrimRight(text, "\n")

	ob.WriteString("<li>")
	ob.Write(content)
	ob.WriteString("</li>\n")
}
388
389func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) {
390	options := opaque.(*htmlOptions)
391	i := 0
392
393	if ob.Len() > 0 {
394		ob.WriteByte('\n')
395	}
396
397	if len(text) == 0 {
398		return
399	}
400
401	for i < len(text) && isspace(text[i]) {
402		i++
403	}
404
405	if i == len(text) {
406		return
407	}
408
409	ob.WriteString("<p>")
410	if options.Flags&HTML_HARD_WRAP != 0 {
411		for i < len(text) {
412			org := i
413			for i < len(text) && text[i] != '\n' {
414				i++
415			}
416
417			if i > org {
418				ob.Write(text[org:i])
419			}
420
421			if i >= len(text) {
422				break
423			}
424
425			ob.WriteString("<br>")
426			ob.WriteString(options.close_tag)
427			i++
428		}
429	} else {
430		ob.Write(text[i:])
431	}
432	ob.WriteString("</p>\n")
433}
434
435func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
436	options := opaque.(*htmlOptions)
437
438	if len(link) == 0 {
439		return 0
440	}
441	if options.Flags&HTML_SAFELINK != 0 && !is_safe_link(link) && kind != LINK_TYPE_EMAIL {
442		return 0
443	}
444
445	ob.WriteString("<a href=\"")
446	if kind == LINK_TYPE_EMAIL {
447		ob.WriteString("mailto:")
448	}
449	ob.Write(link)
450	ob.WriteString("\">")
451
452	/*
453	 * Pretty print: if we get an email address as
454	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
455	 * want to print the `mailto:` prefix
456	 */
457	if bytes.HasPrefix(link, []byte("mailto:")) {
458		attr_escape(ob, link[7:])
459	} else {
460		attr_escape(ob, link)
461	}
462
463	ob.WriteString("</a>")
464
465	return 1
466}
467
468func rndr_codespan(ob *bytes.Buffer, text []byte, opaque interface{}) int {
469	ob.WriteString("<code>")
470	attr_escape(ob, text)
471	ob.WriteString("</code>")
472	return 1
473}
474
// rndr_double_emphasis wraps the rendered text in <strong>...</strong> and
// returns 1. Empty text is rejected: nothing is written and 0 is returned.
// opaque is unused.
func rndr_double_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<strong>")
		ob.Write(text)
		ob.WriteString("</strong>")
		return 1
	}
	return 0
}
484
// rndr_emphasis wraps the rendered text in <em>...</em> and returns 1.
// Empty text is rejected: nothing is written and 0 is returned.
// opaque is unused.
func rndr_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<em>")
		ob.Write(text)
		ob.WriteString("</em>")
		return 1
	}
	return 0
}
494
495func rndr_image(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
496	options := opaque.(*htmlOptions)
497	if len(link) == 0 {
498		return 0
499	}
500	ob.WriteString("<img src=\"")
501	attr_escape(ob, link)
502	ob.WriteString("\" alt=\"")
503	if len(alt) > 0 {
504		attr_escape(ob, alt)
505	}
506	if len(title) > 0 {
507		ob.WriteString("\" title=\"")
508		attr_escape(ob, title)
509	}
510
511	ob.WriteByte('"')
512	ob.WriteString(options.close_tag)
513	return 1
514}
515
516func rndr_linebreak(ob *bytes.Buffer, opaque interface{}) int {
517	options := opaque.(*htmlOptions)
518	ob.WriteString("<br")
519	ob.WriteString(options.close_tag)
520	return 1
521}
522
523func rndr_link(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
524	options := opaque.(*htmlOptions)
525
526	if options.Flags&HTML_SAFELINK != 0 && !is_safe_link(link) {
527		return 0
528	}
529
530	ob.WriteString("<a href=\"")
531	if len(link) > 0 {
532		ob.Write(link)
533	}
534	if len(title) > 0 {
535		ob.WriteString("\" title=\"")
536		attr_escape(ob, title)
537	}
538	ob.WriteString("\">")
539	if len(content) > 0 {
540		ob.Write(content)
541	}
542	ob.WriteString("</a>")
543	return 1
544}
545
546func rndr_raw_html_tag(ob *bytes.Buffer, text []byte, opaque interface{}) int {
547	options := opaque.(*htmlOptions)
548	if options.Flags&HTML_SKIP_HTML != 0 {
549		return 1
550	}
551	if options.Flags&HTML_SKIP_STYLE != 0 && is_html_tag(text, "style") {
552		return 1
553	}
554	if options.Flags&HTML_SKIP_LINKS != 0 && is_html_tag(text, "a") {
555		return 1
556	}
557	if options.Flags&HTML_SKIP_IMAGES != 0 && is_html_tag(text, "img") {
558		return 1
559	}
560	ob.Write(text)
561	return 1
562}
563
// rndr_triple_emphasis wraps the rendered text in
// <strong><em>...</em></strong> and returns 1. Empty text is rejected:
// nothing is written and 0 is returned. opaque is unused.
func rndr_triple_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<strong><em>")
		ob.Write(text)
		ob.WriteString("</em></strong>")
		return 1
	}
	return 0
}
573
// rndr_strikethrough wraps the rendered text in <del>...</del> and returns 1.
// Empty text is rejected: nothing is written and 0 is returned.
// opaque is unused.
func rndr_strikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<del>")
		ob.Write(text)
		ob.WriteString("</del>")
		return 1
	}
	return 0
}
583
// rndr_normal_text is the normal_text callback that HtmlRenderer installs
// when HTML_USE_SMARTYPANTS is not set. It appends text to ob via
// attr_escape, i.e. with '<', '>', '&' and '"' replaced by HTML entities.
// opaque is unused.
func rndr_normal_text(ob *bytes.Buffer, text []byte, opaque interface{}) {
	attr_escape(ob, text)
}
587
588func rndr_toc_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
589	options := opaque.(*htmlOptions)
590	for level > options.toc_data.current_level {
591		if options.toc_data.current_level > 0 {
592			ob.WriteString("<li>")
593		}
594		ob.WriteString("<ul>\n")
595		options.toc_data.current_level++
596	}
597
598	for level < options.toc_data.current_level {
599		ob.WriteString("</ul>")
600		if options.toc_data.current_level > 1 {
601			ob.WriteString("</li>\n")
602		}
603		options.toc_data.current_level--
604	}
605
606	ob.WriteString("<li><a href=\"#toc_")
607	ob.WriteString(strconv.Itoa(options.toc_data.header_count))
608	ob.WriteString("\">")
609	options.toc_data.header_count++
610
611	if len(text) > 0 {
612		ob.Write(text)
613	}
614	ob.WriteString("</a></li>\n")
615}
616
617func rndr_toc_finalize(ob *bytes.Buffer, opaque interface{}) {
618	options := opaque.(*htmlOptions)
619	for options.toc_data.current_level > 1 {
620		ob.WriteString("</ul></li>\n")
621		options.toc_data.current_level--
622	}
623
624	if options.toc_data.current_level > 0 {
625		ob.WriteString("</ul>\n")
626	}
627}
628
629func is_html_tag(tag []byte, tagname string) bool {
630	i := 0
631	if i < len(tag) && tag[0] != '<' {
632		return false
633	}
634	i++
635	for i < len(tag) && isspace(tag[i]) {
636		i++
637	}
638
639	if i < len(tag) && tag[i] == '/' {
640		i++
641	}
642
643	for i < len(tag) && isspace(tag[i]) {
644		i++
645	}
646
647	tag_i := i
648	for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
649		if tag_i >= len(tagname) {
650			break
651		}
652
653		if tag[i] != tagname[tag_i] {
654			return false
655		}
656	}
657
658	if i == len(tag) {
659		return false
660	}
661
662	return isspace(tag[i]) || tag[i] == '>'
663}