all repos — grayfriday @ 4b850e809858ee051a99b751e637657f2ee9f26f

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Black Friday Markdown Processor
  3// Originally based on http://github.com/tanoku/upskirt
  4// by Russ Ross <russ@russross.com>
  5//
  6
  7//
  8//
  9// HTML rendering backend
 10//
 11//
 12
 13package blackfriday
 14
 15import (
 16	"bytes"
 17	"fmt"
 18	"strconv"
 19)
 20
// Rendering flags for the HTML backend. Every constant is a distinct bit
// (1 << iota), so several can be OR-ed together and passed as the flags
// argument of HtmlRenderer or HtmlTocRenderer.
const (
	// Leave raw HTML blocks unrendered and drop inline raw HTML tags.
	HTML_SKIP_HTML = 1 << iota
	// Drop inline raw tags recognised as <style>.
	HTML_SKIP_STYLE
	// Do not render Markdown images; drop inline raw <img> tags.
	HTML_SKIP_IMAGES
	// Do not render Markdown links; drop inline raw <a> tags.
	HTML_SKIP_LINKS
	// Not read anywhere in this file.
	// NOTE(review): presumably consumed by the parser to expand tabs - confirm.
	HTML_EXPAND_TABS
	// Refuse to render links for which isSafeLink reports false
	// (e-mail autolinks are exempt).
	HTML_SAFELINK
	// Give every header an id="toc_N" anchor; HtmlTocRenderer always sets it.
	HTML_TOC
	// Turn every newline inside a paragraph into a line break tag.
	HTML_HARD_WRAP
	// Render code blocks with htmlBlockCodeGithub instead of htmlBlockCode.
	HTML_GITHUB_BLOCKCODE
	// Close singleton tags with " />" instead of ">".
	HTML_USE_XHTML
	// Route normal text through htmlSmartypants instead of htmlNormalText.
	HTML_USE_SMARTYPANTS
	// Not read in this file; forwarded to Smartypants as part of flags.
	// NOTE(review): presumably enables fraction substitution - confirm.
	HTML_SMARTYPANTS_FRACTIONS
	// Not read in this file; forwarded to Smartypants as part of flags.
	// NOTE(review): presumably selects LaTeX-style dash handling - confirm.
	HTML_SMARTYPANTS_LATEX_DASHES
)
 36
// htmlOptions is the per-renderer state stored in Renderer.Opaque and handed
// back to every HTML callback through its opaque argument.
type htmlOptions struct {
	flags    int    // bitwise OR of the HTML_* rendering flags
	closeTag string // how to end singleton tags: usually " />\n", possibly ">\n"
	// tocData is mutated while rendering headers.
	tocData struct {
		headerCount  int // number of headers emitted so far; used as the toc_N suffix
		currentLevel int // current nesting depth of the <ul> lists in the table of contents
	}
	smartypants *SmartypantsRenderer // set only when HTML_USE_SMARTYPANTS is requested, otherwise nil
}
 46
// xhtmlClose terminates singleton tags when HTML_USE_XHTML is set.
var xhtmlClose = " />\n"

// htmlClose terminates singleton tags in plain HTML mode (the default).
var htmlClose = ">\n"
 49
 50func HtmlRenderer(flags int) *Renderer {
 51	// configure the rendering engine
 52	r := new(Renderer)
 53	if flags&HTML_GITHUB_BLOCKCODE == 0 {
 54		r.BlockCode = htmlBlockCode
 55	} else {
 56		r.BlockCode = htmlBlockCodeGithub
 57	}
 58	r.BlockQuote = htmlBlockQuote
 59	if flags&HTML_SKIP_HTML == 0 {
 60		r.BlockHtml = htmlRawBlock
 61	}
 62	r.Header = htmlHeader
 63	r.HRule = htmlHRule
 64	r.List = htmlList
 65	r.ListItem = htmlListItem
 66	r.Paragraph = htmlParagraph
 67	r.Table = htmlTable
 68	r.TableRow = htmlTableRow
 69	r.TableCell = htmlTableCell
 70
 71	r.AutoLink = htmlAutoLink
 72	r.CodeSpan = htmlCodeSpan
 73	r.DoubleEmphasis = htmlDoubleEmphasis
 74	r.Emphasis = htmlEmphasis
 75	if flags&HTML_SKIP_IMAGES == 0 {
 76		r.Image = htmlImage
 77	}
 78	r.LineBreak = htmlLineBreak
 79	if flags&HTML_SKIP_LINKS == 0 {
 80		r.Link = htmlLink
 81	}
 82	r.RawHtmlTag = htmlRawTag
 83	r.TripleEmphasis = htmlTripleEmphasis
 84	r.StrikeThrough = htmlStrikeThrough
 85
 86	var cb *SmartypantsRenderer
 87	if flags&HTML_USE_SMARTYPANTS == 0 {
 88		r.NormalText = htmlNormalText
 89	} else {
 90		cb = Smartypants(flags)
 91		r.NormalText = htmlSmartypants
 92	}
 93
 94	closeTag := htmlClose
 95	if flags&HTML_USE_XHTML != 0 {
 96		closeTag = xhtmlClose
 97	}
 98	r.Opaque = &htmlOptions{flags: flags, closeTag: closeTag, smartypants: cb}
 99	return r
100}
101
102func HtmlTocRenderer(flags int) *Renderer {
103	// configure the rendering engine
104	r := new(Renderer)
105	r.Header = htmlTocHeader
106
107	r.CodeSpan = htmlCodeSpan
108	r.DoubleEmphasis = htmlDoubleEmphasis
109	r.Emphasis = htmlEmphasis
110	r.TripleEmphasis = htmlTripleEmphasis
111	r.StrikeThrough = htmlStrikeThrough
112
113	r.DocumentFooter = htmlTocFinalize
114
115	closeTag := ">\n"
116	if flags&HTML_USE_XHTML != 0 {
117		closeTag = " />\n"
118	}
119	r.Opaque = &htmlOptions{flags: flags | HTML_TOC, closeTag: closeTag}
120	return r
121}
122
// attrEscape appends src to out, replacing the four characters that are
// significant in HTML text and double-quoted attributes:
//
//	<  ->  &lt;     >  ->  &gt;     &  ->  &amp;     "  ->  &quot;
//
// All other bytes are copied through unchanged.
func attrEscape(out *bytes.Buffer, src []byte) {
	pending := 0 // start of the run of bytes not yet copied to out
	for pos, ch := range src {
		var entity string
		switch ch {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// flush the literal run preceding the special character
		if pos > pending {
			out.Write(src[pending:pos])
		}
		out.WriteString(entity)
		pending = pos + 1
	}

	// flush whatever follows the last special character
	if pending < len(src) {
		out.Write(src[pending:])
	}
}
150
151func htmlHeader(out *bytes.Buffer, text []byte, level int, opaque interface{}) {
152	options := opaque.(*htmlOptions)
153
154	if out.Len() > 0 {
155		out.WriteByte('\n')
156	}
157
158	if options.flags&HTML_TOC != 0 {
159		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.tocData.headerCount))
160		options.tocData.headerCount++
161	} else {
162		out.WriteString(fmt.Sprintf("<h%d>", level))
163	}
164
165	out.Write(text)
166	out.WriteString(fmt.Sprintf("</h%d>\n", level))
167}
168
// htmlRawBlock copies a raw HTML block to out verbatim, after stripping
// leading and trailing newlines, and terminates it with a single newline.
// Nothing is written when the block consists solely of newlines (or is
// empty). A separating newline is written first when out already holds
// content.
func htmlRawBlock(out *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if out.Len() > 0 {
		out.WriteByte('\n')
	}
	out.Write(body)
	out.WriteByte('\n')
}
187
188func htmlHRule(out *bytes.Buffer, opaque interface{}) {
189	options := opaque.(*htmlOptions)
190
191	if out.Len() > 0 {
192		out.WriteByte('\n')
193	}
194	out.WriteString("<hr")
195	out.WriteString(options.closeTag)
196}
197
198func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
199	if out.Len() > 0 {
200		out.WriteByte('\n')
201	}
202
203	if lang != "" {
204		out.WriteString("<pre><code class=\"")
205
206		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
207			for i < len(lang) && isspace(lang[i]) {
208				i++
209			}
210
211			if i < len(lang) {
212				org := i
213				for i < len(lang) && !isspace(lang[i]) {
214					i++
215				}
216
217				if lang[org] == '.' {
218					org++
219				}
220
221				if cls > 0 {
222					out.WriteByte(' ')
223				}
224				attrEscape(out, []byte(lang[org:]))
225			}
226		}
227
228		out.WriteString("\">")
229	} else {
230		out.WriteString("<pre><code>")
231	}
232
233	if len(text) > 0 {
234		attrEscape(out, text)
235	}
236
237	out.WriteString("</code></pre>\n")
238}
239
240/*
241 * GitHub style code block:
242 *
243 *              <pre lang="LANG"><code>
244 *              ...
245 *              </pre></code>
246 *
247 * Unlike other parsers, we store the language identifier in the <pre>,
248 * and don't let the user generate custom classes.
249 *
250 * The language identifier in the <pre> block gets postprocessed and all
251 * the code inside gets syntax highlighted with Pygments. This is much safer
252 * than letting the user specify a CSS class for highlighting.
253 *
254 * Note that we only generate HTML for the first specifier.
255 * E.g.
256 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
257 */
258func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
259	if out.Len() > 0 {
260		out.WriteByte('\n')
261	}
262
263	if len(lang) > 0 {
264		out.WriteString("<pre lang=\"")
265
266		i := 0
267		for i < len(lang) && !isspace(lang[i]) {
268			i++
269		}
270
271		if lang[0] == '.' {
272			attrEscape(out, []byte(lang[1:i]))
273		} else {
274			attrEscape(out, []byte(lang[:i]))
275		}
276
277		out.WriteString("\"><code>")
278	} else {
279		out.WriteString("<pre><code>")
280	}
281
282	if len(text) > 0 {
283		attrEscape(out, text)
284	}
285
286	out.WriteString("</code></pre>\n")
287}
288
289
// htmlBlockQuote wraps the already-rendered text in a <blockquote> element.
// The opening tag is followed by a newline; the closing tag is not.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		opening = "<blockquote>\n"
		closing = "</blockquote>"
	)

	out.WriteString(opening)
	out.Write(text)
	out.WriteString(closing)
}
295
// htmlTable assembles a table from its already-rendered header and body
// rows: the header goes inside <thead>, the body inside <tbody>. A
// separating newline is written first when out already holds content.
// columnData is not used by this renderer.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	// head section
	out.WriteString("<table><thead>\n")
	out.Write(header)

	// body section
	out.WriteString("\n</thead><tbody>\n")
	out.Write(body)

	out.WriteString("\n</tbody></table>")
}
306
// htmlTableRow wraps the already-rendered cells of one row in <tr>...</tr>.
// A separating newline is written first when out already holds content.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	const (
		rowOpen  = "<tr>\n"
		rowClose = "\n</tr>"
	)
	out.WriteString(rowOpen)
	out.Write(text)
	out.WriteString(rowClose)
}
315
316func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
317	if out.Len() > 0 {
318		out.WriteByte('\n')
319	}
320	switch align {
321	case TABLE_ALIGNMENT_LEFT:
322		out.WriteString("<td align=\"left\">")
323	case TABLE_ALIGNMENT_RIGHT:
324		out.WriteString("<td align=\"right\">")
325	case TABLE_ALIGNMENT_CENTER:
326		out.WriteString("<td align=\"center\">")
327	default:
328		out.WriteString("<td>")
329	}
330
331	out.Write(text)
332	out.WriteString("</td>")
333}
334
335func htmlList(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
336	if out.Len() > 0 {
337		out.WriteByte('\n')
338	}
339	if flags&LIST_TYPE_ORDERED != 0 {
340		out.WriteString("<ol>\n")
341	} else {
342		out.WriteString("<ul>\n")
343	}
344	out.Write(text)
345	if flags&LIST_TYPE_ORDERED != 0 {
346		out.WriteString("</ol>\n")
347	} else {
348		out.WriteString("</ul>\n")
349	}
350}
351
// htmlListItem renders one list item as <li>text</li> followed by a
// newline. Trailing newlines of text are stripped so the closing tag sits
// directly after the content.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	body := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(body)
	out.WriteString("</li>\n")
}
361
362func htmlParagraph(out *bytes.Buffer, text []byte, opaque interface{}) {
363	options := opaque.(*htmlOptions)
364	i := 0
365
366	if out.Len() > 0 {
367		out.WriteByte('\n')
368	}
369
370	if len(text) == 0 {
371		return
372	}
373
374	for i < len(text) && isspace(text[i]) {
375		i++
376	}
377
378	if i == len(text) {
379		return
380	}
381
382	out.WriteString("<p>")
383	if options.flags&HTML_HARD_WRAP != 0 {
384		for i < len(text) {
385			org := i
386			for i < len(text) && text[i] != '\n' {
387				i++
388			}
389
390			if i > org {
391				out.Write(text[org:i])
392			}
393
394			if i >= len(text) {
395				break
396			}
397
398			out.WriteString("<br>")
399			out.WriteString(options.closeTag)
400			i++
401		}
402	} else {
403		out.Write(text[i:])
404	}
405	out.WriteString("</p>\n")
406}
407
408func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
409	options := opaque.(*htmlOptions)
410
411	if len(link) == 0 {
412		return 0
413	}
414	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
415		return 0
416	}
417
418	out.WriteString("<a href=\"")
419	if kind == LINK_TYPE_EMAIL {
420		out.WriteString("mailto:")
421	}
422	out.Write(link)
423	out.WriteString("\">")
424
425	/*
426	 * Pretty print: if we get an email address as
427	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
428	 * want to print the `mailto:` prefix
429	 */
430	switch {
431	case bytes.HasPrefix(link, []byte("mailto://")):
432		attrEscape(out, link[9:])
433	case bytes.HasPrefix(link, []byte("mailto:")):
434		attrEscape(out, link[7:])
435	default:
436		attrEscape(out, link)
437	}
438
439	out.WriteString("</a>")
440
441	return 1
442}
443
// htmlCodeSpan renders an inline code span as <code>text</code>, with text
// HTML-escaped through attrEscape. It always returns 1.
func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) int {
	out.WriteString("<code>")
	attrEscape(out, text)
	out.WriteString("</code>")
	return 1
}
450
// htmlDoubleEmphasis renders strong emphasis as <strong>text</strong> and
// returns 1. For empty text it writes nothing and returns 0. text is
// written as-is.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		out.WriteString("<strong>")
		out.Write(text)
		out.WriteString("</strong>")
		return 1
	}
	return 0
}
460
// htmlEmphasis renders emphasis as <em>text</em> and returns 1. For empty
// text it writes nothing and returns 0. text is written as-is.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		out.WriteString("<em>")
		out.Write(text)
		out.WriteString("</em>")
		return 1
	}
	return 0
}
470
471func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
472	options := opaque.(*htmlOptions)
473	if len(link) == 0 {
474		return 0
475	}
476	out.WriteString("<img src=\"")
477	attrEscape(out, link)
478	out.WriteString("\" alt=\"")
479	if len(alt) > 0 {
480		attrEscape(out, alt)
481	}
482	if len(title) > 0 {
483		out.WriteString("\" title=\"")
484		attrEscape(out, title)
485	}
486
487	out.WriteByte('"')
488	out.WriteString(options.closeTag)
489	return 1
490}
491
492func htmlLineBreak(out *bytes.Buffer, opaque interface{}) int {
493	options := opaque.(*htmlOptions)
494	out.WriteString("<br")
495	out.WriteString(options.closeTag)
496	return 1
497}
498
499func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
500	options := opaque.(*htmlOptions)
501
502	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
503		return 0
504	}
505
506	out.WriteString("<a href=\"")
507	if len(link) > 0 {
508		out.Write(link)
509	}
510	if len(title) > 0 {
511		out.WriteString("\" title=\"")
512		attrEscape(out, title)
513	}
514	out.WriteString("\">")
515	if len(content) > 0 {
516		out.Write(content)
517	}
518	out.WriteString("</a>")
519	return 1
520}
521
522func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) int {
523	options := opaque.(*htmlOptions)
524	if options.flags&HTML_SKIP_HTML != 0 {
525		return 1
526	}
527	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
528		return 1
529	}
530	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
531		return 1
532	}
533	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
534		return 1
535	}
536	out.Write(text)
537	return 1
538}
539
// htmlTripleEmphasis renders combined strong and regular emphasis as
// <strong><em>text</em></strong> and returns 1. For empty text it writes
// nothing and returns 0. text is written as-is.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		out.WriteString("<strong><em>")
		out.Write(text)
		out.WriteString("</em></strong>")
		return 1
	}
	return 0
}
549
// htmlStrikeThrough renders struck-through text as <del>text</del> and
// returns 1. For empty text it writes nothing and returns 0. text is
// written as-is.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		out.WriteString("<del>")
		out.Write(text)
		out.WriteString("</del>")
		return 1
	}
	return 0
}
559
// htmlNormalText writes plain text to out, HTML-escaped through attrEscape.
// HtmlRenderer installs it as the NormalText callback when
// HTML_USE_SMARTYPANTS is not set.
func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
	attrEscape(out, text)
}
563
564func htmlTocHeader(out *bytes.Buffer, text []byte, level int, opaque interface{}) {
565	options := opaque.(*htmlOptions)
566	for level > options.tocData.currentLevel {
567		if options.tocData.currentLevel > 0 {
568			out.WriteString("<li>")
569		}
570		out.WriteString("<ul>\n")
571		options.tocData.currentLevel++
572	}
573
574	for level < options.tocData.currentLevel {
575		out.WriteString("</ul>")
576		if options.tocData.currentLevel > 1 {
577			out.WriteString("</li>\n")
578		}
579		options.tocData.currentLevel--
580	}
581
582	out.WriteString("<li><a href=\"#toc_")
583	out.WriteString(strconv.Itoa(options.tocData.headerCount))
584	out.WriteString("\">")
585	options.tocData.headerCount++
586
587	if len(text) > 0 {
588		out.Write(text)
589	}
590	out.WriteString("</a></li>\n")
591}
592
593func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
594	options := opaque.(*htmlOptions)
595	for options.tocData.currentLevel > 1 {
596		out.WriteString("</ul></li>\n")
597		options.tocData.currentLevel--
598	}
599
600	if options.tocData.currentLevel > 0 {
601		out.WriteString("</ul>\n")
602	}
603}
604
605func isHtmlTag(tag []byte, tagname string) bool {
606	i := 0
607	if i < len(tag) && tag[0] != '<' {
608		return false
609	}
610	i++
611	for i < len(tag) && isspace(tag[i]) {
612		i++
613	}
614
615	if i < len(tag) && tag[i] == '/' {
616		i++
617	}
618
619	for i < len(tag) && isspace(tag[i]) {
620		i++
621	}
622
623	tag_i := i
624	for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
625		if tag_i >= len(tagname) {
626			break
627		}
628
629		if tag[i] != tagname[tag_i] {
630			return false
631		}
632	}
633
634	if i == len(tag) {
635		return false
636	}
637
638	return isspace(tag[i]) || tag[i] == '>'
639}