all repos — grayfriday @ 3af64a90ad74a0c4bd13fc63b41b298b8a09ac21

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Black Friday Markdown Processor
  3// Originally based on http://github.com/tanoku/upskirt
  4// by Russ Ross <russ@russross.com>
  5//
  6
  7//
  8//
  9// HTML rendering backend
 10//
 11//
 12
 13package blackfriday
 14
 15import (
 16	"bytes"
 17	"fmt"
 18	"strconv"
 19)
 20
// Renderer configuration flags. Each constant is a distinct bit, so they can
// be OR-ed together and passed as the flags argument of HtmlRenderer or
// HtmlTocRenderer.
const (
	// HTML_SKIP_HTML: no raw HTML block callback is installed and inline
	// raw tags are dropped from the output.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE: inline raw <style> tags are dropped.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES: no image callback is installed and inline raw
	// <img> tags are dropped.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS: no link callback is installed and inline raw <a>
	// tags are dropped.
	HTML_SKIP_LINKS
	// HTML_SAFELINK: links are only rendered when isSafeLink accepts them
	// (e-mail autolinks are exempt from the check).
	HTML_SAFELINK
	// HTML_TOC: headers are emitted with an id="toc_N" attribute.
	// HtmlTocRenderer always sets this bit itself.
	HTML_TOC
	// HTML_GITHUB_BLOCKCODE: code blocks are rendered as <pre lang="...">
	// instead of <pre><code class="...">.
	HTML_GITHUB_BLOCKCODE
	// HTML_USE_XHTML: singleton tags are closed with " />" instead of ">".
	HTML_USE_XHTML
	// HTML_USE_SMARTYPANTS: normal text is routed through the smartypants
	// callback instead of plain escaping.
	HTML_USE_SMARTYPANTS
	// HTML_SMARTYPANTS_FRACTIONS is not read in this file; presumably it is
	// consumed by Smartypants(flags) -- verify there.
	HTML_SMARTYPANTS_FRACTIONS
	// HTML_SMARTYPANTS_LATEX_DASHES is not read in this file; presumably it
	// is consumed by Smartypants(flags) -- verify there.
	HTML_SMARTYPANTS_LATEX_DASHES
)
 34
// htmlOptions is the per-renderer state stored in Renderer.Opaque and handed
// back to every html* callback as its opaque argument.
type htmlOptions struct {
	flags    int    // bitwise OR of the HTML_* constants
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	// tocData carries the table-of-contents bookkeeping.
	tocData struct {
		headerCount  int // number of headers seen so far; used as N in "toc_N"
		currentLevel int // nesting depth of the <ul> lists currently open in the TOC
	}
	// smartypants is only set when HTML_USE_SMARTYPANTS is requested;
	// otherwise it stays nil.
	smartypants *SmartypantsRenderer
}
 44
 45var xhtmlClose = " />\n"
 46var htmlClose = ">\n"
 47
 48func HtmlRenderer(flags int) *Renderer {
 49	// configure the rendering engine
 50	r := new(Renderer)
 51	if flags&HTML_GITHUB_BLOCKCODE == 0 {
 52		r.BlockCode = htmlBlockCode
 53	} else {
 54		r.BlockCode = htmlBlockCodeGithub
 55	}
 56	r.BlockQuote = htmlBlockQuote
 57	if flags&HTML_SKIP_HTML == 0 {
 58		r.BlockHtml = htmlRawBlock
 59	}
 60	r.Header = htmlHeader
 61	r.HRule = htmlHRule
 62	r.List = htmlList
 63	r.ListItem = htmlListItem
 64	r.Paragraph = htmlParagraph
 65	r.Table = htmlTable
 66	r.TableRow = htmlTableRow
 67	r.TableCell = htmlTableCell
 68
 69	r.AutoLink = htmlAutoLink
 70	r.CodeSpan = htmlCodeSpan
 71	r.DoubleEmphasis = htmlDoubleEmphasis
 72	r.Emphasis = htmlEmphasis
 73	if flags&HTML_SKIP_IMAGES == 0 {
 74		r.Image = htmlImage
 75	}
 76	r.LineBreak = htmlLineBreak
 77	if flags&HTML_SKIP_LINKS == 0 {
 78		r.Link = htmlLink
 79	}
 80	r.RawHtmlTag = htmlRawTag
 81	r.TripleEmphasis = htmlTripleEmphasis
 82	r.StrikeThrough = htmlStrikeThrough
 83
 84	var cb *SmartypantsRenderer
 85	if flags&HTML_USE_SMARTYPANTS == 0 {
 86		r.NormalText = htmlNormalText
 87	} else {
 88		cb = Smartypants(flags)
 89		r.NormalText = htmlSmartypants
 90	}
 91
 92	closeTag := htmlClose
 93	if flags&HTML_USE_XHTML != 0 {
 94		closeTag = xhtmlClose
 95	}
 96	r.Opaque = &htmlOptions{flags: flags, closeTag: closeTag, smartypants: cb}
 97	return r
 98}
 99
100func HtmlTocRenderer(flags int) *Renderer {
101	// configure the rendering engine
102	r := new(Renderer)
103	r.Header = htmlTocHeader
104
105	r.CodeSpan = htmlCodeSpan
106	r.DoubleEmphasis = htmlDoubleEmphasis
107	r.Emphasis = htmlEmphasis
108	r.TripleEmphasis = htmlTripleEmphasis
109	r.StrikeThrough = htmlStrikeThrough
110
111	r.DocumentFooter = htmlTocFinalize
112
113	closeTag := ">\n"
114	if flags&HTML_USE_XHTML != 0 {
115		closeTag = " />\n"
116	}
117	r.Opaque = &htmlOptions{flags: flags | HTML_TOC, closeTag: closeTag}
118	return r
119}
120
// attrEscape appends src to out, replacing the four HTML-significant bytes
// '"', '&', '<' and '>' with their entity references. All other bytes are
// copied through unchanged, in runs, so the buffer sees as few writes as
// possible.
func attrEscape(out *bytes.Buffer, src []byte) {
	pending := 0 // start of the run of ordinary bytes not yet written
	for pos := 0; pos < len(src); pos++ {
		var entity string
		switch src[pos] {
		case '"':
			entity = "&quot;"
		case '&':
			entity = "&amp;"
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		default:
			continue
		}

		// flush the ordinary bytes seen since the previous escape
		if pos > pending {
			out.Write(src[pending:pos])
		}
		out.WriteString(entity)
		pending = pos + 1
	}

	// whatever follows the last escaped byte
	if pending < len(src) {
		out.Write(src[pending:])
	}
}
166
167func htmlHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
168	options := opaque.(*htmlOptions)
169	marker := out.Len()
170
171	if marker > 0 {
172		out.WriteByte('\n')
173	}
174
175	if options.flags&HTML_TOC != 0 {
176		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.tocData.headerCount))
177		options.tocData.headerCount++
178	} else {
179		out.WriteString(fmt.Sprintf("<h%d>", level))
180	}
181
182	if !text() {
183		out.Truncate(marker)
184		return
185	}
186	out.WriteString(fmt.Sprintf("</h%d>\n", level))
187}
188
// htmlRawBlock copies a raw HTML block to out verbatim, minus any leading and
// trailing newlines, and terminates it with a single newline. A block that
// consists only of newlines (or is empty) produces no output. When out
// already has content, a newline is written first as a separator.
func htmlRawBlock(out *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if out.Len() > 0 {
		out.WriteByte('\n')
	}
	out.Write(body)
	out.WriteByte('\n')
}
207
208func htmlHRule(out *bytes.Buffer, opaque interface{}) {
209	options := opaque.(*htmlOptions)
210
211	if out.Len() > 0 {
212		out.WriteByte('\n')
213	}
214	out.WriteString("<hr")
215	out.WriteString(options.closeTag)
216}
217
218func htmlBlockCode(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
219	if out.Len() > 0 {
220		out.WriteByte('\n')
221	}
222
223	if lang != "" {
224		out.WriteString("<pre><code class=\"")
225
226		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
227			for i < len(lang) && isspace(lang[i]) {
228				i++
229			}
230
231			if i < len(lang) {
232				org := i
233				for i < len(lang) && !isspace(lang[i]) {
234					i++
235				}
236
237				if lang[org] == '.' {
238					org++
239				}
240
241				if cls > 0 {
242					out.WriteByte(' ')
243				}
244				attrEscape(out, []byte(lang[org:]))
245			}
246		}
247
248		out.WriteString("\">")
249	} else {
250		out.WriteString("<pre><code>")
251	}
252
253	if len(text) > 0 {
254		attrEscape(out, text)
255	}
256
257	out.WriteString("</code></pre>\n")
258}
259
260/*
261 * GitHub style code block:
262 *
263 *              <pre lang="LANG"><code>
264 *              ...
265 *              </pre></code>
266 *
267 * Unlike other parsers, we store the language identifier in the <pre>,
268 * and don't let the user generate custom classes.
269 *
270 * The language identifier in the <pre> block gets postprocessed and all
271 * the code inside gets syntax highlighted with Pygments. This is much safer
272 * than letting the user specify a CSS class for highlighting.
273 *
274 * Note that we only generate HTML for the first specifier.
275 * E.g.
276 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
277 */
278func htmlBlockCodeGithub(out *bytes.Buffer, text []byte, lang string, opaque interface{}) {
279	if out.Len() > 0 {
280		out.WriteByte('\n')
281	}
282
283	if len(lang) > 0 {
284		out.WriteString("<pre lang=\"")
285
286		i := 0
287		for i < len(lang) && !isspace(lang[i]) {
288			i++
289		}
290
291		if lang[0] == '.' {
292			attrEscape(out, []byte(lang[1:i]))
293		} else {
294			attrEscape(out, []byte(lang[:i]))
295		}
296
297		out.WriteString("\"><code>")
298	} else {
299		out.WriteString("<pre><code>")
300	}
301
302	if len(text) > 0 {
303		attrEscape(out, text)
304	}
305
306	out.WriteString("</code></pre>\n")
307}
308
309
// htmlBlockQuote wraps the already-rendered quote body in
// <blockquote>...</blockquote>. The opening tag is followed by a newline;
// nothing is written before the opening tag or after the closing one.
func htmlBlockQuote(out *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)

	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
}
315
// htmlTable assembles a table from its already-rendered header and body rows:
// header goes inside <thead>, body inside <tbody>. columnData is not used by
// this renderer. A newline is written first when out already has content; no
// newline follows the closing </table>.
func htmlTable(out *bytes.Buffer, header []byte, body []byte, columnData []int, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	// head section
	out.WriteString("<table><thead>\n")
	out.Write(header)

	// body section
	out.WriteString("\n</thead><tbody>\n")
	out.Write(body)

	out.WriteString("\n</tbody></table>")
}
326
// htmlTableRow wraps the already-rendered cells of one row in <tr>...</tr>,
// with the cells on their own lines. A newline is written first when out
// already has content; none follows the closing </tr>.
func htmlTableRow(out *bytes.Buffer, text []byte, opaque interface{}) {
	if out.Len() != 0 {
		out.WriteByte('\n')
	}

	const (
		rowOpen  = "<tr>\n"
		rowClose = "\n</tr>"
	)
	out.WriteString(rowOpen)
	out.Write(text)
	out.WriteString(rowClose)
}
335
336func htmlTableCell(out *bytes.Buffer, text []byte, align int, opaque interface{}) {
337	if out.Len() > 0 {
338		out.WriteByte('\n')
339	}
340	switch align {
341	case TABLE_ALIGNMENT_LEFT:
342		out.WriteString("<td align=\"left\">")
343	case TABLE_ALIGNMENT_RIGHT:
344		out.WriteString("<td align=\"right\">")
345	case TABLE_ALIGNMENT_CENTER:
346		out.WriteString("<td align=\"center\">")
347	default:
348		out.WriteString("<td>")
349	}
350
351	out.Write(text)
352	out.WriteString("</td>")
353}
354
355func htmlList(out *bytes.Buffer, text func() bool, flags int, opaque interface{}) {
356	marker := out.Len()
357
358	if marker > 0 {
359		out.WriteByte('\n')
360	}
361	if flags&LIST_TYPE_ORDERED != 0 {
362		out.WriteString("<ol>\n")
363	} else {
364		out.WriteString("<ul>\n")
365	}
366	if !text() {
367		out.Truncate(marker)
368		return
369	}
370	if flags&LIST_TYPE_ORDERED != 0 {
371		out.WriteString("</ol>\n")
372	} else {
373		out.WriteString("</ul>\n")
374	}
375}
376
// htmlListItem wraps one already-rendered list item in <li>...</li>, dropping
// any trailing newlines from the item so the closing tag sits directly after
// the content. The closing tag is followed by a newline.
func htmlListItem(out *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	item := bytes.TrimRight(text, "\n")

	out.WriteString("<li>")
	out.Write(item)
	out.WriteString("</li>\n")
}
386
// htmlParagraph renders a paragraph as <p>...</p> followed by a newline.
// text renders the paragraph content into out and reports success; when it
// fails, everything this call wrote is rolled back. A newline is written
// first when out already has content.
func htmlParagraph(out *bytes.Buffer, text func() bool, opaque interface{}) {
	start := out.Len()
	if start != 0 {
		out.WriteByte('\n')
	}
	out.WriteString("<p>")

	if text() {
		out.WriteString("</p>\n")
		return
	}
	out.Truncate(start)
}
400
401func htmlAutoLink(out *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
402	options := opaque.(*htmlOptions)
403
404	if len(link) == 0 {
405		return 0
406	}
407	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
408		return 0
409	}
410
411	out.WriteString("<a href=\"")
412	if kind == LINK_TYPE_EMAIL {
413		out.WriteString("mailto:")
414	}
415	attrEscape(out, link)
416	out.WriteString("\">")
417
418	/*
419	 * Pretty print: if we get an email address as
420	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
421	 * want to print the `mailto:` prefix
422	 */
423	switch {
424	case bytes.HasPrefix(link, []byte("mailto://")):
425		attrEscape(out, link[9:])
426	case bytes.HasPrefix(link, []byte("mailto:")):
427		attrEscape(out, link[7:])
428	default:
429		attrEscape(out, link)
430	}
431
432	out.WriteString("</a>")
433
434	return 1
435}
436
437func htmlCodeSpan(out *bytes.Buffer, text []byte, opaque interface{}) int {
438	out.WriteString("<code>")
439	attrEscape(out, text)
440	out.WriteString("</code>")
441	return 1
442}
443
// htmlDoubleEmphasis wraps already-rendered content in <strong>...</strong>
// and returns 1. Empty content is rejected: nothing is written and 0 is
// returned.
func htmlDoubleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<strong>"
		closeTag = "</strong>"
	)

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
453
// htmlEmphasis wraps already-rendered content in <em>...</em> and returns 1.
// Empty content is rejected: nothing is written and 0 is returned.
func htmlEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<em>"
		closeTag = "</em>"
	)

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
463
464func htmlImage(out *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
465	options := opaque.(*htmlOptions)
466	if len(link) == 0 {
467		return 0
468	}
469	out.WriteString("<img src=\"")
470	attrEscape(out, link)
471	out.WriteString("\" alt=\"")
472	if len(alt) > 0 {
473		attrEscape(out, alt)
474	}
475	if len(title) > 0 {
476		out.WriteString("\" title=\"")
477		attrEscape(out, title)
478	}
479
480	out.WriteByte('"')
481	out.WriteString(options.closeTag)
482	return 1
483}
484
485func htmlLineBreak(out *bytes.Buffer, opaque interface{}) int {
486	options := opaque.(*htmlOptions)
487	out.WriteString("<br")
488	out.WriteString(options.closeTag)
489	return 1
490}
491
492func htmlLink(out *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
493	options := opaque.(*htmlOptions)
494
495	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
496		return 0
497	}
498
499	out.WriteString("<a href=\"")
500	attrEscape(out, link)
501	if len(title) > 0 {
502		out.WriteString("\" title=\"")
503		attrEscape(out, title)
504	}
505	out.WriteString("\">")
506	out.Write(content)
507	out.WriteString("</a>")
508	return 1
509}
510
511func htmlRawTag(out *bytes.Buffer, text []byte, opaque interface{}) int {
512	options := opaque.(*htmlOptions)
513	if options.flags&HTML_SKIP_HTML != 0 {
514		return 1
515	}
516	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
517		return 1
518	}
519	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
520		return 1
521	}
522	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
523		return 1
524	}
525	out.Write(text)
526	return 1
527}
528
// htmlTripleEmphasis wraps already-rendered content in
// <strong><em>...</em></strong> and returns 1. Empty content is rejected:
// nothing is written and 0 is returned.
func htmlTripleEmphasis(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTags  = "<strong><em>"
		closeTags = "</em></strong>"
	)

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTags)
	out.Write(text)
	out.WriteString(closeTags)
	return 1
}
538
// htmlStrikeThrough wraps already-rendered content in <del>...</del> and
// returns 1. Empty content is rejected: nothing is written and 0 is returned.
func htmlStrikeThrough(out *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		openTag  = "<del>"
		closeTag = "</del>"
	)

	if len(text) == 0 {
		return 0
	}
	out.WriteString(openTag)
	out.Write(text)
	out.WriteString(closeTag)
	return 1
}
548
// htmlNormalText writes ordinary document text to out, HTML-escaping it via
// attrEscape. HtmlRenderer installs it as the NormalText callback when
// HTML_USE_SMARTYPANTS is not set. opaque is unused.
func htmlNormalText(out *bytes.Buffer, text []byte, opaque interface{}) {
	attrEscape(out, text)
}
552
553func htmlTocHeader(out *bytes.Buffer, text func() bool, level int, opaque interface{}) {
554	options := opaque.(*htmlOptions)
555	marker := out.Len()
556
557	for level > options.tocData.currentLevel {
558		if options.tocData.currentLevel > 0 {
559			out.WriteString("<li>")
560		}
561		out.WriteString("<ul>\n")
562		options.tocData.currentLevel++
563	}
564
565	for level < options.tocData.currentLevel {
566		out.WriteString("</ul>")
567		if options.tocData.currentLevel > 1 {
568			out.WriteString("</li>\n")
569		}
570		options.tocData.currentLevel--
571	}
572
573	out.WriteString("<li><a href=\"#toc_")
574	out.WriteString(strconv.Itoa(options.tocData.headerCount))
575	out.WriteString("\">")
576	options.tocData.headerCount++
577
578	if !text() {
579		out.Truncate(marker)
580		return
581	}
582	out.WriteString("</a></li>\n")
583}
584
585func htmlTocFinalize(out *bytes.Buffer, opaque interface{}) {
586	options := opaque.(*htmlOptions)
587	for options.tocData.currentLevel > 1 {
588		out.WriteString("</ul></li>\n")
589		options.tocData.currentLevel--
590	}
591
592	if options.tocData.currentLevel > 0 {
593		out.WriteString("</ul>\n")
594	}
595}
596
597func isHtmlTag(tag []byte, tagname string) bool {
598	i := 0
599	if i < len(tag) && tag[0] != '<' {
600		return false
601	}
602	i++
603	for i < len(tag) && isspace(tag[i]) {
604		i++
605	}
606
607	if i < len(tag) && tag[i] == '/' {
608		i++
609	}
610
611	for i < len(tag) && isspace(tag[i]) {
612		i++
613	}
614
615	tag_i := i
616	for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
617		if tag_i >= len(tagname) {
618			break
619		}
620
621		if tag[i] != tagname[tag_i] {
622			return false
623		}
624	}
625
626	if i == len(tag) {
627		return false
628	}
629
630	return isspace(tag[i]) || tag[i] == '>'
631}