all repos — grayfriday @ 60cb261accdc72d2cea155d8e67013d346d0b340

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22	"strings"
 23)
 24
// Option flags for the HTML renderer. Each constant occupies its own bit, so
// several can be combined with bitwise OR and passed to HtmlRenderer.
const (
	// BlockHtml and RawHtmlTag emit nothing, dropping raw HTML from the input.
	HTML_SKIP_HTML = 1 << iota
	// RawHtmlTag drops tags that isHtmlTag identifies as "style".
	HTML_SKIP_STYLE
	// Image emits nothing and RawHtmlTag drops "img" tags.
	HTML_SKIP_IMAGES
	// Link writes its content inside <tt> instead of an anchor and
	// RawHtmlTag drops "a" tags.
	HTML_SKIP_LINKS
	// Link and AutoLink only produce anchors for targets accepted by
	// isSafeLink (AutoLink still links e-mail addresses).
	HTML_SAFELINK
	// Header adds toc_N ids and DocumentFooter inserts a table of contents.
	HTML_TOC
	// Together with HTML_TOC: DocumentFooter outputs the table of contents
	// but not the content that followed the document header.
	HTML_OMIT_CONTENTS
	// DocumentHeader and DocumentFooter wrap the output in a full HTML page.
	HTML_COMPLETE_PAGE
	// BlockCode delegates to BlockCodeGithub instead of BlockCodeNormal.
	HTML_GITHUB_BLOCKCODE
	// Singleton tags end in " />" and DocumentHeader emits an XHTML doctype.
	HTML_USE_XHTML
	// NormalText routes text through Smartypants instead of plain escaping.
	HTML_USE_SMARTYPANTS
	// Not read in this file; forwarded to Smartypants(flags) by HtmlRenderer.
	// Presumably enables fraction substitution - TODO confirm in smartypants.
	HTML_SMARTYPANTS_FRACTIONS
	// Not read in this file; forwarded to Smartypants(flags) by HtmlRenderer.
	// Presumably selects LaTeX-style dash handling - TODO confirm.
	HTML_SMARTYPANTS_LATEX_DASHES
)
 40
// Html holds the configuration and the running state of the HTML rendering
// backend. Instances are created by HtmlRenderer.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data
	tocMarker    int           // output offset recorded by DocumentHeader; the TOC is inserted there
	headerCount  int           // headers recorded so far; also the N of the next "toc_N" id
	currentLevel int           // nesting depth of the <ul> lists currently open in toc
	toc          *bytes.Buffer // table of contents markup accumulated by TocHeader

	smartypants *SmartypantsRenderer // per-byte callbacks consulted by the Smartypants method
}
 55
// Terminators written after singleton tags such as <hr>, <br> and <img>.
const (
	xhtmlClose = " />\n" // selected by HtmlRenderer when HTML_USE_XHTML is set
	htmlClose  = ">\n"   // selected by HtmlRenderer otherwise
)
 60
 61func HtmlRenderer(flags int, title string, css string) Renderer {
 62	// configure the rendering engine
 63	closeTag := htmlClose
 64	if flags&HTML_USE_XHTML != 0 {
 65		closeTag = xhtmlClose
 66	}
 67
 68	return &Html{
 69		flags:    flags,
 70		closeTag: closeTag,
 71		title:    title,
 72		css:      css,
 73
 74		headerCount:  0,
 75		currentLevel: 0,
 76		toc:          new(bytes.Buffer),
 77
 78		smartypants: Smartypants(flags),
 79	}
 80}
 81
 82func attrEscape(out *bytes.Buffer, src []byte) {
 83	org := 0
 84	for i, ch := range src {
 85		// using if statements is a bit faster than a switch statement.
 86		// as the compiler improves, this should be unnecessary
 87		// this is only worthwhile because attrEscape is the single
 88		// largest CPU user in normal use
 89		if ch == '"' {
 90			if i > org {
 91				// copy all the normal characters since the last escape
 92				out.Write(src[org:i])
 93			}
 94			org = i + 1
 95			out.WriteString("&quot;")
 96			continue
 97		}
 98		if ch == '&' {
 99			if i > org {
100				out.Write(src[org:i])
101			}
102			org = i + 1
103			out.WriteString("&amp;")
104			continue
105		}
106		if ch == '<' {
107			if i > org {
108				out.Write(src[org:i])
109			}
110			org = i + 1
111			out.WriteString("&lt;")
112			continue
113		}
114		if ch == '>' {
115			if i > org {
116				out.Write(src[org:i])
117			}
118			org = i + 1
119			out.WriteString("&gt;")
120			continue
121		}
122	}
123	if org < len(src) {
124		out.Write(src[org:])
125	}
126}
127
128func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
129	marker := out.Len()
130	doubleSpace(out)
131
132	if options.flags&HTML_TOC != 0 {
133		// headerCount is incremented in htmlTocHeader
134		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
135	} else {
136		out.WriteString(fmt.Sprintf("<h%d>", level))
137	}
138
139	tocMarker := out.Len()
140	if !text() {
141		out.Truncate(marker)
142		return
143	}
144
145	// are we building a table of contents?
146	if options.flags&HTML_TOC != 0 {
147		options.TocHeader(out.Bytes()[tocMarker:], level)
148	}
149
150	out.WriteString(fmt.Sprintf("</h%d>\n", level))
151}
152
153func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
154	if options.flags&HTML_SKIP_HTML != 0 {
155		return
156	}
157
158	doubleSpace(out)
159	out.Write(text)
160	out.WriteByte('\n')
161}
162
163func (options *Html) HRule(out *bytes.Buffer) {
164	doubleSpace(out)
165	out.WriteString("<hr")
166	out.WriteString(options.closeTag)
167}
168
169func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
170	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
171		options.BlockCodeGithub(out, text, lang)
172	} else {
173		options.BlockCodeNormal(out, text, lang)
174	}
175}
176
177func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
178	doubleSpace(out)
179
180	// parse out the language names/classes
181	count := 0
182	for _, elt := range strings.Fields(lang) {
183		if elt[0] == '.' {
184			elt = elt[1:]
185		}
186		if len(elt) == 0 {
187			continue
188		}
189		if count == 0 {
190			out.WriteString("<pre><code class=\"")
191		} else {
192			out.WriteByte(' ')
193		}
194		attrEscape(out, []byte(elt))
195		count++
196	}
197
198	if count == 0 {
199		out.WriteString("<pre><code>")
200	} else {
201		out.WriteString("\">")
202	}
203
204	attrEscape(out, text)
205	out.WriteString("</code></pre>\n")
206}
207
208/*
209 * GitHub style code block:
210 *
211 *              <pre lang="LANG"><code>
212 *              ...
213 *              </pre></code>
214 *
215 * Unlike other parsers, we store the language identifier in the <pre>,
216 * and don't let the user generate custom classes.
217 *
218 * The language identifier in the <pre> block gets postprocessed and all
219 * the code inside gets syntax highlighted with Pygments. This is much safer
220 * than letting the user specify a CSS class for highlighting.
221 *
222 * Note that we only generate HTML for the first specifier.
223 * E.g.
224 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
225 */
226func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
227	doubleSpace(out)
228
229	// parse out the language name
230	count := 0
231	for _, elt := range strings.Fields(lang) {
232		if elt[0] == '.' {
233			elt = elt[1:]
234		}
235		if len(elt) == 0 {
236			continue
237		}
238		out.WriteString("<pre lang=\"")
239		attrEscape(out, []byte(elt))
240		out.WriteString("\"><code>")
241		count++
242		break
243	}
244
245	if count == 0 {
246		out.WriteString("<pre><code>")
247	}
248
249	attrEscape(out, text)
250	out.WriteString("</code></pre>\n")
251}
252
253
254func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
255	doubleSpace(out)
256	out.WriteString("<blockquote>\n")
257	out.Write(text)
258	out.WriteString("</blockquote>\n")
259}
260
261func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
262	doubleSpace(out)
263	out.WriteString("<table>\n<thead>\n")
264	out.Write(header)
265	out.WriteString("</thead>\n\n<tbody>\n")
266	out.Write(body)
267	out.WriteString("</tbody>\n</table>\n")
268}
269
270func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
271	doubleSpace(out)
272	out.WriteString("<tr>\n")
273	out.Write(text)
274	out.WriteString("\n</tr>\n")
275}
276
277func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
278	doubleSpace(out)
279	switch align {
280	case TABLE_ALIGNMENT_LEFT:
281		out.WriteString("<td align=\"left\">")
282	case TABLE_ALIGNMENT_RIGHT:
283		out.WriteString("<td align=\"right\">")
284	case TABLE_ALIGNMENT_CENTER:
285		out.WriteString("<td align=\"center\">")
286	default:
287		out.WriteString("<td>")
288	}
289
290	out.Write(text)
291	out.WriteString("</td>")
292}
293
294func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
295	marker := out.Len()
296	doubleSpace(out)
297
298	if flags&LIST_TYPE_ORDERED != 0 {
299		out.WriteString("<ol>")
300	} else {
301		out.WriteString("<ul>")
302	}
303	if !text() {
304		out.Truncate(marker)
305		return
306	}
307	if flags&LIST_TYPE_ORDERED != 0 {
308		out.WriteString("</ol>\n")
309	} else {
310		out.WriteString("</ul>\n")
311	}
312}
313
314func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
315	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
316		doubleSpace(out)
317	}
318	out.WriteString("<li>")
319	out.Write(text)
320	out.WriteString("</li>\n")
321}
322
323func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
324	marker := out.Len()
325	doubleSpace(out)
326
327	out.WriteString("<p>")
328	if !text() {
329		out.Truncate(marker)
330		return
331	}
332	out.WriteString("</p>\n")
333}
334
335func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
336	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
337		// mark it but don't link it if it is not a safe link: no smartypants
338		out.WriteString("<tt>")
339		attrEscape(out, link)
340		out.WriteString("</tt>")
341		return
342	}
343
344	out.WriteString("<a href=\"")
345	if kind == LINK_TYPE_EMAIL {
346		out.WriteString("mailto:")
347	}
348	attrEscape(out, link)
349	out.WriteString("\">")
350
351	// Pretty print: if we get an email address as
352	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
353	// want to print the `mailto:` prefix
354	switch {
355	case bytes.HasPrefix(link, []byte("mailto://")):
356		attrEscape(out, link[len("mailto://"):])
357	case bytes.HasPrefix(link, []byte("mailto:")):
358		attrEscape(out, link[len("mailto:"):])
359	default:
360		attrEscape(out, link)
361	}
362
363	out.WriteString("</a>")
364}
365
366func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
367	out.WriteString("<code>")
368	attrEscape(out, text)
369	out.WriteString("</code>")
370}
371
372func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
373	out.WriteString("<strong>")
374	out.Write(text)
375	out.WriteString("</strong>")
376}
377
378func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
379	if len(text) == 0 {
380		return
381	}
382	out.WriteString("<em>")
383	out.Write(text)
384	out.WriteString("</em>")
385}
386
387func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
388	if options.flags&HTML_SKIP_IMAGES != 0 {
389		return
390	}
391
392	out.WriteString("<img src=\"")
393	attrEscape(out, link)
394	out.WriteString("\" alt=\"")
395	if len(alt) > 0 {
396		attrEscape(out, alt)
397	}
398	if len(title) > 0 {
399		out.WriteString("\" title=\"")
400		attrEscape(out, title)
401	}
402
403	out.WriteByte('"')
404	out.WriteString(options.closeTag)
405	return
406}
407
408func (options *Html) LineBreak(out *bytes.Buffer) {
409	out.WriteString("<br")
410	out.WriteString(options.closeTag)
411}
412
413func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
414	if options.flags&HTML_SKIP_LINKS != 0 {
415		// write the link text out but don't link it, just mark it with typewriter font
416		out.WriteString("<tt>")
417		attrEscape(out, content)
418		out.WriteString("</tt>")
419		return
420	}
421
422	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
423		// write the link text out but don't link it, just mark it with typewriter font
424		out.WriteString("<tt>")
425		attrEscape(out, content)
426		out.WriteString("</tt>")
427		return
428	}
429
430	out.WriteString("<a href=\"")
431	attrEscape(out, link)
432	if len(title) > 0 {
433		out.WriteString("\" title=\"")
434		attrEscape(out, title)
435	}
436	out.WriteString("\">")
437	out.Write(content)
438	out.WriteString("</a>")
439	return
440}
441
442func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
443	if options.flags&HTML_SKIP_HTML != 0 {
444		return
445	}
446	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
447		return
448	}
449	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
450		return
451	}
452	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
453		return
454	}
455	out.Write(text)
456}
457
458func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
459	out.WriteString("<strong><em>")
460	out.Write(text)
461	out.WriteString("</em></strong>")
462}
463
464func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
465	out.WriteString("<del>")
466	out.Write(text)
467	out.WriteString("</del>")
468}
469
// Entity copies entity to the output exactly as given, with no escaping.
func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
	out.Write(entity)
}
473
474func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
475	if options.flags&HTML_USE_SMARTYPANTS != 0 {
476		options.Smartypants(out, text)
477	} else {
478		attrEscape(out, text)
479	}
480}
481
482func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
483	smrt := smartypantsData{false, false}
484
485	// first do normal entity escaping
486	var escaped bytes.Buffer
487	attrEscape(&escaped, text)
488	text = escaped.Bytes()
489
490	mark := 0
491	for i := 0; i < len(text); i++ {
492		if action := options.smartypants[text[i]]; action != nil {
493			if i > mark {
494				out.Write(text[mark:i])
495			}
496
497			previousChar := byte(0)
498			if i > 0 {
499				previousChar = text[i-1]
500			}
501			i += action(out, &smrt, previousChar, text[i:])
502			mark = i + 1
503		}
504	}
505
506	if mark < len(text) {
507		out.Write(text[mark:])
508	}
509}
510
511func (options *Html) DocumentHeader(out *bytes.Buffer) {
512	if options.flags&HTML_COMPLETE_PAGE == 0 {
513		return
514	}
515
516	ending := ""
517	if options.flags&HTML_USE_XHTML != 0 {
518		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
519		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
520		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
521		ending = " /"
522	} else {
523		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
524		out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
525		out.WriteString("<html>\n")
526	}
527	out.WriteString("<head>\n")
528	out.WriteString("  <title>")
529	options.NormalText(out, []byte(options.title))
530	out.WriteString("</title>\n")
531	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
532	out.WriteString(VERSION)
533	out.WriteString("\"")
534	out.WriteString(ending)
535	out.WriteString(">\n")
536	out.WriteString("  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
537	out.WriteString(ending)
538	out.WriteString(">\n")
539	if options.css != "" {
540		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
541		attrEscape(out, []byte(options.css))
542		out.WriteString("\"")
543		out.WriteString(ending)
544		out.WriteString(">\n")
545	}
546	out.WriteString("</head>\n")
547	out.WriteString("<body>\n")
548
549	options.tocMarker = out.Len()
550}
551
552func (options *Html) DocumentFooter(out *bytes.Buffer) {
553	// finalize and insert the table of contents
554	if options.flags&HTML_TOC != 0 {
555		options.TocFinalize()
556
557		// now we have to insert the table of contents into the document
558		var temp bytes.Buffer
559
560		// start by making a copy of everything after the document header
561		temp.Write(out.Bytes()[options.tocMarker:])
562
563		// now clear the copied material from the main output buffer
564		out.Truncate(options.tocMarker)
565
566		// corner case spacing issue
567		if options.flags&HTML_COMPLETE_PAGE != 0 {
568			out.WriteByte('\n')
569		}
570
571		// insert the table of contents
572		out.Write(options.toc.Bytes())
573
574		// corner case spacing issue
575		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
576			out.WriteByte('\n')
577		}
578
579		// write out everything that came after it
580		if options.flags&HTML_OMIT_CONTENTS == 0 {
581			out.Write(temp.Bytes())
582		}
583	}
584
585	if options.flags&HTML_COMPLETE_PAGE != 0 {
586		out.WriteString("\n</body>\n")
587		out.WriteString("</html>\n")
588	}
589
590}
591
592func (options *Html) TocHeader(text []byte, level int) {
593	for level > options.currentLevel {
594		switch {
595		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
596			// this sublist can nest underneath a header
597			size := options.toc.Len()
598			options.toc.Truncate(size - len("</li>\n"))
599
600		case options.currentLevel > 0:
601			options.toc.WriteString("<li>")
602		}
603		if options.toc.Len() > 0 {
604			options.toc.WriteByte('\n')
605		}
606		options.toc.WriteString("<ul>\n")
607		options.currentLevel++
608	}
609
610	for level < options.currentLevel {
611		options.toc.WriteString("</ul>")
612		if options.currentLevel > 1 {
613			options.toc.WriteString("</li>\n")
614		}
615		options.currentLevel--
616	}
617
618	options.toc.WriteString("<li><a href=\"#toc_")
619	options.toc.WriteString(strconv.Itoa(options.headerCount))
620	options.toc.WriteString("\">")
621	options.headerCount++
622
623	options.toc.Write(text)
624
625	options.toc.WriteString("</a></li>\n")
626}
627
628func (options *Html) TocFinalize() {
629	for options.currentLevel > 1 {
630		options.toc.WriteString("</ul></li>\n")
631		options.currentLevel--
632	}
633
634	if options.currentLevel > 0 {
635		options.toc.WriteString("</ul>\n")
636	}
637}
638
639func isHtmlTag(tag []byte, tagname string) bool {
640	i := 0
641	if i < len(tag) && tag[0] != '<' {
642		return false
643	}
644	i++
645	for i < len(tag) && isspace(tag[i]) {
646		i++
647	}
648
649	if i < len(tag) && tag[i] == '/' {
650		i++
651	}
652
653	for i < len(tag) && isspace(tag[i]) {
654		i++
655	}
656
657	j := i
658	for ; i < len(tag); i, j = i+1, j+1 {
659		if j >= len(tagname) {
660			break
661		}
662
663		if tag[i] != tagname[j] {
664			return false
665		}
666	}
667
668	if i == len(tag) {
669		return false
670	}
671
672	return isspace(tag[i]) || tag[i] == '>'
673}
674
// doubleSpace appends a newline to out unless out is still empty, so that
// block-level elements are separated from whatever was written before them.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}