all repos — grayfriday @ a99c922b84125a8284f65750d7ef9dff05757d5d

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22	"strings"
 23)
 24
 25const (
 26	HTML_SKIP_HTML = 1 << iota
 27	HTML_SKIP_STYLE
 28	HTML_SKIP_IMAGES
 29	HTML_SKIP_LINKS
 30	HTML_SAFELINK
 31	HTML_TOC
 32	HTML_OMIT_CONTENTS
 33	HTML_COMPLETE_PAGE
 34	HTML_GITHUB_BLOCKCODE
 35	HTML_USE_XHTML
 36	HTML_USE_SMARTYPANTS
 37	HTML_SMARTYPANTS_FRACTIONS
 38	HTML_SMARTYPANTS_LATEX_DASHES
 39)
 40
 41type Html struct {
 42	flags    int    // HTML_* options
 43	closeTag string // how to end singleton tags: either " />\n" or ">\n"
 44	title    string // document title
 45	css      string // optional css file url (used with HTML_COMPLETE_PAGE)
 46
 47	// table of contents data
 48	tocMarker    int
 49	headerCount  int
 50	currentLevel int
 51	toc          *bytes.Buffer
 52
 53	smartypants *SmartypantsRenderer
 54}
 55
 56const (
 57	xhtmlClose = " />\n"
 58	htmlClose  = ">\n"
 59)
 60
 61func HtmlRenderer(flags int, title string, css string) Renderer {
 62	// configure the rendering engine
 63	closeTag := htmlClose
 64	if flags&HTML_USE_XHTML != 0 {
 65		closeTag = xhtmlClose
 66	}
 67
 68	return &Html{
 69		flags:    flags,
 70		closeTag: closeTag,
 71		title:    title,
 72		css:      css,
 73
 74		headerCount:  0,
 75		currentLevel: 0,
 76		toc:          new(bytes.Buffer),
 77
 78		smartypants: Smartypants(flags),
 79	}
 80}
 81
 82func attrEscape(out *bytes.Buffer, src []byte) {
 83	org := 0
 84	for i, ch := range src {
 85		// using if statements is a bit faster than a switch statement.
 86		// as the compiler improves, this should be unnecessary
 87		// this is only worthwhile because attrEscape is the single
 88		// largest CPU user in normal use
 89		if ch == '"' {
 90			if i > org {
 91				// copy all the normal characters since the last escape
 92				out.Write(src[org:i])
 93			}
 94			org = i + 1
 95			out.WriteString("&quot;")
 96			continue
 97		}
 98		if ch == '&' {
 99			if i > org {
100				out.Write(src[org:i])
101			}
102			org = i + 1
103			out.WriteString("&amp;")
104			continue
105		}
106		if ch == '<' {
107			if i > org {
108				out.Write(src[org:i])
109			}
110			org = i + 1
111			out.WriteString("&lt;")
112			continue
113		}
114		if ch == '>' {
115			if i > org {
116				out.Write(src[org:i])
117			}
118			org = i + 1
119			out.WriteString("&gt;")
120			continue
121		}
122	}
123	if org < len(src) {
124		out.Write(src[org:])
125	}
126}
127
128func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
129	marker := out.Len()
130	doubleSpace(out)
131
132	if options.flags&HTML_TOC != 0 {
133		// headerCount is incremented in htmlTocHeader
134		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
135	} else {
136		out.WriteString(fmt.Sprintf("<h%d>", level))
137	}
138
139	tocMarker := out.Len()
140	if !text() {
141		out.Truncate(marker)
142		return
143	}
144
145	// are we building a table of contents?
146	if options.flags&HTML_TOC != 0 {
147		options.TocHeader(out.Bytes()[tocMarker:], level)
148	}
149
150	out.WriteString(fmt.Sprintf("</h%d>\n", level))
151}
152
153func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
154	if options.flags&HTML_SKIP_HTML != 0 {
155		return
156	}
157
158	doubleSpace(out)
159	out.Write(text)
160	out.WriteByte('\n')
161}
162
163func (options *Html) HRule(out *bytes.Buffer) {
164	doubleSpace(out)
165	out.WriteString("<hr")
166	out.WriteString(options.closeTag)
167}
168
169func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
170	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
171		options.BlockCodeGithub(out, text, lang)
172	} else {
173		options.BlockCodeNormal(out, text, lang)
174	}
175}
176
177func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
178	doubleSpace(out)
179
180	// parse out the language names/classes
181	count := 0
182	for _, elt := range strings.Fields(lang) {
183		if elt[0] == '.' {
184			elt = elt[1:]
185		}
186		if len(elt) == 0 {
187			continue
188		}
189		if count == 0 {
190			out.WriteString("<pre><code class=\"")
191		} else {
192			out.WriteByte(' ')
193		}
194		attrEscape(out, []byte(elt))
195		count++
196	}
197
198	if count == 0 {
199		out.WriteString("<pre><code>")
200	} else {
201		out.WriteString("\">")
202	}
203
204	attrEscape(out, text)
205	out.WriteString("</code></pre>\n")
206}
207
208/*
209 * GitHub style code block:
210 *
211 *              <pre lang="LANG"><code>
212 *              ...
213 *              </pre></code>
214 *
215 * Unlike other parsers, we store the language identifier in the <pre>,
216 * and don't let the user generate custom classes.
217 *
218 * The language identifier in the <pre> block gets postprocessed and all
219 * the code inside gets syntax highlighted with Pygments. This is much safer
220 * than letting the user specify a CSS class for highlighting.
221 *
222 * Note that we only generate HTML for the first specifier.
223 * E.g.
224 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
225 */
226func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
227	doubleSpace(out)
228
229	// parse out the language name
230	count := 0
231	for _, elt := range strings.Fields(lang) {
232		if elt[0] == '.' {
233			elt = elt[1:]
234		}
235		if len(elt) == 0 {
236			continue
237		}
238		out.WriteString("<pre lang=\"")
239		attrEscape(out, []byte(elt))
240		out.WriteString("\"><code>")
241		count++
242		break
243	}
244
245	if count == 0 {
246		out.WriteString("<pre><code>")
247	}
248
249	attrEscape(out, text)
250	out.WriteString("</code></pre>\n")
251}
252
253
254func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
255	out.WriteString("<blockquote>\n")
256	out.Write(text)
257	out.WriteString("</blockquote>")
258}
259
260func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
261	doubleSpace(out)
262	out.WriteString("<table>\n<thead>\n")
263	out.Write(header)
264	out.WriteString("\n</thead>\n<tbody>\n")
265	out.Write(body)
266	out.WriteString("\n</tbody>\n</table>")
267}
268
269func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
270	doubleSpace(out)
271	out.WriteString("<tr>\n")
272	out.Write(text)
273	out.WriteString("\n</tr>")
274}
275
276func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
277	doubleSpace(out)
278	switch align {
279	case TABLE_ALIGNMENT_LEFT:
280		out.WriteString("<td align=\"left\">")
281	case TABLE_ALIGNMENT_RIGHT:
282		out.WriteString("<td align=\"right\">")
283	case TABLE_ALIGNMENT_CENTER:
284		out.WriteString("<td align=\"center\">")
285	default:
286		out.WriteString("<td>")
287	}
288
289	out.Write(text)
290	out.WriteString("</td>")
291}
292
293func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
294	marker := out.Len()
295	doubleSpace(out)
296
297	if flags&LIST_TYPE_ORDERED != 0 {
298		out.WriteString("<ol>\n")
299	} else {
300		out.WriteString("<ul>\n")
301	}
302	if !text() {
303		out.Truncate(marker)
304		return
305	}
306	if flags&LIST_TYPE_ORDERED != 0 {
307		out.WriteString("</ol>\n")
308	} else {
309		out.WriteString("</ul>\n")
310	}
311}
312
313func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
314	out.WriteString("<li>")
315	out.Write(text)
316	out.WriteString("</li>\n")
317}
318
319func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
320	marker := out.Len()
321	doubleSpace(out)
322
323	out.WriteString("<p>")
324	if !text() {
325		out.Truncate(marker)
326		return
327	}
328	out.WriteString("</p>\n")
329}
330
331func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
332	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
333		// mark it but don't link it if it is not a safe link: no smartypants
334		out.WriteString("<tt>")
335		attrEscape(out, link)
336		out.WriteString("</tt>")
337		return
338	}
339
340	out.WriteString("<a href=\"")
341	if kind == LINK_TYPE_EMAIL {
342		out.WriteString("mailto:")
343	}
344	attrEscape(out, link)
345	out.WriteString("\">")
346
347	// Pretty print: if we get an email address as
348	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
349	// want to print the `mailto:` prefix
350	switch {
351	case bytes.HasPrefix(link, []byte("mailto://")):
352		attrEscape(out, link[len("mailto://"):])
353	case bytes.HasPrefix(link, []byte("mailto:")):
354		attrEscape(out, link[len("mailto:"):])
355	default:
356		attrEscape(out, link)
357	}
358
359	out.WriteString("</a>")
360}
361
362func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
363	out.WriteString("<code>")
364	attrEscape(out, text)
365	out.WriteString("</code>")
366}
367
368func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
369	out.WriteString("<strong>")
370	out.Write(text)
371	out.WriteString("</strong>")
372}
373
374func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
375	if len(text) == 0 {
376		return
377	}
378	out.WriteString("<em>")
379	out.Write(text)
380	out.WriteString("</em>")
381}
382
383func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
384	if options.flags&HTML_SKIP_IMAGES != 0 {
385		return
386	}
387
388	out.WriteString("<img src=\"")
389	attrEscape(out, link)
390	out.WriteString("\" alt=\"")
391	if len(alt) > 0 {
392		attrEscape(out, alt)
393	}
394	if len(title) > 0 {
395		out.WriteString("\" title=\"")
396		attrEscape(out, title)
397	}
398
399	out.WriteByte('"')
400	out.WriteString(options.closeTag)
401	return
402}
403
404func (options *Html) LineBreak(out *bytes.Buffer) {
405	out.WriteString("<br")
406	out.WriteString(options.closeTag)
407}
408
409func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
410	if options.flags&HTML_SKIP_LINKS != 0 {
411		// write the link text out but don't link it, just mark it with typewriter font
412		out.WriteString("<tt>")
413		attrEscape(out, content)
414		out.WriteString("</tt>")
415		return
416	}
417
418	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
419		// write the link text out but don't link it, just mark it with typewriter font
420		out.WriteString("<tt>")
421		attrEscape(out, content)
422		out.WriteString("</tt>")
423		return
424	}
425
426	out.WriteString("<a href=\"")
427	attrEscape(out, link)
428	if len(title) > 0 {
429		out.WriteString("\" title=\"")
430		attrEscape(out, title)
431	}
432	out.WriteString("\">")
433	out.Write(content)
434	out.WriteString("</a>")
435	return
436}
437
438func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
439	if options.flags&HTML_SKIP_HTML != 0 {
440		return
441	}
442	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
443		return
444	}
445	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
446		return
447	}
448	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
449		return
450	}
451	out.Write(text)
452}
453
454func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
455	out.WriteString("<strong><em>")
456	out.Write(text)
457	out.WriteString("</em></strong>")
458}
459
460func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
461	out.WriteString("<del>")
462	out.Write(text)
463	out.WriteString("</del>")
464}
465
466func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
467	out.Write(entity)
468}
469
470func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
471	if options.flags&HTML_USE_SMARTYPANTS != 0 {
472		options.Smartypants(out, text)
473	} else {
474		attrEscape(out, text)
475	}
476}
477
478func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
479	smrt := smartypantsData{false, false}
480
481	// first do normal entity escaping
482	var escaped bytes.Buffer
483	attrEscape(&escaped, text)
484	text = escaped.Bytes()
485
486	mark := 0
487	for i := 0; i < len(text); i++ {
488		if action := options.smartypants[text[i]]; action != nil {
489			if i > mark {
490				out.Write(text[mark:i])
491			}
492
493			previousChar := byte(0)
494			if i > 0 {
495				previousChar = text[i-1]
496			}
497			i += action(out, &smrt, previousChar, text[i:])
498			mark = i + 1
499		}
500	}
501
502	if mark < len(text) {
503		out.Write(text[mark:])
504	}
505}
506
507func (options *Html) DocumentHeader(out *bytes.Buffer) {
508	if options.flags&HTML_COMPLETE_PAGE == 0 {
509		return
510	}
511
512	ending := ""
513	if options.flags&HTML_USE_XHTML != 0 {
514		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
515		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
516		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
517		ending = " /"
518	} else {
519		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
520		out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
521		out.WriteString("<html>\n")
522	}
523	out.WriteString("<head>\n")
524	out.WriteString("  <title>")
525	options.NormalText(out, []byte(options.title))
526	out.WriteString("</title>\n")
527	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
528	out.WriteString(VERSION)
529	out.WriteString("\"")
530	out.WriteString(ending)
531	out.WriteString(">\n")
532	out.WriteString("  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
533	out.WriteString(ending)
534	out.WriteString(">\n")
535	if options.css != "" {
536		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
537		attrEscape(out, []byte(options.css))
538		out.WriteString("\"")
539		out.WriteString(ending)
540		out.WriteString(">\n")
541	}
542	out.WriteString("</head>\n")
543	out.WriteString("<body>\n")
544
545	options.tocMarker = out.Len()
546}
547
548func (options *Html) DocumentFooter(out *bytes.Buffer) {
549	// finalize and insert the table of contents
550	if options.flags&HTML_TOC != 0 {
551		options.TocFinalize()
552
553		// now we have to insert the table of contents into the document
554		var temp bytes.Buffer
555
556		// start by making a copy of everything after the document header
557		temp.Write(out.Bytes()[options.tocMarker:])
558
559		// now clear the copied material from the main output buffer
560		out.Truncate(options.tocMarker)
561
562		// corner case spacing issue
563		if options.flags&HTML_COMPLETE_PAGE != 0 {
564			out.WriteByte('\n')
565		}
566
567		// insert the table of contents
568		out.Write(options.toc.Bytes())
569
570		// corner case spacing issue
571		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
572			out.WriteByte('\n')
573		}
574
575		// write out everything that came after it
576		if options.flags&HTML_OMIT_CONTENTS == 0 {
577			out.Write(temp.Bytes())
578		}
579	}
580
581	if options.flags&HTML_COMPLETE_PAGE != 0 {
582		out.WriteString("\n</body>\n")
583		out.WriteString("</html>\n")
584	}
585
586}
587
588func (options *Html) TocHeader(text []byte, level int) {
589	for level > options.currentLevel {
590		switch {
591		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
592			// this sublist can nest underneath a header
593			size := options.toc.Len()
594			options.toc.Truncate(size - len("</li>\n"))
595
596		case options.currentLevel > 0:
597			options.toc.WriteString("<li>")
598		}
599		if options.toc.Len() > 0 {
600			options.toc.WriteByte('\n')
601		}
602		options.toc.WriteString("<ul>\n")
603		options.currentLevel++
604	}
605
606	for level < options.currentLevel {
607		options.toc.WriteString("</ul>")
608		if options.currentLevel > 1 {
609			options.toc.WriteString("</li>\n")
610		}
611		options.currentLevel--
612	}
613
614	options.toc.WriteString("<li><a href=\"#toc_")
615	options.toc.WriteString(strconv.Itoa(options.headerCount))
616	options.toc.WriteString("\">")
617	options.headerCount++
618
619	options.toc.Write(text)
620
621	options.toc.WriteString("</a></li>\n")
622}
623
624func (options *Html) TocFinalize() {
625	for options.currentLevel > 1 {
626		options.toc.WriteString("</ul></li>\n")
627		options.currentLevel--
628	}
629
630	if options.currentLevel > 0 {
631		options.toc.WriteString("</ul>\n")
632	}
633}
634
635func isHtmlTag(tag []byte, tagname string) bool {
636	i := 0
637	if i < len(tag) && tag[0] != '<' {
638		return false
639	}
640	i++
641	for i < len(tag) && isspace(tag[i]) {
642		i++
643	}
644
645	if i < len(tag) && tag[i] == '/' {
646		i++
647	}
648
649	for i < len(tag) && isspace(tag[i]) {
650		i++
651	}
652
653	j := i
654	for ; i < len(tag); i, j = i+1, j+1 {
655		if j >= len(tagname) {
656			break
657		}
658
659		if tag[i] != tagname[j] {
660			return false
661		}
662	}
663
664	if i == len(tag) {
665		return false
666	}
667
668	return isspace(tag[i]) || tag[i] == '>'
669}
670
// doubleSpace appends a newline to out unless out is still empty, so that a
// block element is separated from whatever was written before it.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}