all repos — grayfriday @ d3c822509655d75b89de9a305a3771672395d588

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strconv"
 22)
 23
// Html renderer configuration options. Each constant is a distinct bit so
// that options can be OR-ed together and passed to HtmlRenderer as flags.
const (
	// skip raw HTML: BlockHtml and RawHtmlTag emit nothing
	HTML_SKIP_HTML = 1 << iota
	// skip raw <style> tags (checked in RawHtmlTag)
	HTML_SKIP_STYLE
	// skip images, both markdown images and raw <img> tags
	HTML_SKIP_IMAGES
	// skip links, both markdown links and raw <a> tags
	HTML_SKIP_LINKS
	// only emit links for which isSafeLink reports true
	// (AutoLink exempts email links from this check)
	HTML_SAFELINK
	// give headers "toc_N" ids and insert a table of contents
	HTML_TOC
	// together with HTML_TOC: output only the table of contents
	HTML_OMIT_CONTENTS
	// wrap the output in a doctype, <head> and <body>
	HTML_COMPLETE_PAGE
	// render code blocks with BlockCodeGithub instead of BlockCodeNormal
	HTML_GITHUB_BLOCKCODE
	// use the XHTML doctype and close singleton tags with " />"
	HTML_USE_XHTML
	// run normal text through Smartypants
	HTML_USE_SMARTYPANTS
	// not read in this file; forwarded to Smartypants(flags)
	// NOTE(review): presumably enables fraction replacement - confirm there
	HTML_SMARTYPANTS_FRACTIONS
	// not read in this file; forwarded to Smartypants(flags)
	// NOTE(review): presumably selects LaTeX-style dash handling - confirm there
	HTML_SMARTYPANTS_LATEX_DASHES
)
 39
// Html is the HTML rendering backend. It carries the rendering options
// together with the state accumulated while building a table of contents.
type Html struct {
	flags    int    // HTML_* options
	closeTag string // how to end singleton tags: either " />\n" or ">\n"
	title    string // document title
	css      string // optional css file url (used with HTML_COMPLETE_PAGE)

	// table of contents data

	// output offset recorded at the end of DocumentHeader; DocumentFooter
	// inserts the table of contents at this position
	tocMarker    int
	// number of headers added to the table of contents so far; also the
	// N used in the "toc_N" anchors
	headerCount  int
	// nesting depth of the <ul> lists currently open in toc
	currentLevel int
	// HTML of the table of contents built so far
	toc          *bytes.Buffer

	// per-byte actions returned by Smartypants(flags), consulted by the
	// Smartypants method
	smartypants *SmartypantsRenderer
}
 54
// Terminators for singleton tags such as <hr> and <br>; HtmlRenderer picks
// one of them depending on HTML_USE_XHTML.
const (
	xhtmlClose = " />\n"
	htmlClose  = ">\n"
)
 59
 60func HtmlRenderer(flags int, title string, css string) Renderer {
 61	// configure the rendering engine
 62	closeTag := htmlClose
 63	if flags&HTML_USE_XHTML != 0 {
 64		closeTag = xhtmlClose
 65	}
 66
 67	return &Html{
 68		flags:    flags,
 69		closeTag: closeTag,
 70		title:    title,
 71		css:      css,
 72
 73		headerCount:  0,
 74		currentLevel: 0,
 75		toc:          new(bytes.Buffer),
 76
 77		smartypants: Smartypants(flags),
 78	}
 79}
 80
 81func attrEscape(out *bytes.Buffer, src []byte) {
 82	org := 0
 83	for i, ch := range src {
 84		// using if statements is a bit faster than a switch statement.
 85		// as the compiler improves, this should be unnecessary
 86		// this is only worthwhile because attrEscape is the single
 87		// largest CPU user in normal use
 88		if ch == '"' {
 89			if i > org {
 90				// copy all the normal characters since the last escape
 91				out.Write(src[org:i])
 92			}
 93			org = i + 1
 94			out.WriteString("&quot;")
 95			continue
 96		}
 97		if ch == '&' {
 98			if i > org {
 99				out.Write(src[org:i])
100			}
101			org = i + 1
102			out.WriteString("&amp;")
103			continue
104		}
105		if ch == '<' {
106			if i > org {
107				out.Write(src[org:i])
108			}
109			org = i + 1
110			out.WriteString("&lt;")
111			continue
112		}
113		if ch == '>' {
114			if i > org {
115				out.Write(src[org:i])
116			}
117			org = i + 1
118			out.WriteString("&gt;")
119			continue
120		}
121	}
122	if org < len(src) {
123		out.Write(src[org:])
124	}
125}
126
127func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
128	marker := out.Len()
129
130	if marker > 0 {
131		out.WriteByte('\n')
132	}
133
134	if options.flags&HTML_TOC != 0 {
135		// headerCount is incremented in htmlTocHeader
136		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
137	} else {
138		out.WriteString(fmt.Sprintf("<h%d>", level))
139	}
140
141	tocMarker := out.Len()
142	if !text() {
143		out.Truncate(marker)
144		return
145	}
146
147	// are we building a table of contents?
148	if options.flags&HTML_TOC != 0 {
149		options.TocHeader(out.Bytes()[tocMarker:], level)
150	}
151
152	out.WriteString(fmt.Sprintf("</h%d>\n", level))
153}
154
155func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
156	if options.flags&HTML_SKIP_HTML != 0 {
157		return
158	}
159
160	sz := len(text)
161	for sz > 0 && text[sz-1] == '\n' {
162		sz--
163	}
164	org := 0
165	for org < sz && text[org] == '\n' {
166		org++
167	}
168	if org >= sz {
169		return
170	}
171	if out.Len() > 0 {
172		out.WriteByte('\n')
173	}
174	out.Write(text[org:sz])
175	out.WriteByte('\n')
176}
177
178func (options *Html) HRule(out *bytes.Buffer) {
179	if out.Len() > 0 {
180		out.WriteByte('\n')
181	}
182	out.WriteString("<hr")
183	out.WriteString(options.closeTag)
184}
185
186func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
187	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
188		options.BlockCodeGithub(out, text, lang)
189	} else {
190		options.BlockCodeNormal(out, text, lang)
191	}
192}
193
194func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
195	if out.Len() > 0 {
196		out.WriteByte('\n')
197	}
198
199	if lang != "" {
200		out.WriteString("<pre><code class=\"")
201
202		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
203			for i < len(lang) && isspace(lang[i]) {
204				i++
205			}
206
207			if i < len(lang) {
208				org := i
209				for i < len(lang) && !isspace(lang[i]) {
210					i++
211				}
212
213				if lang[org] == '.' {
214					org++
215				}
216
217				if cls > 0 {
218					out.WriteByte(' ')
219				}
220				attrEscape(out, []byte(lang[org:]))
221			}
222		}
223
224		out.WriteString("\">")
225	} else {
226		out.WriteString("<pre><code>")
227	}
228
229	if len(text) > 0 {
230		attrEscape(out, text)
231	}
232
233	out.WriteString("</code></pre>\n")
234}
235
236/*
237 * GitHub style code block:
238 *
239 *              <pre lang="LANG"><code>
240 *              ...
241 *              </pre></code>
242 *
243 * Unlike other parsers, we store the language identifier in the <pre>,
244 * and don't let the user generate custom classes.
245 *
246 * The language identifier in the <pre> block gets postprocessed and all
247 * the code inside gets syntax highlighted with Pygments. This is much safer
248 * than letting the user specify a CSS class for highlighting.
249 *
250 * Note that we only generate HTML for the first specifier.
251 * E.g.
252 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
253 */
254func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
255	if out.Len() > 0 {
256		out.WriteByte('\n')
257	}
258
259	if len(lang) > 0 {
260		out.WriteString("<pre lang=\"")
261
262		i := 0
263		for i < len(lang) && !isspace(lang[i]) {
264			i++
265		}
266
267		if lang[0] == '.' {
268			attrEscape(out, []byte(lang[1:i]))
269		} else {
270			attrEscape(out, []byte(lang[:i]))
271		}
272
273		out.WriteString("\"><code>")
274	} else {
275		out.WriteString("<pre><code>")
276	}
277
278	if len(text) > 0 {
279		attrEscape(out, text)
280	}
281
282	out.WriteString("</code></pre>\n")
283}
284
285
286func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
287	out.WriteString("<blockquote>\n")
288	out.Write(text)
289	out.WriteString("</blockquote>")
290}
291
292func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
293	if out.Len() > 0 {
294		out.WriteByte('\n')
295	}
296	out.WriteString("<table><thead>\n")
297	out.Write(header)
298	out.WriteString("\n</thead><tbody>\n")
299	out.Write(body)
300	out.WriteString("\n</tbody></table>")
301}
302
303func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
304	if out.Len() > 0 {
305		out.WriteByte('\n')
306	}
307	out.WriteString("<tr>\n")
308	out.Write(text)
309	out.WriteString("\n</tr>")
310}
311
312func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
313	if out.Len() > 0 {
314		out.WriteByte('\n')
315	}
316	switch align {
317	case TABLE_ALIGNMENT_LEFT:
318		out.WriteString("<td align=\"left\">")
319	case TABLE_ALIGNMENT_RIGHT:
320		out.WriteString("<td align=\"right\">")
321	case TABLE_ALIGNMENT_CENTER:
322		out.WriteString("<td align=\"center\">")
323	default:
324		out.WriteString("<td>")
325	}
326
327	out.Write(text)
328	out.WriteString("</td>")
329}
330
331func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
332	marker := out.Len()
333
334	if marker > 0 {
335		out.WriteByte('\n')
336	}
337	if flags&LIST_TYPE_ORDERED != 0 {
338		out.WriteString("<ol>\n")
339	} else {
340		out.WriteString("<ul>\n")
341	}
342	if !text() {
343		out.Truncate(marker)
344		return
345	}
346	if flags&LIST_TYPE_ORDERED != 0 {
347		out.WriteString("</ol>\n")
348	} else {
349		out.WriteString("</ul>\n")
350	}
351}
352
353func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
354	out.WriteString("<li>")
355	size := len(text)
356	for size > 0 && text[size-1] == '\n' {
357		size--
358	}
359	out.Write(text[:size])
360	out.WriteString("</li>\n")
361}
362
363func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
364	marker := out.Len()
365	if marker > 0 {
366		out.WriteByte('\n')
367	}
368
369	out.WriteString("<p>")
370	if !text() {
371		out.Truncate(marker)
372		return
373	}
374	out.WriteString("</p>\n")
375}
376
377func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
378	if len(link) == 0 {
379		return
380	}
381	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
382		return
383	}
384
385	out.WriteString("<a href=\"")
386	if kind == LINK_TYPE_EMAIL {
387		out.WriteString("mailto:")
388	}
389	attrEscape(out, link)
390	out.WriteString("\">")
391
392	/*
393	 * Pretty print: if we get an email address as
394	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
395	 * want to print the `mailto:` prefix
396	 */
397	switch {
398	case bytes.HasPrefix(link, []byte("mailto://")):
399		attrEscape(out, link[9:])
400	case bytes.HasPrefix(link, []byte("mailto:")):
401		attrEscape(out, link[7:])
402	default:
403		attrEscape(out, link)
404	}
405
406	out.WriteString("</a>")
407}
408
409func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
410	out.WriteString("<code>")
411	attrEscape(out, text)
412	out.WriteString("</code>")
413}
414
415func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
416	if len(text) == 0 {
417		return
418	}
419	out.WriteString("<strong>")
420	out.Write(text)
421	out.WriteString("</strong>")
422}
423
424func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
425	if len(text) == 0 {
426		return
427	}
428	out.WriteString("<em>")
429	out.Write(text)
430	out.WriteString("</em>")
431}
432
433func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
434	if options.flags&HTML_SKIP_IMAGES != 0 {
435		return
436	}
437
438	if len(link) == 0 {
439		return
440	}
441	out.WriteString("<img src=\"")
442	attrEscape(out, link)
443	out.WriteString("\" alt=\"")
444	if len(alt) > 0 {
445		attrEscape(out, alt)
446	}
447	if len(title) > 0 {
448		out.WriteString("\" title=\"")
449		attrEscape(out, title)
450	}
451
452	out.WriteByte('"')
453	out.WriteString(options.closeTag)
454	return
455}
456
457func (options *Html) LineBreak(out *bytes.Buffer) {
458	out.WriteString("<br")
459	out.WriteString(options.closeTag)
460}
461
462func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
463	if options.flags&HTML_SKIP_LINKS != 0 {
464		return
465	}
466
467	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
468		return
469	}
470
471	out.WriteString("<a href=\"")
472	attrEscape(out, link)
473	if len(title) > 0 {
474		out.WriteString("\" title=\"")
475		attrEscape(out, title)
476	}
477	out.WriteString("\">")
478	out.Write(content)
479	out.WriteString("</a>")
480	return
481}
482
483func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
484	if options.flags&HTML_SKIP_HTML != 0 {
485		return
486	}
487	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
488		return
489	}
490	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
491		return
492	}
493	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
494		return
495	}
496	out.Write(text)
497}
498
499func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
500	if len(text) == 0 {
501		return
502	}
503	out.WriteString("<strong><em>")
504	out.Write(text)
505	out.WriteString("</em></strong>")
506}
507
508func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
509	if len(text) == 0 {
510		return
511	}
512	out.WriteString("<del>")
513	out.Write(text)
514	out.WriteString("</del>")
515}
516
517func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
518	out.Write(entity)
519}
520
521func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
522	if options.flags&HTML_USE_SMARTYPANTS != 0 {
523		options.Smartypants(out, text)
524	} else {
525		attrEscape(out, text)
526	}
527}
528
529func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
530	smrt := smartypantsData{false, false}
531
532	// first do normal entity escaping
533	var escaped bytes.Buffer
534	attrEscape(&escaped, text)
535	text = escaped.Bytes()
536
537	mark := 0
538	for i := 0; i < len(text); i++ {
539		if action := options.smartypants[text[i]]; action != nil {
540			if i > mark {
541				out.Write(text[mark:i])
542			}
543
544			previousChar := byte(0)
545			if i > 0 {
546				previousChar = text[i-1]
547			}
548			i += action(out, &smrt, previousChar, text[i:])
549			mark = i + 1
550		}
551	}
552
553	if mark < len(text) {
554		out.Write(text[mark:])
555	}
556}
557
558func (options *Html) DocumentHeader(out *bytes.Buffer) {
559	if options.flags&HTML_COMPLETE_PAGE == 0 {
560		return
561	}
562
563	ending := ""
564	if options.flags&HTML_USE_XHTML != 0 {
565		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
566		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
567		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
568		ending = " /"
569	} else {
570		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\" ")
571		out.WriteString("\"http://www.w3.org/TR/html4/strict.dtd\">\n")
572		out.WriteString("<html>\n")
573	}
574	out.WriteString("<head>\n")
575	out.WriteString("  <title>")
576	options.NormalText(out, []byte(options.title))
577	out.WriteString("</title>\n")
578	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
579	out.WriteString(VERSION)
580	out.WriteString("\"")
581	out.WriteString(ending)
582	out.WriteString(">\n")
583	out.WriteString("  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"")
584	out.WriteString(ending)
585	out.WriteString(">\n")
586	if options.css != "" {
587		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
588		attrEscape(out, []byte(options.css))
589		out.WriteString("\"")
590		out.WriteString(ending)
591		out.WriteString(">\n")
592	}
593	out.WriteString("</head>\n")
594	out.WriteString("<body>\n")
595
596	options.tocMarker = out.Len()
597}
598
599func (options *Html) DocumentFooter(out *bytes.Buffer) {
600	// finalize and insert the table of contents
601	if options.flags&HTML_TOC != 0 {
602		options.TocFinalize()
603
604		// now we have to insert the table of contents into the document
605		var temp bytes.Buffer
606
607		// start by making a copy of everything after the document header
608		temp.Write(out.Bytes()[options.tocMarker:])
609
610		// now clear the copied material from the main output buffer
611		out.Truncate(options.tocMarker)
612
613		// corner case spacing issue
614		if options.flags&HTML_COMPLETE_PAGE != 0 {
615			out.WriteByte('\n')
616		}
617
618		// insert the table of contents
619		out.Write(options.toc.Bytes())
620
621		// corner case spacing issue
622		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
623			out.WriteByte('\n')
624		}
625
626		// write out everything that came after it
627		if options.flags&HTML_OMIT_CONTENTS == 0 {
628			out.Write(temp.Bytes())
629		}
630	}
631
632	if options.flags&HTML_COMPLETE_PAGE != 0 {
633		out.WriteString("\n</body>\n")
634		out.WriteString("</html>\n")
635	}
636
637}
638
639func (options *Html) TocHeader(text []byte, level int) {
640	for level > options.currentLevel {
641		switch {
642		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
643			// this sublist can nest underneath a header
644			size := options.toc.Len()
645			options.toc.Truncate(size - len("</li>\n"))
646
647		case options.currentLevel > 0:
648			options.toc.WriteString("<li>")
649		}
650		if options.toc.Len() > 0 {
651			options.toc.WriteByte('\n')
652		}
653		options.toc.WriteString("<ul>\n")
654		options.currentLevel++
655	}
656
657	for level < options.currentLevel {
658		options.toc.WriteString("</ul>")
659		if options.currentLevel > 1 {
660			options.toc.WriteString("</li>\n")
661		}
662		options.currentLevel--
663	}
664
665	options.toc.WriteString("<li><a href=\"#toc_")
666	options.toc.WriteString(strconv.Itoa(options.headerCount))
667	options.toc.WriteString("\">")
668	options.headerCount++
669
670	options.toc.Write(text)
671
672	options.toc.WriteString("</a></li>\n")
673}
674
675func (options *Html) TocFinalize() {
676	for options.currentLevel > 1 {
677		options.toc.WriteString("</ul></li>\n")
678		options.currentLevel--
679	}
680
681	if options.currentLevel > 0 {
682		options.toc.WriteString("</ul>\n")
683	}
684}
685
686func isHtmlTag(tag []byte, tagname string) bool {
687	i := 0
688	if i < len(tag) && tag[0] != '<' {
689		return false
690	}
691	i++
692	for i < len(tag) && isspace(tag[i]) {
693		i++
694	}
695
696	if i < len(tag) && tag[i] == '/' {
697		i++
698	}
699
700	for i < len(tag) && isspace(tag[i]) {
701		i++
702	}
703
704	j := i
705	for ; i < len(tag); i, j = i+1, j+1 {
706		if j >= len(tagname) {
707			break
708		}
709
710		if tag[i] != tagname[j] {
711			return false
712		}
713	}
714
715	if i == len(tag) {
716		return false
717	}
718
719	return isspace(tag[i]) || tag[i] == '>'
720}