all repos — grayfriday @ 50b8e0370b6d767a2df828f8a3481a6a443fdb61

blackfriday fork with a few changes

html.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// HTML rendering backend
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"regexp"
 22	"strconv"
 23	"strings"
 24)
 25
 26// Html renderer configuration options.
 27const (
 28	HTML_SKIP_HTML                = 1 << iota // skip preformatted HTML blocks
 29	HTML_SKIP_STYLE                           // skip embedded <style> elements
 30	HTML_SKIP_IMAGES                          // skip embedded images
 31	HTML_SKIP_LINKS                           // skip all links
 32	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
 33	HTML_SAFELINK                             // only link to trusted protocols
 34	HTML_NOFOLLOW_LINKS                       // only link with rel="nofollow"
 35	HTML_HREF_TARGET_BLANK                    // add a blank target
 36	HTML_TOC                                  // generate a table of contents
 37	HTML_OMIT_CONTENTS                        // skip the main contents (for a standalone table of contents)
 38	HTML_COMPLETE_PAGE                        // generate a complete HTML page
 39	HTML_GITHUB_BLOCKCODE                     // use github fenced code rendering rules
 40	HTML_USE_XHTML                            // generate XHTML output instead of HTML
 41	HTML_USE_SMARTYPANTS                      // enable smart punctuation substitutions
 42	HTML_SMARTYPANTS_FRACTIONS                // enable smart fractions (with HTML_USE_SMARTYPANTS)
 43	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
 44)
 45
 46var (
 47	alignments = []string{
 48		"left",
 49		"right",
 50		"center",
 51	}
 52
 53	// TODO: improve this regexp to catch all possible entities:
 54	htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
 55)
 56
 57// Html is a type that implements the Renderer interface for HTML output.
 58//
 59// Do not create this directly, instead use the HtmlRenderer function.
 60type Html struct {
 61	flags    int    // HTML_* options
 62	closeTag string // how to end singleton tags: either " />\n" or ">\n"
 63	title    string // document title
 64	css      string // optional css file url (used with HTML_COMPLETE_PAGE)
 65
 66	// table of contents data
 67	tocMarker    int
 68	headerCount  int
 69	currentLevel int
 70	toc          *bytes.Buffer
 71
 72	smartypants *smartypantsRenderer
 73}
 74
 75const (
 76	xhtmlClose = " />\n"
 77	htmlClose  = ">\n"
 78)
 79
 80// HtmlRenderer creates and configures an Html object, which
 81// satisfies the Renderer interface.
 82//
 83// flags is a set of HTML_* options ORed together.
 84// title is the title of the document, and css is a URL for the document's
 85// stylesheet.
 86// title and css are only used when HTML_COMPLETE_PAGE is selected.
 87func HtmlRenderer(flags int, title string, css string) Renderer {
 88	// configure the rendering engine
 89	closeTag := htmlClose
 90	if flags&HTML_USE_XHTML != 0 {
 91		closeTag = xhtmlClose
 92	}
 93
 94	return &Html{
 95		flags:    flags,
 96		closeTag: closeTag,
 97		title:    title,
 98		css:      css,
 99
100		headerCount:  0,
101		currentLevel: 0,
102		toc:          new(bytes.Buffer),
103
104		smartypants: smartypants(flags),
105	}
106}
107
// escapeSingleChar returns the HTML entity for char and true when char is one
// of `"`, `&`, `<` or `>`; for any other byte it returns "" and false.
//
// This is deliberately a plain chain of comparisons. The original authors
// measured if statements as a bit faster than a switch, and a map as roughly
// 3x slower. That only matters because attrEscape, which calls this for every
// byte, is the single largest CPU user in normal use. As the compiler
// improves, the distinction should become unnecessary.
func escapeSingleChar(char byte) (string, bool) {
	var entity string
	if char == '"' {
		entity = "&quot;"
	} else if char == '&' {
		entity = "&amp;"
	} else if char == '<' {
		entity = "&lt;"
	} else if char == '>' {
		entity = "&gt;"
	}
	return entity, entity != ""
}
127
128func attrEscape(out *bytes.Buffer, src []byte) {
129	org := 0
130	for i, ch := range src {
131		if entity, ok := escapeSingleChar(ch); ok {
132			if i > org {
133				// copy all the normal characters since the last escape
134				out.Write(src[org:i])
135			}
136			org = i + 1
137			out.WriteString(entity)
138		}
139	}
140	if org < len(src) {
141		out.Write(src[org:])
142	}
143}
144
145func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) {
146	end := 0
147	for _, rang := range skipRanges {
148		attrEscape(out, src[end:rang[0]])
149		out.Write(src[rang[0]:rang[1]])
150		end = rang[1]
151	}
152	attrEscape(out, src[end:])
153}
154
155func (options *Html) GetFlags() int {
156	return options.flags
157}
158
159func (options *Html) Header(out *bytes.Buffer, text func() bool, level int, id string) {
160	marker := out.Len()
161	doubleSpace(out)
162
163	if id != "" {
164		out.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, id))
165	} else if options.flags&HTML_TOC != 0 {
166		// headerCount is incremented in htmlTocHeader
167		out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
168	} else {
169		out.WriteString(fmt.Sprintf("<h%d>", level))
170	}
171
172	tocMarker := out.Len()
173	if !text() {
174		out.Truncate(marker)
175		return
176	}
177
178	// are we building a table of contents?
179	if options.flags&HTML_TOC != 0 {
180		options.TocHeader(out.Bytes()[tocMarker:], level)
181	}
182
183	out.WriteString(fmt.Sprintf("</h%d>\n", level))
184}
185
186func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
187	if options.flags&HTML_SKIP_HTML != 0 {
188		return
189	}
190
191	doubleSpace(out)
192	out.Write(text)
193	out.WriteByte('\n')
194}
195
196func (options *Html) HRule(out *bytes.Buffer) {
197	doubleSpace(out)
198	out.WriteString("<hr")
199	out.WriteString(options.closeTag)
200}
201
202func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
203	if options.flags&HTML_GITHUB_BLOCKCODE != 0 {
204		options.BlockCodeGithub(out, text, lang)
205	} else {
206		options.BlockCodeNormal(out, text, lang)
207	}
208}
209
210func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
211	doubleSpace(out)
212
213	// parse out the language names/classes
214	count := 0
215	for _, elt := range strings.Fields(lang) {
216		if elt[0] == '.' {
217			elt = elt[1:]
218		}
219		if len(elt) == 0 {
220			continue
221		}
222		if count == 0 {
223			out.WriteString("<pre><code class=\"")
224		} else {
225			out.WriteByte(' ')
226		}
227		attrEscape(out, []byte(elt))
228		count++
229	}
230
231	if count == 0 {
232		out.WriteString("<pre><code>")
233	} else {
234		out.WriteString("\">")
235	}
236
237	attrEscape(out, text)
238	out.WriteString("</code></pre>\n")
239}
240
241// GitHub style code block:
242//
243//              <pre lang="LANG"><code>
244//              ...
245//              </code></pre>
246//
247// Unlike other parsers, we store the language identifier in the <pre>,
248// and don't let the user generate custom classes.
249//
250// The language identifier in the <pre> block gets postprocessed and all
251// the code inside gets syntax highlighted with Pygments. This is much safer
252// than letting the user specify a CSS class for highlighting.
253//
254// Note that we only generate HTML for the first specifier.
255// E.g.
256//              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
257func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
258	doubleSpace(out)
259
260	// parse out the language name
261	count := 0
262	for _, elt := range strings.Fields(lang) {
263		if elt[0] == '.' {
264			elt = elt[1:]
265		}
266		if len(elt) == 0 {
267			continue
268		}
269		out.WriteString("<pre lang=\"")
270		attrEscape(out, []byte(elt))
271		out.WriteString("\"><code>")
272		count++
273		break
274	}
275
276	if count == 0 {
277		out.WriteString("<pre><code>")
278	}
279
280	attrEscape(out, text)
281	out.WriteString("</code></pre>\n")
282}
283
284func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
285	doubleSpace(out)
286	out.WriteString("<blockquote>\n")
287	out.Write(text)
288	out.WriteString("</blockquote>\n")
289}
290
291func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
292	doubleSpace(out)
293	out.WriteString("<table>\n<thead>\n")
294	out.Write(header)
295	out.WriteString("</thead>\n\n<tbody>\n")
296	out.Write(body)
297	out.WriteString("</tbody>\n</table>\n")
298}
299
300func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
301	doubleSpace(out)
302	out.WriteString("<tr>\n")
303	out.Write(text)
304	out.WriteString("\n</tr>\n")
305}
306
307func (options *Html) TableHeaderCell(out *bytes.Buffer, text []byte, align int) {
308	doubleSpace(out)
309	switch align {
310	case TABLE_ALIGNMENT_LEFT:
311		out.WriteString("<th align=\"left\">")
312	case TABLE_ALIGNMENT_RIGHT:
313		out.WriteString("<th align=\"right\">")
314	case TABLE_ALIGNMENT_CENTER:
315		out.WriteString("<th align=\"center\">")
316	default:
317		out.WriteString("<th>")
318	}
319
320	out.Write(text)
321	out.WriteString("</th>")
322}
323
324func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
325	doubleSpace(out)
326	switch align {
327	case TABLE_ALIGNMENT_LEFT:
328		out.WriteString("<td align=\"left\">")
329	case TABLE_ALIGNMENT_RIGHT:
330		out.WriteString("<td align=\"right\">")
331	case TABLE_ALIGNMENT_CENTER:
332		out.WriteString("<td align=\"center\">")
333	default:
334		out.WriteString("<td>")
335	}
336
337	out.Write(text)
338	out.WriteString("</td>")
339}
340
341func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) {
342	out.WriteString("<div class=\"footnotes\">\n")
343	options.HRule(out)
344	options.List(out, text, LIST_TYPE_ORDERED)
345	out.WriteString("</div>\n")
346}
347
348func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
349	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
350		doubleSpace(out)
351	}
352	out.WriteString(`<li id="fn:`)
353	out.Write(slugify(name))
354	out.WriteString(`">`)
355	out.Write(text)
356	out.WriteString("</li>\n")
357}
358
359func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
360	marker := out.Len()
361	doubleSpace(out)
362
363	if flags&LIST_TYPE_ORDERED != 0 {
364		out.WriteString("<ol>")
365	} else {
366		out.WriteString("<ul>")
367	}
368	if !text() {
369		out.Truncate(marker)
370		return
371	}
372	if flags&LIST_TYPE_ORDERED != 0 {
373		out.WriteString("</ol>\n")
374	} else {
375		out.WriteString("</ul>\n")
376	}
377}
378
379func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
380	if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 {
381		doubleSpace(out)
382	}
383	out.WriteString("<li>")
384	out.Write(text)
385	out.WriteString("</li>\n")
386}
387
388func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
389	marker := out.Len()
390	doubleSpace(out)
391
392	out.WriteString("<p>")
393	if !text() {
394		out.Truncate(marker)
395		return
396	}
397	out.WriteString("</p>\n")
398}
399
400func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
401	skipRanges := htmlEntity.FindAllIndex(link, -1)
402	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
403		// mark it but don't link it if it is not a safe link: no smartypants
404		out.WriteString("<tt>")
405		entityEscapeWithSkip(out, link, skipRanges)
406		out.WriteString("</tt>")
407		return
408	}
409
410	out.WriteString("<a href=\"")
411	if kind == LINK_TYPE_EMAIL {
412		out.WriteString("mailto:")
413	}
414	entityEscapeWithSkip(out, link, skipRanges)
415
416	if options.flags&HTML_NOFOLLOW_LINKS != 0 && !isRelativeLink(link) {
417		out.WriteString("\" rel=\"nofollow")
418	}
419	// blank target only add to external link
420	if options.flags&HTML_HREF_TARGET_BLANK != 0 && !isRelativeLink(link) {
421		out.WriteString("\" target=\"_blank")
422	}
423
424	out.WriteString("\">")
425
426	// Pretty print: if we get an email address as
427	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
428	// want to print the `mailto:` prefix
429	switch {
430	case bytes.HasPrefix(link, []byte("mailto://")):
431		attrEscape(out, link[len("mailto://"):])
432	case bytes.HasPrefix(link, []byte("mailto:")):
433		attrEscape(out, link[len("mailto:"):])
434	default:
435		entityEscapeWithSkip(out, link, skipRanges)
436	}
437
438	out.WriteString("</a>")
439}
440
441func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
442	out.WriteString("<code>")
443	attrEscape(out, text)
444	out.WriteString("</code>")
445}
446
447func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
448	out.WriteString("<strong>")
449	out.Write(text)
450	out.WriteString("</strong>")
451}
452
453func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
454	if len(text) == 0 {
455		return
456	}
457	out.WriteString("<em>")
458	out.Write(text)
459	out.WriteString("</em>")
460}
461
462func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
463	if options.flags&HTML_SKIP_IMAGES != 0 {
464		return
465	}
466
467	out.WriteString("<img src=\"")
468	attrEscape(out, link)
469	out.WriteString("\" alt=\"")
470	if len(alt) > 0 {
471		attrEscape(out, alt)
472	}
473	if len(title) > 0 {
474		out.WriteString("\" title=\"")
475		attrEscape(out, title)
476	}
477
478	out.WriteByte('"')
479	out.WriteString(options.closeTag)
480	return
481}
482
483func (options *Html) LineBreak(out *bytes.Buffer) {
484	out.WriteString("<br")
485	out.WriteString(options.closeTag)
486}
487
488func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
489	if options.flags&HTML_SKIP_LINKS != 0 {
490		// write the link text out but don't link it, just mark it with typewriter font
491		out.WriteString("<tt>")
492		attrEscape(out, content)
493		out.WriteString("</tt>")
494		return
495	}
496
497	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
498		// write the link text out but don't link it, just mark it with typewriter font
499		out.WriteString("<tt>")
500		attrEscape(out, content)
501		out.WriteString("</tt>")
502		return
503	}
504
505	out.WriteString("<a href=\"")
506	attrEscape(out, link)
507	if len(title) > 0 {
508		out.WriteString("\" title=\"")
509		attrEscape(out, title)
510	}
511	if options.flags&HTML_NOFOLLOW_LINKS != 0 && !isRelativeLink(link) {
512		out.WriteString("\" rel=\"nofollow")
513	}
514	// blank target only add to external link
515	if options.flags&HTML_HREF_TARGET_BLANK != 0 && !isRelativeLink(link) {
516		out.WriteString("\" target=\"_blank")
517	}
518
519	out.WriteString("\">")
520	out.Write(content)
521	out.WriteString("</a>")
522	return
523}
524
525func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
526	if options.flags&HTML_SKIP_HTML != 0 {
527		return
528	}
529	if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") {
530		return
531	}
532	if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") {
533		return
534	}
535	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
536		return
537	}
538	out.Write(text)
539}
540
541func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
542	out.WriteString("<strong><em>")
543	out.Write(text)
544	out.WriteString("</em></strong>")
545}
546
547func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
548	out.WriteString("<del>")
549	out.Write(text)
550	out.WriteString("</del>")
551}
552
553func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
554	slug := slugify(ref)
555	out.WriteString(`<sup class="footnote-ref" id="fnref:`)
556	out.Write(slug)
557	out.WriteString(`"><a rel="footnote" href="#fn:`)
558	out.Write(slug)
559	out.WriteString(`">`)
560	out.WriteString(strconv.Itoa(id))
561	out.WriteString(`</a></sup>`)
562}
563
564func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
565	out.Write(entity)
566}
567
568func (options *Html) NormalText(out *bytes.Buffer, text []byte) {
569	if options.flags&HTML_USE_SMARTYPANTS != 0 {
570		options.Smartypants(out, text)
571	} else {
572		attrEscape(out, text)
573	}
574}
575
576func (options *Html) Smartypants(out *bytes.Buffer, text []byte) {
577	smrt := smartypantsData{false, false}
578
579	// first do normal entity escaping
580	var escaped bytes.Buffer
581	attrEscape(&escaped, text)
582	text = escaped.Bytes()
583
584	mark := 0
585	for i := 0; i < len(text); i++ {
586		if action := options.smartypants[text[i]]; action != nil {
587			if i > mark {
588				out.Write(text[mark:i])
589			}
590
591			previousChar := byte(0)
592			if i > 0 {
593				previousChar = text[i-1]
594			}
595			i += action(out, &smrt, previousChar, text[i:])
596			mark = i + 1
597		}
598	}
599
600	if mark < len(text) {
601		out.Write(text[mark:])
602	}
603}
604
605func (options *Html) DocumentHeader(out *bytes.Buffer) {
606	if options.flags&HTML_COMPLETE_PAGE == 0 {
607		return
608	}
609
610	ending := ""
611	if options.flags&HTML_USE_XHTML != 0 {
612		out.WriteString("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" ")
613		out.WriteString("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n")
614		out.WriteString("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n")
615		ending = " /"
616	} else {
617		out.WriteString("<!DOCTYPE html>\n")
618		out.WriteString("<html>\n")
619	}
620	out.WriteString("<head>\n")
621	out.WriteString("  <title>")
622	options.NormalText(out, []byte(options.title))
623	out.WriteString("</title>\n")
624	out.WriteString("  <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
625	out.WriteString(VERSION)
626	out.WriteString("\"")
627	out.WriteString(ending)
628	out.WriteString(">\n")
629	out.WriteString("  <meta charset=\"utf-8\"")
630	out.WriteString(ending)
631	out.WriteString(">\n")
632	if options.css != "" {
633		out.WriteString("  <link rel=\"stylesheet\" type=\"text/css\" href=\"")
634		attrEscape(out, []byte(options.css))
635		out.WriteString("\"")
636		out.WriteString(ending)
637		out.WriteString(">\n")
638	}
639	out.WriteString("</head>\n")
640	out.WriteString("<body>\n")
641
642	options.tocMarker = out.Len()
643}
644
645func (options *Html) DocumentFooter(out *bytes.Buffer) {
646	// finalize and insert the table of contents
647	if options.flags&HTML_TOC != 0 {
648		options.TocFinalize()
649
650		// now we have to insert the table of contents into the document
651		var temp bytes.Buffer
652
653		// start by making a copy of everything after the document header
654		temp.Write(out.Bytes()[options.tocMarker:])
655
656		// now clear the copied material from the main output buffer
657		out.Truncate(options.tocMarker)
658
659		// corner case spacing issue
660		if options.flags&HTML_COMPLETE_PAGE != 0 {
661			out.WriteByte('\n')
662		}
663
664		// insert the table of contents
665		out.WriteString("<nav>\n")
666		out.Write(options.toc.Bytes())
667		out.WriteString("</nav>\n")
668
669		// corner case spacing issue
670		if options.flags&HTML_COMPLETE_PAGE == 0 && options.flags&HTML_OMIT_CONTENTS == 0 {
671			out.WriteByte('\n')
672		}
673
674		// write out everything that came after it
675		if options.flags&HTML_OMIT_CONTENTS == 0 {
676			out.Write(temp.Bytes())
677		}
678	}
679
680	if options.flags&HTML_COMPLETE_PAGE != 0 {
681		out.WriteString("\n</body>\n")
682		out.WriteString("</html>\n")
683	}
684
685}
686
687func (options *Html) TocHeader(text []byte, level int) {
688	for level > options.currentLevel {
689		switch {
690		case bytes.HasSuffix(options.toc.Bytes(), []byte("</li>\n")):
691			// this sublist can nest underneath a header
692			size := options.toc.Len()
693			options.toc.Truncate(size - len("</li>\n"))
694
695		case options.currentLevel > 0:
696			options.toc.WriteString("<li>")
697		}
698		if options.toc.Len() > 0 {
699			options.toc.WriteByte('\n')
700		}
701		options.toc.WriteString("<ul>\n")
702		options.currentLevel++
703	}
704
705	for level < options.currentLevel {
706		options.toc.WriteString("</ul>")
707		if options.currentLevel > 1 {
708			options.toc.WriteString("</li>\n")
709		}
710		options.currentLevel--
711	}
712
713	options.toc.WriteString("<li><a href=\"#toc_")
714	options.toc.WriteString(strconv.Itoa(options.headerCount))
715	options.toc.WriteString("\">")
716	options.headerCount++
717
718	options.toc.Write(text)
719
720	options.toc.WriteString("</a></li>\n")
721}
722
723func (options *Html) TocFinalize() {
724	for options.currentLevel > 1 {
725		options.toc.WriteString("</ul></li>\n")
726		options.currentLevel--
727	}
728
729	if options.currentLevel > 0 {
730		options.toc.WriteString("</ul>\n")
731	}
732}
733
734func isHtmlTag(tag []byte, tagname string) bool {
735	found, _ := findHtmlTagPos(tag, tagname)
736	return found
737}
738
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of char
// in html at or after start that lies outside single, double and backtick
// quotes (the quoted text might be JavaScript).
//
// Each quote kind is toggled independently whenever its character is seen.
// When no such occurrence exists the function returns start, so a result
// equal to start is ambiguous between "found at start" and "not found".
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var inSingle, inDouble, inGrave bool

	for pos := start; pos < len(html); pos++ {
		c := html[pos]
		if c == char && !inSingle && !inDouble && !inGrave {
			return pos
		}
		switch c {
		case '\'':
			inSingle = !inSingle
		case '"':
			inDouble = !inDouble
		case '`':
			inGrave = !inGrave
		}
	}

	return start
}
761
762func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
763	i := 0
764	if i < len(tag) && tag[0] != '<' {
765		return false, -1
766	}
767	i++
768	i = skipSpace(tag, i)
769
770	if i < len(tag) && tag[i] == '/' {
771		i++
772	}
773
774	i = skipSpace(tag, i)
775	j := 0
776	for ; i < len(tag); i, j = i+1, j+1 {
777		if j >= len(tagname) {
778			break
779		}
780
781		if strings.ToLower(string(tag[i]))[0] != tagname[j] {
782			return false, -1
783		}
784	}
785
786	if i == len(tag) {
787		return false, -1
788	}
789
790	rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
791	if rightAngle > i {
792		return true, rightAngle
793	}
794
795	return false, -1
796}
797
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. It returns len(text) when there is none, and start itself
// when start is already at or past the end of text.
func skipUntilChar(text []byte, start int, char byte) int {
	if start >= len(text) {
		return start
	}
	if offset := bytes.IndexByte(text[start:], char); offset >= 0 {
		return start + offset
	}
	return len(text)
}
805
806func skipSpace(tag []byte, i int) int {
807	for i < len(tag) && isspace(tag[i]) {
808		i++
809	}
810	return i
811}
812
// doubleSpace appends a single newline to out unless out is empty. Block
// renderers call it first, so that a block is separated from whatever was
// written before it.
func doubleSpace(out *bytes.Buffer) {
	if out.Len() == 0 {
		return
	}
	out.WriteByte('\n')
}
818
// isRelativeLink reports whether link points inside the current site rather
// than at an external location. A link counts as relative when it is
//
//   - a fragment, i.e. it begins with '#';
//   - the root path "/"; or
//   - an absolute path that begins with '/' but not "//" (the latter may be
//     a protocol-relative link to another host).
//
// Everything else is reported as not relative. That includes the empty link,
// which previously caused an index-out-of-range panic.
func isRelativeLink(link []byte) (yes bool) {
	if len(link) == 0 {
		return false
	}

	switch link[0] {
	case '#':
		return true
	case '/':
		return len(link) == 1 || link[1] != '/'
	}
	return false
}