all repos — grayfriday @ 46b7355a788e9af2207ab63452dbb1c236e31f6d

blackfriday fork with a few changes

markdown.go (view raw)

   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11//
  12// Markdown parsing and processing
  13//
  14//
  15
  16// Package blackfriday is a markdown processor.
  17//
  18// Translates plain text with simple formatting rules into HTML or LaTeX.
  19package blackfriday
  20
  21import (
  22	"bytes"
  23	"fmt"
  24	"io"
  25	"strings"
  26	"unicode/utf8"
  27)
  28
  29// Version string of the package.
  30const Version = "2.0"
  31
  32// Extensions is a bitwise or'ed collection of enabled Blackfriday's
  33// extensions.
  34type Extensions int
  35
  36// These are the supported markdown parsing extensions.
  37// OR these values together to select multiple extensions.
  38const (
  39	NoExtensions            Extensions = 0
  40	NoIntraEmphasis         Extensions = 1 << iota // Ignore emphasis markers inside words
  41	Tables                                         // Render tables
  42	FencedCode                                     // Render fenced code blocks
  43	Autolink                                       // Detect embedded URLs that are not explicitly marked
  44	Strikethrough                                  // Strikethrough text using ~~test~~
  45	LaxHTMLBlocks                                  // Loosen up HTML block parsing rules
  46	SpaceHeaders                                   // Be strict about prefix header rules
  47	HardLineBreak                                  // Translate newlines into line breaks
  48	TabSizeEight                                   // Expand tabs to eight spaces instead of four
  49	Footnotes                                      // Pandoc-style footnotes
  50	NoEmptyLineBeforeBlock                         // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
  51	HeaderIDs                                      // specify header IDs  with {#id}
  52	Titleblock                                     // Titleblock ala pandoc
  53	AutoHeaderIDs                                  // Create the header ID from the text
  54	BackslashLineBreak                             // Translate trailing backslashes into line breaks
  55	DefinitionLists                                // Render definition lists
  56	Smartypants                                    // Enable smart punctuation substitutions
  57	SmartypantsFractions                           // Enable smart fractions (with Smartypants)
  58	SmartypantsDashes                              // Enable smart dashes (with Smartypants)
  59	SmartypantsLatexDashes                         // Enable LaTeX-style dashes (with Smartypants)
  60	SmartypantsAngledQuotes                        // Enable angled double quotes (with Smartypants) for double quotes rendering
  61	TOC                                            // Generate a table of contents
  62	OmitContents                                   // Skip the main contents (for a standalone table of contents)
  63
  64	CommonHTMLFlags HTMLFlags = UseXHTML
  65
  66	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
  67		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
  68		BackslashLineBreak | DefinitionLists | Smartypants |
  69		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
  70)
  71
  72// DefaultOptions is a convenience variable with all the options that are
  73// enabled by default.
  74var DefaultOptions = Options{
  75	Extensions: CommonExtensions,
  76}
  77
  78// TODO: this should probably be unexported. Or moved to node.go
  79type LinkType int
  80
  81// These are the possible flag values for the link renderer.
  82// Only a single one of these values will be used; they are not ORed together.
  83// These are mostly of interest if you are writing a new output format.
  84const (
  85	LinkTypeNotAutolink LinkType = iota
  86	LinkTypeNormal
  87	LinkTypeEmail
  88)
  89
  90// ListType contains bitwise or'ed flags for list and list item objects.
  91type ListType int
  92
  93// These are the possible flag values for the ListItem renderer.
  94// Multiple flag values may be ORed together.
  95// These are mostly of interest if you are writing a new output format.
  96const (
  97	ListTypeOrdered ListType = 1 << iota
  98	ListTypeDefinition
  99	ListTypeTerm
 100
 101	ListItemContainsBlock
 102	ListItemBeginningOfList
 103	ListItemEndOfList
 104)
 105
 106// CellAlignFlags holds a type of alignment in a table cell.
 107type CellAlignFlags int
 108
 109// These are the possible flag values for the table cell renderer.
 110// Only a single one of these values will be used; they are not ORed together.
 111// These are mostly of interest if you are writing a new output format.
 112const (
 113	TableAlignmentLeft = 1 << iota
 114	TableAlignmentRight
 115	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
 116)
 117
 118// The size of a tab stop.
 119const (
 120	TabSizeDefault = 4
 121	TabSizeDouble  = 8
 122)
 123
 124// blockTags is a set of tags that are recognized as HTML block tags.
 125// Any of these can be included in markdown text without special escaping.
 126var blockTags = map[string]struct{}{
 127	"blockquote": struct{}{},
 128	"del":        struct{}{},
 129	"div":        struct{}{},
 130	"dl":         struct{}{},
 131	"fieldset":   struct{}{},
 132	"form":       struct{}{},
 133	"h1":         struct{}{},
 134	"h2":         struct{}{},
 135	"h3":         struct{}{},
 136	"h4":         struct{}{},
 137	"h5":         struct{}{},
 138	"h6":         struct{}{},
 139	"iframe":     struct{}{},
 140	"ins":        struct{}{},
 141	"math":       struct{}{},
 142	"noscript":   struct{}{},
 143	"ol":         struct{}{},
 144	"pre":        struct{}{},
 145	"p":          struct{}{},
 146	"script":     struct{}{},
 147	"style":      struct{}{},
 148	"table":      struct{}{},
 149	"ul":         struct{}{},
 150
 151	// HTML5
 152	"address":    struct{}{},
 153	"article":    struct{}{},
 154	"aside":      struct{}{},
 155	"canvas":     struct{}{},
 156	"figcaption": struct{}{},
 157	"figure":     struct{}{},
 158	"footer":     struct{}{},
 159	"header":     struct{}{},
 160	"hgroup":     struct{}{},
 161	"main":       struct{}{},
 162	"nav":        struct{}{},
 163	"output":     struct{}{},
 164	"progress":   struct{}{},
 165	"section":    struct{}{},
 166	"video":      struct{}{},
 167}
 168
 169// Renderer is the rendering interface.
 170// This is mostly of interest if you are implementing a new rendering format.
 171//
 172// When a byte slice is provided, it contains the (rendered) contents of the
 173// element.
 174//
 175// When a callback is provided instead, it will write the contents of the
 176// respective element directly to the output buffer and return true on success.
 177// If the callback returns false, the rendering function should reset the
 178// output buffer as though it had never been called.
 179//
 180// Currently HTML and Latex implementations are provided
 181type Renderer interface {
 182	Render(ast *Node) []byte
 183	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
 184}
 185
 186// Callback functions for inline parsing. One such function is defined
 187// for each character that triggers a response when parsing inline data.
 188type inlineParser func(p *parser, data []byte, offset int) int
 189
 190// Parser holds runtime state used by the parser.
 191// This is constructed by the Markdown function.
 192type parser struct {
 193	refOverride    ReferenceOverrideFunc
 194	refs           map[string]*reference
 195	inlineCallback [256]inlineParser
 196	flags          Extensions
 197	nesting        int
 198	maxNesting     int
 199	insideLink     bool
 200
 201	// Footnotes need to be ordered as well as available to quickly check for
 202	// presence. If a ref is also a footnote, it's stored both in refs and here
 203	// in notes. Slice is nil if footnotes not enabled.
 204	notes []*reference
 205
 206	doc                  *Node
 207	tip                  *Node // = doc
 208	oldTip               *Node
 209	lastMatchedContainer *Node // = doc
 210	allClosed            bool
 211	currBlock            *Node // a block node currently being parsed by inline parser
 212}
 213
 214func (p *parser) getRef(refid string) (ref *reference, found bool) {
 215	if p.refOverride != nil {
 216		r, overridden := p.refOverride(refid)
 217		if overridden {
 218			if r == nil {
 219				return nil, false
 220			}
 221			return &reference{
 222				link:     []byte(r.Link),
 223				title:    []byte(r.Title),
 224				noteID:   0,
 225				hasBlock: false,
 226				text:     []byte(r.Text)}, true
 227		}
 228	}
 229	// refs are case insensitive
 230	ref, found = p.refs[strings.ToLower(refid)]
 231	return ref, found
 232}
 233
 234func (p *parser) finalize(block *Node) {
 235	above := block.Parent
 236	block.open = false
 237	p.tip = above
 238}
 239
 240func (p *parser) addChild(node NodeType, offset uint32) *Node {
 241	for !p.tip.canContain(node) {
 242		p.finalize(p.tip)
 243	}
 244	newNode := NewNode(node)
 245	newNode.content = []byte{}
 246	p.tip.appendChild(newNode)
 247	p.tip = newNode
 248	return newNode
 249}
 250
 251func (p *parser) closeUnmatchedBlocks() {
 252	if !p.allClosed {
 253		for p.oldTip != p.lastMatchedContainer {
 254			parent := p.oldTip.Parent
 255			p.finalize(p.oldTip)
 256			p.oldTip = parent
 257		}
 258		p.allClosed = true
 259	}
 260}
 261
 262//
 263//
 264// Public interface
 265//
 266//
 267
 268// Reference represents the details of a link.
 269// See the documentation in Options for more details on use-case.
 270type Reference struct {
 271	// Link is usually the URL the reference points to.
 272	Link string
 273	// Title is the alternate text describing the link in more detail.
 274	Title string
 275	// Text is the optional text to override the ref with if the syntax used was
 276	// [refid][]
 277	Text string
 278}
 279
 280// ReferenceOverrideFunc is expected to be called with a reference string and
 281// return either a valid Reference type that the reference string maps to or
 282// nil. If overridden is false, the default reference logic will be executed.
 283// See the documentation in Options for more details on use-case.
 284type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
 285
 286// Options represents configurable overrides and callbacks (in addition to the
 287// extension flag set) for configuring a Markdown parse.
 288type Options struct {
 289	// Extensions is a flag set of bit-wise ORed extension bits. See the
 290	// Extensions flags defined in this package.
 291	Extensions Extensions
 292
 293	// ReferenceOverride is an optional function callback that is called every
 294	// time a reference is resolved.
 295	//
 296	// In Markdown, the link reference syntax can be made to resolve a link to
 297	// a reference instead of an inline URL, in one of the following ways:
 298	//
 299	//  * [link text][refid]
 300	//  * [refid][]
 301	//
 302	// Usually, the refid is defined at the bottom of the Markdown document. If
 303	// this override function is provided, the refid is passed to the override
 304	// function first, before consulting the defined refids at the bottom. If
 305	// the override function indicates an override did not occur, the refids at
 306	// the bottom will be used to fill in the link details.
 307	ReferenceOverride ReferenceOverrideFunc
 308}
 309
 310// MarkdownBasic is a convenience function for simple rendering.
 311// It processes markdown input with no extensions enabled.
 312func MarkdownBasic(input []byte) []byte {
 313	// set up the HTML renderer
 314	renderer := NewHTMLRenderer(HTMLRendererParameters{
 315		Flags:      UseXHTML,
 316		Extensions: CommonExtensions,
 317	})
 318
 319	// set up the parser
 320	return Markdown(input, renderer, Options{})
 321}
 322
 323// MarkdownCommon is a convenience function for simple rendering. It calls
 324// Markdown with most useful extensions enabled, including:
 325//
 326// * Smartypants processing with smart fractions and LaTeX dashes
 327//
 328// * Intra-word emphasis suppression
 329//
 330// * Tables
 331//
 332// * Fenced code blocks
 333//
 334// * Autolinking
 335//
 336// * Strikethrough support
 337//
 338// * Strict header parsing
 339//
 340// * Custom Header IDs
 341func MarkdownCommon(input []byte) []byte {
 342	// set up the HTML renderer
 343	renderer := NewHTMLRenderer(HTMLRendererParameters{
 344		Flags:      CommonHTMLFlags,
 345		Extensions: CommonExtensions,
 346	})
 347	return Markdown(input, renderer, DefaultOptions)
 348}
 349
 350// Markdown is the main rendering function.
 351// It parses and renders a block of markdown-encoded text.
 352// The supplied Renderer is used to format the output, and extensions dictates
 353// which non-standard extensions are enabled.
 354//
 355// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
 356// NewLatexRenderer, respectively.
 357func Markdown(input []byte, renderer Renderer, options Options) []byte {
 358	if renderer == nil {
 359		return nil
 360	}
 361	return renderer.Render(Parse(input, options))
 362}
 363
 364// Parse is an entry point to the parsing part of Blackfriday. It takes an
 365// input markdown document and produces a syntax tree for its contents. This
 366// tree can then be rendered with a default or custom renderer, or
 367// analyzed/transformed by the caller to whatever non-standard needs they have.
 368func Parse(input []byte, opts Options) *Node {
 369	extensions := opts.Extensions
 370
 371	// fill in the render structure
 372	p := new(parser)
 373	p.flags = extensions
 374	p.refOverride = opts.ReferenceOverride
 375	p.refs = make(map[string]*reference)
 376	p.maxNesting = 16
 377	p.insideLink = false
 378
 379	docNode := NewNode(Document)
 380	p.doc = docNode
 381	p.tip = docNode
 382	p.oldTip = docNode
 383	p.lastMatchedContainer = docNode
 384	p.allClosed = true
 385
 386	// register inline parsers
 387	p.inlineCallback['*'] = emphasis
 388	p.inlineCallback['_'] = emphasis
 389	if extensions&Strikethrough != 0 {
 390		p.inlineCallback['~'] = emphasis
 391	}
 392	p.inlineCallback['`'] = codeSpan
 393	p.inlineCallback['\n'] = lineBreak
 394	p.inlineCallback['['] = link
 395	p.inlineCallback['<'] = leftAngle
 396	p.inlineCallback['\\'] = escape
 397	p.inlineCallback['&'] = entity
 398	p.inlineCallback['!'] = maybeImage
 399	p.inlineCallback['^'] = maybeInlineFootnote
 400
 401	if extensions&Autolink != 0 {
 402		p.inlineCallback['h'] = maybeAutoLink
 403		p.inlineCallback['m'] = maybeAutoLink
 404		p.inlineCallback['f'] = maybeAutoLink
 405		p.inlineCallback['H'] = maybeAutoLink
 406		p.inlineCallback['M'] = maybeAutoLink
 407		p.inlineCallback['F'] = maybeAutoLink
 408	}
 409
 410	if extensions&Footnotes != 0 {
 411		p.notes = make([]*reference, 0)
 412	}
 413
 414	first := firstPass(p, input)
 415	secondPass(p, first)
 416	// Walk the tree and finish up some of unfinished blocks
 417	for p.tip != nil {
 418		p.finalize(p.tip)
 419	}
 420	// Walk the tree again and process inline markdown in each block
 421	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
 422		if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
 423			p.currBlock = node
 424			p.inline(node.content)
 425			node.content = nil
 426		}
 427		return GoToNext
 428	})
 429	p.parseRefsToAST()
 430	p.generateTOC()
 431	return p.doc
 432}
 433
 434func (p *parser) generateTOC() {
 435	if p.flags&TOC == 0 && p.flags&OmitContents == 0 {
 436		return
 437	}
 438	navNode := NewNode(HTMLBlock)
 439	navNode.Literal = []byte("<nav>")
 440	navNode.open = false
 441
 442	var topList *Node
 443	var listNode *Node
 444	var lastItem *Node
 445	headerCount := 0
 446	currentLevel := 0
 447	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
 448		if entering && node.Type == Header {
 449			if node.Level > currentLevel {
 450				currentLevel++
 451				newList := NewNode(List)
 452				if lastItem != nil {
 453					lastItem.appendChild(newList)
 454					listNode = newList
 455				} else {
 456					listNode = newList
 457					topList = listNode
 458				}
 459			}
 460			if node.Level < currentLevel {
 461				finalizeList(listNode)
 462				lastItem = listNode.Parent
 463				listNode = lastItem.Parent
 464			}
 465			node.HeaderID = fmt.Sprintf("toc_%d", headerCount)
 466			headerCount++
 467			lastItem = NewNode(Item)
 468			listNode.appendChild(lastItem)
 469			anchorNode := NewNode(Link)
 470			anchorNode.Destination = []byte("#" + node.HeaderID)
 471			lastItem.appendChild(anchorNode)
 472			anchorNode.appendChild(text(node.FirstChild.Literal))
 473		}
 474		return GoToNext
 475	})
 476	firstChild := p.doc.FirstChild
 477	// Insert TOC only if there is anything to insert
 478	if topList != nil {
 479		finalizeList(topList)
 480		firstChild.insertBefore(navNode)
 481		firstChild.insertBefore(topList)
 482		navCloseNode := NewNode(HTMLBlock)
 483		navCloseNode.Literal = []byte("</nav>")
 484		navCloseNode.open = false
 485		firstChild.insertBefore(navCloseNode)
 486	}
 487	// Drop everything after the TOC if OmitContents was requested
 488	if p.flags&OmitContents != 0 {
 489		for firstChild != nil {
 490			next := firstChild.Next
 491			firstChild.unlink()
 492			firstChild = next
 493		}
 494	}
 495}
 496
 497func (p *parser) parseRefsToAST() {
 498	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
 499		return
 500	}
 501	p.tip = p.doc
 502	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
 503	p.addBlock(HorizontalRule, nil)
 504	block := p.addBlock(List, nil)
 505	block.ListFlags = ListTypeOrdered
 506	flags := ListItemBeginningOfList
 507	// Note: this loop is intentionally explicit, not range-form. This is
 508	// because the body of the loop will append nested footnotes to p.notes and
 509	// we need to process those late additions. Range form would only walk over
 510	// the fixed initial set.
 511	for i := 0; i < len(p.notes); i++ {
 512		ref := p.notes[i]
 513		block := p.addBlock(Item, nil)
 514		block.ListFlags = ListTypeOrdered
 515		block.RefLink = ref.link
 516		if ref.hasBlock {
 517			flags |= ListItemContainsBlock
 518			p.block(ref.title)
 519		} else {
 520			p.currBlock = block
 521			p.inline(ref.title)
 522		}
 523		flags &^= ListItemBeginningOfList | ListItemContainsBlock
 524	}
 525	above := block.Parent
 526	finalizeList(block)
 527	p.tip = above
 528	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte("</div>")))
 529	block.Walk(func(node *Node, entering bool) WalkStatus {
 530		if node.Type == Paragraph || node.Type == Header {
 531			p.currBlock = node
 532			p.inline(node.content)
 533			node.content = nil
 534		}
 535		return GoToNext
 536	})
 537}
 538
 539// first pass:
 540// - normalize newlines
 541// - extract references (outside of fenced code blocks)
 542// - expand tabs (outside of fenced code blocks)
 543// - copy everything else
 544func firstPass(p *parser, input []byte) []byte {
 545	var out bytes.Buffer
 546	tabSize := TabSizeDefault
 547	if p.flags&TabSizeEight != 0 {
 548		tabSize = TabSizeDouble
 549	}
 550	beg := 0
 551	lastFencedCodeBlockEnd := 0
 552	for beg < len(input) {
 553		// Find end of this line, then process the line.
 554		end := beg
 555		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
 556			end++
 557		}
 558
 559		if p.flags&FencedCode != 0 {
 560			// track fenced code block boundaries to suppress tab expansion
 561			// and reference extraction inside them:
 562			if beg >= lastFencedCodeBlockEnd {
 563				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
 564					lastFencedCodeBlockEnd = beg + i
 565				}
 566			}
 567		}
 568
 569		// add the line body if present
 570		if end > beg {
 571			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
 572				out.Write(input[beg:end])
 573			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
 574				beg += refEnd
 575				continue
 576			} else {
 577				expandTabs(&out, input[beg:end], tabSize)
 578			}
 579		}
 580
 581		if end < len(input) && input[end] == '\r' {
 582			end++
 583		}
 584		if end < len(input) && input[end] == '\n' {
 585			end++
 586		}
 587		out.WriteByte('\n')
 588
 589		beg = end
 590	}
 591
 592	// empty input?
 593	if out.Len() == 0 {
 594		out.WriteByte('\n')
 595	}
 596
 597	return out.Bytes()
 598}
 599
 600// second pass: actual rendering
 601func secondPass(p *parser, input []byte) {
 602	p.block(input)
 603
 604	if p.flags&Footnotes != 0 && len(p.notes) > 0 {
 605		flags := ListItemBeginningOfList
 606		for i := 0; i < len(p.notes); i++ {
 607			ref := p.notes[i]
 608			if ref.hasBlock {
 609				flags |= ListItemContainsBlock
 610				p.block(ref.title)
 611			} else {
 612				p.inline(ref.title)
 613			}
 614			flags &^= ListItemBeginningOfList | ListItemContainsBlock
 615		}
 616	}
 617
 618	if p.nesting != 0 {
 619		panic("Nesting level did not end at zero")
 620	}
 621}
 622
 623//
 624// Link references
 625//
 626// This section implements support for references that (usually) appear
 627// as footnotes in a document, and can be referenced anywhere in the document.
 628// The basic format is:
 629//
 630//    [1]: http://www.google.com/ "Google"
 631//    [2]: http://www.github.com/ "Github"
 632//
 633// Anywhere in the document, the reference can be linked by referring to its
 634// label, i.e., 1 and 2 in this example, as in:
 635//
 636//    This library is hosted on [Github][2], a git hosting site.
 637//
 638// Actual footnotes as specified in Pandoc and supported by some other Markdown
 639// libraries such as php-markdown are also taken care of. They look like this:
 640//
 641//    This sentence needs a bit of further explanation.[^note]
 642//
 643//    [^note]: This is the explanation.
 644//
 645// Footnotes should be placed at the end of the document in an ordered list.
 646// Inline footnotes such as:
 647//
 648//    Inline footnotes^[Not supported.] also exist.
 649//
 650// are not yet supported.
 651
 652// References are parsed and stored in this struct.
 653type reference struct {
 654	link     []byte
 655	title    []byte
 656	noteID   int // 0 if not a footnote ref
 657	hasBlock bool
 658	text     []byte
 659}
 660
 661func (r *reference) String() string {
 662	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
 663		r.link, r.title, r.text, r.noteID, r.hasBlock)
 664}
 665
 666// Check whether or not data starts with a reference link.
 667// If so, it is parsed and stored in the list of references
 668// (in the render struct).
 669// Returns the number of bytes to skip to move past it,
 670// or zero if the first line is not a reference.
 671func isReference(p *parser, data []byte, tabSize int) int {
 672	// up to 3 optional leading spaces
 673	if len(data) < 4 {
 674		return 0
 675	}
 676	i := 0
 677	for i < 3 && data[i] == ' ' {
 678		i++
 679	}
 680
 681	noteID := 0
 682
 683	// id part: anything but a newline between brackets
 684	if data[i] != '[' {
 685		return 0
 686	}
 687	i++
 688	if p.flags&Footnotes != 0 {
 689		if i < len(data) && data[i] == '^' {
 690			// we can set it to anything here because the proper noteIds will
 691			// be assigned later during the second pass. It just has to be != 0
 692			noteID = 1
 693			i++
 694		}
 695	}
 696	idOffset := i
 697	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
 698		i++
 699	}
 700	if i >= len(data) || data[i] != ']' {
 701		return 0
 702	}
 703	idEnd := i
 704
 705	// spacer: colon (space | tab)* newline? (space | tab)*
 706	i++
 707	if i >= len(data) || data[i] != ':' {
 708		return 0
 709	}
 710	i++
 711	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 712		i++
 713	}
 714	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
 715		i++
 716		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
 717			i++
 718		}
 719	}
 720	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 721		i++
 722	}
 723	if i >= len(data) {
 724		return 0
 725	}
 726
 727	var (
 728		linkOffset, linkEnd   int
 729		titleOffset, titleEnd int
 730		lineEnd               int
 731		raw                   []byte
 732		hasBlock              bool
 733	)
 734
 735	if p.flags&Footnotes != 0 && noteID != 0 {
 736		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
 737		lineEnd = linkEnd
 738	} else {
 739		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
 740	}
 741	if lineEnd == 0 {
 742		return 0
 743	}
 744
 745	// a valid ref has been found
 746
 747	ref := &reference{
 748		noteID:   noteID,
 749		hasBlock: hasBlock,
 750	}
 751
 752	if noteID > 0 {
 753		// reusing the link field for the id since footnotes don't have links
 754		ref.link = data[idOffset:idEnd]
 755		// if footnote, it's not really a title, it's the contained text
 756		ref.title = raw
 757	} else {
 758		ref.link = data[linkOffset:linkEnd]
 759		ref.title = data[titleOffset:titleEnd]
 760	}
 761
 762	// id matches are case-insensitive
 763	id := string(bytes.ToLower(data[idOffset:idEnd]))
 764
 765	p.refs[id] = ref
 766
 767	return lineEnd
 768}
 769
 770func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
 771	// link: whitespace-free sequence, optionally between angle brackets
 772	if data[i] == '<' {
 773		i++
 774	}
 775	linkOffset = i
 776	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
 777		i++
 778	}
 779	if i == len(data) {
 780		return
 781	}
 782	linkEnd = i
 783	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
 784		linkOffset++
 785		linkEnd--
 786	}
 787
 788	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
 789	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 790		i++
 791	}
 792	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
 793		return
 794	}
 795
 796	// compute end-of-line
 797	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
 798		lineEnd = i
 799	}
 800	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
 801		lineEnd++
 802	}
 803
 804	// optional (space|tab)* spacer after a newline
 805	if lineEnd > 0 {
 806		i = lineEnd + 1
 807		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 808			i++
 809		}
 810	}
 811
 812	// optional title: any non-newline sequence enclosed in '"() alone on its line
 813	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
 814		i++
 815		titleOffset = i
 816
 817		// look for EOL
 818		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
 819			i++
 820		}
 821		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
 822			titleEnd = i + 1
 823		} else {
 824			titleEnd = i
 825		}
 826
 827		// step back
 828		i--
 829		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
 830			i--
 831		}
 832		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
 833			lineEnd = titleEnd
 834			titleEnd = i
 835		}
 836	}
 837
 838	return
 839}
 840
 841// The first bit of this logic is the same as (*parser).listItem, but the rest
 842// is much simpler. This function simply finds the entire block and shifts it
 843// over by one tab if it is indeed a block (just returns the line if it's not).
 844// blockEnd is the end of the section in the input buffer, and contents is the
 845// extracted text that was shifted over one tab. It will need to be rendered at
 846// the end of the document.
 847func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
 848	if i == 0 || len(data) == 0 {
 849		return
 850	}
 851
 852	// skip leading whitespace on first line
 853	for i < len(data) && data[i] == ' ' {
 854		i++
 855	}
 856
 857	blockStart = i
 858
 859	// find the end of the line
 860	blockEnd = i
 861	for i < len(data) && data[i-1] != '\n' {
 862		i++
 863	}
 864
 865	// get working buffer
 866	var raw bytes.Buffer
 867
 868	// put the first line into the working buffer
 869	raw.Write(data[blockEnd:i])
 870	blockEnd = i
 871
 872	// process the following lines
 873	containsBlankLine := false
 874
 875gatherLines:
 876	for blockEnd < len(data) {
 877		i++
 878
 879		// find the end of this line
 880		for i < len(data) && data[i-1] != '\n' {
 881			i++
 882		}
 883
 884		// if it is an empty line, guess that it is part of this item
 885		// and move on to the next line
 886		if p.isEmpty(data[blockEnd:i]) > 0 {
 887			containsBlankLine = true
 888			blockEnd = i
 889			continue
 890		}
 891
 892		n := 0
 893		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
 894			// this is the end of the block.
 895			// we don't want to include this last line in the index.
 896			break gatherLines
 897		}
 898
 899		// if there were blank lines before this one, insert a new one now
 900		if containsBlankLine {
 901			raw.WriteByte('\n')
 902			containsBlankLine = false
 903		}
 904
 905		// get rid of that first tab, write to buffer
 906		raw.Write(data[blockEnd+n : i])
 907		hasBlock = true
 908
 909		blockEnd = i
 910	}
 911
 912	if data[blockEnd-1] != '\n' {
 913		raw.WriteByte('\n')
 914	}
 915
 916	contents = raw.Bytes()
 917
 918	return
 919}
 920
 921//
 922//
 923// Miscellaneous helper functions
 924//
 925//
 926
 927// Test if a character is a punctuation symbol.
 928// Taken from a private function in regexp in the stdlib.
 929func ispunct(c byte) bool {
 930	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
 931		if c == r {
 932			return true
 933		}
 934	}
 935	return false
 936}
 937
 938// Test if a character is a whitespace character.
 939func isspace(c byte) bool {
 940	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
 941}
 942
 943// Test if a character is letter.
 944func isletter(c byte) bool {
 945	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 946}
 947
 948// Test if a character is a letter or a digit.
 949// TODO: check when this is looking for ASCII alnum and when it should use unicode
 950func isalnum(c byte) bool {
 951	return (c >= '0' && c <= '9') || isletter(c)
 952}
 953
 954// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
 955// always ends output with a newline
 956func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
 957	// first, check for common cases: no tabs, or only tabs at beginning of line
 958	i, prefix := 0, 0
 959	slowcase := false
 960	for i = 0; i < len(line); i++ {
 961		if line[i] == '\t' {
 962			if prefix == i {
 963				prefix++
 964			} else {
 965				slowcase = true
 966				break
 967			}
 968		}
 969	}
 970
 971	// no need to decode runes if all tabs are at the beginning of the line
 972	if !slowcase {
 973		for i = 0; i < prefix*tabSize; i++ {
 974			out.WriteByte(' ')
 975		}
 976		out.Write(line[prefix:])
 977		return
 978	}
 979
 980	// the slow case: we need to count runes to figure out how
 981	// many spaces to insert for each tab
 982	column := 0
 983	i = 0
 984	for i < len(line) {
 985		start := i
 986		for i < len(line) && line[i] != '\t' {
 987			_, size := utf8.DecodeRune(line[i:])
 988			i += size
 989			column++
 990		}
 991
 992		if i > start {
 993			out.Write(line[start:i])
 994		}
 995
 996		if i >= len(line) {
 997			break
 998		}
 999
1000		for {
1001			out.WriteByte(' ')
1002			column++
1003			if column%tabSize == 0 {
1004				break
1005			}
1006		}
1007
1008		i++
1009	}
1010}
1011
1012// Find if a line counts as indented or not.
1013// Returns number of characters the indent is (0 = not indented).
1014func isIndented(data []byte, indentSize int) int {
1015	if len(data) == 0 {
1016		return 0
1017	}
1018	if data[0] == '\t' {
1019		return 1
1020	}
1021	if len(data) < indentSize {
1022		return 0
1023	}
1024	for i := 0; i < indentSize; i++ {
1025		if data[i] != ' ' {
1026			return 0
1027		}
1028	}
1029	return indentSize
1030}
1031
1032// Create a url-safe slug for fragments
1033func slugify(in []byte) []byte {
1034	if len(in) == 0 {
1035		return in
1036	}
1037	out := make([]byte, 0, len(in))
1038	sym := false
1039
1040	for _, ch := range in {
1041		if isalnum(ch) {
1042			sym = false
1043			out = append(out, ch)
1044		} else if sym {
1045			continue
1046		} else {
1047			out = append(out, '-')
1048			sym = true
1049		}
1050	}
1051	var a, b int
1052	var ch byte
1053	for a, ch = range out {
1054		if ch != '-' {
1055			break
1056		}
1057	}
1058	for b = len(out) - 1; b > 0; b-- {
1059		if out[b] != '-' {
1060			break
1061		}
1062	}
1063	return out[a : b+1]
1064}