all repos — grayfriday @ bd774a209a10d88da06adb3fa0fe87dc19902705

blackfriday fork with a few changes

markdown.go (view raw)

   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11//
  12// Markdown parsing and processing
  13//
  14//
  15
  16// Package blackfriday is a markdown processor.
  17//
  18// Translates plain text with simple formatting rules into HTML or LaTeX.
  19package blackfriday
  20
  21import (
  22	"bytes"
  23	"fmt"
  24	"io"
  25	"strings"
  26	"unicode/utf8"
  27)
  28
  29// Version string of the package.
  30const Version = "2.0"
  31
  32// Extensions is a bitwise or'ed collection of enabled Blackfriday's
  33// extensions.
  34type Extensions int
  35
  36// These are the supported markdown parsing extensions.
  37// OR these values together to select multiple extensions.
  38const (
  39	NoExtensions            Extensions = 0
  40	NoIntraEmphasis         Extensions = 1 << iota // Ignore emphasis markers inside words
  41	Tables                                         // Render tables
  42	FencedCode                                     // Render fenced code blocks
  43	Autolink                                       // Detect embedded URLs that are not explicitly marked
  44	Strikethrough                                  // Strikethrough text using ~~test~~
  45	LaxHTMLBlocks                                  // Loosen up HTML block parsing rules
  46	SpaceHeaders                                   // Be strict about prefix header rules
  47	HardLineBreak                                  // Translate newlines into line breaks
  48	TabSizeEight                                   // Expand tabs to eight spaces instead of four
  49	Footnotes                                      // Pandoc-style footnotes
  50	NoEmptyLineBeforeBlock                         // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
  51	HeaderIDs                                      // specify header IDs  with {#id}
  52	Titleblock                                     // Titleblock ala pandoc
  53	AutoHeaderIDs                                  // Create the header ID from the text
  54	BackslashLineBreak                             // Translate trailing backslashes into line breaks
  55	DefinitionLists                                // Render definition lists
  56	Smartypants                                    // Enable smart punctuation substitutions
  57	SmartypantsFractions                           // Enable smart fractions (with Smartypants)
  58	SmartypantsDashes                              // Enable smart dashes (with Smartypants)
  59	SmartypantsLatexDashes                         // Enable LaTeX-style dashes (with Smartypants)
  60	SmartypantsAngledQuotes                        // Enable angled double quotes (with Smartypants) for double quotes rendering
  61	TOC                                            // Generate a table of contents
  62	OmitContents                                   // Skip the main contents (for a standalone table of contents)
  63
  64	CommonHTMLFlags HTMLFlags = UseXHTML
  65
  66	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
  67		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
  68		BackslashLineBreak | DefinitionLists | Smartypants |
  69		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
  70)
  71
  72// DefaultOptions is a convenience variable with all the options that are
  73// enabled by default.
  74var DefaultOptions = Options{
  75	Extensions: CommonExtensions,
  76}
  77
  78// TODO: this should probably be unexported. Or moved to node.go
  79type LinkType int
  80
  81// These are the possible flag values for the link renderer.
  82// Only a single one of these values will be used; they are not ORed together.
  83// These are mostly of interest if you are writing a new output format.
  84const (
  85	LinkTypeNotAutolink LinkType = iota
  86	LinkTypeNormal
  87	LinkTypeEmail
  88)
  89
  90// ListType contains bitwise or'ed flags for list and list item objects.
  91type ListType int
  92
  93// These are the possible flag values for the ListItem renderer.
  94// Multiple flag values may be ORed together.
  95// These are mostly of interest if you are writing a new output format.
  96const (
  97	ListTypeOrdered ListType = 1 << iota
  98	ListTypeDefinition
  99	ListTypeTerm
 100
 101	ListItemContainsBlock
 102	ListItemBeginningOfList
 103	ListItemEndOfList
 104)
 105
 106// CellAlignFlags holds a type of alignment in a table cell.
 107type CellAlignFlags int
 108
 109// These are the possible flag values for the table cell renderer.
 110// Only a single one of these values will be used; they are not ORed together.
 111// These are mostly of interest if you are writing a new output format.
 112const (
 113	TableAlignmentLeft = 1 << iota
 114	TableAlignmentRight
 115	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
 116)
 117
 118// The size of a tab stop.
 119const (
 120	TabSizeDefault = 4
 121	TabSizeDouble  = 8
 122)
 123
 124// blockTags is a set of tags that are recognized as HTML block tags.
 125// Any of these can be included in markdown text without special escaping.
 126var blockTags = map[string]struct{}{
 127	"blockquote": struct{}{},
 128	"del":        struct{}{},
 129	"div":        struct{}{},
 130	"dl":         struct{}{},
 131	"fieldset":   struct{}{},
 132	"form":       struct{}{},
 133	"h1":         struct{}{},
 134	"h2":         struct{}{},
 135	"h3":         struct{}{},
 136	"h4":         struct{}{},
 137	"h5":         struct{}{},
 138	"h6":         struct{}{},
 139	"iframe":     struct{}{},
 140	"ins":        struct{}{},
 141	"math":       struct{}{},
 142	"noscript":   struct{}{},
 143	"ol":         struct{}{},
 144	"pre":        struct{}{},
 145	"p":          struct{}{},
 146	"script":     struct{}{},
 147	"style":      struct{}{},
 148	"table":      struct{}{},
 149	"ul":         struct{}{},
 150
 151	// HTML5
 152	"address":    struct{}{},
 153	"article":    struct{}{},
 154	"aside":      struct{}{},
 155	"canvas":     struct{}{},
 156	"figcaption": struct{}{},
 157	"figure":     struct{}{},
 158	"footer":     struct{}{},
 159	"header":     struct{}{},
 160	"hgroup":     struct{}{},
 161	"main":       struct{}{},
 162	"nav":        struct{}{},
 163	"output":     struct{}{},
 164	"progress":   struct{}{},
 165	"section":    struct{}{},
 166	"video":      struct{}{},
 167}
 168
 169// Renderer is the rendering interface.
 170// This is mostly of interest if you are implementing a new rendering format.
 171//
 172// When a byte slice is provided, it contains the (rendered) contents of the
 173// element.
 174//
 175// When a callback is provided instead, it will write the contents of the
 176// respective element directly to the output buffer and return true on success.
 177// If the callback returns false, the rendering function should reset the
 178// output buffer as though it had never been called.
 179//
 180// Currently HTML and Latex implementations are provided
 181type Renderer interface {
 182	Render(ast *Node) []byte
 183	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
 184}
 185
 186// Callback functions for inline parsing. One such function is defined
 187// for each character that triggers a response when parsing inline data.
 188type inlineParser func(p *parser, data []byte, offset int) int
 189
 190// Parser holds runtime state used by the parser.
 191// This is constructed by the Markdown function.
 192type parser struct {
 193	refOverride    ReferenceOverrideFunc
 194	refs           map[string]*reference
 195	inlineCallback [256]inlineParser
 196	flags          Extensions
 197	nesting        int
 198	maxNesting     int
 199	insideLink     bool
 200
 201	// Footnotes need to be ordered as well as available to quickly check for
 202	// presence. If a ref is also a footnote, it's stored both in refs and here
 203	// in notes. Slice is nil if footnotes not enabled.
 204	notes []*reference
 205
 206	doc                  *Node
 207	tip                  *Node // = doc
 208	oldTip               *Node
 209	lastMatchedContainer *Node // = doc
 210	allClosed            bool
 211	currBlock            *Node // a block node currently being parsed by inline parser
 212}
 213
 214func (p *parser) getRef(refid string) (ref *reference, found bool) {
 215	if p.refOverride != nil {
 216		r, overridden := p.refOverride(refid)
 217		if overridden {
 218			if r == nil {
 219				return nil, false
 220			}
 221			return &reference{
 222				link:     []byte(r.Link),
 223				title:    []byte(r.Title),
 224				noteID:   0,
 225				hasBlock: false,
 226				text:     []byte(r.Text)}, true
 227		}
 228	}
 229	// refs are case insensitive
 230	ref, found = p.refs[strings.ToLower(refid)]
 231	return ref, found
 232}
 233
 234func (p *parser) finalize(block *Node) {
 235	above := block.Parent
 236	block.open = false
 237	p.tip = above
 238}
 239
 240func (p *parser) addChild(node NodeType, offset uint32) *Node {
 241	for !p.tip.canContain(node) {
 242		p.finalize(p.tip)
 243	}
 244	newNode := NewNode(node)
 245	newNode.content = []byte{}
 246	p.tip.appendChild(newNode)
 247	p.tip = newNode
 248	return newNode
 249}
 250
 251func (p *parser) closeUnmatchedBlocks() {
 252	if !p.allClosed {
 253		for p.oldTip != p.lastMatchedContainer {
 254			parent := p.oldTip.Parent
 255			p.finalize(p.oldTip)
 256			p.oldTip = parent
 257		}
 258		p.allClosed = true
 259	}
 260}
 261
 262//
 263//
 264// Public interface
 265//
 266//
 267
 268// Reference represents the details of a link.
 269// See the documentation in Options for more details on use-case.
 270type Reference struct {
 271	// Link is usually the URL the reference points to.
 272	Link string
 273	// Title is the alternate text describing the link in more detail.
 274	Title string
 275	// Text is the optional text to override the ref with if the syntax used was
 276	// [refid][]
 277	Text string
 278}
 279
 280// ReferenceOverrideFunc is expected to be called with a reference string and
 281// return either a valid Reference type that the reference string maps to or
 282// nil. If overridden is false, the default reference logic will be executed.
 283// See the documentation in Options for more details on use-case.
 284type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
 285
 286// Options represents configurable overrides and callbacks (in addition to the
 287// extension flag set) for configuring a Markdown parse.
 288type Options struct {
 289	// Extensions is a flag set of bit-wise ORed extension bits. See the
 290	// Extensions flags defined in this package.
 291	Extensions Extensions
 292
 293	// ReferenceOverride is an optional function callback that is called every
 294	// time a reference is resolved.
 295	//
 296	// In Markdown, the link reference syntax can be made to resolve a link to
 297	// a reference instead of an inline URL, in one of the following ways:
 298	//
 299	//  * [link text][refid]
 300	//  * [refid][]
 301	//
 302	// Usually, the refid is defined at the bottom of the Markdown document. If
 303	// this override function is provided, the refid is passed to the override
 304	// function first, before consulting the defined refids at the bottom. If
 305	// the override function indicates an override did not occur, the refids at
 306	// the bottom will be used to fill in the link details.
 307	ReferenceOverride ReferenceOverrideFunc
 308}
 309
 310// MarkdownBasic is a convenience function for simple rendering.
 311// It processes markdown input with no extensions enabled.
 312func MarkdownBasic(input []byte) []byte {
 313	// set up the HTML renderer
 314	renderer := NewHTMLRenderer(HTMLRendererParameters{
 315		Flags:      UseXHTML,
 316		Extensions: CommonExtensions,
 317	})
 318
 319	// set up the parser
 320	return Markdown(input, renderer, Options{})
 321}
 322
 323// MarkdownCommon is a convenience function for simple rendering. It calls
 324// Markdown with most useful extensions enabled, including:
 325//
 326// * Smartypants processing with smart fractions and LaTeX dashes
 327// * Intra-word emphasis suppression
 328// * Tables
 329// * Fenced code blocks
 330// * Autolinking
 331// * Strikethrough support
 332// * Strict header parsing
 333// * Custom Header IDs
 334func MarkdownCommon(input []byte) []byte {
 335	// set up the HTML renderer
 336	renderer := NewHTMLRenderer(HTMLRendererParameters{
 337		Flags:      CommonHTMLFlags,
 338		Extensions: CommonExtensions,
 339	})
 340	return Markdown(input, renderer, DefaultOptions)
 341}
 342
 343// Markdown is the main rendering function.
 344// It parses and renders a block of markdown-encoded text.
 345// The supplied Renderer is used to format the output, and extensions dictates
 346// which non-standard extensions are enabled.
 347//
 348// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
 349// NewLatexRenderer, respectively.
 350func Markdown(input []byte, renderer Renderer, options Options) []byte {
 351	if renderer == nil {
 352		return nil
 353	}
 354	return renderer.Render(Parse(input, options))
 355}
 356
 357// Parse is an entry point to the parsing part of Blackfriday. It takes an
 358// input markdown document and produces a syntax tree for its contents. This
 359// tree can then be rendered with a default or custom renderer, or
 360// analyzed/transformed by the caller to whatever non-standard needs they have.
 361func Parse(input []byte, opts Options) *Node {
 362	extensions := opts.Extensions
 363
 364	// fill in the render structure
 365	p := new(parser)
 366	p.flags = extensions
 367	p.refOverride = opts.ReferenceOverride
 368	p.refs = make(map[string]*reference)
 369	p.maxNesting = 16
 370	p.insideLink = false
 371
 372	docNode := NewNode(Document)
 373	p.doc = docNode
 374	p.tip = docNode
 375	p.oldTip = docNode
 376	p.lastMatchedContainer = docNode
 377	p.allClosed = true
 378
 379	// register inline parsers
 380	p.inlineCallback['*'] = emphasis
 381	p.inlineCallback['_'] = emphasis
 382	if extensions&Strikethrough != 0 {
 383		p.inlineCallback['~'] = emphasis
 384	}
 385	p.inlineCallback['`'] = codeSpan
 386	p.inlineCallback['\n'] = lineBreak
 387	p.inlineCallback['['] = link
 388	p.inlineCallback['<'] = leftAngle
 389	p.inlineCallback['\\'] = escape
 390	p.inlineCallback['&'] = entity
 391	p.inlineCallback['!'] = maybeImage
 392	p.inlineCallback['^'] = maybeInlineFootnote
 393
 394	if extensions&Autolink != 0 {
 395		p.inlineCallback['h'] = maybeAutoLink
 396		p.inlineCallback['m'] = maybeAutoLink
 397		p.inlineCallback['f'] = maybeAutoLink
 398		p.inlineCallback['H'] = maybeAutoLink
 399		p.inlineCallback['M'] = maybeAutoLink
 400		p.inlineCallback['F'] = maybeAutoLink
 401	}
 402
 403	if extensions&Footnotes != 0 {
 404		p.notes = make([]*reference, 0)
 405	}
 406
 407	first := firstPass(p, input)
 408	secondPass(p, first)
 409	// Walk the tree and finish up some of unfinished blocks
 410	for p.tip != nil {
 411		p.finalize(p.tip)
 412	}
 413	// Walk the tree again and process inline markdown in each block
 414	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
 415		if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
 416			p.currBlock = node
 417			p.inline(node.content)
 418			node.content = nil
 419		}
 420		return GoToNext
 421	})
 422	p.parseRefsToAST()
 423	p.generateTOC()
 424	return p.doc
 425}
 426
 427func (p *parser) generateTOC() {
 428	if p.flags&TOC == 0 && p.flags&OmitContents == 0 {
 429		return
 430	}
 431	navNode := NewNode(HTMLBlock)
 432	navNode.Literal = []byte("<nav>")
 433	navNode.open = false
 434
 435	var topList *Node
 436	var listNode *Node
 437	var lastItem *Node
 438	headerCount := 0
 439	currentLevel := 0
 440	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
 441		if entering && node.Type == Header {
 442			if node.Level > currentLevel {
 443				currentLevel++
 444				newList := NewNode(List)
 445				if lastItem != nil {
 446					lastItem.appendChild(newList)
 447					listNode = newList
 448				} else {
 449					listNode = newList
 450					topList = listNode
 451				}
 452			}
 453			if node.Level < currentLevel {
 454				finalizeList(listNode)
 455				lastItem = listNode.Parent
 456				listNode = lastItem.Parent
 457			}
 458			node.HeaderID = fmt.Sprintf("toc_%d", headerCount)
 459			headerCount++
 460			lastItem = NewNode(Item)
 461			listNode.appendChild(lastItem)
 462			anchorNode := NewNode(Link)
 463			anchorNode.Destination = []byte("#" + node.HeaderID)
 464			lastItem.appendChild(anchorNode)
 465			anchorNode.appendChild(text(node.FirstChild.Literal))
 466		}
 467		return GoToNext
 468	})
 469	firstChild := p.doc.FirstChild
 470	// Insert TOC only if there is anything to insert
 471	if topList != nil {
 472		finalizeList(topList)
 473		firstChild.insertBefore(navNode)
 474		firstChild.insertBefore(topList)
 475		navCloseNode := NewNode(HTMLBlock)
 476		navCloseNode.Literal = []byte("</nav>")
 477		navCloseNode.open = false
 478		firstChild.insertBefore(navCloseNode)
 479	}
 480	// Drop everything after the TOC if OmitContents was requested
 481	if p.flags&OmitContents != 0 {
 482		for firstChild != nil {
 483			next := firstChild.Next
 484			firstChild.unlink()
 485			firstChild = next
 486		}
 487	}
 488}
 489
 490func (p *parser) parseRefsToAST() {
 491	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
 492		return
 493	}
 494	p.tip = p.doc
 495	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
 496	p.addBlock(HorizontalRule, nil)
 497	block := p.addBlock(List, nil)
 498	block.ListFlags = ListTypeOrdered
 499	flags := ListItemBeginningOfList
 500	// Note: this loop is intentionally explicit, not range-form. This is
 501	// because the body of the loop will append nested footnotes to p.notes and
 502	// we need to process those late additions. Range form would only walk over
 503	// the fixed initial set.
 504	for i := 0; i < len(p.notes); i++ {
 505		ref := p.notes[i]
 506		block := p.addBlock(Item, nil)
 507		block.ListFlags = ListTypeOrdered
 508		block.RefLink = ref.link
 509		if ref.hasBlock {
 510			flags |= ListItemContainsBlock
 511			p.block(ref.title)
 512		} else {
 513			p.currBlock = block
 514			p.inline(ref.title)
 515		}
 516		flags &^= ListItemBeginningOfList | ListItemContainsBlock
 517	}
 518	above := block.Parent
 519	finalizeList(block)
 520	p.tip = above
 521	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte("</div>")))
 522	block.Walk(func(node *Node, entering bool) WalkStatus {
 523		if node.Type == Paragraph || node.Type == Header {
 524			p.currBlock = node
 525			p.inline(node.content)
 526			node.content = nil
 527		}
 528		return GoToNext
 529	})
 530}
 531
 532// first pass:
 533// - normalize newlines
 534// - extract references (outside of fenced code blocks)
 535// - expand tabs (outside of fenced code blocks)
 536// - copy everything else
 537func firstPass(p *parser, input []byte) []byte {
 538	var out bytes.Buffer
 539	tabSize := TabSizeDefault
 540	if p.flags&TabSizeEight != 0 {
 541		tabSize = TabSizeDouble
 542	}
 543	beg := 0
 544	lastFencedCodeBlockEnd := 0
 545	for beg < len(input) {
 546		// Find end of this line, then process the line.
 547		end := beg
 548		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
 549			end++
 550		}
 551
 552		if p.flags&FencedCode != 0 {
 553			// track fenced code block boundaries to suppress tab expansion
 554			// and reference extraction inside them:
 555			if beg >= lastFencedCodeBlockEnd {
 556				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
 557					lastFencedCodeBlockEnd = beg + i
 558				}
 559			}
 560		}
 561
 562		// add the line body if present
 563		if end > beg {
 564			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
 565				out.Write(input[beg:end])
 566			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
 567				beg += refEnd
 568				continue
 569			} else {
 570				expandTabs(&out, input[beg:end], tabSize)
 571			}
 572		}
 573
 574		if end < len(input) && input[end] == '\r' {
 575			end++
 576		}
 577		if end < len(input) && input[end] == '\n' {
 578			end++
 579		}
 580		out.WriteByte('\n')
 581
 582		beg = end
 583	}
 584
 585	// empty input?
 586	if out.Len() == 0 {
 587		out.WriteByte('\n')
 588	}
 589
 590	return out.Bytes()
 591}
 592
 593// second pass: actual rendering
 594func secondPass(p *parser, input []byte) {
 595	p.block(input)
 596
 597	if p.flags&Footnotes != 0 && len(p.notes) > 0 {
 598		flags := ListItemBeginningOfList
 599		for i := 0; i < len(p.notes); i++ {
 600			ref := p.notes[i]
 601			if ref.hasBlock {
 602				flags |= ListItemContainsBlock
 603				p.block(ref.title)
 604			} else {
 605				p.inline(ref.title)
 606			}
 607			flags &^= ListItemBeginningOfList | ListItemContainsBlock
 608		}
 609	}
 610
 611	if p.nesting != 0 {
 612		panic("Nesting level did not end at zero")
 613	}
 614}
 615
 616//
 617// Link references
 618//
 619// This section implements support for references that (usually) appear
 620// as footnotes in a document, and can be referenced anywhere in the document.
 621// The basic format is:
 622//
 623//    [1]: http://www.google.com/ "Google"
 624//    [2]: http://www.github.com/ "Github"
 625//
 626// Anywhere in the document, the reference can be linked by referring to its
 627// label, i.e., 1 and 2 in this example, as in:
 628//
 629//    This library is hosted on [Github][2], a git hosting site.
 630//
 631// Actual footnotes as specified in Pandoc and supported by some other Markdown
 632// libraries such as php-markdown are also taken care of. They look like this:
 633//
 634//    This sentence needs a bit of further explanation.[^note]
 635//
 636//    [^note]: This is the explanation.
 637//
 638// Footnotes should be placed at the end of the document in an ordered list.
 639// Inline footnotes such as:
 640//
 641//    Inline footnotes^[Not supported.] also exist.
 642//
 643// are not yet supported.
 644
 645// References are parsed and stored in this struct.
 646type reference struct {
 647	link     []byte
 648	title    []byte
 649	noteID   int // 0 if not a footnote ref
 650	hasBlock bool
 651	text     []byte
 652}
 653
 654func (r *reference) String() string {
 655	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
 656		r.link, r.title, r.text, r.noteID, r.hasBlock)
 657}
 658
 659// Check whether or not data starts with a reference link.
 660// If so, it is parsed and stored in the list of references
 661// (in the render struct).
 662// Returns the number of bytes to skip to move past it,
 663// or zero if the first line is not a reference.
 664func isReference(p *parser, data []byte, tabSize int) int {
 665	// up to 3 optional leading spaces
 666	if len(data) < 4 {
 667		return 0
 668	}
 669	i := 0
 670	for i < 3 && data[i] == ' ' {
 671		i++
 672	}
 673
 674	noteID := 0
 675
 676	// id part: anything but a newline between brackets
 677	if data[i] != '[' {
 678		return 0
 679	}
 680	i++
 681	if p.flags&Footnotes != 0 {
 682		if i < len(data) && data[i] == '^' {
 683			// we can set it to anything here because the proper noteIds will
 684			// be assigned later during the second pass. It just has to be != 0
 685			noteID = 1
 686			i++
 687		}
 688	}
 689	idOffset := i
 690	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
 691		i++
 692	}
 693	if i >= len(data) || data[i] != ']' {
 694		return 0
 695	}
 696	idEnd := i
 697
 698	// spacer: colon (space | tab)* newline? (space | tab)*
 699	i++
 700	if i >= len(data) || data[i] != ':' {
 701		return 0
 702	}
 703	i++
 704	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 705		i++
 706	}
 707	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
 708		i++
 709		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
 710			i++
 711		}
 712	}
 713	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 714		i++
 715	}
 716	if i >= len(data) {
 717		return 0
 718	}
 719
 720	var (
 721		linkOffset, linkEnd   int
 722		titleOffset, titleEnd int
 723		lineEnd               int
 724		raw                   []byte
 725		hasBlock              bool
 726	)
 727
 728	if p.flags&Footnotes != 0 && noteID != 0 {
 729		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
 730		lineEnd = linkEnd
 731	} else {
 732		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
 733	}
 734	if lineEnd == 0 {
 735		return 0
 736	}
 737
 738	// a valid ref has been found
 739
 740	ref := &reference{
 741		noteID:   noteID,
 742		hasBlock: hasBlock,
 743	}
 744
 745	if noteID > 0 {
 746		// reusing the link field for the id since footnotes don't have links
 747		ref.link = data[idOffset:idEnd]
 748		// if footnote, it's not really a title, it's the contained text
 749		ref.title = raw
 750	} else {
 751		ref.link = data[linkOffset:linkEnd]
 752		ref.title = data[titleOffset:titleEnd]
 753	}
 754
 755	// id matches are case-insensitive
 756	id := string(bytes.ToLower(data[idOffset:idEnd]))
 757
 758	p.refs[id] = ref
 759
 760	return lineEnd
 761}
 762
 763func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
 764	// link: whitespace-free sequence, optionally between angle brackets
 765	if data[i] == '<' {
 766		i++
 767	}
 768	linkOffset = i
 769	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
 770		i++
 771	}
 772	if i == len(data) {
 773		return
 774	}
 775	linkEnd = i
 776	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
 777		linkOffset++
 778		linkEnd--
 779	}
 780
 781	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
 782	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 783		i++
 784	}
 785	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
 786		return
 787	}
 788
 789	// compute end-of-line
 790	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
 791		lineEnd = i
 792	}
 793	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
 794		lineEnd++
 795	}
 796
 797	// optional (space|tab)* spacer after a newline
 798	if lineEnd > 0 {
 799		i = lineEnd + 1
 800		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 801			i++
 802		}
 803	}
 804
 805	// optional title: any non-newline sequence enclosed in '"() alone on its line
 806	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
 807		i++
 808		titleOffset = i
 809
 810		// look for EOL
 811		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
 812			i++
 813		}
 814		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
 815			titleEnd = i + 1
 816		} else {
 817			titleEnd = i
 818		}
 819
 820		// step back
 821		i--
 822		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
 823			i--
 824		}
 825		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
 826			lineEnd = titleEnd
 827			titleEnd = i
 828		}
 829	}
 830
 831	return
 832}
 833
 834// The first bit of this logic is the same as (*parser).listItem, but the rest
 835// is much simpler. This function simply finds the entire block and shifts it
 836// over by one tab if it is indeed a block (just returns the line if it's not).
 837// blockEnd is the end of the section in the input buffer, and contents is the
 838// extracted text that was shifted over one tab. It will need to be rendered at
 839// the end of the document.
 840func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
 841	if i == 0 || len(data) == 0 {
 842		return
 843	}
 844
 845	// skip leading whitespace on first line
 846	for i < len(data) && data[i] == ' ' {
 847		i++
 848	}
 849
 850	blockStart = i
 851
 852	// find the end of the line
 853	blockEnd = i
 854	for i < len(data) && data[i-1] != '\n' {
 855		i++
 856	}
 857
 858	// get working buffer
 859	var raw bytes.Buffer
 860
 861	// put the first line into the working buffer
 862	raw.Write(data[blockEnd:i])
 863	blockEnd = i
 864
 865	// process the following lines
 866	containsBlankLine := false
 867
 868gatherLines:
 869	for blockEnd < len(data) {
 870		i++
 871
 872		// find the end of this line
 873		for i < len(data) && data[i-1] != '\n' {
 874			i++
 875		}
 876
 877		// if it is an empty line, guess that it is part of this item
 878		// and move on to the next line
 879		if p.isEmpty(data[blockEnd:i]) > 0 {
 880			containsBlankLine = true
 881			blockEnd = i
 882			continue
 883		}
 884
 885		n := 0
 886		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
 887			// this is the end of the block.
 888			// we don't want to include this last line in the index.
 889			break gatherLines
 890		}
 891
 892		// if there were blank lines before this one, insert a new one now
 893		if containsBlankLine {
 894			raw.WriteByte('\n')
 895			containsBlankLine = false
 896		}
 897
 898		// get rid of that first tab, write to buffer
 899		raw.Write(data[blockEnd+n : i])
 900		hasBlock = true
 901
 902		blockEnd = i
 903	}
 904
 905	if data[blockEnd-1] != '\n' {
 906		raw.WriteByte('\n')
 907	}
 908
 909	contents = raw.Bytes()
 910
 911	return
 912}
 913
 914//
 915//
 916// Miscellaneous helper functions
 917//
 918//
 919
 920// Test if a character is a punctuation symbol.
 921// Taken from a private function in regexp in the stdlib.
 922func ispunct(c byte) bool {
 923	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
 924		if c == r {
 925			return true
 926		}
 927	}
 928	return false
 929}
 930
 931// Test if a character is a whitespace character.
 932func isspace(c byte) bool {
 933	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
 934}
 935
 936// Test if a character is letter.
 937func isletter(c byte) bool {
 938	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 939}
 940
 941// Test if a character is a letter or a digit.
 942// TODO: check when this is looking for ASCII alnum and when it should use unicode
 943func isalnum(c byte) bool {
 944	return (c >= '0' && c <= '9') || isletter(c)
 945}
 946
 947// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
 948// always ends output with a newline
 949func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
 950	// first, check for common cases: no tabs, or only tabs at beginning of line
 951	i, prefix := 0, 0
 952	slowcase := false
 953	for i = 0; i < len(line); i++ {
 954		if line[i] == '\t' {
 955			if prefix == i {
 956				prefix++
 957			} else {
 958				slowcase = true
 959				break
 960			}
 961		}
 962	}
 963
 964	// no need to decode runes if all tabs are at the beginning of the line
 965	if !slowcase {
 966		for i = 0; i < prefix*tabSize; i++ {
 967			out.WriteByte(' ')
 968		}
 969		out.Write(line[prefix:])
 970		return
 971	}
 972
 973	// the slow case: we need to count runes to figure out how
 974	// many spaces to insert for each tab
 975	column := 0
 976	i = 0
 977	for i < len(line) {
 978		start := i
 979		for i < len(line) && line[i] != '\t' {
 980			_, size := utf8.DecodeRune(line[i:])
 981			i += size
 982			column++
 983		}
 984
 985		if i > start {
 986			out.Write(line[start:i])
 987		}
 988
 989		if i >= len(line) {
 990			break
 991		}
 992
 993		for {
 994			out.WriteByte(' ')
 995			column++
 996			if column%tabSize == 0 {
 997				break
 998			}
 999		}
1000
1001		i++
1002	}
1003}
1004
1005// Find if a line counts as indented or not.
1006// Returns number of characters the indent is (0 = not indented).
1007func isIndented(data []byte, indentSize int) int {
1008	if len(data) == 0 {
1009		return 0
1010	}
1011	if data[0] == '\t' {
1012		return 1
1013	}
1014	if len(data) < indentSize {
1015		return 0
1016	}
1017	for i := 0; i < indentSize; i++ {
1018		if data[i] != ' ' {
1019			return 0
1020		}
1021	}
1022	return indentSize
1023}
1024
1025// Create a url-safe slug for fragments
1026func slugify(in []byte) []byte {
1027	if len(in) == 0 {
1028		return in
1029	}
1030	out := make([]byte, 0, len(in))
1031	sym := false
1032
1033	for _, ch := range in {
1034		if isalnum(ch) {
1035			sym = false
1036			out = append(out, ch)
1037		} else if sym {
1038			continue
1039		} else {
1040			out = append(out, '-')
1041			sym = true
1042		}
1043	}
1044	var a, b int
1045	var ch byte
1046	for a, ch = range out {
1047		if ch != '-' {
1048			break
1049		}
1050	}
1051	for b = len(out) - 1; b > 0; b-- {
1052		if out[b] != '-' {
1053			break
1054		}
1055	}
1056	return out[a : b+1]
1057}