all repos — grayfriday @ ab20da6e278cf0217d66c11bbcb2e79c5815d3fb

blackfriday fork with a few changes

markdown.go (view raw)

   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11//
  12// Markdown parsing and processing
  13//
  14//
  15
  16// Blackfriday markdown processor.
  17//
  18// Translates plain text with simple formatting rules into HTML or LaTeX.
  19package blackfriday
  20
  21import (
  22	"bytes"
  23	"fmt"
  24	"strings"
  25	"unicode/utf8"
  26)
  27
  28const VERSION = "1.4"
  29
  30type Extensions int
  31
  32// These are the supported markdown parsing extensions.
  33// OR these values together to select multiple extensions.
  34const (
  35	NoExtensions            Extensions = 0
  36	NoIntraEmphasis         Extensions = 1 << iota // Ignore emphasis markers inside words
  37	Tables                                         // Render tables
  38	FencedCode                                     // Render fenced code blocks
  39	Autolink                                       // Detect embedded URLs that are not explicitly marked
  40	Strikethrough                                  // Strikethrough text using ~~test~~
  41	LaxHTMLBlocks                                  // Loosen up HTML block parsing rules
  42	SpaceHeaders                                   // Be strict about prefix header rules
  43	HardLineBreak                                  // Translate newlines into line breaks
  44	TabSizeEight                                   // Expand tabs to eight spaces instead of four
  45	Footnotes                                      // Pandoc-style footnotes
  46	NoEmptyLineBeforeBlock                         // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
  47	HeaderIDs                                      // specify header IDs  with {#id}
  48	Titleblock                                     // Titleblock ala pandoc
  49	AutoHeaderIDs                                  // Create the header ID from the text
  50	BackslashLineBreak                             // Translate trailing backslashes into line breaks
  51	DefinitionLists                                // Render definition lists
  52	Smartypants                                    // Enable smart punctuation substitutions
  53	SmartypantsFractions                           // Enable smart fractions (with Smartypants)
  54	SmartypantsDashes                              // Enable smart dashes (with Smartypants)
  55	SmartypantsLatexDashes                         // Enable LaTeX-style dashes (with Smartypants)
  56	SmartypantsAngledQuotes                        // Enable angled double quotes (with Smartypants) for double quotes rendering
  57	TOC                                            // Generate a table of contents
  58	OmitContents                                   // Skip the main contents (for a standalone table of contents)
  59
  60	CommonHtmlFlags HTMLFlags = UseXHTML
  61
  62	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
  63		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
  64		BackslashLineBreak | DefinitionLists | Smartypants |
  65		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
  66)
  67
  68var DefaultOptions = Options{
  69	Extensions: CommonExtensions,
  70}
  71
  72type LinkType int
  73
  74// These are the possible flag values for the link renderer.
  75// Only a single one of these values will be used; they are not ORed together.
  76// These are mostly of interest if you are writing a new output format.
  77const (
  78	LinkTypeNotAutolink LinkType = iota
  79	LinkTypeNormal
  80	LinkTypeEmail
  81)
  82
  83type ListType int
  84
  85// These are the possible flag values for the ListItem renderer.
  86// Multiple flag values may be ORed together.
  87// These are mostly of interest if you are writing a new output format.
  88const (
  89	ListTypeOrdered ListType = 1 << iota
  90	ListTypeDefinition
  91	ListTypeTerm
  92
  93	ListItemContainsBlock
  94	ListItemBeginningOfList
  95	ListItemEndOfList
  96)
  97
  98type CellAlignFlags int
  99
 100// These are the possible flag values for the table cell renderer.
 101// Only a single one of these values will be used; they are not ORed together.
 102// These are mostly of interest if you are writing a new output format.
 103const (
 104	TableAlignmentLeft = 1 << iota
 105	TableAlignmentRight
 106	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
 107)
 108
 109// The size of a tab stop.
 110const (
 111	TabSizeDefault = 4
 112	TabSizeDouble  = 8
 113)
 114
 115// blockTags is a set of tags that are recognized as HTML block tags.
 116// Any of these can be included in markdown text without special escaping.
 117var blockTags = map[string]struct{}{
 118	"blockquote": struct{}{},
 119	"del":        struct{}{},
 120	"div":        struct{}{},
 121	"dl":         struct{}{},
 122	"fieldset":   struct{}{},
 123	"form":       struct{}{},
 124	"h1":         struct{}{},
 125	"h2":         struct{}{},
 126	"h3":         struct{}{},
 127	"h4":         struct{}{},
 128	"h5":         struct{}{},
 129	"h6":         struct{}{},
 130	"iframe":     struct{}{},
 131	"ins":        struct{}{},
 132	"math":       struct{}{},
 133	"noscript":   struct{}{},
 134	"ol":         struct{}{},
 135	"pre":        struct{}{},
 136	"p":          struct{}{},
 137	"script":     struct{}{},
 138	"style":      struct{}{},
 139	"table":      struct{}{},
 140	"ul":         struct{}{},
 141
 142	// HTML5
 143	"address":    struct{}{},
 144	"article":    struct{}{},
 145	"aside":      struct{}{},
 146	"canvas":     struct{}{},
 147	"figcaption": struct{}{},
 148	"figure":     struct{}{},
 149	"footer":     struct{}{},
 150	"header":     struct{}{},
 151	"hgroup":     struct{}{},
 152	"main":       struct{}{},
 153	"nav":        struct{}{},
 154	"output":     struct{}{},
 155	"progress":   struct{}{},
 156	"section":    struct{}{},
 157	"video":      struct{}{},
 158}
 159
 160// Renderer is the rendering interface.
 161// This is mostly of interest if you are implementing a new rendering format.
 162//
 163// When a byte slice is provided, it contains the (rendered) contents of the
 164// element.
 165//
 166// When a callback is provided instead, it will write the contents of the
 167// respective element directly to the output buffer and return true on success.
 168// If the callback returns false, the rendering function should reset the
 169// output buffer as though it had never been called.
 170//
 171// Currently Html and Latex implementations are provided
 172type Renderer interface {
 173	Render(ast *Node) []byte
 174}
 175
 176// Callback functions for inline parsing. One such function is defined
 177// for each character that triggers a response when parsing inline data.
 178type inlineParser func(p *parser, data []byte, offset int) int
 179
 180// Parser holds runtime state used by the parser.
 181// This is constructed by the Markdown function.
 182type parser struct {
 183	refOverride    ReferenceOverrideFunc
 184	refs           map[string]*reference
 185	inlineCallback [256]inlineParser
 186	flags          Extensions
 187	nesting        int
 188	maxNesting     int
 189	insideLink     bool
 190
 191	// Footnotes need to be ordered as well as available to quickly check for
 192	// presence. If a ref is also a footnote, it's stored both in refs and here
 193	// in notes. Slice is nil if footnotes not enabled.
 194	notes []*reference
 195
 196	doc                  *Node
 197	tip                  *Node // = doc
 198	oldTip               *Node
 199	lastMatchedContainer *Node // = doc
 200	allClosed            bool
 201	currBlock            *Node // a block node currently being parsed by inline parser
 202}
 203
 204func (p *parser) getRef(refid string) (ref *reference, found bool) {
 205	if p.refOverride != nil {
 206		r, overridden := p.refOverride(refid)
 207		if overridden {
 208			if r == nil {
 209				return nil, false
 210			}
 211			return &reference{
 212				link:     []byte(r.Link),
 213				title:    []byte(r.Title),
 214				noteId:   0,
 215				hasBlock: false,
 216				text:     []byte(r.Text)}, true
 217		}
 218	}
 219	// refs are case insensitive
 220	ref, found = p.refs[strings.ToLower(refid)]
 221	return ref, found
 222}
 223
 224func (p *parser) finalize(block *Node) {
 225	above := block.Parent
 226	block.open = false
 227	p.tip = above
 228}
 229
 230func (p *parser) addChild(node NodeType, offset uint32) *Node {
 231	for !p.tip.canContain(node) {
 232		p.finalize(p.tip)
 233	}
 234	newNode := NewNode(node)
 235	newNode.content = []byte{}
 236	p.tip.appendChild(newNode)
 237	p.tip = newNode
 238	return newNode
 239}
 240
 241func (p *parser) closeUnmatchedBlocks() {
 242	if !p.allClosed {
 243		for p.oldTip != p.lastMatchedContainer {
 244			parent := p.oldTip.Parent
 245			p.finalize(p.oldTip)
 246			p.oldTip = parent
 247		}
 248		p.allClosed = true
 249	}
 250}
 251
 252//
 253//
 254// Public interface
 255//
 256//
 257
 258// Reference represents the details of a link.
 259// See the documentation in Options for more details on use-case.
 260type Reference struct {
 261	// Link is usually the URL the reference points to.
 262	Link string
 263	// Title is the alternate text describing the link in more detail.
 264	Title string
 265	// Text is the optional text to override the ref with if the syntax used was
 266	// [refid][]
 267	Text string
 268}
 269
 270// ReferenceOverrideFunc is expected to be called with a reference string and
 271// return either a valid Reference type that the reference string maps to or
 272// nil. If overridden is false, the default reference logic will be executed.
 273// See the documentation in Options for more details on use-case.
 274type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
 275
 276// Options represents configurable overrides and callbacks (in addition to the
 277// extension flag set) for configuring a Markdown parse.
 278type Options struct {
 279	// Extensions is a flag set of bit-wise ORed extension bits. See the
 280	// Extensions flags defined in this package.
 281	Extensions Extensions
 282
 283	// ReferenceOverride is an optional function callback that is called every
 284	// time a reference is resolved.
 285	//
 286	// In Markdown, the link reference syntax can be made to resolve a link to
 287	// a reference instead of an inline URL, in one of the following ways:
 288	//
 289	//  * [link text][refid]
 290	//  * [refid][]
 291	//
 292	// Usually, the refid is defined at the bottom of the Markdown document. If
 293	// this override function is provided, the refid is passed to the override
 294	// function first, before consulting the defined refids at the bottom. If
 295	// the override function indicates an override did not occur, the refids at
 296	// the bottom will be used to fill in the link details.
 297	ReferenceOverride ReferenceOverrideFunc
 298}
 299
 300// MarkdownBasic is a convenience function for simple rendering.
 301// It processes markdown input with no extensions enabled.
 302func MarkdownBasic(input []byte) []byte {
 303	// set up the HTML renderer
 304	renderer := NewHTMLRenderer(HTMLRendererParameters{
 305		Flags:      UseXHTML,
 306		Extensions: CommonExtensions,
 307	})
 308
 309	// set up the parser
 310	return Markdown(input, renderer, Options{})
 311}
 312
 313// Call Markdown with most useful extensions enabled
 314// MarkdownCommon is a convenience function for simple rendering.
 315// It processes markdown input with common extensions enabled, including:
 316//
 317// * Smartypants processing with smart fractions and LaTeX dashes
 318//
 319// * Intra-word emphasis suppression
 320//
 321// * Tables
 322//
 323// * Fenced code blocks
 324//
 325// * Autolinking
 326//
 327// * Strikethrough support
 328//
 329// * Strict header parsing
 330//
 331// * Custom Header IDs
 332func MarkdownCommon(input []byte) []byte {
 333	// set up the HTML renderer
 334	renderer := NewHTMLRenderer(HTMLRendererParameters{
 335		Flags:      CommonHtmlFlags,
 336		Extensions: CommonExtensions,
 337	})
 338	return Markdown(input, renderer, DefaultOptions)
 339}
 340
 341// Markdown is the main rendering function.
 342// It parses and renders a block of markdown-encoded text.
 343// The supplied Renderer is used to format the output, and extensions dictates
 344// which non-standard extensions are enabled.
 345//
 346// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
 347// NewLatexRenderer, respectively.
 348func Markdown(input []byte, renderer Renderer, options Options) []byte {
 349	if renderer == nil {
 350		return nil
 351	}
 352	return renderer.Render(Parse(input, options))
 353}
 354
 355func Parse(input []byte, opts Options) *Node {
 356	extensions := opts.Extensions
 357
 358	// fill in the render structure
 359	p := new(parser)
 360	p.flags = extensions
 361	p.refOverride = opts.ReferenceOverride
 362	p.refs = make(map[string]*reference)
 363	p.maxNesting = 16
 364	p.insideLink = false
 365
 366	docNode := NewNode(Document)
 367	p.doc = docNode
 368	p.tip = docNode
 369	p.oldTip = docNode
 370	p.lastMatchedContainer = docNode
 371	p.allClosed = true
 372
 373	// register inline parsers
 374	p.inlineCallback['*'] = emphasis
 375	p.inlineCallback['_'] = emphasis
 376	if extensions&Strikethrough != 0 {
 377		p.inlineCallback['~'] = emphasis
 378	}
 379	p.inlineCallback['`'] = codeSpan
 380	p.inlineCallback['\n'] = lineBreak
 381	p.inlineCallback['['] = link
 382	p.inlineCallback['<'] = leftAngle
 383	p.inlineCallback['\\'] = escape
 384	p.inlineCallback['&'] = entity
 385	p.inlineCallback['!'] = maybeImage
 386	p.inlineCallback['^'] = maybeInlineFootnote
 387
 388	if extensions&Autolink != 0 {
 389		p.inlineCallback['h'] = maybeAutoLink
 390		p.inlineCallback['m'] = maybeAutoLink
 391		p.inlineCallback['f'] = maybeAutoLink
 392		p.inlineCallback['H'] = maybeAutoLink
 393		p.inlineCallback['M'] = maybeAutoLink
 394		p.inlineCallback['F'] = maybeAutoLink
 395	}
 396
 397	if extensions&Footnotes != 0 {
 398		p.notes = make([]*reference, 0)
 399	}
 400
 401	first := firstPass(p, input)
 402	secondPass(p, first)
 403	// Walk the tree and finish up some of unfinished blocks
 404	for p.tip != nil {
 405		p.finalize(p.tip)
 406	}
 407	// Walk the tree again and process inline markdown in each block
 408	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
 409		if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
 410			p.currBlock = node
 411			p.inline(node.content)
 412			node.content = nil
 413		}
 414		return GoToNext
 415	})
 416	p.parseRefsToAST()
 417	p.generateTOC()
 418	return p.doc
 419}
 420
 421func (p *parser) generateTOC() {
 422	if p.flags&TOC == 0 && p.flags&OmitContents == 0 {
 423		return
 424	}
 425	navNode := NewNode(HTMLBlock)
 426	navNode.Literal = []byte("<nav>")
 427	navNode.open = false
 428
 429	var topList *Node
 430	var listNode *Node
 431	var lastItem *Node
 432	headerCount := 0
 433	currentLevel := 0
 434	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
 435		if entering && node.Type == Header {
 436			if node.Level > currentLevel {
 437				currentLevel++
 438				newList := NewNode(List)
 439				if lastItem != nil {
 440					lastItem.appendChild(newList)
 441					listNode = newList
 442				} else {
 443					listNode = newList
 444					topList = listNode
 445				}
 446			}
 447			if node.Level < currentLevel {
 448				finalizeList(listNode)
 449				lastItem = listNode.Parent
 450				listNode = lastItem.Parent
 451			}
 452			node.HeaderID = fmt.Sprintf("toc_%d", headerCount)
 453			headerCount++
 454			lastItem = NewNode(Item)
 455			listNode.appendChild(lastItem)
 456			anchorNode := NewNode(Link)
 457			anchorNode.Destination = []byte("#" + node.HeaderID)
 458			lastItem.appendChild(anchorNode)
 459			anchorNode.appendChild(text(node.FirstChild.Literal))
 460		}
 461		return GoToNext
 462	})
 463	firstChild := p.doc.FirstChild
 464	// Insert TOC only if there is anything to insert
 465	if topList != nil {
 466		finalizeList(topList)
 467		firstChild.insertBefore(navNode)
 468		firstChild.insertBefore(topList)
 469		navCloseNode := NewNode(HTMLBlock)
 470		navCloseNode.Literal = []byte("</nav>")
 471		navCloseNode.open = false
 472		firstChild.insertBefore(navCloseNode)
 473	}
 474	// Drop everything after the TOC if OmitContents was requested
 475	if p.flags&OmitContents != 0 {
 476		for firstChild != nil {
 477			next := firstChild.Next
 478			firstChild.unlink()
 479			firstChild = next
 480		}
 481	}
 482}
 483
 484func (p *parser) parseRefsToAST() {
 485	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
 486		return
 487	}
 488	p.tip = p.doc
 489	finalizeHtmlBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
 490	p.addBlock(HorizontalRule, nil)
 491	block := p.addBlock(List, nil)
 492	block.ListFlags = ListTypeOrdered
 493	flags := ListItemBeginningOfList
 494	// Note: this loop is intentionally explicit, not range-form. This is
 495	// because the body of the loop will append nested footnotes to p.notes and
 496	// we need to process those late additions. Range form would only walk over
 497	// the fixed initial set.
 498	for i := 0; i < len(p.notes); i++ {
 499		ref := p.notes[i]
 500		block := p.addBlock(Item, nil)
 501		block.ListFlags = ListTypeOrdered
 502		block.RefLink = ref.link
 503		if ref.hasBlock {
 504			flags |= ListItemContainsBlock
 505			p.block(ref.title)
 506		} else {
 507			p.currBlock = block
 508			p.inline(ref.title)
 509		}
 510		flags &^= ListItemBeginningOfList | ListItemContainsBlock
 511	}
 512	above := block.Parent
 513	finalizeList(block)
 514	p.tip = above
 515	finalizeHtmlBlock(p.addBlock(HTMLBlock, []byte("</div>")))
 516	block.Walk(func(node *Node, entering bool) WalkStatus {
 517		if node.Type == Paragraph || node.Type == Header {
 518			p.currBlock = node
 519			p.inline(node.content)
 520			node.content = nil
 521		}
 522		return GoToNext
 523	})
 524}
 525
 526// first pass:
 527// - extract references
 528// - expand tabs
 529// - normalize newlines
 530// - copy everything else
 531func firstPass(p *parser, input []byte) []byte {
 532	var out bytes.Buffer
 533	tabSize := TabSizeDefault
 534	if p.flags&TabSizeEight != 0 {
 535		tabSize = TabSizeDouble
 536	}
 537	beg, end := 0, 0
 538	lastFencedCodeBlockEnd := 0
 539	for beg < len(input) { // iterate over lines
 540		if end = isReference(p, input[beg:], tabSize); end > 0 {
 541			beg += end
 542		} else { // skip to the next line
 543			end = beg
 544			for end < len(input) && input[end] != '\n' && input[end] != '\r' {
 545				end++
 546			}
 547
 548			if p.flags&FencedCode != 0 {
 549				// track fenced code block boundaries to suppress tab expansion
 550				// inside them:
 551				if beg >= lastFencedCodeBlockEnd {
 552					if i := p.fencedCode(input[beg:], false); i > 0 {
 553						lastFencedCodeBlockEnd = beg + i
 554					}
 555				}
 556			}
 557
 558			// add the line body if present
 559			if end > beg {
 560				if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
 561					out.Write(input[beg:end])
 562				} else {
 563					expandTabs(&out, input[beg:end], tabSize)
 564				}
 565			}
 566			out.WriteByte('\n')
 567
 568			if end < len(input) && input[end] == '\r' {
 569				end++
 570			}
 571			if end < len(input) && input[end] == '\n' {
 572				end++
 573			}
 574
 575			beg = end
 576		}
 577	}
 578
 579	// empty input?
 580	if out.Len() == 0 {
 581		out.WriteByte('\n')
 582	}
 583
 584	return out.Bytes()
 585}
 586
 587// second pass: actual rendering
 588func secondPass(p *parser, input []byte) {
 589	p.block(input)
 590
 591	if p.flags&Footnotes != 0 && len(p.notes) > 0 {
 592		flags := ListItemBeginningOfList
 593		for i := 0; i < len(p.notes); i += 1 {
 594			ref := p.notes[i]
 595			if ref.hasBlock {
 596				flags |= ListItemContainsBlock
 597				p.block(ref.title)
 598			} else {
 599				p.inline(ref.title)
 600			}
 601			flags &^= ListItemBeginningOfList | ListItemContainsBlock
 602		}
 603	}
 604
 605	if p.nesting != 0 {
 606		panic("Nesting level did not end at zero")
 607	}
 608}
 609
 610//
 611// Link references
 612//
 613// This section implements support for references that (usually) appear
 614// as footnotes in a document, and can be referenced anywhere in the document.
 615// The basic format is:
 616//
 617//    [1]: http://www.google.com/ "Google"
 618//    [2]: http://www.github.com/ "Github"
 619//
 620// Anywhere in the document, the reference can be linked by referring to its
 621// label, i.e., 1 and 2 in this example, as in:
 622//
 623//    This library is hosted on [Github][2], a git hosting site.
 624//
 625// Actual footnotes as specified in Pandoc and supported by some other Markdown
 626// libraries such as php-markdown are also taken care of. They look like this:
 627//
 628//    This sentence needs a bit of further explanation.[^note]
 629//
 630//    [^note]: This is the explanation.
 631//
 632// Footnotes should be placed at the end of the document in an ordered list.
 633// Inline footnotes such as:
 634//
 635//    Inline footnotes^[Not supported.] also exist.
 636//
 637// are not yet supported.
 638
 639// References are parsed and stored in this struct.
 640type reference struct {
 641	link     []byte
 642	title    []byte
 643	noteId   int // 0 if not a footnote ref
 644	hasBlock bool
 645	text     []byte
 646}
 647
 648func (r *reference) String() string {
 649	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}",
 650		r.link, r.title, r.text, r.noteId, r.hasBlock)
 651}
 652
 653// Check whether or not data starts with a reference link.
 654// If so, it is parsed and stored in the list of references
 655// (in the render struct).
 656// Returns the number of bytes to skip to move past it,
 657// or zero if the first line is not a reference.
 658func isReference(p *parser, data []byte, tabSize int) int {
 659	// up to 3 optional leading spaces
 660	if len(data) < 4 {
 661		return 0
 662	}
 663	i := 0
 664	for i < 3 && data[i] == ' ' {
 665		i++
 666	}
 667
 668	noteId := 0
 669
 670	// id part: anything but a newline between brackets
 671	if data[i] != '[' {
 672		return 0
 673	}
 674	i++
 675	if p.flags&Footnotes != 0 {
 676		if i < len(data) && data[i] == '^' {
 677			// we can set it to anything here because the proper noteIds will
 678			// be assigned later during the second pass. It just has to be != 0
 679			noteId = 1
 680			i++
 681		}
 682	}
 683	idOffset := i
 684	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
 685		i++
 686	}
 687	if i >= len(data) || data[i] != ']' {
 688		return 0
 689	}
 690	idEnd := i
 691
 692	// spacer: colon (space | tab)* newline? (space | tab)*
 693	i++
 694	if i >= len(data) || data[i] != ':' {
 695		return 0
 696	}
 697	i++
 698	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 699		i++
 700	}
 701	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
 702		i++
 703		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
 704			i++
 705		}
 706	}
 707	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 708		i++
 709	}
 710	if i >= len(data) {
 711		return 0
 712	}
 713
 714	var (
 715		linkOffset, linkEnd   int
 716		titleOffset, titleEnd int
 717		lineEnd               int
 718		raw                   []byte
 719		hasBlock              bool
 720	)
 721
 722	if p.flags&Footnotes != 0 && noteId != 0 {
 723		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
 724		lineEnd = linkEnd
 725	} else {
 726		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
 727	}
 728	if lineEnd == 0 {
 729		return 0
 730	}
 731
 732	// a valid ref has been found
 733
 734	ref := &reference{
 735		noteId:   noteId,
 736		hasBlock: hasBlock,
 737	}
 738
 739	if noteId > 0 {
 740		// reusing the link field for the id since footnotes don't have links
 741		ref.link = data[idOffset:idEnd]
 742		// if footnote, it's not really a title, it's the contained text
 743		ref.title = raw
 744	} else {
 745		ref.link = data[linkOffset:linkEnd]
 746		ref.title = data[titleOffset:titleEnd]
 747	}
 748
 749	// id matches are case-insensitive
 750	id := string(bytes.ToLower(data[idOffset:idEnd]))
 751
 752	p.refs[id] = ref
 753
 754	return lineEnd
 755}
 756
 757func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
 758	// link: whitespace-free sequence, optionally between angle brackets
 759	if data[i] == '<' {
 760		i++
 761	}
 762	linkOffset = i
 763	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
 764		i++
 765	}
 766	if i == len(data) {
 767		return
 768	}
 769	linkEnd = i
 770	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
 771		linkOffset++
 772		linkEnd--
 773	}
 774
 775	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
 776	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 777		i++
 778	}
 779	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
 780		return
 781	}
 782
 783	// compute end-of-line
 784	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
 785		lineEnd = i
 786	}
 787	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
 788		lineEnd++
 789	}
 790
 791	// optional (space|tab)* spacer after a newline
 792	if lineEnd > 0 {
 793		i = lineEnd + 1
 794		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 795			i++
 796		}
 797	}
 798
 799	// optional title: any non-newline sequence enclosed in '"() alone on its line
 800	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
 801		i++
 802		titleOffset = i
 803
 804		// look for EOL
 805		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
 806			i++
 807		}
 808		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
 809			titleEnd = i + 1
 810		} else {
 811			titleEnd = i
 812		}
 813
 814		// step back
 815		i--
 816		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
 817			i--
 818		}
 819		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
 820			lineEnd = titleEnd
 821			titleEnd = i
 822		}
 823	}
 824
 825	return
 826}
 827
 828// The first bit of this logic is the same as (*parser).listItem, but the rest
 829// is much simpler. This function simply finds the entire block and shifts it
 830// over by one tab if it is indeed a block (just returns the line if it's not).
 831// blockEnd is the end of the section in the input buffer, and contents is the
 832// extracted text that was shifted over one tab. It will need to be rendered at
 833// the end of the document.
 834func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
 835	if i == 0 || len(data) == 0 {
 836		return
 837	}
 838
 839	// skip leading whitespace on first line
 840	for i < len(data) && data[i] == ' ' {
 841		i++
 842	}
 843
 844	blockStart = i
 845
 846	// find the end of the line
 847	blockEnd = i
 848	for i < len(data) && data[i-1] != '\n' {
 849		i++
 850	}
 851
 852	// get working buffer
 853	var raw bytes.Buffer
 854
 855	// put the first line into the working buffer
 856	raw.Write(data[blockEnd:i])
 857	blockEnd = i
 858
 859	// process the following lines
 860	containsBlankLine := false
 861
 862gatherLines:
 863	for blockEnd < len(data) {
 864		i++
 865
 866		// find the end of this line
 867		for i < len(data) && data[i-1] != '\n' {
 868			i++
 869		}
 870
 871		// if it is an empty line, guess that it is part of this item
 872		// and move on to the next line
 873		if p.isEmpty(data[blockEnd:i]) > 0 {
 874			containsBlankLine = true
 875			blockEnd = i
 876			continue
 877		}
 878
 879		n := 0
 880		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
 881			// this is the end of the block.
 882			// we don't want to include this last line in the index.
 883			break gatherLines
 884		}
 885
 886		// if there were blank lines before this one, insert a new one now
 887		if containsBlankLine {
 888			raw.WriteByte('\n')
 889			containsBlankLine = false
 890		}
 891
 892		// get rid of that first tab, write to buffer
 893		raw.Write(data[blockEnd+n : i])
 894		hasBlock = true
 895
 896		blockEnd = i
 897	}
 898
 899	if data[blockEnd-1] != '\n' {
 900		raw.WriteByte('\n')
 901	}
 902
 903	contents = raw.Bytes()
 904
 905	return
 906}
 907
 908//
 909//
 910// Miscellaneous helper functions
 911//
 912//
 913
 914// Test if a character is a punctuation symbol.
 915// Taken from a private function in regexp in the stdlib.
 916func ispunct(c byte) bool {
 917	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
 918		if c == r {
 919			return true
 920		}
 921	}
 922	return false
 923}
 924
 925// Test if a character is a whitespace character.
 926func isspace(c byte) bool {
 927	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
 928}
 929
 930// Test if a character is letter.
 931func isletter(c byte) bool {
 932	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 933}
 934
 935// Test if a character is a letter or a digit.
 936// TODO: check when this is looking for ASCII alnum and when it should use unicode
 937func isalnum(c byte) bool {
 938	return (c >= '0' && c <= '9') || isletter(c)
 939}
 940
 941// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
 942// always ends output with a newline
 943func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
 944	// first, check for common cases: no tabs, or only tabs at beginning of line
 945	i, prefix := 0, 0
 946	slowcase := false
 947	for i = 0; i < len(line); i++ {
 948		if line[i] == '\t' {
 949			if prefix == i {
 950				prefix++
 951			} else {
 952				slowcase = true
 953				break
 954			}
 955		}
 956	}
 957
 958	// no need to decode runes if all tabs are at the beginning of the line
 959	if !slowcase {
 960		for i = 0; i < prefix*tabSize; i++ {
 961			out.WriteByte(' ')
 962		}
 963		out.Write(line[prefix:])
 964		return
 965	}
 966
 967	// the slow case: we need to count runes to figure out how
 968	// many spaces to insert for each tab
 969	column := 0
 970	i = 0
 971	for i < len(line) {
 972		start := i
 973		for i < len(line) && line[i] != '\t' {
 974			_, size := utf8.DecodeRune(line[i:])
 975			i += size
 976			column++
 977		}
 978
 979		if i > start {
 980			out.Write(line[start:i])
 981		}
 982
 983		if i >= len(line) {
 984			break
 985		}
 986
 987		for {
 988			out.WriteByte(' ')
 989			column++
 990			if column%tabSize == 0 {
 991				break
 992			}
 993		}
 994
 995		i++
 996	}
 997}
 998
 999// Find if a line counts as indented or not.
1000// Returns number of characters the indent is (0 = not indented).
1001func isIndented(data []byte, indentSize int) int {
1002	if len(data) == 0 {
1003		return 0
1004	}
1005	if data[0] == '\t' {
1006		return 1
1007	}
1008	if len(data) < indentSize {
1009		return 0
1010	}
1011	for i := 0; i < indentSize; i++ {
1012		if data[i] != ' ' {
1013			return 0
1014		}
1015	}
1016	return indentSize
1017}
1018
1019// Create a url-safe slug for fragments
1020func slugify(in []byte) []byte {
1021	if len(in) == 0 {
1022		return in
1023	}
1024	out := make([]byte, 0, len(in))
1025	sym := false
1026
1027	for _, ch := range in {
1028		if isalnum(ch) {
1029			sym = false
1030			out = append(out, ch)
1031		} else if sym {
1032			continue
1033		} else {
1034			out = append(out, '-')
1035			sym = true
1036		}
1037	}
1038	var a, b int
1039	var ch byte
1040	for a, ch = range out {
1041		if ch != '-' {
1042			break
1043		}
1044	}
1045	for b = len(out) - 1; b > 0; b-- {
1046		if out[b] != '-' {
1047			break
1048		}
1049	}
1050	return out[a : b+1]
1051}