all repos — grayfriday @ a4f1e5c786bb9fedf9ed5b76b58a1c5142b693fa

blackfriday fork with a few changes

markdown.go (view raw)

   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11//
  12// Markdown parsing and processing
  13//
  14//
  15
  16// Blackfriday markdown processor.
  17//
  18// Translates plain text with simple formatting rules into HTML or LaTeX.
  19package blackfriday
  20
  21import (
  22	"bytes"
  23	"fmt"
  24	"io"
  25	"strings"
  26	"unicode/utf8"
  27)
  28
  29const VERSION = "1.4"
  30
  31type Extensions int
  32
  33// These are the supported markdown parsing extensions.
  34// OR these values together to select multiple extensions.
  35const (
  36	NoExtensions            Extensions = 0
  37	NoIntraEmphasis         Extensions = 1 << iota // Ignore emphasis markers inside words
  38	Tables                                         // Render tables
  39	FencedCode                                     // Render fenced code blocks
  40	Autolink                                       // Detect embedded URLs that are not explicitly marked
  41	Strikethrough                                  // Strikethrough text using ~~test~~
  42	LaxHTMLBlocks                                  // Loosen up HTML block parsing rules
  43	SpaceHeaders                                   // Be strict about prefix header rules
  44	HardLineBreak                                  // Translate newlines into line breaks
  45	TabSizeEight                                   // Expand tabs to eight spaces instead of four
  46	Footnotes                                      // Pandoc-style footnotes
  47	NoEmptyLineBeforeBlock                         // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
  48	HeaderIDs                                      // specify header IDs  with {#id}
  49	Titleblock                                     // Titleblock ala pandoc
  50	AutoHeaderIDs                                  // Create the header ID from the text
  51	BackslashLineBreak                             // Translate trailing backslashes into line breaks
  52	DefinitionLists                                // Render definition lists
  53	Smartypants                                    // Enable smart punctuation substitutions
  54	SmartypantsFractions                           // Enable smart fractions (with Smartypants)
  55	SmartypantsDashes                              // Enable smart dashes (with Smartypants)
  56	SmartypantsLatexDashes                         // Enable LaTeX-style dashes (with Smartypants)
  57	SmartypantsAngledQuotes                        // Enable angled double quotes (with Smartypants) for double quotes rendering
  58	TOC                                            // Generate a table of contents
  59	OmitContents                                   // Skip the main contents (for a standalone table of contents)
  60
  61	CommonHtmlFlags HTMLFlags = UseXHTML
  62
  63	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
  64		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
  65		BackslashLineBreak | DefinitionLists | Smartypants |
  66		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
  67)
  68
  69var DefaultOptions = Options{
  70	Extensions: CommonExtensions,
  71}
  72
  73type LinkType int
  74
  75// These are the possible flag values for the link renderer.
  76// Only a single one of these values will be used; they are not ORed together.
  77// These are mostly of interest if you are writing a new output format.
  78const (
  79	LinkTypeNotAutolink LinkType = iota
  80	LinkTypeNormal
  81	LinkTypeEmail
  82)
  83
  84type ListType int
  85
  86// These are the possible flag values for the ListItem renderer.
  87// Multiple flag values may be ORed together.
  88// These are mostly of interest if you are writing a new output format.
  89const (
  90	ListTypeOrdered ListType = 1 << iota
  91	ListTypeDefinition
  92	ListTypeTerm
  93
  94	ListItemContainsBlock
  95	ListItemBeginningOfList
  96	ListItemEndOfList
  97)
  98
  99type CellAlignFlags int
 100
 101// These are the possible flag values for the table cell renderer.
 102// Only a single one of these values will be used; they are not ORed together.
 103// These are mostly of interest if you are writing a new output format.
 104const (
 105	TableAlignmentLeft = 1 << iota
 106	TableAlignmentRight
 107	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
 108)
 109
 110// The size of a tab stop.
 111const (
 112	TabSizeDefault = 4
 113	TabSizeDouble  = 8
 114)
 115
 116// blockTags is a set of tags that are recognized as HTML block tags.
 117// Any of these can be included in markdown text without special escaping.
 118var blockTags = map[string]struct{}{
 119	"blockquote": struct{}{},
 120	"del":        struct{}{},
 121	"div":        struct{}{},
 122	"dl":         struct{}{},
 123	"fieldset":   struct{}{},
 124	"form":       struct{}{},
 125	"h1":         struct{}{},
 126	"h2":         struct{}{},
 127	"h3":         struct{}{},
 128	"h4":         struct{}{},
 129	"h5":         struct{}{},
 130	"h6":         struct{}{},
 131	"iframe":     struct{}{},
 132	"ins":        struct{}{},
 133	"math":       struct{}{},
 134	"noscript":   struct{}{},
 135	"ol":         struct{}{},
 136	"pre":        struct{}{},
 137	"p":          struct{}{},
 138	"script":     struct{}{},
 139	"style":      struct{}{},
 140	"table":      struct{}{},
 141	"ul":         struct{}{},
 142
 143	// HTML5
 144	"address":    struct{}{},
 145	"article":    struct{}{},
 146	"aside":      struct{}{},
 147	"canvas":     struct{}{},
 148	"figcaption": struct{}{},
 149	"figure":     struct{}{},
 150	"footer":     struct{}{},
 151	"header":     struct{}{},
 152	"hgroup":     struct{}{},
 153	"main":       struct{}{},
 154	"nav":        struct{}{},
 155	"output":     struct{}{},
 156	"progress":   struct{}{},
 157	"section":    struct{}{},
 158	"video":      struct{}{},
 159}
 160
 161// Renderer is the rendering interface.
 162// This is mostly of interest if you are implementing a new rendering format.
 163//
 164// When a byte slice is provided, it contains the (rendered) contents of the
 165// element.
 166//
 167// When a callback is provided instead, it will write the contents of the
 168// respective element directly to the output buffer and return true on success.
 169// If the callback returns false, the rendering function should reset the
 170// output buffer as though it had never been called.
 171//
 172// Currently HTML and Latex implementations are provided
 173type Renderer interface {
 174	Render(ast *Node) []byte
 175	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
 176}
 177
 178// Callback functions for inline parsing. One such function is defined
 179// for each character that triggers a response when parsing inline data.
 180type inlineParser func(p *parser, data []byte, offset int) int
 181
 182// Parser holds runtime state used by the parser.
 183// This is constructed by the Markdown function.
 184type parser struct {
 185	refOverride    ReferenceOverrideFunc
 186	refs           map[string]*reference
 187	inlineCallback [256]inlineParser
 188	flags          Extensions
 189	nesting        int
 190	maxNesting     int
 191	insideLink     bool
 192
 193	// Footnotes need to be ordered as well as available to quickly check for
 194	// presence. If a ref is also a footnote, it's stored both in refs and here
 195	// in notes. Slice is nil if footnotes not enabled.
 196	notes []*reference
 197
 198	doc                  *Node
 199	tip                  *Node // = doc
 200	oldTip               *Node
 201	lastMatchedContainer *Node // = doc
 202	allClosed            bool
 203	currBlock            *Node // a block node currently being parsed by inline parser
 204}
 205
 206func (p *parser) getRef(refid string) (ref *reference, found bool) {
 207	if p.refOverride != nil {
 208		r, overridden := p.refOverride(refid)
 209		if overridden {
 210			if r == nil {
 211				return nil, false
 212			}
 213			return &reference{
 214				link:     []byte(r.Link),
 215				title:    []byte(r.Title),
 216				noteId:   0,
 217				hasBlock: false,
 218				text:     []byte(r.Text)}, true
 219		}
 220	}
 221	// refs are case insensitive
 222	ref, found = p.refs[strings.ToLower(refid)]
 223	return ref, found
 224}
 225
 226func (p *parser) finalize(block *Node) {
 227	above := block.Parent
 228	block.open = false
 229	p.tip = above
 230}
 231
 232func (p *parser) addChild(node NodeType, offset uint32) *Node {
 233	for !p.tip.canContain(node) {
 234		p.finalize(p.tip)
 235	}
 236	newNode := NewNode(node)
 237	newNode.content = []byte{}
 238	p.tip.appendChild(newNode)
 239	p.tip = newNode
 240	return newNode
 241}
 242
 243func (p *parser) closeUnmatchedBlocks() {
 244	if !p.allClosed {
 245		for p.oldTip != p.lastMatchedContainer {
 246			parent := p.oldTip.Parent
 247			p.finalize(p.oldTip)
 248			p.oldTip = parent
 249		}
 250		p.allClosed = true
 251	}
 252}
 253
 254//
 255//
 256// Public interface
 257//
 258//
 259
 260// Reference represents the details of a link.
 261// See the documentation in Options for more details on use-case.
 262type Reference struct {
 263	// Link is usually the URL the reference points to.
 264	Link string
 265	// Title is the alternate text describing the link in more detail.
 266	Title string
 267	// Text is the optional text to override the ref with if the syntax used was
 268	// [refid][]
 269	Text string
 270}
 271
 272// ReferenceOverrideFunc is expected to be called with a reference string and
 273// return either a valid Reference type that the reference string maps to or
 274// nil. If overridden is false, the default reference logic will be executed.
 275// See the documentation in Options for more details on use-case.
 276type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
 277
 278// Options represents configurable overrides and callbacks (in addition to the
 279// extension flag set) for configuring a Markdown parse.
 280type Options struct {
 281	// Extensions is a flag set of bit-wise ORed extension bits. See the
 282	// Extensions flags defined in this package.
 283	Extensions Extensions
 284
 285	// ReferenceOverride is an optional function callback that is called every
 286	// time a reference is resolved.
 287	//
 288	// In Markdown, the link reference syntax can be made to resolve a link to
 289	// a reference instead of an inline URL, in one of the following ways:
 290	//
 291	//  * [link text][refid]
 292	//  * [refid][]
 293	//
 294	// Usually, the refid is defined at the bottom of the Markdown document. If
 295	// this override function is provided, the refid is passed to the override
 296	// function first, before consulting the defined refids at the bottom. If
 297	// the override function indicates an override did not occur, the refids at
 298	// the bottom will be used to fill in the link details.
 299	ReferenceOverride ReferenceOverrideFunc
 300}
 301
 302// MarkdownBasic is a convenience function for simple rendering.
 303// It processes markdown input with no extensions enabled.
 304func MarkdownBasic(input []byte) []byte {
 305	// set up the HTML renderer
 306	renderer := NewHTMLRenderer(HTMLRendererParameters{
 307		Flags:      UseXHTML,
 308		Extensions: CommonExtensions,
 309	})
 310
 311	// set up the parser
 312	return Markdown(input, renderer, Options{})
 313}
 314
 315// Call Markdown with most useful extensions enabled
 316// MarkdownCommon is a convenience function for simple rendering.
 317// It processes markdown input with common extensions enabled, including:
 318//
 319// * Smartypants processing with smart fractions and LaTeX dashes
 320//
 321// * Intra-word emphasis suppression
 322//
 323// * Tables
 324//
 325// * Fenced code blocks
 326//
 327// * Autolinking
 328//
 329// * Strikethrough support
 330//
 331// * Strict header parsing
 332//
 333// * Custom Header IDs
 334func MarkdownCommon(input []byte) []byte {
 335	// set up the HTML renderer
 336	renderer := NewHTMLRenderer(HTMLRendererParameters{
 337		Flags:      CommonHtmlFlags,
 338		Extensions: CommonExtensions,
 339	})
 340	return Markdown(input, renderer, DefaultOptions)
 341}
 342
 343// Markdown is the main rendering function.
 344// It parses and renders a block of markdown-encoded text.
 345// The supplied Renderer is used to format the output, and extensions dictates
 346// which non-standard extensions are enabled.
 347//
 348// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
 349// NewLatexRenderer, respectively.
 350func Markdown(input []byte, renderer Renderer, options Options) []byte {
 351	if renderer == nil {
 352		return nil
 353	}
 354	return renderer.Render(Parse(input, options))
 355}
 356
 357func Parse(input []byte, opts Options) *Node {
 358	extensions := opts.Extensions
 359
 360	// fill in the render structure
 361	p := new(parser)
 362	p.flags = extensions
 363	p.refOverride = opts.ReferenceOverride
 364	p.refs = make(map[string]*reference)
 365	p.maxNesting = 16
 366	p.insideLink = false
 367
 368	docNode := NewNode(Document)
 369	p.doc = docNode
 370	p.tip = docNode
 371	p.oldTip = docNode
 372	p.lastMatchedContainer = docNode
 373	p.allClosed = true
 374
 375	// register inline parsers
 376	p.inlineCallback['*'] = emphasis
 377	p.inlineCallback['_'] = emphasis
 378	if extensions&Strikethrough != 0 {
 379		p.inlineCallback['~'] = emphasis
 380	}
 381	p.inlineCallback['`'] = codeSpan
 382	p.inlineCallback['\n'] = lineBreak
 383	p.inlineCallback['['] = link
 384	p.inlineCallback['<'] = leftAngle
 385	p.inlineCallback['\\'] = escape
 386	p.inlineCallback['&'] = entity
 387	p.inlineCallback['!'] = maybeImage
 388	p.inlineCallback['^'] = maybeInlineFootnote
 389
 390	if extensions&Autolink != 0 {
 391		p.inlineCallback['h'] = maybeAutoLink
 392		p.inlineCallback['m'] = maybeAutoLink
 393		p.inlineCallback['f'] = maybeAutoLink
 394		p.inlineCallback['H'] = maybeAutoLink
 395		p.inlineCallback['M'] = maybeAutoLink
 396		p.inlineCallback['F'] = maybeAutoLink
 397	}
 398
 399	if extensions&Footnotes != 0 {
 400		p.notes = make([]*reference, 0)
 401	}
 402
 403	first := firstPass(p, input)
 404	secondPass(p, first)
 405	// Walk the tree and finish up some of unfinished blocks
 406	for p.tip != nil {
 407		p.finalize(p.tip)
 408	}
 409	// Walk the tree again and process inline markdown in each block
 410	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
 411		if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
 412			p.currBlock = node
 413			p.inline(node.content)
 414			node.content = nil
 415		}
 416		return GoToNext
 417	})
 418	p.parseRefsToAST()
 419	p.generateTOC()
 420	return p.doc
 421}
 422
 423func (p *parser) generateTOC() {
 424	if p.flags&TOC == 0 && p.flags&OmitContents == 0 {
 425		return
 426	}
 427	navNode := NewNode(HTMLBlock)
 428	navNode.Literal = []byte("<nav>")
 429	navNode.open = false
 430
 431	var topList *Node
 432	var listNode *Node
 433	var lastItem *Node
 434	headerCount := 0
 435	currentLevel := 0
 436	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
 437		if entering && node.Type == Header {
 438			if node.Level > currentLevel {
 439				currentLevel++
 440				newList := NewNode(List)
 441				if lastItem != nil {
 442					lastItem.appendChild(newList)
 443					listNode = newList
 444				} else {
 445					listNode = newList
 446					topList = listNode
 447				}
 448			}
 449			if node.Level < currentLevel {
 450				finalizeList(listNode)
 451				lastItem = listNode.Parent
 452				listNode = lastItem.Parent
 453			}
 454			node.HeaderID = fmt.Sprintf("toc_%d", headerCount)
 455			headerCount++
 456			lastItem = NewNode(Item)
 457			listNode.appendChild(lastItem)
 458			anchorNode := NewNode(Link)
 459			anchorNode.Destination = []byte("#" + node.HeaderID)
 460			lastItem.appendChild(anchorNode)
 461			anchorNode.appendChild(text(node.FirstChild.Literal))
 462		}
 463		return GoToNext
 464	})
 465	firstChild := p.doc.FirstChild
 466	// Insert TOC only if there is anything to insert
 467	if topList != nil {
 468		finalizeList(topList)
 469		firstChild.insertBefore(navNode)
 470		firstChild.insertBefore(topList)
 471		navCloseNode := NewNode(HTMLBlock)
 472		navCloseNode.Literal = []byte("</nav>")
 473		navCloseNode.open = false
 474		firstChild.insertBefore(navCloseNode)
 475	}
 476	// Drop everything after the TOC if OmitContents was requested
 477	if p.flags&OmitContents != 0 {
 478		for firstChild != nil {
 479			next := firstChild.Next
 480			firstChild.unlink()
 481			firstChild = next
 482		}
 483	}
 484}
 485
 486func (p *parser) parseRefsToAST() {
 487	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
 488		return
 489	}
 490	p.tip = p.doc
 491	finalizeHtmlBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
 492	p.addBlock(HorizontalRule, nil)
 493	block := p.addBlock(List, nil)
 494	block.ListFlags = ListTypeOrdered
 495	flags := ListItemBeginningOfList
 496	// Note: this loop is intentionally explicit, not range-form. This is
 497	// because the body of the loop will append nested footnotes to p.notes and
 498	// we need to process those late additions. Range form would only walk over
 499	// the fixed initial set.
 500	for i := 0; i < len(p.notes); i++ {
 501		ref := p.notes[i]
 502		block := p.addBlock(Item, nil)
 503		block.ListFlags = ListTypeOrdered
 504		block.RefLink = ref.link
 505		if ref.hasBlock {
 506			flags |= ListItemContainsBlock
 507			p.block(ref.title)
 508		} else {
 509			p.currBlock = block
 510			p.inline(ref.title)
 511		}
 512		flags &^= ListItemBeginningOfList | ListItemContainsBlock
 513	}
 514	above := block.Parent
 515	finalizeList(block)
 516	p.tip = above
 517	finalizeHtmlBlock(p.addBlock(HTMLBlock, []byte("</div>")))
 518	block.Walk(func(node *Node, entering bool) WalkStatus {
 519		if node.Type == Paragraph || node.Type == Header {
 520			p.currBlock = node
 521			p.inline(node.content)
 522			node.content = nil
 523		}
 524		return GoToNext
 525	})
 526}
 527
 528// first pass:
 529// - normalize newlines
 530// - extract references (outside of fenced code blocks)
 531// - expand tabs (outside of fenced code blocks)
 532// - copy everything else
 533func firstPass(p *parser, input []byte) []byte {
 534	var out bytes.Buffer
 535	tabSize := TabSizeDefault
 536	if p.flags&TabSizeEight != 0 {
 537		tabSize = TabSizeDouble
 538	}
 539	beg := 0
 540	lastFencedCodeBlockEnd := 0
 541	for beg < len(input) {
 542		// Find end of this line, then process the line.
 543		end := beg
 544		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
 545			end++
 546		}
 547
 548		if p.flags&FencedCode != 0 {
 549			// track fenced code block boundaries to suppress tab expansion
 550			// and reference extraction inside them:
 551			if beg >= lastFencedCodeBlockEnd {
 552				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
 553					lastFencedCodeBlockEnd = beg + i
 554				}
 555			}
 556		}
 557
 558		// add the line body if present
 559		if end > beg {
 560			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
 561				out.Write(input[beg:end])
 562			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
 563				beg += refEnd
 564				continue
 565			} else {
 566				expandTabs(&out, input[beg:end], tabSize)
 567			}
 568		}
 569
 570		if end < len(input) && input[end] == '\r' {
 571			end++
 572		}
 573		if end < len(input) && input[end] == '\n' {
 574			end++
 575		}
 576		out.WriteByte('\n')
 577
 578		beg = end
 579	}
 580
 581	// empty input?
 582	if out.Len() == 0 {
 583		out.WriteByte('\n')
 584	}
 585
 586	return out.Bytes()
 587}
 588
 589// second pass: actual rendering
 590func secondPass(p *parser, input []byte) {
 591	p.block(input)
 592
 593	if p.flags&Footnotes != 0 && len(p.notes) > 0 {
 594		flags := ListItemBeginningOfList
 595		for i := 0; i < len(p.notes); i += 1 {
 596			ref := p.notes[i]
 597			if ref.hasBlock {
 598				flags |= ListItemContainsBlock
 599				p.block(ref.title)
 600			} else {
 601				p.inline(ref.title)
 602			}
 603			flags &^= ListItemBeginningOfList | ListItemContainsBlock
 604		}
 605	}
 606
 607	if p.nesting != 0 {
 608		panic("Nesting level did not end at zero")
 609	}
 610}
 611
 612//
 613// Link references
 614//
 615// This section implements support for references that (usually) appear
 616// as footnotes in a document, and can be referenced anywhere in the document.
 617// The basic format is:
 618//
 619//    [1]: http://www.google.com/ "Google"
 620//    [2]: http://www.github.com/ "Github"
 621//
 622// Anywhere in the document, the reference can be linked by referring to its
 623// label, i.e., 1 and 2 in this example, as in:
 624//
 625//    This library is hosted on [Github][2], a git hosting site.
 626//
 627// Actual footnotes as specified in Pandoc and supported by some other Markdown
 628// libraries such as php-markdown are also taken care of. They look like this:
 629//
 630//    This sentence needs a bit of further explanation.[^note]
 631//
 632//    [^note]: This is the explanation.
 633//
 634// Footnotes should be placed at the end of the document in an ordered list.
 635// Inline footnotes such as:
 636//
 637//    Inline footnotes^[Not supported.] also exist.
 638//
 639// are not yet supported.
 640
 641// References are parsed and stored in this struct.
 642type reference struct {
 643	link     []byte
 644	title    []byte
 645	noteId   int // 0 if not a footnote ref
 646	hasBlock bool
 647	text     []byte
 648}
 649
 650func (r *reference) String() string {
 651	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}",
 652		r.link, r.title, r.text, r.noteId, r.hasBlock)
 653}
 654
 655// Check whether or not data starts with a reference link.
 656// If so, it is parsed and stored in the list of references
 657// (in the render struct).
 658// Returns the number of bytes to skip to move past it,
 659// or zero if the first line is not a reference.
 660func isReference(p *parser, data []byte, tabSize int) int {
 661	// up to 3 optional leading spaces
 662	if len(data) < 4 {
 663		return 0
 664	}
 665	i := 0
 666	for i < 3 && data[i] == ' ' {
 667		i++
 668	}
 669
 670	noteId := 0
 671
 672	// id part: anything but a newline between brackets
 673	if data[i] != '[' {
 674		return 0
 675	}
 676	i++
 677	if p.flags&Footnotes != 0 {
 678		if i < len(data) && data[i] == '^' {
 679			// we can set it to anything here because the proper noteIds will
 680			// be assigned later during the second pass. It just has to be != 0
 681			noteId = 1
 682			i++
 683		}
 684	}
 685	idOffset := i
 686	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
 687		i++
 688	}
 689	if i >= len(data) || data[i] != ']' {
 690		return 0
 691	}
 692	idEnd := i
 693
 694	// spacer: colon (space | tab)* newline? (space | tab)*
 695	i++
 696	if i >= len(data) || data[i] != ':' {
 697		return 0
 698	}
 699	i++
 700	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 701		i++
 702	}
 703	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
 704		i++
 705		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
 706			i++
 707		}
 708	}
 709	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 710		i++
 711	}
 712	if i >= len(data) {
 713		return 0
 714	}
 715
 716	var (
 717		linkOffset, linkEnd   int
 718		titleOffset, titleEnd int
 719		lineEnd               int
 720		raw                   []byte
 721		hasBlock              bool
 722	)
 723
 724	if p.flags&Footnotes != 0 && noteId != 0 {
 725		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
 726		lineEnd = linkEnd
 727	} else {
 728		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
 729	}
 730	if lineEnd == 0 {
 731		return 0
 732	}
 733
 734	// a valid ref has been found
 735
 736	ref := &reference{
 737		noteId:   noteId,
 738		hasBlock: hasBlock,
 739	}
 740
 741	if noteId > 0 {
 742		// reusing the link field for the id since footnotes don't have links
 743		ref.link = data[idOffset:idEnd]
 744		// if footnote, it's not really a title, it's the contained text
 745		ref.title = raw
 746	} else {
 747		ref.link = data[linkOffset:linkEnd]
 748		ref.title = data[titleOffset:titleEnd]
 749	}
 750
 751	// id matches are case-insensitive
 752	id := string(bytes.ToLower(data[idOffset:idEnd]))
 753
 754	p.refs[id] = ref
 755
 756	return lineEnd
 757}
 758
 759func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
 760	// link: whitespace-free sequence, optionally between angle brackets
 761	if data[i] == '<' {
 762		i++
 763	}
 764	linkOffset = i
 765	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
 766		i++
 767	}
 768	if i == len(data) {
 769		return
 770	}
 771	linkEnd = i
 772	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
 773		linkOffset++
 774		linkEnd--
 775	}
 776
 777	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
 778	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 779		i++
 780	}
 781	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
 782		return
 783	}
 784
 785	// compute end-of-line
 786	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
 787		lineEnd = i
 788	}
 789	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
 790		lineEnd++
 791	}
 792
 793	// optional (space|tab)* spacer after a newline
 794	if lineEnd > 0 {
 795		i = lineEnd + 1
 796		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 797			i++
 798		}
 799	}
 800
 801	// optional title: any non-newline sequence enclosed in '"() alone on its line
 802	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
 803		i++
 804		titleOffset = i
 805
 806		// look for EOL
 807		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
 808			i++
 809		}
 810		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
 811			titleEnd = i + 1
 812		} else {
 813			titleEnd = i
 814		}
 815
 816		// step back
 817		i--
 818		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
 819			i--
 820		}
 821		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
 822			lineEnd = titleEnd
 823			titleEnd = i
 824		}
 825	}
 826
 827	return
 828}
 829
 830// The first bit of this logic is the same as (*parser).listItem, but the rest
 831// is much simpler. This function simply finds the entire block and shifts it
 832// over by one tab if it is indeed a block (just returns the line if it's not).
 833// blockEnd is the end of the section in the input buffer, and contents is the
 834// extracted text that was shifted over one tab. It will need to be rendered at
 835// the end of the document.
 836func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
 837	if i == 0 || len(data) == 0 {
 838		return
 839	}
 840
 841	// skip leading whitespace on first line
 842	for i < len(data) && data[i] == ' ' {
 843		i++
 844	}
 845
 846	blockStart = i
 847
 848	// find the end of the line
 849	blockEnd = i
 850	for i < len(data) && data[i-1] != '\n' {
 851		i++
 852	}
 853
 854	// get working buffer
 855	var raw bytes.Buffer
 856
 857	// put the first line into the working buffer
 858	raw.Write(data[blockEnd:i])
 859	blockEnd = i
 860
 861	// process the following lines
 862	containsBlankLine := false
 863
 864gatherLines:
 865	for blockEnd < len(data) {
 866		i++
 867
 868		// find the end of this line
 869		for i < len(data) && data[i-1] != '\n' {
 870			i++
 871		}
 872
 873		// if it is an empty line, guess that it is part of this item
 874		// and move on to the next line
 875		if p.isEmpty(data[blockEnd:i]) > 0 {
 876			containsBlankLine = true
 877			blockEnd = i
 878			continue
 879		}
 880
 881		n := 0
 882		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
 883			// this is the end of the block.
 884			// we don't want to include this last line in the index.
 885			break gatherLines
 886		}
 887
 888		// if there were blank lines before this one, insert a new one now
 889		if containsBlankLine {
 890			raw.WriteByte('\n')
 891			containsBlankLine = false
 892		}
 893
 894		// get rid of that first tab, write to buffer
 895		raw.Write(data[blockEnd+n : i])
 896		hasBlock = true
 897
 898		blockEnd = i
 899	}
 900
 901	if data[blockEnd-1] != '\n' {
 902		raw.WriteByte('\n')
 903	}
 904
 905	contents = raw.Bytes()
 906
 907	return
 908}
 909
 910//
 911//
 912// Miscellaneous helper functions
 913//
 914//
 915
 916// Test if a character is a punctuation symbol.
 917// Taken from a private function in regexp in the stdlib.
 918func ispunct(c byte) bool {
 919	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
 920		if c == r {
 921			return true
 922		}
 923	}
 924	return false
 925}
 926
 927// Test if a character is a whitespace character.
 928func isspace(c byte) bool {
 929	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
 930}
 931
 932// Test if a character is letter.
 933func isletter(c byte) bool {
 934	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 935}
 936
 937// Test if a character is a letter or a digit.
 938// TODO: check when this is looking for ASCII alnum and when it should use unicode
 939func isalnum(c byte) bool {
 940	return (c >= '0' && c <= '9') || isletter(c)
 941}
 942
 943// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
 944// always ends output with a newline
 945func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
 946	// first, check for common cases: no tabs, or only tabs at beginning of line
 947	i, prefix := 0, 0
 948	slowcase := false
 949	for i = 0; i < len(line); i++ {
 950		if line[i] == '\t' {
 951			if prefix == i {
 952				prefix++
 953			} else {
 954				slowcase = true
 955				break
 956			}
 957		}
 958	}
 959
 960	// no need to decode runes if all tabs are at the beginning of the line
 961	if !slowcase {
 962		for i = 0; i < prefix*tabSize; i++ {
 963			out.WriteByte(' ')
 964		}
 965		out.Write(line[prefix:])
 966		return
 967	}
 968
 969	// the slow case: we need to count runes to figure out how
 970	// many spaces to insert for each tab
 971	column := 0
 972	i = 0
 973	for i < len(line) {
 974		start := i
 975		for i < len(line) && line[i] != '\t' {
 976			_, size := utf8.DecodeRune(line[i:])
 977			i += size
 978			column++
 979		}
 980
 981		if i > start {
 982			out.Write(line[start:i])
 983		}
 984
 985		if i >= len(line) {
 986			break
 987		}
 988
 989		for {
 990			out.WriteByte(' ')
 991			column++
 992			if column%tabSize == 0 {
 993				break
 994			}
 995		}
 996
 997		i++
 998	}
 999}
1000
1001// Find if a line counts as indented or not.
1002// Returns number of characters the indent is (0 = not indented).
1003func isIndented(data []byte, indentSize int) int {
1004	if len(data) == 0 {
1005		return 0
1006	}
1007	if data[0] == '\t' {
1008		return 1
1009	}
1010	if len(data) < indentSize {
1011		return 0
1012	}
1013	for i := 0; i < indentSize; i++ {
1014		if data[i] != ' ' {
1015			return 0
1016		}
1017	}
1018	return indentSize
1019}
1020
1021// Create a url-safe slug for fragments
1022func slugify(in []byte) []byte {
1023	if len(in) == 0 {
1024		return in
1025	}
1026	out := make([]byte, 0, len(in))
1027	sym := false
1028
1029	for _, ch := range in {
1030		if isalnum(ch) {
1031			sym = false
1032			out = append(out, ch)
1033		} else if sym {
1034			continue
1035		} else {
1036			out = append(out, '-')
1037			sym = true
1038		}
1039	}
1040	var a, b int
1041	var ch byte
1042	for a, ch = range out {
1043		if ch != '-' {
1044			break
1045		}
1046	}
1047	for b = len(out) - 1; b > 0; b-- {
1048		if out[b] != '-' {
1049			break
1050		}
1051	}
1052	return out[a : b+1]
1053}