all repos — grayfriday @ a658caacc6244df1b6a760c6a571e579a2327b57

blackfriday fork with a few changes

block.go (view raw)

   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"html"
  19	"regexp"
  20
  21	"github.com/shurcooL/sanitized_anchor_name"
  22)
  23
  24const (
  25	Entity    = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
  26	Escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
  27)
  28
  29var (
  30	reBackslashOrAmp      = regexp.MustCompile("[\\&]")
  31	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + Escapable + "|" + Entity)
  32	reTrailingWhitespace  = regexp.MustCompile("(\n *)+$")
  33)
  34
  35// Parse block-level data.
  36// Note: this function and many that it calls assume that
  37// the input buffer ends with a newline.
  38func (p *parser) block(data []byte) {
  39	if len(data) == 0 || data[len(data)-1] != '\n' {
  40		panic("block input is missing terminating newline")
  41	}
  42
  43	// this is called recursively: enforce a maximum depth
  44	if p.nesting >= p.maxNesting {
  45		return
  46	}
  47	p.nesting++
  48
  49	// parse out one block-level construct at a time
  50	for len(data) > 0 {
  51		// prefixed header:
  52		//
  53		// # Header 1
  54		// ## Header 2
  55		// ...
  56		// ###### Header 6
  57		if p.isPrefixHeader(data) {
  58			data = data[p.prefixHeader(data):]
  59			continue
  60		}
  61
  62		// block of preformatted HTML:
  63		//
  64		// <div>
  65		//     ...
  66		// </div>
  67		if data[0] == '<' {
  68			if i := p.html(data, true); i > 0 {
  69				data = data[i:]
  70				continue
  71			}
  72		}
  73
  74		// title block
  75		//
  76		// % stuff
  77		// % more stuff
  78		// % even more stuff
  79		if p.flags&Titleblock != 0 {
  80			if data[0] == '%' {
  81				if i := p.titleBlock(data, true); i > 0 {
  82					data = data[i:]
  83					continue
  84				}
  85			}
  86		}
  87
  88		// blank lines.  note: returns the # of bytes to skip
  89		if i := p.isEmpty(data); i > 0 {
  90			data = data[i:]
  91			continue
  92		}
  93
  94		// indented code block:
  95		//
  96		//     func max(a, b int) int {
  97		//         if a > b {
  98		//             return a
  99		//         }
 100		//         return b
 101		//      }
 102		if p.codePrefix(data) > 0 {
 103			data = data[p.code(data):]
 104			continue
 105		}
 106
 107		// fenced code block:
 108		//
 109		// ``` go
 110		// func fact(n int) int {
 111		//     if n <= 1 {
 112		//         return n
 113		//     }
 114		//     return n * fact(n-1)
 115		// }
 116		// ```
 117		if p.flags&FencedCode != 0 {
 118			if i := p.fencedCode(data, true); i > 0 {
 119				data = data[i:]
 120				continue
 121			}
 122		}
 123
 124		// horizontal rule:
 125		//
 126		// ------
 127		// or
 128		// ******
 129		// or
 130		// ______
 131		if p.isHRule(data) {
 132			p.addBlock(HorizontalRule, nil)
 133			var i int
 134			for i = 0; data[i] != '\n'; i++ {
 135			}
 136			data = data[i:]
 137			continue
 138		}
 139
 140		// block quote:
 141		//
 142		// > A big quote I found somewhere
 143		// > on the web
 144		if p.quotePrefix(data) > 0 {
 145			data = data[p.quote(data):]
 146			continue
 147		}
 148
 149		// table:
 150		//
 151		// Name  | Age | Phone
 152		// ------|-----|---------
 153		// Bob   | 31  | 555-1234
 154		// Alice | 27  | 555-4321
 155		if p.flags&Tables != 0 {
 156			if i := p.table(data); i > 0 {
 157				data = data[i:]
 158				continue
 159			}
 160		}
 161
 162		// an itemized/unordered list:
 163		//
 164		// * Item 1
 165		// * Item 2
 166		//
 167		// also works with + or -
 168		if p.uliPrefix(data) > 0 {
 169			data = data[p.list(data, 0):]
 170			continue
 171		}
 172
 173		// a numbered/ordered list:
 174		//
 175		// 1. Item 1
 176		// 2. Item 2
 177		if p.oliPrefix(data) > 0 {
 178			data = data[p.list(data, ListTypeOrdered):]
 179			continue
 180		}
 181
 182		// definition lists:
 183		//
 184		// Term 1
 185		// :   Definition a
 186		// :   Definition b
 187		//
 188		// Term 2
 189		// :   Definition c
 190		if p.flags&DefinitionLists != 0 {
 191			if p.dliPrefix(data) > 0 {
 192				data = data[p.list(data, ListTypeDefinition):]
 193				continue
 194			}
 195		}
 196
 197		// anything else must look like a normal paragraph
 198		// note: this finds underlined headers, too
 199		data = data[p.paragraph(data):]
 200	}
 201
 202	p.nesting--
 203}
 204
 205func (p *parser) addBlock(typ NodeType, content []byte) *Node {
 206	p.closeUnmatchedBlocks()
 207	container := p.addChild(typ, 0)
 208	container.content = content
 209	return container
 210}
 211
 212func (p *parser) isPrefixHeader(data []byte) bool {
 213	if data[0] != '#' {
 214		return false
 215	}
 216
 217	if p.flags&SpaceHeaders != 0 {
 218		level := 0
 219		for level < 6 && data[level] == '#' {
 220			level++
 221		}
 222		if data[level] != ' ' {
 223			return false
 224		}
 225	}
 226	return true
 227}
 228
 229func (p *parser) prefixHeader(data []byte) int {
 230	level := 0
 231	for level < 6 && data[level] == '#' {
 232		level++
 233	}
 234	i := skipChar(data, level, ' ')
 235	end := skipUntilChar(data, i, '\n')
 236	skip := end
 237	id := ""
 238	if p.flags&HeaderIDs != 0 {
 239		j, k := 0, 0
 240		// find start/end of header id
 241		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
 242		}
 243		for k = j + 1; k < end && data[k] != '}'; k++ {
 244		}
 245		// extract header id iff found
 246		if j < end && k < end {
 247			id = string(data[j+2 : k])
 248			end = j
 249			skip = k + 1
 250			for end > 0 && data[end-1] == ' ' {
 251				end--
 252			}
 253		}
 254	}
 255	for end > 0 && data[end-1] == '#' {
 256		if isBackslashEscaped(data, end-1) {
 257			break
 258		}
 259		end--
 260	}
 261	for end > 0 && data[end-1] == ' ' {
 262		end--
 263	}
 264	if end > i {
 265		if id == "" && p.flags&AutoHeaderIDs != 0 {
 266			id = sanitized_anchor_name.Create(string(data[i:end]))
 267		}
 268		block := p.addBlock(Header, data[i:end])
 269		block.HeaderID = id
 270		block.Level = uint32(level)
 271	}
 272	return skip
 273}
 274
 275func (p *parser) isUnderlinedHeader(data []byte) int {
 276	// test of level 1 header
 277	if data[0] == '=' {
 278		i := skipChar(data, 1, '=')
 279		i = skipChar(data, i, ' ')
 280		if data[i] == '\n' {
 281			return 1
 282		} else {
 283			return 0
 284		}
 285	}
 286
 287	// test of level 2 header
 288	if data[0] == '-' {
 289		i := skipChar(data, 1, '-')
 290		i = skipChar(data, i, ' ')
 291		if data[i] == '\n' {
 292			return 2
 293		} else {
 294			return 0
 295		}
 296	}
 297
 298	return 0
 299}
 300
 301func (p *parser) titleBlock(data []byte, doRender bool) int {
 302	if data[0] != '%' {
 303		return 0
 304	}
 305	splitData := bytes.Split(data, []byte("\n"))
 306	var i int
 307	for idx, b := range splitData {
 308		if !bytes.HasPrefix(b, []byte("%")) {
 309			i = idx // - 1
 310			break
 311		}
 312	}
 313
 314	data = bytes.Join(splitData[0:i], []byte("\n"))
 315	consumed := len(data)
 316	data = bytes.TrimPrefix(data, []byte("% "))
 317	data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
 318	block := p.addBlock(Header, data)
 319	block.Level = 1
 320	block.IsTitleblock = true
 321
 322	return consumed
 323}
 324
 325func (p *parser) html(data []byte, doRender bool) int {
 326	var i, j int
 327
 328	// identify the opening tag
 329	if data[0] != '<' {
 330		return 0
 331	}
 332	curtag, tagfound := p.htmlFindTag(data[1:])
 333
 334	// handle special cases
 335	if !tagfound {
 336		// check for an HTML comment
 337		if size := p.htmlComment(data, doRender); size > 0 {
 338			return size
 339		}
 340
 341		// check for an <hr> tag
 342		if size := p.htmlHr(data, doRender); size > 0 {
 343			return size
 344		}
 345
 346		// no special case recognized
 347		return 0
 348	}
 349
 350	// look for an unindented matching closing tag
 351	// followed by a blank line
 352	found := false
 353	/*
 354		closetag := []byte("\n</" + curtag + ">")
 355		j = len(curtag) + 1
 356		for !found {
 357			// scan for a closing tag at the beginning of a line
 358			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
 359				j += skip + len(closetag)
 360			} else {
 361				break
 362			}
 363
 364			// see if it is the only thing on the line
 365			if skip := p.isEmpty(data[j:]); skip > 0 {
 366				// see if it is followed by a blank line/eof
 367				j += skip
 368				if j >= len(data) {
 369					found = true
 370					i = j
 371				} else {
 372					if skip := p.isEmpty(data[j:]); skip > 0 {
 373						j += skip
 374						found = true
 375						i = j
 376					}
 377				}
 378			}
 379		}
 380	*/
 381
 382	// if not found, try a second pass looking for indented match
 383	// but not if tag is "ins" or "del" (following original Markdown.pl)
 384	if !found && curtag != "ins" && curtag != "del" {
 385		i = 1
 386		for i < len(data) {
 387			i++
 388			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
 389				i++
 390			}
 391
 392			if i+2+len(curtag) >= len(data) {
 393				break
 394			}
 395
 396			j = p.htmlFindEnd(curtag, data[i-1:])
 397
 398			if j > 0 {
 399				i += j - 1
 400				found = true
 401				break
 402			}
 403		}
 404	}
 405
 406	if !found {
 407		return 0
 408	}
 409
 410	// the end of the block has been found
 411	if doRender {
 412		// trim newlines
 413		end := i
 414		for end > 0 && data[end-1] == '\n' {
 415			end--
 416		}
 417		finalizeHtmlBlock(p.addBlock(HTMLBlock, data[:end]))
 418	}
 419
 420	return i
 421}
 422
 423func finalizeHtmlBlock(block *Node) {
 424	block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{})
 425	block.content = []byte{}
 426}
 427
 428// HTML comment, lax form
 429func (p *parser) htmlComment(data []byte, doRender bool) int {
 430	i := p.inlineHtmlComment(data)
 431	// needs to end with a blank line
 432	if j := p.isEmpty(data[i:]); j > 0 {
 433		size := i + j
 434		if doRender {
 435			// trim trailing newlines
 436			end := size
 437			for end > 0 && data[end-1] == '\n' {
 438				end--
 439			}
 440			block := p.addBlock(HTMLBlock, data[:end])
 441			finalizeHtmlBlock(block)
 442		}
 443		return size
 444	}
 445	return 0
 446}
 447
 448// HR, which is the only self-closing block tag considered
 449func (p *parser) htmlHr(data []byte, doRender bool) int {
 450	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 451		return 0
 452	}
 453	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 454		// not an <hr> tag after all; at least not a valid one
 455		return 0
 456	}
 457
 458	i := 3
 459	for data[i] != '>' && data[i] != '\n' {
 460		i++
 461	}
 462
 463	if data[i] == '>' {
 464		i++
 465		if j := p.isEmpty(data[i:]); j > 0 {
 466			size := i + j
 467			if doRender {
 468				// trim newlines
 469				end := size
 470				for end > 0 && data[end-1] == '\n' {
 471					end--
 472				}
 473				finalizeHtmlBlock(p.addBlock(HTMLBlock, data[:end]))
 474			}
 475			return size
 476		}
 477	}
 478
 479	return 0
 480}
 481
 482func (p *parser) htmlFindTag(data []byte) (string, bool) {
 483	i := 0
 484	for isalnum(data[i]) {
 485		i++
 486	}
 487	key := string(data[:i])
 488	if _, ok := blockTags[key]; ok {
 489		return key, true
 490	}
 491	return "", false
 492}
 493
 494func (p *parser) htmlFindEnd(tag string, data []byte) int {
 495	// assume data[0] == '<' && data[1] == '/' already tested
 496	if tag == "hr" {
 497		return 2
 498	}
 499	// check if tag is a match
 500	closetag := []byte("</" + tag + ">")
 501	if !bytes.HasPrefix(data, closetag) {
 502		return 0
 503	}
 504	i := len(closetag)
 505
 506	// check that the rest of the line is blank
 507	skip := 0
 508	if skip = p.isEmpty(data[i:]); skip == 0 {
 509		return 0
 510	}
 511	i += skip
 512	skip = 0
 513
 514	if i >= len(data) {
 515		return i
 516	}
 517
 518	if p.flags&LaxHTMLBlocks != 0 {
 519		return i
 520	}
 521	if skip = p.isEmpty(data[i:]); skip == 0 {
 522		// following line must be blank
 523		return 0
 524	}
 525
 526	return i + skip
 527}
 528
 529func (p *parser) isEmpty(data []byte) int {
 530	// it is okay to call isEmpty on an empty buffer
 531	if len(data) == 0 {
 532		return 0
 533	}
 534
 535	var i int
 536	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 537		if data[i] != ' ' && data[i] != '\t' {
 538			return 0
 539		}
 540	}
 541	return i + 1
 542}
 543
 544func (p *parser) isHRule(data []byte) bool {
 545	i := 0
 546
 547	// skip up to three spaces
 548	for i < 3 && data[i] == ' ' {
 549		i++
 550	}
 551
 552	// look at the hrule char
 553	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 554		return false
 555	}
 556	c := data[i]
 557
 558	// the whole line must be the char or whitespace
 559	n := 0
 560	for data[i] != '\n' {
 561		switch {
 562		case data[i] == c:
 563			n++
 564		case data[i] != ' ':
 565			return false
 566		}
 567		i++
 568	}
 569
 570	return n >= 3
 571}
 572
 573func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
 574	i, size := 0, 0
 575	skip = 0
 576
 577	// skip up to three spaces
 578	for i < len(data) && i < 3 && data[i] == ' ' {
 579		i++
 580	}
 581	if i >= len(data) {
 582		return
 583	}
 584
 585	// check for the marker characters: ~ or `
 586	if data[i] != '~' && data[i] != '`' {
 587		return
 588	}
 589
 590	c := data[i]
 591
 592	// the whole line must be the same char or whitespace
 593	for i < len(data) && data[i] == c {
 594		size++
 595		i++
 596	}
 597
 598	if i >= len(data) {
 599		return
 600	}
 601
 602	// the marker char must occur at least 3 times
 603	if size < 3 {
 604		return
 605	}
 606	marker = string(data[i-size : i])
 607
 608	// if this is the end marker, it must match the beginning marker
 609	if oldmarker != "" && marker != oldmarker {
 610		return
 611	}
 612
 613	if syntax != nil {
 614		syn := 0
 615		i = skipChar(data, i, ' ')
 616
 617		if i >= len(data) {
 618			return
 619		}
 620
 621		syntaxStart := i
 622
 623		if data[i] == '{' {
 624			i++
 625			syntaxStart++
 626
 627			for i < len(data) && data[i] != '}' && data[i] != '\n' {
 628				syn++
 629				i++
 630			}
 631
 632			if i >= len(data) || data[i] != '}' {
 633				return
 634			}
 635
 636			// strip all whitespace at the beginning and the end
 637			// of the {} block
 638			for syn > 0 && isspace(data[syntaxStart]) {
 639				syntaxStart++
 640				syn--
 641			}
 642
 643			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 644				syn--
 645			}
 646
 647			i++
 648		} else {
 649			for i < len(data) && !isspace(data[i]) {
 650				syn++
 651				i++
 652			}
 653		}
 654
 655		language := string(data[syntaxStart : syntaxStart+syn])
 656		*syntax = &language
 657	}
 658
 659	i = skipChar(data, i, ' ')
 660	if i >= len(data) || data[i] != '\n' {
 661		return
 662	}
 663
 664	skip = i + 1
 665	return
 666}
 667
 668func (p *parser) fencedCode(data []byte, doRender bool) int {
 669	var lang *string
 670	beg, marker := p.isFencedCode(data, &lang, "")
 671	if beg == 0 || beg >= len(data) {
 672		return 0
 673	}
 674
 675	var work bytes.Buffer
 676	if lang != nil {
 677		work.Write([]byte(*lang))
 678		work.WriteByte('\n')
 679	}
 680
 681	for {
 682		// safe to assume beg < len(data)
 683
 684		// check for the end of the code block
 685		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
 686		if fenceEnd != 0 {
 687			beg += fenceEnd
 688			break
 689		}
 690
 691		// copy the current line
 692		end := skipUntilChar(data, beg, '\n') + 1
 693
 694		// did we reach the end of the buffer without a closing marker?
 695		if end >= len(data) {
 696			return 0
 697		}
 698
 699		// verbatim copy to the working buffer
 700		if doRender {
 701			work.Write(data[beg:end])
 702		}
 703		beg = end
 704	}
 705
 706	//syntax := ""
 707	//if lang != nil {
 708	//	syntax = *lang
 709	//}
 710
 711	if doRender {
 712		block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
 713		block.IsFenced = true
 714		finalizeCodeBlock(block)
 715	}
 716
 717	return beg
 718}
 719
 720func unescapeChar(str []byte) []byte {
 721	if str[0] == '\\' {
 722		return []byte{str[1]}
 723	}
 724	return []byte(html.UnescapeString(string(str)))
 725}
 726
 727func unescapeString(str []byte) []byte {
 728	if reBackslashOrAmp.Match(str) {
 729		return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
 730	} else {
 731		return str
 732	}
 733}
 734
 735func finalizeCodeBlock(block *Node) {
 736	if block.IsFenced {
 737		newlinePos := bytes.IndexByte(block.content, '\n')
 738		firstLine := block.content[:newlinePos]
 739		rest := block.content[newlinePos+1:]
 740		block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
 741		block.Literal = rest
 742	} else {
 743		block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'})
 744	}
 745	block.content = nil
 746}
 747
 748func (p *parser) table(data []byte) int {
 749	table := p.addBlock(Table, nil)
 750	i, columns := p.tableHeader(data)
 751	if i == 0 {
 752		p.tip = table.Parent
 753		table.unlink()
 754		return 0
 755	}
 756
 757	p.addBlock(TableBody, nil)
 758
 759	for i < len(data) {
 760		pipes, rowStart := 0, i
 761		for ; data[i] != '\n'; i++ {
 762			if data[i] == '|' {
 763				pipes++
 764			}
 765		}
 766
 767		if pipes == 0 {
 768			i = rowStart
 769			break
 770		}
 771
 772		// include the newline in data sent to tableRow
 773		i++
 774		p.tableRow(data[rowStart:i], columns, false)
 775	}
 776
 777	return i
 778}
 779
 780// check if the specified position is preceded by an odd number of backslashes
 781func isBackslashEscaped(data []byte, i int) bool {
 782	backslashes := 0
 783	for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
 784		backslashes++
 785	}
 786	return backslashes&1 == 1
 787}
 788
 789func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
 790	i := 0
 791	colCount := 1
 792	for i = 0; data[i] != '\n'; i++ {
 793		if data[i] == '|' && !isBackslashEscaped(data, i) {
 794			colCount++
 795		}
 796	}
 797
 798	// doesn't look like a table header
 799	if colCount == 1 {
 800		return
 801	}
 802
 803	// include the newline in the data sent to tableRow
 804	header := data[:i+1]
 805
 806	// column count ignores pipes at beginning or end of line
 807	if data[0] == '|' {
 808		colCount--
 809	}
 810	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
 811		colCount--
 812	}
 813
 814	columns = make([]CellAlignFlags, colCount)
 815
 816	// move on to the header underline
 817	i++
 818	if i >= len(data) {
 819		return
 820	}
 821
 822	if data[i] == '|' && !isBackslashEscaped(data, i) {
 823		i++
 824	}
 825	i = skipChar(data, i, ' ')
 826
 827	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
 828	// and trailing | optional on last column
 829	col := 0
 830	for data[i] != '\n' {
 831		dashes := 0
 832
 833		if data[i] == ':' {
 834			i++
 835			columns[col] |= TableAlignmentLeft
 836			dashes++
 837		}
 838		for data[i] == '-' {
 839			i++
 840			dashes++
 841		}
 842		if data[i] == ':' {
 843			i++
 844			columns[col] |= TableAlignmentRight
 845			dashes++
 846		}
 847		for data[i] == ' ' {
 848			i++
 849		}
 850
 851		// end of column test is messy
 852		switch {
 853		case dashes < 3:
 854			// not a valid column
 855			return
 856
 857		case data[i] == '|' && !isBackslashEscaped(data, i):
 858			// marker found, now skip past trailing whitespace
 859			col++
 860			i++
 861			for data[i] == ' ' {
 862				i++
 863			}
 864
 865			// trailing junk found after last column
 866			if col >= colCount && data[i] != '\n' {
 867				return
 868			}
 869
 870		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
 871			// something else found where marker was required
 872			return
 873
 874		case data[i] == '\n':
 875			// marker is optional for the last column
 876			col++
 877
 878		default:
 879			// trailing junk found after last column
 880			return
 881		}
 882	}
 883	if col != colCount {
 884		return
 885	}
 886
 887	p.addBlock(TableHead, nil)
 888	p.tableRow(header, columns, true)
 889	size = i + 1
 890	return
 891}
 892
 893func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
 894	p.addBlock(TableRow, nil)
 895	i, col := 0, 0
 896
 897	if data[i] == '|' && !isBackslashEscaped(data, i) {
 898		i++
 899	}
 900
 901	for col = 0; col < len(columns) && i < len(data); col++ {
 902		for data[i] == ' ' {
 903			i++
 904		}
 905
 906		cellStart := i
 907
 908		for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
 909			i++
 910		}
 911
 912		cellEnd := i
 913
 914		// skip the end-of-cell marker, possibly taking us past end of buffer
 915		i++
 916
 917		for cellEnd > cellStart && data[cellEnd-1] == ' ' {
 918			cellEnd--
 919		}
 920
 921		cell := p.addBlock(TableCell, data[cellStart:cellEnd])
 922		cell.IsHeader = header
 923		cell.Align = columns[col]
 924	}
 925
 926	// pad it out with empty columns to get the right number
 927	for ; col < len(columns); col++ {
 928		cell := p.addBlock(TableCell, nil)
 929		cell.IsHeader = header
 930		cell.Align = columns[col]
 931	}
 932
 933	// silently ignore rows with too many cells
 934}
 935
 936// returns blockquote prefix length
 937func (p *parser) quotePrefix(data []byte) int {
 938	i := 0
 939	for i < 3 && data[i] == ' ' {
 940		i++
 941	}
 942	if data[i] == '>' {
 943		if data[i+1] == ' ' {
 944			return i + 2
 945		}
 946		return i + 1
 947	}
 948	return 0
 949}
 950
 951// blockquote ends with at least one blank line
 952// followed by something without a blockquote prefix
 953func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
 954	if p.isEmpty(data[beg:]) <= 0 {
 955		return false
 956	}
 957	if end >= len(data) {
 958		return true
 959	}
 960	return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
 961}
 962
 963// parse a blockquote fragment
 964func (p *parser) quote(data []byte) int {
 965	block := p.addBlock(BlockQuote, nil)
 966	var raw bytes.Buffer
 967	beg, end := 0, 0
 968	for beg < len(data) {
 969		end = beg
 970		// Step over whole lines, collecting them. While doing that, check for
 971		// fenced code and if one's found, incorporate it altogether,
 972		// irregardless of any contents inside it
 973		for data[end] != '\n' {
 974			if p.flags&FencedCode != 0 {
 975				if i := p.fencedCode(data[end:], false); i > 0 {
 976					// -1 to compensate for the extra end++ after the loop:
 977					end += i - 1
 978					break
 979				}
 980			}
 981			end++
 982		}
 983		end++
 984		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 985			// skip the prefix
 986			beg += pre
 987		} else if p.terminateBlockquote(data, beg, end) {
 988			break
 989		}
 990		// this line is part of the blockquote
 991		raw.Write(data[beg:end])
 992		beg = end
 993	}
 994	p.block(raw.Bytes())
 995	p.finalize(block)
 996	return end
 997}
 998
 999// returns prefix length for block code
1000func (p *parser) codePrefix(data []byte) int {
1001	if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1002		return 4
1003	}
1004	return 0
1005}
1006
1007func (p *parser) code(data []byte) int {
1008	var work bytes.Buffer
1009
1010	i := 0
1011	for i < len(data) {
1012		beg := i
1013		for data[i] != '\n' {
1014			i++
1015		}
1016		i++
1017
1018		blankline := p.isEmpty(data[beg:i]) > 0
1019		if pre := p.codePrefix(data[beg:i]); pre > 0 {
1020			beg += pre
1021		} else if !blankline {
1022			// non-empty, non-prefixed line breaks the pre
1023			i = beg
1024			break
1025		}
1026
1027		// verbatim copy to the working buffeu
1028		if blankline {
1029			work.WriteByte('\n')
1030		} else {
1031			work.Write(data[beg:i])
1032		}
1033	}
1034
1035	// trim all the \n off the end of work
1036	workbytes := work.Bytes()
1037	eol := len(workbytes)
1038	for eol > 0 && workbytes[eol-1] == '\n' {
1039		eol--
1040	}
1041	if eol != len(workbytes) {
1042		work.Truncate(eol)
1043	}
1044
1045	work.WriteByte('\n')
1046
1047	block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1048	block.IsFenced = false
1049	finalizeCodeBlock(block)
1050
1051	return i
1052}
1053
1054// returns unordered list item prefix
1055func (p *parser) uliPrefix(data []byte) int {
1056	i := 0
1057
1058	// start with up to 3 spaces
1059	for i < 3 && data[i] == ' ' {
1060		i++
1061	}
1062
1063	// need a *, +, or - followed by a space
1064	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1065		data[i+1] != ' ' {
1066		return 0
1067	}
1068	return i + 2
1069}
1070
1071// returns ordered list item prefix
1072func (p *parser) oliPrefix(data []byte) int {
1073	i := 0
1074
1075	// start with up to 3 spaces
1076	for i < 3 && data[i] == ' ' {
1077		i++
1078	}
1079
1080	// count the digits
1081	start := i
1082	for data[i] >= '0' && data[i] <= '9' {
1083		i++
1084	}
1085
1086	// we need >= 1 digits followed by a dot and a space
1087	if start == i || data[i] != '.' || data[i+1] != ' ' {
1088		return 0
1089	}
1090	return i + 2
1091}
1092
1093// returns definition list item prefix
1094func (p *parser) dliPrefix(data []byte) int {
1095	i := 0
1096
1097	// need a : followed by a spaces
1098	if data[i] != ':' || data[i+1] != ' ' {
1099		return 0
1100	}
1101	for data[i] == ' ' {
1102		i++
1103	}
1104	return i + 2
1105}
1106
1107// parse ordered or unordered list block
1108func (p *parser) list(data []byte, flags ListType) int {
1109	i := 0
1110	flags |= ListItemBeginningOfList
1111	block := p.addBlock(List, nil)
1112	block.ListFlags = flags
1113	block.Tight = true
1114
1115	for i < len(data) {
1116		skip := p.listItem(data[i:], &flags)
1117		if flags&ListItemContainsBlock != 0 {
1118			block.ListData.Tight = false
1119		}
1120		i += skip
1121		if skip == 0 || flags&ListItemEndOfList != 0 {
1122			break
1123		}
1124		flags &= ^ListItemBeginningOfList
1125	}
1126
1127	above := block.Parent
1128	finalizeList(block)
1129	p.tip = above
1130	return i
1131}
1132
1133// Returns true if block ends with a blank line, descending if needed
1134// into lists and sublists.
1135func endsWithBlankLine(block *Node) bool {
1136	// TODO: figure this out. Always false now.
1137	for block != nil {
1138		//if block.lastLineBlank {
1139		//return true
1140		//}
1141		t := block.Type
1142		if t == List || t == Item {
1143			block = block.LastChild
1144		} else {
1145			break
1146		}
1147	}
1148	return false
1149}
1150
1151func finalizeList(block *Node) {
1152	block.open = false
1153	item := block.FirstChild
1154	for item != nil {
1155		// check for non-final list item ending with blank line:
1156		if endsWithBlankLine(item) && item.Next != nil {
1157			block.ListData.Tight = false
1158			break
1159		}
1160		// recurse into children of list item, to see if there are spaces
1161		// between any of them:
1162		subItem := item.FirstChild
1163		for subItem != nil {
1164			if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
1165				block.ListData.Tight = false
1166				break
1167			}
1168			subItem = subItem.Next
1169		}
1170		item = item.Next
1171	}
1172}
1173
1174// Parse a single list item.
1175// Assumes initial prefix is already removed if this is a sublist.
1176func (p *parser) listItem(data []byte, flags *ListType) int {
1177	// keep track of the indentation of the first line
1178	itemIndent := 0
1179	for itemIndent < 3 && data[itemIndent] == ' ' {
1180		itemIndent++
1181	}
1182
1183	var bulletChar byte = '*'
1184	i := p.uliPrefix(data)
1185	if i == 0 {
1186		i = p.oliPrefix(data)
1187	} else {
1188		bulletChar = data[i-2]
1189	}
1190	if i == 0 {
1191		i = p.dliPrefix(data)
1192		// reset definition term flag
1193		if i > 0 {
1194			*flags &= ^ListTypeTerm
1195		}
1196	}
1197	if i == 0 {
1198		// if in definition list, set term flag and continue
1199		if *flags&ListTypeDefinition != 0 {
1200			*flags |= ListTypeTerm
1201		} else {
1202			return 0
1203		}
1204	}
1205
1206	// skip leading whitespace on first line
1207	for data[i] == ' ' {
1208		i++
1209	}
1210
1211	// find the end of the line
1212	line := i
1213	for i > 0 && data[i-1] != '\n' {
1214		i++
1215	}
1216
1217	// get working buffer
1218	var raw bytes.Buffer
1219
1220	// put the first line into the working buffer
1221	raw.Write(data[line:i])
1222	line = i
1223
1224	// process the following lines
1225	containsBlankLine := false
1226	sublist := 0
1227
1228gatherlines:
1229	for line < len(data) {
1230		i++
1231
1232		// find the end of this line
1233		for data[i-1] != '\n' {
1234			i++
1235		}
1236
1237		// if it is an empty line, guess that it is part of this item
1238		// and move on to the next line
1239		if p.isEmpty(data[line:i]) > 0 {
1240			containsBlankLine = true
1241			line = i
1242			continue
1243		}
1244
1245		// calculate the indentation
1246		indent := 0
1247		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1248			indent++
1249		}
1250
1251		chunk := data[line+indent : i]
1252
1253		// evaluate how this line fits in
1254		switch {
1255		// is this a nested list item?
1256		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1257			p.oliPrefix(chunk) > 0 ||
1258			p.dliPrefix(chunk) > 0:
1259
1260			if containsBlankLine {
1261				*flags |= ListItemContainsBlock
1262			}
1263
1264			// to be a nested list, it must be indented more
1265			// if not, it is the next item in the same list
1266			if indent <= itemIndent {
1267				break gatherlines
1268			}
1269
1270			// is this the first item in the nested list?
1271			if sublist == 0 {
1272				sublist = raw.Len()
1273			}
1274
1275		// is this a nested prefix header?
1276		case p.isPrefixHeader(chunk):
1277			// if the header is not indented, it is not nested in the list
1278			// and thus ends the list
1279			if containsBlankLine && indent < 4 {
1280				*flags |= ListItemEndOfList
1281				break gatherlines
1282			}
1283			*flags |= ListItemContainsBlock
1284
1285		// anything following an empty line is only part
1286		// of this item if it is indented 4 spaces
1287		// (regardless of the indentation of the beginning of the item)
1288		case containsBlankLine && indent < 4:
1289			if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
1290				// is the next item still a part of this list?
1291				next := i
1292				for data[next] != '\n' {
1293					next++
1294				}
1295				for next < len(data)-1 && data[next] == '\n' {
1296					next++
1297				}
1298				if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1299					*flags |= ListItemEndOfList
1300				}
1301			} else {
1302				*flags |= ListItemEndOfList
1303			}
1304			break gatherlines
1305
1306		// a blank line means this should be parsed as a block
1307		case containsBlankLine:
1308			raw.WriteByte('\n')
1309			*flags |= ListItemContainsBlock
1310		}
1311
1312		// if this line was preceded by one or more blanks,
1313		// re-introduce the blank into the buffer
1314		if containsBlankLine {
1315			containsBlankLine = false
1316			raw.WriteByte('\n')
1317		}
1318
1319		// add the line into the working buffer without prefix
1320		raw.Write(data[line+indent : i])
1321
1322		line = i
1323	}
1324
1325	rawBytes := raw.Bytes()
1326
1327	block := p.addBlock(Item, nil)
1328	block.ListFlags = *flags
1329	block.Tight = false
1330	block.BulletChar = bulletChar
1331	block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
1332
1333	// render the contents of the list item
1334	if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
1335		// intermediate render of block item, except for definition term
1336		if sublist > 0 {
1337			p.block(rawBytes[:sublist])
1338			p.block(rawBytes[sublist:])
1339		} else {
1340			p.block(rawBytes)
1341		}
1342	} else {
1343		// intermediate render of inline item
1344		if sublist > 0 {
1345			child := p.addChild(Paragraph, 0)
1346			child.content = rawBytes[:sublist]
1347			p.block(rawBytes[sublist:])
1348		} else {
1349			child := p.addChild(Paragraph, 0)
1350			child.content = rawBytes
1351		}
1352	}
1353	return line
1354}
1355
1356// render a single paragraph that has already been parsed out
1357func (p *parser) renderParagraph(data []byte) {
1358	if len(data) == 0 {
1359		return
1360	}
1361
1362	// trim leading spaces
1363	beg := 0
1364	for data[beg] == ' ' {
1365		beg++
1366	}
1367
1368	// trim trailing newline
1369	end := len(data) - 1
1370
1371	// trim trailing spaces
1372	for end > beg && data[end-1] == ' ' {
1373		end--
1374	}
1375
1376	p.addBlock(Paragraph, data[beg:end])
1377}
1378
1379func (p *parser) paragraph(data []byte) int {
1380	// prev: index of 1st char of previous line
1381	// line: index of 1st char of current line
1382	// i: index of cursor/end of current line
1383	var prev, line, i int
1384
1385	// keep going until we find something to mark the end of the paragraph
1386	for i < len(data) {
1387		// mark the beginning of the current line
1388		prev = line
1389		current := data[i:]
1390		line = i
1391
1392		// did we find a blank line marking the end of the paragraph?
1393		if n := p.isEmpty(current); n > 0 {
1394			// did this blank line followed by a definition list item?
1395			if p.flags&DefinitionLists != 0 {
1396				if i < len(data)-1 && data[i+1] == ':' {
1397					return p.list(data[prev:], ListTypeDefinition)
1398				}
1399			}
1400
1401			p.renderParagraph(data[:i])
1402			return i + n
1403		}
1404
1405		// an underline under some text marks a header, so our paragraph ended on prev line
1406		if i > 0 {
1407			if level := p.isUnderlinedHeader(current); level > 0 {
1408				// render the paragraph
1409				p.renderParagraph(data[:prev])
1410
1411				// ignore leading and trailing whitespace
1412				eol := i - 1
1413				for prev < eol && data[prev] == ' ' {
1414					prev++
1415				}
1416				for eol > prev && data[eol-1] == ' ' {
1417					eol--
1418				}
1419
1420				id := ""
1421				if p.flags&AutoHeaderIDs != 0 {
1422					id = sanitized_anchor_name.Create(string(data[prev:eol]))
1423				}
1424
1425				block := p.addBlock(Header, data[prev:eol])
1426				block.Level = uint32(level)
1427				block.HeaderID = id
1428
1429				// find the end of the underline
1430				for data[i] != '\n' {
1431					i++
1432				}
1433				return i
1434			}
1435		}
1436
1437		// if the next line starts a block of HTML, then the paragraph ends here
1438		if p.flags&LaxHTMLBlocks != 0 {
1439			if data[i] == '<' && p.html(current, false) > 0 {
1440				// rewind to before the HTML block
1441				p.renderParagraph(data[:i])
1442				return i
1443			}
1444		}
1445
1446		// if there's a prefixed header or a horizontal rule after this, paragraph is over
1447		if p.isPrefixHeader(current) || p.isHRule(current) {
1448			p.renderParagraph(data[:i])
1449			return i
1450		}
1451
1452		// if there's a fenced code block, paragraph is over
1453		if p.flags&FencedCode != 0 {
1454			if p.fencedCode(current, false) > 0 {
1455				p.renderParagraph(data[:i])
1456				return i
1457			}
1458		}
1459
1460		// if there's a definition list item, prev line is a definition term
1461		if p.flags&DefinitionLists != 0 {
1462			if p.dliPrefix(current) != 0 {
1463				return p.list(data[prev:], ListTypeDefinition)
1464			}
1465		}
1466
1467		// if there's a list after this, paragraph is over
1468		if p.flags&NoEmptyLineBeforeBlock != 0 {
1469			if p.uliPrefix(current) != 0 ||
1470				p.oliPrefix(current) != 0 ||
1471				p.quotePrefix(current) != 0 ||
1472				p.codePrefix(current) != 0 {
1473				p.renderParagraph(data[:i])
1474				return i
1475			}
1476		}
1477
1478		// otherwise, scan to the beginning of the next line
1479		for data[i] != '\n' {
1480			i++
1481		}
1482		i++
1483	}
1484
1485	p.renderParagraph(data[:i])
1486	return i
1487}