icy does git — grayfriday (b98e30685370e0b7a5556c084dc4aa7ca6f59136): block.go

block.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"html"
  19	"regexp"
  20
  21	"github.com/shurcooL/sanitized_anchor_name"
  22)
  23
  24const (
  25	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
  26	escapable  = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
  27)
  28
  29var (
  30	reBackslashOrAmp      = regexp.MustCompile("[\\&]")
  31	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
  32)
  33
  34// Parse block-level data.
  35// Note: this function and many that it calls assume that
  36// the input buffer ends with a newline.
  37func (p *parser) block(data []byte) {
  38	// this is called recursively: enforce a maximum depth
  39	if p.nesting >= p.maxNesting {
  40		return
  41	}
  42	p.nesting++
  43
  44	// parse out one block-level construct at a time
  45	for len(data) > 0 {
  46		// prefixed heading:
  47		//
  48		// # Heading 1
  49		// ## Heading 2
  50		// ...
  51		// ###### Heading 6
  52		if p.isPrefixHeading(data) {
  53			data = data[p.prefixHeading(data):]
  54			continue
  55		}
  56
  57		// block of preformatted HTML:
  58		//
  59		// <div>
  60		//     ...
  61		// </div>
  62		if data[0] == '<' {
  63			if i := p.html(data, true); i > 0 {
  64				data = data[i:]
  65				continue
  66			}
  67		}
  68
  69		// title block
  70		//
  71		// % stuff
  72		// % more stuff
  73		// % even more stuff
  74		if p.flags&Titleblock != 0 {
  75			if data[0] == '%' {
  76				if i := p.titleBlock(data, true); i > 0 {
  77					data = data[i:]
  78					continue
  79				}
  80			}
  81		}
  82
  83		// blank lines.  note: returns the # of bytes to skip
  84		if i := p.isEmpty(data); i > 0 {
  85			data = data[i:]
  86			continue
  87		}
  88
  89		// indented code block:
  90		//
  91		//     func max(a, b int) int {
  92		//         if a > b {
  93		//             return a
  94		//         }
  95		//         return b
  96		//      }
  97		if p.codePrefix(data) > 0 {
  98			data = data[p.code(data):]
  99			continue
 100		}
 101
 102		// fenced code block:
 103		//
 104		// ``` go
 105		// func fact(n int) int {
 106		//     if n <= 1 {
 107		//         return n
 108		//     }
 109		//     return n * fact(n-1)
 110		// }
 111		// ```
 112		if p.flags&FencedCode != 0 {
 113			if i := p.fencedCodeBlock(data, true); i > 0 {
 114				data = data[i:]
 115				continue
 116			}
 117		}
 118
 119		// horizontal rule:
 120		//
 121		// ------
 122		// or
 123		// ******
 124		// or
 125		// ______
 126		if p.isHRule(data) {
 127			p.addBlock(HorizontalRule, nil)
 128			var i int
 129			for i = 0; i < len(data) && data[i] != '\n'; i++ {
 130			}
 131			data = data[i:]
 132			continue
 133		}
 134
 135		// block quote:
 136		//
 137		// > A big quote I found somewhere
 138		// > on the web
 139		if p.quotePrefix(data) > 0 {
 140			data = data[p.quote(data):]
 141			continue
 142		}
 143
 144		// table:
 145		//
 146		// Name  | Age | Phone
 147		// ------|-----|---------
 148		// Bob   | 31  | 555-1234
 149		// Alice | 27  | 555-4321
 150		if p.flags&Tables != 0 {
 151			if i := p.table(data); i > 0 {
 152				data = data[i:]
 153				continue
 154			}
 155		}
 156
 157		// an itemized/unordered list:
 158		//
 159		// * Item 1
 160		// * Item 2
 161		//
 162		// also works with + or -
 163		if p.uliPrefix(data) > 0 {
 164			data = data[p.list(data, 0):]
 165			continue
 166		}
 167
 168		// a numbered/ordered list:
 169		//
 170		// 1. Item 1
 171		// 2. Item 2
 172		if p.oliPrefix(data) > 0 {
 173			data = data[p.list(data, ListTypeOrdered):]
 174			continue
 175		}
 176
 177		// definition lists:
 178		//
 179		// Term 1
 180		// :   Definition a
 181		// :   Definition b
 182		//
 183		// Term 2
 184		// :   Definition c
 185		if p.flags&DefinitionLists != 0 {
 186			if p.dliPrefix(data) > 0 {
 187				data = data[p.list(data, ListTypeDefinition):]
 188				continue
 189			}
 190		}
 191
 192		// anything else must look like a normal paragraph
 193		// note: this finds underlined headings, too
 194		data = data[p.paragraph(data):]
 195	}
 196
 197	p.nesting--
 198}
 199
 200func (p *parser) addBlock(typ NodeType, content []byte) *Node {
 201	p.closeUnmatchedBlocks()
 202	container := p.addChild(typ, 0)
 203	container.content = content
 204	return container
 205}
 206
 207func (p *parser) isPrefixHeading(data []byte) bool {
 208	if data[0] != '#' {
 209		return false
 210	}
 211
 212	if p.flags&SpaceHeadings != 0 {
 213		level := 0
 214		for level < 6 && level < len(data) && data[level] == '#' {
 215			level++
 216		}
 217		if level == len(data) || data[level] != ' ' {
 218			return false
 219		}
 220	}
 221	return true
 222}
 223
 224func (p *parser) prefixHeading(data []byte) int {
 225	level := 0
 226	for level < 6 && level < len(data) && data[level] == '#' {
 227		level++
 228	}
 229	i := skipChar(data, level, ' ')
 230	end := skipUntilChar(data, i, '\n')
 231	skip := end
 232	id := ""
 233	if p.flags&HeadingIDs != 0 {
 234		j, k := 0, 0
 235		// find start/end of heading id
 236		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
 237		}
 238		for k = j + 1; k < end && data[k] != '}'; k++ {
 239		}
 240		// extract heading id iff found
 241		if j < end && k < end {
 242			id = string(data[j+2 : k])
 243			end = j
 244			skip = k + 1
 245			for end > 0 && data[end-1] == ' ' {
 246				end--
 247			}
 248		}
 249	}
 250	for end > 0 && data[end-1] == '#' {
 251		if isBackslashEscaped(data, end-1) {
 252			break
 253		}
 254		end--
 255	}
 256	for end > 0 && data[end-1] == ' ' {
 257		end--
 258	}
 259	if end > i {
 260		if id == "" && p.flags&AutoHeadingIDs != 0 {
 261			id = sanitized_anchor_name.Create(string(data[i:end]))
 262		}
 263		block := p.addBlock(Heading, data[i:end])
 264		block.HeadingID = id
 265		block.Level = level
 266	}
 267	return skip
 268}
 269
 270func (p *parser) isUnderlinedHeading(data []byte) int {
 271	// test of level 1 heading
 272	if data[0] == '=' {
 273		i := skipChar(data, 1, '=')
 274		i = skipChar(data, i, ' ')
 275		if i < len(data) && data[i] == '\n' {
 276			return 1
 277		}
 278		return 0
 279	}
 280
 281	// test of level 2 heading
 282	if data[0] == '-' {
 283		i := skipChar(data, 1, '-')
 284		i = skipChar(data, i, ' ')
 285		if i < len(data) && data[i] == '\n' {
 286			return 2
 287		}
 288		return 0
 289	}
 290
 291	return 0
 292}
 293
 294func (p *parser) titleBlock(data []byte, doRender bool) int {
 295	if data[0] != '%' {
 296		return 0
 297	}
 298	splitData := bytes.Split(data, []byte("\n"))
 299	var i int
 300	for idx, b := range splitData {
 301		if !bytes.HasPrefix(b, []byte("%")) {
 302			i = idx // - 1
 303			break
 304		}
 305	}
 306
 307	data = bytes.Join(splitData[0:i], []byte("\n"))
 308	consumed := len(data)
 309	data = bytes.TrimPrefix(data, []byte("% "))
 310	data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
 311	block := p.addBlock(Heading, data)
 312	block.Level = 1
 313	block.IsTitleblock = true
 314
 315	return consumed
 316}
 317
 318func (p *parser) html(data []byte, doRender bool) int {
 319	var i, j int
 320
 321	// identify the opening tag
 322	if data[0] != '<' {
 323		return 0
 324	}
 325	curtag, tagfound := p.htmlFindTag(data[1:])
 326
 327	// handle special cases
 328	if !tagfound {
 329		// check for an HTML comment
 330		if size := p.htmlComment(data, doRender); size > 0 {
 331			return size
 332		}
 333
 334		// check for an <hr> tag
 335		if size := p.htmlHr(data, doRender); size > 0 {
 336			return size
 337		}
 338
 339		// no special case recognized
 340		return 0
 341	}
 342
 343	// look for an unindented matching closing tag
 344	// followed by a blank line
 345	found := false
 346	/*
 347		closetag := []byte("\n</" + curtag + ">")
 348		j = len(curtag) + 1
 349		for !found {
 350			// scan for a closing tag at the beginning of a line
 351			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
 352				j += skip + len(closetag)
 353			} else {
 354				break
 355			}
 356
 357			// see if it is the only thing on the line
 358			if skip := p.isEmpty(data[j:]); skip > 0 {
 359				// see if it is followed by a blank line/eof
 360				j += skip
 361				if j >= len(data) {
 362					found = true
 363					i = j
 364				} else {
 365					if skip := p.isEmpty(data[j:]); skip > 0 {
 366						j += skip
 367						found = true
 368						i = j
 369					}
 370				}
 371			}
 372		}
 373	*/
 374
 375	// if not found, try a second pass looking for indented match
 376	// but not if tag is "ins" or "del" (following original Markdown.pl)
 377	if !found && curtag != "ins" && curtag != "del" {
 378		i = 1
 379		for i < len(data) {
 380			i++
 381			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
 382				i++
 383			}
 384
 385			if i+2+len(curtag) >= len(data) {
 386				break
 387			}
 388
 389			j = p.htmlFindEnd(curtag, data[i-1:])
 390
 391			if j > 0 {
 392				i += j - 1
 393				found = true
 394				break
 395			}
 396		}
 397	}
 398
 399	if !found {
 400		return 0
 401	}
 402
 403	// the end of the block has been found
 404	if doRender {
 405		// trim newlines
 406		end := i
 407		for end > 0 && data[end-1] == '\n' {
 408			end--
 409		}
 410		finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
 411	}
 412
 413	return i
 414}
 415
 416func finalizeHTMLBlock(block *Node) {
 417	block.Literal = block.content
 418	block.content = nil
 419}
 420
 421// HTML comment, lax form
 422func (p *parser) htmlComment(data []byte, doRender bool) int {
 423	i := p.inlineHTMLComment(data)
 424	// needs to end with a blank line
 425	if j := p.isEmpty(data[i:]); j > 0 {
 426		size := i + j
 427		if doRender {
 428			// trim trailing newlines
 429			end := size
 430			for end > 0 && data[end-1] == '\n' {
 431				end--
 432			}
 433			block := p.addBlock(HTMLBlock, data[:end])
 434			finalizeHTMLBlock(block)
 435		}
 436		return size
 437	}
 438	return 0
 439}
 440
 441// HR, which is the only self-closing block tag considered
 442func (p *parser) htmlHr(data []byte, doRender bool) int {
 443	if len(data) < 4 {
 444		return 0
 445	}
 446	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 447		return 0
 448	}
 449	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 450		// not an <hr> tag after all; at least not a valid one
 451		return 0
 452	}
 453	i := 3
 454	for i < len(data) && data[i] != '>' && data[i] != '\n' {
 455		i++
 456	}
 457	if i < len(data) && data[i] == '>' {
 458		i++
 459		if j := p.isEmpty(data[i:]); j > 0 {
 460			size := i + j
 461			if doRender {
 462				// trim newlines
 463				end := size
 464				for end > 0 && data[end-1] == '\n' {
 465					end--
 466				}
 467				finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
 468			}
 469			return size
 470		}
 471	}
 472	return 0
 473}
 474
 475func (p *parser) htmlFindTag(data []byte) (string, bool) {
 476	i := 0
 477	for i < len(data) && isalnum(data[i]) {
 478		i++
 479	}
 480	key := string(data[:i])
 481	if _, ok := blockTags[key]; ok {
 482		return key, true
 483	}
 484	return "", false
 485}
 486
 487func (p *parser) htmlFindEnd(tag string, data []byte) int {
 488	// assume data[0] == '<' && data[1] == '/' already tested
 489	if tag == "hr" {
 490		return 2
 491	}
 492	// check if tag is a match
 493	closetag := []byte("</" + tag + ">")
 494	if !bytes.HasPrefix(data, closetag) {
 495		return 0
 496	}
 497	i := len(closetag)
 498
 499	// check that the rest of the line is blank
 500	skip := 0
 501	if skip = p.isEmpty(data[i:]); skip == 0 {
 502		return 0
 503	}
 504	i += skip
 505	skip = 0
 506
 507	if i >= len(data) {
 508		return i
 509	}
 510
 511	if p.flags&LaxHTMLBlocks != 0 {
 512		return i
 513	}
 514	if skip = p.isEmpty(data[i:]); skip == 0 {
 515		// following line must be blank
 516		return 0
 517	}
 518
 519	return i + skip
 520}
 521
 522func (*parser) isEmpty(data []byte) int {
 523	// it is okay to call isEmpty on an empty buffer
 524	if len(data) == 0 {
 525		return 0
 526	}
 527
 528	var i int
 529	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 530		if data[i] != ' ' && data[i] != '\t' {
 531			return 0
 532		}
 533	}
 534	if i < len(data) && data[i] == '\n' {
 535		i++
 536	}
 537	return i
 538}
 539
 540func (*parser) isHRule(data []byte) bool {
 541	i := 0
 542
 543	// skip up to three spaces
 544	for i < 3 && data[i] == ' ' {
 545		i++
 546	}
 547
 548	// look at the hrule char
 549	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 550		return false
 551	}
 552	c := data[i]
 553
 554	// the whole line must be the char or whitespace
 555	n := 0
 556	for i < len(data) && data[i] != '\n' {
 557		switch {
 558		case data[i] == c:
 559			n++
 560		case data[i] != ' ':
 561			return false
 562		}
 563		i++
 564	}
 565
 566	return n >= 3
 567}
 568
 569// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
 570// and returns the end index if so, or 0 otherwise. It also returns the marker found.
 571// If syntax is not nil, it gets set to the syntax specified in the fence line.
 572func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) {
 573	i, size := 0, 0
 574
 575	// skip up to three spaces
 576	for i < len(data) && i < 3 && data[i] == ' ' {
 577		i++
 578	}
 579
 580	// check for the marker characters: ~ or `
 581	if i >= len(data) {
 582		return 0, ""
 583	}
 584	if data[i] != '~' && data[i] != '`' {
 585		return 0, ""
 586	}
 587
 588	c := data[i]
 589
 590	// the whole line must be the same char or whitespace
 591	for i < len(data) && data[i] == c {
 592		size++
 593		i++
 594	}
 595
 596	// the marker char must occur at least 3 times
 597	if size < 3 {
 598		return 0, ""
 599	}
 600	marker = string(data[i-size : i])
 601
 602	// if this is the end marker, it must match the beginning marker
 603	if oldmarker != "" && marker != oldmarker {
 604		return 0, ""
 605	}
 606
 607	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
 608	// into one, always get the syntax, and discard it if the caller doesn't care.
 609	if syntax != nil {
 610		syn := 0
 611		i = skipChar(data, i, ' ')
 612
 613		if i >= len(data) {
 614			if i == len(data) {
 615				return i, marker
 616			}
 617			return 0, ""
 618		}
 619
 620		syntaxStart := i
 621
 622		if data[i] == '{' {
 623			i++
 624			syntaxStart++
 625
 626			for i < len(data) && data[i] != '}' && data[i] != '\n' {
 627				syn++
 628				i++
 629			}
 630
 631			if i >= len(data) || data[i] != '}' {
 632				return 0, ""
 633			}
 634
 635			// strip all whitespace at the beginning and the end
 636			// of the {} block
 637			for syn > 0 && isspace(data[syntaxStart]) {
 638				syntaxStart++
 639				syn--
 640			}
 641
 642			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 643				syn--
 644			}
 645
 646			i++
 647		} else {
 648			for i < len(data) && !isspace(data[i]) {
 649				syn++
 650				i++
 651			}
 652		}
 653
 654		*syntax = string(data[syntaxStart : syntaxStart+syn])
 655	}
 656
 657	i = skipChar(data, i, ' ')
 658	if i >= len(data) || data[i] != '\n' {
 659		if i == len(data) {
 660			return i, marker
 661		}
 662		return 0, ""
 663	}
 664	return i + 1, marker // Take newline into account.
 665}
 666
 667// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
 668// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
 669// If doRender is true, a final newline is mandatory to recognize the fenced code block.
 670func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
 671	var syntax string
 672	beg, marker := isFenceLine(data, &syntax, "")
 673	if beg == 0 || beg >= len(data) {
 674		return 0
 675	}
 676
 677	var work bytes.Buffer
 678	work.Write([]byte(syntax))
 679	work.WriteByte('\n')
 680
 681	for {
 682		// safe to assume beg < len(data)
 683
 684		// check for the end of the code block
 685		fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
 686		if fenceEnd != 0 {
 687			beg += fenceEnd
 688			break
 689		}
 690
 691		// copy the current line
 692		end := skipUntilChar(data, beg, '\n') + 1
 693
 694		// did we reach the end of the buffer without a closing marker?
 695		if end >= len(data) {
 696			return 0
 697		}
 698
 699		// verbatim copy to the working buffer
 700		if doRender {
 701			work.Write(data[beg:end])
 702		}
 703		beg = end
 704	}
 705
 706	if doRender {
 707		block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
 708		block.IsFenced = true
 709		finalizeCodeBlock(block)
 710	}
 711
 712	return beg
 713}
 714
 715func unescapeChar(str []byte) []byte {
 716	if str[0] == '\\' {
 717		return []byte{str[1]}
 718	}
 719	return []byte(html.UnescapeString(string(str)))
 720}
 721
 722func unescapeString(str []byte) []byte {
 723	if reBackslashOrAmp.Match(str) {
 724		return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
 725	}
 726	return str
 727}
 728
 729func finalizeCodeBlock(block *Node) {
 730	if block.IsFenced {
 731		newlinePos := bytes.IndexByte(block.content, '\n')
 732		firstLine := block.content[:newlinePos]
 733		rest := block.content[newlinePos+1:]
 734		block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
 735		block.Literal = rest
 736	} else {
 737		block.Literal = block.content
 738	}
 739	block.content = nil
 740}
 741
 742func (p *parser) table(data []byte) int {
 743	table := p.addBlock(Table, nil)
 744	i, columns := p.tableHeader(data)
 745	if i == 0 {
 746		p.tip = table.Parent
 747		table.Unlink()
 748		return 0
 749	}
 750
 751	p.addBlock(TableBody, nil)
 752
 753	for i < len(data) {
 754		pipes, rowStart := 0, i
 755		for ; i < len(data) && data[i] != '\n'; i++ {
 756			if data[i] == '|' {
 757				pipes++
 758			}
 759		}
 760
 761		if pipes == 0 {
 762			i = rowStart
 763			break
 764		}
 765
 766		// include the newline in data sent to tableRow
 767		if i < len(data) && data[i] == '\n' {
 768			i++
 769		}
 770		p.tableRow(data[rowStart:i], columns, false)
 771	}
 772
 773	return i
 774}
 775
 776// check if the specified position is preceded by an odd number of backslashes
 777func isBackslashEscaped(data []byte, i int) bool {
 778	backslashes := 0
 779	for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
 780		backslashes++
 781	}
 782	return backslashes&1 == 1
 783}
 784
 785func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
 786	i := 0
 787	colCount := 1
 788	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 789		if data[i] == '|' && !isBackslashEscaped(data, i) {
 790			colCount++
 791		}
 792	}
 793
 794	// doesn't look like a table header
 795	if colCount == 1 {
 796		return
 797	}
 798
 799	// include the newline in the data sent to tableRow
 800	j := i
 801	if j < len(data) && data[j] == '\n' {
 802		j++
 803	}
 804	header := data[:j]
 805
 806	// column count ignores pipes at beginning or end of line
 807	if data[0] == '|' {
 808		colCount--
 809	}
 810	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
 811		colCount--
 812	}
 813
 814	columns = make([]CellAlignFlags, colCount)
 815
 816	// move on to the header underline
 817	i++
 818	if i >= len(data) {
 819		return
 820	}
 821
 822	if data[i] == '|' && !isBackslashEscaped(data, i) {
 823		i++
 824	}
 825	i = skipChar(data, i, ' ')
 826
 827	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
 828	// and trailing | optional on last column
 829	col := 0
 830	for i < len(data) && data[i] != '\n' {
 831		dashes := 0
 832
 833		if data[i] == ':' {
 834			i++
 835			columns[col] |= TableAlignmentLeft
 836			dashes++
 837		}
 838		for i < len(data) && data[i] == '-' {
 839			i++
 840			dashes++
 841		}
 842		if i < len(data) && data[i] == ':' {
 843			i++
 844			columns[col] |= TableAlignmentRight
 845			dashes++
 846		}
 847		for i < len(data) && data[i] == ' ' {
 848			i++
 849		}
 850		if i == len(data) {
 851			return
 852		}
 853		// end of column test is messy
 854		switch {
 855		case dashes < 3:
 856			// not a valid column
 857			return
 858
 859		case data[i] == '|' && !isBackslashEscaped(data, i):
 860			// marker found, now skip past trailing whitespace
 861			col++
 862			i++
 863			for i < len(data) && data[i] == ' ' {
 864				i++
 865			}
 866
 867			// trailing junk found after last column
 868			if col >= colCount && i < len(data) && data[i] != '\n' {
 869				return
 870			}
 871
 872		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
 873			// something else found where marker was required
 874			return
 875
 876		case data[i] == '\n':
 877			// marker is optional for the last column
 878			col++
 879
 880		default:
 881			// trailing junk found after last column
 882			return
 883		}
 884	}
 885	if col != colCount {
 886		return
 887	}
 888
 889	p.addBlock(TableHead, nil)
 890	p.tableRow(header, columns, true)
 891	size = i
 892	if size < len(data) && data[size] == '\n' {
 893		size++
 894	}
 895	return
 896}
 897
 898func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
 899	p.addBlock(TableRow, nil)
 900	i, col := 0, 0
 901
 902	if data[i] == '|' && !isBackslashEscaped(data, i) {
 903		i++
 904	}
 905
 906	for col = 0; col < len(columns) && i < len(data); col++ {
 907		for i < len(data) && data[i] == ' ' {
 908			i++
 909		}
 910
 911		cellStart := i
 912
 913		for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
 914			i++
 915		}
 916
 917		cellEnd := i
 918
 919		// skip the end-of-cell marker, possibly taking us past end of buffer
 920		i++
 921
 922		for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
 923			cellEnd--
 924		}
 925
 926		cell := p.addBlock(TableCell, data[cellStart:cellEnd])
 927		cell.IsHeader = header
 928		cell.Align = columns[col]
 929	}
 930
 931	// pad it out with empty columns to get the right number
 932	for ; col < len(columns); col++ {
 933		cell := p.addBlock(TableCell, nil)
 934		cell.IsHeader = header
 935		cell.Align = columns[col]
 936	}
 937
 938	// silently ignore rows with too many cells
 939}
 940
 941// returns blockquote prefix length
 942func (p *parser) quotePrefix(data []byte) int {
 943	i := 0
 944	for i < 3 && i < len(data) && data[i] == ' ' {
 945		i++
 946	}
 947	if i < len(data) && data[i] == '>' {
 948		if i+1 < len(data) && data[i+1] == ' ' {
 949			return i + 2
 950		}
 951		return i + 1
 952	}
 953	return 0
 954}
 955
 956// blockquote ends with at least one blank line
 957// followed by something without a blockquote prefix
 958func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
 959	if p.isEmpty(data[beg:]) <= 0 {
 960		return false
 961	}
 962	if end >= len(data) {
 963		return true
 964	}
 965	return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
 966}
 967
 968// parse a blockquote fragment
 969func (p *parser) quote(data []byte) int {
 970	block := p.addBlock(BlockQuote, nil)
 971	var raw bytes.Buffer
 972	beg, end := 0, 0
 973	for beg < len(data) {
 974		end = beg
 975		// Step over whole lines, collecting them. While doing that, check for
 976		// fenced code and if one's found, incorporate it altogether,
 977		// irregardless of any contents inside it
 978		for end < len(data) && data[end] != '\n' {
 979			if p.flags&FencedCode != 0 {
 980				if i := p.fencedCodeBlock(data[end:], false); i > 0 {
 981					// -1 to compensate for the extra end++ after the loop:
 982					end += i - 1
 983					break
 984				}
 985			}
 986			end++
 987		}
 988		if end < len(data) && data[end] == '\n' {
 989			end++
 990		}
 991		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 992			// skip the prefix
 993			beg += pre
 994		} else if p.terminateBlockquote(data, beg, end) {
 995			break
 996		}
 997		// this line is part of the blockquote
 998		raw.Write(data[beg:end])
 999		beg = end
1000	}
1001	p.block(raw.Bytes())
1002	p.finalize(block)
1003	return end
1004}
1005
1006// returns prefix length for block code
1007func (p *parser) codePrefix(data []byte) int {
1008	if len(data) >= 1 && data[0] == '\t' {
1009		return 1
1010	}
1011	if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1012		return 4
1013	}
1014	return 0
1015}
1016
1017func (p *parser) code(data []byte) int {
1018	var work bytes.Buffer
1019
1020	i := 0
1021	for i < len(data) {
1022		beg := i
1023		for i < len(data) && data[i] != '\n' {
1024			i++
1025		}
1026		if i < len(data) && data[i] == '\n' {
1027			i++
1028		}
1029
1030		blankline := p.isEmpty(data[beg:i]) > 0
1031		if pre := p.codePrefix(data[beg:i]); pre > 0 {
1032			beg += pre
1033		} else if !blankline {
1034			// non-empty, non-prefixed line breaks the pre
1035			i = beg
1036			break
1037		}
1038
1039		// verbatim copy to the working buffer
1040		if blankline {
1041			work.WriteByte('\n')
1042		} else {
1043			work.Write(data[beg:i])
1044		}
1045	}
1046
1047	// trim all the \n off the end of work
1048	workbytes := work.Bytes()
1049	eol := len(workbytes)
1050	for eol > 0 && workbytes[eol-1] == '\n' {
1051		eol--
1052	}
1053	if eol != len(workbytes) {
1054		work.Truncate(eol)
1055	}
1056
1057	work.WriteByte('\n')
1058
1059	block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1060	block.IsFenced = false
1061	finalizeCodeBlock(block)
1062
1063	return i
1064}
1065
1066// returns unordered list item prefix
1067func (p *parser) uliPrefix(data []byte) int {
1068	i := 0
1069	// start with up to 3 spaces
1070	for i < len(data) && i < 3 && data[i] == ' ' {
1071		i++
1072	}
1073	if i >= len(data)-1 {
1074		return 0
1075	}
1076	// need one of {'*', '+', '-'} followed by a space or a tab
1077	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1078		(data[i+1] != ' ' && data[i+1] != '\t') {
1079		return 0
1080	}
1081	return i + 2
1082}
1083
1084// returns ordered list item prefix
1085func (p *parser) oliPrefix(data []byte) int {
1086	i := 0
1087
1088	// start with up to 3 spaces
1089	for i < 3 && i < len(data) && data[i] == ' ' {
1090		i++
1091	}
1092
1093	// count the digits
1094	start := i
1095	for i < len(data) && data[i] >= '0' && data[i] <= '9' {
1096		i++
1097	}
1098	if start == i || i >= len(data)-1 {
1099		return 0
1100	}
1101
1102	// we need >= 1 digits followed by a dot and a space or a tab
1103	if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1104		return 0
1105	}
1106	return i + 2
1107}
1108
1109// returns definition list item prefix
1110func (p *parser) dliPrefix(data []byte) int {
1111	if len(data) < 2 {
1112		return 0
1113	}
1114	i := 0
1115	// need a ':' followed by a space or a tab
1116	if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1117		return 0
1118	}
1119	for i < len(data) && data[i] == ' ' {
1120		i++
1121	}
1122	return i + 2
1123}
1124
1125// parse ordered or unordered list block
1126func (p *parser) list(data []byte, flags ListType) int {
1127	i := 0
1128	flags |= ListItemBeginningOfList
1129	block := p.addBlock(List, nil)
1130	block.ListFlags = flags
1131	block.Tight = true
1132
1133	for i < len(data) {
1134		skip := p.listItem(data[i:], &flags)
1135		if flags&ListItemContainsBlock != 0 {
1136			block.ListData.Tight = false
1137		}
1138		i += skip
1139		if skip == 0 || flags&ListItemEndOfList != 0 {
1140			break
1141		}
1142		flags &= ^ListItemBeginningOfList
1143	}
1144
1145	above := block.Parent
1146	finalizeList(block)
1147	p.tip = above
1148	return i
1149}
1150
1151// Returns true if block ends with a blank line, descending if needed
1152// into lists and sublists.
1153func endsWithBlankLine(block *Node) bool {
1154	// TODO: figure this out. Always false now.
1155	for block != nil {
1156		//if block.lastLineBlank {
1157		//return true
1158		//}
1159		t := block.Type
1160		if t == List || t == Item {
1161			block = block.LastChild
1162		} else {
1163			break
1164		}
1165	}
1166	return false
1167}
1168
1169func finalizeList(block *Node) {
1170	block.open = false
1171	item := block.FirstChild
1172	for item != nil {
1173		// check for non-final list item ending with blank line:
1174		if endsWithBlankLine(item) && item.Next != nil {
1175			block.ListData.Tight = false
1176			break
1177		}
1178		// recurse into children of list item, to see if there are spaces
1179		// between any of them:
1180		subItem := item.FirstChild
1181		for subItem != nil {
1182			if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
1183				block.ListData.Tight = false
1184				break
1185			}
1186			subItem = subItem.Next
1187		}
1188		item = item.Next
1189	}
1190}
1191
1192// Parse a single list item.
1193// Assumes initial prefix is already removed if this is a sublist.
1194func (p *parser) listItem(data []byte, flags *ListType) int {
1195	// keep track of the indentation of the first line
1196	itemIndent := 0
1197	if data[0] == '\t' {
1198		itemIndent += 4
1199	} else {
1200		for itemIndent < 3 && data[itemIndent] == ' ' {
1201			itemIndent++
1202		}
1203	}
1204
1205	var bulletChar byte = '*'
1206	i := p.uliPrefix(data)
1207	if i == 0 {
1208		i = p.oliPrefix(data)
1209	} else {
1210		bulletChar = data[i-2]
1211	}
1212	if i == 0 {
1213		i = p.dliPrefix(data)
1214		// reset definition term flag
1215		if i > 0 {
1216			*flags &= ^ListTypeTerm
1217		}
1218	}
1219	if i == 0 {
1220		// if in definition list, set term flag and continue
1221		if *flags&ListTypeDefinition != 0 {
1222			*flags |= ListTypeTerm
1223		} else {
1224			return 0
1225		}
1226	}
1227
1228	// skip leading whitespace on first line
1229	for i < len(data) && data[i] == ' ' {
1230		i++
1231	}
1232
1233	// find the end of the line
1234	line := i
1235	for i > 0 && i < len(data) && data[i-1] != '\n' {
1236		i++
1237	}
1238
1239	// get working buffer
1240	var raw bytes.Buffer
1241
1242	// put the first line into the working buffer
1243	raw.Write(data[line:i])
1244	line = i
1245
1246	// process the following lines
1247	containsBlankLine := false
1248	sublist := 0
1249
1250gatherlines:
1251	for line < len(data) {
1252		i++
1253
1254		// find the end of this line
1255		for i < len(data) && data[i-1] != '\n' {
1256			i++
1257		}
1258
1259		// if it is an empty line, guess that it is part of this item
1260		// and move on to the next line
1261		if p.isEmpty(data[line:i]) > 0 {
1262			containsBlankLine = true
1263			line = i
1264			continue
1265		}
1266
1267		// calculate the indentation
1268		indent := 0
1269		indentIndex := 0
1270		if data[line] == '\t' {
1271			indentIndex++
1272			indent += 4
1273		} else {
1274			for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1275				indent++
1276				indentIndex++
1277			}
1278		}
1279
1280		chunk := data[line+indentIndex : i]
1281
1282		// evaluate how this line fits in
1283		switch {
1284		// is this a nested list item?
1285		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1286			p.oliPrefix(chunk) > 0 ||
1287			p.dliPrefix(chunk) > 0:
1288
1289			if containsBlankLine {
1290				*flags |= ListItemContainsBlock
1291			}
1292
1293			// to be a nested list, it must be indented more
1294			// if not, it is the next item in the same list
1295			if indent <= itemIndent {
1296				break gatherlines
1297			}
1298
1299			// is this the first item in the nested list?
1300			if sublist == 0 {
1301				sublist = raw.Len()
1302			}
1303
1304		// is this a nested prefix heading?
1305		case p.isPrefixHeading(chunk):
1306			// if the heading is not indented, it is not nested in the list
1307			// and thus ends the list
1308			if containsBlankLine && indent < 4 {
1309				*flags |= ListItemEndOfList
1310				break gatherlines
1311			}
1312			*flags |= ListItemContainsBlock
1313
1314		// anything following an empty line is only part
1315		// of this item if it is indented 4 spaces
1316		// (regardless of the indentation of the beginning of the item)
1317		case containsBlankLine && indent < 4:
1318			if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
1319				// is the next item still a part of this list?
1320				next := i
1321				for next < len(data) && data[next] != '\n' {
1322					next++
1323				}
1324				for next < len(data)-1 && data[next] == '\n' {
1325					next++
1326				}
1327				if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1328					*flags |= ListItemEndOfList
1329				}
1330			} else {
1331				*flags |= ListItemEndOfList
1332			}
1333			break gatherlines
1334
1335		// a blank line means this should be parsed as a block
1336		case containsBlankLine:
1337			raw.WriteByte('\n')
1338			*flags |= ListItemContainsBlock
1339		}
1340
1341		// if this line was preceded by one or more blanks,
1342		// re-introduce the blank into the buffer
1343		if containsBlankLine {
1344			containsBlankLine = false
1345			raw.WriteByte('\n')
1346		}
1347
1348		// add the line into the working buffer without prefix
1349		raw.Write(data[line+indentIndex : i])
1350
1351		line = i
1352	}
1353
1354	rawBytes := raw.Bytes()
1355
1356	block := p.addBlock(Item, nil)
1357	block.ListFlags = *flags
1358	block.Tight = false
1359	block.BulletChar = bulletChar
1360	block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
1361
1362	// render the contents of the list item
1363	if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
1364		// intermediate render of block item, except for definition term
1365		if sublist > 0 {
1366			p.block(rawBytes[:sublist])
1367			p.block(rawBytes[sublist:])
1368		} else {
1369			p.block(rawBytes)
1370		}
1371	} else {
1372		// intermediate render of inline item
1373		if sublist > 0 {
1374			child := p.addChild(Paragraph, 0)
1375			child.content = rawBytes[:sublist]
1376			p.block(rawBytes[sublist:])
1377		} else {
1378			child := p.addChild(Paragraph, 0)
1379			child.content = rawBytes
1380		}
1381	}
1382	return line
1383}
1384
1385// render a single paragraph that has already been parsed out
1386func (p *parser) renderParagraph(data []byte) {
1387	if len(data) == 0 {
1388		return
1389	}
1390
1391	// trim leading spaces
1392	beg := 0
1393	for data[beg] == ' ' {
1394		beg++
1395	}
1396
1397	end := len(data)
1398	// trim trailing newline
1399	if data[len(data)-1] == '\n' {
1400		end--
1401	}
1402
1403	// trim trailing spaces
1404	for end > beg && data[end-1] == ' ' {
1405		end--
1406	}
1407
1408	p.addBlock(Paragraph, data[beg:end])
1409}
1410
1411func (p *parser) paragraph(data []byte) int {
1412	// prev: index of 1st char of previous line
1413	// line: index of 1st char of current line
1414	// i: index of cursor/end of current line
1415	var prev, line, i int
1416	tabSize := TabSizeDefault
1417	if p.flags&TabSizeEight != 0 {
1418		tabSize = TabSizeDouble
1419	}
1420	// keep going until we find something to mark the end of the paragraph
1421	for i < len(data) {
1422		// mark the beginning of the current line
1423		prev = line
1424		current := data[i:]
1425		line = i
1426
1427		// did we find a reference or a footnote? If so, end a paragraph
1428		// preceding it and report that we have consumed up to the end of that
1429		// reference:
1430		if refEnd := isReference(p, current, tabSize); refEnd > 0 {
1431			p.renderParagraph(data[:i])
1432			return i + refEnd
1433		}
1434
1435		// did we find a blank line marking the end of the paragraph?
1436		if n := p.isEmpty(current); n > 0 {
1437			// did this blank line followed by a definition list item?
1438			if p.flags&DefinitionLists != 0 {
1439				if i < len(data)-1 && data[i+1] == ':' {
1440					return p.list(data[prev:], ListTypeDefinition)
1441				}
1442			}
1443
1444			p.renderParagraph(data[:i])
1445			return i + n
1446		}
1447
1448		// an underline under some text marks a heading, so our paragraph ended on prev line
1449		if i > 0 {
1450			if level := p.isUnderlinedHeading(current); level > 0 {
1451				// render the paragraph
1452				p.renderParagraph(data[:prev])
1453
1454				// ignore leading and trailing whitespace
1455				eol := i - 1
1456				for prev < eol && data[prev] == ' ' {
1457					prev++
1458				}
1459				for eol > prev && data[eol-1] == ' ' {
1460					eol--
1461				}
1462
1463				id := ""
1464				if p.flags&AutoHeadingIDs != 0 {
1465					id = sanitized_anchor_name.Create(string(data[prev:eol]))
1466				}
1467
1468				block := p.addBlock(Heading, data[prev:eol])
1469				block.Level = level
1470				block.HeadingID = id
1471
1472				// find the end of the underline
1473				for i < len(data) && data[i] != '\n' {
1474					i++
1475				}
1476				return i
1477			}
1478		}
1479
1480		// if the next line starts a block of HTML, then the paragraph ends here
1481		if p.flags&LaxHTMLBlocks != 0 {
1482			if data[i] == '<' && p.html(current, false) > 0 {
1483				// rewind to before the HTML block
1484				p.renderParagraph(data[:i])
1485				return i
1486			}
1487		}
1488
1489		// if there's a prefixed heading or a horizontal rule after this, paragraph is over
1490		if p.isPrefixHeading(current) || p.isHRule(current) {
1491			p.renderParagraph(data[:i])
1492			return i
1493		}
1494
1495		// if there's a fenced code block, paragraph is over
1496		if p.flags&FencedCode != 0 {
1497			if p.fencedCodeBlock(current, false) > 0 {
1498				p.renderParagraph(data[:i])
1499				return i
1500			}
1501		}
1502
1503		// if there's a definition list item, prev line is a definition term
1504		if p.flags&DefinitionLists != 0 {
1505			if p.dliPrefix(current) != 0 {
1506				ret := p.list(data[prev:], ListTypeDefinition)
1507				return ret
1508			}
1509		}
1510
1511		// if there's a list after this, paragraph is over
1512		if p.flags&NoEmptyLineBeforeBlock != 0 {
1513			if p.uliPrefix(current) != 0 ||
1514				p.oliPrefix(current) != 0 ||
1515				p.quotePrefix(current) != 0 ||
1516				p.codePrefix(current) != 0 {
1517				p.renderParagraph(data[:i])
1518				return i
1519			}
1520		}
1521
1522		// otherwise, scan to the beginning of the next line
1523		nl := bytes.IndexByte(data[i:], '\n')
1524		if nl >= 0 {
1525			i += nl + 1
1526		} else {
1527			i += len(data[i:])
1528		}
1529	}
1530
1531	p.renderParagraph(data[:i])
1532	return i
1533}
1534
// skipChar returns the index of the first byte at or after start in data that
// is not equal to char. If every remaining byte matches, or start is already
// at or beyond the end of data, the scan stops without reading further and the
// current position is returned.
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
1542
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. If char does not occur, the result is len(text); if start is
// already at or beyond the end of text, start is returned unchanged.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for pos < len(text) {
		if text[pos] == char {
			return pos
		}
		pos++
	}
	return pos
}
all repos — grayfriday @ b98e30685370e0b7a5556c084dc4aa7ca6f59136

blackfriday fork with a few changes