icy does git — grayfriday (72633fddee675b450f536cb2e3d93549993394c1): block.go

block.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"html"
  19	"regexp"
  20
  21	"github.com/shurcooL/sanitized_anchor_name"
  22)
  23
  24const (
  25	Entity    = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
  26	Escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
  27)
  28
  29var (
  30	reBackslashOrAmp      = regexp.MustCompile("[\\&]")
  31	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + Escapable + "|" + Entity)
  32	reTrailingWhitespace  = regexp.MustCompile("(\n *)+$")
  33)
  34
  35// Parse block-level data.
  36// Note: this function and many that it calls assume that
  37// the input buffer ends with a newline.
  38func (p *parser) block(data []byte) {
  39	if len(data) == 0 || data[len(data)-1] != '\n' {
  40		panic("block input is missing terminating newline")
  41	}
  42
  43	// this is called recursively: enforce a maximum depth
  44	if p.nesting >= p.maxNesting {
  45		return
  46	}
  47	p.nesting++
  48
  49	// parse out one block-level construct at a time
  50	for len(data) > 0 {
  51		// prefixed header:
  52		//
  53		// # Header 1
  54		// ## Header 2
  55		// ...
  56		// ###### Header 6
  57		if p.isPrefixHeader(data) {
  58			data = data[p.prefixHeader(data):]
  59			continue
  60		}
  61
  62		// block of preformatted HTML:
  63		//
  64		// <div>
  65		//     ...
  66		// </div>
  67		if data[0] == '<' {
  68			if i := p.html(data, true); i > 0 {
  69				data = data[i:]
  70				continue
  71			}
  72		}
  73
  74		// title block
  75		//
  76		// % stuff
  77		// % more stuff
  78		// % even more stuff
  79		if p.flags&Titleblock != 0 {
  80			if data[0] == '%' {
  81				if i := p.titleBlock(data, true); i > 0 {
  82					data = data[i:]
  83					continue
  84				}
  85			}
  86		}
  87
  88		// blank lines.  note: returns the # of bytes to skip
  89		if i := p.isEmpty(data); i > 0 {
  90			data = data[i:]
  91			continue
  92		}
  93
  94		// indented code block:
  95		//
  96		//     func max(a, b int) int {
  97		//         if a > b {
  98		//             return a
  99		//         }
 100		//         return b
 101		//      }
 102		if p.codePrefix(data) > 0 {
 103			data = data[p.code(data):]
 104			continue
 105		}
 106
 107		// fenced code block:
 108		//
 109		// ``` go
 110		// func fact(n int) int {
 111		//     if n <= 1 {
 112		//         return n
 113		//     }
 114		//     return n * fact(n-1)
 115		// }
 116		// ```
 117		if p.flags&FencedCode != 0 {
 118			if i := p.fencedCodeBlock(data, true); i > 0 {
 119				data = data[i:]
 120				continue
 121			}
 122		}
 123
 124		// horizontal rule:
 125		//
 126		// ------
 127		// or
 128		// ******
 129		// or
 130		// ______
 131		if p.isHRule(data) {
 132			p.addBlock(HorizontalRule, nil)
 133			var i int
 134			for i = 0; data[i] != '\n'; i++ {
 135			}
 136			data = data[i:]
 137			continue
 138		}
 139
 140		// block quote:
 141		//
 142		// > A big quote I found somewhere
 143		// > on the web
 144		if p.quotePrefix(data) > 0 {
 145			data = data[p.quote(data):]
 146			continue
 147		}
 148
 149		// table:
 150		//
 151		// Name  | Age | Phone
 152		// ------|-----|---------
 153		// Bob   | 31  | 555-1234
 154		// Alice | 27  | 555-4321
 155		if p.flags&Tables != 0 {
 156			if i := p.table(data); i > 0 {
 157				data = data[i:]
 158				continue
 159			}
 160		}
 161
 162		// an itemized/unordered list:
 163		//
 164		// * Item 1
 165		// * Item 2
 166		//
 167		// also works with + or -
 168		if p.uliPrefix(data) > 0 {
 169			data = data[p.list(data, 0):]
 170			continue
 171		}
 172
 173		// a numbered/ordered list:
 174		//
 175		// 1. Item 1
 176		// 2. Item 2
 177		if p.oliPrefix(data) > 0 {
 178			data = data[p.list(data, ListTypeOrdered):]
 179			continue
 180		}
 181
 182		// definition lists:
 183		//
 184		// Term 1
 185		// :   Definition a
 186		// :   Definition b
 187		//
 188		// Term 2
 189		// :   Definition c
 190		if p.flags&DefinitionLists != 0 {
 191			if p.dliPrefix(data) > 0 {
 192				data = data[p.list(data, ListTypeDefinition):]
 193				continue
 194			}
 195		}
 196
 197		// anything else must look like a normal paragraph
 198		// note: this finds underlined headers, too
 199		data = data[p.paragraph(data):]
 200	}
 201
 202	p.nesting--
 203}
 204
 205func (p *parser) addBlock(typ NodeType, content []byte) *Node {
 206	p.closeUnmatchedBlocks()
 207	container := p.addChild(typ, 0)
 208	container.content = content
 209	return container
 210}
 211
 212func (p *parser) isPrefixHeader(data []byte) bool {
 213	if data[0] != '#' {
 214		return false
 215	}
 216
 217	if p.flags&SpaceHeaders != 0 {
 218		level := 0
 219		for level < 6 && data[level] == '#' {
 220			level++
 221		}
 222		if data[level] != ' ' {
 223			return false
 224		}
 225	}
 226	return true
 227}
 228
 229func (p *parser) prefixHeader(data []byte) int {
 230	level := 0
 231	for level < 6 && data[level] == '#' {
 232		level++
 233	}
 234	i := skipChar(data, level, ' ')
 235	end := skipUntilChar(data, i, '\n')
 236	skip := end
 237	id := ""
 238	if p.flags&HeaderIDs != 0 {
 239		j, k := 0, 0
 240		// find start/end of header id
 241		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
 242		}
 243		for k = j + 1; k < end && data[k] != '}'; k++ {
 244		}
 245		// extract header id iff found
 246		if j < end && k < end {
 247			id = string(data[j+2 : k])
 248			end = j
 249			skip = k + 1
 250			for end > 0 && data[end-1] == ' ' {
 251				end--
 252			}
 253		}
 254	}
 255	for end > 0 && data[end-1] == '#' {
 256		if isBackslashEscaped(data, end-1) {
 257			break
 258		}
 259		end--
 260	}
 261	for end > 0 && data[end-1] == ' ' {
 262		end--
 263	}
 264	if end > i {
 265		if id == "" && p.flags&AutoHeaderIDs != 0 {
 266			id = sanitized_anchor_name.Create(string(data[i:end]))
 267		}
 268		block := p.addBlock(Header, data[i:end])
 269		block.HeaderID = id
 270		block.Level = level
 271	}
 272	return skip
 273}
 274
 275func (p *parser) isUnderlinedHeader(data []byte) int {
 276	// test of level 1 header
 277	if data[0] == '=' {
 278		i := skipChar(data, 1, '=')
 279		i = skipChar(data, i, ' ')
 280		if data[i] == '\n' {
 281			return 1
 282		} else {
 283			return 0
 284		}
 285	}
 286
 287	// test of level 2 header
 288	if data[0] == '-' {
 289		i := skipChar(data, 1, '-')
 290		i = skipChar(data, i, ' ')
 291		if data[i] == '\n' {
 292			return 2
 293		} else {
 294			return 0
 295		}
 296	}
 297
 298	return 0
 299}
 300
 301func (p *parser) titleBlock(data []byte, doRender bool) int {
 302	if data[0] != '%' {
 303		return 0
 304	}
 305	splitData := bytes.Split(data, []byte("\n"))
 306	var i int
 307	for idx, b := range splitData {
 308		if !bytes.HasPrefix(b, []byte("%")) {
 309			i = idx // - 1
 310			break
 311		}
 312	}
 313
 314	data = bytes.Join(splitData[0:i], []byte("\n"))
 315	consumed := len(data)
 316	data = bytes.TrimPrefix(data, []byte("% "))
 317	data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
 318	block := p.addBlock(Header, data)
 319	block.Level = 1
 320	block.IsTitleblock = true
 321
 322	return consumed
 323}
 324
 325func (p *parser) html(data []byte, doRender bool) int {
 326	var i, j int
 327
 328	// identify the opening tag
 329	if data[0] != '<' {
 330		return 0
 331	}
 332	curtag, tagfound := p.htmlFindTag(data[1:])
 333
 334	// handle special cases
 335	if !tagfound {
 336		// check for an HTML comment
 337		if size := p.htmlComment(data, doRender); size > 0 {
 338			return size
 339		}
 340
 341		// check for an <hr> tag
 342		if size := p.htmlHr(data, doRender); size > 0 {
 343			return size
 344		}
 345
 346		// no special case recognized
 347		return 0
 348	}
 349
 350	// look for an unindented matching closing tag
 351	// followed by a blank line
 352	found := false
 353	/*
 354		closetag := []byte("\n</" + curtag + ">")
 355		j = len(curtag) + 1
 356		for !found {
 357			// scan for a closing tag at the beginning of a line
 358			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
 359				j += skip + len(closetag)
 360			} else {
 361				break
 362			}
 363
 364			// see if it is the only thing on the line
 365			if skip := p.isEmpty(data[j:]); skip > 0 {
 366				// see if it is followed by a blank line/eof
 367				j += skip
 368				if j >= len(data) {
 369					found = true
 370					i = j
 371				} else {
 372					if skip := p.isEmpty(data[j:]); skip > 0 {
 373						j += skip
 374						found = true
 375						i = j
 376					}
 377				}
 378			}
 379		}
 380	*/
 381
 382	// if not found, try a second pass looking for indented match
 383	// but not if tag is "ins" or "del" (following original Markdown.pl)
 384	if !found && curtag != "ins" && curtag != "del" {
 385		i = 1
 386		for i < len(data) {
 387			i++
 388			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
 389				i++
 390			}
 391
 392			if i+2+len(curtag) >= len(data) {
 393				break
 394			}
 395
 396			j = p.htmlFindEnd(curtag, data[i-1:])
 397
 398			if j > 0 {
 399				i += j - 1
 400				found = true
 401				break
 402			}
 403		}
 404	}
 405
 406	if !found {
 407		return 0
 408	}
 409
 410	// the end of the block has been found
 411	if doRender {
 412		// trim newlines
 413		end := i
 414		for end > 0 && data[end-1] == '\n' {
 415			end--
 416		}
 417		finalizeHtmlBlock(p.addBlock(HTMLBlock, data[:end]))
 418	}
 419
 420	return i
 421}
 422
 423func finalizeHtmlBlock(block *Node) {
 424	block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{})
 425	block.content = []byte{}
 426}
 427
 428// HTML comment, lax form
 429func (p *parser) htmlComment(data []byte, doRender bool) int {
 430	i := p.inlineHtmlComment(data)
 431	// needs to end with a blank line
 432	if j := p.isEmpty(data[i:]); j > 0 {
 433		size := i + j
 434		if doRender {
 435			// trim trailing newlines
 436			end := size
 437			for end > 0 && data[end-1] == '\n' {
 438				end--
 439			}
 440			block := p.addBlock(HTMLBlock, data[:end])
 441			finalizeHtmlBlock(block)
 442		}
 443		return size
 444	}
 445	return 0
 446}
 447
 448// HR, which is the only self-closing block tag considered
 449func (p *parser) htmlHr(data []byte, doRender bool) int {
 450	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 451		return 0
 452	}
 453	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 454		// not an <hr> tag after all; at least not a valid one
 455		return 0
 456	}
 457
 458	i := 3
 459	for data[i] != '>' && data[i] != '\n' {
 460		i++
 461	}
 462
 463	if data[i] == '>' {
 464		i++
 465		if j := p.isEmpty(data[i:]); j > 0 {
 466			size := i + j
 467			if doRender {
 468				// trim newlines
 469				end := size
 470				for end > 0 && data[end-1] == '\n' {
 471					end--
 472				}
 473				finalizeHtmlBlock(p.addBlock(HTMLBlock, data[:end]))
 474			}
 475			return size
 476		}
 477	}
 478
 479	return 0
 480}
 481
 482func (p *parser) htmlFindTag(data []byte) (string, bool) {
 483	i := 0
 484	for isalnum(data[i]) {
 485		i++
 486	}
 487	key := string(data[:i])
 488	if _, ok := blockTags[key]; ok {
 489		return key, true
 490	}
 491	return "", false
 492}
 493
 494func (p *parser) htmlFindEnd(tag string, data []byte) int {
 495	// assume data[0] == '<' && data[1] == '/' already tested
 496	if tag == "hr" {
 497		return 2
 498	}
 499	// check if tag is a match
 500	closetag := []byte("</" + tag + ">")
 501	if !bytes.HasPrefix(data, closetag) {
 502		return 0
 503	}
 504	i := len(closetag)
 505
 506	// check that the rest of the line is blank
 507	skip := 0
 508	if skip = p.isEmpty(data[i:]); skip == 0 {
 509		return 0
 510	}
 511	i += skip
 512	skip = 0
 513
 514	if i >= len(data) {
 515		return i
 516	}
 517
 518	if p.flags&LaxHTMLBlocks != 0 {
 519		return i
 520	}
 521	if skip = p.isEmpty(data[i:]); skip == 0 {
 522		// following line must be blank
 523		return 0
 524	}
 525
 526	return i + skip
 527}
 528
 529func (*parser) isEmpty(data []byte) int {
 530	// it is okay to call isEmpty on an empty buffer
 531	if len(data) == 0 {
 532		return 0
 533	}
 534
 535	var i int
 536	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 537		if data[i] != ' ' && data[i] != '\t' {
 538			return 0
 539		}
 540	}
 541	return i + 1
 542}
 543
 544func (*parser) isHRule(data []byte) bool {
 545	i := 0
 546
 547	// skip up to three spaces
 548	for i < 3 && data[i] == ' ' {
 549		i++
 550	}
 551
 552	// look at the hrule char
 553	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 554		return false
 555	}
 556	c := data[i]
 557
 558	// the whole line must be the char or whitespace
 559	n := 0
 560	for data[i] != '\n' {
 561		switch {
 562		case data[i] == c:
 563			n++
 564		case data[i] != ' ':
 565			return false
 566		}
 567		i++
 568	}
 569
 570	return n >= 3
 571}
 572
 573// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
 574// and returns the end index if so, or 0 otherwise. It also returns the marker found.
 575// If syntax is not nil, it gets set to the syntax specified in the fence line.
 576// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
 577func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
 578	i, size := 0, 0
 579
 580	// skip up to three spaces
 581	for i < len(data) && i < 3 && data[i] == ' ' {
 582		i++
 583	}
 584
 585	// check for the marker characters: ~ or `
 586	if i >= len(data) {
 587		return 0, ""
 588	}
 589	if data[i] != '~' && data[i] != '`' {
 590		return 0, ""
 591	}
 592
 593	c := data[i]
 594
 595	// the whole line must be the same char or whitespace
 596	for i < len(data) && data[i] == c {
 597		size++
 598		i++
 599	}
 600
 601	// the marker char must occur at least 3 times
 602	if size < 3 {
 603		return 0, ""
 604	}
 605	marker = string(data[i-size : i])
 606
 607	// if this is the end marker, it must match the beginning marker
 608	if oldmarker != "" && marker != oldmarker {
 609		return 0, ""
 610	}
 611
 612	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
 613	// into one, always get the syntax, and discard it if the caller doesn't care.
 614	if syntax != nil {
 615		syn := 0
 616		i = skipChar(data, i, ' ')
 617
 618		if i >= len(data) {
 619			if newlineOptional && i == len(data) {
 620				return i, marker
 621			}
 622			return 0, ""
 623		}
 624
 625		syntaxStart := i
 626
 627		if data[i] == '{' {
 628			i++
 629			syntaxStart++
 630
 631			for i < len(data) && data[i] != '}' && data[i] != '\n' {
 632				syn++
 633				i++
 634			}
 635
 636			if i >= len(data) || data[i] != '}' {
 637				return 0, ""
 638			}
 639
 640			// strip all whitespace at the beginning and the end
 641			// of the {} block
 642			for syn > 0 && isspace(data[syntaxStart]) {
 643				syntaxStart++
 644				syn--
 645			}
 646
 647			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 648				syn--
 649			}
 650
 651			i++
 652		} else {
 653			for i < len(data) && !isspace(data[i]) {
 654				syn++
 655				i++
 656			}
 657		}
 658
 659		*syntax = string(data[syntaxStart : syntaxStart+syn])
 660	}
 661
 662	i = skipChar(data, i, ' ')
 663	if i >= len(data) || data[i] != '\n' {
 664		if newlineOptional && i == len(data) {
 665			return i, marker
 666		}
 667		return 0, ""
 668	}
 669
 670	return i + 1, marker // Take newline into account.
 671}
 672
 673// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
 674// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
 675// If doRender is true, a final newline is mandatory to recognize the fenced code block.
 676func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
 677	var syntax string
 678	beg, marker := isFenceLine(data, &syntax, "", false)
 679	if beg == 0 || beg >= len(data) {
 680		return 0
 681	}
 682
 683	var work bytes.Buffer
 684	work.Write([]byte(syntax))
 685	work.WriteByte('\n')
 686
 687	for {
 688		// safe to assume beg < len(data)
 689
 690		// check for the end of the code block
 691		newlineOptional := !doRender
 692		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
 693		if fenceEnd != 0 {
 694			beg += fenceEnd
 695			break
 696		}
 697
 698		// copy the current line
 699		end := skipUntilChar(data, beg, '\n') + 1
 700
 701		// did we reach the end of the buffer without a closing marker?
 702		if end >= len(data) {
 703			return 0
 704		}
 705
 706		// verbatim copy to the working buffer
 707		if doRender {
 708			work.Write(data[beg:end])
 709		}
 710		beg = end
 711	}
 712
 713	if doRender {
 714		block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
 715		block.IsFenced = true
 716		finalizeCodeBlock(block)
 717	}
 718
 719	return beg
 720}
 721
 722func unescapeChar(str []byte) []byte {
 723	if str[0] == '\\' {
 724		return []byte{str[1]}
 725	}
 726	return []byte(html.UnescapeString(string(str)))
 727}
 728
 729func unescapeString(str []byte) []byte {
 730	if reBackslashOrAmp.Match(str) {
 731		return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
 732	} else {
 733		return str
 734	}
 735}
 736
 737func finalizeCodeBlock(block *Node) {
 738	if block.IsFenced {
 739		newlinePos := bytes.IndexByte(block.content, '\n')
 740		firstLine := block.content[:newlinePos]
 741		rest := block.content[newlinePos+1:]
 742		block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
 743		block.Literal = rest
 744	} else {
 745		block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'})
 746	}
 747	block.content = nil
 748}
 749
 750func (p *parser) table(data []byte) int {
 751	table := p.addBlock(Table, nil)
 752	i, columns := p.tableHeader(data)
 753	if i == 0 {
 754		p.tip = table.Parent
 755		table.unlink()
 756		return 0
 757	}
 758
 759	p.addBlock(TableBody, nil)
 760
 761	for i < len(data) {
 762		pipes, rowStart := 0, i
 763		for ; data[i] != '\n'; i++ {
 764			if data[i] == '|' {
 765				pipes++
 766			}
 767		}
 768
 769		if pipes == 0 {
 770			i = rowStart
 771			break
 772		}
 773
 774		// include the newline in data sent to tableRow
 775		i++
 776		p.tableRow(data[rowStart:i], columns, false)
 777	}
 778
 779	return i
 780}
 781
 782// check if the specified position is preceded by an odd number of backslashes
 783func isBackslashEscaped(data []byte, i int) bool {
 784	backslashes := 0
 785	for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
 786		backslashes++
 787	}
 788	return backslashes&1 == 1
 789}
 790
// tableHeader checks whether data starts with a table header: a row of cells
// separated by unescaped '|' characters, followed by an underline row that
// has one dash group per column.
//
// On success it adds a TableHead node, emits the header row through tableRow,
// and returns the number of bytes spanned by the two lines together with the
// per-column alignment flags read from the colons in the underline. On any
// mismatch it returns with size 0; columns may already be allocated at that
// point, so callers should test size rather than columns.
func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
	i := 0
	// count unescaped pipes on the first line; n pipes delimit up to n+1 cells
	colCount := 1
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]CellAlignFlags, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and spaces may precede the first dash group
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	i = skipChar(data, i, ' ')

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		// dashes counts colons as well as dashes (see the form above)
		dashes := 0

		// a leading colon requests left alignment
		if data[i] == ':' {
			i++
			columns[col] |= TableAlignmentLeft
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		// a trailing colon requests right alignment; both flags may be set
		if data[i] == ':' {
			i++
			columns[col] |= TableAlignmentRight
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the header has
	if col != colCount {
		return
	}

	p.addBlock(TableHead, nil)
	p.tableRow(header, columns, true)
	// i sits on the underline's newline; consume it as well
	size = i + 1
	return
}
 894
 895func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
 896	p.addBlock(TableRow, nil)
 897	i, col := 0, 0
 898
 899	if data[i] == '|' && !isBackslashEscaped(data, i) {
 900		i++
 901	}
 902
 903	for col = 0; col < len(columns) && i < len(data); col++ {
 904		for data[i] == ' ' {
 905			i++
 906		}
 907
 908		cellStart := i
 909
 910		for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
 911			i++
 912		}
 913
 914		cellEnd := i
 915
 916		// skip the end-of-cell marker, possibly taking us past end of buffer
 917		i++
 918
 919		for cellEnd > cellStart && data[cellEnd-1] == ' ' {
 920			cellEnd--
 921		}
 922
 923		cell := p.addBlock(TableCell, data[cellStart:cellEnd])
 924		cell.IsHeader = header
 925		cell.Align = columns[col]
 926	}
 927
 928	// pad it out with empty columns to get the right number
 929	for ; col < len(columns); col++ {
 930		cell := p.addBlock(TableCell, nil)
 931		cell.IsHeader = header
 932		cell.Align = columns[col]
 933	}
 934
 935	// silently ignore rows with too many cells
 936}
 937
 938// returns blockquote prefix length
 939func (p *parser) quotePrefix(data []byte) int {
 940	i := 0
 941	for i < 3 && data[i] == ' ' {
 942		i++
 943	}
 944	if data[i] == '>' {
 945		if data[i+1] == ' ' {
 946			return i + 2
 947		}
 948		return i + 1
 949	}
 950	return 0
 951}
 952
 953// blockquote ends with at least one blank line
 954// followed by something without a blockquote prefix
 955func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
 956	if p.isEmpty(data[beg:]) <= 0 {
 957		return false
 958	}
 959	if end >= len(data) {
 960		return true
 961	}
 962	return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
 963}
 964
 965// parse a blockquote fragment
 966func (p *parser) quote(data []byte) int {
 967	block := p.addBlock(BlockQuote, nil)
 968	var raw bytes.Buffer
 969	beg, end := 0, 0
 970	for beg < len(data) {
 971		end = beg
 972		// Step over whole lines, collecting them. While doing that, check for
 973		// fenced code and if one's found, incorporate it altogether,
 974		// irregardless of any contents inside it
 975		for data[end] != '\n' {
 976			if p.flags&FencedCode != 0 {
 977				if i := p.fencedCodeBlock(data[end:], false); i > 0 {
 978					// -1 to compensate for the extra end++ after the loop:
 979					end += i - 1
 980					break
 981				}
 982			}
 983			end++
 984		}
 985		end++
 986		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 987			// skip the prefix
 988			beg += pre
 989		} else if p.terminateBlockquote(data, beg, end) {
 990			break
 991		}
 992		// this line is part of the blockquote
 993		raw.Write(data[beg:end])
 994		beg = end
 995	}
 996	p.block(raw.Bytes())
 997	p.finalize(block)
 998	return end
 999}
1000
1001// returns prefix length for block code
1002func (p *parser) codePrefix(data []byte) int {
1003	if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1004		return 4
1005	}
1006	return 0
1007}
1008
1009func (p *parser) code(data []byte) int {
1010	var work bytes.Buffer
1011
1012	i := 0
1013	for i < len(data) {
1014		beg := i
1015		for data[i] != '\n' {
1016			i++
1017		}
1018		i++
1019
1020		blankline := p.isEmpty(data[beg:i]) > 0
1021		if pre := p.codePrefix(data[beg:i]); pre > 0 {
1022			beg += pre
1023		} else if !blankline {
1024			// non-empty, non-prefixed line breaks the pre
1025			i = beg
1026			break
1027		}
1028
1029		// verbatim copy to the working buffeu
1030		if blankline {
1031			work.WriteByte('\n')
1032		} else {
1033			work.Write(data[beg:i])
1034		}
1035	}
1036
1037	// trim all the \n off the end of work
1038	workbytes := work.Bytes()
1039	eol := len(workbytes)
1040	for eol > 0 && workbytes[eol-1] == '\n' {
1041		eol--
1042	}
1043	if eol != len(workbytes) {
1044		work.Truncate(eol)
1045	}
1046
1047	work.WriteByte('\n')
1048
1049	block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1050	block.IsFenced = false
1051	finalizeCodeBlock(block)
1052
1053	return i
1054}
1055
1056// returns unordered list item prefix
1057func (p *parser) uliPrefix(data []byte) int {
1058	i := 0
1059
1060	// start with up to 3 spaces
1061	for i < 3 && data[i] == ' ' {
1062		i++
1063	}
1064
1065	// need a *, +, or - followed by a space
1066	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1067		data[i+1] != ' ' {
1068		return 0
1069	}
1070	return i + 2
1071}
1072
1073// returns ordered list item prefix
1074func (p *parser) oliPrefix(data []byte) int {
1075	i := 0
1076
1077	// start with up to 3 spaces
1078	for i < 3 && data[i] == ' ' {
1079		i++
1080	}
1081
1082	// count the digits
1083	start := i
1084	for data[i] >= '0' && data[i] <= '9' {
1085		i++
1086	}
1087
1088	// we need >= 1 digits followed by a dot and a space
1089	if start == i || data[i] != '.' || data[i+1] != ' ' {
1090		return 0
1091	}
1092	return i + 2
1093}
1094
1095// returns definition list item prefix
1096func (p *parser) dliPrefix(data []byte) int {
1097	i := 0
1098
1099	// need a : followed by a spaces
1100	if data[i] != ':' || data[i+1] != ' ' {
1101		return 0
1102	}
1103	for data[i] == ' ' {
1104		i++
1105	}
1106	return i + 2
1107}
1108
1109// parse ordered or unordered list block
1110func (p *parser) list(data []byte, flags ListType) int {
1111	i := 0
1112	flags |= ListItemBeginningOfList
1113	block := p.addBlock(List, nil)
1114	block.ListFlags = flags
1115	block.Tight = true
1116
1117	for i < len(data) {
1118		skip := p.listItem(data[i:], &flags)
1119		if flags&ListItemContainsBlock != 0 {
1120			block.ListData.Tight = false
1121		}
1122		i += skip
1123		if skip == 0 || flags&ListItemEndOfList != 0 {
1124			break
1125		}
1126		flags &= ^ListItemBeginningOfList
1127	}
1128
1129	above := block.Parent
1130	finalizeList(block)
1131	p.tip = above
1132	return i
1133}
1134
1135// Returns true if block ends with a blank line, descending if needed
1136// into lists and sublists.
1137func endsWithBlankLine(block *Node) bool {
1138	// TODO: figure this out. Always false now.
1139	for block != nil {
1140		//if block.lastLineBlank {
1141		//return true
1142		//}
1143		t := block.Type
1144		if t == List || t == Item {
1145			block = block.LastChild
1146		} else {
1147			break
1148		}
1149	}
1150	return false
1151}
1152
1153func finalizeList(block *Node) {
1154	block.open = false
1155	item := block.FirstChild
1156	for item != nil {
1157		// check for non-final list item ending with blank line:
1158		if endsWithBlankLine(item) && item.Next != nil {
1159			block.ListData.Tight = false
1160			break
1161		}
1162		// recurse into children of list item, to see if there are spaces
1163		// between any of them:
1164		subItem := item.FirstChild
1165		for subItem != nil {
1166			if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
1167				block.ListData.Tight = false
1168				break
1169			}
1170			subItem = subItem.Next
1171		}
1172		item = item.Next
1173	}
1174}
1175
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// The item's first line and every following line that belongs to it are
// gathered into a working buffer with their indentation removed. The buffer
// is then attached to a new Item node, either parsed as block-level content
// or stored as a single paragraph.
//
// flags is both input and output: it tells the function which kind of list
// is being parsed, and the function sets ListItemContainsBlock,
// ListItemEndOfList and ListTypeTerm on it as it discovers them.
//
// Returns the number of bytes consumed from data, or 0 if data does not
// start with a list item (and the list is not a definition list).
func (p *parser) listItem(data []byte, flags *ListType) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// try the three prefix kinds in turn; bulletChar keeps its default
	// unless an unordered prefix matched
	var bulletChar byte = '*'
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	} else {
		bulletChar = data[i-2]
	}
	if i == 0 {
		i = p.dliPrefix(data)
		// reset definition term flag
		if i > 0 {
			*flags &= ^ListTypeTerm
		}
	}
	if i == 0 {
		// if in definition list, set term flag and continue
		if *flags&ListTypeDefinition != 0 {
			*flags |= ListTypeTerm
		} else {
			return 0
		}
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	line := i
	for i > 0 && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false
	// offset into raw at which the first nested list item starts (0 = none)
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		// the line with up to four leading spaces removed
		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0 ||
			p.dliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= ListItemContainsBlock
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= ListItemEndOfList
				break gatherlines
			}
			*flags |= ListItemContainsBlock

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
				// is the next item still a part of this list?
				next := i
				for data[next] != '\n' {
					next++
				}
				for next < len(data)-1 && data[next] == '\n' {
					next++
				}
				if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
					*flags |= ListItemEndOfList
				}
			} else {
				*flags |= ListItemEndOfList
			}
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= ListItemContainsBlock
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	block := p.addBlock(Item, nil)
	block.ListFlags = *flags
	block.Tight = false
	block.BulletChar = bulletChar
	block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark

	// render the contents of the list item
	if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
		// intermediate render of block item, except for definition term
		if sublist > 0 {
			// parse the item's own text and its nested list separately
			p.block(rawBytes[:sublist])
			p.block(rawBytes[sublist:])
		} else {
			p.block(rawBytes)
		}
	} else {
		// intermediate render of inline item
		if sublist > 0 {
			// the text before the nested list becomes a paragraph; the
			// nested list itself is still parsed as blocks
			child := p.addChild(Paragraph, 0)
			child.content = rawBytes[:sublist]
			p.block(rawBytes[sublist:])
		} else {
			child := p.addChild(Paragraph, 0)
			child.content = rawBytes
		}
	}
	// line is the start of the first line that was not consumed
	return line
}
1357
1358// render a single paragraph that has already been parsed out
1359func (p *parser) renderParagraph(data []byte) {
1360	if len(data) == 0 {
1361		return
1362	}
1363
1364	// trim leading spaces
1365	beg := 0
1366	for data[beg] == ' ' {
1367		beg++
1368	}
1369
1370	// trim trailing newline
1371	end := len(data) - 1
1372
1373	// trim trailing spaces
1374	for end > beg && data[end-1] == ' ' {
1375		end--
1376	}
1377
1378	p.addBlock(Paragraph, data[beg:end])
1379}
1380
1381func (p *parser) paragraph(data []byte) int {
1382	// prev: index of 1st char of previous line
1383	// line: index of 1st char of current line
1384	// i: index of cursor/end of current line
1385	var prev, line, i int
1386
1387	// keep going until we find something to mark the end of the paragraph
1388	for i < len(data) {
1389		// mark the beginning of the current line
1390		prev = line
1391		current := data[i:]
1392		line = i
1393
1394		// did we find a blank line marking the end of the paragraph?
1395		if n := p.isEmpty(current); n > 0 {
1396			// did this blank line followed by a definition list item?
1397			if p.flags&DefinitionLists != 0 {
1398				if i < len(data)-1 && data[i+1] == ':' {
1399					return p.list(data[prev:], ListTypeDefinition)
1400				}
1401			}
1402
1403			p.renderParagraph(data[:i])
1404			return i + n
1405		}
1406
1407		// an underline under some text marks a header, so our paragraph ended on prev line
1408		if i > 0 {
1409			if level := p.isUnderlinedHeader(current); level > 0 {
1410				// render the paragraph
1411				p.renderParagraph(data[:prev])
1412
1413				// ignore leading and trailing whitespace
1414				eol := i - 1
1415				for prev < eol && data[prev] == ' ' {
1416					prev++
1417				}
1418				for eol > prev && data[eol-1] == ' ' {
1419					eol--
1420				}
1421
1422				id := ""
1423				if p.flags&AutoHeaderIDs != 0 {
1424					id = sanitized_anchor_name.Create(string(data[prev:eol]))
1425				}
1426
1427				block := p.addBlock(Header, data[prev:eol])
1428				block.Level = level
1429				block.HeaderID = id
1430
1431				// find the end of the underline
1432				for data[i] != '\n' {
1433					i++
1434				}
1435				return i
1436			}
1437		}
1438
1439		// if the next line starts a block of HTML, then the paragraph ends here
1440		if p.flags&LaxHTMLBlocks != 0 {
1441			if data[i] == '<' && p.html(current, false) > 0 {
1442				// rewind to before the HTML block
1443				p.renderParagraph(data[:i])
1444				return i
1445			}
1446		}
1447
1448		// if there's a prefixed header or a horizontal rule after this, paragraph is over
1449		if p.isPrefixHeader(current) || p.isHRule(current) {
1450			p.renderParagraph(data[:i])
1451			return i
1452		}
1453
1454		// if there's a fenced code block, paragraph is over
1455		if p.flags&FencedCode != 0 {
1456			if p.fencedCodeBlock(current, false) > 0 {
1457				p.renderParagraph(data[:i])
1458				return i
1459			}
1460		}
1461
1462		// if there's a definition list item, prev line is a definition term
1463		if p.flags&DefinitionLists != 0 {
1464			if p.dliPrefix(current) != 0 {
1465				return p.list(data[prev:], ListTypeDefinition)
1466			}
1467		}
1468
1469		// if there's a list after this, paragraph is over
1470		if p.flags&NoEmptyLineBeforeBlock != 0 {
1471			if p.uliPrefix(current) != 0 ||
1472				p.oliPrefix(current) != 0 ||
1473				p.quotePrefix(current) != 0 ||
1474				p.codePrefix(current) != 0 {
1475				p.renderParagraph(data[:i])
1476				return i
1477			}
1478		}
1479
1480		// otherwise, scan to the beginning of the next line
1481		for data[i] != '\n' {
1482			i++
1483		}
1484		i++
1485	}
1486
1487	p.renderParagraph(data[:i])
1488	return i
1489}
1490
// skipChar returns the index of the first byte at or after start that differs
// from char, or len(data) when every remaining byte equals char. A start at or
// beyond len(data) is returned unchanged.
func skipChar(data []byte, start int, char byte) int {
	pos := start
	for ; pos < len(data); pos++ {
		if data[pos] != char {
			break
		}
	}
	return pos
}
1498
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start, or len(text) when char does not occur there. A start at or
// beyond len(text) is returned unchanged.
func skipUntilChar(text []byte, start int, char byte) int {
	// nothing left to scan; also avoids slicing past the end below
	if start >= len(text) {
		return start
	}
	if off := bytes.IndexByte(text[start:], char); off >= 0 {
		return start + off
	}
	return len(text)
}
all repos — grayfriday @ 72633fddee675b450f536cb2e3d93549993394c1

blackfriday fork with a few changes