all repos — grayfriday @ 10880f66e2cfd97fe5998a4279706320b5ff5167

blackfriday fork with a few changes

block.go (view raw)

   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18
  19	"github.com/shurcooL/sanitized_anchor_name"
  20)
  21
  22// Parse block-level data.
  23// Note: this function and many that it calls assume that
  24// the input buffer ends with a newline.
  25func (p *parser) block(out *bytes.Buffer, data []byte) {
  26	if len(data) == 0 || data[len(data)-1] != '\n' {
  27		panic("block input is missing terminating newline")
  28	}
  29
  30	// this is called recursively: enforce a maximum depth
  31	if p.nesting >= p.maxNesting {
  32		return
  33	}
  34	p.nesting++
  35
  36	// parse out one block-level construct at a time
  37	for len(data) > 0 {
  38		// prefixed header:
  39		//
  40		// # Header 1
  41		// ## Header 2
  42		// ...
  43		// ###### Header 6
  44		if p.isPrefixHeader(data) {
  45			data = data[p.prefixHeader(out, data):]
  46			continue
  47		}
  48
  49		// block of preformatted HTML:
  50		//
  51		// <div>
  52		//     ...
  53		// </div>
  54		if data[0] == '<' {
  55			if i := p.html(out, data, true); i > 0 {
  56				data = data[i:]
  57				continue
  58			}
  59		}
  60
  61		// title block
  62		//
  63		// % stuff
  64		// % more stuff
  65		// % even more stuff
  66		if p.flags&EXTENSION_TITLEBLOCK != 0 {
  67			if data[0] == '%' {
  68				if i := p.titleBlock(out, data, true); i > 0 {
  69					data = data[i:]
  70					continue
  71				}
  72			}
  73		}
  74
  75		// blank lines.  note: returns the # of bytes to skip
  76		if i := p.isEmpty(data); i > 0 {
  77			data = data[i:]
  78			continue
  79		}
  80
  81		// indented code block:
  82		//
  83		//     func max(a, b int) int {
  84		//         if a > b {
  85		//             return a
  86		//         }
  87		//         return b
  88		//      }
  89		if p.codePrefix(data) > 0 {
  90			data = data[p.code(out, data):]
  91			continue
  92		}
  93
  94		// fenced code block:
  95		//
  96		// ``` go
  97		// func fact(n int) int {
  98		//     if n <= 1 {
  99		//         return n
 100		//     }
 101		//     return n * fact(n-1)
 102		// }
 103		// ```
 104		if p.flags&EXTENSION_FENCED_CODE != 0 {
 105			if i := p.fencedCode(out, data, true); i > 0 {
 106				data = data[i:]
 107				continue
 108			}
 109		}
 110
 111		// horizontal rule:
 112		//
 113		// ------
 114		// or
 115		// ******
 116		// or
 117		// ______
 118		if p.isHRule(data) {
 119			p.r.HRule(out)
 120			var i int
 121			for i = 0; data[i] != '\n'; i++ {
 122			}
 123			data = data[i:]
 124			continue
 125		}
 126
 127		// block quote:
 128		//
 129		// > A big quote I found somewhere
 130		// > on the web
 131		if p.quotePrefix(data) > 0 {
 132			data = data[p.quote(out, data):]
 133			continue
 134		}
 135
 136		// table:
 137		//
 138		// Name  | Age | Phone
 139		// ------|-----|---------
 140		// Bob   | 31  | 555-1234
 141		// Alice | 27  | 555-4321
 142		if p.flags&EXTENSION_TABLES != 0 {
 143			if i := p.table(out, data); i > 0 {
 144				data = data[i:]
 145				continue
 146			}
 147		}
 148
 149		// an itemized/unordered list:
 150		//
 151		// * Item 1
 152		// * Item 2
 153		//
 154		// also works with + or -
 155		if p.uliPrefix(data) > 0 {
 156			data = data[p.list(out, data, 0):]
 157			continue
 158		}
 159
 160		// a numbered/ordered list:
 161		//
 162		// 1. Item 1
 163		// 2. Item 2
 164		if p.oliPrefix(data) > 0 {
 165			data = data[p.list(out, data, LIST_TYPE_ORDERED):]
 166			continue
 167		}
 168
 169		// anything else must look like a normal paragraph
 170		// note: this finds underlined headers, too
 171		data = data[p.paragraph(out, data):]
 172	}
 173
 174	p.nesting--
 175}
 176
 177func (p *parser) isPrefixHeader(data []byte) bool {
 178	if data[0] != '#' {
 179		return false
 180	}
 181
 182	if p.flags&EXTENSION_SPACE_HEADERS != 0 {
 183		level := 0
 184		for level < 6 && data[level] == '#' {
 185			level++
 186		}
 187		if data[level] != ' ' {
 188			return false
 189		}
 190	}
 191	return true
 192}
 193
 194func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
 195	level := 0
 196	for level < 6 && data[level] == '#' {
 197		level++
 198	}
 199	i := skipChar(data, level, ' ')
 200	end := skipUntilChar(data, i, '\n')
 201	skip := end
 202	id := ""
 203	if p.flags&EXTENSION_HEADER_IDS != 0 {
 204		j, k := 0, 0
 205		// find start/end of header id
 206		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
 207		}
 208		for k = j + 1; k < end && data[k] != '}'; k++ {
 209		}
 210		// extract header id iff found
 211		if j < end && k < end {
 212			id = string(data[j+2 : k])
 213			end = j
 214			skip = k + 1
 215			for end > 0 && data[end-1] == ' ' {
 216				end--
 217			}
 218		}
 219	}
 220	for end > 0 && data[end-1] == '#' {
 221		if isBackslashEscaped(data, end-1) {
 222			break
 223		}
 224		end--
 225	}
 226	for end > 0 && data[end-1] == ' ' {
 227		end--
 228	}
 229	if end > i {
 230		if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
 231			id = sanitized_anchor_name.Create(string(data[i:end]))
 232		}
 233		work := func() bool {
 234			p.inline(out, data[i:end])
 235			return true
 236		}
 237		p.r.Header(out, work, level, id)
 238	}
 239	return skip
 240}
 241
 242func (p *parser) isUnderlinedHeader(data []byte) int {
 243	// test of level 1 header
 244	if data[0] == '=' {
 245		i := skipChar(data, 1, '=')
 246		i = skipChar(data, i, ' ')
 247		if data[i] == '\n' {
 248			return 1
 249		} else {
 250			return 0
 251		}
 252	}
 253
 254	// test of level 2 header
 255	if data[0] == '-' {
 256		i := skipChar(data, 1, '-')
 257		i = skipChar(data, i, ' ')
 258		if data[i] == '\n' {
 259			return 2
 260		} else {
 261			return 0
 262		}
 263	}
 264
 265	return 0
 266}
 267
 268func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
 269	if data[0] != '%' {
 270		return 0
 271	}
 272	splitData := bytes.Split(data, []byte("\n"))
 273	var i int
 274	for idx, b := range splitData {
 275		if !bytes.HasPrefix(b, []byte("%")) {
 276			i = idx // - 1
 277			break
 278		}
 279	}
 280
 281	data = bytes.Join(splitData[0:i], []byte("\n"))
 282	p.r.TitleBlock(out, data)
 283
 284	return len(data)
 285}
 286
// html tries to parse a block of raw HTML at the start of data.
//
// It returns the number of bytes the block occupies, or 0 when data does not
// start with a recognised HTML block. When doRender is true the block, with
// trailing newlines trimmed, is passed to the renderer's BlockHtml; when it
// is false the input is only measured.
func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
	var i, j int

	// identify the opening tag
	if data[0] != '<' {
		return 0
	}
	curtag, tagfound := p.htmlFindTag(data[1:])

	// handle special cases
	if !tagfound {
		// check for an HTML comment
		if size := p.htmlComment(out, data, doRender); size > 0 {
			return size
		}

		// check for an <hr> tag
		if size := p.htmlHr(out, data, doRender); size > 0 {
			return size
		}

		// no special case recognized
		return 0
	}

	// look for an unindented matching closing tag
	// followed by a blank line
	//
	// NOTE: this first pass is commented out below, so found is still false
	// when the second pass is reached.
	found := false
	/*
		closetag := []byte("\n</" + curtag + ">")
		j = len(curtag) + 1
		for !found {
			// scan for a closing tag at the beginning of a line
			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
				j += skip + len(closetag)
			} else {
				break
			}

			// see if it is the only thing on the line
			if skip := p.isEmpty(data[j:]); skip > 0 {
				// see if it is followed by a blank line/eof
				j += skip
				if j >= len(data) {
					found = true
					i = j
				} else {
					if skip := p.isEmpty(data[j:]); skip > 0 {
						j += skip
						found = true
						i = j
					}
				}
			}
		}
	*/

	// if not found, try a second pass looking for indented match
	// but not if tag is "ins" or "del" (following original Markdown.pl)
	if !found && curtag != "ins" && curtag != "del" {
		i = 1
		for i < len(data) {
			// advance until data[i-1:i+1] is "</", i.e. i sits on the '/'
			i++
			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
				i++
			}

			// give up when too little input remains after this position
			if i+2+len(curtag) >= len(data) {
				break
			}

			// htmlFindEnd is handed the slice starting at the '<'
			j = p.htmlFindEnd(curtag, data[i-1:])

			if j > 0 {
				// j is relative to data[i-1:], so this makes i the
				// absolute end of the block
				i += j - 1
				found = true
				break
			}
		}
	}

	if !found {
		return 0
	}

	// the end of the block has been found
	if doRender {
		// trim newlines
		end := i
		for end > 0 && data[end-1] == '\n' {
			end--
		}
		p.r.BlockHtml(out, data[:end])
	}

	return i
}
 384
 385// HTML comment, lax form
 386func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
 387	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
 388		return 0
 389	}
 390
 391	i := 5
 392
 393	// scan for an end-of-comment marker, across lines if necessary
 394	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
 395		i++
 396	}
 397	i++
 398
 399	// no end-of-comment marker
 400	if i >= len(data) {
 401		return 0
 402	}
 403
 404	// needs to end with a blank line
 405	if j := p.isEmpty(data[i:]); j > 0 {
 406		size := i + j
 407		if doRender {
 408			// trim trailing newlines
 409			end := size
 410			for end > 0 && data[end-1] == '\n' {
 411				end--
 412			}
 413			p.r.BlockHtml(out, data[:end])
 414		}
 415		return size
 416	}
 417
 418	return 0
 419}
 420
 421// HR, which is the only self-closing block tag considered
 422func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
 423	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 424		return 0
 425	}
 426	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 427		// not an <hr> tag after all; at least not a valid one
 428		return 0
 429	}
 430
 431	i := 3
 432	for data[i] != '>' && data[i] != '\n' {
 433		i++
 434	}
 435
 436	if data[i] == '>' {
 437		i++
 438		if j := p.isEmpty(data[i:]); j > 0 {
 439			size := i + j
 440			if doRender {
 441				// trim newlines
 442				end := size
 443				for end > 0 && data[end-1] == '\n' {
 444					end--
 445				}
 446				p.r.BlockHtml(out, data[:end])
 447			}
 448			return size
 449		}
 450	}
 451
 452	return 0
 453}
 454
 455func (p *parser) htmlFindTag(data []byte) (string, bool) {
 456	i := 0
 457	for isalnum(data[i]) {
 458		i++
 459	}
 460	key := string(data[:i])
 461	if blockTags[key] {
 462		return key, true
 463	}
 464	return "", false
 465}
 466
 467func (p *parser) htmlFindEnd(tag string, data []byte) int {
 468	// assume data[0] == '<' && data[1] == '/' already tested
 469
 470	// check if tag is a match
 471	closetag := []byte("</" + tag + ">")
 472	if !bytes.HasPrefix(data, closetag) {
 473		return 0
 474	}
 475	i := len(closetag)
 476
 477	// check that the rest of the line is blank
 478	skip := 0
 479	if skip = p.isEmpty(data[i:]); skip == 0 {
 480		return 0
 481	}
 482	i += skip
 483	skip = 0
 484
 485	if i >= len(data) {
 486		return i
 487	}
 488
 489	if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
 490		return i
 491	}
 492	if skip = p.isEmpty(data[i:]); skip == 0 {
 493		// following line must be blank
 494		return 0
 495	}
 496
 497	return i + skip
 498}
 499
 500func (p *parser) isEmpty(data []byte) int {
 501	// it is okay to call isEmpty on an empty buffer
 502	if len(data) == 0 {
 503		return 0
 504	}
 505
 506	var i int
 507	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 508		if data[i] != ' ' && data[i] != '\t' {
 509			return 0
 510		}
 511	}
 512	return i + 1
 513}
 514
 515func (p *parser) isHRule(data []byte) bool {
 516	i := 0
 517
 518	// skip up to three spaces
 519	for i < 3 && data[i] == ' ' {
 520		i++
 521	}
 522
 523	// look at the hrule char
 524	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 525		return false
 526	}
 527	c := data[i]
 528
 529	// the whole line must be the char or whitespace
 530	n := 0
 531	for data[i] != '\n' {
 532		switch {
 533		case data[i] == c:
 534			n++
 535		case data[i] != ' ':
 536			return false
 537		}
 538		i++
 539	}
 540
 541	return n >= 3
 542}
 543
 544func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
 545	i, size := 0, 0
 546	skip = 0
 547
 548	// skip up to three spaces
 549	for i < len(data) && i < 3 && data[i] == ' ' {
 550		i++
 551	}
 552	if i >= len(data) {
 553		return
 554	}
 555
 556	// check for the marker characters: ~ or `
 557	if data[i] != '~' && data[i] != '`' {
 558		return
 559	}
 560
 561	c := data[i]
 562
 563	// the whole line must be the same char or whitespace
 564	for i < len(data) && data[i] == c {
 565		size++
 566		i++
 567	}
 568
 569	if i >= len(data) {
 570		return
 571	}
 572
 573	// the marker char must occur at least 3 times
 574	if size < 3 {
 575		return
 576	}
 577	marker = string(data[i-size : i])
 578
 579	// if this is the end marker, it must match the beginning marker
 580	if oldmarker != "" && marker != oldmarker {
 581		return
 582	}
 583
 584	if syntax != nil {
 585		syn := 0
 586		i = skipChar(data, i, ' ')
 587
 588		if i >= len(data) {
 589			return
 590		}
 591
 592		syntaxStart := i
 593
 594		if data[i] == '{' {
 595			i++
 596			syntaxStart++
 597
 598			for i < len(data) && data[i] != '}' && data[i] != '\n' {
 599				syn++
 600				i++
 601			}
 602
 603			if i >= len(data) || data[i] != '}' {
 604				return
 605			}
 606
 607			// strip all whitespace at the beginning and the end
 608			// of the {} block
 609			for syn > 0 && isspace(data[syntaxStart]) {
 610				syntaxStart++
 611				syn--
 612			}
 613
 614			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 615				syn--
 616			}
 617
 618			i++
 619		} else {
 620			for i < len(data) && !isspace(data[i]) {
 621				syn++
 622				i++
 623			}
 624		}
 625
 626		language := string(data[syntaxStart : syntaxStart+syn])
 627		*syntax = &language
 628	}
 629
 630	i = skipChar(data, i, ' ')
 631	if i >= len(data) || data[i] != '\n' {
 632		return
 633	}
 634
 635	skip = i + 1
 636	return
 637}
 638
 639func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 640	var lang *string
 641	beg, marker := p.isFencedCode(data, &lang, "")
 642	if beg == 0 || beg >= len(data) {
 643		return 0
 644	}
 645
 646	var work bytes.Buffer
 647
 648	for {
 649		// safe to assume beg < len(data)
 650
 651		// check for the end of the code block
 652		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
 653		if fenceEnd != 0 {
 654			beg += fenceEnd
 655			break
 656		}
 657
 658		// copy the current line
 659		end := skipUntilChar(data, beg, '\n') + 1
 660
 661		// did we reach the end of the buffer without a closing marker?
 662		if end >= len(data) {
 663			return 0
 664		}
 665
 666		// verbatim copy to the working buffer
 667		if doRender {
 668			work.Write(data[beg:end])
 669		}
 670		beg = end
 671	}
 672
 673	syntax := ""
 674	if lang != nil {
 675		syntax = *lang
 676	}
 677
 678	if doRender {
 679		p.r.BlockCode(out, work.Bytes(), syntax)
 680	}
 681
 682	return beg
 683}
 684
 685func (p *parser) table(out *bytes.Buffer, data []byte) int {
 686	var header bytes.Buffer
 687	i, columns := p.tableHeader(&header, data)
 688	if i == 0 {
 689		return 0
 690	}
 691
 692	var body bytes.Buffer
 693
 694	for i < len(data) {
 695		pipes, rowStart := 0, i
 696		for ; data[i] != '\n'; i++ {
 697			if data[i] == '|' {
 698				pipes++
 699			}
 700		}
 701
 702		if pipes == 0 {
 703			i = rowStart
 704			break
 705		}
 706
 707		// include the newline in data sent to tableRow
 708		i++
 709		p.tableRow(&body, data[rowStart:i], columns, false)
 710	}
 711
 712	p.r.Table(out, header.Bytes(), body.Bytes(), columns)
 713
 714	return i
 715}
 716
 717// check if the specified position is preceded by an odd number of backslashes
 718func isBackslashEscaped(data []byte, i int) bool {
 719	backslashes := 0
 720	for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
 721		backslashes++
 722	}
 723	return backslashes&1 == 1
 724}
 725
// tableHeader checks whether data starts with a table header: a row of
// pipe-separated cells followed by an underline row made of dashes and
// optional alignment colons.
//
// On success it renders the header row into out via tableRow and returns the
// number of bytes covered by both rows (size) together with the alignment
// flags of each column (columns). On failure size is 0; columns may already
// have been allocated by then, so callers should test size rather than
// columns.
func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
	i := 0
	// n unescaped pipes separate n+1 cells
	colCount := 1
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and spaces may precede the first column
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	i = skipChar(data, i, ' ')

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		// counts dashes and alignment colons together
		dashes := 0

		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_LEFT
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_RIGHT
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		// (the cases are tried in order, so later ones rely on earlier
		// ones having failed)
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the header row
	if col != colCount {
		return
	}

	p.tableRow(out, header, columns, true)
	// i sits on the underline's newline; consume it as well
	size = i + 1
	return
}
 828
 829func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
 830	i, col := 0, 0
 831	var rowWork bytes.Buffer
 832
 833	if data[i] == '|' && !isBackslashEscaped(data, i) {
 834		i++
 835	}
 836
 837	for col = 0; col < len(columns) && i < len(data); col++ {
 838		for data[i] == ' ' {
 839			i++
 840		}
 841
 842		cellStart := i
 843
 844		for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
 845			i++
 846		}
 847
 848		cellEnd := i
 849
 850		// skip the end-of-cell marker, possibly taking us past end of buffer
 851		i++
 852
 853		for cellEnd > cellStart && data[cellEnd-1] == ' ' {
 854			cellEnd--
 855		}
 856
 857		var cellWork bytes.Buffer
 858		p.inline(&cellWork, data[cellStart:cellEnd])
 859
 860		if header {
 861			p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
 862		} else {
 863			p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
 864		}
 865	}
 866
 867	// pad it out with empty columns to get the right number
 868	for ; col < len(columns); col++ {
 869		if header {
 870			p.r.TableHeaderCell(&rowWork, nil, columns[col])
 871		} else {
 872			p.r.TableCell(&rowWork, nil, columns[col])
 873		}
 874	}
 875
 876	// silently ignore rows with too many cells
 877
 878	p.r.TableRow(out, rowWork.Bytes())
 879}
 880
 881// returns blockquote prefix length
 882func (p *parser) quotePrefix(data []byte) int {
 883	i := 0
 884	for i < 3 && data[i] == ' ' {
 885		i++
 886	}
 887	if data[i] == '>' {
 888		if data[i+1] == ' ' {
 889			return i + 2
 890		}
 891		return i + 1
 892	}
 893	return 0
 894}
 895
 896// parse a blockquote fragment
 897func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 898	var raw bytes.Buffer
 899	beg, end := 0, 0
 900	for beg < len(data) {
 901		end = beg
 902		for data[end] != '\n' {
 903			end++
 904		}
 905		end++
 906
 907		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 908			// skip the prefix
 909			beg += pre
 910		} else if p.isEmpty(data[beg:]) > 0 &&
 911			(end >= len(data) ||
 912				(p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
 913			// blockquote ends with at least one blank line
 914			// followed by something without a blockquote prefix
 915			break
 916		}
 917
 918		// this line is part of the blockquote
 919		raw.Write(data[beg:end])
 920		beg = end
 921	}
 922
 923	var cooked bytes.Buffer
 924	p.block(&cooked, raw.Bytes())
 925	p.r.BlockQuote(out, cooked.Bytes())
 926	return end
 927}
 928
 929// returns prefix length for block code
 930func (p *parser) codePrefix(data []byte) int {
 931	if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
 932		return 4
 933	}
 934	return 0
 935}
 936
 937func (p *parser) code(out *bytes.Buffer, data []byte) int {
 938	var work bytes.Buffer
 939
 940	i := 0
 941	for i < len(data) {
 942		beg := i
 943		for data[i] != '\n' {
 944			i++
 945		}
 946		i++
 947
 948		blankline := p.isEmpty(data[beg:i]) > 0
 949		if pre := p.codePrefix(data[beg:i]); pre > 0 {
 950			beg += pre
 951		} else if !blankline {
 952			// non-empty, non-prefixed line breaks the pre
 953			i = beg
 954			break
 955		}
 956
 957		// verbatim copy to the working buffeu
 958		if blankline {
 959			work.WriteByte('\n')
 960		} else {
 961			work.Write(data[beg:i])
 962		}
 963	}
 964
 965	// trim all the \n off the end of work
 966	workbytes := work.Bytes()
 967	eol := len(workbytes)
 968	for eol > 0 && workbytes[eol-1] == '\n' {
 969		eol--
 970	}
 971	if eol != len(workbytes) {
 972		work.Truncate(eol)
 973	}
 974
 975	work.WriteByte('\n')
 976
 977	p.r.BlockCode(out, work.Bytes(), "")
 978
 979	return i
 980}
 981
 982// returns unordered list item prefix
 983func (p *parser) uliPrefix(data []byte) int {
 984	i := 0
 985
 986	// start with up to 3 spaces
 987	for i < 3 && data[i] == ' ' {
 988		i++
 989	}
 990
 991	// need a *, +, or - followed by a space
 992	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
 993		data[i+1] != ' ' {
 994		return 0
 995	}
 996	return i + 2
 997}
 998
 999// returns ordered list item prefix
1000func (p *parser) oliPrefix(data []byte) int {
1001	i := 0
1002
1003	// start with up to 3 spaces
1004	for i < 3 && data[i] == ' ' {
1005		i++
1006	}
1007
1008	// count the digits
1009	start := i
1010	for data[i] >= '0' && data[i] <= '9' {
1011		i++
1012	}
1013
1014	// we need >= 1 digits followed by a dot and a space
1015	if start == i || data[i] != '.' || data[i+1] != ' ' {
1016		return 0
1017	}
1018	return i + 2
1019}
1020
1021// parse ordered or unordered list block
1022func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1023	i := 0
1024	flags |= LIST_ITEM_BEGINNING_OF_LIST
1025	work := func() bool {
1026		for i < len(data) {
1027			skip := p.listItem(out, data[i:], &flags)
1028			i += skip
1029
1030			if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1031				break
1032			}
1033			flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1034		}
1035		return true
1036	}
1037
1038	p.r.List(out, work, flags)
1039	return i
1040}
1041
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// The item's lines are gathered into a working buffer with their indentation
// (at most four spaces) removed, then rendered either as inline text or, when
// LIST_ITEM_CONTAINS_BLOCK ends up set in *flags, as block-level content.
// Returns the number of bytes consumed, or 0 if data does not start with a
// list item prefix. *flags is updated in place: LIST_ITEM_CONTAINS_BLOCK and
// LIST_ITEM_END_OF_LIST may be set.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// accept either kind of item prefix
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	line := i
	for data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false
	// offset into raw where the first nested list item starts (0 = none)
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			*flags |= LIST_ITEM_END_OF_LIST
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item
	// (when a nested list was seen, the text before it and the nested list
	// itself are rendered separately, split at offset sublist)
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
		// intermediate render of block li
		if sublist > 0 {
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline li
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	// line is the start of the first line that was not gathered
	return line
}
1197
1198// render a single paragraph that has already been parsed out
1199func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1200	if len(data) == 0 {
1201		return
1202	}
1203
1204	// trim leading spaces
1205	beg := 0
1206	for data[beg] == ' ' {
1207		beg++
1208	}
1209
1210	// trim trailing newline
1211	end := len(data) - 1
1212
1213	// trim trailing spaces
1214	for end > beg && data[end-1] == ' ' {
1215		end--
1216	}
1217
1218	work := func() bool {
1219		p.inline(out, data[beg:end])
1220		return true
1221	}
1222	p.r.Paragraph(out, work)
1223}
1224
// paragraph parses a paragraph at the start of data, renders it to out, and
// returns the number of bytes consumed.
//
// Lines are accumulated until something ends the paragraph: a blank line
// (which is consumed too), an underline that turns the previous line into a
// header, an HTML block (only with EXTENSION_LAX_HTML_BLOCKS), a prefixed
// header, a horizontal rule, or, with EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK,
// a list item, blockquote or indented code line. Except for the blank line,
// the terminating construct is left for the caller to parse.
func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
	// prev: index of 1st char of previous line
	// line: index of 1st char of current line
	// i: index of cursor/end of current line
	var prev, line, i int

	// keep going until we find something to mark the end of the paragraph
	for i < len(data) {
		// mark the beginning of the current line
		prev = line
		current := data[i:]
		line = i

		// did we find a blank line marking the end of the paragraph?
		if n := p.isEmpty(current); n > 0 {
			p.renderParagraph(out, data[:i])
			return i + n
		}

		// an underline under some text marks a header, so our paragraph ended on prev line
		if i > 0 {
			if level := p.isUnderlinedHeader(current); level > 0 {
				// render the paragraph
				// (everything before the line that becomes the header text)
				p.renderParagraph(out, data[:prev])

				// ignore leading and trailing whitespace
				// (eol starts on the newline that ends the header text line)
				eol := i - 1
				for prev < eol && data[prev] == ' ' {
					prev++
				}
				for eol > prev && data[eol-1] == ' ' {
					eol--
				}

				// render the header
				// this ugly double closure avoids forcing variables onto the heap
				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
					return func() bool {
						pp.inline(o, d)
						return true
					}
				}(out, p, data[prev:eol])

				id := ""
				if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
					id = sanitized_anchor_name.Create(string(data[prev:eol]))
				}

				p.r.Header(out, work, level, id)

				// find the end of the underline
				// (the returned index is that of the newline itself, which
				// is therefore not consumed here)
				for data[i] != '\n' {
					i++
				}
				return i
			}
		}

		// if the next line starts a block of HTML, then the paragraph ends here
		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
			// html is called with doRender=false: it only measures here
			if data[i] == '<' && p.html(out, current, false) > 0 {
				// rewind to before the HTML block
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// if there's a prefixed header or a horizontal rule after this, paragraph is over
		if p.isPrefixHeader(current) || p.isHRule(current) {
			p.renderParagraph(out, data[:i])
			return i
		}

		// if there's a list after this, paragraph is over
		// (the same applies to a blockquote or an indented code line)
		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
			if p.uliPrefix(current) != 0 ||
				p.oliPrefix(current) != 0 ||
				p.quotePrefix(current) != 0 ||
				p.codePrefix(current) != 0 {
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// otherwise, scan to the beginning of the next line
		for data[i] != '\n' {
			i++
		}
		i++
	}

	// ran out of input: everything gathered so far is the paragraph
	p.renderParagraph(out, data[:i])
	return i
}