icy does git — grayfriday (427a14caf2d7988c34bf172c1822655fe7608444): block.go

block.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import "bytes"
  17
  18// Parse block-level data.
  19// Note: this function and many that it calls assume that
  20// the input buffer ends with a newline.
  21func (p *parser) block(out *bytes.Buffer, data []byte) {
  22	if len(data) == 0 || data[len(data)-1] != '\n' {
  23		panic("block input is missing terminating newline")
  24	}
  25
  26	// this is called recursively: enforce a maximum depth
  27	if p.nesting >= p.maxNesting {
  28		return
  29	}
  30	p.nesting++
  31
  32	// parse out one block-level construct at a time
  33	for len(data) > 0 {
  34		// prefixed header:
  35		//
  36		// # Header 1
  37		// ## Header 2
  38		// ...
  39		// ###### Header 6
  40		if p.isPrefixHeader(data) {
  41			data = data[p.prefixHeader(out, data):]
  42			continue
  43		}
  44
  45		// block of preformatted HTML:
  46		//
  47		// <div>
  48		//     ...
  49		// </div>
  50		if data[0] == '<' {
  51			if i := p.html(out, data, true); i > 0 {
  52				data = data[i:]
  53				continue
  54			}
  55		}
  56
  57		// title block
  58		//
  59		// % stuff
  60		// % more stuff
  61		// % even more stuff
  62		if p.flags&EXTENSION_TITLEBLOCK != 0 {
  63			if data[0] == '%' {
  64				if i := p.titleBlock(out, data, true); i > 0 {
  65					data = data[i:]
  66					continue
  67				}
  68			}
  69		}
  70
  71		// blank lines.  note: returns the # of bytes to skip
  72		if i := p.isEmpty(data); i > 0 {
  73			data = data[i:]
  74			continue
  75		}
  76
  77		// indented code block:
  78		//
  79		//     func max(a, b int) int {
  80		//         if a > b {
  81		//             return a
  82		//         }
  83		//         return b
  84		//      }
  85		if p.codePrefix(data) > 0 {
  86			data = data[p.code(out, data):]
  87			continue
  88		}
  89
  90		// fenced code block:
  91		//
  92		// ``` go
  93		// func fact(n int) int {
  94		//     if n <= 1 {
  95		//         return n
  96		//     }
  97		//     return n * fact(n-1)
  98		// }
  99		// ```
 100		if p.flags&EXTENSION_FENCED_CODE != 0 {
 101			if i := p.fencedCode(out, data, true); i > 0 {
 102				data = data[i:]
 103				continue
 104			}
 105		}
 106
 107		// horizontal rule:
 108		//
 109		// ------
 110		// or
 111		// ******
 112		// or
 113		// ______
 114		if p.isHRule(data) {
 115			p.r.HRule(out)
 116			var i int
 117			for i = 0; data[i] != '\n'; i++ {
 118			}
 119			data = data[i:]
 120			continue
 121		}
 122
 123		// block quote:
 124		//
 125		// > A big quote I found somewhere
 126		// > on the web
 127		if p.quotePrefix(data) > 0 {
 128			data = data[p.quote(out, data):]
 129			continue
 130		}
 131
 132		// table:
 133		//
 134		// Name  | Age | Phone
 135		// ------|-----|---------
 136		// Bob   | 31  | 555-1234
 137		// Alice | 27  | 555-4321
 138		if p.flags&EXTENSION_TABLES != 0 {
 139			if i := p.table(out, data); i > 0 {
 140				data = data[i:]
 141				continue
 142			}
 143		}
 144
 145		// an itemized/unordered list:
 146		//
 147		// * Item 1
 148		// * Item 2
 149		//
 150		// also works with + or -
 151		if p.uliPrefix(data) > 0 {
 152			data = data[p.list(out, data, 0):]
 153			continue
 154		}
 155
 156		// a numbered/ordered list:
 157		//
 158		// 1. Item 1
 159		// 2. Item 2
 160		if p.oliPrefix(data) > 0 {
 161			data = data[p.list(out, data, LIST_TYPE_ORDERED):]
 162			continue
 163		}
 164
 165		// anything else must look like a normal paragraph
 166		// note: this finds underlined headers, too
 167		data = data[p.paragraph(out, data):]
 168	}
 169
 170	p.nesting--
 171}
 172
 173func (p *parser) isPrefixHeader(data []byte) bool {
 174	if data[0] != '#' {
 175		return false
 176	}
 177
 178	if p.flags&EXTENSION_SPACE_HEADERS != 0 {
 179		level := 0
 180		for level < 6 && data[level] == '#' {
 181			level++
 182		}
 183		if data[level] != ' ' {
 184			return false
 185		}
 186	}
 187	return true
 188}
 189
 190func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
 191	level := 0
 192	for level < 6 && data[level] == '#' {
 193		level++
 194	}
 195	i, end := 0, 0
 196	for i = level; data[i] == ' '; i++ {
 197	}
 198	for end = i; data[end] != '\n'; end++ {
 199	}
 200	skip := end
 201	id := ""
 202	if p.flags&EXTENSION_HEADER_IDS != 0 {
 203		j, k := 0, 0
 204		// find start/end of header id
 205		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
 206		}
 207		for k = j + 1; k < end && data[k] != '}'; k++ {
 208		}
 209		// extract header id iff found
 210		if j < end && k < end {
 211			id = string(data[j+2 : k])
 212			end = j
 213			skip = k + 1
 214			for end > 0 && data[end-1] == ' ' {
 215				end--
 216			}
 217		}
 218	}
 219	for end > 0 && data[end-1] == '#' {
 220		end--
 221	}
 222	for end > 0 && data[end-1] == ' ' {
 223		end--
 224	}
 225	if end > i {
 226		work := func() bool {
 227			p.inline(out, data[i:end])
 228			return true
 229		}
 230		p.r.Header(out, work, level, id)
 231	}
 232	return skip
 233}
 234
 235func (p *parser) isUnderlinedHeader(data []byte) int {
 236	// test of level 1 header
 237	if data[0] == '=' {
 238		i := 1
 239		for data[i] == '=' {
 240			i++
 241		}
 242		for data[i] == ' ' {
 243			i++
 244		}
 245		if data[i] == '\n' {
 246			return 1
 247		} else {
 248			return 0
 249		}
 250	}
 251
 252	// test of level 2 header
 253	if data[0] == '-' {
 254		i := 1
 255		for data[i] == '-' {
 256			i++
 257		}
 258		for data[i] == ' ' {
 259			i++
 260		}
 261		if data[i] == '\n' {
 262			return 2
 263		} else {
 264			return 0
 265		}
 266	}
 267
 268	return 0
 269}
 270
 271func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
 272	if data[0] != '%' {
 273		return 0
 274	}
 275	splitData := bytes.Split(data, []byte("\n"))
 276	var i int
 277	for idx, b := range splitData {
 278		if !bytes.HasPrefix(b, []byte("%")) {
 279			i = idx // - 1
 280			break
 281		}
 282	}
 283
 284	data = bytes.Join(splitData[0:i], []byte("\n"))
 285	p.r.TitleBlock(out, data)
 286
 287	return len(data)
 288}
 289
 290func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
 291	var i, j int
 292
 293	// identify the opening tag
 294	if data[0] != '<' {
 295		return 0
 296	}
 297	curtag, tagfound := p.htmlFindTag(data[1:])
 298
 299	// handle special cases
 300	if !tagfound {
 301		// check for an HTML comment
 302		if size := p.htmlComment(out, data, doRender); size > 0 {
 303			return size
 304		}
 305
 306		// check for an <hr> tag
 307		if size := p.htmlHr(out, data, doRender); size > 0 {
 308			return size
 309		}
 310
 311		// no special case recognized
 312		return 0
 313	}
 314
 315	// look for an unindented matching closing tag
 316	// followed by a blank line
 317	found := false
 318	/*
 319		closetag := []byte("\n</" + curtag + ">")
 320		j = len(curtag) + 1
 321		for !found {
 322			// scan for a closing tag at the beginning of a line
 323			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
 324				j += skip + len(closetag)
 325			} else {
 326				break
 327			}
 328
 329			// see if it is the only thing on the line
 330			if skip := p.isEmpty(data[j:]); skip > 0 {
 331				// see if it is followed by a blank line/eof
 332				j += skip
 333				if j >= len(data) {
 334					found = true
 335					i = j
 336				} else {
 337					if skip := p.isEmpty(data[j:]); skip > 0 {
 338						j += skip
 339						found = true
 340						i = j
 341					}
 342				}
 343			}
 344		}
 345	*/
 346
 347	// if not found, try a second pass looking for indented match
 348	// but not if tag is "ins" or "del" (following original Markdown.pl)
 349	if !found && curtag != "ins" && curtag != "del" {
 350		i = 1
 351		for i < len(data) {
 352			i++
 353			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
 354				i++
 355			}
 356
 357			if i+2+len(curtag) >= len(data) {
 358				break
 359			}
 360
 361			j = p.htmlFindEnd(curtag, data[i-1:])
 362
 363			if j > 0 {
 364				i += j - 1
 365				found = true
 366				break
 367			}
 368		}
 369	}
 370
 371	if !found {
 372		return 0
 373	}
 374
 375	// the end of the block has been found
 376	if doRender {
 377		// trim newlines
 378		end := i
 379		for end > 0 && data[end-1] == '\n' {
 380			end--
 381		}
 382		p.r.BlockHtml(out, data[:end])
 383	}
 384
 385	return i
 386}
 387
 388// HTML comment, lax form
 389func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
 390	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
 391		return 0
 392	}
 393
 394	i := 5
 395
 396	// scan for an end-of-comment marker, across lines if necessary
 397	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
 398		i++
 399	}
 400	i++
 401
 402	// no end-of-comment marker
 403	if i >= len(data) {
 404		return 0
 405	}
 406
 407	// needs to end with a blank line
 408	if j := p.isEmpty(data[i:]); j > 0 {
 409		size := i + j
 410		if doRender {
 411			// trim trailing newlines
 412			end := size
 413			for end > 0 && data[end-1] == '\n' {
 414				end--
 415			}
 416			p.r.BlockHtml(out, data[:end])
 417		}
 418		return size
 419	}
 420
 421	return 0
 422}
 423
 424// HR, which is the only self-closing block tag considered
 425func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
 426	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 427		return 0
 428	}
 429	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 430		// not an <hr> tag after all; at least not a valid one
 431		return 0
 432	}
 433
 434	i := 3
 435	for data[i] != '>' && data[i] != '\n' {
 436		i++
 437	}
 438
 439	if data[i] == '>' {
 440		i++
 441		if j := p.isEmpty(data[i:]); j > 0 {
 442			size := i + j
 443			if doRender {
 444				// trim newlines
 445				end := size
 446				for end > 0 && data[end-1] == '\n' {
 447					end--
 448				}
 449				p.r.BlockHtml(out, data[:end])
 450			}
 451			return size
 452		}
 453	}
 454
 455	return 0
 456}
 457
 458func (p *parser) htmlFindTag(data []byte) (string, bool) {
 459	i := 0
 460	for isalnum(data[i]) {
 461		i++
 462	}
 463	key := string(data[:i])
 464	if blockTags[key] {
 465		return key, true
 466	}
 467	return "", false
 468}
 469
 470func (p *parser) htmlFindEnd(tag string, data []byte) int {
 471	// assume data[0] == '<' && data[1] == '/' already tested
 472
 473	// check if tag is a match
 474	closetag := []byte("</" + tag + ">")
 475	if !bytes.HasPrefix(data, closetag) {
 476		return 0
 477	}
 478	i := len(closetag)
 479
 480	// check that the rest of the line is blank
 481	skip := 0
 482	if skip = p.isEmpty(data[i:]); skip == 0 {
 483		return 0
 484	}
 485	i += skip
 486	skip = 0
 487
 488	if i >= len(data) {
 489		return i
 490	}
 491
 492	if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
 493		return i
 494	}
 495	if skip = p.isEmpty(data[i:]); skip == 0 {
 496		// following line must be blank
 497		return 0
 498	}
 499
 500	return i + skip
 501}
 502
 503func (p *parser) isEmpty(data []byte) int {
 504	// it is okay to call isEmpty on an empty buffer
 505	if len(data) == 0 {
 506		return 0
 507	}
 508
 509	var i int
 510	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 511		if data[i] != ' ' && data[i] != '\t' {
 512			return 0
 513		}
 514	}
 515	return i + 1
 516}
 517
 518func (p *parser) isHRule(data []byte) bool {
 519	i := 0
 520
 521	// skip up to three spaces
 522	for i < 3 && data[i] == ' ' {
 523		i++
 524	}
 525
 526	// look at the hrule char
 527	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 528		return false
 529	}
 530	c := data[i]
 531
 532	// the whole line must be the char or whitespace
 533	n := 0
 534	for data[i] != '\n' {
 535		switch {
 536		case data[i] == c:
 537			n++
 538		case data[i] != ' ':
 539			return false
 540		}
 541		i++
 542	}
 543
 544	return n >= 3
 545}
 546
 547func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
 548	i, size := 0, 0
 549	skip = 0
 550
 551	// skip up to three spaces
 552	for i < 3 && data[i] == ' ' {
 553		i++
 554	}
 555
 556	// check for the marker characters: ~ or `
 557	if data[i] != '~' && data[i] != '`' {
 558		return
 559	}
 560
 561	c := data[i]
 562
 563	// the whole line must be the same char or whitespace
 564	for data[i] == c {
 565		size++
 566		i++
 567	}
 568
 569	// the marker char must occur at least 3 times
 570	if size < 3 {
 571		return
 572	}
 573	marker = string(data[i-size : i])
 574
 575	// if this is the end marker, it must match the beginning marker
 576	if oldmarker != "" && marker != oldmarker {
 577		return
 578	}
 579
 580	if syntax != nil {
 581		syn := 0
 582
 583		for data[i] == ' ' {
 584			i++
 585		}
 586
 587		syntaxStart := i
 588
 589		if data[i] == '{' {
 590			i++
 591			syntaxStart++
 592
 593			for data[i] != '}' && data[i] != '\n' {
 594				syn++
 595				i++
 596			}
 597
 598			if data[i] != '}' {
 599				return
 600			}
 601
 602			// strip all whitespace at the beginning and the end
 603			// of the {} block
 604			for syn > 0 && isspace(data[syntaxStart]) {
 605				syntaxStart++
 606				syn--
 607			}
 608
 609			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 610				syn--
 611			}
 612
 613			i++
 614		} else {
 615			for !isspace(data[i]) {
 616				syn++
 617				i++
 618			}
 619		}
 620
 621		language := string(data[syntaxStart : syntaxStart+syn])
 622		*syntax = &language
 623	}
 624
 625	for data[i] == ' ' {
 626		i++
 627	}
 628	if data[i] != '\n' {
 629		return
 630	}
 631
 632	skip = i + 1
 633	return
 634}
 635
 636func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 637	var lang *string
 638	beg, marker := p.isFencedCode(data, &lang, "")
 639	if beg == 0 || beg >= len(data) {
 640		return 0
 641	}
 642
 643	var work bytes.Buffer
 644
 645	for {
 646		// safe to assume beg < len(data)
 647
 648		// check for the end of the code block
 649		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
 650		if fenceEnd != 0 {
 651			beg += fenceEnd
 652			break
 653		}
 654
 655		// copy the current line
 656		end := beg
 657		for data[end] != '\n' {
 658			end++
 659		}
 660		end++
 661
 662		// did we reach the end of the buffer without a closing marker?
 663		if end >= len(data) {
 664			return 0
 665		}
 666
 667		// verbatim copy to the working buffer
 668		if doRender {
 669			work.Write(data[beg:end])
 670		}
 671		beg = end
 672	}
 673
 674	syntax := ""
 675	if lang != nil {
 676		syntax = *lang
 677	}
 678
 679	if doRender {
 680		p.r.BlockCode(out, work.Bytes(), syntax)
 681	}
 682
 683	return beg
 684}
 685
 686func (p *parser) table(out *bytes.Buffer, data []byte) int {
 687	var header bytes.Buffer
 688	i, columns := p.tableHeader(&header, data)
 689	if i == 0 {
 690		return 0
 691	}
 692
 693	var body bytes.Buffer
 694
 695	for i < len(data) {
 696		pipes, rowStart := 0, i
 697		for ; data[i] != '\n'; i++ {
 698			if data[i] == '|' {
 699				pipes++
 700			}
 701		}
 702
 703		if pipes == 0 {
 704			i = rowStart
 705			break
 706		}
 707
 708		// include the newline in data sent to tableRow
 709		i++
 710		p.tableRow(&body, data[rowStart:i], columns, false)
 711	}
 712
 713	p.r.Table(out, header.Bytes(), body.Bytes(), columns)
 714
 715	return i
 716}
 717
 718// check if the specified position is preceeded by an odd number of backslashes
 719func isBackslashEscaped(data []byte, i int) bool {
 720	backslashes := 0
 721	for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
 722		backslashes++
 723	}
 724	return backslashes&1 == 1
 725}
 726
// tableHeader checks whether data starts with a table header: a line of
// pipe-separated cells followed by an underline line that describes each
// column.
//
// On success the header row is rendered to out through tableRow, size is the
// number of bytes spanned by the two lines, and columns holds one alignment
// flag set per column. On failure size is 0; columns may still be non-nil in
// that case, because it is allocated before the underline is validated.
func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
	i := 0
	colCount := 1
	// count the unescaped pipes on the first line; i ends on its newline
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// skip an optional leading pipe and the spaces after it
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	for data[i] == ' ' {
		i++
	}

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		dashes := 0

		// a leading colon marks left alignment and counts toward the minimum width
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_LEFT
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		// a trailing colon marks right alignment; both flags may be set on one column
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_RIGHT
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy; the order of the cases matters
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the header has
	if col != colCount {
		return
	}

	p.tableRow(out, header, columns, true)
	size = i + 1
	return
}
 831
 832func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
 833	i, col := 0, 0
 834	var rowWork bytes.Buffer
 835
 836	if data[i] == '|' && !isBackslashEscaped(data, i) {
 837		i++
 838	}
 839
 840	for col = 0; col < len(columns) && i < len(data); col++ {
 841		for data[i] == ' ' {
 842			i++
 843		}
 844
 845		cellStart := i
 846
 847		for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
 848			i++
 849		}
 850
 851		cellEnd := i
 852
 853		// skip the end-of-cell marker, possibly taking us past end of buffer
 854		i++
 855
 856		for cellEnd > cellStart && data[cellEnd-1] == ' ' {
 857			cellEnd--
 858		}
 859
 860		var cellWork bytes.Buffer
 861		p.inline(&cellWork, data[cellStart:cellEnd])
 862
 863		if header {
 864			p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
 865		} else {
 866			p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
 867		}
 868	}
 869
 870	// pad it out with empty columns to get the right number
 871	for ; col < len(columns); col++ {
 872		if header {
 873			p.r.TableHeaderCell(&rowWork, nil, columns[col])
 874		} else {
 875			p.r.TableCell(&rowWork, nil, columns[col])
 876		}
 877	}
 878
 879	// silently ignore rows with too many cells
 880
 881	p.r.TableRow(out, rowWork.Bytes())
 882}
 883
 884// returns blockquote prefix length
 885func (p *parser) quotePrefix(data []byte) int {
 886	i := 0
 887	for i < 3 && data[i] == ' ' {
 888		i++
 889	}
 890	if data[i] == '>' {
 891		if data[i+1] == ' ' {
 892			return i + 2
 893		}
 894		return i + 1
 895	}
 896	return 0
 897}
 898
 899// parse a blockquote fragment
 900func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 901	var raw bytes.Buffer
 902	beg, end := 0, 0
 903	for beg < len(data) {
 904		end = beg
 905		for data[end] != '\n' {
 906			end++
 907		}
 908		end++
 909
 910		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 911			// skip the prefix
 912			beg += pre
 913		} else if p.isEmpty(data[beg:]) > 0 &&
 914			(end >= len(data) ||
 915				(p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
 916			// blockquote ends with at least one blank line
 917			// followed by something without a blockquote prefix
 918			break
 919		}
 920
 921		// this line is part of the blockquote
 922		raw.Write(data[beg:end])
 923		beg = end
 924	}
 925
 926	var cooked bytes.Buffer
 927	p.block(&cooked, raw.Bytes())
 928	p.r.BlockQuote(out, cooked.Bytes())
 929	return end
 930}
 931
 932// returns prefix length for block code
 933func (p *parser) codePrefix(data []byte) int {
 934	if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
 935		return 4
 936	}
 937	return 0
 938}
 939
 940func (p *parser) code(out *bytes.Buffer, data []byte) int {
 941	var work bytes.Buffer
 942
 943	i := 0
 944	for i < len(data) {
 945		beg := i
 946		for data[i] != '\n' {
 947			i++
 948		}
 949		i++
 950
 951		blankline := p.isEmpty(data[beg:i]) > 0
 952		if pre := p.codePrefix(data[beg:i]); pre > 0 {
 953			beg += pre
 954		} else if !blankline {
 955			// non-empty, non-prefixed line breaks the pre
 956			i = beg
 957			break
 958		}
 959
 960		// verbatim copy to the working buffeu
 961		if blankline {
 962			work.WriteByte('\n')
 963		} else {
 964			work.Write(data[beg:i])
 965		}
 966	}
 967
 968	// trim all the \n off the end of work
 969	workbytes := work.Bytes()
 970	eol := len(workbytes)
 971	for eol > 0 && workbytes[eol-1] == '\n' {
 972		eol--
 973	}
 974	if eol != len(workbytes) {
 975		work.Truncate(eol)
 976	}
 977
 978	work.WriteByte('\n')
 979
 980	p.r.BlockCode(out, work.Bytes(), "")
 981
 982	return i
 983}
 984
 985// returns unordered list item prefix
 986func (p *parser) uliPrefix(data []byte) int {
 987	i := 0
 988
 989	// start with up to 3 spaces
 990	for i < 3 && data[i] == ' ' {
 991		i++
 992	}
 993
 994	// need a *, +, or - followed by a space
 995	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
 996		data[i+1] != ' ' {
 997		return 0
 998	}
 999	return i + 2
1000}
1001
1002// returns ordered list item prefix
1003func (p *parser) oliPrefix(data []byte) int {
1004	i := 0
1005
1006	// start with up to 3 spaces
1007	for i < 3 && data[i] == ' ' {
1008		i++
1009	}
1010
1011	// count the digits
1012	start := i
1013	for data[i] >= '0' && data[i] <= '9' {
1014		i++
1015	}
1016
1017	// we need >= 1 digits followed by a dot and a space
1018	if start == i || data[i] != '.' || data[i+1] != ' ' {
1019		return 0
1020	}
1021	return i + 2
1022}
1023
1024// parse ordered or unordered list block
1025func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1026	i := 0
1027	flags |= LIST_ITEM_BEGINNING_OF_LIST
1028	work := func() bool {
1029		for i < len(data) {
1030			skip := p.listItem(out, data[i:], &flags)
1031			i += skip
1032
1033			if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1034				break
1035			}
1036			flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1037		}
1038		return true
1039	}
1040
1041	p.r.List(out, work, flags)
1042	return i
1043}
1044
// listItem parses a single list item at the start of data, renders it to
// out, and returns the number of bytes consumed (0 if data does not start
// with a list item prefix).
// Assumes initial prefix is already removed if this is a sublist.
//
// The item's text is gathered into a working buffer with the list prefix and
// up to four spaces of indentation per continuation line removed. *flags is
// updated along the way: LIST_ITEM_CONTAINS_BLOCK selects block-level rather
// than inline rendering of the contents, and LIST_ITEM_END_OF_LIST signals
// that the whole list ends with this item.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// the item may start with either kind of list prefix
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	line := i
	for data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false
	// offset in raw where the first nested list item starts; 0 if none seen
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation (at most four spaces are counted)
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			*flags |= LIST_ITEM_END_OF_LIST
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item; when a nested list was seen, the
	// text before it and the nested list itself are rendered separately
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
		// intermediate render of block li
		if sublist > 0 {
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline li
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	// line is the start of the first line that does not belong to this item
	return line
}
1200
1201// render a single paragraph that has already been parsed out
1202func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1203	if len(data) == 0 {
1204		return
1205	}
1206
1207	// trim leading spaces
1208	beg := 0
1209	for data[beg] == ' ' {
1210		beg++
1211	}
1212
1213	// trim trailing newline
1214	end := len(data) - 1
1215
1216	// trim trailing spaces
1217	for end > beg && data[end-1] == ' ' {
1218		end--
1219	}
1220
1221	work := func() bool {
1222		p.inline(out, data[beg:end])
1223		return true
1224	}
1225	p.r.Paragraph(out, work)
1226}
1227
1228func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1229	// prev: index of 1st char of previous line
1230	// line: index of 1st char of current line
1231	// i: index of cursor/end of current line
1232	var prev, line, i int
1233
1234	// keep going until we find something to mark the end of the paragraph
1235	for i < len(data) {
1236		// mark the beginning of the current line
1237		prev = line
1238		current := data[i:]
1239		line = i
1240
1241		// did we find a blank line marking the end of the paragraph?
1242		if n := p.isEmpty(current); n > 0 {
1243			p.renderParagraph(out, data[:i])
1244			return i + n
1245		}
1246
1247		// an underline under some text marks a header, so our paragraph ended on prev line
1248		if i > 0 {
1249			if level := p.isUnderlinedHeader(current); level > 0 {
1250				// render the paragraph
1251				p.renderParagraph(out, data[:prev])
1252
1253				// ignore leading and trailing whitespace
1254				eol := i - 1
1255				for prev < eol && data[prev] == ' ' {
1256					prev++
1257				}
1258				for eol > prev && data[eol-1] == ' ' {
1259					eol--
1260				}
1261
1262				// render the header
1263				// this ugly double closure avoids forcing variables onto the heap
1264				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
1265					return func() bool {
1266						pp.inline(o, d)
1267						return true
1268					}
1269				}(out, p, data[prev:eol])
1270				p.r.Header(out, work, level, "")
1271
1272				// find the end of the underline
1273				for data[i] != '\n' {
1274					i++
1275				}
1276				return i
1277			}
1278		}
1279
1280		// if the next line starts a block of HTML, then the paragraph ends here
1281		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1282			if data[i] == '<' && p.html(out, current, false) > 0 {
1283				// rewind to before the HTML block
1284				p.renderParagraph(out, data[:i])
1285				return i
1286			}
1287		}
1288
1289		// if there's a prefixed header or a horizontal rule after this, paragraph is over
1290		if p.isPrefixHeader(current) || p.isHRule(current) {
1291			p.renderParagraph(out, data[:i])
1292			return i
1293		}
1294
1295		// if there's a list after this, paragraph is over
1296		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
1297			if p.uliPrefix(current) != 0 ||
1298				p.oliPrefix(current) != 0 ||
1299				p.quotePrefix(current) != 0 ||
1300				p.codePrefix(current) != 0 {
1301				p.renderParagraph(out, data[:i])
1302				return i
1303			}
1304		}
1305
1306		// otherwise, scan to the beginning of the next line
1307		for data[i] != '\n' {
1308			i++
1309		}
1310		i++
1311	}
1312
1313	p.renderParagraph(out, data[:i])
1314	return i
1315}
all repos — grayfriday @ 427a14caf2d7988c34bf172c1822655fe7608444

blackfriday fork with a few changes