icy does git — grayfriday (70c92fefd4330506d553d7a1a173a9ee51bc4df3): block.go

block.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18)
  19
// parseBlock parses block-level data: it repeatedly recognizes the
// block-level construct at the start of data, renders it to out, and moves on
// to the remaining bytes until data is exhausted.
//
// The constructs are tried in a fixed order and the first match wins;
// anything not claimed by an earlier check is handed to blockParagraph.
//
// blockQuote and blockListItem call parseBlock again on their contents, so
// the depth is tracked in parser.nesting; once parser.maxNesting is reached
// the call returns without rendering anything.
func (parser *Parser) parseBlock(out *bytes.Buffer, data []byte) {
	// this is called recursively: enforce a maximum depth
	if parser.nesting >= parser.maxNesting {
		return
	}
	parser.nesting++

	// parse out one block-level construct at a time
	for len(data) > 0 {
		// prefixed header:
		//
		// # Header 1
		// ## Header 2
		// ...
		// ###### Header 6
		if parser.isPrefixHeader(data) {
			data = data[parser.blockPrefixHeader(out, data):]
			continue
		}

		// block of preformatted HTML:
		//
		// <div>
		//     ...
		// </div>
		//
		// blockHtml returns 0 when the '<' does not open an HTML block; in
		// that case fall through to the remaining checks
		if data[0] == '<' {
			if i := parser.blockHtml(out, data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// blank lines.  note: returns the # of bytes to skip
		if i := parser.isEmpty(data); i > 0 {
			data = data[i:]
			continue
		}

		// horizontal rule:
		//
		// ------
		// or
		// ******
		// or
		// ______
		if parser.isHRule(data) {
			parser.r.HRule(out)
			// skip the rule itself; data is left positioned on the newline
			// (if any) that ends the rule line
			var i int
			for i = 0; i < len(data) && data[i] != '\n'; i++ {
			}
			data = data[i:]
			continue
		}

		// fenced code block:
		//
		// ``` go
		// func fact(n int) int {
		//     if n <= 1 {
		//         return n
		//     }
		//     return n * fact(n-1)
		// }
		// ```
		//
		// only attempted when the extension flag is set
		if parser.flags&EXTENSION_FENCED_CODE != 0 {
			if i := parser.blockFencedCode(out, data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// table:
		//
		// Name  | Age | Phone
		// ------|-----|---------
		// Bob   | 31  | 555-1234
		// Alice | 27  | 555-4321
		//
		// only attempted when the extension flag is set
		if parser.flags&EXTENSION_TABLES != 0 {
			if i := parser.blockTable(out, data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// block quote:
		//
		// > A big quote I found somewhere
		// > on the web
		if parser.blockQuotePrefix(data) > 0 {
			data = data[parser.blockQuote(out, data):]
			continue
		}

		// indented code block:
		//
		//     func max(a, b int) int {
		//         if a > b {
		//             return a
		//         }
		//         return b
		//      }
		if parser.blockCodePrefix(data) > 0 {
			data = data[parser.blockCode(out, data):]
			continue
		}

		// an itemized/unordered list:
		//
		// * Item 1
		// * Item 2
		//
		// also works with + or -
		if parser.blockUliPrefix(data) > 0 {
			data = data[parser.blockList(out, data, 0):]
			continue
		}

		// a numbered/ordered list:
		//
		// 1. Item 1
		// 2. Item 2
		if parser.blockOliPrefix(data) > 0 {
			data = data[parser.blockList(out, data, LIST_TYPE_ORDERED):]
			continue
		}

		// anything else must look like a normal paragraph
		// note: this finds underlined headers, too
		data = data[parser.blockParagraph(out, data):]
	}

	// leaving this level of recursion
	parser.nesting--
}
 154
 155func (parser *Parser) isPrefixHeader(data []byte) bool {
 156	if data[0] != '#' {
 157		return false
 158	}
 159
 160	if parser.flags&EXTENSION_SPACE_HEADERS != 0 {
 161		level := 0
 162		for level < len(data) && level < 6 && data[level] == '#' {
 163			level++
 164		}
 165		if level < len(data) && data[level] != ' ' && data[level] != '\t' {
 166			return false
 167		}
 168	}
 169	return true
 170}
 171
 172func (parser *Parser) blockPrefixHeader(out *bytes.Buffer, data []byte) int {
 173	level := 0
 174	for level < len(data) && level < 6 && data[level] == '#' {
 175		level++
 176	}
 177	i, end := 0, 0
 178	for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ {
 179	}
 180	for end = i; end < len(data) && data[end] != '\n'; end++ {
 181	}
 182	skip := end
 183	for end > 0 && data[end-1] == '#' {
 184		end--
 185	}
 186	for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') {
 187		end--
 188	}
 189	if end > i {
 190		work := func() bool {
 191			parser.parseInline(out, data[i:end])
 192			return true
 193		}
 194		parser.r.Header(out, work, level)
 195	}
 196	return skip
 197}
 198
 199func (parser *Parser) isUnderlinedHeader(data []byte) int {
 200	i := 0
 201
 202	// test of level 1 header
 203	if data[i] == '=' {
 204		for i = 1; i < len(data) && data[i] == '='; i++ {
 205		}
 206		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 207			i++
 208		}
 209		if i >= len(data) || data[i] == '\n' {
 210			return 1
 211		} else {
 212			return 0
 213		}
 214	}
 215
 216	// test of level 2 header
 217	if data[i] == '-' {
 218		for i = 1; i < len(data) && data[i] == '-'; i++ {
 219		}
 220		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 221			i++
 222		}
 223		if i >= len(data) || data[i] == '\n' {
 224			return 2
 225		} else {
 226			return 0
 227		}
 228	}
 229
 230	return 0
 231}
 232
// blockHtml tries to recognize a block of raw HTML at the start of data.
//
// It returns the number of bytes the block occupies, or 0 when data does not
// start with a recognized HTML block. When doRender is true the block is
// passed verbatim to the renderer's BlockHtml; when it is false the block is
// only measured and nothing is written to out.
//
// Three forms are recognized:
//   - an HTML comment ("<!-- ... -->") whose closing line is otherwise blank,
//   - a tag whose name starts with "hr" (either case) whose line is
//     otherwise blank,
//   - a tag accepted by blockHtmlFindTag, other than "ins" and "del", that
//     has a matching closing tag accepted by blockHtmlFindEnd.
func (parser *Parser) blockHtml(out *bytes.Buffer, data []byte, doRender bool) int {
	var i, j int

	// identify the opening tag
	if len(data) < 2 || data[0] != '<' {
		return 0
	}
	curtag, tagfound := parser.blockHtmlFindTag(data[1:])

	// handle special cases
	if !tagfound {

		// HTML comment, lax form
		if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' {
			i = 5

			// scan until data[i-2:i+1] is the terminating "-->"
			for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
				i++
			}
			// step past the '>' (or past the end when no terminator exists)
			i++

			// the rest of the terminator's line must be blank; j stays 0
			// when the comment is unterminated or ends the input
			if i < len(data) {
				j = parser.isEmpty(data[i:])
			}

			if j > 0 {
				size := i + j
				if doRender {
					parser.r.BlockHtml(out, data[:size])
				}
				return size
			}
		}

		// HR, which is the only self-closing block tag considered
		// (only the two letters after '<' are checked)
		if len(data) > 4 &&
			(data[1] == 'h' || data[1] == 'H') &&
			(data[2] == 'r' || data[2] == 'R') {

			// find the '>' that closes the tag
			i = 3
			for i < len(data) && data[i] != '>' {
				i++
			}

			// something must follow the '>', and the rest of that line
			// must be blank
			if i+1 < len(data) {
				i++
				j = parser.isEmpty(data[i:])
				if j > 0 {
					size := i + j
					if doRender {
						parser.r.BlockHtml(out, data[:size])
					}
					return size
				}
			}
		}

		// no special case recognized
		return 0
	}

	// a block tag was found: look for its matching closing tag
	i = 1
	found := false

	// the search is skipped entirely for "ins" and "del" (following original
	// Markdown.pl), so those tags never form an HTML block here
	if curtag != "ins" && curtag != "del" {
		i = 1
		for i < len(data) {
			// advance until data[i-1:i+1] is "</"
			i++
			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
				i++
			}

			// not enough bytes left to hold the closing tag
			if i+2+len(curtag) >= len(data) {
				break
			}

			// j is the length of the closing tag plus the blank text after
			// it, measured from the '<' at i-1; 0 means no match here
			j = parser.blockHtmlFindEnd(curtag, data[i-1:])

			if j > 0 {
				// i now indexes the first byte after the block
				i += j - 1
				found = true
				break
			}
		}
	}

	if !found {
		return 0
	}

	// the end of the block has been found
	if doRender {
		parser.r.BlockHtml(out, data[:i])
	}

	return i
}
 334
 335func (parser *Parser) blockHtmlFindTag(data []byte) (string, bool) {
 336	i := 0
 337	for i < len(data) && isalnum(data[i]) {
 338		i++
 339	}
 340	if i >= len(data) {
 341		return "", false
 342	}
 343	key := string(data[:i])
 344	if blockTags[key] {
 345		return key, true
 346	}
 347	return "", false
 348}
 349
 350func (parser *Parser) blockHtmlFindEnd(tag string, data []byte) int {
 351	// assume data[0] == '<' && data[1] == '/' already tested
 352
 353	// check if tag is a match
 354	if len(data) < len(tag)+3 || data[len(tag)+2] != '>' ||
 355		bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 {
 356		return 0
 357	}
 358
 359	// check for blank line/eof after the closing tag
 360	i := len(tag) + 3
 361	w := 0
 362	if i < len(data) {
 363		if w = parser.isEmpty(data[i:]); w == 0 {
 364			return 0 // non-blank after tag
 365		}
 366	}
 367	i += w
 368	w = 0
 369
 370	if parser.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
 371		if i < len(data) {
 372			w = parser.isEmpty(data[i:])
 373		}
 374	} else {
 375		if i < len(data) {
 376			if w = parser.isEmpty(data[i:]); w == 0 {
 377				return 0 // non-blank line after tag line
 378			}
 379		}
 380	}
 381
 382	return i + w
 383}
 384
 385func (parser *Parser) isEmpty(data []byte) int {
 386	var i int
 387	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 388		if data[i] != ' ' && data[i] != '\t' {
 389			return 0
 390		}
 391	}
 392	return i + 1
 393}
 394
 395func (parser *Parser) isHRule(data []byte) bool {
 396	// skip initial spaces
 397	if len(data) < 3 {
 398		return false
 399	}
 400	i := 0
 401
 402	// skip up to three spaces
 403	for i < 3 && data[i] == ' ' {
 404		i++
 405	}
 406
 407	// look at the hrule char
 408	if i+2 >= len(data) || (data[i] != '*' && data[i] != '-' && data[i] != '_') {
 409		return false
 410	}
 411	c := data[i]
 412
 413	// the whole line must be the char or whitespace
 414	n := 0
 415	for i < len(data) && data[i] != '\n' {
 416		switch {
 417		case data[i] == c:
 418			n++
 419		case data[i] != ' ' && data[i] != '\t':
 420			return false
 421		}
 422		i++
 423	}
 424
 425	return n >= 3
 426}
 427
 428func (parser *Parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
 429	i, size := 0, 0
 430	skip = 0
 431
 432	// skip initial spaces
 433	if len(data) < 3 {
 434		return
 435	}
 436	if data[0] == ' ' {
 437		i++
 438		if data[1] == ' ' {
 439			i++
 440			if data[2] == ' ' {
 441				i++
 442			}
 443		}
 444	}
 445
 446	// check for the marker characters: ~ or `
 447	if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') {
 448		return
 449	}
 450
 451	c := data[i]
 452
 453	// the whole line must be the same char or whitespace
 454	for i < len(data) && data[i] == c {
 455		size++
 456		i++
 457	}
 458
 459	// the marker char must occur at least 3 times
 460	if size < 3 {
 461		return
 462	}
 463	marker = string(data[i-size : i])
 464
 465	// if this is the end marker, it must match the beginning marker
 466	if oldmarker != "" && marker != oldmarker {
 467		return
 468	}
 469
 470	if syntax != nil {
 471		syn := 0
 472
 473		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 474			i++
 475		}
 476
 477		syntaxStart := i
 478
 479		if i < len(data) && data[i] == '{' {
 480			i++
 481			syntaxStart++
 482
 483			for i < len(data) && data[i] != '}' && data[i] != '\n' {
 484				syn++
 485				i++
 486			}
 487
 488			if i == len(data) || data[i] != '}' {
 489				return
 490			}
 491
 492			// strip all whitespace at the beginning and the end
 493			// of the {} block
 494			for syn > 0 && isspace(data[syntaxStart]) {
 495				syntaxStart++
 496				syn--
 497			}
 498
 499			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 500				syn--
 501			}
 502
 503			i++
 504		} else {
 505			for i < len(data) && !isspace(data[i]) {
 506				syn++
 507				i++
 508			}
 509		}
 510
 511		language := string(data[syntaxStart : syntaxStart+syn])
 512		*syntax = &language
 513	}
 514
 515	for ; i < len(data) && data[i] != '\n'; i++ {
 516		if !isspace(data[i]) {
 517			return
 518		}
 519	}
 520
 521	skip = i + 1
 522	return
 523}
 524
 525func (parser *Parser) blockFencedCode(out *bytes.Buffer, data []byte) int {
 526	var lang *string
 527	beg, marker := parser.isFencedCode(data, &lang, "")
 528	if beg == 0 {
 529		return 0
 530	}
 531
 532	var work bytes.Buffer
 533
 534	for beg < len(data) {
 535		fenceEnd, _ := parser.isFencedCode(data[beg:], nil, marker)
 536		if fenceEnd != 0 {
 537			beg += fenceEnd
 538			break
 539		}
 540
 541		var end int
 542		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
 543		}
 544
 545		if beg < end {
 546			// verbatim copy to the working buffer
 547			if parser.isEmpty(data[beg:]) > 0 {
 548				work.WriteByte('\n')
 549			} else {
 550				work.Write(data[beg:end])
 551			}
 552		}
 553		beg = end
 554
 555		// did we find the end of the buffer without a closing marker?
 556		if beg >= len(data) {
 557			return 0
 558		}
 559	}
 560
 561	if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
 562		work.WriteByte('\n')
 563	}
 564
 565	syntax := ""
 566	if lang != nil {
 567		syntax = *lang
 568	}
 569
 570	parser.r.BlockCode(out, work.Bytes(), syntax)
 571
 572	return beg
 573}
 574
 575func (parser *Parser) blockTable(out *bytes.Buffer, data []byte) int {
 576	var headerWork bytes.Buffer
 577	i, columns, colData := parser.blockTableHeader(&headerWork, data)
 578	if i == 0 {
 579		return 0
 580	}
 581
 582	var bodyWork bytes.Buffer
 583
 584	for i < len(data) {
 585		pipes, rowStart := 0, i
 586		for ; i < len(data) && data[i] != '\n'; i++ {
 587			if data[i] == '|' {
 588				pipes++
 589			}
 590		}
 591
 592		if pipes == 0 || i == len(data) {
 593			i = rowStart
 594			break
 595		}
 596
 597		parser.blockTableRow(&bodyWork, data[rowStart:i], columns, colData)
 598		i++
 599	}
 600
 601	parser.r.Table(out, headerWork.Bytes(), bodyWork.Bytes(), colData)
 602
 603	return i
 604}
 605
 606func (parser *Parser) blockTableHeader(out *bytes.Buffer, data []byte) (size int, columns int, columnData []int) {
 607	i, pipes := 0, 0
 608	columnData = []int{}
 609	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 610		if data[i] == '|' {
 611			pipes++
 612		}
 613	}
 614
 615	if i == len(data) || pipes == 0 {
 616		return 0, 0, columnData
 617	}
 618
 619	headerEnd := i
 620
 621	if data[0] == '|' {
 622		pipes--
 623	}
 624
 625	if i > 2 && data[i-1] == '|' {
 626		pipes--
 627	}
 628
 629	columns = pipes + 1
 630	columnData = make([]int, columns)
 631
 632	// parse the header underline
 633	i++
 634	if i < len(data) && data[i] == '|' {
 635		i++
 636	}
 637
 638	underEnd := i
 639	for underEnd < len(data) && data[underEnd] != '\n' {
 640		underEnd++
 641	}
 642
 643	col := 0
 644	for ; col < columns && i < underEnd; col++ {
 645		dashes := 0
 646
 647		for i < underEnd && (data[i] == ' ' || data[i] == '\t') {
 648			i++
 649		}
 650
 651		if data[i] == ':' {
 652			i++
 653			columnData[col] |= TABLE_ALIGNMENT_LEFT
 654			dashes++
 655		}
 656
 657		for i < underEnd && data[i] == '-' {
 658			i++
 659			dashes++
 660		}
 661
 662		if i < underEnd && data[i] == ':' {
 663			i++
 664			columnData[col] |= TABLE_ALIGNMENT_RIGHT
 665			dashes++
 666		}
 667
 668		for i < underEnd && (data[i] == ' ' || data[i] == '\t') {
 669			i++
 670		}
 671
 672		if i < underEnd && data[i] != '|' {
 673			break
 674		}
 675
 676		if dashes < 3 {
 677			break
 678		}
 679
 680		i++
 681	}
 682
 683	if col < columns {
 684		return 0, 0, columnData
 685	}
 686
 687	parser.blockTableRow(out, data[:headerEnd], columns, columnData)
 688	size = underEnd + 1
 689	return
 690}
 691
 692func (parser *Parser) blockTableRow(out *bytes.Buffer, data []byte, columns int, colData []int) {
 693	i, col := 0, 0
 694	var rowWork bytes.Buffer
 695
 696	if i < len(data) && data[i] == '|' {
 697		i++
 698	}
 699
 700	for col = 0; col < columns && i < len(data); col++ {
 701		for i < len(data) && isspace(data[i]) {
 702			i++
 703		}
 704
 705		cellStart := i
 706
 707		for i < len(data) && data[i] != '|' {
 708			i++
 709		}
 710
 711		cellEnd := i - 1
 712
 713		for cellEnd > cellStart && isspace(data[cellEnd]) {
 714			cellEnd--
 715		}
 716
 717		var cellWork bytes.Buffer
 718		parser.parseInline(&cellWork, data[cellStart:cellEnd+1])
 719
 720		cdata := 0
 721		if col < len(colData) {
 722			cdata = colData[col]
 723		}
 724		parser.r.TableCell(&rowWork, cellWork.Bytes(), cdata)
 725
 726		i++
 727	}
 728
 729	for ; col < columns; col++ {
 730		emptyCell := []byte{}
 731		cdata := 0
 732		if col < len(colData) {
 733			cdata = colData[col]
 734		}
 735		parser.r.TableCell(&rowWork, emptyCell, cdata)
 736	}
 737
 738	parser.r.TableRow(out, rowWork.Bytes())
 739}
 740
 741// returns blockquote prefix length
 742func (parser *Parser) blockQuotePrefix(data []byte) int {
 743	i := 0
 744	for i < len(data) && i < 3 && data[i] == ' ' {
 745		i++
 746	}
 747	if i < len(data) && data[i] == '>' {
 748		if i+1 < len(data) && (data[i+1] == ' ' || data[i+1] == '\t') {
 749			return i + 2
 750		}
 751		return i + 1
 752	}
 753	return 0
 754}
 755
 756// parse a blockquote fragment
 757func (parser *Parser) blockQuote(out *bytes.Buffer, data []byte) int {
 758	var block bytes.Buffer
 759	var work bytes.Buffer
 760	beg, end := 0, 0
 761	for beg < len(data) {
 762		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
 763		}
 764
 765		if pre := parser.blockQuotePrefix(data[beg:]); pre > 0 {
 766			beg += pre // skip prefix
 767		} else {
 768			// empty line followed by non-quote line
 769			if parser.isEmpty(data[beg:]) > 0 &&
 770				(end >= len(data) ||
 771					(parser.blockQuotePrefix(data[end:]) == 0 && parser.isEmpty(data[end:]) == 0)) {
 772				break
 773			}
 774		}
 775
 776		if beg < end { // copy into the in-place working buffer
 777			work.Write(data[beg:end])
 778		}
 779		beg = end
 780	}
 781
 782	parser.parseBlock(&block, work.Bytes())
 783	parser.r.BlockQuote(out, block.Bytes())
 784	return end
 785}
 786
 787// returns prefix length for block code
 788func (parser *Parser) blockCodePrefix(data []byte) int {
 789	if len(data) > 0 && data[0] == '\t' {
 790		return 1
 791	}
 792	if len(data) > 3 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
 793		return 4
 794	}
 795	return 0
 796}
 797
 798func (parser *Parser) blockCode(out *bytes.Buffer, data []byte) int {
 799	var work bytes.Buffer
 800
 801	beg, end := 0, 0
 802	for beg < len(data) {
 803		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
 804		}
 805
 806		if pre := parser.blockCodePrefix(data[beg:end]); pre > 0 {
 807			beg += pre
 808		} else {
 809			if parser.isEmpty(data[beg:end]) == 0 {
 810				// non-empty non-prefixed line breaks the pre
 811				break
 812			}
 813		}
 814
 815		if beg < end {
 816			// verbatim copy to the working buffer, escaping entities
 817			if parser.isEmpty(data[beg:end]) > 0 {
 818				work.WriteByte('\n')
 819			} else {
 820				work.Write(data[beg:end])
 821			}
 822		}
 823		beg = end
 824	}
 825
 826	// trim all the \n off the end of work
 827	workbytes := work.Bytes()
 828	n := 0
 829	for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' {
 830		n++
 831	}
 832	if n > 0 {
 833		work.Truncate(len(workbytes) - n)
 834	}
 835
 836	work.WriteByte('\n')
 837
 838	parser.r.BlockCode(out, work.Bytes(), "")
 839
 840	return beg
 841}
 842
 843// returns unordered list item prefix
 844func (parser *Parser) blockUliPrefix(data []byte) int {
 845	i := 0
 846
 847	// start with up to 3 spaces
 848	for i < len(data) && i < 3 && data[i] == ' ' {
 849		i++
 850	}
 851
 852	// need a *, +, or - followed by a space/tab
 853	if i+1 >= len(data) ||
 854		(data[i] != '*' && data[i] != '+' && data[i] != '-') ||
 855		(data[i+1] != ' ' && data[i+1] != '\t') {
 856		return 0
 857	}
 858	return i + 2
 859}
 860
 861// returns ordered list item prefix
 862func (parser *Parser) blockOliPrefix(data []byte) int {
 863	i := 0
 864
 865	// start with up to 3 spaces
 866	for i < len(data) && i < 3 && data[i] == ' ' {
 867		i++
 868	}
 869
 870	// count the digits
 871	start := i
 872	for i < len(data) && data[i] >= '0' && data[i] <= '9' {
 873		i++
 874	}
 875
 876	// we need >= 1 digits followed by a dot and a space/tab
 877	if start == i || data[i] != '.' || i+1 >= len(data) ||
 878		(data[i+1] != ' ' && data[i+1] != '\t') {
 879		return 0
 880	}
 881	return i + 2
 882}
 883
 884// parse ordered or unordered list block
 885func (parser *Parser) blockList(out *bytes.Buffer, data []byte, flags int) int {
 886	i := 0
 887	work := func() bool {
 888		j := 0
 889		for i < len(data) {
 890			j = parser.blockListItem(out, data[i:], &flags)
 891			i += j
 892
 893			if j == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
 894				break
 895			}
 896		}
 897		return true
 898	}
 899
 900	parser.r.List(out, work, flags)
 901	return i
 902}
 903
// blockListItem parses and renders a single list item.
//
// data must start with the item's own prefix (optional indent, bullet or
// number, blank); the prefix is recognized here with blockUliPrefix and
// blockOliPrefix, and 0 is returned when neither matches.
//
// The item consists of its first line plus every following line up to the
// next item with the same indentation or up to the end of the list. The
// collected text is parsed as inline text, or as block-level text when the
// item contains blocks, and handed to the renderer's ListItem.
//
// flags is read and updated: LIST_ITEM_CONTAINS_BLOCK is set when this item
// contains block-level content, LIST_ITEM_END_OF_LIST when the list ends
// after this item. The caller (blockList) passes the same flags to every
// item, so bits set here stay set for later items.
//
// Returns the number of bytes consumed.
func (parser *Parser) blockListItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the first indentation prefix
	beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0

	// orgpre: number of spaces (at most 3) in front of this item's marker
	for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' {
		orgpre++
	}

	// beg: length of the item prefix; 0 means this is not a list item
	beg = parser.blockUliPrefix(data)
	if beg == 0 {
		beg = parser.blockOliPrefix(data)
	}
	if beg == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for beg < len(data) && (data[beg] == ' ' || data[beg] == '\t') {
		beg++
	}

	// skip to the beginning of the following line
	end = beg
	for end < len(data) && data[end-1] != '\n' {
		end++
	}

	// get working buffers
	// work collects the item's text with indentation removed; inter receives
	// the rendered contents of the item
	var work bytes.Buffer
	var inter bytes.Buffer

	// put the first line into the working buffer
	work.Write(data[beg:end])
	beg = end

	// process the following lines
	// containsBlankLine: a blank line was seen since the last non-blank line
	// containsBlock: the item must be rendered as block-level content
	containsBlankLine, containsBlock := false, false
	for beg < len(data) {
		// advance end to the start of the line after the current one
		end++

		for end < len(data) && data[end-1] != '\n' {
			end++
		}

		// process an empty line
		// (it is only remembered, not copied into the working buffer)
		if parser.isEmpty(data[beg:end]) > 0 {
			containsBlankLine = true
			beg = end
			continue
		}

		// calculate the indentation
		// i: number of indentation bytes to strip (up to 4 spaces, or 1 tab)
		i = 0
		for i < 4 && beg+i < end && data[beg+i] == ' ' {
			i++
		}

		// pre: indentation width, with a tab counted as the tab size
		pre = i
		if data[beg] == '\t' {
			i = 1
			pre = TAB_SIZE_DEFAULT
			if parser.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
				pre = TAB_SIZE_EIGHT
			}
		}

		// the current line without its indentation
		chunk := data[beg+i : end]

		// check for a nested list item
		// (a line that is also a horizontal rule does not count as a bullet)
		if (parser.blockUliPrefix(chunk) > 0 && !parser.isHRule(chunk)) ||
			parser.blockOliPrefix(chunk) > 0 {
			if containsBlankLine {
				containsBlock = true
			}

			// an item with the same indentation as this one is the next
			// item of the same list: this item ends here
			if pre == orgpre {
				break
			}

			// otherwise it belongs to a sublist; remember where the first
			// sublist starts in the working buffer
			if sublist == 0 {
				sublist = work.Len()
			}
		} else {
			// how about a nested prefix header?
			if parser.isPrefixHeader(chunk) {
				// only nest headers that are indented
				// (an unindented header after a blank line ends the list)
				if containsBlankLine && i < 4 && data[beg] != '\t' {
					*flags |= LIST_ITEM_END_OF_LIST
					break
				}
				containsBlock = true
			} else {
				// only join stuff after empty lines when indented
				// (an unindented line after a blank line ends the list)
				if containsBlankLine && i < 4 && data[beg] != '\t' {
					*flags |= LIST_ITEM_END_OF_LIST
					break
				} else {
					// re-insert one newline for the skipped blank line(s)
					// so the text starts a new block
					if containsBlankLine {
						work.WriteByte('\n')
						containsBlock = true
					}
				}
			}
		}

		containsBlankLine = false

		// add the line into the working buffer without prefix
		work.Write(data[beg+i : end])
		beg = end
	}

	// render li contents
	if containsBlock {
		*flags |= LIST_ITEM_CONTAINS_BLOCK
	}

	workbytes := work.Bytes()
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
		// intermediate render of block li
		// (text before the sublist and the sublist are parsed separately)
		if sublist > 0 && sublist < len(workbytes) {
			parser.parseBlock(&inter, workbytes[:sublist])
			parser.parseBlock(&inter, workbytes[sublist:])
		} else {
			parser.parseBlock(&inter, workbytes)
		}
	} else {
		// intermediate render of inline li
		// (the sublist, if any, is still parsed as block-level content)
		if sublist > 0 && sublist < len(workbytes) {
			parser.parseInline(&inter, workbytes[:sublist])
			parser.parseBlock(&inter, workbytes[sublist:])
		} else {
			parser.parseInline(&inter, workbytes)
		}
	}

	// render li itself
	parser.r.ListItem(out, inter.Bytes(), *flags)

	return beg
}
1048
1049// render a single paragraph that has already been parsed out
1050func (parser *Parser) renderParagraph(out *bytes.Buffer, data []byte) {
1051	// trim leading whitespace
1052	beg := 0
1053	for beg < len(data) && isspace(data[beg]) {
1054		beg++
1055	}
1056
1057	// trim trailing whitespace
1058	end := len(data)
1059	for end > beg && isspace(data[end-1]) {
1060		end--
1061	}
1062	if end == beg {
1063		return
1064	}
1065
1066	work := func() bool {
1067		parser.parseInline(out, data[beg:end])
1068		return true
1069	}
1070	parser.r.Paragraph(out, work)
1071}
1072
// blockParagraph renders the paragraph at the start of data and returns the
// number of bytes consumed.
//
// The input is scanned line by line until something ends the paragraph:
//   - a blank line, which is consumed together with the paragraph;
//   - a header underline ("===" or "---") on any line but the first: the
//     lines before the previous one are rendered as a paragraph, the
//     previous line is rendered as a header, and the underline is consumed
//     up to, but not including, its newline;
//   - with EXTENSION_LAX_HTML_BLOCKS, a line that starts an HTML block;
//   - a line that is a prefixed header or a horizontal rule;
//   - the end of data.
//
// In the last three cases the terminating line itself is not consumed.
func (parser *Parser) blockParagraph(out *bytes.Buffer, data []byte) int {
	// prev: index of 1st char of previous line
	// line: index of 1st char of current line
	// i: index of cursor/end of current line
	var prev, line, i int

	// keep going until we find something to mark the end of the paragraph
	for i < len(data) {
		// mark the beginning of the current line
		prev = line
		current := data[i:]
		line = i

		// did we find a blank line marking the end of the paragraph?
		if n := parser.isEmpty(current); n > 0 {
			parser.renderParagraph(out, data[:i])
			return i + n
		}

		// an underline under some text marks a header, so our paragraph ended on prev line
		// (i > 0: the first line has no text above it to underline)
		if i > 0 {
			if level := parser.isUnderlinedHeader(current); level > 0 {
				// render the paragraph
				// (everything before the header's title line)
				parser.renderParagraph(out, data[:prev])

				// ignore leading and trailing whitespace
				// (eol starts at the newline that ends the title line)
				eol := i - 1
				for prev < eol && (data[prev] == ' ' || data[prev] == '\t') {
					prev++
				}
				for eol > prev && (data[eol-1] == ' ' || data[eol-1] == '\t') {
					eol--
				}

				// render the header
				// this ugly double closure avoids forcing variables onto the heap
				work := func(o *bytes.Buffer, p *Parser, d []byte) func() bool {
					return func() bool {
						p.parseInline(o, d)
						return true
					}
				}(out, parser, data[prev:eol])
				parser.r.Header(out, work, level)

				// find the end of the underline
				// (its newline, if any, is left for the caller)
				for ; i < len(data) && data[i] != '\n'; i++ {
				}
				return i
			}
		}

		// if the next line starts a block of HTML, then the paragraph ends here
		if parser.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
			// doRender is false: the block is only detected here, not rendered
			if data[i] == '<' && parser.blockHtml(out, current, false) > 0 {
				// rewind to before the HTML block
				parser.renderParagraph(out, data[:i])
				return i
			}
		}

		// if there's a prefixed header or a horizontal rule after this, paragraph is over
		if parser.isPrefixHeader(current) || parser.isHRule(current) {
			parser.renderParagraph(out, data[:i])
			return i
		}

		// otherwise, scan to the beginning of the next line
		i++
		for i < len(data) && data[i-1] != '\n' {
			i++
		}
	}

	// reached the end of the input: everything is one paragraph
	parser.renderParagraph(out, data[:i])
	return i
}
all repos — grayfriday @ 70c92fefd4330506d553d7a1a173a9ee51bc4df3

blackfriday fork with a few changes