icy does git — grayfriday (cf01a94556f19b31205611c7aa5f92ddf2381081): block.go

block.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18)
  19
  20// Parse block-level data.
  21// Note: this function and many that it calls assume that
  22// the input buffer ends with a newline.
  23func (p *parser) block(out *bytes.Buffer, data []byte) {
  24	if len(data) == 0 || data[len(data)-1] != '\n' {
  25		panic("block input is missing terminating newline")
  26	}
  27
  28	// this is called recursively: enforce a maximum depth
  29	if p.nesting >= p.maxNesting {
  30		return
  31	}
  32	p.nesting++
  33
  34	// parse out one block-level construct at a time
  35	for len(data) > 0 {
  36		// prefixed header:
  37		//
  38		// # Header 1
  39		// ## Header 2
  40		// ...
  41		// ###### Header 6
  42		if p.isPrefixHeader(data) {
  43			data = data[p.prefixHeader(out, data):]
  44			continue
  45		}
  46
  47		// block of preformatted HTML:
  48		//
  49		// <div>
  50		//     ...
  51		// </div>
  52		if data[0] == '<' {
  53			if i := p.html(out, data, true); i > 0 {
  54				data = data[i:]
  55				continue
  56			}
  57		}
  58
  59		// blank lines.  note: returns the # of bytes to skip
  60		if i := p.isEmpty(data); i > 0 {
  61			data = data[i:]
  62			continue
  63		}
  64
  65		// indented code block:
  66		//
  67		//     func max(a, b int) int {
  68		//         if a > b {
  69		//             return a
  70		//         }
  71		//         return b
  72		//      }
  73		if p.codePrefix(data) > 0 {
  74			data = data[p.code(out, data):]
  75			continue
  76		}
  77
  78		// fenced code block:
  79		//
  80		// ``` go
  81		// func fact(n int) int {
  82		//     if n <= 1 {
  83		//         return n
  84		//     }
  85		//     return n * fact(n-1)
  86		// }
  87		// ```
  88		if p.flags&EXTENSION_FENCED_CODE != 0 {
  89			if i := p.fencedCode(out, data); i > 0 {
  90				data = data[i:]
  91				continue
  92			}
  93		}
  94
  95		// horizontal rule:
  96		//
  97		// ------
  98		// or
  99		// ******
 100		// or
 101		// ______
 102		if p.isHRule(data) {
 103			p.r.HRule(out)
 104			var i int
 105			for i = 0; data[i] != '\n'; i++ {
 106			}
 107			data = data[i:]
 108			continue
 109		}
 110
 111		// block quote:
 112		//
 113		// > A big quote I found somewhere
 114		// > on the web
 115		if p.quotePrefix(data) > 0 {
 116			data = data[p.quote(out, data):]
 117			continue
 118		}
 119
 120		// table:
 121		//
 122		// Name  | Age | Phone
 123		// ------|-----|---------
 124		// Bob   | 31  | 555-1234
 125		// Alice | 27  | 555-4321
 126		if p.flags&EXTENSION_TABLES != 0 {
 127			if i := p.table(out, data); i > 0 {
 128				data = data[i:]
 129				continue
 130			}
 131		}
 132
 133		// an itemized/unordered list:
 134		//
 135		// * Item 1
 136		// * Item 2
 137		//
 138		// also works with + or -
 139		if p.uliPrefix(data) > 0 {
 140			data = data[p.list(out, data, 0):]
 141			continue
 142		}
 143
 144		// a numbered/ordered list:
 145		//
 146		// 1. Item 1
 147		// 2. Item 2
 148		if p.oliPrefix(data) > 0 {
 149			data = data[p.list(out, data, LIST_TYPE_ORDERED):]
 150			continue
 151		}
 152
 153		// anything else must look like a normal paragraph
 154		// note: this finds underlined headers, too
 155		data = data[p.paragraph(out, data):]
 156	}
 157
 158	p.nesting--
 159}
 160
 161func (p *parser) isPrefixHeader(data []byte) bool {
 162	if data[0] != '#' {
 163		return false
 164	}
 165
 166	if p.flags&EXTENSION_SPACE_HEADERS != 0 {
 167		level := 0
 168		for level < 6 && data[level] == '#' {
 169			level++
 170		}
 171		if data[level] != ' ' {
 172			return false
 173		}
 174	}
 175	return true
 176}
 177
 178func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
 179	level := 0
 180	for level < 6 && data[level] == '#' {
 181		level++
 182	}
 183	i, end := 0, 0
 184	for i = level; data[i] == ' '; i++ {
 185	}
 186	for end = i; data[end] != '\n'; end++ {
 187	}
 188	skip := end
 189	id := ""
 190	if p.flags&EXTENSION_HEADER_IDS != 0 {
 191		j, k := 0, 0
 192		for j = i; j < end - 1 && data[j] != '{' && data[j+1] != '#'; j++ {
 193		}
 194		for k = j + 1; k < end && data[k] != '}'; k++ {
 195		}
 196		if j < end && k < end {
 197			id = string(data[j+2:k])
 198			end = j
 199		}
 200	}
 201	for end > 0 && data[end-1] == '#' {
 202		end--
 203	}
 204	for end > 0 && data[end-1] == ' ' {
 205		end--
 206	}
 207	if end > i {
 208		work := func() bool {
 209			p.inline(out, data[i:end])
 210			return true
 211		}
 212		p.r.Header(out, work, level, id)
 213	}
 214	return skip
 215}
 216
 217func (p *parser) isUnderlinedHeader(data []byte) int {
 218	// test of level 1 header
 219	if data[0] == '=' {
 220		i := 1
 221		for data[i] == '=' {
 222			i++
 223		}
 224		for data[i] == ' ' {
 225			i++
 226		}
 227		if data[i] == '\n' {
 228			return 1
 229		} else {
 230			return 0
 231		}
 232	}
 233
 234	// test of level 2 header
 235	if data[0] == '-' {
 236		i := 1
 237		for data[i] == '-' {
 238			i++
 239		}
 240		for data[i] == ' ' {
 241			i++
 242		}
 243		if data[i] == '\n' {
 244			return 2
 245		} else {
 246			return 0
 247		}
 248	}
 249
 250	return 0
 251}
 252
 253func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
 254	var i, j int
 255
 256	// identify the opening tag
 257	if data[0] != '<' {
 258		return 0
 259	}
 260	curtag, tagfound := p.htmlFindTag(data[1:])
 261
 262	// handle special cases
 263	if !tagfound {
 264		// check for an HTML comment
 265		if size := p.htmlComment(out, data, doRender); size > 0 {
 266			return size
 267		}
 268
 269		// check for an <hr> tag
 270		if size := p.htmlHr(out, data, doRender); size > 0 {
 271			return size
 272		}
 273
 274		// no special case recognized
 275		return 0
 276	}
 277
 278	// look for an unindented matching closing tag
 279	// followed by a blank line
 280	found := false
 281	/*
 282		closetag := []byte("\n</" + curtag + ">")
 283		j = len(curtag) + 1
 284		for !found {
 285			// scan for a closing tag at the beginning of a line
 286			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
 287				j += skip + len(closetag)
 288			} else {
 289				break
 290			}
 291
 292			// see if it is the only thing on the line
 293			if skip := p.isEmpty(data[j:]); skip > 0 {
 294				// see if it is followed by a blank line/eof
 295				j += skip
 296				if j >= len(data) {
 297					found = true
 298					i = j
 299				} else {
 300					if skip := p.isEmpty(data[j:]); skip > 0 {
 301						j += skip
 302						found = true
 303						i = j
 304					}
 305				}
 306			}
 307		}
 308	*/
 309
 310	// if not found, try a second pass looking for indented match
 311	// but not if tag is "ins" or "del" (following original Markdown.pl)
 312	if !found && curtag != "ins" && curtag != "del" {
 313		i = 1
 314		for i < len(data) {
 315			i++
 316			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
 317				i++
 318			}
 319
 320			if i+2+len(curtag) >= len(data) {
 321				break
 322			}
 323
 324			j = p.htmlFindEnd(curtag, data[i-1:])
 325
 326			if j > 0 {
 327				i += j - 1
 328				found = true
 329				break
 330			}
 331		}
 332	}
 333
 334	if !found {
 335		return 0
 336	}
 337
 338	// the end of the block has been found
 339	if doRender {
 340		// trim newlines
 341		end := i
 342		for end > 0 && data[end-1] == '\n' {
 343			end--
 344		}
 345		p.r.BlockHtml(out, data[:end])
 346	}
 347
 348	return i
 349}
 350
 351// HTML comment, lax form
 352func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
 353	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
 354		return 0
 355	}
 356
 357	i := 5
 358
 359	// scan for an end-of-comment marker, across lines if necessary
 360	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
 361		i++
 362	}
 363	i++
 364
 365	// no end-of-comment marker
 366	if i >= len(data) {
 367		return 0
 368	}
 369
 370	// needs to end with a blank line
 371	if j := p.isEmpty(data[i:]); j > 0 {
 372		size := i + j
 373		if doRender {
 374			// trim trailing newlines
 375			end := size
 376			for end > 0 && data[end-1] == '\n' {
 377				end--
 378			}
 379			p.r.BlockHtml(out, data[:end])
 380		}
 381		return size
 382	}
 383
 384	return 0
 385}
 386
 387// HR, which is the only self-closing block tag considered
 388func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
 389	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 390		return 0
 391	}
 392	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 393		// not an <hr> tag after all; at least not a valid one
 394		return 0
 395	}
 396
 397	i := 3
 398	for data[i] != '>' && data[i] != '\n' {
 399		i++
 400	}
 401
 402	if data[i] == '>' {
 403		i++
 404		if j := p.isEmpty(data[i:]); j > 0 {
 405			size := i + j
 406			if doRender {
 407				// trim newlines
 408				end := size
 409				for end > 0 && data[end-1] == '\n' {
 410					end--
 411				}
 412				p.r.BlockHtml(out, data[:end])
 413			}
 414			return size
 415		}
 416	}
 417
 418	return 0
 419}
 420
 421func (p *parser) htmlFindTag(data []byte) (string, bool) {
 422	i := 0
 423	for isalnum(data[i]) {
 424		i++
 425	}
 426	key := string(data[:i])
 427	if blockTags[key] {
 428		return key, true
 429	}
 430	return "", false
 431}
 432
 433func (p *parser) htmlFindEnd(tag string, data []byte) int {
 434	// assume data[0] == '<' && data[1] == '/' already tested
 435
 436	// check if tag is a match
 437	closetag := []byte("</" + tag + ">")
 438	if !bytes.HasPrefix(data, closetag) {
 439		return 0
 440	}
 441	i := len(closetag)
 442
 443	// check that the rest of the line is blank
 444	skip := 0
 445	if skip = p.isEmpty(data[i:]); skip == 0 {
 446		return 0
 447	}
 448	i += skip
 449	skip = 0
 450
 451	if i >= len(data) {
 452		return i
 453	}
 454
 455	if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
 456		return i
 457	}
 458	if skip = p.isEmpty(data[i:]); skip == 0 {
 459		// following line must be blank
 460		return 0
 461	}
 462
 463	return i + skip
 464}
 465
 466func (p *parser) isEmpty(data []byte) int {
 467	// it is okay to call isEmpty on an empty buffer
 468	if len(data) == 0 {
 469		return 0
 470	}
 471
 472	var i int
 473	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 474		if data[i] != ' ' && data[i] != '\t' {
 475			return 0
 476		}
 477	}
 478	return i + 1
 479}
 480
 481func (p *parser) isHRule(data []byte) bool {
 482	i := 0
 483
 484	// skip up to three spaces
 485	for i < 3 && data[i] == ' ' {
 486		i++
 487	}
 488
 489	// look at the hrule char
 490	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 491		return false
 492	}
 493	c := data[i]
 494
 495	// the whole line must be the char or whitespace
 496	n := 0
 497	for data[i] != '\n' {
 498		switch {
 499		case data[i] == c:
 500			n++
 501		case data[i] != ' ':
 502			return false
 503		}
 504		i++
 505	}
 506
 507	return n >= 3
 508}
 509
 510func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
 511	i, size := 0, 0
 512	skip = 0
 513
 514	// skip up to three spaces
 515	for i < 3 && data[i] == ' ' {
 516		i++
 517	}
 518
 519	// check for the marker characters: ~ or `
 520	if data[i] != '~' && data[i] != '`' {
 521		return
 522	}
 523
 524	c := data[i]
 525
 526	// the whole line must be the same char or whitespace
 527	for data[i] == c {
 528		size++
 529		i++
 530	}
 531
 532	// the marker char must occur at least 3 times
 533	if size < 3 {
 534		return
 535	}
 536	marker = string(data[i-size : i])
 537
 538	// if this is the end marker, it must match the beginning marker
 539	if oldmarker != "" && marker != oldmarker {
 540		return
 541	}
 542
 543	if syntax != nil {
 544		syn := 0
 545
 546		for data[i] == ' ' {
 547			i++
 548		}
 549
 550		syntaxStart := i
 551
 552		if data[i] == '{' {
 553			i++
 554			syntaxStart++
 555
 556			for data[i] != '}' && data[i] != '\n' {
 557				syn++
 558				i++
 559			}
 560
 561			if data[i] != '}' {
 562				return
 563			}
 564
 565			// strip all whitespace at the beginning and the end
 566			// of the {} block
 567			for syn > 0 && isspace(data[syntaxStart]) {
 568				syntaxStart++
 569				syn--
 570			}
 571
 572			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 573				syn--
 574			}
 575
 576			i++
 577		} else {
 578			for !isspace(data[i]) {
 579				syn++
 580				i++
 581			}
 582		}
 583
 584		language := string(data[syntaxStart : syntaxStart+syn])
 585		*syntax = &language
 586	}
 587
 588	for data[i] == ' ' {
 589		i++
 590	}
 591	if data[i] != '\n' {
 592		return
 593	}
 594
 595	skip = i + 1
 596	return
 597}
 598
 599func (p *parser) fencedCode(out *bytes.Buffer, data []byte) int {
 600	var lang *string
 601	beg, marker := p.isFencedCode(data, &lang, "")
 602	if beg == 0 || beg >= len(data) {
 603		return 0
 604	}
 605
 606	var work bytes.Buffer
 607
 608	for {
 609		// safe to assume beg < len(data)
 610
 611		// check for the end of the code block
 612		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
 613		if fenceEnd != 0 {
 614			beg += fenceEnd
 615			break
 616		}
 617
 618		// copy the current line
 619		end := beg
 620		for data[end] != '\n' {
 621			end++
 622		}
 623		end++
 624
 625		// did we reach the end of the buffer without a closing marker?
 626		if end >= len(data) {
 627			return 0
 628		}
 629
 630		// verbatim copy to the working buffer
 631		work.Write(data[beg:end])
 632		beg = end
 633	}
 634
 635	syntax := ""
 636	if lang != nil {
 637		syntax = *lang
 638	}
 639
 640	p.r.BlockCode(out, work.Bytes(), syntax)
 641
 642	return beg
 643}
 644
 645func (p *parser) table(out *bytes.Buffer, data []byte) int {
 646	var header bytes.Buffer
 647	i, columns := p.tableHeader(&header, data)
 648	if i == 0 {
 649		return 0
 650	}
 651
 652	var body bytes.Buffer
 653
 654	for i < len(data) {
 655		pipes, rowStart := 0, i
 656		for ; data[i] != '\n'; i++ {
 657			if data[i] == '|' {
 658				pipes++
 659			}
 660		}
 661
 662		if pipes == 0 {
 663			i = rowStart
 664			break
 665		}
 666
 667		// include the newline in data sent to tableRow
 668		i++
 669		p.tableRow(&body, data[rowStart:i], columns, false)
 670	}
 671
 672	p.r.Table(out, header.Bytes(), body.Bytes(), columns)
 673
 674	return i
 675}
 676
 677// check if the specified position is preceeded by an odd number of backslashes
 678func isBackslashEscaped(data []byte, i int) bool {
 679	backslashes := 0
 680	for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
 681		backslashes++
 682	}
 683	return backslashes&1 == 1
 684}
 685
 686func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
 687	i := 0
 688	colCount := 1
 689	for i = 0; data[i] != '\n'; i++ {
 690		if data[i] == '|' && !isBackslashEscaped(data, i) {
 691			colCount++
 692		}
 693	}
 694
 695	// doesn't look like a table header
 696	if colCount == 1 {
 697		return
 698	}
 699
 700	// include the newline in the data sent to tableRow
 701	header := data[:i+1]
 702
 703	// column count ignores pipes at beginning or end of line
 704	if data[0] == '|' {
 705		colCount--
 706	}
 707	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
 708		colCount--
 709	}
 710
 711	columns = make([]int, colCount)
 712
 713	// move on to the header underline
 714	i++
 715	if i >= len(data) {
 716		return
 717	}
 718
 719	if data[i] == '|' && !isBackslashEscaped(data, i) {
 720		i++
 721	}
 722	for data[i] == ' ' {
 723		i++
 724	}
 725
 726	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
 727	// and trailing | optional on last column
 728	col := 0
 729	for data[i] != '\n' {
 730		dashes := 0
 731
 732		if data[i] == ':' {
 733			i++
 734			columns[col] |= TABLE_ALIGNMENT_LEFT
 735			dashes++
 736		}
 737		for data[i] == '-' {
 738			i++
 739			dashes++
 740		}
 741		if data[i] == ':' {
 742			i++
 743			columns[col] |= TABLE_ALIGNMENT_RIGHT
 744			dashes++
 745		}
 746		for data[i] == ' ' {
 747			i++
 748		}
 749
 750		// end of column test is messy
 751		switch {
 752		case dashes < 3:
 753			// not a valid column
 754			return
 755
 756		case data[i] == '|' && !isBackslashEscaped(data, i):
 757			// marker found, now skip past trailing whitespace
 758			col++
 759			i++
 760			for data[i] == ' ' {
 761				i++
 762			}
 763
 764			// trailing junk found after last column
 765			if col >= colCount && data[i] != '\n' {
 766				return
 767			}
 768
 769		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
 770			// something else found where marker was required
 771			return
 772
 773		case data[i] == '\n':
 774			// marker is optional for the last column
 775			col++
 776
 777		default:
 778			// trailing junk found after last column
 779			return
 780		}
 781	}
 782	if col != colCount {
 783		return
 784	}
 785
 786	p.tableRow(out, header, columns, true)
 787	size = i + 1
 788	return
 789}
 790
 791func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
 792	i, col := 0, 0
 793	var rowWork bytes.Buffer
 794
 795	if data[i] == '|' && !isBackslashEscaped(data, i) {
 796		i++
 797	}
 798
 799	for col = 0; col < len(columns) && i < len(data); col++ {
 800		for data[i] == ' ' {
 801			i++
 802		}
 803
 804		cellStart := i
 805
 806		for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
 807			i++
 808		}
 809
 810		cellEnd := i
 811
 812		// skip the end-of-cell marker, possibly taking us past end of buffer
 813		i++
 814
 815		for cellEnd > cellStart && data[cellEnd-1] == ' ' {
 816			cellEnd--
 817		}
 818
 819		var cellWork bytes.Buffer
 820		p.inline(&cellWork, data[cellStart:cellEnd])
 821
 822		if header {
 823			p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
 824		} else {
 825			p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
 826		}
 827	}
 828
 829	// pad it out with empty columns to get the right number
 830	for ; col < len(columns); col++ {
 831		if header {
 832			p.r.TableHeaderCell(&rowWork, nil, columns[col])
 833		} else {
 834			p.r.TableCell(&rowWork, nil, columns[col])
 835		}
 836	}
 837
 838	// silently ignore rows with too many cells
 839
 840	p.r.TableRow(out, rowWork.Bytes())
 841}
 842
 843// returns blockquote prefix length
 844func (p *parser) quotePrefix(data []byte) int {
 845	i := 0
 846	for i < 3 && data[i] == ' ' {
 847		i++
 848	}
 849	if data[i] == '>' {
 850		if data[i+1] == ' ' {
 851			return i + 2
 852		}
 853		return i + 1
 854	}
 855	return 0
 856}
 857
 858// parse a blockquote fragment
 859func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 860	var raw bytes.Buffer
 861	beg, end := 0, 0
 862	for beg < len(data) {
 863		end = beg
 864		for data[end] != '\n' {
 865			end++
 866		}
 867		end++
 868
 869		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 870			// skip the prefix
 871			beg += pre
 872		} else if p.isEmpty(data[beg:]) > 0 &&
 873			(end >= len(data) ||
 874				(p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
 875			// blockquote ends with at least one blank line
 876			// followed by something without a blockquote prefix
 877			break
 878		}
 879
 880		// this line is part of the blockquote
 881		raw.Write(data[beg:end])
 882		beg = end
 883	}
 884
 885	var cooked bytes.Buffer
 886	p.block(&cooked, raw.Bytes())
 887	p.r.BlockQuote(out, cooked.Bytes())
 888	return end
 889}
 890
 891// returns prefix length for block code
 892func (p *parser) codePrefix(data []byte) int {
 893	if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
 894		return 4
 895	}
 896	return 0
 897}
 898
 899func (p *parser) code(out *bytes.Buffer, data []byte) int {
 900	var work bytes.Buffer
 901
 902	i := 0
 903	for i < len(data) {
 904		beg := i
 905		for data[i] != '\n' {
 906			i++
 907		}
 908		i++
 909
 910		blankline := p.isEmpty(data[beg:i]) > 0
 911		if pre := p.codePrefix(data[beg:i]); pre > 0 {
 912			beg += pre
 913		} else if !blankline {
 914			// non-empty, non-prefixed line breaks the pre
 915			i = beg
 916			break
 917		}
 918
 919		// verbatim copy to the working buffeu
 920		if blankline {
 921			work.WriteByte('\n')
 922		} else {
 923			work.Write(data[beg:i])
 924		}
 925	}
 926
 927	// trim all the \n off the end of work
 928	workbytes := work.Bytes()
 929	eol := len(workbytes)
 930	for eol > 0 && workbytes[eol-1] == '\n' {
 931		eol--
 932	}
 933	if eol != len(workbytes) {
 934		work.Truncate(eol)
 935	}
 936
 937	work.WriteByte('\n')
 938
 939	p.r.BlockCode(out, work.Bytes(), "")
 940
 941	return i
 942}
 943
 944// returns unordered list item prefix
 945func (p *parser) uliPrefix(data []byte) int {
 946	i := 0
 947
 948	// start with up to 3 spaces
 949	for i < 3 && data[i] == ' ' {
 950		i++
 951	}
 952
 953	// need a *, +, or - followed by a space
 954	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
 955		data[i+1] != ' ' {
 956		return 0
 957	}
 958	return i + 2
 959}
 960
 961// returns ordered list item prefix
 962func (p *parser) oliPrefix(data []byte) int {
 963	i := 0
 964
 965	// start with up to 3 spaces
 966	for i < 3 && data[i] == ' ' {
 967		i++
 968	}
 969
 970	// count the digits
 971	start := i
 972	for data[i] >= '0' && data[i] <= '9' {
 973		i++
 974	}
 975
 976	// we need >= 1 digits followed by a dot and a space
 977	if start == i || data[i] != '.' || data[i+1] != ' ' {
 978		return 0
 979	}
 980	return i + 2
 981}
 982
 983// parse ordered or unordered list block
 984func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
 985	i := 0
 986	flags |= LIST_ITEM_BEGINNING_OF_LIST
 987	work := func() bool {
 988		for i < len(data) {
 989			skip := p.listItem(out, data[i:], &flags)
 990			i += skip
 991
 992			if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
 993				break
 994			}
 995			flags &= ^LIST_ITEM_BEGINNING_OF_LIST
 996		}
 997		return true
 998	}
 999
1000	p.r.List(out, work, flags)
1001	return i
1002}
1003
1004// Parse a single list item.
1005// Assumes initial prefix is already removed if this is a sublist.
1006func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
1007	// keep track of the indentation of the first line
1008	itemIndent := 0
1009	for itemIndent < 3 && data[itemIndent] == ' ' {
1010		itemIndent++
1011	}
1012
1013	i := p.uliPrefix(data)
1014	if i == 0 {
1015		i = p.oliPrefix(data)
1016	}
1017	if i == 0 {
1018		return 0
1019	}
1020
1021	// skip leading whitespace on first line
1022	for data[i] == ' ' {
1023		i++
1024	}
1025
1026	// find the end of the line
1027	line := i
1028	for data[i-1] != '\n' {
1029		i++
1030	}
1031
1032	// get working buffer
1033	var raw bytes.Buffer
1034
1035	// put the first line into the working buffer
1036	raw.Write(data[line:i])
1037	line = i
1038
1039	// process the following lines
1040	containsBlankLine := false
1041	sublist := 0
1042
1043gatherlines:
1044	for line < len(data) {
1045		i++
1046
1047		// find the end of this line
1048		for data[i-1] != '\n' {
1049			i++
1050		}
1051
1052		// if it is an empty line, guess that it is part of this item
1053		// and move on to the next line
1054		if p.isEmpty(data[line:i]) > 0 {
1055			containsBlankLine = true
1056			line = i
1057			continue
1058		}
1059
1060		// calculate the indentation
1061		indent := 0
1062		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1063			indent++
1064		}
1065
1066		chunk := data[line+indent : i]
1067
1068		// evaluate how this line fits in
1069		switch {
1070		// is this a nested list item?
1071		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1072			p.oliPrefix(chunk) > 0:
1073
1074			if containsBlankLine {
1075				*flags |= LIST_ITEM_CONTAINS_BLOCK
1076			}
1077
1078			// to be a nested list, it must be indented more
1079			// if not, it is the next item in the same list
1080			if indent <= itemIndent {
1081				break gatherlines
1082			}
1083
1084			// is this the first item in the the nested list?
1085			if sublist == 0 {
1086				sublist = raw.Len()
1087			}
1088
1089		// is this a nested prefix header?
1090		case p.isPrefixHeader(chunk):
1091			// if the header is not indented, it is not nested in the list
1092			// and thus ends the list
1093			if containsBlankLine && indent < 4 {
1094				*flags |= LIST_ITEM_END_OF_LIST
1095				break gatherlines
1096			}
1097			*flags |= LIST_ITEM_CONTAINS_BLOCK
1098
1099		// anything following an empty line is only part
1100		// of this item if it is indented 4 spaces
1101		// (regardless of the indentation of the beginning of the item)
1102		case containsBlankLine && indent < 4:
1103			*flags |= LIST_ITEM_END_OF_LIST
1104			break gatherlines
1105
1106		// a blank line means this should be parsed as a block
1107		case containsBlankLine:
1108			raw.WriteByte('\n')
1109			*flags |= LIST_ITEM_CONTAINS_BLOCK
1110		}
1111
1112		// if this line was preceeded by one or more blanks,
1113		// re-introduce the blank into the buffer
1114		if containsBlankLine {
1115			containsBlankLine = false
1116			raw.WriteByte('\n')
1117		}
1118
1119		// add the line into the working buffer without prefix
1120		raw.Write(data[line+indent : i])
1121
1122		line = i
1123	}
1124
1125	rawBytes := raw.Bytes()
1126
1127	// render the contents of the list item
1128	var cooked bytes.Buffer
1129	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
1130		// intermediate render of block li
1131		if sublist > 0 {
1132			p.block(&cooked, rawBytes[:sublist])
1133			p.block(&cooked, rawBytes[sublist:])
1134		} else {
1135			p.block(&cooked, rawBytes)
1136		}
1137	} else {
1138		// intermediate render of inline li
1139		if sublist > 0 {
1140			p.inline(&cooked, rawBytes[:sublist])
1141			p.block(&cooked, rawBytes[sublist:])
1142		} else {
1143			p.inline(&cooked, rawBytes)
1144		}
1145	}
1146
1147	// render the actual list item
1148	cookedBytes := cooked.Bytes()
1149	parsedEnd := len(cookedBytes)
1150
1151	// strip trailing newlines
1152	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
1153		parsedEnd--
1154	}
1155	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)
1156
1157	return line
1158}
1159
1160// render a single paragraph that has already been parsed out
1161func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1162	if len(data) == 0 {
1163		return
1164	}
1165
1166	// trim leading spaces
1167	beg := 0
1168	for data[beg] == ' ' {
1169		beg++
1170	}
1171
1172	// trim trailing newline
1173	end := len(data) - 1
1174
1175	// trim trailing spaces
1176	for end > beg && data[end-1] == ' ' {
1177		end--
1178	}
1179
1180	work := func() bool {
1181		p.inline(out, data[beg:end])
1182		return true
1183	}
1184	p.r.Paragraph(out, work)
1185}
1186
1187func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1188	// prev: index of 1st char of previous line
1189	// line: index of 1st char of current line
1190	// i: index of cursor/end of current line
1191	var prev, line, i int
1192
1193	// keep going until we find something to mark the end of the paragraph
1194	for i < len(data) {
1195		// mark the beginning of the current line
1196		prev = line
1197		current := data[i:]
1198		line = i
1199
1200		// did we find a blank line marking the end of the paragraph?
1201		if n := p.isEmpty(current); n > 0 {
1202			p.renderParagraph(out, data[:i])
1203			return i + n
1204		}
1205
1206		// an underline under some text marks a header, so our paragraph ended on prev line
1207		if i > 0 {
1208			if level := p.isUnderlinedHeader(current); level > 0 {
1209				// render the paragraph
1210				p.renderParagraph(out, data[:prev])
1211
1212				// ignore leading and trailing whitespace
1213				eol := i - 1
1214				for prev < eol && data[prev] == ' ' {
1215					prev++
1216				}
1217				for eol > prev && data[eol-1] == ' ' {
1218					eol--
1219				}
1220
1221				// render the header
1222				// this ugly double closure avoids forcing variables onto the heap
1223				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
1224					return func() bool {
1225						pp.inline(o, d)
1226						return true
1227					}
1228				}(out, p, data[prev:eol])
1229				p.r.Header(out, work, level, "")
1230
1231				// find the end of the underline
1232				for data[i] != '\n' {
1233					i++
1234				}
1235				return i
1236			}
1237		}
1238
1239		// if the next line starts a block of HTML, then the paragraph ends here
1240		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1241			if data[i] == '<' && p.html(out, current, false) > 0 {
1242				// rewind to before the HTML block
1243				p.renderParagraph(out, data[:i])
1244				return i
1245			}
1246		}
1247
1248		// if there's a prefixed header or a horizontal rule after this, paragraph is over
1249		if p.isPrefixHeader(current) || p.isHRule(current) {
1250			p.renderParagraph(out, data[:i])
1251			return i
1252		}
1253
1254		// if there's a list after this, paragraph is over
1255		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
1256			if p.uliPrefix(current) != 0 ||
1257				p.oliPrefix(current) != 0 ||
1258				p.quotePrefix(current) != 0 ||
1259				p.codePrefix(current) != 0 {
1260				p.renderParagraph(out, data[:i])
1261				return i
1262			}
1263		}
1264
1265		// otherwise, scan to the beginning of the next line
1266		for data[i] != '\n' {
1267			i++
1268		}
1269		i++
1270	}
1271
1272	p.renderParagraph(out, data[:i])
1273	return i
1274}
all repos — grayfriday @ cf01a94556f19b31205611c7aa5f92ddf2381081

blackfriday fork with a few changes