icy does git — grayfriday (7ad5f9c1197c54c0be24fb0b0b1d75eb7ef19d89): block.go

block.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18)
  19
// block parses block-level data: on each loop iteration it recognizes one
// block-level construct at the front of data, hands it to the matching
// helper (which writes to out) and advances past the bytes that helper
// reports as consumed.
//
// Note: this function and many that it calls assume that
// the input buffer ends with a newline; block itself panics when data is
// empty or does not end with '\n'.
//
// block is re-entered through quote and listItem. Once p.nesting has
// reached p.maxNesting the call returns immediately without writing
// anything to out.
func (p *parser) block(out *bytes.Buffer, data []byte) {
	if len(data) == 0 || data[len(data)-1] != '\n' {
		panic("block input is missing terminating newline")
	}

	// this is called recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	// parse out one block-level construct at a time
	// (the checks below are tried in order; the first one that matches
	// consumes its input and restarts the loop)
	for len(data) > 0 {
		// prefixed header:
		//
		// # Header 1
		// ## Header 2
		// ...
		// ###### Header 6
		if p.isPrefixHeader(data) {
			data = data[p.prefixHeader(out, data):]
			continue
		}

		// block of preformatted HTML:
		//
		// <div>
		//     ...
		// </div>
		//
		// when html returns 0 the text is not an HTML block and the
		// remaining checks get a chance at it
		if data[0] == '<' {
			if i := p.html(out, data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// blank lines.  note: returns the # of bytes to skip
		if i := p.isEmpty(data); i > 0 {
			data = data[i:]
			continue
		}

		// indented code block:
		//
		//     func max(a, b int) int {
		//         if a > b {
		//             return a
		//         }
		//         return b
		//      }
		if p.codePrefix(data) > 0 {
			data = data[p.code(out, data):]
			continue
		}

		// fenced code block:
		//
		// ``` go
		// func fact(n int) int {
		//     if n <= 1 {
		//         return n
		//     }
		//     return n * fact(n-1)
		// }
		// ```
		if p.flags&EXTENSION_FENCED_CODE != 0 {
			if i := p.fencedCode(out, data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// horizontal rule:
		//
		// ------
		// or
		// ******
		// or
		// ______
		if p.isHRule(data) {
			p.r.HRule(out)
			// advance up to, but not past, the newline ending the rule;
			// the leftover "\n" is skipped as a blank line on the next pass
			var i int
			for i = 0; data[i] != '\n'; i++ {
			}
			data = data[i:]
			continue
		}

		// block quote:
		//
		// > A big quote I found somewhere
		// > on the web
		if p.quotePrefix(data) > 0 {
			data = data[p.quote(out, data):]
			continue
		}

		// table:
		//
		// Name  | Age | Phone
		// ------|-----|---------
		// Bob   | 31  | 555-1234
		// Alice | 27  | 555-4321
		if p.flags&EXTENSION_TABLES != 0 {
			if i := p.table(out, data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// an itemized/unordered list:
		//
		// * Item 1
		// * Item 2
		//
		// also works with + or -
		if p.uliPrefix(data) > 0 {
			data = data[p.list(out, data, 0):]
			continue
		}

		// a numbered/ordered list:
		//
		// 1. Item 1
		// 2. Item 2
		if p.oliPrefix(data) > 0 {
			data = data[p.list(out, data, LIST_TYPE_ORDERED):]
			continue
		}

		// anything else must look like a normal paragraph
		// note: this finds underlined headers, too
		data = data[p.paragraph(out, data):]
	}

	p.nesting--
}
 160
 161func (p *parser) isPrefixHeader(data []byte) bool {
 162	if data[0] != '#' {
 163		return false
 164	}
 165
 166	if p.flags&EXTENSION_SPACE_HEADERS != 0 {
 167		level := 0
 168		for level < 6 && data[level] == '#' {
 169			level++
 170		}
 171		if data[level] != ' ' {
 172			return false
 173		}
 174	}
 175	return true
 176}
 177
 178func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
 179	level := 0
 180	for level < 6 && data[level] == '#' {
 181		level++
 182	}
 183	i, end := 0, 0
 184	for i = level; data[i] == ' '; i++ {
 185	}
 186	for end = i; data[end] != '\n'; end++ {
 187	}
 188	skip := end
 189	id := ""
 190	if p.flags&EXTENSION_HEADER_IDS != 0 {
 191		j, k := 0, 0
 192		for j = i; j < end - 1 && data[j] != '{' && data[j+1] != '#'; j++ {
 193		}
 194		for k = j + 1; k < end && data[k] != '}'; k++ {
 195		}
 196		if j < end && k < end {
 197			id = string(data[j+2:k])
 198			end = j
 199			skip = k + 1
 200		}
 201	}
 202	for end > 0 && data[end-1] == '#' {
 203		end--
 204	}
 205	for end > 0 && data[end-1] == ' ' {
 206		end--
 207	}
 208	if end > i {
 209		work := func() bool {
 210			p.inline(out, data[i:end])
 211			return true
 212		}
 213		p.r.Header(out, work, level, id)
 214	}
 215	return skip
 216}
 217
 218func (p *parser) isUnderlinedHeader(data []byte) int {
 219	// test of level 1 header
 220	if data[0] == '=' {
 221		i := 1
 222		for data[i] == '=' {
 223			i++
 224		}
 225		for data[i] == ' ' {
 226			i++
 227		}
 228		if data[i] == '\n' {
 229			return 1
 230		} else {
 231			return 0
 232		}
 233	}
 234
 235	// test of level 2 header
 236	if data[0] == '-' {
 237		i := 1
 238		for data[i] == '-' {
 239			i++
 240		}
 241		for data[i] == ' ' {
 242			i++
 243		}
 244		if data[i] == '\n' {
 245			return 2
 246		} else {
 247			return 0
 248		}
 249	}
 250
 251	return 0
 252}
 253
 254func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
 255	var i, j int
 256
 257	// identify the opening tag
 258	if data[0] != '<' {
 259		return 0
 260	}
 261	curtag, tagfound := p.htmlFindTag(data[1:])
 262
 263	// handle special cases
 264	if !tagfound {
 265		// check for an HTML comment
 266		if size := p.htmlComment(out, data, doRender); size > 0 {
 267			return size
 268		}
 269
 270		// check for an <hr> tag
 271		if size := p.htmlHr(out, data, doRender); size > 0 {
 272			return size
 273		}
 274
 275		// no special case recognized
 276		return 0
 277	}
 278
 279	// look for an unindented matching closing tag
 280	// followed by a blank line
 281	found := false
 282	/*
 283		closetag := []byte("\n</" + curtag + ">")
 284		j = len(curtag) + 1
 285		for !found {
 286			// scan for a closing tag at the beginning of a line
 287			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
 288				j += skip + len(closetag)
 289			} else {
 290				break
 291			}
 292
 293			// see if it is the only thing on the line
 294			if skip := p.isEmpty(data[j:]); skip > 0 {
 295				// see if it is followed by a blank line/eof
 296				j += skip
 297				if j >= len(data) {
 298					found = true
 299					i = j
 300				} else {
 301					if skip := p.isEmpty(data[j:]); skip > 0 {
 302						j += skip
 303						found = true
 304						i = j
 305					}
 306				}
 307			}
 308		}
 309	*/
 310
 311	// if not found, try a second pass looking for indented match
 312	// but not if tag is "ins" or "del" (following original Markdown.pl)
 313	if !found && curtag != "ins" && curtag != "del" {
 314		i = 1
 315		for i < len(data) {
 316			i++
 317			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
 318				i++
 319			}
 320
 321			if i+2+len(curtag) >= len(data) {
 322				break
 323			}
 324
 325			j = p.htmlFindEnd(curtag, data[i-1:])
 326
 327			if j > 0 {
 328				i += j - 1
 329				found = true
 330				break
 331			}
 332		}
 333	}
 334
 335	if !found {
 336		return 0
 337	}
 338
 339	// the end of the block has been found
 340	if doRender {
 341		// trim newlines
 342		end := i
 343		for end > 0 && data[end-1] == '\n' {
 344			end--
 345		}
 346		p.r.BlockHtml(out, data[:end])
 347	}
 348
 349	return i
 350}
 351
 352// HTML comment, lax form
 353func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
 354	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
 355		return 0
 356	}
 357
 358	i := 5
 359
 360	// scan for an end-of-comment marker, across lines if necessary
 361	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
 362		i++
 363	}
 364	i++
 365
 366	// no end-of-comment marker
 367	if i >= len(data) {
 368		return 0
 369	}
 370
 371	// needs to end with a blank line
 372	if j := p.isEmpty(data[i:]); j > 0 {
 373		size := i + j
 374		if doRender {
 375			// trim trailing newlines
 376			end := size
 377			for end > 0 && data[end-1] == '\n' {
 378				end--
 379			}
 380			p.r.BlockHtml(out, data[:end])
 381		}
 382		return size
 383	}
 384
 385	return 0
 386}
 387
 388// HR, which is the only self-closing block tag considered
 389func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
 390	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 391		return 0
 392	}
 393	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 394		// not an <hr> tag after all; at least not a valid one
 395		return 0
 396	}
 397
 398	i := 3
 399	for data[i] != '>' && data[i] != '\n' {
 400		i++
 401	}
 402
 403	if data[i] == '>' {
 404		i++
 405		if j := p.isEmpty(data[i:]); j > 0 {
 406			size := i + j
 407			if doRender {
 408				// trim newlines
 409				end := size
 410				for end > 0 && data[end-1] == '\n' {
 411					end--
 412				}
 413				p.r.BlockHtml(out, data[:end])
 414			}
 415			return size
 416		}
 417	}
 418
 419	return 0
 420}
 421
 422func (p *parser) htmlFindTag(data []byte) (string, bool) {
 423	i := 0
 424	for isalnum(data[i]) {
 425		i++
 426	}
 427	key := string(data[:i])
 428	if blockTags[key] {
 429		return key, true
 430	}
 431	return "", false
 432}
 433
 434func (p *parser) htmlFindEnd(tag string, data []byte) int {
 435	// assume data[0] == '<' && data[1] == '/' already tested
 436
 437	// check if tag is a match
 438	closetag := []byte("</" + tag + ">")
 439	if !bytes.HasPrefix(data, closetag) {
 440		return 0
 441	}
 442	i := len(closetag)
 443
 444	// check that the rest of the line is blank
 445	skip := 0
 446	if skip = p.isEmpty(data[i:]); skip == 0 {
 447		return 0
 448	}
 449	i += skip
 450	skip = 0
 451
 452	if i >= len(data) {
 453		return i
 454	}
 455
 456	if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
 457		return i
 458	}
 459	if skip = p.isEmpty(data[i:]); skip == 0 {
 460		// following line must be blank
 461		return 0
 462	}
 463
 464	return i + skip
 465}
 466
 467func (p *parser) isEmpty(data []byte) int {
 468	// it is okay to call isEmpty on an empty buffer
 469	if len(data) == 0 {
 470		return 0
 471	}
 472
 473	var i int
 474	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 475		if data[i] != ' ' && data[i] != '\t' {
 476			return 0
 477		}
 478	}
 479	return i + 1
 480}
 481
 482func (p *parser) isHRule(data []byte) bool {
 483	i := 0
 484
 485	// skip up to three spaces
 486	for i < 3 && data[i] == ' ' {
 487		i++
 488	}
 489
 490	// look at the hrule char
 491	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 492		return false
 493	}
 494	c := data[i]
 495
 496	// the whole line must be the char or whitespace
 497	n := 0
 498	for data[i] != '\n' {
 499		switch {
 500		case data[i] == c:
 501			n++
 502		case data[i] != ' ':
 503			return false
 504		}
 505		i++
 506	}
 507
 508	return n >= 3
 509}
 510
 511func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
 512	i, size := 0, 0
 513	skip = 0
 514
 515	// skip up to three spaces
 516	for i < 3 && data[i] == ' ' {
 517		i++
 518	}
 519
 520	// check for the marker characters: ~ or `
 521	if data[i] != '~' && data[i] != '`' {
 522		return
 523	}
 524
 525	c := data[i]
 526
 527	// the whole line must be the same char or whitespace
 528	for data[i] == c {
 529		size++
 530		i++
 531	}
 532
 533	// the marker char must occur at least 3 times
 534	if size < 3 {
 535		return
 536	}
 537	marker = string(data[i-size : i])
 538
 539	// if this is the end marker, it must match the beginning marker
 540	if oldmarker != "" && marker != oldmarker {
 541		return
 542	}
 543
 544	if syntax != nil {
 545		syn := 0
 546
 547		for data[i] == ' ' {
 548			i++
 549		}
 550
 551		syntaxStart := i
 552
 553		if data[i] == '{' {
 554			i++
 555			syntaxStart++
 556
 557			for data[i] != '}' && data[i] != '\n' {
 558				syn++
 559				i++
 560			}
 561
 562			if data[i] != '}' {
 563				return
 564			}
 565
 566			// strip all whitespace at the beginning and the end
 567			// of the {} block
 568			for syn > 0 && isspace(data[syntaxStart]) {
 569				syntaxStart++
 570				syn--
 571			}
 572
 573			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 574				syn--
 575			}
 576
 577			i++
 578		} else {
 579			for !isspace(data[i]) {
 580				syn++
 581				i++
 582			}
 583		}
 584
 585		language := string(data[syntaxStart : syntaxStart+syn])
 586		*syntax = &language
 587	}
 588
 589	for data[i] == ' ' {
 590		i++
 591	}
 592	if data[i] != '\n' {
 593		return
 594	}
 595
 596	skip = i + 1
 597	return
 598}
 599
 600func (p *parser) fencedCode(out *bytes.Buffer, data []byte) int {
 601	var lang *string
 602	beg, marker := p.isFencedCode(data, &lang, "")
 603	if beg == 0 || beg >= len(data) {
 604		return 0
 605	}
 606
 607	var work bytes.Buffer
 608
 609	for {
 610		// safe to assume beg < len(data)
 611
 612		// check for the end of the code block
 613		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
 614		if fenceEnd != 0 {
 615			beg += fenceEnd
 616			break
 617		}
 618
 619		// copy the current line
 620		end := beg
 621		for data[end] != '\n' {
 622			end++
 623		}
 624		end++
 625
 626		// did we reach the end of the buffer without a closing marker?
 627		if end >= len(data) {
 628			return 0
 629		}
 630
 631		// verbatim copy to the working buffer
 632		work.Write(data[beg:end])
 633		beg = end
 634	}
 635
 636	syntax := ""
 637	if lang != nil {
 638		syntax = *lang
 639	}
 640
 641	p.r.BlockCode(out, work.Bytes(), syntax)
 642
 643	return beg
 644}
 645
 646func (p *parser) table(out *bytes.Buffer, data []byte) int {
 647	var header bytes.Buffer
 648	i, columns := p.tableHeader(&header, data)
 649	if i == 0 {
 650		return 0
 651	}
 652
 653	var body bytes.Buffer
 654
 655	for i < len(data) {
 656		pipes, rowStart := 0, i
 657		for ; data[i] != '\n'; i++ {
 658			if data[i] == '|' {
 659				pipes++
 660			}
 661		}
 662
 663		if pipes == 0 {
 664			i = rowStart
 665			break
 666		}
 667
 668		// include the newline in data sent to tableRow
 669		i++
 670		p.tableRow(&body, data[rowStart:i], columns, false)
 671	}
 672
 673	p.r.Table(out, header.Bytes(), body.Bytes(), columns)
 674
 675	return i
 676}
 677
 678// check if the specified position is preceeded by an odd number of backslashes
 679func isBackslashEscaped(data []byte, i int) bool {
 680	backslashes := 0
 681	for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
 682		backslashes++
 683	}
 684	return backslashes&1 == 1
 685}
 686
// tableHeader checks whether data starts with a table header: a header row
// containing at least one unescaped '|', followed by an underline row that
// defines the columns.
//
// On success the header row is rendered into out via tableRow, size is the
// number of bytes taken by both lines (newlines included), and columns
// holds one alignment bit set (TABLE_ALIGNMENT_LEFT / TABLE_ALIGNMENT_RIGHT)
// per column. On failure size is 0. Note that columns is a named result
// that may already have been allocated when a later check fails, so callers
// must look at size, not columns, to detect failure.
func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
	i := 0
	// start at one column and add one for every unescaped pipe in the row
	colCount := 1
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and spaces may precede the first column
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	for data[i] == ' ' {
		i++
	}

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		// counts dashes and alignment colons alike
		dashes := 0

		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_LEFT
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_RIGHT
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		// (the cases are order-dependent: the first one that holds wins)
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must define exactly as many columns as the header row
	if col != colCount {
		return
	}

	p.tableRow(out, header, columns, true)
	// i sits on the underline's newline; count it as consumed
	size = i + 1
	return
}
 791
 792func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
 793	i, col := 0, 0
 794	var rowWork bytes.Buffer
 795
 796	if data[i] == '|' && !isBackslashEscaped(data, i) {
 797		i++
 798	}
 799
 800	for col = 0; col < len(columns) && i < len(data); col++ {
 801		for data[i] == ' ' {
 802			i++
 803		}
 804
 805		cellStart := i
 806
 807		for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
 808			i++
 809		}
 810
 811		cellEnd := i
 812
 813		// skip the end-of-cell marker, possibly taking us past end of buffer
 814		i++
 815
 816		for cellEnd > cellStart && data[cellEnd-1] == ' ' {
 817			cellEnd--
 818		}
 819
 820		var cellWork bytes.Buffer
 821		p.inline(&cellWork, data[cellStart:cellEnd])
 822
 823		if header {
 824			p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
 825		} else {
 826			p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
 827		}
 828	}
 829
 830	// pad it out with empty columns to get the right number
 831	for ; col < len(columns); col++ {
 832		if header {
 833			p.r.TableHeaderCell(&rowWork, nil, columns[col])
 834		} else {
 835			p.r.TableCell(&rowWork, nil, columns[col])
 836		}
 837	}
 838
 839	// silently ignore rows with too many cells
 840
 841	p.r.TableRow(out, rowWork.Bytes())
 842}
 843
 844// returns blockquote prefix length
 845func (p *parser) quotePrefix(data []byte) int {
 846	i := 0
 847	for i < 3 && data[i] == ' ' {
 848		i++
 849	}
 850	if data[i] == '>' {
 851		if data[i+1] == ' ' {
 852			return i + 2
 853		}
 854		return i + 1
 855	}
 856	return 0
 857}
 858
 859// parse a blockquote fragment
 860func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 861	var raw bytes.Buffer
 862	beg, end := 0, 0
 863	for beg < len(data) {
 864		end = beg
 865		for data[end] != '\n' {
 866			end++
 867		}
 868		end++
 869
 870		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 871			// skip the prefix
 872			beg += pre
 873		} else if p.isEmpty(data[beg:]) > 0 &&
 874			(end >= len(data) ||
 875				(p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
 876			// blockquote ends with at least one blank line
 877			// followed by something without a blockquote prefix
 878			break
 879		}
 880
 881		// this line is part of the blockquote
 882		raw.Write(data[beg:end])
 883		beg = end
 884	}
 885
 886	var cooked bytes.Buffer
 887	p.block(&cooked, raw.Bytes())
 888	p.r.BlockQuote(out, cooked.Bytes())
 889	return end
 890}
 891
 892// returns prefix length for block code
 893func (p *parser) codePrefix(data []byte) int {
 894	if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
 895		return 4
 896	}
 897	return 0
 898}
 899
 900func (p *parser) code(out *bytes.Buffer, data []byte) int {
 901	var work bytes.Buffer
 902
 903	i := 0
 904	for i < len(data) {
 905		beg := i
 906		for data[i] != '\n' {
 907			i++
 908		}
 909		i++
 910
 911		blankline := p.isEmpty(data[beg:i]) > 0
 912		if pre := p.codePrefix(data[beg:i]); pre > 0 {
 913			beg += pre
 914		} else if !blankline {
 915			// non-empty, non-prefixed line breaks the pre
 916			i = beg
 917			break
 918		}
 919
 920		// verbatim copy to the working buffeu
 921		if blankline {
 922			work.WriteByte('\n')
 923		} else {
 924			work.Write(data[beg:i])
 925		}
 926	}
 927
 928	// trim all the \n off the end of work
 929	workbytes := work.Bytes()
 930	eol := len(workbytes)
 931	for eol > 0 && workbytes[eol-1] == '\n' {
 932		eol--
 933	}
 934	if eol != len(workbytes) {
 935		work.Truncate(eol)
 936	}
 937
 938	work.WriteByte('\n')
 939
 940	p.r.BlockCode(out, work.Bytes(), "")
 941
 942	return i
 943}
 944
 945// returns unordered list item prefix
 946func (p *parser) uliPrefix(data []byte) int {
 947	i := 0
 948
 949	// start with up to 3 spaces
 950	for i < 3 && data[i] == ' ' {
 951		i++
 952	}
 953
 954	// need a *, +, or - followed by a space
 955	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
 956		data[i+1] != ' ' {
 957		return 0
 958	}
 959	return i + 2
 960}
 961
 962// returns ordered list item prefix
 963func (p *parser) oliPrefix(data []byte) int {
 964	i := 0
 965
 966	// start with up to 3 spaces
 967	for i < 3 && data[i] == ' ' {
 968		i++
 969	}
 970
 971	// count the digits
 972	start := i
 973	for data[i] >= '0' && data[i] <= '9' {
 974		i++
 975	}
 976
 977	// we need >= 1 digits followed by a dot and a space
 978	if start == i || data[i] != '.' || data[i+1] != ' ' {
 979		return 0
 980	}
 981	return i + 2
 982}
 983
 984// parse ordered or unordered list block
 985func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
 986	i := 0
 987	flags |= LIST_ITEM_BEGINNING_OF_LIST
 988	work := func() bool {
 989		for i < len(data) {
 990			skip := p.listItem(out, data[i:], &flags)
 991			i += skip
 992
 993			if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
 994				break
 995			}
 996			flags &= ^LIST_ITEM_BEGINNING_OF_LIST
 997		}
 998		return true
 999	}
1000
1001	p.r.List(out, work, flags)
1002	return i
1003}
1004
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// data must start with an unordered or ordered list prefix; otherwise 0 is
// returned and nothing is rendered. The item's first line and all following
// lines judged to belong to it are gathered (with up to four spaces of
// indentation removed from continuation lines), rendered, and passed to
// p.r.ListItem without trailing newlines.
//
// flags is read and updated in place: LIST_ITEM_CONTAINS_BLOCK is set when
// the item has to be rendered as block content, LIST_ITEM_END_OF_LIST when
// the line following the item shows that the whole list has ended.
//
// The return value is the number of bytes consumed; the line that ended
// the item is not part of it.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// i: offset just past the list marker, 0 if there is none
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	// (i ends up just past the newline)
	line := i
	for data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	// containsBlankLine: one or more blank lines were seen since the last
	// line that was added to raw
	// sublist: offset in raw where the first nested list item starts,
	// 0 while no nested list has been seen
	containsBlankLine := false
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		// (at most four spaces are counted and later stripped)
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			*flags |= LIST_ITEM_END_OF_LIST
			break gatherlines

		// a blank line means this should be parsed as a block
		// (this newline is written in addition to the one added below)
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item
	// (text before a nested list and the nested list itself are rendered
	// separately; the nested list always goes through p.block)
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
		// intermediate render of block li
		if sublist > 0 {
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline li
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	// line is the start of the first line that is not part of this item
	return line
}
1160
1161// render a single paragraph that has already been parsed out
1162func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1163	if len(data) == 0 {
1164		return
1165	}
1166
1167	// trim leading spaces
1168	beg := 0
1169	for data[beg] == ' ' {
1170		beg++
1171	}
1172
1173	// trim trailing newline
1174	end := len(data) - 1
1175
1176	// trim trailing spaces
1177	for end > beg && data[end-1] == ' ' {
1178		end--
1179	}
1180
1181	work := func() bool {
1182		p.inline(out, data[beg:end])
1183		return true
1184	}
1185	p.r.Paragraph(out, work)
1186}
1187
1188func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1189	// prev: index of 1st char of previous line
1190	// line: index of 1st char of current line
1191	// i: index of cursor/end of current line
1192	var prev, line, i int
1193
1194	// keep going until we find something to mark the end of the paragraph
1195	for i < len(data) {
1196		// mark the beginning of the current line
1197		prev = line
1198		current := data[i:]
1199		line = i
1200
1201		// did we find a blank line marking the end of the paragraph?
1202		if n := p.isEmpty(current); n > 0 {
1203			p.renderParagraph(out, data[:i])
1204			return i + n
1205		}
1206
1207		// an underline under some text marks a header, so our paragraph ended on prev line
1208		if i > 0 {
1209			if level := p.isUnderlinedHeader(current); level > 0 {
1210				// render the paragraph
1211				p.renderParagraph(out, data[:prev])
1212
1213				// ignore leading and trailing whitespace
1214				eol := i - 1
1215				for prev < eol && data[prev] == ' ' {
1216					prev++
1217				}
1218				for eol > prev && data[eol-1] == ' ' {
1219					eol--
1220				}
1221
1222				// render the header
1223				// this ugly double closure avoids forcing variables onto the heap
1224				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
1225					return func() bool {
1226						pp.inline(o, d)
1227						return true
1228					}
1229				}(out, p, data[prev:eol])
1230				p.r.Header(out, work, level, "")
1231
1232				// find the end of the underline
1233				for data[i] != '\n' {
1234					i++
1235				}
1236				return i
1237			}
1238		}
1239
1240		// if the next line starts a block of HTML, then the paragraph ends here
1241		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1242			if data[i] == '<' && p.html(out, current, false) > 0 {
1243				// rewind to before the HTML block
1244				p.renderParagraph(out, data[:i])
1245				return i
1246			}
1247		}
1248
1249		// if there's a prefixed header or a horizontal rule after this, paragraph is over
1250		if p.isPrefixHeader(current) || p.isHRule(current) {
1251			p.renderParagraph(out, data[:i])
1252			return i
1253		}
1254
1255		// if there's a list after this, paragraph is over
1256		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
1257			if p.uliPrefix(current) != 0 ||
1258				p.oliPrefix(current) != 0 ||
1259				p.quotePrefix(current) != 0 ||
1260				p.codePrefix(current) != 0 {
1261				p.renderParagraph(out, data[:i])
1262				return i
1263			}
1264		}
1265
1266		// otherwise, scan to the beginning of the next line
1267		for data[i] != '\n' {
1268			i++
1269		}
1270		i++
1271	}
1272
1273	p.renderParagraph(out, data[:i])
1274	return i
1275}
all repos — grayfriday @ 7ad5f9c1197c54c0be24fb0b0b1d75eb7ef19d89

blackfriday fork with a few changes