icy does git — grayfriday (fb56a7e01e0e5f67a4854838504c2565d6224236): inline.go

inline.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse inline elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"regexp"
  19	"strconv"
  20)
  21
  22var (
    	// urlRe is regexp source text (it is not compiled on its own) for an
    	// http/https/ftp URL or a path that starts with '/'.
  23	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
    	// anchorRe matches, anchored at the start of its input, a complete
    	// <a href="URL" [title="..."]>URL</a> element. autoLink uses it to
    	// detect a URL that already sits inside such an anchor.
  24	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
  25
  26	// TODO: improve this regexp to catch all possible entities:
    	// htmlEntityRe matches '&', two to five lowercase ASCII letters, then ';'.
  27	htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
  28)
  29
  30// Functions to parse text within a block.
  31// Each function returns the number of bytes it consumed (0 when it took no action) and the node it produced, if any.
  32// data is the complete block being rendered.
  33// offset is the index in data of the byte at the current cursor.
  34
  35func (p *parser) inline(currBlock *Node, data []byte) {
  36	// this is called recursively: enforce a maximum depth
  37	if p.nesting >= p.maxNesting {
  38		return
  39	}
  40	p.nesting++
  41
  42	i, end := 0, 0
  43	for i < len(data) {
  44		// Stop at EOL
  45		if data[i] == '\n' && i+1 == len(data) {
  46			break
  47		}
  48
  49		for ; end < len(data); end++ {
  50			if p.inlineCallback[data[end]] != nil {
  51				break
  52			}
  53		}
  54
  55		if end >= len(data) {
  56			if data[end-1] == '\n' {
  57				currBlock.AppendChild(text(data[i : end-1]))
  58			} else {
  59				currBlock.AppendChild(text(data[i:end]))
  60			}
  61			break
  62		}
  63
  64		// call the trigger
  65		handler := p.inlineCallback[data[end]]
  66		if consumed, node := handler(p, data, end); consumed == 0 {
  67			// No action from the callback.
  68			end++
  69		} else {
  70			// Copy inactive chars into the output.
  71			currBlock.AppendChild(text(data[i:end]))
  72			if node != nil {
  73				currBlock.AppendChild(node)
  74			}
  75			// Skip past whatever the callback used.
  76			i = end + consumed
  77			end = i
  78		}
  79	}
  80
  81	p.nesting--
  82}
  83
  84// single and double emphasis parsing
  85func emphasis(p *parser, data []byte, offset int) (int, *Node) {
  86	data = data[offset:]
  87	c := data[0]
  88
  89	if len(data) > 2 && data[1] != c {
  90		// whitespace cannot follow an opening emphasis;
  91		// strikethrough only takes two characters '~~'
  92		if c == '~' || isspace(data[1]) {
  93			return 0, nil
  94		}
  95		ret, node := helperEmphasis(p, data[1:], c)
  96		if ret == 0 {
  97			return 0, nil
  98		}
  99
 100		return ret + 1, node
 101	}
 102
 103	if len(data) > 3 && data[1] == c && data[2] != c {
 104		if isspace(data[2]) {
 105			return 0, nil
 106		}
 107		ret, node := helperDoubleEmphasis(p, data[2:], c)
 108		if ret == 0 {
 109			return 0, nil
 110		}
 111
 112		return ret + 2, node
 113	}
 114
 115	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
 116		if c == '~' || isspace(data[3]) {
 117			return 0, nil
 118		}
 119		ret, node := helperTripleEmphasis(p, data, 3, c)
 120		if ret == 0 {
 121			return 0, nil
 122		}
 123
 124		return ret + 3, node
 125	}
 126
 127	return 0, nil
 128}
 129
 130func codeSpan(p *parser, data []byte, offset int) (int, *Node) {
 131	data = data[offset:]
 132
 133	nb := 0
 134
 135	// count the number of backticks in the delimiter
 136	for nb < len(data) && data[nb] == '`' {
 137		nb++
 138	}
 139
 140	// find the next delimiter
 141	i, end := 0, 0
 142	for end = nb; end < len(data) && i < nb; end++ {
 143		if data[end] == '`' {
 144			i++
 145		} else {
 146			i = 0
 147		}
 148	}
 149
 150	// no matching delimiter?
 151	if i < nb && end >= len(data) {
 152		return 0, nil
 153	}
 154
 155	// trim outside whitespace
 156	fBegin := nb
 157	for fBegin < end && data[fBegin] == ' ' {
 158		fBegin++
 159	}
 160
 161	fEnd := end - nb
 162	for fEnd > fBegin && data[fEnd-1] == ' ' {
 163		fEnd--
 164	}
 165
 166	// render the code span
 167	if fBegin != fEnd {
 168		code := NewNode(Code)
 169		code.Literal = data[fBegin:fEnd]
 170		return end, code
 171	}
 172
 173	return end, nil
 174}
 175
 176// newline preceded by two spaces becomes <br>
 177func maybeLineBreak(p *parser, data []byte, offset int) (int, *Node) {
 178	origOffset := offset
 179	for offset < len(data) && data[offset] == ' ' {
 180		offset++
 181	}
 182
 183	if offset < len(data) && data[offset] == '\n' {
 184		if offset-origOffset >= 2 {
 185			return offset - origOffset + 1, NewNode(Hardbreak)
 186		}
 187		return offset - origOffset, nil
 188	}
 189	return 0, nil
 190}
 191
 192// newline without two spaces works when HardLineBreak is enabled
 193func lineBreak(p *parser, data []byte, offset int) (int, *Node) {
 194	if p.flags&HardLineBreak != 0 {
 195		return 1, NewNode(Hardbreak)
 196	}
 197	return 0, nil
 198}
 199
 // linkType identifies which bracketed construct the link function is parsing.
 200type linkType int
 201
 202const (
    	// [text] == regular link
 203	linkNormal linkType = iota
    	// ![alt] == image
 204	linkImg
    	// [^refId] == deferred footnote
 205	linkDeferredFootnote
    	// ^[text] == inline footnote
 206	linkInlineFootnote
 207)
 208
 209func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
 210	if t == linkDeferredFootnote {
 211		return false
 212	}
 213	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
 214}
 215
 216func maybeImage(p *parser, data []byte, offset int) (int, *Node) {
 217	if offset < len(data)-1 && data[offset+1] == '[' {
 218		return link(p, data, offset)
 219	}
 220	return 0, nil
 221}
 222
 223func maybeInlineFootnote(p *parser, data []byte, offset int) (int, *Node) {
 224	if offset < len(data)-1 && data[offset+1] == '[' {
 225		return link(p, data, offset)
 226	}
 227	return 0, nil
 228}
 229
 230// '[': parse a link or an image or a footnote
    //
    // link is also called by maybeImage and maybeInlineFootnote with offset
    // pointing at the byte just before '[' (handled below as '!' and '^').
    // It returns the number of bytes consumed, counted from the original
    // offset, together with the resulting node, or (0, nil) when the text is
    // not a usable link, image or footnote.
    //
    // Three syntaxes are recognised after the closing ']':
    //   - inline:    [text](destination "title")
    //   - reference: [text][id]  (an empty id falls back to the text)
    //   - shortcut:  [text], also used for inline and deferred footnotes
 231func link(p *parser, data []byte, offset int) (int, *Node) {
 232	// no links allowed inside regular links, footnote, and deferred footnotes
 233	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
 234		return 0, nil
 235	}
 236
 237	var t linkType
 238	switch {
 239	// special case: ![^text] == deferred footnote (that follows something with
 240	// an exclamation point)
 241	case p.flags&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
 242		t = linkDeferredFootnote
 243	// ![alt] == image
 244	case offset >= 0 && data[offset] == '!':
 245		t = linkImg
 246		offset++
 247	// ^[text] == inline footnote
 248	// [^refId] == deferred footnote
 249	case p.flags&Footnotes != 0:
 250		if offset >= 0 && data[offset] == '^' {
 251			t = linkInlineFootnote
 252			offset++
 253		} else if len(data)-1 > offset && data[offset+1] == '^' {
 254			t = linkDeferredFootnote
 255		}
 256	// [text] == regular link
 257	default:
 258		t = linkNormal
 259	}
 260
    	// from here on every index is relative to the (possibly advanced) offset
 261	data = data[offset:]
 262
 263	var (
 264		i                       = 1
 265		noteID                  int
 266		title, link, altContent []byte
 267		textHasNl               = false
 268	)
 269
    	// a deferred footnote has '^' right after the first byte; step over it
 270	if t == linkDeferredFootnote {
 271		i++
 272	}
 273
 274	// look for the matching closing bracket
 275	for level := 1; level > 0 && i < len(data); i++ {
 276		switch {
 277		case data[i] == '\n':
 278			textHasNl = true
 279
 280		case data[i-1] == '\\':
 281			continue
 282
 283		case data[i] == '[':
 284			level++
 285
 286		case data[i] == ']':
 287			level--
 288			if level <= 0 {
 289				i-- // compensate for extra i++ in for loop
 290			}
 291		}
 292	}
 293
 294	if i >= len(data) {
 295		return 0, nil
 296	}
 297
    	// txtE is the index of the closing ']' of the link text
 298	txtE := i
 299	i++
 300	var footnoteNode *Node
 301
 302	// skip any amount of whitespace or newline
 303	// (this is much more lax than original markdown syntax)
 304	for i < len(data) && isspace(data[i]) {
 305		i++
 306	}
 307
 308	// inline style link
 309	switch {
 310	case i < len(data) && data[i] == '(':
 311		// skip initial whitespace
 312		i++
 313
 314		for i < len(data) && isspace(data[i]) {
 315			i++
 316		}
 317
 318		linkB := i
 319
 320		// look for link end: ' " )
 321	findlinkend:
 322		for i < len(data) {
 323			switch {
 324			case data[i] == '\\':
 325				i += 2
 326
 327			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
 328				break findlinkend
 329
 330			default:
 331				i++
 332			}
 333		}
 334
 335		if i >= len(data) {
 336			return 0, nil
 337		}
 338		linkE := i
 339
 340		// look for title end if present
 341		titleB, titleE := 0, 0
 342		if data[i] == '\'' || data[i] == '"' {
 343			i++
 344			titleB = i
 345
 346		findtitleend:
 347			for i < len(data) {
 348				switch {
 349				case data[i] == '\\':
 350					i += 2
 351
 352				case data[i] == ')':
 353					break findtitleend
 354
 355				default:
 356					i++
 357				}
 358			}
 359
 360			if i >= len(data) {
 361				return 0, nil
 362			}
 363
 364			// skip whitespace after title
 365			titleE = i - 1
 366			for titleE > titleB && isspace(data[titleE]) {
 367				titleE--
 368			}
 369
 370			// check for closing quote presence
    			// (without one, the quote is taken as part of the destination)
 371			if data[titleE] != '\'' && data[titleE] != '"' {
 372				titleB, titleE = 0, 0
 373				linkE = i
 374			}
 375		}
 376
 377		// remove whitespace at the end of the link
 378		for linkE > linkB && isspace(data[linkE-1]) {
 379			linkE--
 380		}
 381
 382		// remove optional angle brackets around the link
 383		if data[linkB] == '<' {
 384			linkB++
 385		}
 386		if data[linkE-1] == '>' {
 387			linkE--
 388		}
 389
 390		// build escaped link and title
 391		if linkE > linkB {
 392			link = data[linkB:linkE]
 393		}
 394
 395		if titleE > titleB {
 396			title = data[titleB:titleE]
 397		}
 398
    		// step past the closing ')'
 399		i++
 400
 401	// reference style link
 402	case isReferenceStyleLink(data, i, t):
 403		var id []byte
 404		altContentConsidered := false
 405
 406		// look for the id
 407		i++
 408		linkB := i
 409		for i < len(data) && data[i] != ']' {
 410			i++
 411		}
 412		if i >= len(data) {
 413			return 0, nil
 414		}
 415		linkE := i
 416
 417		// find the reference
    		// (an empty "[]" means the link text itself is the id)
 418		if linkB == linkE {
 419			if textHasNl {
 420				var b bytes.Buffer
 421
    				// copy the text, replacing each newline with a single
    				// space unless a space already precedes it
 422				for j := 1; j < txtE; j++ {
 423					switch {
 424					case data[j] != '\n':
 425						b.WriteByte(data[j])
 426					case data[j-1] != ' ':
 427						b.WriteByte(' ')
 428					}
 429				}
 430
 431				id = b.Bytes()
 432			} else {
 433				id = data[1:txtE]
 434				altContentConsidered = true
 435			}
 436		} else {
 437			id = data[linkB:linkE]
 438		}
 439
 440		// find the reference with matching id
 441		lr, ok := p.getRef(string(id))
 442		if !ok {
 443			return 0, nil
 444		}
 445
 446		// keep link and title from reference
 447		link = lr.link
 448		title = lr.title
 449		if altContentConsidered {
 450			altContent = lr.text
 451		}
 452		i++
 453
 454	// shortcut reference style link or reference or inline footnote
 455	default:
 456		var id []byte
 457
 458		// craft the id
 459		if textHasNl {
 460			var b bytes.Buffer
 461
 462			for j := 1; j < txtE; j++ {
 463				switch {
 464				case data[j] != '\n':
 465					b.WriteByte(data[j])
 466				case data[j-1] != ' ':
 467					b.WriteByte(' ')
 468				}
 469			}
 470
 471			id = b.Bytes()
 472		} else {
 473			if t == linkDeferredFootnote {
 474				id = data[2:txtE] // get rid of the ^
 475			} else {
 476				id = data[1:txtE]
 477			}
 478		}
 479
 480		footnoteNode = NewNode(Item)
 481		if t == linkInlineFootnote {
 482			// create a new reference
 483			noteID = len(p.notes) + 1
 484
    			// the fragment is the slugified id cut to at most 16 bytes,
    			// or "footnote-<n>" when the id is empty
 485			var fragment []byte
 486			if len(id) > 0 {
 487				if len(id) < 16 {
 488					fragment = make([]byte, len(id))
 489				} else {
 490					fragment = make([]byte, 16)
 491				}
 492				copy(fragment, slugify(id))
 493			} else {
 494				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
 495			}
 496
 497			ref := &reference{
 498				noteID:   noteID,
 499				hasBlock: false,
 500				link:     fragment,
 501				title:    id,
 502				footnote: footnoteNode,
 503			}
 504
 505			p.notes = append(p.notes, ref)
 506
 507			link = ref.link
 508			title = ref.title
 509		} else {
 510			// find the reference with matching id
 511			lr, ok := p.getRef(string(id))
 512			if !ok {
 513				return 0, nil
 514			}
 515
 516			if t == linkDeferredFootnote {
 517				lr.noteID = len(p.notes) + 1
 518				lr.footnote = footnoteNode
 519				p.notes = append(p.notes, lr)
 520			}
 521
 522			// keep link and title from reference
 523			link = lr.link
 524			// if inline footnote, title == footnote contents
 525			title = lr.title
 526			noteID = lr.noteID
 527		}
 528
 529		// rewind the whitespace
 530		i = txtE + 1
 531	}
 532
 533	var uLink []byte
 534	if t == linkNormal || t == linkImg {
 535		if len(link) > 0 {
 536			var uLinkBuf bytes.Buffer
 537			unescapeText(&uLinkBuf, link)
 538			uLink = uLinkBuf.Bytes()
 539		}
 540
 541		// links need something to click on and somewhere to go
 542		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
 543			return 0, nil
 544		}
 545	}
 546
 547	// call the relevant rendering function
 548	var linkNode *Node
 549	switch t {
 550	case linkNormal:
 551		linkNode = NewNode(Link)
 552		linkNode.Destination = normalizeURI(uLink)
 553		linkNode.Title = title
 554		if len(altContent) > 0 {
 555			linkNode.AppendChild(text(altContent))
 556		} else {
 557			// links cannot contain other links, so turn off link parsing
 558			// temporarily and recurse
 559			insideLink := p.insideLink
 560			p.insideLink = true
 561			p.inline(linkNode, data[1:txtE])
 562			p.insideLink = insideLink
 563		}
 564
 565	case linkImg:
 566		linkNode = NewNode(Image)
 567		linkNode.Destination = uLink
 568		linkNode.Title = title
 569		linkNode.AppendChild(text(data[1:txtE]))
    		// i is relative to the re-sliced data; add the '!' skipped by offset++
 570		i++
 571
 572	case linkInlineFootnote, linkDeferredFootnote:
 573		linkNode = NewNode(Link)
 574		linkNode.Destination = link
 575		linkNode.Title = title
 576		linkNode.NoteID = noteID
 577		linkNode.Footnote = footnoteNode
    		// likewise add the '^' skipped by offset++ for an inline footnote
 578		if t == linkInlineFootnote {
 579			i++
 580		}
 581
 582	default:
 583		return 0, nil
 584	}
 585
 586	return i, linkNode
 587}
 588
 589func (p *parser) inlineHTMLComment(data []byte) int {
 590	if len(data) < 5 {
 591		return 0
 592	}
 593	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
 594		return 0
 595	}
 596	i := 5
 597	// scan for an end-of-comment marker, across lines if necessary
 598	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
 599		i++
 600	}
 601	// no end-of-comment marker
 602	if i >= len(data) {
 603		return 0
 604	}
 605	return i + 1
 606}
 607
 // stripMailto returns link without a leading "mailto://" or "mailto:" scheme.
// The match is case-sensitive and at most one prefix is removed; any other
// input is returned unchanged.
func stripMailto(link []byte) []byte {
	// the longer form is tried first so that "mailto://" is removed whole
	for _, scheme := range [...]string{"mailto://", "mailto:"} {
		if bytes.HasPrefix(link, []byte(scheme)) {
			return link[len(scheme):]
		}
	}
	return link
}
 617
 618// autolinkType specifies a kind of autolink that gets detected.
 619type autolinkType int
 620
 621// These are the possible flag values for the autolink renderer.
 622const (
    	// notAutolink: not an autolink (tagLength also reports ordinary tags this way)
 623	notAutolink autolinkType = iota
    	// normalAutolink: tagLength saw a scheme followed by ':' inside <...>
 624	normalAutolink
    	// emailAutolink: tagLength saw an e-mail address inside <...>
 625	emailAutolink
 626)
 627
 628// '<' when tags or autolinks are allowed
 629func leftAngle(p *parser, data []byte, offset int) (int, *Node) {
 630	data = data[offset:]
 631	altype, end := tagLength(data)
 632	if size := p.inlineHTMLComment(data); size > 0 {
 633		end = size
 634	}
 635	if end > 2 {
 636		if altype != notAutolink {
 637			var uLink bytes.Buffer
 638			unescapeText(&uLink, data[1:end+1-2])
 639			if uLink.Len() > 0 {
 640				link := uLink.Bytes()
 641				node := NewNode(Link)
 642				node.Destination = link
 643				if altype == emailAutolink {
 644					node.Destination = append([]byte("mailto:"), link...)
 645				}
 646				node.AppendChild(text(stripMailto(link)))
 647				return end, node
 648			}
 649		} else {
 650			htmlTag := NewNode(HTMLSpan)
 651			htmlTag.Literal = data[:end]
 652			return end, htmlTag
 653		}
 654	}
 655
 656	return end, nil
 657}
 658
 659// '\\' backslash escape
 660var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
 661
 662func escape(p *parser, data []byte, offset int) (int, *Node) {
 663	data = data[offset:]
 664
 665	if len(data) > 1 {
 666		if p.flags&BackslashLineBreak != 0 && data[1] == '\n' {
 667			return 2, NewNode(Hardbreak)
 668		}
 669		if bytes.IndexByte(escapeChars, data[1]) < 0 {
 670			return 0, nil
 671		}
 672
 673		return 2, text(data[1:2])
 674	}
 675
 676	return 2, nil
 677}
 678
 // unescapeText appends src to ob with backslash escapes resolved: every
// backslash is dropped and the byte after it is copied as-is. A backslash that
// is the final byte of src is dropped.
func unescapeText(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		slash := bytes.IndexByte(src, '\\')
		if slash < 0 {
			// no more escapes: copy the remainder and stop
			ob.Write(src)
			return
		}

		if slash > 0 {
			ob.Write(src[:slash])
		}
		if slash+1 >= len(src) {
			// trailing backslash with nothing to escape
			return
		}

		ob.WriteByte(src[slash+1])
		src = src[slash+2:]
	}
}
 699
 700// '&' escaped when it doesn't belong to an entity
 701// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 702func entity(p *parser, data []byte, offset int) (int, *Node) {
 703	data = data[offset:]
 704
 705	end := 1
 706
 707	if end < len(data) && data[end] == '#' {
 708		end++
 709	}
 710
 711	for end < len(data) && isalnum(data[end]) {
 712		end++
 713	}
 714
 715	if end < len(data) && data[end] == ';' {
 716		end++ // real entity
 717	} else {
 718		return 0, nil // lone '&'
 719	}
 720
 721	ent := data[:end]
 722	// undo &amp; escaping or it will be converted to &amp;amp; by another
 723	// escaper in the renderer
 724	if bytes.Equal(ent, []byte("&amp;")) {
 725		ent = []byte{'&'}
 726	}
 727
 728	return end, text(ent)
 729}
 730
 731func linkEndsWithEntity(data []byte, linkEnd int) bool {
 732	entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
 733	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
 734}
 735
 736func maybeAutoLink(p *parser, data []byte, offset int) (int, *Node) {
 737	// quick check to rule out most false hits
 738	if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below
 739		return 0, nil
 740	}
 741	prefixes := []string{
 742		"http://",
 743		"https://",
 744		"ftp://",
 745		"file://",
 746		"mailto:",
 747	}
 748	for _, prefix := range prefixes {
 749		endOfHead := offset + 8 // 8 is the len() of the longest prefix
 750		if endOfHead > len(data) {
 751			endOfHead = len(data)
 752		}
 753		head := bytes.ToLower(data[offset:endOfHead])
 754		if bytes.HasPrefix(head, []byte(prefix)) {
 755			return autoLink(p, data, offset)
 756		}
 757	}
 758	return 0, nil
 759}
 760
 // autoLink tries to turn the bare URL at data[offset] into a Link node.
    // It returns the number of bytes consumed and the node. (0, nil) is
    // returned when too many letters precede the URL or isSafeLink rejects it.
    // When the URL lies inside an <a href=...>...</a> element matched by
    // anchorRe, the remainder of that element is returned as an HTMLSpan.
 761func autoLink(p *parser, data []byte, offset int) (int, *Node) {
 762	// Now a more expensive check to see if we're not inside an anchor element
 763	anchorStart := offset
 764	offsetFromAnchor := 0
 765	for anchorStart > 0 && data[anchorStart] != '<' {
 766		anchorStart--
 767		offsetFromAnchor++
 768	}
 769
 770	anchorStr := anchorRe.Find(data[anchorStart:])
 771	if anchorStr != nil {
    		// emit everything from offset to the end of the anchor verbatim
 772		anchorClose := NewNode(HTMLSpan)
 773		anchorClose.Literal = anchorStr[offsetFromAnchor:]
 774		return len(anchorStr) - offsetFromAnchor, anchorClose
 775	}
 776
 777	// scan backward for a word boundary
 778	rewind := 0
 779	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
 780		rewind++
 781	}
 782	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
 783		return 0, nil
 784	}
 785
    	// keep the full buffer for the backward delimiter scan further down
 786	origData := data
 787	data = data[offset-rewind:]
 788
 789	if !isSafeLink(data) {
 790		return 0, nil
 791	}
 792
 793	linkEnd := 0
 794	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
 795		linkEnd++
 796	}
 797
 798	// Skip punctuation at the end of the link
 799	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
 800		linkEnd--
 801	}
 802
 803	// But don't skip semicolon if it's a part of escaped entity:
 804	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
 805		linkEnd--
 806	}
 807
 808	// See if the link finishes with a punctuation sign that can be closed.
 809	var copen byte
 810	switch data[linkEnd-1] {
 811	case '"':
 812		copen = '"'
 813	case '\'':
 814		copen = '\''
 815	case ')':
 816		copen = '('
 817	case ']':
 818		copen = '['
 819	case '}':
 820		copen = '{'
 821	default:
 822		copen = 0
 823	}
 824
 825	if copen != 0 {
    		// index in origData of the byte just before the closing delimiter
 826		bufEnd := offset - rewind + linkEnd - 2
 827
 828		openDelim := 1
 829
 830		/* Try to close the final punctuation sign in this same line;
 831		 * if we managed to close it outside of the URL, that means that it's
 832		 * not part of the URL. If it closes inside the URL, that means it
 833		 * is part of the URL.
 834		 *
 835		 * Examples:
 836		 *
 837		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
 838		 *              => http://www.pokemon.com/Pikachu_(Electric)
 839		 *
 840		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
 841		 *              => http://www.pokemon.com/Pikachu_(Electric)
 842		 *
 843		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
 844		 *              => http://www.pokemon.com/Pikachu_(Electric))
 845		 *
 846		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
 847		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
 848		 */
 849
    		// walk backwards to the start of the line, balancing delimiters
 850		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
 851			if origData[bufEnd] == data[linkEnd-1] {
 852				openDelim++
 853			}
 854
 855			if origData[bufEnd] == copen {
 856				openDelim--
 857			}
 858
 859			bufEnd--
 860		}
 861
    		// balanced: the trailing delimiter is dropped from the URL
 862		if openDelim == 0 {
 863			linkEnd--
 864		}
 865	}
 866
 867	var uLink bytes.Buffer
 868	unescapeText(&uLink, data[:linkEnd])
 869
 870	if uLink.Len() > 0 {
 871		node := NewNode(Link)
 872		node.Destination = uLink.Bytes()
 873		node.AppendChild(text(uLink.Bytes()))
 874		return linkEnd, node
 875	}
 876
 877	return linkEnd, nil
 878}
 879
 880func isEndOfLink(char byte) bool {
 881	return isspace(char) || char == '<'
 882}
 883
 884var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
 885var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
 886
 887func isSafeLink(link []byte) bool {
 888	for _, path := range validPaths {
 889		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
 890			if len(link) == len(path) {
 891				return true
 892			} else if isalnum(link[len(path)]) {
 893				return true
 894			}
 895		}
 896	}
 897
 898	for _, prefix := range validUris {
 899		// TODO: handle unicode here
 900		// case-insensitive prefix test
 901		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
 902			return true
 903		}
 904	}
 905
 906	return false
 907}
 908
 909// return the length of the given tag, or 0 is it's not valid
 910func tagLength(data []byte) (autolink autolinkType, end int) {
 911	var i, j int
 912
 913	// a valid tag can't be shorter than 3 chars
 914	if len(data) < 3 {
 915		return notAutolink, 0
 916	}
 917
 918	// begins with a '<' optionally followed by '/', followed by letter or number
 919	if data[0] != '<' {
 920		return notAutolink, 0
 921	}
 922	if data[1] == '/' {
 923		i = 2
 924	} else {
 925		i = 1
 926	}
 927
 928	if !isalnum(data[i]) {
 929		return notAutolink, 0
 930	}
 931
 932	// scheme test
 933	autolink = notAutolink
 934
 935	// try to find the beginning of an URI
 936	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
 937		i++
 938	}
 939
 940	if i > 1 && i < len(data) && data[i] == '@' {
 941		if j = isMailtoAutoLink(data[i:]); j != 0 {
 942			return emailAutolink, i + j
 943		}
 944	}
 945
 946	if i > 2 && i < len(data) && data[i] == ':' {
 947		autolink = normalAutolink
 948		i++
 949	}
 950
 951	// complete autolink test: no whitespace or ' or "
 952	switch {
 953	case i >= len(data):
 954		autolink = notAutolink
 955	case autolink != notAutolink:
 956		j = i
 957
 958		for i < len(data) {
 959			if data[i] == '\\' {
 960				i += 2
 961			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
 962				break
 963			} else {
 964				i++
 965			}
 966
 967		}
 968
 969		if i >= len(data) {
 970			return autolink, 0
 971		}
 972		if i > j && data[i] == '>' {
 973			return autolink, i + 1
 974		}
 975
 976		// one of the forbidden chars has been found
 977		autolink = notAutolink
 978	}
 979	i += bytes.IndexByte(data[i:], '>')
 980	if i < 0 {
 981		return autolink, 0
 982	}
 983	return autolink, i + 1
 984}
 985
 986// look for the address part of a mail autolink and '>'
 987// this is less strict than the original markdown e-mail address matching
 988func isMailtoAutoLink(data []byte) int {
 989	nb := 0
 990
 991	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
 992	for i := 0; i < len(data); i++ {
 993		if isalnum(data[i]) {
 994			continue
 995		}
 996
 997		switch data[i] {
 998		case '@':
 999			nb++
1000
1001		case '-', '.', '_':
1002			break
1003
1004		case '>':
1005			if nb == 1 {
1006				return i + 1
1007			}
1008			return 0
1009		default:
1010			return 0
1011		}
1012	}
1013
1014	return 0
1015}
1016
// helperFindEmphChar returns the index in data of the next emphasis byte c,
// stepping over backslash-escaped bytes, code spans and link-like bracket
// constructs. A result of 0 means that nothing was found (or that data starts
// with c). When a code span or bracket construct runs to the end of data
// without closing, the first c seen inside it is returned instead.
func helperFindEmphChar(data []byte, c byte) int {
	pos := 0

	for pos < len(data) {
		// skip everything that is neither c nor the start of a span or link
		for pos < len(data) {
			if b := data[pos]; b == c || b == '`' || b == '[' {
				break
			}
			pos++
		}
		if pos >= len(data) {
			return 0
		}

		// do not count escaped chars
		if pos > 0 && data[pos-1] == '\\' {
			pos++
			continue
		}

		switch data[pos] {
		case c:
			return pos

		case '`':
			// skip a code span, remembering the first c inside it
			first := 0
			for pos++; pos < len(data) && data[pos] != '`'; pos++ {
				if first == 0 && data[pos] == c {
					first = pos
				}
			}
			if pos >= len(data) {
				return first
			}
			pos++

		case '[':
			// skip a link: first the bracketed text ...
			first := 0
			for pos++; pos < len(data) && data[pos] != ']'; pos++ {
				if first == 0 && data[pos] == c {
					first = pos
				}
			}
			pos++

			// ... then any spaces or newlines after the ']'
			for pos < len(data) && (data[pos] == ' ' || data[pos] == '\n') {
				pos++
			}
			if pos >= len(data) {
				return first
			}

			opener := data[pos]
			if opener != '[' && opener != '(' { // not a link
				if first > 0 {
					return first
				}
				continue
			}

			// ... then the part that follows, scanning until the opener
			// byte occurs again
			for pos++; pos < len(data) && data[pos] != opener; pos++ {
				if first == 0 && data[pos] == c {
					return pos
				}
			}
			if pos >= len(data) {
				return first
			}
			pos++
		}
	}

	return 0
}
1090
// helperEmphasis looks for the closing delimiter of a single emphasis.
// data starts right after the opening delimiter c. On success the text before
// the closer is parsed into a new Emph node and the number of bytes of data
// used, closer included, is returned; otherwise the result is (0, nil).
1091func helperEmphasis(p *parser, data []byte, c byte) (int, *Node) {
1092	i := 0
1093
1094	// skip one symbol if coming from emph3
1095	if len(data) > 1 && data[0] == c && data[1] == c {
1096		i = 1
1097	}
1098
1099	for i < len(data) {
1100		length := helperFindEmphChar(data[i:], c)
1101		if length == 0 {
1102			return 0, nil
1103		}
1104		i += length
1105		if i >= len(data) {
1106			return 0, nil
1107		}
1108
    		// a doubled delimiter is not a single-emphasis closer
1109		if i+1 < len(data) && data[i+1] == c {
1110			i++
1111			continue
1112		}
1113
    		// the closer must not be preceded by whitespace
1114		if data[i] == c && !isspace(data[i-1]) {
1115
    			// with NoIntraEmphasis the closer must end data or be
    			// followed by whitespace or punctuation
1116			if p.flags&NoIntraEmphasis != 0 {
1117				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1118					continue
1119				}
1120			}
1121
1122			emph := NewNode(Emph)
1123			p.inline(emph, data[:i])
1124			return i + 1, emph
1125		}
1126	}
1127
1128	return 0, nil
1129}
1130
1131func helperDoubleEmphasis(p *parser, data []byte, c byte) (int, *Node) {
1132	i := 0
1133
1134	for i < len(data) {
1135		length := helperFindEmphChar(data[i:], c)
1136		if length == 0 {
1137			return 0, nil
1138		}
1139		i += length
1140
1141		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1142			nodeType := Strong
1143			if c == '~' {
1144				nodeType = Del
1145			}
1146			node := NewNode(nodeType)
1147			p.inline(node, data[:i])
1148			return i + 2, node
1149		}
1150		i++
1151	}
1152	return 0, nil
1153}
1154
// helperTripleEmphasis handles text opened by three delimiters. data starts
// at the first opening delimiter and offset is the number of bytes to skip to
// reach the content (emphasis passes 3). Depending on how many delimiters the
// closer has, it builds Strong(Emph(...)) itself or hands over to
// helperEmphasis / helperDoubleEmphasis. The returned length is relative to
// data[offset:]; (0, nil) means no closer was found.
1155func helperTripleEmphasis(p *parser, data []byte, offset int, c byte) (int, *Node) {
1156	i := 0
1157	origData := data
1158	data = data[offset:]
1159
1160	for i < len(data) {
1161		length := helperFindEmphChar(data[i:], c)
1162		if length == 0 {
1163			return 0, nil
1164		}
1165		i += length
1166
1167		// skip whitespace preceded symbols
1168		if data[i] != c || isspace(data[i-1]) {
1169			continue
1170		}
1171
1172		switch {
1173		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1174			// triple symbol found
1175			strong := NewNode(Strong)
1176			em := NewNode(Emph)
1177			strong.AppendChild(em)
1178			p.inline(em, data[:i])
1179			return i + 3, strong
1180		case (i+1 < len(data) && data[i+1] == c):
1181			// double symbol found, hand over to emph1
    			// (the helper's length is relative to origData[offset-2:], hence -2)
1182			length, node := helperEmphasis(p, origData[offset-2:], c)
1183			if length == 0 {
1184				return 0, nil
1185			}
1186			return length - 2, node
1187		default:
1188			// single symbol found, hand over to emph2
    			// (the helper's length is relative to origData[offset-1:], hence -1)
1189			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
1190			if length == 0 {
1191				return 0, nil
1192			}
1193			return length - 1, node
1194		}
1195	}
1196	return 0, nil
1197}
1198
1199func text(s []byte) *Node {
1200	node := NewNode(Text)
1201	node.Literal = s
1202	return node
1203}
1204
// normalizeURI is a placeholder: it currently returns s unchanged.
1205func normalizeURI(s []byte) []byte {
1206	return s // TODO: implement
1207}
all repos — grayfriday @ fb56a7e01e0e5f67a4854838504c2565d6224236

blackfriday fork with a few changes