inline.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse inline elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "regexp"
19 "strconv"
20)
21
var (
	// urlRe matches an absolute http/https/ftp URL or an absolute path,
	// using a conservative character class for the URL body.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches an already-rendered HTML anchor whose href and body
	// are both URLs; autoLink uses it to pass existing anchors through.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// TODO: improve this regexp to catch all possible entities:
	htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
)
29
30// Functions to parse text within a block
31// Each function returns the number of chars taken care of
32// data is the complete block being rendered
33// offset is the number of valid chars before the current cursor
34
// inline parses span-level constructs in data and appends the resulting
// nodes to currBlock. It walks data byte by byte, dispatching to the
// registered per-character callback; bytes with no callback (or a callback
// that consumes nothing) accumulate as plain text.
func (p *Parser) inline(currBlock *Node, data []byte) {
	// handlers might call us recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting || len(data) == 0 {
		return
	}
	p.nesting++
	// beg..end is the run of not-yet-emitted plain text; end is the cursor.
	beg, end := 0, 0
	for end < len(data) {
		handler := p.inlineCallback[data[end]]
		if handler != nil {
			if consumed, node := handler(p, data, end); consumed == 0 {
				// No action from the callback.
				end++
			} else {
				// Copy inactive chars into the output.
				currBlock.AppendChild(text(data[beg:end]))
				if node != nil {
					currBlock.AppendChild(node)
				}
				// Skip past whatever the callback used.
				beg = end + consumed
				end = beg
			}
		} else {
			end++
		}
	}
	// Flush the trailing plain-text run, dropping a single final newline.
	if beg < len(data) {
		if data[end-1] == '\n' {
			end--
		}
		currBlock.AppendChild(text(data[beg:end]))
	}
	p.nesting--
}
70
// single and double emphasis parsing
// emphasis dispatches on the length of the run of the delimiter character at
// data[offset] (c is '*', '_' or '~'): one delimiter → emphasis, two →
// strong/strikethrough, three → strong+emph. Returns the number of bytes
// consumed (including the opening delimiters) and the node, or (0, nil).
func emphasis(p *Parser, data []byte, offset int) (int, *Node) {
	data = data[offset:]
	c := data[0]

	// exactly one delimiter char at the cursor
	if len(data) > 2 && data[1] != c {
		// whitespace cannot follow an opening emphasis;
		// strikethrough only takes two characters '~~'
		if c == '~' || isspace(data[1]) {
			return 0, nil
		}
		ret, node := helperEmphasis(p, data[1:], c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 1, node
	}

	// exactly two delimiter chars
	if len(data) > 3 && data[1] == c && data[2] != c {
		if isspace(data[2]) {
			return 0, nil
		}
		ret, node := helperDoubleEmphasis(p, data[2:], c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 2, node
	}

	// exactly three delimiter chars
	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
		if c == '~' || isspace(data[3]) {
			return 0, nil
		}
		ret, node := helperTripleEmphasis(p, data, 3, c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 3, node
	}

	return 0, nil
}
116
117func codeSpan(p *Parser, data []byte, offset int) (int, *Node) {
118 data = data[offset:]
119
120 nb := 0
121
122 // count the number of backticks in the delimiter
123 for nb < len(data) && data[nb] == '`' {
124 nb++
125 }
126
127 // find the next delimiter
128 i, end := 0, 0
129 for end = nb; end < len(data) && i < nb; end++ {
130 if data[end] == '`' {
131 i++
132 } else {
133 i = 0
134 }
135 }
136
137 // no matching delimiter?
138 if i < nb && end >= len(data) {
139 return 0, nil
140 }
141
142 // trim outside whitespace
143 fBegin := nb
144 for fBegin < end && data[fBegin] == ' ' {
145 fBegin++
146 }
147
148 fEnd := end - nb
149 for fEnd > fBegin && data[fEnd-1] == ' ' {
150 fEnd--
151 }
152
153 // render the code span
154 if fBegin != fEnd {
155 code := NewNode(Code)
156 code.Literal = data[fBegin:fEnd]
157 return end, code
158 }
159
160 return end, nil
161}
162
163// newline preceded by two spaces becomes <br>
164func maybeLineBreak(p *Parser, data []byte, offset int) (int, *Node) {
165 origOffset := offset
166 for offset < len(data) && data[offset] == ' ' {
167 offset++
168 }
169
170 if offset < len(data) && data[offset] == '\n' {
171 if offset-origOffset >= 2 {
172 return offset - origOffset + 1, NewNode(Hardbreak)
173 }
174 return offset - origOffset, nil
175 }
176 return 0, nil
177}
178
179// newline without two spaces works when HardLineBreak is enabled
180func lineBreak(p *Parser, data []byte, offset int) (int, *Node) {
181 if p.flags&HardLineBreak != 0 {
182 return 1, NewNode(Hardbreak)
183 }
184 return 0, nil
185}
186
// linkType distinguishes the bracketed constructs parsed by link().
type linkType int

const (
	linkNormal           linkType = iota // [text](url) or [text][ref]
	linkImg                              // ![alt](url)
	linkDeferredFootnote                 // [^id] referring to a definition elsewhere
	linkInlineFootnote                   // ^[footnote text inline]
)
195
196func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
197 if t == linkDeferredFootnote {
198 return false
199 }
200 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
201}
202
203func maybeImage(p *Parser, data []byte, offset int) (int, *Node) {
204 if offset < len(data)-1 && data[offset+1] == '[' {
205 return link(p, data, offset)
206 }
207 return 0, nil
208}
209
210func maybeInlineFootnote(p *Parser, data []byte, offset int) (int, *Node) {
211 if offset < len(data)-1 && data[offset+1] == '[' {
212 return link(p, data, offset)
213 }
214 return 0, nil
215}
216
// '[': parse a link or an image or a footnote
//
// link parses all bracketed constructs: [text](url "title"), [text][ref],
// shortcut references [ref], images ![alt](...), inline footnotes ^[...]
// and deferred footnotes [^id]. It returns the number of bytes consumed
// (0 if the construct is invalid) and the resulting node.
func link(p *Parser, data []byte, offset int) (int, *Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	// classify the construct from the characters around the cursor
	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.flags&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.flags&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	data = data[offset:]

	var (
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// a deferred footnote starts "[^", so skip the '^' as well
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket, honoring nesting and escapes
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0, nil
	}

	// txtE marks the ']' that closes the bracketed text
	txtE := i
	i++
	var footnoteNode *Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence; without one, the quoted run
			// was part of the URL after all
			if data[titleE] != '\'' && data[titleE] != '"' {
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// consume the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference: an empty second bracket ("[text][]") means the
		// text itself is the id
		if linkB == linkE {
			if textHasNl {
				// collapse newlines in the text into single spaces
				var b bytes.Buffer

				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			// collapse newlines in the text into single spaces
			var b bytes.Buffer

			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = NewNode(Item)
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			// fragment is capped at 16 bytes of the slugified id
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			// deferred footnotes register themselves on first use
			if t == linkDeferredFootnote {
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// unescape the destination for real links/images
	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	var linkNode *Node
	switch t {
	case linkNormal:
		linkNode = NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(linkNode, data[1:txtE])
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode = NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		linkNode.AppendChild(text(data[1:txtE]))
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode = NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		linkNode.Footnote = footnoteNode
		if t == linkInlineFootnote {
			i++
		}

	default:
		return 0, nil
	}

	return i, linkNode
}
575
576func (p *Parser) inlineHTMLComment(data []byte) int {
577 if len(data) < 5 {
578 return 0
579 }
580 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
581 return 0
582 }
583 i := 5
584 // scan for an end-of-comment marker, across lines if necessary
585 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
586 i++
587 }
588 // no end-of-comment marker
589 if i >= len(data) {
590 return 0
591 }
592 return i + 1
593}
594
// stripMailto removes a leading "mailto://" or "mailto:" scheme from link,
// returning the bare address; other links are returned unchanged.
func stripMailto(link []byte) []byte {
	switch {
	case bytes.HasPrefix(link, []byte("mailto://")):
		return link[len("mailto://"):]
	case bytes.HasPrefix(link, []byte("mailto:")):
		return link[len("mailto:"):]
	default:
		return link
	}
}
604
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	notAutolink    autolinkType = iota // not an autolink at all
	normalAutolink                     // scheme-based URI, e.g. <http://...>
	emailAutolink                      // e-mail address, e.g. <user@host>
)
614
// '<' when tags or autolinks are allowed
//
// leftAngle parses either an angle-bracket autolink (<http://...> /
// <user@host>), an HTML comment, or a raw HTML tag starting at data[offset].
func leftAngle(p *Parser, data []byte, offset int) (int, *Node) {
	data = data[offset:]
	altype, end := tagLength(data)
	// an HTML comment overrides whatever tagLength decided
	if size := p.inlineHTMLComment(data); size > 0 {
		end = size
	}
	if end > 2 {
		if altype != notAutolink {
			// strip the surrounding angle brackets and unescape
			var uLink bytes.Buffer
			unescapeText(&uLink, data[1:end+1-2])
			if uLink.Len() > 0 {
				link := uLink.Bytes()
				node := NewNode(Link)
				node.Destination = link
				if altype == emailAutolink {
					node.Destination = append([]byte("mailto:"), link...)
				}
				node.AppendChild(text(stripMailto(link)))
				return end, node
			}
		} else {
			// pass raw HTML through unchanged
			htmlTag := NewNode(HTMLSpan)
			htmlTag.Literal = data[:end]
			return end, htmlTag
		}
	}

	return end, nil
}
645
// '\\' backslash escape
// escapeChars is the set of punctuation a backslash may escape.
var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
648
649func escape(p *Parser, data []byte, offset int) (int, *Node) {
650 data = data[offset:]
651
652 if len(data) > 1 {
653 if p.flags&BackslashLineBreak != 0 && data[1] == '\n' {
654 return 2, NewNode(Hardbreak)
655 }
656 if bytes.IndexByte(escapeChars, data[1]) < 0 {
657 return 0, nil
658 }
659
660 return 2, text(data[1:2])
661 }
662
663 return 2, nil
664}
665
666func unescapeText(ob *bytes.Buffer, src []byte) {
667 i := 0
668 for i < len(src) {
669 org := i
670 for i < len(src) && src[i] != '\\' {
671 i++
672 }
673
674 if i > org {
675 ob.Write(src[org:i])
676 }
677
678 if i+1 >= len(src) {
679 break
680 }
681
682 ob.WriteByte(src[i+1])
683 i += 2
684 }
685}
686
687// '&' escaped when it doesn't belong to an entity
688// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
689func entity(p *Parser, data []byte, offset int) (int, *Node) {
690 data = data[offset:]
691
692 end := 1
693
694 if end < len(data) && data[end] == '#' {
695 end++
696 }
697
698 for end < len(data) && isalnum(data[end]) {
699 end++
700 }
701
702 if end < len(data) && data[end] == ';' {
703 end++ // real entity
704 } else {
705 return 0, nil // lone '&'
706 }
707
708 ent := data[:end]
709 // undo & escaping or it will be converted to &amp; by another
710 // escaper in the renderer
711 if bytes.Equal(ent, []byte("&")) {
712 ent = []byte{'&'}
713 }
714
715 return end, text(ent)
716}
717
718func linkEndsWithEntity(data []byte, linkEnd int) bool {
719 entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
720 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
721}
722
// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
// prefix must already be lowercase ASCII.
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
	if len(prefix) > len(s) {
		return false
	}
	const delta = byte('a' - 'A')
	for i := 0; i < len(prefix); i++ {
		// accept either the byte itself or its uppercase form
		if prefix[i] != s[i] && prefix[i] != s[i]+delta {
			return false
		}
	}
	return true
}
740
// protocolPrefixes lists the URI schemes that trigger autolink detection in
// maybeAutoLink; matching is ASCII case-insensitive.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

const shortestPrefix = 6 // len("ftp://"), the shortest of the above
750
751func maybeAutoLink(p *Parser, data []byte, offset int) (int, *Node) {
752 // quick check to rule out most false hits
753 if p.insideLink || len(data) < offset+shortestPrefix {
754 return 0, nil
755 }
756 for _, prefix := range protocolPrefixes {
757 endOfHead := offset + 8 // 8 is the len() of the longest prefix
758 if endOfHead > len(data) {
759 endOfHead = len(data)
760 }
761 if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
762 return autoLink(p, data, offset)
763 }
764 }
765 return 0, nil
766}
767
// autoLink parses a bare URL at data[offset] into a Link node. It first
// checks whether the URL is inside an already-rendered <a> element (passed
// through as raw HTML), then rewinds over the scheme letters, determines the
// link's extent, and trims trailing punctuation that is unlikely to belong
// to the URL.
func autoLink(p *Parser, data []byte, offset int) (int, *Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit the remainder of the existing anchor as raw HTML
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// scan backward to the start of the line, balancing delimiters
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced outside the URL: the closer is not part of the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		node.AppendChild(text(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
886
887func isEndOfLink(char byte) bool {
888 return isspace(char) || char == '<'
889}
890
// validUris and validPaths enumerate the destinations isSafeLink accepts.
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}

// isSafeLink reports whether link begins with an allowed relative path or an
// allowed URI scheme followed by an alphanumeric character.
func isSafeLink(link []byte) bool {
	for _, path := range validPaths {
		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
			if len(link) == len(path) {
				return true
			} else if isalnum(link[len(path)]) {
				return true
			}
		}
	}

	for _, prefix := range validUris {
		// TODO: handle unicode here
		// case-insensitive prefix test
		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
			return true
		}
	}

	return false
}
915
916// return the length of the given tag, or 0 is it's not valid
917func tagLength(data []byte) (autolink autolinkType, end int) {
918 var i, j int
919
920 // a valid tag can't be shorter than 3 chars
921 if len(data) < 3 {
922 return notAutolink, 0
923 }
924
925 // begins with a '<' optionally followed by '/', followed by letter or number
926 if data[0] != '<' {
927 return notAutolink, 0
928 }
929 if data[1] == '/' {
930 i = 2
931 } else {
932 i = 1
933 }
934
935 if !isalnum(data[i]) {
936 return notAutolink, 0
937 }
938
939 // scheme test
940 autolink = notAutolink
941
942 // try to find the beginning of an URI
943 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
944 i++
945 }
946
947 if i > 1 && i < len(data) && data[i] == '@' {
948 if j = isMailtoAutoLink(data[i:]); j != 0 {
949 return emailAutolink, i + j
950 }
951 }
952
953 if i > 2 && i < len(data) && data[i] == ':' {
954 autolink = normalAutolink
955 i++
956 }
957
958 // complete autolink test: no whitespace or ' or "
959 switch {
960 case i >= len(data):
961 autolink = notAutolink
962 case autolink != notAutolink:
963 j = i
964
965 for i < len(data) {
966 if data[i] == '\\' {
967 i += 2
968 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
969 break
970 } else {
971 i++
972 }
973
974 }
975
976 if i >= len(data) {
977 return autolink, 0
978 }
979 if i > j && data[i] == '>' {
980 return autolink, i + 1
981 }
982
983 // one of the forbidden chars has been found
984 autolink = notAutolink
985 }
986 i += bytes.IndexByte(data[i:], '>')
987 if i < 0 {
988 return autolink, 0
989 }
990 return autolink, i + 1
991}
992
993// look for the address part of a mail autolink and '>'
994// this is less strict than the original markdown e-mail address matching
995func isMailtoAutoLink(data []byte) int {
996 nb := 0
997
998 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
999 for i := 0; i < len(data); i++ {
1000 if isalnum(data[i]) {
1001 continue
1002 }
1003
1004 switch data[i] {
1005 case '@':
1006 nb++
1007
1008 case '-', '.', '_':
1009 break
1010
1011 case '>':
1012 if nb == 1 {
1013 return i + 1
1014 }
1015 return 0
1016 default:
1017 return 0
1018 }
1019 }
1020
1021 return 0
1022}
1023
// look for the next emph char, skipping other constructs
//
// helperFindEmphChar returns the index of the next unescaped occurrence of c
// in data, skipping over code spans and links so their contents cannot close
// an emphasis run. Returns 0 when no suitable occurrence is found.
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		// advance to the next character of interest: c, a backtick, or '['
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			// tmpI remembers the first c seen inside, used as a fallback if
			// the span never closes
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			// allow whitespace between the brackets and the (...)/[...] part
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				}
				continue
			}
			// skip to the matching closer of the destination/reference part
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}
1097
// helperEmphasis parses single-delimiter emphasis: data starts just after
// the opening c. Returns bytes consumed (including the closer) and the Emph
// node, or (0, nil) when no valid closer exists.
func helperEmphasis(p *Parser, data []byte, c byte) (int, *Node) {
	i := 0

	// skip one symbol if coming from emph3
	if len(data) > 1 && data[0] == c && data[1] == c {
		i = 1
	}

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length
		if i >= len(data) {
			return 0, nil
		}

		// a doubled delimiter here belongs to strong emphasis; skip it
		if i+1 < len(data) && data[i+1] == c {
			i++
			continue
		}

		// closer must not follow whitespace
		if data[i] == c && !isspace(data[i-1]) {

			// with NoIntraEmphasis the closer must end a word
			if p.flags&NoIntraEmphasis != 0 {
				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
					continue
				}
			}

			emph := NewNode(Emph)
			p.inline(emph, data[:i])
			return i + 1, emph
		}
	}

	return 0, nil
}
1137
// helperDoubleEmphasis parses double-delimiter emphasis: data starts just
// after the opening "cc". Produces Strong, or Del when c is '~'. Returns
// bytes consumed (including the two closers) or (0, nil).
func helperDoubleEmphasis(p *Parser, data []byte, c byte) (int, *Node) {
	i := 0

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length

		// need "cc" not preceded by whitespace to close
		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
			nodeType := Strong
			if c == '~' {
				nodeType = Del
			}
			node := NewNode(nodeType)
			p.inline(node, data[:i])
			return i + 2, node
		}
		i++
	}
	return 0, nil
}
1161
// helperTripleEmphasis parses triple-delimiter emphasis. Unlike the other
// helpers it receives the full run (data[offset:] starts after "ccc") so it
// can rewind and delegate when the closer turns out to be shorter than the
// opener.
func helperTripleEmphasis(p *Parser, data []byte, offset int, c byte) (int, *Node) {
	i := 0
	origData := data
	data = data[offset:]

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length

		// skip whitespace preceded symbols
		if data[i] != c || isspace(data[i-1]) {
			continue
		}

		switch {
		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
			// triple symbol found
			strong := NewNode(Strong)
			em := NewNode(Emph)
			strong.AppendChild(em)
			p.inline(em, data[:i])
			return i + 3, strong
		case (i+1 < len(data) && data[i+1] == c):
			// double symbol found, hand over to emph1
			length, node := helperEmphasis(p, origData[offset-2:], c)
			if length == 0 {
				return 0, nil
			}
			// subtract the two delimiters re-included by the rewind
			return length - 2, node
		default:
			// single symbol found, hand over to emph2
			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
			if length == 0 {
				return 0, nil
			}
			// subtract the one delimiter re-included by the rewind
			return length - 1, node
		}
	}
	return 0, nil
}
1205
1206func text(s []byte) *Node {
1207 node := NewNode(Text)
1208 node.Literal = s
1209 return node
1210}
1211
// normalizeURI is a hook for cleaning up link destinations before rendering.
// It currently returns its input unchanged.
func normalizeURI(s []byte) []byte {
	return s // TODO: implement
}