grayfriday @ 04673c9f2899eac2c8c1a44df63682963ad4e121

blackfriday fork with a few changes

inline.go

   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse inline elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"regexp"
  19	"strconv"
  20)
  21
  22var (
  23	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
  24	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
  25)
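
// Illustrative note: anchorRe is built to recognize a raw HTML anchor whose
// href and visible text are both URLs, i.e. something that already looks like
// a rendered autolink, for example:
//
//	<a href="http://example.com/">http://example.com/</a>
//
// autoLink below uses it to emit such anchors verbatim (as an HtmlSpan)
// instead of trying to re-link the URL inside them.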
  26
  27// Functions to parse inline text within a block.
  28// Each function returns the number of bytes it consumed.
  29// data is the complete block being rendered;
  30// offset is the index of the current cursor within data.
  31
  32func (p *parser) inline(data []byte) {
  33	// this is called recursively: enforce a maximum depth
  34	if p.nesting >= p.maxNesting {
  35		return
  36	}
  37	p.nesting++
  38
  39	i, end := 0, 0
  40	for i < len(data) {
  41		// stop when only a trailing newline remains
  42		if data[i] == '\n' && i+1 == len(data) {
  43			break
  44		}
  45		// Copy inactive chars into the output, but first check for one quirk:
  46		// 'h', 'm' and 'f' all might trigger a check for autolink processing
  47		// and end this run of inactive characters. However, there's one nasty
  48		// case where breaking this run would be bad: in smartypants fraction
  49		// detection, we expect things like "1/2th" to be in a single run. So
  50		// we check here if an 'h' is followed by 't' (from 'http') and if it's
  51		// not, we short circuit the 'h' into the run of inactive characters.
  52		//
  53		// Also, in a similar fashion maybeLineBreak breaks this run of chars,
  54		// but smartDash processor relies on seeing context around the dashes.
  55		// Fix this somehow.
  56		for end < len(data) {
  57			if data[end] == ' ' {
  58				consumed, br := maybeLineBreak(p, data, end)
  59				if consumed > 0 {
  60					p.currBlock.appendChild(text(data[i:end]))
  61					if br {
  62						p.currBlock.appendChild(NewNode(Hardbreak))
  63					}
  64					i = end
  65					i += consumed
  66					end = i
  67				} else {
  68					end++
  69				}
  70				continue
  71			}
  72			if p.inlineCallback[data[end]] != nil {
  73				if end+1 < len(data) && data[end] == 'h' && data[end+1] != 't' {
  74					end++
  75				} else {
  76					break
  77				}
  78			} else {
  79				end++
  80			}
  81		}
  82
  83		p.currBlock.appendChild(text(data[i:end]))
  84
  85		if end >= len(data) {
  86			break
  87		}
  88		i = end
  89
  90		// call the trigger
  91		handler := p.inlineCallback[data[end]]
  92		if consumed := handler(p, data, i); consumed == 0 {
  93			// no action from the callback; buffer the byte for later
  94			end = i + 1
  95		} else {
  96			// skip past whatever the callback used
  97			i += consumed
  98			end = i
  99		}
 100	}
 101
 102	p.nesting--
 103}
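
// A rough illustration of the dispatch loop above: runs of plain bytes are
// flushed as Text nodes, and a byte with an entry in p.inlineCallback hands
// control to its handler. Assuming '*' is wired to emphasis when the parser
// is constructed (that wiring lives outside this file), "a *b* c" becomes:
//
//	Text("a ")  Emph(Text("b"))  Text(" c")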
 104
 105// single, double, and triple emphasis parsing
 106func emphasis(p *parser, data []byte, offset int) int {
 107	data = data[offset:]
 108	c := data[0]
 109	ret := 0
 110
 111	if len(data) > 2 && data[1] != c {
 112		// whitespace cannot follow an opening emphasis;
 113		// strikethrough only takes two characters '~~'
 114		if c == '~' || isspace(data[1]) {
 115			return 0
 116		}
 117		if ret = helperEmphasis(p, data[1:], c); ret == 0 {
 118			return 0
 119		}
 120
 121		return ret + 1
 122	}
 123
 124	if len(data) > 3 && data[1] == c && data[2] != c {
 125		if isspace(data[2]) {
 126			return 0
 127		}
 128		if ret = helperDoubleEmphasis(p, data[2:], c); ret == 0 {
 129			return 0
 130		}
 131
 132		return ret + 2
 133	}
 134
 135	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
 136		if c == '~' || isspace(data[3]) {
 137			return 0
 138		}
 139		if ret = helperTripleEmphasis(p, data, 3, c); ret == 0 {
 140			return 0
 141		}
 142
 143		return ret + 3
 144	}
 145
 146	return 0
 147}
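
// For illustration, the offsets emphasis returns for a few inputs (offset 0,
// i.e. the cursor sitting on the first delimiter); the non-zero cases also
// append the corresponding node to p.currBlock:
//
//	emphasis(p, []byte("*one* x"), 0)        // 5, Emph
//	emphasis(p, []byte("**two** x"), 0)      // 7, Strong
//	emphasis(p, []byte("***three*** x"), 0)  // 11, Strong wrapping Emph
//	emphasis(p, []byte("~~gone~~ x"), 0)     // 8, Del
//	emphasis(p, []byte("* not emph"), 0)     // 0: whitespace follows the opener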
 148
 149func codeSpan(p *parser, data []byte, offset int) int {
 150	data = data[offset:]
 151
 152	nb := 0
 153
 154	// count the number of backticks in the delimiter
 155	for nb < len(data) && data[nb] == '`' {
 156		nb++
 157	}
 158
 159	// find the next delimiter
 160	i, end := 0, 0
 161	for end = nb; end < len(data) && i < nb; end++ {
 162		if data[end] == '`' {
 163			i++
 164		} else {
 165			i = 0
 166		}
 167	}
 168
 169	// no matching delimiter?
 170	if i < nb && end >= len(data) {
 171		return 0
 172	}
 173
 174	// trim outside whitespace
 175	fBegin := nb
 176	for fBegin < end && data[fBegin] == ' ' {
 177		fBegin++
 178	}
 179
 180	fEnd := end - nb
 181	for fEnd > fBegin && data[fEnd-1] == ' ' {
 182		fEnd--
 183	}
 184
 185	// render the code span
 186	if fBegin != fEnd {
 187		code := NewNode(Code)
 188		code.Literal = data[fBegin:fEnd]
 189		p.currBlock.appendChild(code)
 190	}
 191
 192	return end
 193
 194}
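
// For illustration: the delimiter is the whole run of backticks, and the
// closing run must have the same length, which is what allows backticks
// inside a span; leading and trailing spaces are trimmed from the contents.
//
//	codeSpan(p, []byte("`x == y` rest"), 0)     // returns 8, Code literal "x == y"
//	codeSpan(p, []byte("``a `b` c`` rest"), 0)  // returns 11, Code literal "a `b` c"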
 195
 196// newline preceded by two spaces becomes <br>
 197func maybeLineBreak(p *parser, data []byte, offset int) (int, bool) {
 198	origOffset := offset
 199	for offset < len(data) && data[offset] == ' ' {
 200		offset++
 201	}
 202	if offset < len(data) && data[offset] == '\n' {
 203		if offset-origOffset >= 2 {
 204			return offset - origOffset + 1, true
 205		}
 206		return offset - origOffset, false
 207	}
 208	return 0, false
 209}
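
// For illustration (offset pointing at the first trailing space):
//
//	maybeLineBreak(p, []byte("foo  \nbar"), 3)  // (3, true): spaces + newline consumed, caller emits Hardbreak
//	maybeLineBreak(p, []byte("foo \nbar"), 3)   // (1, false): the lone trailing space is swallowed
//	maybeLineBreak(p, []byte("foo bar"), 3)     // (0, false): not at the end of a line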
 210
 211// a newline without two preceding spaces still becomes a hard break when HardLineBreak is set
 212func lineBreak(p *parser, data []byte, offset int) int {
 213	if p.flags&HardLineBreak != 0 {
 214		p.currBlock.appendChild(NewNode(Hardbreak))
 215		return 1
 216	}
 217	return 0
 218}
 219
 220type linkType int
 221
 222const (
 223	linkNormal linkType = iota
 224	linkImg
 225	linkDeferredFootnote
 226	linkInlineFootnote
 227)
 228
 229func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
 230	if t == linkDeferredFootnote {
 231		return false
 232	}
 233	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
 234}
 235
 236func maybeImage(p *parser, data []byte, offset int) int {
 237	if offset < len(data)-1 && data[offset+1] == '[' {
 238		return link(p, data, offset)
 239	}
 240	return 0
 241}
 242
 243func maybeInlineFootnote(p *parser, data []byte, offset int) int {
 244	if offset < len(data)-1 && data[offset+1] == '[' {
 245		return link(p, data, offset)
 246	}
 247	return 0
 248}
 249
 250// '[': parse a link or an image or a footnote
 251func link(p *parser, data []byte, offset int) int {
 252	// no links allowed inside regular links, footnotes, and deferred footnotes
 253	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
 254		return 0
 255	}
 256
 257	var t linkType
 258	switch {
 259	// special case: ![^text] == deferred footnote (that follows something with
 260	// an exclamation point)
 261	case p.flags&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
 262		t = linkDeferredFootnote
 263	// ![alt] == image
 264	case offset >= 0 && data[offset] == '!':
 265		t = linkImg
 266		offset += 1
 267	// ^[text] == inline footnote
 268	// [^refId] == deferred footnote
 269	case p.flags&Footnotes != 0:
 270		if offset >= 0 && data[offset] == '^' {
 271			t = linkInlineFootnote
 272			offset += 1
 273		} else if len(data)-1 > offset && data[offset+1] == '^' {
 274			t = linkDeferredFootnote
 275		}
 276	// [text] == regular link
 277	default:
 278		t = linkNormal
 279	}
 280
 281	data = data[offset:]
 282
 283	var (
 284		i                       = 1
 285		noteId                  int
 286		title, link, altContent []byte
 287		textHasNl               = false
 288	)
 289
 290	if t == linkDeferredFootnote {
 291		i++
 292	}
 293
 294	// look for the matching closing bracket
 295	for level := 1; level > 0 && i < len(data); i++ {
 296		switch {
 297		case data[i] == '\n':
 298			textHasNl = true
 299
 300		case data[i-1] == '\\':
 301			continue
 302
 303		case data[i] == '[':
 304			level++
 305
 306		case data[i] == ']':
 307			level--
 308			if level <= 0 {
 309				i-- // compensate for extra i++ in for loop
 310			}
 311		}
 312	}
 313
 314	if i >= len(data) {
 315		return 0
 316	}
 317
 318	txtE := i
 319	i++
 320
 321	// skip any amount of whitespace or newline
 322	// (this is much more lax than original markdown syntax)
 323	for i < len(data) && isspace(data[i]) {
 324		i++
 325	}
 326
 327	// inline style link
 328	switch {
 329	case i < len(data) && data[i] == '(':
 330		// skip initial whitespace
 331		i++
 332
 333		for i < len(data) && isspace(data[i]) {
 334			i++
 335		}
 336
 337		linkB := i
 338
 339		// look for link end: ' " )
 340	findlinkend:
 341		for i < len(data) {
 342			switch {
 343			case data[i] == '\\':
 344				i += 2
 345
 346			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
 347				break findlinkend
 348
 349			default:
 350				i++
 351			}
 352		}
 353
 354		if i >= len(data) {
 355			return 0
 356		}
 357		linkE := i
 358
 359		// look for title end if present
 360		titleB, titleE := 0, 0
 361		if data[i] == '\'' || data[i] == '"' {
 362			i++
 363			titleB = i
 364
 365		findtitleend:
 366			for i < len(data) {
 367				switch {
 368				case data[i] == '\\':
 369					i += 2
 370
 371				case data[i] == ')':
 372					break findtitleend
 373
 374				default:
 375					i++
 376				}
 377			}
 378
 379			if i >= len(data) {
 380				return 0
 381			}
 382
 383			// skip whitespace after title
 384			titleE = i - 1
 385			for titleE > titleB && isspace(data[titleE]) {
 386				titleE--
 387			}
 388
 389			// check for closing quote presence
 390			if data[titleE] != '\'' && data[titleE] != '"' {
 391				titleB, titleE = 0, 0
 392				linkE = i
 393			}
 394		}
 395
 396		// remove whitespace at the end of the link
 397		for linkE > linkB && isspace(data[linkE-1]) {
 398			linkE--
 399		}
 400
 401		// remove optional angle brackets around the link
 402		if data[linkB] == '<' {
 403			linkB++
 404		}
 405		if data[linkE-1] == '>' {
 406			linkE--
 407		}
 408
 409		// build escaped link and title
 410		if linkE > linkB {
 411			link = data[linkB:linkE]
 412		}
 413
 414		if titleE > titleB {
 415			title = data[titleB:titleE]
 416		}
 417
 418		i++
 419
 420	// reference style link
 421	case isReferenceStyleLink(data, i, t):
 422		var id []byte
 423		altContentConsidered := false
 424
 425		// look for the id
 426		i++
 427		linkB := i
 428		for i < len(data) && data[i] != ']' {
 429			i++
 430		}
 431		if i >= len(data) {
 432			return 0
 433		}
 434		linkE := i
 435
 436		// find the reference
 437		if linkB == linkE {
 438			if textHasNl {
 439				var b bytes.Buffer
 440
 441				for j := 1; j < txtE; j++ {
 442					switch {
 443					case data[j] != '\n':
 444						b.WriteByte(data[j])
 445					case data[j-1] != ' ':
 446						b.WriteByte(' ')
 447					}
 448				}
 449
 450				id = b.Bytes()
 451			} else {
 452				id = data[1:txtE]
 453				altContentConsidered = true
 454			}
 455		} else {
 456			id = data[linkB:linkE]
 457		}
 458
 459		// find the reference with matching id
 460		lr, ok := p.getRef(string(id))
 461		if !ok {
 462			return 0
 463		}
 464
 465		// keep link and title from reference
 466		link = lr.link
 467		title = lr.title
 468		if altContentConsidered {
 469			altContent = lr.text
 470		}
 471		i++
 472
 473	// shortcut reference style link, deferred footnote, or inline footnote
 474	default:
 475		var id []byte
 476
 477		// craft the id
 478		if textHasNl {
 479			var b bytes.Buffer
 480
 481			for j := 1; j < txtE; j++ {
 482				switch {
 483				case data[j] != '\n':
 484					b.WriteByte(data[j])
 485				case data[j-1] != ' ':
 486					b.WriteByte(' ')
 487				}
 488			}
 489
 490			id = b.Bytes()
 491		} else {
 492			if t == linkDeferredFootnote {
 493				id = data[2:txtE] // get rid of the ^
 494			} else {
 495				id = data[1:txtE]
 496			}
 497		}
 498
 499		if t == linkInlineFootnote {
 500			// create a new reference
 501			noteId = len(p.notes) + 1
 502
 503			var fragment []byte
 504			if len(id) > 0 {
 505				if len(id) < 16 {
 506					fragment = make([]byte, len(id))
 507				} else {
 508					fragment = make([]byte, 16)
 509				}
 510				copy(fragment, slugify(id))
 511			} else {
 512				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
 513			}
 514
 515			ref := &reference{
 516				noteId:   noteId,
 517				hasBlock: false,
 518				link:     fragment,
 519				title:    id,
 520			}
 521
 522			p.notes = append(p.notes, ref)
 523
 524			link = ref.link
 525			title = ref.title
 526		} else {
 527			// find the reference with matching id
 528			lr, ok := p.getRef(string(id))
 529			if !ok {
 530				return 0
 531			}
 532
 533			if t == linkDeferredFootnote {
 534				lr.noteId = len(p.notes) + 1
 535				p.notes = append(p.notes, lr)
 536			}
 537
 538			// keep link and title from reference
 539			link = lr.link
 540			// if inline footnote, title == footnote contents
 541			title = lr.title
 542			noteId = lr.noteId
 543		}
 544
 545		// rewind the whitespace
 546		i = txtE + 1
 547	}
 548
 549	var uLink []byte
 550	if t == linkNormal || t == linkImg {
 551		if len(link) > 0 {
 552			var uLinkBuf bytes.Buffer
 553			unescapeText(&uLinkBuf, link)
 554			uLink = uLinkBuf.Bytes()
 555		}
 556
 557		// links need something to click on and somewhere to go
 558		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
 559			return 0
 560		}
 561	}
 562
 563	// call the relevant rendering function
 564	switch t {
 565	case linkNormal:
 566		linkNode := NewNode(Link)
 567		linkNode.Destination = normalizeURI(uLink)
 568		linkNode.Title = title
 569		p.currBlock.appendChild(linkNode)
 570		if len(altContent) > 0 {
 571			linkNode.appendChild(text(altContent))
 572		} else {
 573			// links cannot contain other links, so turn off link parsing
 574			// temporarily and recurse
 575			insideLink := p.insideLink
 576			p.insideLink = true
 577			tmpNode := p.currBlock
 578			p.currBlock = linkNode
 579			p.inline(data[1:txtE])
 580			p.currBlock = tmpNode
 581			p.insideLink = insideLink
 582		}
 583
 584	case linkImg:
 585		linkNode := NewNode(Image)
 586		linkNode.Destination = uLink
 587		linkNode.Title = title
 588		p.currBlock.appendChild(linkNode)
 589		linkNode.appendChild(text(data[1:txtE]))
 590		i += 1
 591
 592	case linkInlineFootnote, linkDeferredFootnote:
 593		linkNode := NewNode(Link)
 594		linkNode.Destination = link
 595		linkNode.Title = title
 596		linkNode.NoteID = noteId
 597		p.currBlock.appendChild(linkNode)
 598		if t == linkInlineFootnote {
 599			i++
 600		}
 601
 602	default:
 603		return 0
 604	}
 605
 606	return i
 607}
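
// A quick map of the shapes handled above and the node each one produces,
// assuming any referenced ids resolve via p.getRef and that the caret forms
// are only reachable with the Footnotes flag set:
//
//	[text](http://example.com/ "title")  inline link     -> Link, children parsed from "text"
//	![alt](/img.png)                     inline image    -> Image, child Text "alt"
//	[text][id], [id]                     reference link  -> Link, destination/title from the stored reference
//	^[inline note], [^id]                footnotes       -> Link with NoteID set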
 608
 609func (p *parser) inlineHtmlComment(data []byte) int {
 610	if len(data) < 5 {
 611		return 0
 612	}
 613	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
 614		return 0
 615	}
 616	i := 5
 617	// scan for an end-of-comment marker, across lines if necessary
 618	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
 619		i++
 620	}
 621	// no end-of-comment marker
 622	if i >= len(data) {
 623		return 0
 624	}
 625	return i + 1
 626}
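
// For illustration:
//
//	p.inlineHtmlComment([]byte("<!-- note -->tail"))  // 13: length of the comment itself
//	p.inlineHtmlComment([]byte("<!-->"))              // 0: no "-->" terminator found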
 627
 628func stripMailto(link []byte) []byte {
 629	if bytes.HasPrefix(link, []byte("mailto://")) {
 630		return link[9:]
 631	} else if bytes.HasPrefix(link, []byte("mailto:")) {
 632		return link[7:]
 633	} else {
 634		return link
 635	}
 636}
 637
 638// '<' when tags or autolinks are allowed
 639func leftAngle(p *parser, data []byte, offset int) int {
 640	data = data[offset:]
 641	altype := LinkTypeNotAutolink
 642	end := tagLength(data, &altype)
 643	if size := p.inlineHtmlComment(data); size > 0 {
 644		end = size
 645	}
 646	if end > 2 {
 647		if altype != LinkTypeNotAutolink {
 648			var uLink bytes.Buffer
 649			unescapeText(&uLink, data[1:end+1-2])
 650			if uLink.Len() > 0 {
 651				link := uLink.Bytes()
 652				node := NewNode(Link)
 653				node.Destination = link
 654				if altype == LinkTypeEmail {
 655					node.Destination = append([]byte("mailto:"), link...)
 656				}
 657				p.currBlock.appendChild(node)
 658				node.appendChild(text(stripMailto(link)))
 659			}
 660		} else {
 661			htmlTag := NewNode(HtmlSpan)
 662			htmlTag.Literal = data[:end]
 663			p.currBlock.appendChild(htmlTag)
 664		}
 665	}
 666
 667	return end
 668}
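
// Three possible outcomes for '<', depending on what tagLength and
// inlineHtmlComment report:
//
//	<http://example.com/>  -> Link node, destination "http://example.com/"
//	<foo@example.com>      -> Link node, destination "mailto:foo@example.com"
//	<em>                   -> HtmlSpan node with literal "<em>"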
 669
 670// '\\' backslash escape
 671var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
 672
 673func escape(p *parser, data []byte, offset int) int {
 674	data = data[offset:]
 675
 676	if len(data) > 1 {
 677		if p.flags&BackslashLineBreak != 0 && data[1] == '\n' {
 678			p.currBlock.appendChild(NewNode(Hardbreak))
 679			return 2
 680		}
 681		if bytes.IndexByte(escapeChars, data[1]) < 0 {
 682			return 0
 683		}
 684
 685		p.currBlock.appendChild(text(data[1:2]))
 686	}
 687
 688	return 2
 689}
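
// For illustration:
//
//	escape(p, []byte(`\*still plain`), 0)  // emits Text "*", returns 2
//	escape(p, []byte(`\q`), 0)             // 'q' is not escapable: returns 0, the caller keeps the '\' as text
//
// With the BackslashLineBreak flag set, a backslash at the end of a line
// ('\' followed by '\n') instead emits a Hardbreak.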
 690
 691func unescapeText(ob *bytes.Buffer, src []byte) {
 692	i := 0
 693	for i < len(src) {
 694		org := i
 695		for i < len(src) && src[i] != '\\' {
 696			i++
 697		}
 698
 699		if i > org {
 700			ob.Write(src[org:i])
 701		}
 702
 703		if i+1 >= len(src) {
 704			break
 705		}
 706
 707		ob.WriteByte(src[i+1])
 708		i += 2
 709	}
 710}
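
// For illustration, unescapeText drops the backslash from each two-byte
// escape pair and copies everything else through; a lone trailing backslash
// is dropped:
//
//	var buf bytes.Buffer
//	unescapeText(&buf, []byte(`foo\_bar\\baz`))
//	// buf.String() == `foo_bar\baz`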
 711
 712// '&' escaped when it doesn't belong to an entity
 713// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 714func entity(p *parser, data []byte, offset int) int {
 715	data = data[offset:]
 716
 717	end := 1
 718
 719	if end < len(data) && data[end] == '#' {
 720		end++
 721	}
 722
 723	for end < len(data) && isalnum(data[end]) {
 724		end++
 725	}
 726
 727	if end < len(data) && data[end] == ';' {
 728		end++ // real entity
 729	} else {
 730		return 0 // lone '&'
 731	}
 732
 733	ent := data[:end]
 734	// undo &amp; escaping or it will be converted to &amp;amp; by another
 735	// escaper in the renderer
 736	if bytes.Equal(ent, []byte("&amp;")) {
 737		ent = []byte{'&'}
 738	}
 739	p.currBlock.appendChild(text(ent))
 740
 741	return end
 742}
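
// For illustration:
//
//	entity(p, []byte("&amp; x"), 0)    // emits Text "&" (un-doubled), returns 5
//	entity(p, []byte("&#8212; x"), 0)  // emits Text "&#8212;" unchanged, returns 7
//	entity(p, []byte("& x"), 0)        // lone ampersand: returns 0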
 743
 744func linkEndsWithEntity(data []byte, linkEnd int) bool {
 745	entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
 746	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
 747}
 748
 749func maybeAutoLink(p *parser, data []byte, offset int) int {
 750	// quick check to rule out most false hits
 751	if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below
 752		return 0
 753	}
 754	prefixes := []string{
 755		"http://",
 756		"https://",
 757		"ftp://",
 758		"file://",
 759		"mailto:",
 760	}
 761	for _, prefix := range prefixes {
 762		endOfHead := offset + 8 // 8 is the len() of the longest prefix
 763		if endOfHead > len(data) {
 764			endOfHead = len(data)
 765		}
 766		head := bytes.ToLower(data[offset:endOfHead])
 767		if bytes.HasPrefix(head, []byte(prefix)) {
 768			return autoLink(p, data, offset)
 769		}
 770	}
 771	return 0
 772}
 773
 774func autoLink(p *parser, data []byte, offset int) int {
 775	// Now a more expensive check to see if we're not inside an anchor element
 776	anchorStart := offset
 777	offsetFromAnchor := 0
 778	for anchorStart > 0 && data[anchorStart] != '<' {
 779		anchorStart--
 780		offsetFromAnchor++
 781	}
 782
 783	anchorStr := anchorRe.Find(data[anchorStart:])
 784	if anchorStr != nil {
 785		anchorClose := NewNode(HtmlSpan)
 786		anchorClose.Literal = anchorStr[offsetFromAnchor:]
 787		p.currBlock.appendChild(anchorClose)
 788		return len(anchorStr) - offsetFromAnchor
 789	}
 790
 791	// scan backward for a word boundary
 792	rewind := 0
 793	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
 794		rewind++
 795	}
 796	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
 797		return 0
 798	}
 799
 800	origData := data
 801	data = data[offset-rewind:]
 802
 803	if !isSafeLink(data) {
 804		return 0
 805	}
 806
 807	linkEnd := 0
 808	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
 809		linkEnd++
 810	}
 811
 812	// Skip punctuation at the end of the link
 813	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
 814		linkEnd--
 815	}
 816
 817	// But don't skip the semicolon if it's part of an escaped entity:
 818	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
 819		linkEnd--
 820	}
 821
 822	// See if the link finishes with a punctuation sign that can be closed.
 823	var copen byte
 824	switch data[linkEnd-1] {
 825	case '"':
 826		copen = '"'
 827	case '\'':
 828		copen = '\''
 829	case ')':
 830		copen = '('
 831	case ']':
 832		copen = '['
 833	case '}':
 834		copen = '{'
 835	default:
 836		copen = 0
 837	}
 838
 839	if copen != 0 {
 840		bufEnd := offset - rewind + linkEnd - 2
 841
 842		openDelim := 1
 843
 844		/* Try to close the final punctuation sign in this same line;
 845		 * if we managed to close it outside of the URL, that means that it's
 846		 * not part of the URL. If it closes inside the URL, that means it
 847		 * is part of the URL.
 848		 *
 849		 * Examples:
 850		 *
 851		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
 852		 *              => http://www.pokemon.com/Pikachu_(Electric)
 853		 *
 854		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
 855		 *              => http://www.pokemon.com/Pikachu_(Electric)
 856		 *
 857		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
 858		 *              => http://www.pokemon.com/Pikachu_(Electric))
 859		 *
 860		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
 861		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
 862		 */
 863
 864		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
 865			if origData[bufEnd] == data[linkEnd-1] {
 866				openDelim++
 867			}
 868
 869			if origData[bufEnd] == copen {
 870				openDelim--
 871			}
 872
 873			bufEnd--
 874		}
 875
 876		if openDelim == 0 {
 877			linkEnd--
 878		}
 879	}
 880
 881	var uLink bytes.Buffer
 882	unescapeText(&uLink, data[:linkEnd])
 883
 884	if uLink.Len() > 0 {
 885		node := NewNode(Link)
 886		node.Destination = uLink.Bytes()
 887		p.currBlock.appendChild(node)
 888		node.appendChild(text(uLink.Bytes()))
 889	}
 890
 891	return linkEnd
 892}
 893
 894func isEndOfLink(char byte) bool {
 895	return isspace(char) || char == '<'
 896}
 897
 898var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
 899var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
 900
 901func isSafeLink(link []byte) bool {
 902	for _, path := range validPaths {
 903		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
 904			if len(link) == len(path) {
 905				return true
 906			} else if isalnum(link[len(path)]) {
 907				return true
 908			}
 909		}
 910	}
 911
 912	for _, prefix := range validUris {
 913		// TODO: handle unicode here
 914		// case-insensitive prefix test
 915		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
 916			return true
 917		}
 918	}
 919
 920	return false
 921}
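
// For illustration:
//
//	isSafeLink([]byte("https://example.com/x"))  // true
//	isSafeLink([]byte("../notes/today.md"))      // true: relative path
//	isSafeLink([]byte("https://"))               // false: nothing alphanumeric after the scheme
//	isSafeLink([]byte("javascript:alert(1)"))    // false: not an allowed scheme or path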
 922
 923// return the length of the given tag, or 0 if it's not valid
 924func tagLength(data []byte, autolink *LinkType) int {
 925	var i, j int
 926
 927	// a valid tag can't be shorter than 3 chars
 928	if len(data) < 3 {
 929		return 0
 930	}
 931
 932	// begins with a '<' optionally followed by '/', followed by letter or number
 933	if data[0] != '<' {
 934		return 0
 935	}
 936	if data[1] == '/' {
 937		i = 2
 938	} else {
 939		i = 1
 940	}
 941
 942	if !isalnum(data[i]) {
 943		return 0
 944	}
 945
 946	// scheme test
 947	*autolink = LinkTypeNotAutolink
 948
 949	// try to find the beginning of a URI
 950	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
 951		i++
 952	}
 953
 954	if i > 1 && i < len(data) && data[i] == '@' {
 955		if j = isMailtoAutoLink(data[i:]); j != 0 {
 956			*autolink = LinkTypeEmail
 957			return i + j
 958		}
 959	}
 960
 961	if i > 2 && i < len(data) && data[i] == ':' {
 962		*autolink = LinkTypeNormal
 963		i++
 964	}
 965
 966	// complete autolink test: no whitespace or ' or "
 967	switch {
 968	case i >= len(data):
 969		*autolink = LinkTypeNotAutolink
 970	case *autolink != 0:
 971		j = i
 972
 973		for i < len(data) {
 974			if data[i] == '\\' {
 975				i += 2
 976			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
 977				break
 978			} else {
 979				i++
 980			}
 981
 982		}
 983
 984		if i >= len(data) {
 985			return 0
 986		}
 987		if i > j && data[i] == '>' {
 988			return i + 1
 989		}
 990
 991		// one of the forbidden chars has been found
 992		*autolink = LinkTypeNotAutolink
 993	}
 994
 995	// look for something that looks like a tag end
 996	for i < len(data) && data[i] != '>' {
 997		i++
 998	}
 999	if i >= len(data) {
1000		return 0
1001	}
1002	return i + 1
1003}
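
// For illustration (t is a LinkType the caller passes by pointer):
//
//	var t LinkType
//	tagLength([]byte("<em>"), &t)                   // 4,  t == LinkTypeNotAutolink
//	tagLength([]byte("<http://example.com/>"), &t)  // 21, t == LinkTypeNormal
//	tagLength([]byte("<foo@example.com>"), &t)      // 17, t == LinkTypeEmail
//	tagLength([]byte("<not a tag"), &t)             // 0: no closing '>'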
1004
1005// look for the address part of a mail autolink and '>'
1006// this is less strict than the original markdown e-mail address matching
1007func isMailtoAutoLink(data []byte) int {
1008	nb := 0
1009
1010	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1011	for i := 0; i < len(data); i++ {
1012		if isalnum(data[i]) {
1013			continue
1014		}
1015
1016		switch data[i] {
1017		case '@':
1018			nb++
1019
1020		case '-', '.', '_':
1021			break
1022
1023		case '>':
1024			if nb == 1 {
1025				return i + 1
1026			} else {
1027				return 0
1028			}
1029		default:
1030			return 0
1031		}
1032	}
1033
1034	return 0
1035}
1036
1037// look for the next emph char, skipping other constructs
1038func helperFindEmphChar(data []byte, c byte) int {
1039	i := 0
1040
1041	for i < len(data) {
1042		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
1043			i++
1044		}
1045		if i >= len(data) {
1046			return 0
1047		}
1048		// do not count escaped chars
1049		if i != 0 && data[i-1] == '\\' {
1050			i++
1051			continue
1052		}
1053		if data[i] == c {
1054			return i
1055		}
1056
1057		if data[i] == '`' {
1058			// skip a code span
1059			tmpI := 0
1060			i++
1061			for i < len(data) && data[i] != '`' {
1062				if tmpI == 0 && data[i] == c {
1063					tmpI = i
1064				}
1065				i++
1066			}
1067			if i >= len(data) {
1068				return tmpI
1069			}
1070			i++
1071		} else if data[i] == '[' {
1072			// skip a link
1073			tmpI := 0
1074			i++
1075			for i < len(data) && data[i] != ']' {
1076				if tmpI == 0 && data[i] == c {
1077					tmpI = i
1078				}
1079				i++
1080			}
1081			i++
1082			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
1083				i++
1084			}
1085			if i >= len(data) {
1086				return tmpI
1087			}
1088			if data[i] != '[' && data[i] != '(' { // not a link
1089				if tmpI > 0 {
1090					return tmpI
1091				} else {
1092					continue
1093				}
1094			}
1095			cc := data[i]
1096			i++
1097			for i < len(data) && data[i] != cc {
1098				if tmpI == 0 && data[i] == c {
1099					return i
1100				}
1101				i++
1102			}
1103			if i >= len(data) {
1104				return tmpI
1105			}
1106			i++
1107		}
1108	}
1109	return 0
1110}
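
// For illustration, a delimiter inside a code span is not a candidate, so the
// search continues past it, and escaped delimiters are skipped:
//
//	helperFindEmphChar([]byte("a `*` b* c"), '*')  // 7: index of the '*' after "b"
//	helperFindEmphChar([]byte(`\*x* y`), '*')      // 3: the escaped '*' at index 1 is skipped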
1111
1112func helperEmphasis(p *parser, data []byte, c byte) int {
1113	i := 0
1114
1115	// skip one symbol if coming from emph3
1116	if len(data) > 1 && data[0] == c && data[1] == c {
1117		i = 1
1118	}
1119
1120	for i < len(data) {
1121		length := helperFindEmphChar(data[i:], c)
1122		if length == 0 {
1123			return 0
1124		}
1125		i += length
1126		if i >= len(data) {
1127			return 0
1128		}
1129
1130		if i+1 < len(data) && data[i+1] == c {
1131			i++
1132			continue
1133		}
1134
1135		if data[i] == c && !isspace(data[i-1]) {
1136
1137			if p.flags&NoIntraEmphasis != 0 {
1138				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1139					continue
1140				}
1141			}
1142
1143			emph := NewNode(Emph)
1144			p.currBlock.appendChild(emph)
1145			tmp := p.currBlock
1146			p.currBlock = emph
1147			p.inline(data[:i])
1148			p.currBlock = tmp
1149			return i + 1
1150		}
1151	}
1152
1153	return 0
1154}
1155
1156func helperDoubleEmphasis(p *parser, data []byte, c byte) int {
1157	i := 0
1158
1159	for i < len(data) {
1160		length := helperFindEmphChar(data[i:], c)
1161		if length == 0 {
1162			return 0
1163		}
1164		i += length
1165
1166		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1167			nodeType := Strong
1168			if c == '~' {
1169				nodeType = Del
1170			}
1171			node := NewNode(nodeType)
1172			p.currBlock.appendChild(node)
1173			tmp := p.currBlock
1174			p.currBlock = node
1175			p.inline(data[:i])
1176			p.currBlock = tmp
1177			return i + 2
1178		}
1179		i++
1180	}
1181	return 0
1182}
1183
1184func helperTripleEmphasis(p *parser, data []byte, offset int, c byte) int {
1185	i := 0
1186	origData := data
1187	data = data[offset:]
1188
1189	for i < len(data) {
1190		length := helperFindEmphChar(data[i:], c)
1191		if length == 0 {
1192			return 0
1193		}
1194		i += length
1195
1196		// skip delimiters preceded by whitespace
1197		if data[i] != c || isspace(data[i-1]) {
1198			continue
1199		}
1200
1201		switch {
1202		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1203			// triple symbol found
1204			strong := NewNode(Strong)
1205			em := NewNode(Emph)
1206			strong.appendChild(em)
1207			p.currBlock.appendChild(strong)
1208			tmp := p.currBlock
1209			p.currBlock = em
1210			p.inline(data[:i])
1211			p.currBlock = tmp
1212			return i + 3
1213		case (i+1 < len(data) && data[i+1] == c):
1214			// double symbol found, hand over to emph1
1215			length = helperEmphasis(p, origData[offset-2:], c)
1216			if length == 0 {
1217				return 0
1218			} else {
1219				return length - 2
1220			}
1221		default:
1222			// single symbol found, hand over to emph2
1223			length = helperDoubleEmphasis(p, origData[offset-1:], c)
1224			if length == 0 {
1225				return 0
1226			} else {
1227				return length - 1
1228			}
1229		}
1230	}
1231	return 0
1232}
1233
1234func text(s []byte) *Node {
1235	node := NewNode(Text)
1236	node.Literal = s
1237	return node
1238}
1239
1240func normalizeURI(s []byte) []byte {
1241	return s // TODO: implement
1242}