icy does git — grayfriday (75a3bab973d9bd4e822640517e2fb039a07b32b9): inline.go

inline.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse inline elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"regexp"
  19	"strconv"
  20)
  21
var (
	// urlRe is regexp source text (it is only compiled as part of anchorRe)
	// for a URL: an http, https or ftp scheme followed by "://", or a single
	// leading "/", followed by one or more characters from the bracketed set.
	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, anchored at the start of the input, a complete
	// <a href="URL">URL</a> element with an optional title attribute.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// htmlEntityRe matches an HTML character reference.
	//
	// https://www.w3.org/TR/html5/syntax.html#character-references
	// The highest Unicode code point is U+10FFFF (17 planes, 1,114,112 code
	// points), which needs at most 7 decimal digits or 6 hex digits.
	// Named entity references can be 2-31 characters long, with &lt; at one
	// end and &CounterClockwiseContourIntegral; at the other. Some names end
	// in digits (see &frac34; and &blk12;); this isn't inherent in the
	// specification, but digits never appear anywhere else in the current
	// character references.
	// https://www.w3.org/TR/html5/syntax.html#named-character-references
	//
	// entity := "&" (named group | number ref) ";"
	// named group := [a-zA-Z]{2,31}[0-9]{0,2}
	// number ref := "#" (dec ref | hex ref)
	// dec ref := [0-9]{1,7}
	// hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)
  43
  44// Functions to parse text within a block
  45// Each function returns the number of chars taken care of
  46// data is the complete block being rendered
  47// offset is the number of valid chars before the current cursor
  48
  49func (p *Markdown) inline(currBlock *Node, data []byte) {
  50	// handlers might call us recursively: enforce a maximum depth
  51	if p.nesting >= p.maxNesting || len(data) == 0 {
  52		return
  53	}
  54	p.nesting++
  55	beg, end := 0, 0
  56	for end < len(data) {
  57		handler := p.inlineCallback[data[end]]
  58		if handler != nil {
  59			if consumed, node := handler(p, data, end); consumed == 0 {
  60				// No action from the callback.
  61				end++
  62			} else {
  63				// Copy inactive chars into the output.
  64				currBlock.AppendChild(text(data[beg:end]))
  65				if node != nil {
  66					currBlock.AppendChild(node)
  67				}
  68				// Skip past whatever the callback used.
  69				beg = end + consumed
  70				end = beg
  71			}
  72		} else {
  73			end++
  74		}
  75	}
  76	if beg < len(data) {
  77		if data[end-1] == '\n' {
  78			end--
  79		}
  80		currBlock.AppendChild(text(data[beg:end]))
  81	}
  82	p.nesting--
  83}
  84
  85// single and double emphasis parsing
  86func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
  87	data = data[offset:]
  88	c := data[0]
  89
  90	if len(data) > 2 && data[1] != c {
  91		// whitespace cannot follow an opening emphasis;
  92		// strikethrough only takes two characters '~~'
  93		if c == '~' || isspace(data[1]) {
  94			return 0, nil
  95		}
  96		ret, node := helperEmphasis(p, data[1:], c)
  97		if ret == 0 {
  98			return 0, nil
  99		}
 100
 101		return ret + 1, node
 102	}
 103
 104	if len(data) > 3 && data[1] == c && data[2] != c {
 105		if isspace(data[2]) {
 106			return 0, nil
 107		}
 108		ret, node := helperDoubleEmphasis(p, data[2:], c)
 109		if ret == 0 {
 110			return 0, nil
 111		}
 112
 113		return ret + 2, node
 114	}
 115
 116	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
 117		if c == '~' || isspace(data[3]) {
 118			return 0, nil
 119		}
 120		ret, node := helperTripleEmphasis(p, data, 3, c)
 121		if ret == 0 {
 122			return 0, nil
 123		}
 124
 125		return ret + 3, node
 126	}
 127
 128	return 0, nil
 129}
 130
 131func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
 132	data = data[offset:]
 133
 134	nb := 0
 135
 136	// count the number of backticks in the delimiter
 137	for nb < len(data) && data[nb] == '`' {
 138		nb++
 139	}
 140
 141	// find the next delimiter
 142	i, end := 0, 0
 143	for end = nb; end < len(data) && i < nb; end++ {
 144		if data[end] == '`' {
 145			i++
 146		} else {
 147			i = 0
 148		}
 149	}
 150
 151	// no matching delimiter?
 152	if i < nb && end >= len(data) {
 153		return 0, nil
 154	}
 155
 156	// trim outside whitespace
 157	fBegin := nb
 158	for fBegin < end && data[fBegin] == ' ' {
 159		fBegin++
 160	}
 161
 162	fEnd := end - nb
 163	for fEnd > fBegin && data[fEnd-1] == ' ' {
 164		fEnd--
 165	}
 166
 167	// render the code span
 168	if fBegin != fEnd {
 169		code := NewNode(Code)
 170		code.Literal = data[fBegin:fEnd]
 171		return end, code
 172	}
 173
 174	return end, nil
 175}
 176
 177// newline preceded by two spaces becomes <br>
 178func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
 179	origOffset := offset
 180	for offset < len(data) && data[offset] == ' ' {
 181		offset++
 182	}
 183
 184	if offset < len(data) && data[offset] == '\n' {
 185		if offset-origOffset >= 2 {
 186			return offset - origOffset + 1, NewNode(Hardbreak)
 187		}
 188		return offset - origOffset, nil
 189	}
 190	return 0, nil
 191}
 192
 193// newline without two spaces works when HardLineBreak is enabled
 194func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
 195	if p.extensions&HardLineBreak != 0 {
 196		return 1, NewNode(Hardbreak)
 197	}
 198	return 0, nil
 199}
 200
// linkType identifies which bracketed construct link is parsing.
type linkType int

const (
	// linkNormal is a regular link: [text].
	linkNormal linkType = iota
	// linkImg is an image: ![alt].
	linkImg
	// linkDeferredFootnote is a footnote reference: [^refId].
	linkDeferredFootnote
	// linkInlineFootnote is an inline footnote: ^[text].
	linkInlineFootnote
)
 209
 210func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
 211	if t == linkDeferredFootnote {
 212		return false
 213	}
 214	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
 215}
 216
 217func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
 218	if offset < len(data)-1 && data[offset+1] == '[' {
 219		return link(p, data, offset)
 220	}
 221	return 0, nil
 222}
 223
 224func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
 225	if offset < len(data)-1 && data[offset+1] == '[' {
 226		return link(p, data, offset)
 227	}
 228	return 0, nil
 229}
 230
// link parses the construct that starts at data[offset]: a regular link, an
// image, an inline footnote or a deferred footnote. It is also called by
// maybeImage and maybeInlineFootnote.
//
// After the bracketed text, three forms are recognized:
//   - an inline destination:  [text](url "title")
//   - a reference:            [text][id]  (an empty id reuses the text)
//   - a shortcut reference or footnote, where the bracketed text itself is
//     the id.
//
// It returns the number of bytes consumed and the resulting node, or (0, nil)
// when the input does not form a valid link.
func link(p *Markdown, data []byte, offset int) (int, *Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	// Classify the construct from its first one or two bytes.
	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// From here on, indices are relative to the opening '[' (for images and
	// inline footnotes the leading '!' or '^' has been skipped above).
	data = data[offset:]

	var (
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// Deferred footnotes have a '^' right after the '['; step over it.
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			// the previous byte is a backslash: this byte is escaped
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	// No matching ']' was found.
	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the closing ']' of the link text.
	txtE := i
	i++
	var footnoteNode *Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				// skip the backslash and the byte it escapes
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: there is no title, and everything up to
				// the ')' belongs to the link
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the link text doubles as the reference id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, turning each newline into a single space
				// unless a space already precedes it
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step past the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// same newline-to-space normalization as in the reference case
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = NewNode(Item)
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			// The fragment is the slugified id, cut to at most 16 bytes, or
			// "footnote-<n>" when the id is empty.
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			if t == linkDeferredFootnote {
				// record the footnote: give it the next note number
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// Regular links and images use the unescaped destination.
	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	var linkNode *Node
	switch t {
	case linkNormal:
		linkNode = NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(linkNode, data[1:txtE])
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode = NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		linkNode.AppendChild(text(data[1:txtE]))
		// presumably accounts for the '!' skipped before re-slicing data
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode = NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		linkNode.Footnote = footnoteNode
		if t == linkInlineFootnote {
			// presumably accounts for the '^' skipped before re-slicing data
			i++
		}

	default:
		return 0, nil
	}

	return i, linkNode
}
 589
 590func (p *Markdown) inlineHTMLComment(data []byte) int {
 591	if len(data) < 5 {
 592		return 0
 593	}
 594	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
 595		return 0
 596	}
 597	i := 5
 598	// scan for an end-of-comment marker, across lines if necessary
 599	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
 600		i++
 601	}
 602	// no end-of-comment marker
 603	if i >= len(data) {
 604		return 0
 605	}
 606	return i + 1
 607}
 608
 609func stripMailto(link []byte) []byte {
 610	if bytes.HasPrefix(link, []byte("mailto://")) {
 611		return link[9:]
 612	} else if bytes.HasPrefix(link, []byte("mailto:")) {
 613		return link[7:]
 614	} else {
 615		return link
 616	}
 617}
 618
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	// notAutolink: the input is not an autolink.
	notAutolink autolinkType = iota
	// normalAutolink: a "<scheme:...>" autolink.
	normalAutolink
	// emailAutolink: an "<address@host>" autolink.
	emailAutolink
)
 628
 629// '<' when tags or autolinks are allowed
 630func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
 631	data = data[offset:]
 632	altype, end := tagLength(data)
 633	if size := p.inlineHTMLComment(data); size > 0 {
 634		end = size
 635	}
 636	if end > 2 {
 637		if altype != notAutolink {
 638			var uLink bytes.Buffer
 639			unescapeText(&uLink, data[1:end+1-2])
 640			if uLink.Len() > 0 {
 641				link := uLink.Bytes()
 642				node := NewNode(Link)
 643				node.Destination = link
 644				if altype == emailAutolink {
 645					node.Destination = append([]byte("mailto:"), link...)
 646				}
 647				node.AppendChild(text(stripMailto(link)))
 648				return end, node
 649			}
 650		} else {
 651			htmlTag := NewNode(HTMLSpan)
 652			htmlTag.Literal = data[:end]
 653			return end, htmlTag
 654		}
 655	}
 656
 657	return end, nil
 658}
 659
 660// '\\' backslash escape
 661var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
 662
 663func escape(p *Markdown, data []byte, offset int) (int, *Node) {
 664	data = data[offset:]
 665
 666	if len(data) > 1 {
 667		if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
 668			return 2, NewNode(Hardbreak)
 669		}
 670		if bytes.IndexByte(escapeChars, data[1]) < 0 {
 671			return 0, nil
 672		}
 673
 674		return 2, text(data[1:2])
 675	}
 676
 677	return 2, nil
 678}
 679
 680func unescapeText(ob *bytes.Buffer, src []byte) {
 681	i := 0
 682	for i < len(src) {
 683		org := i
 684		for i < len(src) && src[i] != '\\' {
 685			i++
 686		}
 687
 688		if i > org {
 689			ob.Write(src[org:i])
 690		}
 691
 692		if i+1 >= len(src) {
 693			break
 694		}
 695
 696		ob.WriteByte(src[i+1])
 697		i += 2
 698	}
 699}
 700
 701// '&' escaped when it doesn't belong to an entity
 702// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 703func entity(p *Markdown, data []byte, offset int) (int, *Node) {
 704	data = data[offset:]
 705
 706	end := 1
 707
 708	if end < len(data) && data[end] == '#' {
 709		end++
 710	}
 711
 712	for end < len(data) && isalnum(data[end]) {
 713		end++
 714	}
 715
 716	if end < len(data) && data[end] == ';' {
 717		end++ // real entity
 718	} else {
 719		return 0, nil // lone '&'
 720	}
 721
 722	ent := data[:end]
 723	// undo &amp; escaping or it will be converted to &amp;amp; by another
 724	// escaper in the renderer
 725	if bytes.Equal(ent, []byte("&amp;")) {
 726		ent = []byte{'&'}
 727	}
 728
 729	return end, text(ent)
 730}
 731
 732func linkEndsWithEntity(data []byte, linkEnd int) bool {
 733	entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
 734	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
 735}
 736
 737// hasPrefixCaseInsensitive is a custom implementation of
 738//     strings.HasPrefix(strings.ToLower(s), prefix)
 739// we rolled our own because ToLower pulls in a huge machinery of lowercasing
 740// anything from Unicode and that's very slow. Since this func will only be
 741// used on ASCII protocol prefixes, we can take shortcuts.
 742func hasPrefixCaseInsensitive(s, prefix []byte) bool {
 743	if len(s) < len(prefix) {
 744		return false
 745	}
 746	delta := byte('a' - 'A')
 747	for i, b := range prefix {
 748		if b != s[i] && b != s[i]+delta {
 749			return false
 750		}
 751	}
 752	return true
 753}
 754
// protocolPrefixes lists the lowercase scheme prefixes that maybeAutoLink
// looks for at the cursor before handing off to autoLink.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is the smallest number of bytes that must remain at the
// cursor for any of protocolPrefixes to fit.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
 764
 765func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
 766	// quick check to rule out most false hits
 767	if p.insideLink || len(data) < offset+shortestPrefix {
 768		return 0, nil
 769	}
 770	for _, prefix := range protocolPrefixes {
 771		endOfHead := offset + 8 // 8 is the len() of the longest prefix
 772		if endOfHead > len(data) {
 773			endOfHead = len(data)
 774		}
 775		if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
 776			return autoLink(p, data, offset)
 777		}
 778	}
 779	return 0, nil
 780}
 781
// autoLink turns a bare URL at data[offset] into a Link node whose
// destination and text are both the unescaped URL.
//
// It first checks whether the URL is part of an <a href="...">...</a> element
// (as matched by anchorRe). If so, the remainder of that element is emitted
// verbatim as an HTMLSpan. Otherwise the URL must pass isSafeLink; its end is
// the first whitespace or '<'. Trailing '.', ',' or ';' and an unbalanced
// closing quote or bracket are left out of the link.
//
// It returns the number of bytes consumed and the node, or (0, nil) if no
// autolink is recognized.
func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	// Walk back to the nearest '<' (or the start of data), remembering how
	// far the cursor is from it.
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// Inside a literal anchor: pass through the part of it that starts at
		// the cursor without autolinking.
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	// Keep the full input for the backward delimiter scan below; data now
	// starts at the rewound position.
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	// The link runs up to the first whitespace or '<'.
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	// copen is the opening counterpart of the last byte, or 0 if there is none.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// Index in origData of the byte just before the closing delimiter.
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// Scan backward through the current line, counting closers up and
		// openers down, until the delimiters balance or the line starts.
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// Balanced: drop the trailing delimiter from the link.
		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		node.AppendChild(text(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
 900
 901func isEndOfLink(char byte) bool {
 902	return isspace(char) || char == '<'
 903}
 904
 905var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
 906var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
 907
 908func isSafeLink(link []byte) bool {
 909	for _, path := range validPaths {
 910		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
 911			if len(link) == len(path) {
 912				return true
 913			} else if isalnum(link[len(path)]) {
 914				return true
 915			}
 916		}
 917	}
 918
 919	for _, prefix := range validUris {
 920		// TODO: handle unicode here
 921		// case-insensitive prefix test
 922		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
 923			return true
 924		}
 925	}
 926
 927	return false
 928}
 929
 930// return the length of the given tag, or 0 is it's not valid
 931func tagLength(data []byte) (autolink autolinkType, end int) {
 932	var i, j int
 933
 934	// a valid tag can't be shorter than 3 chars
 935	if len(data) < 3 {
 936		return notAutolink, 0
 937	}
 938
 939	// begins with a '<' optionally followed by '/', followed by letter or number
 940	if data[0] != '<' {
 941		return notAutolink, 0
 942	}
 943	if data[1] == '/' {
 944		i = 2
 945	} else {
 946		i = 1
 947	}
 948
 949	if !isalnum(data[i]) {
 950		return notAutolink, 0
 951	}
 952
 953	// scheme test
 954	autolink = notAutolink
 955
 956	// try to find the beginning of an URI
 957	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
 958		i++
 959	}
 960
 961	if i > 1 && i < len(data) && data[i] == '@' {
 962		if j = isMailtoAutoLink(data[i:]); j != 0 {
 963			return emailAutolink, i + j
 964		}
 965	}
 966
 967	if i > 2 && i < len(data) && data[i] == ':' {
 968		autolink = normalAutolink
 969		i++
 970	}
 971
 972	// complete autolink test: no whitespace or ' or "
 973	switch {
 974	case i >= len(data):
 975		autolink = notAutolink
 976	case autolink != notAutolink:
 977		j = i
 978
 979		for i < len(data) {
 980			if data[i] == '\\' {
 981				i += 2
 982			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
 983				break
 984			} else {
 985				i++
 986			}
 987
 988		}
 989
 990		if i >= len(data) {
 991			return autolink, 0
 992		}
 993		if i > j && data[i] == '>' {
 994			return autolink, i + 1
 995		}
 996
 997		// one of the forbidden chars has been found
 998		autolink = notAutolink
 999	}
1000	i += bytes.IndexByte(data[i:], '>')
1001	if i < 0 {
1002		return autolink, 0
1003	}
1004	return autolink, i + 1
1005}
1006
1007// look for the address part of a mail autolink and '>'
1008// this is less strict than the original markdown e-mail address matching
1009func isMailtoAutoLink(data []byte) int {
1010	nb := 0
1011
1012	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1013	for i := 0; i < len(data); i++ {
1014		if isalnum(data[i]) {
1015			continue
1016		}
1017
1018		switch data[i] {
1019		case '@':
1020			nb++
1021
1022		case '-', '.', '_':
1023			break
1024
1025		case '>':
1026			if nb == 1 {
1027				return i + 1
1028			}
1029			return 0
1030		default:
1031			return 0
1032		}
1033	}
1034
1035	return 0
1036}
1037
// helperFindEmphChar looks for the next emphasis char c in data, skipping
// over code spans and links.
//
// It returns the index of the match. 0 means "not found", so a match at
// index 0 cannot be told apart from no match. When a code span or link is
// left unterminated, the first c seen inside it (if any) is returned instead.
func helperFindEmphChar(data []byte, c byte) int {
	n := len(data)
	pos := 0

	for pos < n {
		// Advance to the next byte of interest: c, a backtick or '['.
		for pos < n {
			if b := data[pos]; b == c || b == '`' || b == '[' {
				break
			}
			pos++
		}
		if pos >= n {
			return 0
		}

		// do not count escaped chars
		if pos > 0 && data[pos-1] == '\\' {
			pos++
			continue
		}

		if data[pos] == c {
			return pos
		}

		switch data[pos] {
		case '`':
			// skip a code span, remembering the first c inside it
			fallback := 0
			for pos++; pos < n && data[pos] != '`'; pos++ {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
			}
			if pos >= n {
				return fallback
			}
			pos++

		case '[':
			// skip a link: first the bracketed text ...
			fallback := 0
			for pos++; pos < n && data[pos] != ']'; pos++ {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
			}
			pos++

			// ... then any spaces or newlines after it
			for pos < n && (data[pos] == ' ' || data[pos] == '\n') {
				pos++
			}
			if pos >= n {
				return fallback
			}

			delim := data[pos]
			if delim != '[' && delim != '(' { // not a link
				if fallback > 0 {
					return fallback
				}
				continue
			}

			// ... then the second part, scanned until delim is seen again
			for pos++; pos < n && data[pos] != delim; pos++ {
				if fallback == 0 && data[pos] == c {
					return pos
				}
			}
			if pos >= n {
				return fallback
			}
			pos++
		}
	}

	return 0
}
1111
1112func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
1113	i := 0
1114
1115	// skip one symbol if coming from emph3
1116	if len(data) > 1 && data[0] == c && data[1] == c {
1117		i = 1
1118	}
1119
1120	for i < len(data) {
1121		length := helperFindEmphChar(data[i:], c)
1122		if length == 0 {
1123			return 0, nil
1124		}
1125		i += length
1126		if i >= len(data) {
1127			return 0, nil
1128		}
1129
1130		if i+1 < len(data) && data[i+1] == c {
1131			i++
1132			continue
1133		}
1134
1135		if data[i] == c && !isspace(data[i-1]) {
1136
1137			if p.extensions&NoIntraEmphasis != 0 {
1138				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1139					continue
1140				}
1141			}
1142
1143			emph := NewNode(Emph)
1144			p.inline(emph, data[:i])
1145			return i + 1, emph
1146		}
1147	}
1148
1149	return 0, nil
1150}
1151
1152func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
1153	i := 0
1154
1155	for i < len(data) {
1156		length := helperFindEmphChar(data[i:], c)
1157		if length == 0 {
1158			return 0, nil
1159		}
1160		i += length
1161
1162		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1163			nodeType := Strong
1164			if c == '~' {
1165				nodeType = Del
1166			}
1167			node := NewNode(nodeType)
1168			p.inline(node, data[:i])
1169			return i + 2, node
1170		}
1171		i++
1172	}
1173	return 0, nil
1174}
1175
1176func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
1177	i := 0
1178	origData := data
1179	data = data[offset:]
1180
1181	for i < len(data) {
1182		length := helperFindEmphChar(data[i:], c)
1183		if length == 0 {
1184			return 0, nil
1185		}
1186		i += length
1187
1188		// skip whitespace preceded symbols
1189		if data[i] != c || isspace(data[i-1]) {
1190			continue
1191		}
1192
1193		switch {
1194		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1195			// triple symbol found
1196			strong := NewNode(Strong)
1197			em := NewNode(Emph)
1198			strong.AppendChild(em)
1199			p.inline(em, data[:i])
1200			return i + 3, strong
1201		case (i+1 < len(data) && data[i+1] == c):
1202			// double symbol found, hand over to emph1
1203			length, node := helperEmphasis(p, origData[offset-2:], c)
1204			if length == 0 {
1205				return 0, nil
1206			}
1207			return length - 2, node
1208		default:
1209			// single symbol found, hand over to emph2
1210			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
1211			if length == 0 {
1212				return 0, nil
1213			}
1214			return length - 1, node
1215		}
1216	}
1217	return 0, nil
1218}
1219
1220func text(s []byte) *Node {
1221	node := NewNode(Text)
1222	node.Literal = s
1223	return node
1224}
1225
// normalizeURI is a placeholder for URI normalization. It currently returns
// s unchanged.
func normalizeURI(s []byte) []byte {
	// TODO: implement
	return s
}
all repos — grayfriday @ 75a3bab973d9bd4e822640517e2fb039a07b32b9

blackfriday fork with a few changes