icy does git — grayfriday (b0bdfbec4ceab22844aa766b3856aa95753ffde8): inline.go

inline.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse inline elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"regexp"
  19	"strconv"
  20)
  21
var (
	// anchorRe matches, anchored at the start of its input, a complete HTML
	// anchor of the form <a href="URL"[ title="..."]>URL</a>. Both URL parts
	// are described by urlRe, which is declared outside this file section.
	// autoLink uses it to copy such anchors through verbatim.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
)
  25
  26// Functions to parse text within a block
  27// Each function returns the number of chars taken care of
  28// data is the complete block being rendered
  29// offset is the number of valid chars before the current cursor
  30
  31func (p *parser) inline(out *bytes.Buffer, data []byte) {
  32	// this is called recursively: enforce a maximum depth
  33	if p.nesting >= p.maxNesting {
  34		return
  35	}
  36	p.nesting++
  37
  38	i, end := 0, 0
  39	for i < len(data) {
  40		// copy inactive chars into the output
  41		for end < len(data) && p.inlineCallback[data[end]] == nil {
  42			end++
  43		}
  44
  45		p.r.NormalText(out, data[i:end])
  46
  47		if end >= len(data) {
  48			break
  49		}
  50		i = end
  51
  52		// call the trigger
  53		handler := p.inlineCallback[data[end]]
  54		if consumed := handler(p, out, data, i); consumed == 0 {
  55			// no action from the callback; buffer the byte for later
  56			end = i + 1
  57		} else {
  58			// skip past whatever the callback used
  59			i += consumed
  60			end = i
  61		}
  62	}
  63
  64	p.nesting--
  65}
  66
  67// single and double emphasis parsing
  68func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int {
  69	data = data[offset:]
  70	c := data[0]
  71	ret := 0
  72
  73	if len(data) > 2 && data[1] != c {
  74		// whitespace cannot follow an opening emphasis;
  75		// strikethrough only takes two characters '~~'
  76		if c == '~' || isspace(data[1]) {
  77			return 0
  78		}
  79		if ret = helperEmphasis(p, out, data[1:], c); ret == 0 {
  80			return 0
  81		}
  82
  83		return ret + 1
  84	}
  85
  86	if len(data) > 3 && data[1] == c && data[2] != c {
  87		if isspace(data[2]) {
  88			return 0
  89		}
  90		if ret = helperDoubleEmphasis(p, out, data[2:], c); ret == 0 {
  91			return 0
  92		}
  93
  94		return ret + 2
  95	}
  96
  97	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
  98		if c == '~' || isspace(data[3]) {
  99			return 0
 100		}
 101		if ret = helperTripleEmphasis(p, out, data, 3, c); ret == 0 {
 102			return 0
 103		}
 104
 105		return ret + 3
 106	}
 107
 108	return 0
 109}
 110
 111func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 112	data = data[offset:]
 113
 114	nb := 0
 115
 116	// count the number of backticks in the delimiter
 117	for nb < len(data) && data[nb] == '`' {
 118		nb++
 119	}
 120
 121	// find the next delimiter
 122	i, end := 0, 0
 123	for end = nb; end < len(data) && i < nb; end++ {
 124		if data[end] == '`' {
 125			i++
 126		} else {
 127			i = 0
 128		}
 129	}
 130
 131	// no matching delimiter?
 132	if i < nb && end >= len(data) {
 133		return 0
 134	}
 135
 136	// trim outside whitespace
 137	fBegin := nb
 138	for fBegin < end && data[fBegin] == ' ' {
 139		fBegin++
 140	}
 141
 142	fEnd := end - nb
 143	for fEnd > fBegin && data[fEnd-1] == ' ' {
 144		fEnd--
 145	}
 146
 147	// render the code span
 148	if fBegin != fEnd {
 149		p.r.CodeSpan(out, data[fBegin:fEnd])
 150	}
 151
 152	return end
 153
 154}
 155
 156// newline preceded by two spaces becomes <br>
 157// newline without two spaces works when EXTENSION_HARD_LINE_BREAK is enabled
 158func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 159	// remove trailing spaces from out
 160	outBytes := out.Bytes()
 161	end := len(outBytes)
 162	eol := end
 163	for eol > 0 && outBytes[eol-1] == ' ' {
 164		eol--
 165	}
 166	out.Truncate(eol)
 167
 168	// should there be a hard line break here?
 169	if p.flags&EXTENSION_HARD_LINE_BREAK == 0 && end-eol < 2 {
 170		return 0
 171	}
 172
 173	p.r.LineBreak(out)
 174	return 1
 175}
 176
// linkType identifies which bracket construct the link parser is handling.
type linkType int

const (
	// linkNormal is a regular link: [text]. It is the zero value.
	linkNormal linkType = iota
	// linkImg is an image: ![alt].
	linkImg
	// linkDeferredFootnote is a footnote reference by id: [^refId].
	linkDeferredFootnote
	// linkInlineFootnote is a footnote whose text is given inline: ^[text].
	linkInlineFootnote
)
 185
// '[': parse a link or an image or a footnote
//
// link is the inline trigger for '['. data is the whole block and offset is
// the position of the '[' within it; the byte before it ('!' or '^') and the
// byte after it ('^') select the construct. Three target syntaxes are handled:
// inline "[text](url 'title')", reference "[text][id]", and the shortcut form
// "[text]" (which also covers both footnote kinds).
//
// It returns the number of bytes consumed starting at offset, or 0 when the
// text is not a valid link, the reference is unknown, or the construct is not
// allowed at this position. On success the rendered element is written to out.
func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0
	}

	// [text] == regular link
	// ![alt] == image
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	var t linkType
	if offset > 0 && data[offset-1] == '!' {
		t = linkImg
	} else if p.flags&EXTENSION_FOOTNOTES != 0 {
		if offset > 0 && data[offset-1] == '^' {
			t = linkInlineFootnote
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	}

	// from here on all indices are relative to the opening '['
	data = data[offset:]

	var (
		i           = 1 // cursor; starts just past the '['
		noteId      int
		title, link []byte
		textHasNl   = false // set when the bracketed text spans several lines
	)

	// step over the '^' of "[^id]"
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		// a byte preceded by a backslash never opens or closes a bracket
		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	// unbalanced brackets: not a link
	if i >= len(data) {
		return 0
	}

	// txtE is the index of the closing ']'; the link text is data[1:txtE]
	txtE := i
	i++

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			// a backslash protects the following byte from ending the link
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			// without one, the quote was part of the URL: drop the title and
			// extend the link up to the ')'
			if data[titleE] != '\'' && data[titleE] != '"' {
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case i < len(data) && data[i] == '[':
		var id []byte

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty second bracket pair: the link text itself is the id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, turning each newline into a single space
				// unless a space already precedes it
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id (ids are case-insensitive)
		key := string(bytes.ToLower(id))
		lr, ok := p.refs[key]
		if !ok {
			return 0

		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		// step past the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// same newline folding as in the reference-style branch
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		key := string(bytes.ToLower(id))
		if t == linkInlineFootnote {
			// create a new reference
			noteId = len(p.notes) + 1

			// the anchor fragment is the slugified text cut to at most 16
			// bytes, or "footnote-<n>" when the text is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
			}

			ref := &reference{
				noteId:   noteId,
				hasBlock: false,
				link:     fragment,
				title:    id,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.refs[key]
			if !ok {
				return 0
			}

			// a deferred footnote is numbered and queued on first use here
			if t == linkDeferredFootnote {
				lr.noteId = len(p.notes) + 1
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteId = lr.noteId
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// build content: img alt is escaped, link content is parsed
	var content bytes.Buffer
	if txtE > 1 {
		if t == linkImg {
			content.Write(data[1:txtE])
		} else {
			// links cannot contain other links, so turn off link parsing temporarily
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(&content, data[1:txtE])
			p.insideLink = insideLink
		}
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) {
			return 0
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		p.r.Link(out, uLink, title, content.Bytes())

	case linkImg:
		// the '!' before the '[' was already emitted as text; take it back
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '!' {
			out.Truncate(outSize - 1)
		}

		p.r.Image(out, uLink, title, content.Bytes())

	case linkInlineFootnote:
		// likewise remove the '^' that was emitted before the '['
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '^' {
			out.Truncate(outSize - 1)
		}

		p.r.FootnoteRef(out, link, noteId)

	case linkDeferredFootnote:
		p.r.FootnoteRef(out, link, noteId)

	default:
		return 0
	}

	return i
}
 534
 535// '<' when tags or autolinks are allowed
 536func leftAngle(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 537	data = data[offset:]
 538	altype := LINK_TYPE_NOT_AUTOLINK
 539	end := tagLength(data, &altype)
 540
 541	if end > 2 {
 542		if altype != LINK_TYPE_NOT_AUTOLINK {
 543			var uLink bytes.Buffer
 544			unescapeText(&uLink, data[1:end+1-2])
 545			if uLink.Len() > 0 {
 546				p.r.AutoLink(out, uLink.Bytes(), altype)
 547			}
 548		} else {
 549			p.r.RawHtmlTag(out, data[:end])
 550		}
 551	}
 552
 553	return end
 554}
 555
 556// '\\' backslash escape
 557var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>")
 558
 559func escape(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 560	data = data[offset:]
 561
 562	if len(data) > 1 {
 563		if bytes.IndexByte(escapeChars, data[1]) < 0 {
 564			return 0
 565		}
 566
 567		p.r.NormalText(out, data[1:2])
 568	}
 569
 570	return 2
 571}
 572
 573func unescapeText(ob *bytes.Buffer, src []byte) {
 574	i := 0
 575	for i < len(src) {
 576		org := i
 577		for i < len(src) && src[i] != '\\' {
 578			i++
 579		}
 580
 581		if i > org {
 582			ob.Write(src[org:i])
 583		}
 584
 585		if i+1 >= len(src) {
 586			break
 587		}
 588
 589		ob.WriteByte(src[i+1])
 590		i += 2
 591	}
 592}
 593
 594// '&' escaped when it doesn't belong to an entity
 595// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 596func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 597	data = data[offset:]
 598
 599	end := 1
 600
 601	if end < len(data) && data[end] == '#' {
 602		end++
 603	}
 604
 605	for end < len(data) && isalnum(data[end]) {
 606		end++
 607	}
 608
 609	if end < len(data) && data[end] == ';' {
 610		end++ // real entity
 611	} else {
 612		return 0 // lone '&'
 613	}
 614
 615	p.r.Entity(out, data[:end])
 616
 617	return end
 618}
 619
// autoLink is the inline trigger for ':'. It turns a bare URL such as
// "http://example.com" into an autolink. data is the whole block and offset
// is the position of the ':'; the scheme letters before it have already been
// written to out as text, so they are removed from out again before the link
// is rendered.
//
// It returns the number of bytes consumed starting at offset, or 0 when the
// text at this position is not treated as a link.
func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// quick check to rule out most false hits on ':'
	if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
		return 0
	}

	// Now a more expensive check to see if we're not inside an anchor element
	// (walk back to the nearest '<', or to the start of the block)
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	// a literal <a href=...>...</a> is copied through unchanged; only the part
	// from the ':' onward is written because the earlier part is already in out
	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		out.Write(anchorStr[offsetFromAnchor:])
		return len(anchorStr) - offsetFromAnchor
	}

	// scan backward for a word boundary
	// (rewind counts the letters directly before the ':')
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0
	}

	// from here on data starts at the first letter of the scheme
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0
	}

	// the link runs up to the first byte that isEndOfLink rejects
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	// (unless it is escaped with a backslash)
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',' || data[linkEnd-1] == ';') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	// copen is the opening counterpart of the final byte, or 0 if none.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing sign
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backward over the current line, counting closers up and
		// openers down, until the count balances or the line starts
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced: the trailing sign is left out of the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	// we were triggered on the ':', so we need to rewind the output a bit
	if out.Len() >= rewind {
		out.Truncate(len(out.Bytes()) - rewind)
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		p.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
	}

	// linkEnd is measured from the scheme start; report it relative to offset
	return linkEnd - rewind
}
 739
 740func isEndOfLink(char byte) bool {
 741	return isspace(char) || char == '<'
 742}
 743
 744var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://"), []byte("/")}
 745
 746func isSafeLink(link []byte) bool {
 747	for _, prefix := range validUris {
 748		// TODO: handle unicode here
 749		// case-insensitive prefix test
 750		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
 751			return true
 752		}
 753	}
 754
 755	return false
 756}
 757
// return the length of the given tag, or 0 if it's not valid
//
// data must start at the '<'. Besides the length, tagLength reports through
// autolink what kind of construct was found: LINK_TYPE_EMAIL for
// <user@host>, LINK_TYPE_NORMAL for <scheme:...>, and
// LINK_TYPE_NOT_AUTOLINK for anything else that still ends in '>'.
func tagLength(data []byte, autolink *int) int {
	var i, j int

	// a valid tag can't be shorter than 3 chars
	if len(data) < 3 {
		return 0
	}

	// begins with a '<' optionally followed by '/', followed by letter or number
	if data[0] != '<' {
		return 0
	}
	if data[1] == '/' {
		i = 2
	} else {
		i = 1
	}

	if !isalnum(data[i]) {
		return 0
	}

	// scheme test
	*autolink = LINK_TYPE_NOT_AUTOLINK

	// try to find the beginning of an URI
	// (skip the run of letters, digits, '.', '+' and '-')
	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
		i++
	}

	// an '@' right after that run suggests an e-mail address
	if i > 1 && i < len(data) && data[i] == '@' {
		if j = isMailtoAutoLink(data[i:]); j != 0 {
			*autolink = LINK_TYPE_EMAIL
			return i + j
		}
	}

	// a ':' after at least two such characters suggests "scheme:"
	if i > 2 && i < len(data) && data[i] == ':' {
		*autolink = LINK_TYPE_NORMAL
		i++
	}

	// complete autolink test: no whitespace or ' or "
	switch {
	case i >= len(data):
		*autolink = LINK_TYPE_NOT_AUTOLINK
	case *autolink != 0:
		// j remembers where the part after the scheme begins
		j = i

		for i < len(data) {
			if data[i] == '\\' {
				// a backslash protects the following byte
				i += 2
			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
				break
			} else {
				i++
			}

		}

		if i >= len(data) {
			return 0
		}
		// a non-empty body closed by '>' is a complete autolink
		if i > j && data[i] == '>' {
			return i + 1
		}

		// one of the forbidden chars has been found
		*autolink = LINK_TYPE_NOT_AUTOLINK
	}

	// look for something looking like a tag end
	for i < len(data) && data[i] != '>' {
		i++
	}
	if i >= len(data) {
		return 0
	}
	return i + 1
}
 839
 840// look for the address part of a mail autolink and '>'
 841// this is less strict than the original markdown e-mail address matching
 842func isMailtoAutoLink(data []byte) int {
 843	nb := 0
 844
 845	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
 846	for i := 0; i < len(data); i++ {
 847		if isalnum(data[i]) {
 848			continue
 849		}
 850
 851		switch data[i] {
 852		case '@':
 853			nb++
 854
 855		case '-', '.', '_':
 856			break
 857
 858		case '>':
 859			if nb == 1 {
 860				return i + 1
 861			} else {
 862				return 0
 863			}
 864		default:
 865			return 0
 866		}
 867	}
 868
 869	return 0
 870}
 871
 872// look for the next emph char, skipping other constructs
 873func helperFindEmphChar(data []byte, c byte) int {
 874	i := 1
 875
 876	for i < len(data) {
 877		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
 878			i++
 879		}
 880		if i >= len(data) {
 881			return 0
 882		}
 883		if data[i] == c {
 884			return i
 885		}
 886
 887		// do not count escaped chars
 888		if i != 0 && data[i-1] == '\\' {
 889			i++
 890			continue
 891		}
 892
 893		if data[i] == '`' {
 894			// skip a code span
 895			tmpI := 0
 896			i++
 897			for i < len(data) && data[i] != '`' {
 898				if tmpI == 0 && data[i] == c {
 899					tmpI = i
 900				}
 901				i++
 902			}
 903			if i >= len(data) {
 904				return tmpI
 905			}
 906			i++
 907		} else if data[i] == '[' {
 908			// skip a link
 909			tmpI := 0
 910			i++
 911			for i < len(data) && data[i] != ']' {
 912				if tmpI == 0 && data[i] == c {
 913					tmpI = i
 914				}
 915				i++
 916			}
 917			i++
 918			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
 919				i++
 920			}
 921			if i >= len(data) {
 922				return tmpI
 923			}
 924			if data[i] != '[' && data[i] != '(' { // not a link
 925				if tmpI > 0 {
 926					return tmpI
 927				} else {
 928					continue
 929				}
 930			}
 931			cc := data[i]
 932			i++
 933			for i < len(data) && data[i] != cc {
 934				if tmpI == 0 && data[i] == c {
 935					tmpI = i
 936				}
 937				i++
 938			}
 939			if i >= len(data) {
 940				return tmpI
 941			}
 942			i++
 943		}
 944	}
 945	return 0
 946}
 947
 948func helperEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
 949	i := 0
 950
 951	// skip one symbol if coming from emph3
 952	if len(data) > 1 && data[0] == c && data[1] == c {
 953		i = 1
 954	}
 955
 956	for i < len(data) {
 957		length := helperFindEmphChar(data[i:], c)
 958		if length == 0 {
 959			return 0
 960		}
 961		i += length
 962		if i >= len(data) {
 963			return 0
 964		}
 965
 966		if i+1 < len(data) && data[i+1] == c {
 967			i++
 968			continue
 969		}
 970
 971		if data[i] == c && !isspace(data[i-1]) {
 972
 973			if p.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 {
 974				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
 975					continue
 976				}
 977			}
 978
 979			var work bytes.Buffer
 980			p.inline(&work, data[:i])
 981			p.r.Emphasis(out, work.Bytes())
 982			return i + 1
 983		}
 984	}
 985
 986	return 0
 987}
 988
 989func helperDoubleEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
 990	i := 0
 991
 992	for i < len(data) {
 993		length := helperFindEmphChar(data[i:], c)
 994		if length == 0 {
 995			return 0
 996		}
 997		i += length
 998
 999		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1000			var work bytes.Buffer
1001			p.inline(&work, data[:i])
1002
1003			if work.Len() > 0 {
1004				// pick the right renderer
1005				if c == '~' {
1006					p.r.StrikeThrough(out, work.Bytes())
1007				} else {
1008					p.r.DoubleEmphasis(out, work.Bytes())
1009				}
1010			}
1011			return i + 2
1012		}
1013		i++
1014	}
1015	return 0
1016}
1017
1018func helperTripleEmphasis(p *parser, out *bytes.Buffer, data []byte, offset int, c byte) int {
1019	i := 0
1020	origData := data
1021	data = data[offset:]
1022
1023	for i < len(data) {
1024		length := helperFindEmphChar(data[i:], c)
1025		if length == 0 {
1026			return 0
1027		}
1028		i += length
1029
1030		// skip whitespace preceded symbols
1031		if data[i] != c || isspace(data[i-1]) {
1032			continue
1033		}
1034
1035		switch {
1036		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1037			// triple symbol found
1038			var work bytes.Buffer
1039
1040			p.inline(&work, data[:i])
1041			if work.Len() > 0 {
1042				p.r.TripleEmphasis(out, work.Bytes())
1043			}
1044			return i + 3
1045		case (i+1 < len(data) && data[i+1] == c):
1046			// double symbol found, hand over to emph1
1047			length = helperEmphasis(p, out, origData[offset-2:], c)
1048			if length == 0 {
1049				return 0
1050			} else {
1051				return length - 2
1052			}
1053		default:
1054			// single symbol found, hand over to emph2
1055			length = helperDoubleEmphasis(p, out, origData[offset-1:], c)
1056			if length == 0 {
1057				return 0
1058			} else {
1059				return length - 1
1060			}
1061		}
1062	}
1063	return 0
1064}
all repos — grayfriday @ b0bdfbec4ceab22844aa766b3856aa95753ffde8

blackfriday fork with a few changes