icy does git — grayfriday (be9cbc634a630f5e9d75a3e3d985152b17305b2f): inline.go

inline.go (view raw)
   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse inline elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"regexp"
  19	"strconv"
  20)
  21
var (
	// anchorRe matches, at the very start of its input, a complete anchor
	// element of the form <a href="URL" title="...">URL</a>. The title
	// attribute is optional, and both URL positions must match urlRe (declared
	// elsewhere in the package). autoLink uses it to pass such anchors through
	// to the output unchanged.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
)
  25
  26// Functions to parse text within a block
  27// Each function returns the number of chars taken care of
  28// data is the complete block being rendered
  29// offset is the number of valid chars before the current cursor
  30
  31func (p *parser) inline(out *bytes.Buffer, data []byte) {
  32	// this is called recursively: enforce a maximum depth
  33	if p.nesting >= p.maxNesting {
  34		return
  35	}
  36	p.nesting++
  37
  38	i, end := 0, 0
  39	for i < len(data) {
  40		// copy inactive chars into the output
  41		for end < len(data) && p.inlineCallback[data[end]] == nil {
  42			end++
  43		}
  44
  45		p.r.NormalText(out, data[i:end])
  46
  47		if end >= len(data) {
  48			break
  49		}
  50		i = end
  51
  52		// call the trigger
  53		handler := p.inlineCallback[data[end]]
  54		if consumed := handler(p, out, data, i); consumed == 0 {
  55			// no action from the callback; buffer the byte for later
  56			end = i + 1
  57		} else {
  58			// skip past whatever the callback used
  59			i += consumed
  60			end = i
  61		}
  62	}
  63
  64	p.nesting--
  65}
  66
  67// single and double emphasis parsing
  68func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int {
  69	data = data[offset:]
  70	c := data[0]
  71	ret := 0
  72
  73	if len(data) > 2 && data[1] != c {
  74		// whitespace cannot follow an opening emphasis;
  75		// strikethrough only takes two characters '~~'
  76		if c == '~' || isspace(data[1]) {
  77			return 0
  78		}
  79		if ret = helperEmphasis(p, out, data[1:], c); ret == 0 {
  80			return 0
  81		}
  82
  83		return ret + 1
  84	}
  85
  86	if len(data) > 3 && data[1] == c && data[2] != c {
  87		if isspace(data[2]) {
  88			return 0
  89		}
  90		if ret = helperDoubleEmphasis(p, out, data[2:], c); ret == 0 {
  91			return 0
  92		}
  93
  94		return ret + 2
  95	}
  96
  97	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
  98		if c == '~' || isspace(data[3]) {
  99			return 0
 100		}
 101		if ret = helperTripleEmphasis(p, out, data, 3, c); ret == 0 {
 102			return 0
 103		}
 104
 105		return ret + 3
 106	}
 107
 108	return 0
 109}
 110
 111func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 112	data = data[offset:]
 113
 114	nb := 0
 115
 116	// count the number of backticks in the delimiter
 117	for nb < len(data) && data[nb] == '`' {
 118		nb++
 119	}
 120
 121	// find the next delimiter
 122	i, end := 0, 0
 123	for end = nb; end < len(data) && i < nb; end++ {
 124		if data[end] == '`' {
 125			i++
 126		} else {
 127			i = 0
 128		}
 129	}
 130
 131	// no matching delimiter?
 132	if i < nb && end >= len(data) {
 133		return 0
 134	}
 135
 136	// trim outside whitespace
 137	fBegin := nb
 138	for fBegin < end && data[fBegin] == ' ' {
 139		fBegin++
 140	}
 141
 142	fEnd := end - nb
 143	for fEnd > fBegin && data[fEnd-1] == ' ' {
 144		fEnd--
 145	}
 146
 147	// render the code span
 148	if fBegin != fEnd {
 149		p.r.CodeSpan(out, data[fBegin:fEnd])
 150	}
 151
 152	return end
 153
 154}
 155
 156// newline preceded by two spaces becomes <br>
 157// newline without two spaces works when EXTENSION_HARD_LINE_BREAK is enabled
 158func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 159	// remove trailing spaces from out
 160	outBytes := out.Bytes()
 161	end := len(outBytes)
 162	eol := end
 163	for eol > 0 && outBytes[eol-1] == ' ' {
 164		eol--
 165	}
 166	out.Truncate(eol)
 167
 168	// should there be a hard line break here?
 169	if p.flags&EXTENSION_HARD_LINE_BREAK == 0 && end-eol < 2 {
 170		return 0
 171	}
 172
 173	p.r.LineBreak(out)
 174	return 1
 175}
 176
// linkType identifies which bracketed construct the link parser is handling.
type linkType int

const (
	// [text]: regular link; this is the zero value
	linkNormal linkType = iota
	// ![alt]: image
	linkImg
	// [^refId]: footnote whose body is defined elsewhere in the document
	linkDeferredFootnote
	// ^[text]: footnote whose body is given inline
	linkInlineFootnote
)
 185
// link is the '[' trigger: it parses a regular link, an image, an inline
// footnote or a deferred footnote that starts at data[offset] and renders it
// into out.
//
// The bracketed text may be followed by an inline target `(url "title")`, by
// a reference id `[id]`, or by nothing (shortcut reference or footnote).
// Reference ids are lower-cased before being looked up in p.refs.
//
// It returns the number of bytes consumed counting from offset, or 0 when no
// link is produced.
func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0
	}

	// [text] == regular link
	// ![alt] == image
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	var t linkType
	if offset > 0 && data[offset-1] == '!' {
		t = linkImg
	} else if p.flags&EXTENSION_FOOTNOTES != 0 {
		if offset > 0 && data[offset-1] == '^' {
			t = linkInlineFootnote
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	}

	// from here on every index is relative to the opening '['
	data = data[offset:]

	var (
		i           = 1
		noteId      int
		title, link []byte
		textHasNl   = false
	)

	// step over the '^' of a deferred footnote
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	// ran out of input before the brackets balanced
	if i >= len(data) {
		return 0
	}

	// txtE is the index of the closing ']' of the link text
	txtE := i
	i++

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: the quote belonged to the link itself,
				// so there is no title and the link runs up to the ')'
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case i < len(data) && data[i] == '[':
		var id []byte

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the link text itself serves as the id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, turning each newline into a single space
				// unless a space already precedes it
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id (ids are case-insensitive)
		key := string(bytes.ToLower(id))
		lr, ok := p.refs[key]
		if !ok {
			return 0

		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// NOTE(review): this loop starts at j = 1, so for a deferred
			// footnote the leading '^' ends up in id, unlike the
			// newline-free branch below - confirm whether that is intended.
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		key := string(bytes.ToLower(id))
		if t == linkInlineFootnote {
			// create a new reference
			noteId = len(p.notes) + 1

			// the fragment is taken from the slugified text, capped at 16
			// bytes; an empty text falls back to "footnote-<n>"
			// NOTE(review): slugify is defined elsewhere; if it returns fewer
			// bytes than the fragment length the tail stays zero - confirm.
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
			}

			ref := &reference{
				noteId:   noteId,
				hasBlock: false,
				link:     fragment,
				title:    id,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.refs[key]
			if !ok {
				return 0
			}

			// a deferred footnote gets its number when it is first used
			if t == linkDeferredFootnote {
				lr.noteId = len(p.notes) + 1
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteId = lr.noteId
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// build content: img alt is escaped, link content is parsed
	var content bytes.Buffer
	if txtE > 1 {
		if t == linkImg {
			content.Write(data[1:txtE])
		} else {
			// links cannot contain other links, so turn off link parsing temporarily
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(&content, data[1:txtE])
			p.insideLink = insideLink
		}
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) {
			return 0
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		p.r.Link(out, uLink, title, content.Bytes())

	case linkImg:
		// the '!' before the bracket was already emitted as text; remove it
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '!' {
			out.Truncate(outSize - 1)
		}

		p.r.Image(out, uLink, title, content.Bytes())

	case linkInlineFootnote:
		// the '^' before the bracket was already emitted as text; remove it
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '^' {
			out.Truncate(outSize - 1)
		}

		p.r.FootnoteRef(out, link, noteId)

	case linkDeferredFootnote:
		p.r.FootnoteRef(out, link, noteId)

	default:
		return 0
	}

	return i
}
 534
 535// '<' when tags or autolinks are allowed
 536func leftAngle(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 537	data = data[offset:]
 538	altype := LINK_TYPE_NOT_AUTOLINK
 539	end := tagLength(data, &altype)
 540
 541	if end > 2 {
 542		if altype != LINK_TYPE_NOT_AUTOLINK {
 543			var uLink bytes.Buffer
 544			unescapeText(&uLink, data[1:end+1-2])
 545			if uLink.Len() > 0 {
 546				p.r.AutoLink(out, uLink.Bytes(), altype)
 547			}
 548		} else {
 549			p.r.RawHtmlTag(out, data[:end])
 550		}
 551	}
 552
 553	return end
 554}
 555
 556// '\\' backslash escape
 557var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>")
 558
 559func escape(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 560	data = data[offset:]
 561
 562	if len(data) > 1 {
 563		if bytes.IndexByte(escapeChars, data[1]) < 0 {
 564			return 0
 565		}
 566
 567		p.r.NormalText(out, data[1:2])
 568	}
 569
 570	return 2
 571}
 572
 573func unescapeText(ob *bytes.Buffer, src []byte) {
 574	i := 0
 575	for i < len(src) {
 576		org := i
 577		for i < len(src) && src[i] != '\\' {
 578			i++
 579		}
 580
 581		if i > org {
 582			ob.Write(src[org:i])
 583		}
 584
 585		if i+1 >= len(src) {
 586			break
 587		}
 588
 589		ob.WriteByte(src[i+1])
 590		i += 2
 591	}
 592}
 593
 594// '&' escaped when it doesn't belong to an entity
 595// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 596func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 597	data = data[offset:]
 598
 599	end := 1
 600
 601	if end < len(data) && data[end] == '#' {
 602		end++
 603	}
 604
 605	for end < len(data) && isalnum(data[end]) {
 606		end++
 607	}
 608
 609	if end < len(data) && data[end] == ';' {
 610		end++ // real entity
 611	} else {
 612		return 0 // lone '&'
 613	}
 614
 615	p.r.Entity(out, data[:end])
 616
 617	return end
 618}
 619
 620func linkEndsWithEntity(data []byte, linkEnd int) bool {
 621	entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
 622	if entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd {
 623		return true
 624	}
 625	return false
 626}
 627
// autoLink is the ':' trigger: it recognises a bare URL whose "://" begins at
// data[offset] and renders it as an autolink.
//
// The scheme letters in front of the ':' have already been written to out as
// normal text, so they are rewound from out before the link is rendered. A
// complete <a href="...">...</a> element surrounding the position is copied
// through verbatim instead.
//
// It returns the number of bytes consumed counting from offset, or 0 when no
// link is recognised.
func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// quick check to rule out most false hits on ':'
	if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
		return 0
	}

	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit the remainder of the anchor, from the ':' onwards, untouched
		out.Write(anchorStr[offsetFromAnchor:])
		return len(anchorStr) - offsetFromAnchor
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0
	}

	// data now starts at the first letter of the scheme; origData keeps the
	// full input for the backward delimiter scan below
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0
	}

	// the link runs up to the first whitespace or '<'
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backwards over the current line, counting closers up and
		// openers down, until the count reaches zero or the line starts
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced: the trailing delimiter is excluded from the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	// we were triggered on the ':', so we need to rewind the output a bit
	if out.Len() >= rewind {
		out.Truncate(len(out.Bytes()) - rewind)
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		p.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
	}

	// linkEnd counts from the start of the scheme; the caller counts from
	// the ':' at offset
	return linkEnd - rewind
}
 752
 753func isEndOfLink(char byte) bool {
 754	return isspace(char) || char == '<'
 755}
 756
 757var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://"), []byte("/")}
 758
 759func isSafeLink(link []byte) bool {
 760	for _, prefix := range validUris {
 761		// TODO: handle unicode here
 762		// case-insensitive prefix test
 763		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
 764			return true
 765		}
 766	}
 767
 768	return false
 769}
 770
// tagLength returns the length of the tag or angle-bracket autolink that
// starts at data[0], including both angle brackets, or 0 if it's not valid.
//
// *autolink is set to LINK_TYPE_EMAIL or LINK_TYPE_NORMAL when the content is
// recognised as an autolink, and to LINK_TYPE_NOT_AUTOLINK otherwise. It is
// left untouched when the function returns before the scheme test.
func tagLength(data []byte, autolink *int) int {
	var i, j int

	// a valid tag can't be shorter than 3 chars
	if len(data) < 3 {
		return 0
	}

	// begins with a '<' optionally followed by '/', followed by letter or number
	if data[0] != '<' {
		return 0
	}
	if data[1] == '/' {
		i = 2
	} else {
		i = 1
	}

	if !isalnum(data[i]) {
		return 0
	}

	// scheme test
	*autolink = LINK_TYPE_NOT_AUTOLINK

	// try to find the beginning of an URI
	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
		i++
	}

	// "name@...": possibly an e-mail autolink
	if i > 1 && i < len(data) && data[i] == '@' {
		if j = isMailtoAutoLink(data[i:]); j != 0 {
			*autolink = LINK_TYPE_EMAIL
			return i + j
		}
	}

	// "scheme:": possibly a normal autolink; the scheme needs 2+ characters
	if i > 2 && i < len(data) && data[i] == ':' {
		*autolink = LINK_TYPE_NORMAL
		i++
	}

	// complete autolink test: no whitespace or ' or "
	switch {
	case i >= len(data):
		*autolink = LINK_TYPE_NOT_AUTOLINK
	case *autolink != 0:
		// j marks where the part after the scheme begins
		j = i

		for i < len(data) {
			if data[i] == '\\' {
				i += 2
			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
				break
			} else {
				i++
			}

		}

		if i >= len(data) {
			return 0
		}
		// a non-empty target closed by '>' is a complete autolink
		if i > j && data[i] == '>' {
			return i + 1
		}

		// one of the forbidden chars has been found
		*autolink = LINK_TYPE_NOT_AUTOLINK
	}

	// look for something looking like a tag end
	for i < len(data) && data[i] != '>' {
		i++
	}
	if i >= len(data) {
		return 0
	}
	return i + 1
}
 852
 853// look for the address part of a mail autolink and '>'
 854// this is less strict than the original markdown e-mail address matching
 855func isMailtoAutoLink(data []byte) int {
 856	nb := 0
 857
 858	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
 859	for i := 0; i < len(data); i++ {
 860		if isalnum(data[i]) {
 861			continue
 862		}
 863
 864		switch data[i] {
 865		case '@':
 866			nb++
 867
 868		case '-', '.', '_':
 869			break
 870
 871		case '>':
 872			if nb == 1 {
 873				return i + 1
 874			} else {
 875				return 0
 876			}
 877		default:
 878			return 0
 879		}
 880	}
 881
 882	return 0
 883}
 884
 885// look for the next emph char, skipping other constructs
 886func helperFindEmphChar(data []byte, c byte) int {
 887	i := 1
 888
 889	for i < len(data) {
 890		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
 891			i++
 892		}
 893		if i >= len(data) {
 894			return 0
 895		}
 896		if data[i] == c {
 897			return i
 898		}
 899
 900		// do not count escaped chars
 901		if i != 0 && data[i-1] == '\\' {
 902			i++
 903			continue
 904		}
 905
 906		if data[i] == '`' {
 907			// skip a code span
 908			tmpI := 0
 909			i++
 910			for i < len(data) && data[i] != '`' {
 911				if tmpI == 0 && data[i] == c {
 912					tmpI = i
 913				}
 914				i++
 915			}
 916			if i >= len(data) {
 917				return tmpI
 918			}
 919			i++
 920		} else if data[i] == '[' {
 921			// skip a link
 922			tmpI := 0
 923			i++
 924			for i < len(data) && data[i] != ']' {
 925				if tmpI == 0 && data[i] == c {
 926					tmpI = i
 927				}
 928				i++
 929			}
 930			i++
 931			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
 932				i++
 933			}
 934			if i >= len(data) {
 935				return tmpI
 936			}
 937			if data[i] != '[' && data[i] != '(' { // not a link
 938				if tmpI > 0 {
 939					return tmpI
 940				} else {
 941					continue
 942				}
 943			}
 944			cc := data[i]
 945			i++
 946			for i < len(data) && data[i] != cc {
 947				if tmpI == 0 && data[i] == c {
 948					tmpI = i
 949				}
 950				i++
 951			}
 952			if i >= len(data) {
 953				return tmpI
 954			}
 955			i++
 956		}
 957	}
 958	return 0
 959}
 960
 961func helperEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
 962	i := 0
 963
 964	// skip one symbol if coming from emph3
 965	if len(data) > 1 && data[0] == c && data[1] == c {
 966		i = 1
 967	}
 968
 969	for i < len(data) {
 970		length := helperFindEmphChar(data[i:], c)
 971		if length == 0 {
 972			return 0
 973		}
 974		i += length
 975		if i >= len(data) {
 976			return 0
 977		}
 978
 979		if i+1 < len(data) && data[i+1] == c {
 980			i++
 981			continue
 982		}
 983
 984		if data[i] == c && !isspace(data[i-1]) {
 985
 986			if p.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 {
 987				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
 988					continue
 989				}
 990			}
 991
 992			var work bytes.Buffer
 993			p.inline(&work, data[:i])
 994			p.r.Emphasis(out, work.Bytes())
 995			return i + 1
 996		}
 997	}
 998
 999	return 0
1000}
1001
1002func helperDoubleEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
1003	i := 0
1004
1005	for i < len(data) {
1006		length := helperFindEmphChar(data[i:], c)
1007		if length == 0 {
1008			return 0
1009		}
1010		i += length
1011
1012		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1013			var work bytes.Buffer
1014			p.inline(&work, data[:i])
1015
1016			if work.Len() > 0 {
1017				// pick the right renderer
1018				if c == '~' {
1019					p.r.StrikeThrough(out, work.Bytes())
1020				} else {
1021					p.r.DoubleEmphasis(out, work.Bytes())
1022				}
1023			}
1024			return i + 2
1025		}
1026		i++
1027	}
1028	return 0
1029}
1030
1031func helperTripleEmphasis(p *parser, out *bytes.Buffer, data []byte, offset int, c byte) int {
1032	i := 0
1033	origData := data
1034	data = data[offset:]
1035
1036	for i < len(data) {
1037		length := helperFindEmphChar(data[i:], c)
1038		if length == 0 {
1039			return 0
1040		}
1041		i += length
1042
1043		// skip whitespace preceded symbols
1044		if data[i] != c || isspace(data[i-1]) {
1045			continue
1046		}
1047
1048		switch {
1049		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1050			// triple symbol found
1051			var work bytes.Buffer
1052
1053			p.inline(&work, data[:i])
1054			if work.Len() > 0 {
1055				p.r.TripleEmphasis(out, work.Bytes())
1056			}
1057			return i + 3
1058		case (i+1 < len(data) && data[i+1] == c):
1059			// double symbol found, hand over to emph1
1060			length = helperEmphasis(p, out, origData[offset-2:], c)
1061			if length == 0 {
1062				return 0
1063			} else {
1064				return length - 2
1065			}
1066		default:
1067			// single symbol found, hand over to emph2
1068			length = helperDoubleEmphasis(p, out, origData[offset-1:], c)
1069			if length == 0 {
1070				return 0
1071			} else {
1072				return length - 1
1073			}
1074		}
1075	}
1076	return 0
1077}
all repos — grayfriday @ be9cbc634a630f5e9d75a3e3d985152b17305b2f

blackfriday fork with a few changes