all repos — grayfriday @ a3ff1b5d3996c43ae81064802fa1247e92a599e0

blackfriday fork with a few changes

markdown.go (view raw)

   1//
   2// Black Friday Markdown Processor
   3// Originally based on http://github.com/tanoku/upskirt
   4// by Russ Ross <russ@russross.com>
   5//
   6
   7//
   8//
   9// Markdown parsing and processing
  10//
  11//
  12
  13package blackfriday
  14
  15import (
  16	"bytes"
  17	"sort"
  18	"unicode"
  19)
  20
  21// These are the supported markdown parsing extensions.
  22// OR these values together to select multiple extensions.
  23const (
  24	EXTENSION_NO_INTRA_EMPHASIS = 1 << iota
  25	EXTENSION_TABLES
  26	EXTENSION_FENCED_CODE
  27	EXTENSION_AUTOLINK
  28	EXTENSION_STRIKETHROUGH
  29	EXTENSION_LAX_HTML_BLOCKS
  30	EXTENSION_SPACE_HEADERS
  31)
  32
  33// These are the possible flag values for the link renderer.
  34// Only a single one of these values will be used; they are not ORed together.
  35// These are mostly of interest if you are writing a new output format.
  36const (
  37	LINK_TYPE_NOT_AUTOLINK = iota
  38	LINK_TYPE_NORMAL
  39	LINK_TYPE_EMAIL
  40)
  41
  42// These are the possible flag values for the listitem renderer.
  43// Multiple flag values may be ORed together.
  44// These are mostly of interest if you are writing a new output format.
  45const (
  46	LIST_TYPE_ORDERED = 1 << iota
  47	LIST_ITEM_CONTAINS_BLOCK
  48	LIST_ITEM_END_OF_LIST
  49)
  50
  51// These are the possible flag values for the table cell renderer.
  52// Only a single one of these values will be used; they are not ORed together.
  53// These are mostly of interest if you are writing a new output format.
  54const (
  55	TABLE_ALIGNMENT_LEFT = 1 << iota
  56	TABLE_ALIGNMENT_RIGHT
  57	TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
  58)
  59
  60// The size of a tab stop.
  61const TAB_SIZE = 4
  62
  63// These are the tags that are recognized as HTML block tags.
  64// Any of these can be included in markdown text without special escaping.
  65var block_tags = map[string]bool{
  66	"p":          true,
  67	"dl":         true,
  68	"h1":         true,
  69	"h2":         true,
  70	"h3":         true,
  71	"h4":         true,
  72	"h5":         true,
  73	"h6":         true,
  74	"ol":         true,
  75	"ul":         true,
  76	"del":        true,
  77	"div":        true,
  78	"ins":        true,
  79	"pre":        true,
  80	"form":       true,
  81	"math":       true,
  82	"table":      true,
  83	"iframe":     true,
  84	"script":     true,
  85	"fieldset":   true,
  86	"noscript":   true,
  87	"blockquote": true,
  88}
  89
  90// This struct defines the rendering interface.
  91// A series of callback functions are registered to form a complete renderer.
  92// A single interface{} value field is provided, and that value is handed to
  93// each callback. Leaving a field blank suppresses rendering that type of output
  94// except where noted.
  95//
  96// This is mostly of interest if you are implementing a new rendering format.
  97// Most users will use the convenience functions to fill in this structure.
  98type Renderer struct {
  99	// block-level callbacks---nil skips the block
 100	blockcode  func(ob *bytes.Buffer, text []byte, lang string, opaque interface{})
 101	blockquote func(ob *bytes.Buffer, text []byte, opaque interface{})
 102	blockhtml  func(ob *bytes.Buffer, text []byte, opaque interface{})
 103	header     func(ob *bytes.Buffer, text []byte, level int, opaque interface{})
 104	hrule      func(ob *bytes.Buffer, opaque interface{})
 105	list       func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
 106	listitem   func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
 107	paragraph  func(ob *bytes.Buffer, text []byte, opaque interface{})
 108	table      func(ob *bytes.Buffer, header []byte, body []byte, opaque interface{})
 109	table_row  func(ob *bytes.Buffer, text []byte, opaque interface{})
 110	table_cell func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
 111
 112	// span-level callbacks---nil or return 0 prints the span verbatim
 113	autolink        func(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int
 114	codespan        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
 115	double_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
 116	emphasis        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
 117	image           func(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int
 118	linebreak       func(ob *bytes.Buffer, opaque interface{}) int
 119	link            func(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int
 120	raw_html_tag    func(ob *bytes.Buffer, tag []byte, opaque interface{}) int
 121	triple_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
 122	strikethrough   func(ob *bytes.Buffer, text []byte, opaque interface{}) int
 123
 124	// low-level callbacks---nil copies input directly into the output
 125	entity      func(ob *bytes.Buffer, entity []byte, opaque interface{})
 126	normal_text func(ob *bytes.Buffer, text []byte, opaque interface{})
 127
 128	// header and footer
 129	doc_header func(ob *bytes.Buffer, opaque interface{})
 130	doc_footer func(ob *bytes.Buffer, opaque interface{})
 131
 132	// user data---passed back to every callback
 133	opaque interface{}
 134}
 135
 136//
 137
 138type render struct {
 139	mk          *Renderer
 140	refs        link_ref_array
 141	active_char [256]int
 142	ext_flags   uint32
 143	nesting     int
 144	max_nesting int
 145}
 146
 147const (
 148	MD_CHAR_NONE = iota
 149	MD_CHAR_EMPHASIS
 150	MD_CHAR_CODESPAN
 151	MD_CHAR_LINEBREAK
 152	MD_CHAR_LINK
 153	MD_CHAR_LANGLE
 154	MD_CHAR_ESCAPE
 155	MD_CHAR_ENTITITY
 156	MD_CHAR_AUTOLINK
 157)
 158
 159
 160//
 161//
 162// Public interface
 163//
 164//
 165
 166// Parse and render a block of markdown-encoded text.
 167// The renderer is used to format the output, and extensions dictates which
 168// non-standard extensions are enabled.
 169func Markdown(output *bytes.Buffer, input []byte, renderer *Renderer, extensions uint32) {
 170	// no point in parsing if we can't render
 171	if renderer == nil {
 172		return
 173	}
 174
 175	// fill in the character-level parsers
 176	markdown_char_ptrs[MD_CHAR_NONE] = nil
 177	markdown_char_ptrs[MD_CHAR_EMPHASIS] = char_emphasis
 178	markdown_char_ptrs[MD_CHAR_CODESPAN] = char_codespan
 179	markdown_char_ptrs[MD_CHAR_LINEBREAK] = char_linebreak
 180	markdown_char_ptrs[MD_CHAR_LINK] = char_link
 181	markdown_char_ptrs[MD_CHAR_LANGLE] = char_langle_tag
 182	markdown_char_ptrs[MD_CHAR_ESCAPE] = char_escape
 183	markdown_char_ptrs[MD_CHAR_ENTITITY] = char_entity
 184	markdown_char_ptrs[MD_CHAR_AUTOLINK] = char_autolink
 185
 186	// fill in the render structure
 187	rndr := new(render)
 188	rndr.mk = renderer
 189	rndr.ext_flags = extensions
 190	rndr.max_nesting = 16
 191
 192	if rndr.mk.emphasis != nil || rndr.mk.double_emphasis != nil || rndr.mk.triple_emphasis != nil {
 193		rndr.active_char['*'] = MD_CHAR_EMPHASIS
 194		rndr.active_char['_'] = MD_CHAR_EMPHASIS
 195		if extensions&EXTENSION_STRIKETHROUGH != 0 {
 196			rndr.active_char['~'] = MD_CHAR_EMPHASIS
 197		}
 198	}
 199	if rndr.mk.codespan != nil {
 200		rndr.active_char['`'] = MD_CHAR_CODESPAN
 201	}
 202	if rndr.mk.linebreak != nil {
 203		rndr.active_char['\n'] = MD_CHAR_LINEBREAK
 204	}
 205	if rndr.mk.image != nil || rndr.mk.link != nil {
 206		rndr.active_char['['] = MD_CHAR_LINK
 207	}
 208	rndr.active_char['<'] = MD_CHAR_LANGLE
 209	rndr.active_char['\\'] = MD_CHAR_ESCAPE
 210	rndr.active_char['&'] = MD_CHAR_ENTITITY
 211
 212	if extensions&EXTENSION_AUTOLINK != 0 {
 213		rndr.active_char['h'] = MD_CHAR_AUTOLINK // http, https
 214		rndr.active_char['H'] = MD_CHAR_AUTOLINK
 215
 216		rndr.active_char['f'] = MD_CHAR_AUTOLINK // ftp
 217		rndr.active_char['F'] = MD_CHAR_AUTOLINK
 218
 219		rndr.active_char['m'] = MD_CHAR_AUTOLINK // mailto
 220		rndr.active_char['M'] = MD_CHAR_AUTOLINK
 221	}
 222
 223	// first pass: look for references, copy everything else
 224	text := bytes.NewBuffer(nil)
 225	beg, end := 0, 0
 226	for beg < len(input) { // iterate over lines
 227		if end = is_ref(rndr, input[beg:]); end > 0 {
 228			beg += end
 229		} else { // skip to the next line
 230			end = beg
 231			for end < len(input) && input[end] != '\n' && input[end] != '\r' {
 232				end++
 233			}
 234
 235			// add the line body if present
 236			if end > beg {
 237				expand_tabs(text, input[beg:end])
 238			}
 239
 240			for end < len(input) && (input[end] == '\n' || input[end] == '\r') {
 241				// add one \n per newline
 242				if input[end] == '\n' || (end+1 < len(input) && input[end+1] != '\n') {
 243					text.WriteByte('\n')
 244				}
 245				end++
 246			}
 247
 248			beg = end
 249		}
 250	}
 251
 252	// sort the reference array
 253	if len(rndr.refs) > 1 {
 254		sort.Sort(rndr.refs)
 255	}
 256
 257	// second pass: actual rendering
 258	if rndr.mk.doc_header != nil {
 259		rndr.mk.doc_header(output, rndr.mk.opaque)
 260	}
 261
 262	if text.Len() > 0 {
 263		// add a final newline if not already present
 264		finalchar := text.Bytes()[text.Len()-1]
 265		if finalchar != '\n' && finalchar != '\r' {
 266			text.WriteByte('\n')
 267		}
 268		parse_block(output, rndr, text.Bytes())
 269	}
 270
 271	if rndr.mk.doc_footer != nil {
 272		rndr.mk.doc_footer(output, rndr.mk.opaque)
 273	}
 274
 275	if rndr.nesting != 0 {
 276		panic("Nesting level did not end at zero")
 277	}
 278}
 279
 280
 281//
 282// Inline parsing
 283// Functions to parse text within a block.
 284//
 285
 286// closures to render active chars, each:
 287//   returns the number of chars taken care of
 288//   data is the complete block being rendered
 289//   offset is the number of valid chars before the data
 290//
 291// Note: this is filled in in Markdown to prevent an initilization loop
 292var markdown_char_ptrs [9]func(ob *bytes.Buffer, rndr *render, data []byte, offset int) int
 293
 294func parse_inline(ob *bytes.Buffer, rndr *render, data []byte) {
 295	if rndr.nesting >= rndr.max_nesting {
 296		return
 297	}
 298	rndr.nesting++
 299
 300	i, end := 0, 0
 301	for i < len(data) {
 302		// copy inactive chars into the output
 303		for end < len(data) && rndr.active_char[data[end]] == 0 {
 304			end++
 305		}
 306
 307		if rndr.mk.normal_text != nil {
 308			rndr.mk.normal_text(ob, data[i:end], rndr.mk.opaque)
 309		} else {
 310			ob.Write(data[i:end])
 311		}
 312
 313		if end >= len(data) {
 314			break
 315		}
 316		i = end
 317
 318		// call the trigger
 319		action := rndr.active_char[data[end]]
 320		end = markdown_char_ptrs[action](ob, rndr, data, i)
 321
 322		if end == 0 { // no action from the callback
 323			end = i + 1
 324		} else {
 325			i += end
 326			end = i
 327		}
 328	}
 329
 330	rndr.nesting--
 331}
 332
 333// single and double emphasis parsing
 334func char_emphasis(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 335	data = data[offset:]
 336	c := data[0]
 337	ret := 0
 338
 339	if len(data) > 2 && data[1] != c {
 340		// whitespace cannot follow an opening emphasis;
 341		// strikethrough only takes two characters '~~'
 342		if c == '~' || isspace(data[1]) {
 343			return 0
 344		}
 345		if ret = parse_emph1(ob, rndr, data[1:], c); ret == 0 {
 346			return 0
 347		}
 348
 349		return ret + 1
 350	}
 351
 352	if len(data) > 3 && data[1] == c && data[2] != c {
 353		if isspace(data[2]) {
 354			return 0
 355		}
 356		if ret = parse_emph2(ob, rndr, data[2:], c); ret == 0 {
 357			return 0
 358		}
 359
 360		return ret + 2
 361	}
 362
 363	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
 364		if c == '~' || isspace(data[3]) {
 365			return 0
 366		}
 367		if ret = parse_emph3(ob, rndr, data, 3, c); ret == 0 {
 368			return 0
 369		}
 370
 371		return ret + 3
 372	}
 373
 374	return 0
 375}
 376
 377func char_codespan(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 378	data = data[offset:]
 379
 380	nb := 0
 381
 382	// count the number of backticks in the delimiter
 383	for nb < len(data) && data[nb] == '`' {
 384		nb++
 385	}
 386
 387	// find the next delimiter
 388	i, end := 0, 0
 389	for end = nb; end < len(data) && i < nb; end++ {
 390		if data[end] == '`' {
 391			i++
 392		} else {
 393			i = 0
 394		}
 395	}
 396
 397	if i < nb && end >= len(data) {
 398		return 0 // no matching delimiter
 399	}
 400
 401	// trim outside whitespace
 402	f_begin := nb
 403	for f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t') {
 404		f_begin++
 405	}
 406
 407	f_end := end - nb
 408	for f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t') {
 409		f_end--
 410	}
 411
 412	// real code span
 413	if rndr.mk.codespan == nil {
 414		return 0
 415	}
 416	if f_begin < f_end {
 417		if rndr.mk.codespan(ob, data[f_begin:f_end], rndr.mk.opaque) == 0 {
 418			end = 0
 419		}
 420	} else {
 421		if rndr.mk.codespan(ob, nil, rndr.mk.opaque) == 0 {
 422			end = 0
 423		}
 424	}
 425
 426	return end
 427
 428}
 429
 430// '\n' preceded by two spaces
 431func char_linebreak(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 432	if offset < 2 || data[offset-1] != ' ' || data[offset-2] != ' ' {
 433		return 0
 434	}
 435
 436	// remove trailing spaces from ob and render
 437	ob_bytes := ob.Bytes()
 438	end := len(ob_bytes)
 439	for end > 0 && ob_bytes[end-1] == ' ' {
 440		end--
 441	}
 442	ob.Truncate(end)
 443
 444	if rndr.mk.linebreak == nil {
 445		return 0
 446	}
 447	if rndr.mk.linebreak(ob, rndr.mk.opaque) > 0 {
 448		return 1
 449	} else {
 450		return 0
 451	}
 452
 453	return 0
 454}
 455
 456// '[': parse a link or an image
 457func char_link(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 458	is_img := offset > 0 && data[offset-1] == '!'
 459
 460	data = data[offset:]
 461
 462	i := 1
 463	var title, link []byte
 464	text_has_nl := false
 465
 466	// check whether the correct renderer exists
 467	if (is_img && rndr.mk.image == nil) || (!is_img && rndr.mk.link == nil) {
 468		return 0
 469	}
 470
 471	// look for the matching closing bracket
 472	for level := 1; level > 0 && i < len(data); i++ {
 473		switch {
 474		case data[i] == '\n':
 475			text_has_nl = true
 476
 477		case data[i-1] == '\\':
 478			continue
 479
 480		case data[i] == '[':
 481			level++
 482
 483		case data[i] == ']':
 484			level--
 485			if level <= 0 {
 486				i-- // compensate for extra i++ in for loop
 487			}
 488		}
 489	}
 490
 491	if i >= len(data) {
 492		return 0
 493	}
 494
 495	txt_e := i
 496	i++
 497
 498	// skip any amount of whitespace or newline
 499	// (this is much more lax than original markdown syntax)
 500	for i < len(data) && isspace(data[i]) {
 501		i++
 502	}
 503
 504	// inline style link
 505	switch {
 506	case i < len(data) && data[i] == '(':
 507		// skip initial whitespace
 508		i++
 509
 510		for i < len(data) && isspace(data[i]) {
 511			i++
 512		}
 513
 514		link_b := i
 515
 516		// look for link end: ' " )
 517		for i < len(data) {
 518			if data[i] == '\\' {
 519				i += 2
 520			} else {
 521				if data[i] == ')' || data[i] == '\'' || data[i] == '"' {
 522					break
 523				}
 524				i++
 525			}
 526		}
 527
 528		if i >= len(data) {
 529			return 0
 530		}
 531		link_e := i
 532
 533		// look for title end if present
 534		title_b, title_e := 0, 0
 535		if data[i] == '\'' || data[i] == '"' {
 536			i++
 537			title_b = i
 538
 539			for i < len(data) {
 540				if data[i] == '\\' {
 541					i += 2
 542				} else {
 543					if data[i] == ')' {
 544						break
 545					}
 546					i++
 547				}
 548			}
 549
 550			if i >= len(data) {
 551				return 0
 552			}
 553
 554			// skip whitespace after title
 555			title_e = i - 1
 556			for title_e > title_b && isspace(data[title_e]) {
 557				title_e--
 558			}
 559
 560			// check for closing quote presence
 561			if data[title_e] != '\'' && data[title_e] != '"' {
 562				title_b, title_e = 0, 0
 563				link_e = i
 564			}
 565		}
 566
 567		// remove whitespace at the end of the link
 568		for link_e > link_b && isspace(data[link_e-1]) {
 569			link_e--
 570		}
 571
 572		// remove optional angle brackets around the link
 573		if data[link_b] == '<' {
 574			link_b++
 575		}
 576		if data[link_e-1] == '>' {
 577			link_e--
 578		}
 579
 580		// build escaped link and title
 581		if link_e > link_b {
 582			link = data[link_b:link_e]
 583		}
 584
 585		if title_e > title_b {
 586			title = data[title_b:title_e]
 587		}
 588
 589		i++
 590
 591	// reference style link
 592	case i < len(data) && data[i] == '[':
 593		var id []byte
 594
 595		// look for the id
 596		i++
 597		link_b := i
 598		for i < len(data) && data[i] != ']' {
 599			i++
 600		}
 601		if i >= len(data) {
 602			return 0
 603		}
 604		link_e := i
 605
 606		// find the link_ref
 607		if link_b == link_e {
 608			if text_has_nl {
 609				b := bytes.NewBuffer(nil)
 610
 611				for j := 1; j < txt_e; j++ {
 612					switch {
 613					case data[j] != '\n':
 614						b.WriteByte(data[j])
 615					case data[j-1] != ' ':
 616						b.WriteByte(' ')
 617					}
 618				}
 619
 620				id = b.Bytes()
 621			} else {
 622				id = data[1:txt_e]
 623			}
 624		} else {
 625			id = data[link_b:link_e]
 626		}
 627
 628		// find the link_ref with matching id
 629		index := sortDotSearch(len(rndr.refs), func(i int) bool {
 630			return !byteslice_less(rndr.refs[i].id, id)
 631		})
 632		if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
 633			return 0
 634		}
 635		lr := rndr.refs[index]
 636
 637		// keep link and title from link_ref
 638		link = lr.link
 639		title = lr.title
 640		i++
 641
 642	// shortcut reference style link
 643	default:
 644		var id []byte
 645
 646		// craft the id
 647		if text_has_nl {
 648			b := bytes.NewBuffer(nil)
 649
 650			for j := 1; j < txt_e; j++ {
 651				switch {
 652				case data[j] != '\n':
 653					b.WriteByte(data[j])
 654				case data[j-1] != ' ':
 655					b.WriteByte(' ')
 656				}
 657			}
 658
 659			id = b.Bytes()
 660		} else {
 661			id = data[1:txt_e]
 662		}
 663
 664		// find the link_ref with matching id
 665		index := sortDotSearch(len(rndr.refs), func(i int) bool {
 666			return !byteslice_less(rndr.refs[i].id, id)
 667		})
 668		if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
 669			return 0
 670		}
 671		lr := rndr.refs[index]
 672
 673		// keep link and title from link_ref
 674		link = lr.link
 675		title = lr.title
 676
 677		// rewind the whitespace
 678		i = txt_e + 1
 679	}
 680
 681	// build content: img alt is escaped, link content is parsed
 682	content := bytes.NewBuffer(nil)
 683	if txt_e > 1 {
 684		if is_img {
 685			content.Write(data[1:txt_e])
 686		} else {
 687			parse_inline(content, rndr, data[1:txt_e])
 688		}
 689	}
 690
 691	var u_link []byte
 692	if len(link) > 0 {
 693		u_link_buf := bytes.NewBuffer(nil)
 694		unescape_text(u_link_buf, link)
 695		u_link = u_link_buf.Bytes()
 696	}
 697
 698	// call the relevant rendering function
 699	ret := 0
 700	if is_img {
 701		ob_size := ob.Len()
 702		ob_bytes := ob.Bytes()
 703		if ob_size > 0 && ob_bytes[ob_size-1] == '!' {
 704			ob.Truncate(ob_size - 1)
 705		}
 706
 707		ret = rndr.mk.image(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
 708	} else {
 709		ret = rndr.mk.link(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
 710	}
 711
 712	if ret > 0 {
 713		return i
 714	}
 715	return 0
 716}
 717
 718// '<' when tags or autolinks are allowed
 719func char_langle_tag(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 720	data = data[offset:]
 721	altype := LINK_TYPE_NOT_AUTOLINK
 722	end := tag_length(data, &altype)
 723	ret := 0
 724
 725	if end > 2 {
 726		switch {
 727		case rndr.mk.autolink != nil && altype != LINK_TYPE_NOT_AUTOLINK:
 728			u_link := bytes.NewBuffer(nil)
 729			unescape_text(u_link, data[1:end+1-2])
 730			ret = rndr.mk.autolink(ob, u_link.Bytes(), altype, rndr.mk.opaque)
 731		case rndr.mk.raw_html_tag != nil:
 732			ret = rndr.mk.raw_html_tag(ob, data[:end], rndr.mk.opaque)
 733		}
 734	}
 735
 736	if ret == 0 {
 737		return 0
 738	}
 739	return end
 740}
 741
 742// '\\' backslash escape
 743var escape_chars = []byte("\\`*_{}[]()#+-.!:|&<>")
 744
 745func char_escape(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 746	data = data[offset:]
 747
 748	if len(data) > 1 {
 749		if bytes.IndexByte(escape_chars, data[1]) < 0 {
 750			return 0
 751		}
 752
 753		if rndr.mk.normal_text != nil {
 754			rndr.mk.normal_text(ob, data[1:2], rndr.mk.opaque)
 755		} else {
 756			ob.WriteByte(data[1])
 757		}
 758	}
 759
 760	return 2
 761}
 762
 763// '&' escaped when it doesn't belong to an entity
 764// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 765func char_entity(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 766	data = data[offset:]
 767
 768	end := 1
 769
 770	if end < len(data) && data[end] == '#' {
 771		end++
 772	}
 773
 774	for end < len(data) && isalnum(data[end]) {
 775		end++
 776	}
 777
 778	if end < len(data) && data[end] == ';' {
 779		end++ // real entity
 780	} else {
 781		return 0 // lone '&'
 782	}
 783
 784	if rndr.mk.entity != nil {
 785		rndr.mk.entity(ob, data[:end], rndr.mk.opaque)
 786	} else {
 787		ob.Write(data[:end])
 788	}
 789
 790	return end
 791}
 792
 793func char_autolink(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 794	orig_data := data
 795	data = data[offset:]
 796
 797	if offset > 0 {
 798		if !isspace(orig_data[offset-1]) && !ispunct(orig_data[offset-1]) {
 799			return 0
 800		}
 801	}
 802
 803	if !is_safe_link(data) {
 804		return 0
 805	}
 806
 807	link_end := 0
 808	for link_end < len(data) && !isspace(data[link_end]) {
 809		link_end++
 810	}
 811
 812	// Skip punctuation at the end of the link
 813	if (data[link_end-1] == '.' || data[link_end-1] == ',' || data[link_end-1] == ';') && data[link_end-2] != '\\' {
 814		link_end--
 815	}
 816
 817	// See if the link finishes with a punctuation sign that can be closed.
 818	var copen byte
 819	switch data[link_end-1] {
 820	case '"':
 821		copen = '"'
 822	case '\'':
 823		copen = '\''
 824	case ')':
 825		copen = '('
 826	case ']':
 827		copen = '['
 828	case '}':
 829		copen = '{'
 830	default:
 831		copen = 0
 832	}
 833
 834	if copen != 0 {
 835		buf_end := offset + link_end - 2
 836
 837		open_delim := 1
 838
 839		/* Try to close the final punctuation sign in this same line;
 840		 * if we managed to close it outside of the URL, that means that it's
 841		 * not part of the URL. If it closes inside the URL, that means it
 842		 * is part of the URL.
 843		 *
 844		 * Examples:
 845		 *
 846		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
 847		 *              => http://www.pokemon.com/Pikachu_(Electric)
 848		 *
 849		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
 850		 *              => http://www.pokemon.com/Pikachu_(Electric)
 851		 *
 852		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
 853		 *              => http://www.pokemon.com/Pikachu_(Electric))
 854		 *
 855		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
 856		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
 857		 */
 858
 859		for buf_end >= 0 && orig_data[buf_end] != '\n' && open_delim != 0 {
 860			if orig_data[buf_end] == data[link_end-1] {
 861				open_delim++
 862			}
 863
 864			if orig_data[buf_end] == copen {
 865				open_delim--
 866			}
 867
 868			buf_end--
 869		}
 870
 871		if open_delim == 0 {
 872			link_end--
 873		}
 874	}
 875
 876	if rndr.mk.autolink != nil {
 877		u_link := bytes.NewBuffer(nil)
 878		unescape_text(u_link, data[:link_end])
 879
 880		rndr.mk.autolink(ob, u_link.Bytes(), LINK_TYPE_NORMAL, rndr.mk.opaque)
 881	}
 882
 883	return link_end
 884}
 885
 886var valid_uris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
 887
 888func is_safe_link(link []byte) bool {
 889	for _, prefix := range valid_uris {
 890		if len(link) > len(prefix) && !byteslice_less(link[:len(prefix)], prefix) && !byteslice_less(prefix, link[:len(prefix)]) && isalnum(link[len(prefix)]) {
 891			return true
 892		}
 893	}
 894
 895	return false
 896}
 897
 898// return the length of the given tag, or 0 is it's not valid
 899func tag_length(data []byte, autolink *int) int {
 900	var i, j int
 901
 902	// a valid tag can't be shorter than 3 chars
 903	if len(data) < 3 {
 904		return 0
 905	}
 906
 907	// begins with a '<' optionally followed by '/', followed by letter or number
 908	if data[0] != '<' {
 909		return 0
 910	}
 911	if data[1] == '/' {
 912		i = 2
 913	} else {
 914		i = 1
 915	}
 916
 917	if !isalnum(data[i]) {
 918		return 0
 919	}
 920
 921	// scheme test
 922	*autolink = LINK_TYPE_NOT_AUTOLINK
 923
 924	// try to find the beggining of an URI
 925	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
 926		i++
 927	}
 928
 929	if i > 1 && data[i] == '@' {
 930		if j = is_mail_autolink(data[i:]); j != 0 {
 931			*autolink = LINK_TYPE_EMAIL
 932			return i + j
 933		}
 934	}
 935
 936	if i > 2 && data[i] == ':' {
 937		*autolink = LINK_TYPE_NORMAL
 938		i++
 939	}
 940
 941	// complete autolink test: no whitespace or ' or "
 942	switch {
 943	case i >= len(data):
 944		*autolink = LINK_TYPE_NOT_AUTOLINK
 945	case *autolink != 0:
 946		j = i
 947
 948		for i < len(data) {
 949			if data[i] == '\\' {
 950				i += 2
 951			} else {
 952				if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
 953					break
 954				} else {
 955					i++
 956				}
 957			}
 958
 959		}
 960
 961		if i >= len(data) {
 962			return 0
 963		}
 964		if i > j && data[i] == '>' {
 965			return i + 1
 966		}
 967
 968		// one of the forbidden chars has been found
 969		*autolink = LINK_TYPE_NOT_AUTOLINK
 970	}
 971
 972	// look for something looking like a tag end
 973	for i < len(data) && data[i] != '>' {
 974		i++
 975	}
 976	if i >= len(data) {
 977		return 0
 978	}
 979	return i + 1
 980}
 981
 982// look for the address part of a mail autolink and '>'
 983// this is less strict than the original markdown e-mail address matching
 984func is_mail_autolink(data []byte) int {
 985	nb := 0
 986
 987	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
 988	for i := 0; i < len(data); i++ {
 989		if isalnum(data[i]) {
 990			continue
 991		}
 992
 993		switch data[i] {
 994		case '@':
 995			nb++
 996
 997		case '-', '.', '_':
 998			break
 999
1000		case '>':
1001			if nb == 1 {
1002				return i + 1
1003			} else {
1004				return 0
1005			}
1006		default:
1007			return 0
1008		}
1009	}
1010
1011	return 0
1012}
1013
1014// look for the next emph char, skipping other constructs
1015func find_emph_char(data []byte, c byte) int {
1016	i := 1
1017
1018	for i < len(data) {
1019		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
1020			i++
1021		}
1022		if i >= len(data) {
1023			return 0
1024		}
1025		if data[i] == c {
1026			return i
1027		}
1028
1029		// do not count escaped chars
1030		if i != 0 && data[i-1] == '\\' {
1031			i++
1032			continue
1033		}
1034
1035		if data[i] == '`' {
1036			// skip a code span
1037			tmp_i := 0
1038			i++
1039			for i < len(data) && data[i] != '`' {
1040				if tmp_i == 0 && data[i] == c {
1041					tmp_i = i
1042				}
1043				i++
1044			}
1045			if i >= len(data) {
1046				return tmp_i
1047			}
1048			i++
1049		} else {
1050			if data[i] == '[' {
1051				// skip a link
1052				tmp_i := 0
1053				i++
1054				for i < len(data) && data[i] != ']' {
1055					if tmp_i == 0 && data[i] == c {
1056						tmp_i = i
1057					}
1058					i++
1059				}
1060				i++
1061				for i < len(data) && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n') {
1062					i++
1063				}
1064				if i >= len(data) {
1065					return tmp_i
1066				}
1067				if data[i] != '[' && data[i] != '(' { // not a link
1068					if tmp_i > 0 {
1069						return tmp_i
1070					} else {
1071						continue
1072					}
1073				}
1074				cc := data[i]
1075				i++
1076				for i < len(data) && data[i] != cc {
1077					if tmp_i == 0 && data[i] == c {
1078						tmp_i = i
1079					}
1080					i++
1081				}
1082				if i >= len(data) {
1083					return tmp_i
1084				}
1085				i++
1086			}
1087		}
1088	}
1089	return 0
1090}
1091
1092func parse_emph1(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1093	i := 0
1094
1095	if rndr.mk.emphasis == nil {
1096		return 0
1097	}
1098
1099	// skip one symbol if coming from emph3
1100	if len(data) > 1 && data[0] == c && data[1] == c {
1101		i = 1
1102	}
1103
1104	for i < len(data) {
1105		length := find_emph_char(data[i:], c)
1106		if length == 0 {
1107			return 0
1108		}
1109		i += length
1110		if i >= len(data) {
1111			return 0
1112		}
1113
1114		if i+1 < len(data) && data[i+1] == c {
1115			i++
1116			continue
1117		}
1118
1119		if data[i] == c && !isspace(data[i-1]) {
1120
1121			if rndr.ext_flags&EXTENSION_NO_INTRA_EMPHASIS != 0 {
1122				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1123					continue
1124				}
1125			}
1126
1127			work := bytes.NewBuffer(nil)
1128			parse_inline(work, rndr, data[:i])
1129			r := rndr.mk.emphasis(ob, work.Bytes(), rndr.mk.opaque)
1130			if r > 0 {
1131				return i + 1
1132			} else {
1133				return 0
1134			}
1135		}
1136	}
1137
1138	return 0
1139}
1140
1141func parse_emph2(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1142	render_method := rndr.mk.double_emphasis
1143	if c == '~' {
1144		render_method = rndr.mk.strikethrough
1145	}
1146
1147	if render_method == nil {
1148		return 0
1149	}
1150
1151	i := 0
1152
1153	for i < len(data) {
1154		length := find_emph_char(data[i:], c)
1155		if length == 0 {
1156			return 0
1157		}
1158		i += length
1159
1160		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1161			work := bytes.NewBuffer(nil)
1162			parse_inline(work, rndr, data[:i])
1163			r := render_method(ob, work.Bytes(), rndr.mk.opaque)
1164			if r > 0 {
1165				return i + 2
1166			} else {
1167				return 0
1168			}
1169		}
1170		i++
1171	}
1172	return 0
1173}
1174
1175func parse_emph3(ob *bytes.Buffer, rndr *render, data []byte, offset int, c byte) int {
1176	i := 0
1177	orig_data := data
1178	data = data[offset:]
1179
1180	for i < len(data) {
1181		length := find_emph_char(data[i:], c)
1182		if length == 0 {
1183			return 0
1184		}
1185		i += length
1186
1187		// skip whitespace preceded symbols
1188		if data[i] != c || isspace(data[i-1]) {
1189			continue
1190		}
1191
1192		switch {
1193		case (i+2 < len(data) && data[i+1] == c && data[i+2] == c && rndr.mk.triple_emphasis != nil):
1194			// triple symbol found
1195			work := bytes.NewBuffer(nil)
1196
1197			parse_inline(work, rndr, data[:i])
1198			r := rndr.mk.triple_emphasis(ob, work.Bytes(), rndr.mk.opaque)
1199			if r > 0 {
1200				return i + 3
1201			} else {
1202				return 0
1203			}
1204		case (i+1 < len(data) && data[i+1] == c):
1205			// double symbol found, hand over to emph1
1206			length = parse_emph1(ob, rndr, orig_data[offset-2:], c)
1207			if length == 0 {
1208				return 0
1209			} else {
1210				return length - 2
1211			}
1212		default:
1213			// single symbol found, hand over to emph2
1214			length = parse_emph2(ob, rndr, orig_data[offset-1:], c)
1215			if length == 0 {
1216				return 0
1217			} else {
1218				return length - 1
1219			}
1220		}
1221	}
1222	return 0
1223}
1224
1225
1226//
1227// Block parsing
1228// Functions to parse block-level elements.
1229//
1230
1231// parse block-level data
1232func parse_block(ob *bytes.Buffer, rndr *render, data []byte) {
1233	if rndr.nesting >= rndr.max_nesting {
1234		return
1235	}
1236	rndr.nesting++
1237
1238	for len(data) > 0 {
1239		if is_atxheader(rndr, data) {
1240			data = data[parse_atxheader(ob, rndr, data):]
1241			continue
1242		}
1243		if data[0] == '<' && rndr.mk.blockhtml != nil {
1244			if i := parse_htmlblock(ob, rndr, data, true); i > 0 {
1245				data = data[i:]
1246				continue
1247			}
1248		}
1249		if i := is_empty(data); i > 0 {
1250			data = data[i:]
1251			continue
1252		}
1253		if is_hrule(data) {
1254			if rndr.mk.hrule != nil {
1255				rndr.mk.hrule(ob, rndr.mk.opaque)
1256			}
1257			var i int
1258			for i = 0; i < len(data) && data[i] != '\n'; i++ {
1259			}
1260			data = data[i:]
1261			continue
1262		}
1263		if rndr.ext_flags&EXTENSION_FENCED_CODE != 0 {
1264			if i := parse_fencedcode(ob, rndr, data); i > 0 {
1265				data = data[i:]
1266				continue
1267			}
1268		}
1269		if rndr.ext_flags&EXTENSION_TABLES != 0 {
1270			if i := parse_table(ob, rndr, data); i > 0 {
1271				data = data[i:]
1272				continue
1273			}
1274		}
1275		if prefix_quote(data) > 0 {
1276			data = data[parse_blockquote(ob, rndr, data):]
1277			continue
1278		}
1279		if prefix_code(data) > 0 {
1280			data = data[parse_blockcode(ob, rndr, data):]
1281			continue
1282		}
1283		if prefix_uli(data) > 0 {
1284			data = data[parse_list(ob, rndr, data, 0):]
1285			continue
1286		}
1287		if prefix_oli(data) > 0 {
1288			data = data[parse_list(ob, rndr, data, LIST_TYPE_ORDERED):]
1289			continue
1290		}
1291
1292		data = data[parse_paragraph(ob, rndr, data):]
1293	}
1294
1295	rndr.nesting--
1296}
1297
1298func is_atxheader(rndr *render, data []byte) bool {
1299	if data[0] != '#' {
1300		return false
1301	}
1302
1303	if rndr.ext_flags&EXTENSION_SPACE_HEADERS != 0 {
1304		level := 0
1305		for level < len(data) && level < 6 && data[level] == '#' {
1306			level++
1307		}
1308		if level < len(data) && data[level] != ' ' && data[level] != '\t' {
1309			return false
1310		}
1311	}
1312	return true
1313}
1314
1315func parse_atxheader(ob *bytes.Buffer, rndr *render, data []byte) int {
1316	level := 0
1317	for level < len(data) && level < 6 && data[level] == '#' {
1318		level++
1319	}
1320	i, end := 0, 0
1321	for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ {
1322	}
1323	for end = i; end < len(data) && data[end] != '\n'; end++ {
1324	}
1325	skip := end
1326	for end > 0 && data[end-1] == '#' {
1327		end--
1328	}
1329	for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') {
1330		end--
1331	}
1332	if end > i {
1333		work := bytes.NewBuffer(nil)
1334		parse_inline(work, rndr, data[i:end])
1335		if rndr.mk.header != nil {
1336			rndr.mk.header(ob, work.Bytes(), level, rndr.mk.opaque)
1337		}
1338	}
1339	return skip
1340}
1341
1342func is_headerline(data []byte) int {
1343	i := 0
1344
1345	// test of level 1 header
1346	if data[i] == '=' {
1347		for i = 1; i < len(data) && data[i] == '='; i++ {
1348		}
1349		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
1350			i++
1351		}
1352		if i >= len(data) || data[i] == '\n' {
1353			return 1
1354		} else {
1355			return 0
1356		}
1357	}
1358
1359	// test of level 2 header
1360	if data[i] == '-' {
1361		for i = 1; i < len(data) && data[i] == '-'; i++ {
1362		}
1363		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
1364			i++
1365		}
1366		if i >= len(data) || data[i] == '\n' {
1367			return 2
1368		} else {
1369			return 0
1370		}
1371	}
1372
1373	return 0
1374}
1375
1376func parse_htmlblock(ob *bytes.Buffer, rndr *render, data []byte, do_render bool) int {
1377	var i, j int
1378
1379	// identify the opening tag
1380	if len(data) < 2 || data[0] != '<' {
1381		return 0
1382	}
1383	curtag, tagfound := find_block_tag(data[1:])
1384
1385	// handle special cases
1386	if !tagfound {
1387
1388		// HTML comment, laxist form
1389		if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' {
1390			i = 5
1391
1392			for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
1393				i++
1394			}
1395			i++
1396
1397			if i < len(data) {
1398				j = is_empty(data[i:])
1399			}
1400
1401			if j > 0 {
1402				size := i + j
1403				if do_render && rndr.mk.blockhtml != nil {
1404					rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1405				}
1406				return size
1407			}
1408		}
1409
1410		// HR, which is the only self-closing block tag considered
1411		if len(data) > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') {
1412			i = 3
1413			for i < len(data) && data[i] != '>' {
1414				i++
1415			}
1416
1417			if i+1 < len(data) {
1418				i++
1419				j = is_empty(data[i:])
1420				if j > 0 {
1421					size := i + j
1422					if do_render && rndr.mk.blockhtml != nil {
1423						rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1424					}
1425					return size
1426				}
1427			}
1428		}
1429
1430		// no special case recognized
1431		return 0
1432	}
1433
1434	// look for an unindented matching closing tag
1435	//      followed by a blank line
1436	i = 1
1437	found := false
1438
1439	// if not found, try a second pass looking for indented match
1440	// but not if tag is "ins" or "del" (following original Markdown.pl)
1441	if curtag != "ins" && curtag != "del" {
1442		i = 1
1443		for i < len(data) {
1444			i++
1445			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
1446				i++
1447			}
1448
1449			if i+2+len(curtag) >= len(data) {
1450				break
1451			}
1452
1453			j = htmlblock_end(curtag, rndr, data[i-1:])
1454
1455			if j > 0 {
1456				i += j - 1
1457				found = true
1458				break
1459			}
1460		}
1461	}
1462
1463	if !found {
1464		return 0
1465	}
1466
1467	// the end of the block has been found
1468	if do_render && rndr.mk.blockhtml != nil {
1469		rndr.mk.blockhtml(ob, data[:i], rndr.mk.opaque)
1470	}
1471
1472	return i
1473}
1474
1475func find_block_tag(data []byte) (string, bool) {
1476	i := 0
1477	for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) {
1478		i++
1479	}
1480	if i >= len(data) {
1481		return "", false
1482	}
1483	key := string(data[:i])
1484	if block_tags[key] {
1485		return key, true
1486	}
1487	return "", false
1488}
1489
1490func htmlblock_end(tag string, rndr *render, data []byte) int {
1491	// assume data[0] == '<' && data[1] == '/' already tested
1492
1493	// check if tag is a match
1494	if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' {
1495		return 0
1496	}
1497
1498	// check white lines
1499	i := len(tag) + 3
1500	w := 0
1501	if i < len(data) {
1502		if w = is_empty(data[i:]); w == 0 {
1503			return 0 // non-blank after tag
1504		}
1505	}
1506	i += w
1507	w = 0
1508
1509	if rndr.ext_flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1510		if i < len(data) {
1511			w = is_empty(data[i:])
1512		}
1513	} else {
1514		if i < len(data) {
1515			if w = is_empty(data[i:]); w == 0 {
1516				return 0 // non-blank line after tag line
1517			}
1518		}
1519	}
1520
1521	return i + w
1522}
1523
1524func is_empty(data []byte) int {
1525	var i int
1526	for i = 0; i < len(data) && data[i] != '\n'; i++ {
1527		if data[i] != ' ' && data[i] != '\t' {
1528			return 0
1529		}
1530	}
1531	return i + 1
1532}
1533
1534func is_hrule(data []byte) bool {
1535	// skip initial spaces
1536	if len(data) < 3 {
1537		return false
1538	}
1539	i := 0
1540	if data[0] == ' ' {
1541		i++
1542		if data[1] == ' ' {
1543			i++
1544			if data[2] == ' ' {
1545				i++
1546			}
1547		}
1548	}
1549
1550	// look at the hrule char
1551	if i+2 >= len(data) || (data[i] != '*' && data[i] != '-' && data[i] != '_') {
1552		return false
1553	}
1554	c := data[i]
1555
1556	// the whole line must be the char or whitespace
1557	n := 0
1558	for i < len(data) && data[i] != '\n' {
1559		switch {
1560		case data[i] == c:
1561			n++
1562		case data[i] != ' ' && data[i] != '\t':
1563			return false
1564		}
1565		i++
1566	}
1567
1568	return n >= 3
1569}
1570
1571func is_codefence(data []byte, syntax **string) int {
1572	i, n := 0, 0
1573
1574	// skip initial spaces
1575	if len(data) < 3 {
1576		return 0
1577	}
1578	if data[0] == ' ' {
1579		i++
1580		if data[1] == ' ' {
1581			i++
1582			if data[2] == ' ' {
1583				i++
1584			}
1585		}
1586	}
1587
1588	// look at the hrule char
1589	if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') {
1590		return 0
1591	}
1592
1593	c := data[i]
1594
1595	// the whole line must be the char or whitespace
1596	for i < len(data) && data[i] == c {
1597		n++
1598		i++
1599	}
1600
1601	if n < 3 {
1602		return 0
1603	}
1604
1605	if syntax != nil {
1606		syn := 0
1607
1608		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
1609			i++
1610		}
1611
1612		syntax_start := i
1613
1614		if i < len(data) && data[i] == '{' {
1615			i++
1616			syntax_start++
1617
1618			for i < len(data) && data[i] != '}' && data[i] != '\n' {
1619				syn++
1620				i++
1621			}
1622
1623			if i == len(data) || data[i] != '}' {
1624				return 0
1625			}
1626
1627			// string all whitespace at the beginning and the end
1628			// of the {} block
1629			for syn > 0 && isspace(data[syntax_start]) {
1630				syntax_start++
1631				syn--
1632			}
1633
1634			for syn > 0 && isspace(data[syntax_start+syn-1]) {
1635				syn--
1636			}
1637
1638			i++
1639		} else {
1640			for i < len(data) && !isspace(data[i]) {
1641				syn++
1642				i++
1643			}
1644		}
1645
1646		language := string(data[syntax_start : syntax_start+syn])
1647		*syntax = &language
1648	}
1649
1650	for i < len(data) && data[i] != '\n' {
1651		if !isspace(data[i]) {
1652			return 0
1653		}
1654		i++
1655	}
1656
1657	return i + 1
1658}
1659
1660func parse_fencedcode(ob *bytes.Buffer, rndr *render, data []byte) int {
1661	var lang *string
1662	beg := is_codefence(data, &lang)
1663	if beg == 0 {
1664		return 0
1665	}
1666
1667	work := bytes.NewBuffer(nil)
1668
1669	for beg < len(data) {
1670		fence_end := is_codefence(data[beg:], nil)
1671		if fence_end != 0 {
1672			beg += fence_end
1673			break
1674		}
1675
1676		var end int
1677		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1678		}
1679
1680		if beg < end {
1681			// verbatim copy to the working buffer, escaping entities
1682			if is_empty(data[beg:]) > 0 {
1683				work.WriteByte('\n')
1684			} else {
1685				work.Write(data[beg:end])
1686			}
1687		}
1688		beg = end
1689	}
1690
1691	if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
1692		work.WriteByte('\n')
1693	}
1694
1695	if rndr.mk.blockcode != nil {
1696		syntax := ""
1697		if lang != nil {
1698			syntax = *lang
1699		}
1700
1701		rndr.mk.blockcode(ob, work.Bytes(), syntax, rndr.mk.opaque)
1702	}
1703
1704	return beg
1705}
1706
1707func parse_table(ob *bytes.Buffer, rndr *render, data []byte) int {
1708	header_work := bytes.NewBuffer(nil)
1709	i, columns, col_data := parse_table_header(header_work, rndr, data)
1710	if i > 0 {
1711		body_work := bytes.NewBuffer(nil)
1712
1713		for i < len(data) {
1714			pipes, row_start := 0, i
1715			for ; i < len(data) && data[i] != '\n'; i++ {
1716				if data[i] == '|' {
1717					pipes++
1718				}
1719			}
1720
1721			if pipes == 0 || i == len(data) {
1722				i = row_start
1723				break
1724			}
1725
1726			parse_table_row(body_work, rndr, data[row_start:i], columns, col_data)
1727			i++
1728		}
1729
1730		if rndr.mk.table != nil {
1731			rndr.mk.table(ob, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque)
1732		}
1733	}
1734
1735	return i
1736}
1737
1738func parse_table_header(ob *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) {
1739	i, pipes := 0, 0
1740	column_data = []int{}
1741	for i = 0; i < len(data) && data[i] != '\n'; i++ {
1742		if data[i] == '|' {
1743			pipes++
1744		}
1745	}
1746
1747	if i == len(data) || pipes == 0 {
1748		return 0, 0, column_data
1749	}
1750
1751	header_end := i
1752
1753	if data[0] == '|' {
1754		pipes--
1755	}
1756
1757	if i > 2 && data[i-1] == '|' {
1758		pipes--
1759	}
1760
1761	columns = pipes + 1
1762	column_data = make([]int, columns)
1763
1764	// parse the header underline
1765	i++
1766	if i < len(data) && data[i] == '|' {
1767		i++
1768	}
1769
1770	under_end := i
1771	for under_end < len(data) && data[under_end] != '\n' {
1772		under_end++
1773	}
1774
1775	col := 0
1776	for ; col < columns && i < under_end; col++ {
1777		dashes := 0
1778
1779		for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1780			i++
1781		}
1782
1783		if data[i] == ':' {
1784			i++
1785			column_data[col] |= TABLE_ALIGNMENT_LEFT
1786			dashes++
1787		}
1788
1789		for i < under_end && data[i] == '-' {
1790			i++
1791			dashes++
1792		}
1793
1794		if i < under_end && data[i] == ':' {
1795			i++
1796			column_data[col] |= TABLE_ALIGNMENT_RIGHT
1797			dashes++
1798		}
1799
1800		for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1801			i++
1802		}
1803
1804		if i < under_end && data[i] != '|' {
1805			break
1806		}
1807
1808		if dashes < 3 {
1809			break
1810		}
1811
1812		i++
1813	}
1814
1815	if col < columns {
1816		return 0, 0, column_data
1817	}
1818
1819	parse_table_row(ob, rndr, data[:header_end], columns, column_data)
1820	size = under_end + 1
1821	return
1822}
1823
1824func parse_table_row(ob *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) {
1825	i, col := 0, 0
1826	row_work := bytes.NewBuffer(nil)
1827
1828	if i < len(data) && data[i] == '|' {
1829		i++
1830	}
1831
1832	for col = 0; col < columns && i < len(data); col++ {
1833		for i < len(data) && isspace(data[i]) {
1834			i++
1835		}
1836
1837		cell_start := i
1838
1839		for i < len(data) && data[i] != '|' {
1840			i++
1841		}
1842
1843		cell_end := i - 1
1844
1845		for cell_end > cell_start && isspace(data[cell_end]) {
1846			cell_end--
1847		}
1848
1849		cell_work := bytes.NewBuffer(nil)
1850		parse_inline(cell_work, rndr, data[cell_start:cell_end+1])
1851
1852		if rndr.mk.table_cell != nil {
1853			cdata := 0
1854			if col < len(col_data) {
1855				cdata = col_data[col]
1856			}
1857			rndr.mk.table_cell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque)
1858		}
1859
1860		i++
1861	}
1862
1863	for ; col < columns; col++ {
1864		empty_cell := []byte{}
1865		if rndr.mk.table_cell != nil {
1866			cdata := 0
1867			if col < len(col_data) {
1868				cdata = col_data[col]
1869			}
1870			rndr.mk.table_cell(row_work, empty_cell, cdata, rndr.mk.opaque)
1871		}
1872	}
1873
1874	if rndr.mk.table_row != nil {
1875		rndr.mk.table_row(ob, row_work.Bytes(), rndr.mk.opaque)
1876	}
1877}
1878
1879// returns blockquote prefix length
1880func prefix_quote(data []byte) int {
1881	i := 0
1882	for i < len(data) && i < 3 && data[i] == ' ' {
1883		i++
1884	}
1885	if i < len(data) && data[i] == '>' {
1886		if i+1 < len(data) && (data[i+1] == ' ' || data[i+1] == '\t') {
1887			return i + 2
1888		}
1889		return i + 1
1890	}
1891	return 0
1892}
1893
1894// parse a blockquote fragment
1895func parse_blockquote(ob *bytes.Buffer, rndr *render, data []byte) int {
1896	out := bytes.NewBuffer(nil)
1897	work := bytes.NewBuffer(nil)
1898	beg, end := 0, 0
1899	for beg < len(data) {
1900		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1901		}
1902
1903		if pre := prefix_quote(data[beg:]); pre > 0 {
1904			beg += pre // skip prefix
1905		} else {
1906			// empty line followed by non-quote line
1907			if is_empty(data[beg:]) > 0 && (end >= len(data) || (prefix_quote(data[end:]) == 0 && is_empty(data[end:]) == 0)) {
1908				break
1909			}
1910		}
1911
1912		if beg < end { // copy into the in-place working buffer
1913			work.Write(data[beg:end])
1914		}
1915		beg = end
1916	}
1917
1918	parse_block(out, rndr, work.Bytes())
1919	if rndr.mk.blockquote != nil {
1920		rndr.mk.blockquote(ob, out.Bytes(), rndr.mk.opaque)
1921	}
1922	return end
1923}
1924
1925// returns prefix length for block code
1926func prefix_code(data []byte) int {
1927	if len(data) > 0 && data[0] == '\t' {
1928		return 1
1929	}
1930	if len(data) > 3 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1931		return 4
1932	}
1933	return 0
1934}
1935
1936func parse_blockcode(ob *bytes.Buffer, rndr *render, data []byte) int {
1937	work := bytes.NewBuffer(nil)
1938
1939	beg, end := 0, 0
1940	for beg < len(data) {
1941		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1942		}
1943
1944		if pre := prefix_code(data[beg:end]); pre > 0 {
1945			beg += pre
1946		} else {
1947			if is_empty(data[beg:end]) == 0 {
1948				// non-empty non-prefixed line breaks the pre
1949				break
1950			}
1951		}
1952
1953		if beg < end {
1954			// verbatim copy to the working buffer, escaping entities
1955			if is_empty(data[beg:end]) > 0 {
1956				work.WriteByte('\n')
1957			} else {
1958				work.Write(data[beg:end])
1959			}
1960		}
1961		beg = end
1962	}
1963
1964	// trim all the \n off the end of work
1965	workbytes := work.Bytes()
1966	n := 0
1967	for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' {
1968		n++
1969	}
1970	if n > 0 {
1971		work = bytes.NewBuffer(workbytes[:len(workbytes)-n])
1972	}
1973
1974	work.WriteByte('\n')
1975
1976	if rndr.mk.blockcode != nil {
1977		rndr.mk.blockcode(ob, work.Bytes(), "", rndr.mk.opaque)
1978	}
1979
1980	return beg
1981}
1982
1983// returns unordered list item prefix
1984func prefix_uli(data []byte) int {
1985	i := 0
1986	for i < len(data) && i < 3 && data[i] == ' ' {
1987		i++
1988	}
1989	if i+1 >= len(data) || (data[i] != '*' && data[i] != '+' && data[i] != '-') || (data[i+1] != ' ' && data[i+1] != '\t') {
1990		return 0
1991	}
1992	return i + 2
1993}
1994
1995// returns ordered list item prefix
1996func prefix_oli(data []byte) int {
1997	i := 0
1998	for i < len(data) && i < 3 && data[i] == ' ' {
1999		i++
2000	}
2001	if i >= len(data) || data[i] < '0' || data[i] > '9' {
2002		return 0
2003	}
2004	for i < len(data) && data[i] >= '0' && data[i] <= '9' {
2005		i++
2006	}
2007	if i+1 >= len(data) || data[i] != '.' || (data[i+1] != ' ' && data[i+1] != '\t') {
2008		return 0
2009	}
2010	return i + 2
2011}
2012
2013// parse ordered or unordered list block
2014func parse_list(ob *bytes.Buffer, rndr *render, data []byte, flags int) int {
2015	work := bytes.NewBuffer(nil)
2016
2017	i, j := 0, 0
2018	for i < len(data) {
2019		j = parse_listitem(work, rndr, data[i:], &flags)
2020		i += j
2021
2022		if j == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
2023			break
2024		}
2025	}
2026
2027	if rndr.mk.list != nil {
2028		rndr.mk.list(ob, work.Bytes(), flags, rndr.mk.opaque)
2029	}
2030	return i
2031}
2032
2033// parse a single list item
2034// assumes initial prefix is already removed
2035func parse_listitem(ob *bytes.Buffer, rndr *render, data []byte, flags *int) int {
2036	// keep track of the first indentation prefix
2037	beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0
2038
2039	for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' {
2040		orgpre++
2041	}
2042
2043	beg = prefix_uli(data)
2044	if beg == 0 {
2045		beg = prefix_oli(data)
2046	}
2047	if beg == 0 {
2048		return 0
2049	}
2050
2051	// skip leading whitespace on first line
2052	for beg < len(data) && data[beg] == ' ' {
2053		beg++
2054	}
2055
2056	// skip to the beginning of the following line
2057	end = beg
2058	for end < len(data) && data[end-1] != '\n' {
2059		end++
2060	}
2061
2062	// get working buffers
2063	work := bytes.NewBuffer(nil)
2064	inter := bytes.NewBuffer(nil)
2065
2066	// put the first line into the working buffer
2067	work.Write(data[beg:end])
2068	beg = end
2069
2070	// process the following lines
2071	in_empty, has_inside_empty := false, false
2072	for beg < len(data) {
2073		end++
2074
2075		for end < len(data) && data[end-1] != '\n' {
2076			end++
2077		}
2078
2079		// process an empty line
2080		if is_empty(data[beg:end]) > 0 {
2081			in_empty = true
2082			beg = end
2083			continue
2084		}
2085
2086		// calculate the indentation
2087		i = 0
2088		for i < 4 && beg+i < end && data[beg+i] == ' ' {
2089			i++
2090		}
2091
2092		pre = i
2093		if data[beg] == '\t' {
2094			i = 1
2095			pre = 8
2096		}
2097
2098		// check for a new item
2099		chunk := data[beg+i : end]
2100		if (prefix_uli(chunk) > 0 && !is_hrule(chunk)) || prefix_oli(chunk) > 0 {
2101			if in_empty {
2102				has_inside_empty = true
2103			}
2104
2105			if pre == orgpre { // the following item must have the same indentation
2106				break
2107			}
2108
2109			if sublist == 0 {
2110				sublist = work.Len()
2111			}
2112		} else {
2113			// only join indented stuff after empty lines
2114			if in_empty && i < 4 && data[beg] != '\t' {
2115				*flags |= LIST_ITEM_END_OF_LIST
2116				break
2117			} else {
2118				if in_empty {
2119					work.WriteByte('\n')
2120					has_inside_empty = true
2121				}
2122			}
2123		}
2124
2125		in_empty = false
2126
2127		// add the line into the working buffer without prefix
2128		work.Write(data[beg+i : end])
2129		beg = end
2130	}
2131
2132	// render li contents
2133	if has_inside_empty {
2134		*flags |= LIST_ITEM_CONTAINS_BLOCK
2135	}
2136
2137	workbytes := work.Bytes()
2138	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
2139		// intermediate render of block li
2140		if sublist > 0 && sublist < len(workbytes) {
2141			parse_block(inter, rndr, workbytes[:sublist])
2142			parse_block(inter, rndr, workbytes[sublist:])
2143		} else {
2144			parse_block(inter, rndr, workbytes)
2145		}
2146	} else {
2147		// intermediate render of inline li
2148		if sublist > 0 && sublist < len(workbytes) {
2149			parse_inline(inter, rndr, workbytes[:sublist])
2150			parse_block(inter, rndr, workbytes[sublist:])
2151		} else {
2152			parse_inline(inter, rndr, workbytes)
2153		}
2154	}
2155
2156	// render li itself
2157	if rndr.mk.listitem != nil {
2158		rndr.mk.listitem(ob, inter.Bytes(), *flags, rndr.mk.opaque)
2159	}
2160
2161	return beg
2162}
2163
2164func parse_paragraph(ob *bytes.Buffer, rndr *render, data []byte) int {
2165	i, end, level := 0, 0, 0
2166
2167	for i < len(data) {
2168		for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ {
2169		}
2170
2171		if is_empty(data[i:]) > 0 {
2172			break
2173		}
2174		if level = is_headerline(data[i:]); level > 0 {
2175			break
2176		}
2177
2178		if rndr.ext_flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
2179			if data[i] == '<' && rndr.mk.blockhtml != nil && parse_htmlblock(ob, rndr, data[i:], false) > 0 {
2180				end = i
2181				break
2182			}
2183		}
2184
2185		if is_atxheader(rndr, data[i:]) || is_hrule(data[i:]) {
2186			end = i
2187			break
2188		}
2189
2190		i = end
2191	}
2192
2193	work := data
2194	size := i
2195	for size > 0 && work[size-1] == '\n' {
2196		size--
2197	}
2198
2199	if level == 0 {
2200		tmp := bytes.NewBuffer(nil)
2201		parse_inline(tmp, rndr, work[:size])
2202		if rndr.mk.paragraph != nil {
2203			rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
2204		}
2205	} else {
2206		if size > 0 {
2207			beg := 0
2208			i = size
2209			size--
2210
2211			for size > 0 && work[size] != '\n' {
2212				size--
2213			}
2214
2215			beg = size + 1
2216			for size > 0 && work[size-1] == '\n' {
2217				size--
2218			}
2219
2220			if size > 0 {
2221				tmp := bytes.NewBuffer(nil)
2222				parse_inline(tmp, rndr, work[:size])
2223				if rndr.mk.paragraph != nil {
2224					rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
2225				}
2226
2227				work = work[beg:]
2228				size = i - beg
2229			} else {
2230				size = i
2231			}
2232		}
2233
2234		header_work := bytes.NewBuffer(nil)
2235		parse_inline(header_work, rndr, work[:size])
2236
2237		if rndr.mk.header != nil {
2238			rndr.mk.header(ob, header_work.Bytes(), level, rndr.mk.opaque)
2239		}
2240	}
2241
2242	return end
2243}
2244
2245
2246//
2247// Link references
2248//
2249// This section implements support for references that (usually) appear
2250// as footnotes in a document, and can be referenced anywhere in the document.
2251// The basic format is:
2252//
2253//    [1]: http://www.google.com/ "Google"
2254//    [2]: http://www.github.com/ "Github"
2255//
2256// Anywhere in the document, the reference can be linked by referring to its
2257// label, i.e., 1 and 2 in this example, as in:
2258//
2259//    This library is hosted on [Github][2], a git hosting site.
2260
2261// References are parsed and stored in this struct.
2262type link_ref struct {
2263	id    []byte
2264	link  []byte
2265	title []byte
2266}
2267
2268// The list of all reference links is stored in this type.
2269type link_ref_array []*link_ref
2270
2271// Find the length of a list of references.
2272// This implements an interface needed for sorting.
2273func (elt link_ref_array) Len() int {
2274	return len(elt)
2275}
2276
2277// Test if one reference is less than another (case-insensitive).
2278// This implements an interface needed for sorting.
2279func (elt link_ref_array) Less(i, j int) bool {
2280	return byteslice_less(elt[i].id, elt[j].id)
2281}
2282
2283// Compare two []byte values (case-insensitive), returning
2284// true if a is less than b.
2285func byteslice_less(a []byte, b []byte) bool {
2286	// adapted from bytes.Compare in stdlib
2287	m := len(a)
2288	if m > len(b) {
2289		m = len(b)
2290	}
2291	for i, ac := range a[0:m] {
2292		// do a case-insensitive comparison
2293		ai, bi := unicode.ToLower(int(ac)), unicode.ToLower(int(b[i]))
2294		switch {
2295		case ai > bi:
2296			return false
2297		case ai < bi:
2298			return true
2299		}
2300	}
2301	switch {
2302	case len(a) < len(b):
2303		return true
2304	case len(a) > len(b):
2305		return false
2306	}
2307	return false
2308}
2309
2310// Swap two references.
2311// This implements an interface needed for sorting.
2312func (elt link_ref_array) Swap(i, j int) {
2313	elt[i], elt[j] = elt[j], elt[i]
2314}
2315
2316// Check whether or not data starts with a reference link.
2317// If so, it is parsed and stored in the list of references
2318// (in the render struct).
2319// Returns the number of bytes to skip to move past it, or zero
2320// if there is the first line is not a reference.
2321func is_ref(rndr *render, data []byte) int {
2322	// up to 3 optional leading spaces
2323	if len(data) < 4 {
2324		return 0
2325	}
2326	i := 0
2327	for i < 3 && data[i] == ' ' {
2328		i++
2329	}
2330	if data[i] == ' ' {
2331		return 0
2332	}
2333
2334	// id part: anything but a newline between brackets
2335	if data[i] != '[' {
2336		return 0
2337	}
2338	i++
2339	id_offset := i
2340	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
2341		i++
2342	}
2343	if i >= len(data) || data[i] != ']' {
2344		return 0
2345	}
2346	id_end := i
2347
2348	// spacer: colon (space | tab)* newline? (space | tab)*
2349	i++
2350	if i >= len(data) || data[i] != ':' {
2351		return 0
2352	}
2353	i++
2354	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
2355		i++
2356	}
2357	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
2358		i++
2359		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
2360			i++
2361		}
2362	}
2363	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
2364		i++
2365	}
2366	if i >= len(data) {
2367		return 0
2368	}
2369
2370	// link: whitespace-free sequence, optionally between angle brackets
2371	if data[i] == '<' {
2372		i++
2373	}
2374	link_offset := i
2375	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
2376		i++
2377	}
2378	link_end := i
2379	if data[link_offset] == '<' && data[link_end-1] == '>' {
2380		link_offset++
2381		link_end--
2382	}
2383
2384	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
2385	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
2386		i++
2387	}
2388	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
2389		return 0
2390	}
2391
2392	// compute end-of-line
2393	line_end := 0
2394	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
2395		line_end = i
2396	}
2397	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
2398		line_end++
2399	}
2400
2401	// optional (space|tab)* spacer after a newline
2402	if line_end > 0 {
2403		i = line_end + 1
2404		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
2405			i++
2406		}
2407	}
2408
2409	// optional title: any non-newline sequence enclosed in '"() alone on its line
2410	title_offset, title_end := 0, 0
2411	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
2412		i++
2413		title_offset = i
2414
2415		// look for EOL
2416		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
2417			i++
2418		}
2419		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
2420			title_end = i + 1
2421		} else {
2422			title_end = i
2423		}
2424
2425		// step back
2426		i--
2427		for i > title_offset && (data[i] == ' ' || data[i] == '\t') {
2428			i--
2429		}
2430		if i > title_offset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
2431			line_end = title_end
2432			title_end = i
2433		}
2434	}
2435	if line_end == 0 { // garbage after the link
2436		return 0
2437	}
2438
2439	// a valid ref has been found
2440	if rndr == nil {
2441		return line_end
2442	}
2443	item := &link_ref{id: data[id_offset:id_end], link: data[link_offset:link_end], title: data[title_offset:title_end]}
2444	rndr.refs = append(rndr.refs, item)
2445
2446	return line_end
2447}
2448
2449
2450//
2451//
2452// Miscellaneous helper functions
2453//
2454//
2455
2456
2457// Test if a character is a punctuation symbol.
2458// Taken from a private function in regexp in the stdlib.
2459func ispunct(c byte) bool {
2460	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
2461		if c == r {
2462			return true
2463		}
2464	}
2465	return false
2466}
2467
2468// this is sort.Search, reproduced here because an older
2469// version of the library had a bug
2470func sortDotSearch(n int, f func(int) bool) int {
2471	// Define f(-1) == false and f(n) == true.
2472	// Invariant: f(i-1) == false, f(j) == true.
2473	i, j := 0, n
2474	for i < j {
2475		h := i + (j-i)/2 // avoid overflow when computing h
2476		// i ≤ h < j
2477		if !f(h) {
2478			i = h + 1 // preserves f(i-1) == false
2479		} else {
2480			j = h // preserves f(j) == true
2481		}
2482	}
2483	// i == j, f(i-1) == false, and f(j) (= f(i)) == true  =>  answer is i.
2484	return i
2485}
2486
2487// Test if a character is a whitespace character.
2488func isspace(c byte) bool {
2489	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
2490}
2491
2492// Test if a character is a letter or a digit.
2493func isalnum(c byte) bool {
2494	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
2495}
2496
2497// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
2498func expand_tabs(ob *bytes.Buffer, line []byte) {
2499	i, tab := 0, 0
2500
2501	for i < len(line) {
2502		org := i
2503		for i < len(line) && line[i] != '\t' {
2504			i++
2505			tab++
2506		}
2507
2508		if i > org {
2509			ob.Write(line[org:i])
2510		}
2511
2512		if i >= len(line) {
2513			break
2514		}
2515
2516		for {
2517			ob.WriteByte(' ')
2518			tab++
2519			if tab%TAB_SIZE == 0 {
2520				break
2521			}
2522		}
2523
2524		i++
2525	}
2526}