all repos — grayfriday @ c3545f4e70dab6389ab4b1cdbae13c5189db4a25

blackfriday fork with a few changes

markdown.go (view raw)

   1//
   2// Black Friday Markdown Processor
   3// Ported to Go from http://github.com/tanoku/upskirt
   4// by Russ Ross <russ@russross.com>
   5//
   6
   7package main
   8
   9import (
  10	"bytes"
  11	"fmt"
  12	"io/ioutil"
  13	"os"
  14	"sort"
  15	"unicode"
  16)
  17
  18const (
  19	MKDA_NOT_AUTOLINK = iota
  20	MKDA_NORMAL
  21	MKDA_EMAIL
  22)
  23
  24const (
  25	MKDEXT_NO_INTRA_EMPHASIS = 1 << iota
  26	MKDEXT_TABLES
  27	MKDEXT_FENCED_CODE
  28	MKDEXT_AUTOLINK
  29	MKDEXT_STRIKETHROUGH
  30	MKDEXT_LAX_HTML_BLOCKS
  31	MKDEXT_SPACE_HEADERS
  32)
  33
  34const (
  35	_ = iota
  36	MKD_LIST_ORDERED
  37	MKD_LI_BLOCK // <li> containing block data
  38	MKD_LI_END   = 8
  39)
  40
  41const (
  42	MKD_TABLE_ALIGN_L = 1 << iota
  43	MKD_TABLE_ALIGN_R
  44	MKD_TABLE_ALIGN_CENTER = (MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R)
  45)
  46
  47var block_tags = map[string]int{
  48	"p":          1, // 0
  49	"dl":         2,
  50	"h1":         2,
  51	"h2":         2,
  52	"h3":         2,
  53	"h4":         2,
  54	"h5":         2,
  55	"h6":         2,
  56	"ol":         2,
  57	"ul":         2,
  58	"del":        3, // 10
  59	"div":        3,
  60	"ins":        3, // 12
  61	"pre":        3,
  62	"form":       4,
  63	"math":       4,
  64	"table":      5,
  65	"iframe":     6,
  66	"script":     6,
  67	"fieldset":   8,
  68	"noscript":   8,
  69	"blockquote": 10,
  70}
  71
  72// functions for rendering parsed data
  73type mkd_renderer struct {
  74	// block-level callbacks---nil skips the block
  75	blockcode  func(ob *bytes.Buffer, text []byte, lang string, opaque interface{})
  76	blockquote func(ob *bytes.Buffer, text []byte, opaque interface{})
  77	blockhtml  func(ob *bytes.Buffer, text []byte, opaque interface{})
  78	header     func(ob *bytes.Buffer, text []byte, level int, opaque interface{})
  79	hrule      func(ob *bytes.Buffer, opaque interface{})
  80	list       func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
  81	listitem   func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
  82	paragraph  func(ob *bytes.Buffer, text []byte, opaque interface{})
  83	table      func(ob *bytes.Buffer, header []byte, body []byte, opaque interface{})
  84	table_row  func(ob *bytes.Buffer, text []byte, opaque interface{})
  85	table_cell func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
  86
  87	// span-level callbacks---nil or return 0 prints the span verbatim
  88	autolink        func(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int
  89	codespan        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
  90	double_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
  91	emphasis        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
  92	image           func(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int
  93	linebreak       func(ob *bytes.Buffer, opaque interface{}) int
  94	link            func(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int
  95	raw_html_tag    func(ob *bytes.Buffer, tag []byte, opaque interface{}) int
  96	triple_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
  97	strikethrough   func(ob *bytes.Buffer, text []byte, opaque interface{}) int
  98
  99	// low-level callbacks---nil copies input directly into the output
 100	entity      func(ob *bytes.Buffer, entity []byte, opaque interface{})
 101	normal_text func(ob *bytes.Buffer, text []byte, opaque interface{})
 102
 103	// header and footer
 104	doc_header func(ob *bytes.Buffer, opaque interface{})
 105	doc_footer func(ob *bytes.Buffer, opaque interface{})
 106
 107	// user data---passed back to every callback
 108	opaque interface{}
 109}
 110
 111type link_ref struct {
 112	id    []byte
 113	link  []byte
 114	title []byte
 115}
 116
 117type link_ref_array []*link_ref
 118
 119// implement the sorting interface
 120func (elt link_ref_array) Len() int {
 121	return len(elt)
 122}
 123
 124func (elt link_ref_array) Less(i, j int) bool {
 125	return byteslice_less(elt[i].id, elt[j].id)
 126}
 127
 128func byteslice_less(a []byte, b []byte) bool {
 129	// adapted from bytes.Compare in stdlib
 130	m := len(a)
 131	if m > len(b) {
 132		m = len(b)
 133	}
 134	for i, ac := range a[0:m] {
 135		// do a case-insensitive comparison
 136		ai, bi := unicode.ToLower(int(ac)), unicode.ToLower(int(b[i]))
 137		switch {
 138		case ai > bi:
 139			return false
 140		case ai < bi:
 141			return true
 142		}
 143	}
 144	switch {
 145	case len(a) < len(b):
 146		return true
 147	case len(a) > len(b):
 148		return false
 149	}
 150	return false
 151}
 152
 153func (elt link_ref_array) Swap(i, j int) {
 154	elt[i], elt[j] = elt[j], elt[i]
 155}
 156
 157// returns whether or not a line is a reference
 158func is_ref(data []byte, beg int, last *int, rndr *render) bool {
 159	// up to 3 optional leading spaces
 160	if beg+3 > len(data) {
 161		return false
 162	}
 163	i := 0
 164	if data[beg] == ' ' {
 165		i++
 166		if data[beg+1] == ' ' {
 167			i++
 168			if data[beg+2] == ' ' {
 169				i++
 170				if data[beg+3] == ' ' {
 171					return false
 172				}
 173			}
 174		}
 175	}
 176	i += beg
 177
 178	// id part: anything but a newline between brackets
 179	if data[i] != '[' {
 180		return false
 181	}
 182	i++
 183	id_offset := i
 184	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
 185		i++
 186	}
 187	if i >= len(data) || data[i] != ']' {
 188		return false
 189	}
 190	id_end := i
 191
 192	// spacer: colon (space | tab)* newline? (space | tab)*
 193	i++
 194	if i >= len(data) || data[i] != ':' {
 195		return false
 196	}
 197	i++
 198	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 199		i++
 200	}
 201	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
 202		i++
 203		if i < len(data) && data[i] == '\r' && data[i-1] == '\n' {
 204			i++
 205		}
 206	}
 207	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 208		i++
 209	}
 210	if i >= len(data) {
 211		return false
 212	}
 213
 214	// link: whitespace-free sequence, optionally between angle brackets
 215	if data[i] == '<' {
 216		i++
 217	}
 218	link_offset := i
 219	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
 220		i++
 221	}
 222	var link_end int
 223	if data[i-1] == '>' {
 224		link_end = i - 1
 225	} else {
 226		link_end = i
 227	}
 228
 229	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
 230	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 231		i++
 232	}
 233	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
 234		return false
 235	}
 236
 237	// compute end-of-line
 238	line_end := 0
 239	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
 240		line_end = i
 241	}
 242	if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
 243		line_end = i + 1
 244	}
 245
 246	// optional (space|tab)* spacer after a newline
 247	if line_end > 0 {
 248		i = line_end + 1
 249		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 250			i++
 251		}
 252	}
 253
 254	// optional title: any non-newline sequence enclosed in '"() alone on its line
 255	title_offset, title_end := 0, 0
 256	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
 257		i++
 258		title_offset = i
 259
 260		// looking for EOL
 261		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
 262			i++
 263		}
 264		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
 265			title_end = i + 1
 266		} else {
 267			title_end = i
 268		}
 269
 270		// stepping back
 271		i--
 272		for i > title_offset && (data[i] == ' ' || data[i] == '\t') {
 273			i--
 274		}
 275		if i > title_offset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
 276			line_end = title_end
 277			title_end = i
 278		}
 279	}
 280	if line_end == 0 { // garbage after the link
 281		return false
 282	}
 283
 284	// a valid ref has been found; fill in return structures
 285	if last != nil {
 286		*last = line_end
 287	}
 288	if rndr == nil {
 289		return true
 290	}
 291	item := &link_ref{id: data[id_offset:id_end], link: data[link_offset:link_end], title: data[title_offset:title_end]}
 292	rndr.refs = append(rndr.refs, item)
 293
 294	return true
 295}
 296
 297type render struct {
 298	mk          *mkd_renderer
 299	refs        link_ref_array
 300	active_char [256]int
 301	ext_flags   uint32
 302	nesting     int
 303	max_nesting int
 304}
 305
 306const (
 307	MD_CHAR_NONE = iota
 308	MD_CHAR_EMPHASIS
 309	MD_CHAR_CODESPAN
 310	MD_CHAR_LINEBREAK
 311	MD_CHAR_LINK
 312	MD_CHAR_LANGLE
 313	MD_CHAR_ESCAPE
 314	MD_CHAR_ENTITITY
 315	MD_CHAR_AUTOLINK
 316)
 317
 318// closures to render active chars, each:
 319//   returns the number of chars taken care of
 320//   data is the complete block being rendered
 321//   offset is the number of valid chars before the data
 322//
 323// Note: this is filled in in Markdown to prevent an initilization loop
 324var markdown_char_ptrs [9]func(ob *bytes.Buffer, rndr *render, data []byte, offset int) int
 325
 326func parse_inline(ob *bytes.Buffer, rndr *render, data []byte) {
 327	if rndr.nesting >= rndr.max_nesting {
 328		return
 329	}
 330	rndr.nesting++
 331
 332	i, end := 0, 0
 333	for i < len(data) {
 334		// copy inactive chars into the output
 335		for end < len(data) && rndr.active_char[data[end]] == 0 {
 336			end++
 337		}
 338
 339		if rndr.mk.normal_text != nil {
 340			rndr.mk.normal_text(ob, data[i:end], rndr.mk.opaque)
 341		} else {
 342			ob.Write(data[i:end])
 343		}
 344
 345		if end >= len(data) {
 346			break
 347		}
 348		i = end
 349
 350		// call the trigger
 351		action := rndr.active_char[data[end]]
 352		end = markdown_char_ptrs[action](ob, rndr, data, i)
 353
 354		if end == 0 { // no action from the callback
 355			end = i + 1
 356		} else {
 357			i += end
 358			end = i
 359		}
 360	}
 361
 362	rndr.nesting--
 363}
 364
 365// single and double emphasis parsing
 366func char_emphasis(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 367	data = data[offset:]
 368	c := data[0]
 369	ret := 0
 370
 371	if len(data) > 2 && data[1] != c {
 372		// whitespace cannot follow an opening emphasis;
 373		// strikethrough only takes two characters '~~'
 374		if c == '~' || isspace(data[1]) {
 375			return 0
 376		}
 377		if ret = parse_emph1(ob, rndr, data[1:], c); ret == 0 {
 378			return 0
 379		}
 380
 381		return ret + 1
 382	}
 383
 384	if len(data) > 3 && data[1] == c && data[2] != c {
 385		if isspace(data[2]) {
 386			return 0
 387		}
 388		if ret = parse_emph2(ob, rndr, data[2:], c); ret == 0 {
 389			return 0
 390		}
 391
 392		return ret + 2
 393	}
 394
 395	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
 396		if c == '~' || isspace(data[3]) {
 397			return 0
 398		}
 399		if ret = parse_emph3(ob, rndr, data, 3, c); ret == 0 {
 400			return 0
 401		}
 402
 403		return ret + 3
 404	}
 405
 406	return 0
 407}
 408
 409func char_codespan(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 410	data = data[offset:]
 411
 412	nb := 0
 413
 414	// counting the number of backticks in the delimiter
 415	for nb < len(data) && data[nb] == '`' {
 416		nb++
 417	}
 418
 419	// finding the next delimiter
 420	i, end := 0, 0
 421	for end = nb; end < len(data) && i < nb; end++ {
 422		if data[end] == '`' {
 423			i++
 424		} else {
 425			i = 0
 426		}
 427	}
 428
 429	if i < nb && end >= len(data) {
 430		return 0 // no matching delimiter
 431	}
 432
 433	// trim outside whitespace
 434	f_begin := nb
 435	for f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t') {
 436		f_begin++
 437	}
 438
 439	f_end := end - nb
 440	for f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t') {
 441		f_end--
 442	}
 443
 444	// real code span
 445	if rndr.mk.codespan == nil {
 446		return 0
 447	}
 448	if f_begin < f_end {
 449		if rndr.mk.codespan(ob, data[f_begin:f_end], rndr.mk.opaque) == 0 {
 450			end = 0
 451		}
 452	} else {
 453		if rndr.mk.codespan(ob, nil, rndr.mk.opaque) == 0 {
 454			end = 0
 455		}
 456	}
 457
 458	return end
 459
 460}
 461
 462// '\n' preceded by two spaces
 463func char_linebreak(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 464	if offset < 2 || data[offset-1] != ' ' || data[offset-2] != ' ' {
 465		return 0
 466	}
 467
 468	// remove trailing spaces from ob and render
 469	ob_bytes := ob.Bytes()
 470	end := len(ob_bytes)
 471	for end > 0 && ob_bytes[end-1] == ' ' {
 472		end--
 473	}
 474	ob.Truncate(end)
 475
 476	if rndr.mk.linebreak == nil {
 477		return 0
 478	}
 479	if rndr.mk.linebreak(ob, rndr.mk.opaque) > 0 {
 480		return 1
 481	} else {
 482		return 0
 483	}
 484
 485	return 0
 486}
 487
 488// '[': parsing a link or an image
 489func char_link(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 490	is_img := offset > 0 && data[offset-1] == '!'
 491
 492	data = data[offset:]
 493
 494	i := 1
 495	var title, link []byte
 496	text_has_nl := false
 497
 498	// checking whether the correct renderer exists
 499	if (is_img && rndr.mk.image == nil) || (!is_img && rndr.mk.link == nil) {
 500		return 0
 501	}
 502
 503	// looking for the matching closing bracket
 504	for level := 1; level > 0 && i < len(data); i++ {
 505		switch {
 506		case data[i] == '\n':
 507			text_has_nl = true
 508
 509		case data[i-1] == '\\':
 510			continue
 511
 512		case data[i] == '[':
 513			level++
 514
 515		case data[i] == ']':
 516			level--
 517			if level <= 0 {
 518				i-- // compensate for extra i++ in for loop
 519			}
 520		}
 521	}
 522
 523	if i >= len(data) {
 524		return 0
 525	}
 526
 527	txt_e := i
 528	i++
 529
 530	// skip any amount of whitespace or newline
 531	// (this is much more lax than original markdown syntax)
 532	for i < len(data) && isspace(data[i]) {
 533		i++
 534	}
 535
 536	// inline style link
 537	switch {
 538	case i < len(data) && data[i] == '(':
 539		// skipping initial whitespace
 540		i++
 541
 542		for i < len(data) && isspace(data[i]) {
 543			i++
 544		}
 545
 546		link_b := i
 547
 548		// looking for link end: ' " )
 549		for i < len(data) {
 550			if data[i] == '\\' {
 551				i += 2
 552			} else {
 553				if data[i] == ')' || data[i] == '\'' || data[i] == '"' {
 554					break
 555				}
 556				i++
 557			}
 558		}
 559
 560		if i >= len(data) {
 561			return 0
 562		}
 563		link_e := i
 564
 565		// looking for title end if present
 566		title_b, title_e := 0, 0
 567		if data[i] == '\'' || data[i] == '"' {
 568			i++
 569			title_b = i
 570
 571			for i < len(data) {
 572				if data[i] == '\\' {
 573					i += 2
 574				} else {
 575					if data[i] == ')' {
 576						break
 577					}
 578					i++
 579				}
 580			}
 581
 582			if i >= len(data) {
 583				return 0
 584			}
 585
 586			// skipping whitespaces after title
 587			title_e = i - 1
 588			for title_e > title_b && isspace(data[title_e]) {
 589				title_e--
 590			}
 591
 592			// checking for closing quote presence
 593			if data[title_e] != '\'' && data[title_e] != '"' {
 594				title_b, title_e = 0, 0
 595				link_e = i
 596			}
 597		}
 598
 599		// remove whitespace at the end of the link
 600		for link_e > link_b && isspace(data[link_e-1]) {
 601			link_e--
 602		}
 603
 604		// remove optional angle brackets around the link
 605		if data[link_b] == '<' {
 606			link_b++
 607		}
 608		if data[link_e-1] == '>' {
 609			link_e--
 610		}
 611
 612		// building escaped link and title
 613		if link_e > link_b {
 614			link = data[link_b:link_e]
 615		}
 616
 617		if title_e > title_b {
 618			title = data[title_b:title_e]
 619		}
 620
 621		i++
 622
 623	// reference style link
 624	case i < len(data) && data[i] == '[':
 625		var id []byte
 626
 627		// looking for the id
 628		i++
 629		link_b := i
 630		for i < len(data) && data[i] != ']' {
 631			i++
 632		}
 633		if i >= len(data) {
 634			return 0
 635		}
 636		link_e := i
 637
 638		// find the link_ref
 639		if link_b == link_e {
 640			if text_has_nl {
 641				b := bytes.NewBuffer(nil)
 642
 643				for j := 1; j < txt_e; j++ {
 644					switch {
 645					case data[j] != '\n':
 646						b.WriteByte(data[j])
 647					case data[j-1] != ' ':
 648						b.WriteByte(' ')
 649					}
 650				}
 651
 652				id = b.Bytes()
 653			} else {
 654				id = data[1:txt_e]
 655			}
 656		} else {
 657			id = data[link_b:link_e]
 658		}
 659
 660		// find the link_ref with matching id
 661		index := sortDotSearch(len(rndr.refs), func(i int) bool {
 662			return !byteslice_less(rndr.refs[i].id, id)
 663		})
 664		if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
 665			return 0
 666		}
 667		lr := rndr.refs[index]
 668
 669		// keep link and title from link_ref
 670		link = lr.link
 671		title = lr.title
 672		i++
 673
 674	// shortcut reference style link
 675	default:
 676		var id []byte
 677
 678		// crafting the id
 679		if text_has_nl {
 680			b := bytes.NewBuffer(nil)
 681
 682			for j := 1; j < txt_e; j++ {
 683				switch {
 684				case data[j] != '\n':
 685					b.WriteByte(data[j])
 686				case data[j-1] != ' ':
 687					b.WriteByte(' ')
 688				}
 689			}
 690
 691			id = b.Bytes()
 692		} else {
 693			id = data[1:txt_e]
 694		}
 695
 696		// find the link_ref with matching id
 697		index := sortDotSearch(len(rndr.refs), func(i int) bool {
 698			return !byteslice_less(rndr.refs[i].id, id)
 699		})
 700		if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
 701			return 0
 702		}
 703		lr := rndr.refs[index]
 704
 705		// keep link and title from link_ref
 706		link = lr.link
 707		title = lr.title
 708
 709		// rewinding the whitespace
 710		i = txt_e + 1
 711	}
 712
 713	// building content: img alt is escaped, link content is parsed
 714	content := bytes.NewBuffer(nil)
 715	if txt_e > 1 {
 716		if is_img {
 717			content.Write(data[1:txt_e])
 718		} else {
 719			parse_inline(content, rndr, data[1:txt_e])
 720		}
 721	}
 722
 723	var u_link []byte
 724	if len(link) > 0 {
 725		u_link_buf := bytes.NewBuffer(nil)
 726		unscape_text(u_link_buf, link)
 727		u_link = u_link_buf.Bytes()
 728	}
 729
 730	// calling the relevant rendering function
 731	ret := 0
 732	if is_img {
 733		ob_size := ob.Len()
 734		ob_bytes := ob.Bytes()
 735		if ob_size > 0 && ob_bytes[ob_size-1] == '!' {
 736			ob.Truncate(ob_size - 1)
 737		}
 738
 739		ret = rndr.mk.image(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
 740	} else {
 741		ret = rndr.mk.link(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
 742	}
 743
 744	if ret > 0 {
 745		return i
 746	}
 747	return 0
 748}
 749
 750// '<' when tags or autolinks are allowed
 751func char_langle_tag(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 752	data = data[offset:]
 753	altype := MKDA_NOT_AUTOLINK
 754	end := tag_length(data, &altype)
 755	ret := 0
 756
 757	if end > 2 {
 758		switch {
 759		case rndr.mk.autolink != nil && altype != MKDA_NOT_AUTOLINK:
 760			u_link := bytes.NewBuffer(nil)
 761			unscape_text(u_link, data[1:end+1-2])
 762			ret = rndr.mk.autolink(ob, u_link.Bytes(), altype, rndr.mk.opaque)
 763		case rndr.mk.raw_html_tag != nil:
 764			ret = rndr.mk.raw_html_tag(ob, data[:end], rndr.mk.opaque)
 765		}
 766	}
 767
 768	if ret == 0 {
 769		return 0
 770	}
 771	return end
 772}
 773
 774// '\\' backslash escape
 775var escape_chars = []byte("\\`*_{}[]()#+-.!:|&<>")
 776
 777func char_escape(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 778	data = data[offset:]
 779
 780	if len(data) > 1 {
 781		if bytes.IndexByte(escape_chars, data[1]) < 0 {
 782			return 0
 783		}
 784
 785		if rndr.mk.normal_text != nil {
 786			rndr.mk.normal_text(ob, data[1:2], rndr.mk.opaque)
 787		} else {
 788			ob.WriteByte(data[1])
 789		}
 790	}
 791
 792	return 2
 793}
 794
 795// '&' escaped when it doesn't belong to an entity
 796// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 797func char_entity(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 798	data = data[offset:]
 799
 800	end := 1
 801
 802	if end < len(data) && data[end] == '#' {
 803		end++
 804	}
 805
 806	for end < len(data) && isalnum(data[end]) {
 807		end++
 808	}
 809
 810	if end < len(data) && data[end] == ';' {
 811		end++ // real entity
 812	} else {
 813		return 0 // lone '&'
 814	}
 815
 816	if rndr.mk.entity != nil {
 817		rndr.mk.entity(ob, data[:end], rndr.mk.opaque)
 818	} else {
 819		ob.Write(data[:end])
 820	}
 821
 822	return end
 823}
 824
 825func char_autolink(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 826	orig_data := data
 827	data = data[offset:]
 828
 829	if offset > 0 {
 830		if !isspace(orig_data[offset-1]) && !ispunct(orig_data[offset-1]) {
 831			return 0
 832		}
 833	}
 834
 835	if !is_safe_link(data) {
 836		return 0
 837	}
 838
 839	link_end := 0
 840	for link_end < len(data) && !isspace(data[link_end]) {
 841		link_end++
 842	}
 843
 844	// Skip punctuation at the end of the link
 845	if (data[link_end-1] == '.' || data[link_end-1] == ',' || data[link_end-1] == ';') && data[link_end-2] != '\\' {
 846		link_end--
 847	}
 848
 849	// See if the link finishes with a punctuation sign that can be closed.
 850	var copen byte
 851	switch data[link_end-1] {
 852	case '"':
 853		copen = '"'
 854	case '\'':
 855		copen = '\''
 856	case ')':
 857		copen = '('
 858	case ']':
 859		copen = '['
 860	case '}':
 861		copen = '{'
 862	default:
 863		copen = 0
 864	}
 865
 866	if copen != 0 {
 867		buf_end := offset + link_end - 2
 868
 869		open_delim := 1
 870
 871		/* Try to close the final punctuation sign in this same line;
 872		 * if we managed to close it outside of the URL, that means that it's
 873		 * not part of the URL. If it closes inside the URL, that means it
 874		 * is part of the URL.
 875		 *
 876		 * Examples:
 877		 *
 878		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
 879		 *              => http://www.pokemon.com/Pikachu_(Electric)
 880		 *
 881		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
 882		 *              => http://www.pokemon.com/Pikachu_(Electric)
 883		 *
 884		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
 885		 *              => http://www.pokemon.com/Pikachu_(Electric))
 886		 *
 887		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
 888		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
 889		 */
 890
 891		for buf_end >= 0 && orig_data[buf_end] != '\n' && open_delim != 0 {
 892			if orig_data[buf_end] == data[link_end-1] {
 893				open_delim++
 894			}
 895
 896			if orig_data[buf_end] == copen {
 897				open_delim--
 898			}
 899
 900			buf_end--
 901		}
 902
 903		if open_delim == 0 {
 904			link_end--
 905		}
 906	}
 907
 908	if rndr.mk.autolink != nil {
 909		u_link := bytes.NewBuffer(nil)
 910		unscape_text(u_link, data[:link_end])
 911
 912		rndr.mk.autolink(ob, u_link.Bytes(), MKDA_NORMAL, rndr.mk.opaque)
 913	}
 914
 915	return link_end
 916}
 917
 918var valid_uris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
 919
 920func is_safe_link(link []byte) bool {
 921	for _, prefix := range valid_uris {
 922		if len(link) > len(prefix) && !byteslice_less(link[:len(prefix)], prefix) && !byteslice_less(prefix, link[:len(prefix)]) && isalnum(link[len(prefix)]) {
 923			return true
 924		}
 925	}
 926
 927	return false
 928}
 929
 930
 931// taken from regexp in the stdlib
 932func ispunct(c byte) bool {
 933	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
 934		if c == r {
 935			return true
 936		}
 937	}
 938	return false
 939}
 940
 941// this is sort.Search, reproduced here because an older
 942// version of the library had a bug
 943func sortDotSearch(n int, f func(int) bool) int {
 944	// Define f(-1) == false and f(n) == true.
 945	// Invariant: f(i-1) == false, f(j) == true.
 946	i, j := 0, n
 947	for i < j {
 948		h := i + (j-i)/2 // avoid overflow when computing h
 949		// i ≤ h < j
 950		if !f(h) {
 951			i = h + 1 // preserves f(i-1) == false
 952		} else {
 953			j = h // preserves f(j) == true
 954		}
 955	}
 956	// i == j, f(i-1) == false, and f(j) (= f(i)) == true  =>  answer is i.
 957	return i
 958}
 959
 960func isspace(c byte) bool {
 961	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
 962}
 963
 964func isalnum(c byte) bool {
 965	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 966}
 967
 968// return the length of the given tag, or 0 is it's not valid
 969func tag_length(data []byte, autolink *int) int {
 970	var i, j int
 971
 972	// a valid tag can't be shorter than 3 chars
 973	if len(data) < 3 {
 974		return 0
 975	}
 976
 977	// begins with a '<' optionally followed by '/', followed by letter or number
 978	if data[0] != '<' {
 979		return 0
 980	}
 981	if data[1] == '/' {
 982		i = 2
 983	} else {
 984		i = 1
 985	}
 986
 987	if !isalnum(data[i]) {
 988		return 0
 989	}
 990
 991	// scheme test
 992	*autolink = MKDA_NOT_AUTOLINK
 993
 994	// try to find the beggining of an URI
 995	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
 996		i++
 997	}
 998
 999	if i > 1 && data[i] == '@' {
1000		if j = is_mail_autolink(data[i:]); j != 0 {
1001			*autolink = MKDA_EMAIL
1002			return i + j
1003		}
1004	}
1005
1006	if i > 2 && data[i] == ':' {
1007		*autolink = MKDA_NORMAL
1008		i++
1009	}
1010
1011	// complete autolink test: no whitespace or ' or "
1012	switch {
1013	case i >= len(data):
1014		*autolink = MKDA_NOT_AUTOLINK
1015	case *autolink != 0:
1016		j = i
1017
1018		for i < len(data) {
1019			if data[i] == '\\' {
1020				i += 2
1021			} else {
1022				if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
1023					break
1024				} else {
1025					i++
1026				}
1027			}
1028
1029		}
1030
1031		if i >= len(data) {
1032			return 0
1033		}
1034		if i > j && data[i] == '>' {
1035			return i + 1
1036		}
1037
1038		// one of the forbidden chars has been found
1039		*autolink = MKDA_NOT_AUTOLINK
1040	}
1041
1042	// looking for sometinhg looking like a tag end
1043	for i < len(data) && data[i] != '>' {
1044		i++
1045	}
1046	if i >= len(data) {
1047		return 0
1048	}
1049	return i + 1
1050}
1051
1052// look for the address part of a mail autolink and '>'
1053// this is less strict than the original markdown e-mail address matching
1054func is_mail_autolink(data []byte) int {
1055	nb := 0
1056
1057	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1058	for i := 0; i < len(data); i++ {
1059		if isalnum(data[i]) {
1060			continue
1061		}
1062
1063		switch data[i] {
1064		case '@':
1065			nb++
1066
1067		case '-', '.', '_':
1068			break
1069
1070		case '>':
1071			if nb == 1 {
1072				return i + 1
1073			} else {
1074				return 0
1075			}
1076		default:
1077			return 0
1078		}
1079	}
1080
1081	return 0
1082}
1083
// find_emph_char looks for the next emph char c in data, skipping code
// spans and links. The search starts at index 1. It returns the index of
// the char found, or 0 when there is none.
//
// When a code span or link is left unterminated, the first c seen inside it
// (if any) is returned instead, since the construct is then plain text.
func find_emph_char(data []byte, c byte) int {
	i := 1

	for i < len(data) {
		// run up to the next char of interest
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		if data[i] == c {
			return i
		}

		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}

		// first occurrence of c inside the construct being skipped
		tmp_i := 0

		if data[i] == '`' {
			// skip a code span
			i++
			for i < len(data) && data[i] != '`' {
				if tmp_i == 0 && data[i] == c {
					tmp_i = i
				}
				i++
			}
			if i >= len(data) {
				return tmp_i
			}
			i++
			continue
		}

		// data[i] == '[': skip a link, starting with its text
		i++
		for i < len(data) && data[i] != ']' {
			if tmp_i == 0 && data[i] == c {
				tmp_i = i
			}
			i++
		}
		i++
		for i < len(data) && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n') {
			i++
		}
		if i >= len(data) {
			return tmp_i
		}

		// the target is closed by the delimiter matching its opener
		var cc byte
		switch data[i] {
		case '[':
			cc = ']'
		case '(':
			cc = ')'
		default: // not a link
			if tmp_i > 0 {
				return tmp_i
			}
			continue
		}

		i++
		for i < len(data) && data[i] != cc {
			if tmp_i == 0 && data[i] == c {
				tmp_i = i
			}
			i++
		}
		if i >= len(data) {
			return tmp_i
		}
		i++
	}
	return 0
}
1161
1162func parse_emph1(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1163	i := 0
1164
1165	if rndr.mk.emphasis == nil {
1166		return 0
1167	}
1168
1169	// skip one symbol if coming from emph3
1170	if len(data) > 1 && data[0] == c && data[1] == c {
1171		i = 1
1172	}
1173
1174	for i < len(data) {
1175		length := find_emph_char(data[i:], c)
1176		if length == 0 {
1177			return 0
1178		}
1179		i += length
1180		if i >= len(data) {
1181			return 0
1182		}
1183
1184		if i+1 < len(data) && data[i+1] == c {
1185			i++
1186			continue
1187		}
1188
1189		if data[i] == c && !isspace(data[i-1]) {
1190
1191			if rndr.ext_flags&MKDEXT_NO_INTRA_EMPHASIS != 0 {
1192				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1193					continue
1194				}
1195			}
1196
1197			work := bytes.NewBuffer(nil)
1198			parse_inline(work, rndr, data[:i])
1199			r := rndr.mk.emphasis(ob, work.Bytes(), rndr.mk.opaque)
1200			if r > 0 {
1201				return i + 1
1202			} else {
1203				return 0
1204			}
1205		}
1206	}
1207
1208	return 0
1209}
1210
1211func parse_emph2(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1212	render_method := rndr.mk.double_emphasis
1213	if c == '~' {
1214		render_method = rndr.mk.strikethrough
1215	}
1216
1217	if render_method == nil {
1218		return 0
1219	}
1220
1221	i := 0
1222
1223	for i < len(data) {
1224		length := find_emph_char(data[i:], c)
1225		if length == 0 {
1226			return 0
1227		}
1228		i += length
1229
1230		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1231			work := bytes.NewBuffer(nil)
1232			parse_inline(work, rndr, data[:i])
1233			r := render_method(ob, work.Bytes(), rndr.mk.opaque)
1234			if r > 0 {
1235				return i + 2
1236			} else {
1237				return 0
1238			}
1239		}
1240		i++
1241	}
1242	return 0
1243}
1244
1245func parse_emph3(ob *bytes.Buffer, rndr *render, data []byte, offset int, c byte) int {
1246	i := 0
1247	orig_data := data
1248	data = data[offset:]
1249
1250	for i < len(data) {
1251		length := find_emph_char(data[i:], c)
1252		if length == 0 {
1253			return 0
1254		}
1255		i += length
1256
1257		// skip whitespace preceded symbols
1258		if data[i] != c || isspace(data[i-1]) {
1259			continue
1260		}
1261
1262		switch {
1263		case (i+2 < len(data) && data[i+1] == c && data[i+2] == c && rndr.mk.triple_emphasis != nil):
1264			// triple symbol found
1265			work := bytes.NewBuffer(nil)
1266
1267			parse_inline(work, rndr, data[:i])
1268			r := rndr.mk.triple_emphasis(ob, work.Bytes(), rndr.mk.opaque)
1269			if r > 0 {
1270				return i + 3
1271			} else {
1272				return 0
1273			}
1274		case (i+1 < len(data) && data[i+1] == c):
1275			// double symbol found, handing over to emph1
1276			length = parse_emph1(ob, rndr, orig_data[offset-2:], c)
1277			if length == 0 {
1278				return 0
1279			} else {
1280				return length - 2
1281			}
1282		default:
1283			// single symbol found, handing over to emph2
1284			length = parse_emph2(ob, rndr, orig_data[offset-1:], c)
1285			if length == 0 {
1286				return 0
1287			} else {
1288				return length - 1
1289			}
1290		}
1291	}
1292	return 0
1293}
1294
1295// parse block-level data
1296func parse_block(ob *bytes.Buffer, rndr *render, data []byte) {
1297	if rndr.nesting >= rndr.max_nesting {
1298		return
1299	}
1300	rndr.nesting++
1301
1302	for len(data) > 0 {
1303		if is_atxheader(rndr, data) {
1304			data = data[parse_atxheader(ob, rndr, data):]
1305			continue
1306		}
1307		if data[0] == '<' && rndr.mk.blockhtml != nil {
1308			if i := parse_htmlblock(ob, rndr, data, true); i > 0 {
1309				data = data[i:]
1310				continue
1311			}
1312		}
1313		if i := is_empty(data); i > 0 {
1314			data = data[i:]
1315			continue
1316		}
1317		if is_hrule(data) {
1318			if rndr.mk.hrule != nil {
1319				rndr.mk.hrule(ob, rndr.mk.opaque)
1320			}
1321			var i int
1322			for i = 0; i < len(data) && data[i] != '\n'; i++ {
1323			}
1324			data = data[i:]
1325			continue
1326		}
1327		if rndr.ext_flags&MKDEXT_FENCED_CODE != 0 {
1328			if i := parse_fencedcode(ob, rndr, data); i > 0 {
1329				data = data[i:]
1330				continue
1331			}
1332		}
1333		if rndr.ext_flags&MKDEXT_TABLES != 0 {
1334			if i := parse_table(ob, rndr, data); i > 0 {
1335				data = data[i:]
1336				continue
1337			}
1338		}
1339		if prefix_quote(data) > 0 {
1340			data = data[parse_blockquote(ob, rndr, data):]
1341			continue
1342		}
1343		if prefix_code(data) > 0 {
1344			data = data[parse_blockcode(ob, rndr, data):]
1345			continue
1346		}
1347		if prefix_uli(data) > 0 {
1348			data = data[parse_list(ob, rndr, data, 0):]
1349			continue
1350		}
1351		if prefix_oli(data) > 0 {
1352			data = data[parse_list(ob, rndr, data, MKD_LIST_ORDERED):]
1353			continue
1354		}
1355
1356		data = data[parse_paragraph(ob, rndr, data):]
1357	}
1358
1359	rndr.nesting--
1360}
1361
1362func is_atxheader(rndr *render, data []byte) bool {
1363	if data[0] != '#' {
1364		return false
1365	}
1366
1367	if rndr.ext_flags&MKDEXT_SPACE_HEADERS != 0 {
1368		level := 0
1369		for level < len(data) && level < 6 && data[level] == '#' {
1370			level++
1371		}
1372		if level < len(data) && data[level] != ' ' && data[level] != '\t' {
1373			return false
1374		}
1375	}
1376	return true
1377}
1378
1379func parse_atxheader(ob *bytes.Buffer, rndr *render, data []byte) int {
1380	level := 0
1381	for level < len(data) && level < 6 && data[level] == '#' {
1382		level++
1383	}
1384	i, end := 0, 0
1385	for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ {
1386	}
1387	for end = i; end < len(data) && data[end] != '\n'; end++ {
1388	}
1389	skip := end
1390	for end > 0 && data[end-1] == '#' {
1391		end--
1392	}
1393	for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') {
1394		end--
1395	}
1396	if end > i {
1397		work := bytes.NewBuffer(nil)
1398		parse_inline(work, rndr, data[i:end])
1399		if rndr.mk.header != nil {
1400			rndr.mk.header(ob, work.Bytes(), level, rndr.mk.opaque)
1401		}
1402	}
1403	return skip
1404}
1405
// is_headerline checks whether the first line of data is a setext header
// underline: a run of '=' (level 1) or '-' (level 2), optionally followed
// by spaces or tabs, and nothing else up to the newline or end of data.
// It returns the header level, or 0 when the line is not an underline.
// data must not be empty.
func is_headerline(data []byte) int {
	var level int
	switch data[0] {
	case '=':
		level = 1
	case '-':
		level = 2
	default:
		return 0
	}

	marker := data[0]
	i := 1
	for i < len(data) && data[i] == marker {
		i++
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}

	// anything other than end of line after the run disqualifies it
	if i < len(data) && data[i] != '\n' {
		return 0
	}
	return level
}
1439
1440func parse_htmlblock(ob *bytes.Buffer, rndr *render, data []byte, do_render bool) int {
1441	var i, j int
1442
1443	// identify the opening tag
1444	if len(data) < 2 || data[0] != '<' {
1445		return 0
1446	}
1447	curtag, tagfound := find_block_tag(data[1:])
1448
1449	// handle special cases
1450	if !tagfound {
1451
1452		// HTML comment, laxist form
1453		if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' {
1454			i = 5
1455
1456			for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
1457				i++
1458			}
1459			i++
1460
1461			if i < len(data) {
1462				j = is_empty(data[i:])
1463			}
1464
1465			if j > 0 {
1466				size := i + j
1467				if do_render && rndr.mk.blockhtml != nil {
1468					rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1469				}
1470				return size
1471			}
1472		}
1473
1474		// HR, which is the only self-closing block tag considered
1475		if len(data) > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') {
1476			i = 3
1477			for i < len(data) && data[i] != '>' {
1478				i++
1479			}
1480
1481			if i+1 < len(data) {
1482				i++
1483				j = is_empty(data[i:])
1484				if j > 0 {
1485					size := i + j
1486					if do_render && rndr.mk.blockhtml != nil {
1487						rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1488					}
1489					return size
1490				}
1491			}
1492		}
1493
1494		// no special case recognized
1495		return 0
1496	}
1497
1498	// look for an unindented matching closing tag
1499	//      followed by a blank line
1500	i = 1
1501	found := false
1502
1503	// if not found, try a second pass looking for indented match
1504	// but not if tag is "ins" or "del" (following original Markdown.pl)
1505	if curtag != "ins" && curtag != "del" {
1506		i = 1
1507		for i < len(data) {
1508			i++
1509			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
1510				i++
1511			}
1512
1513			if i+2+len(curtag) >= len(data) {
1514				break
1515			}
1516
1517			j = htmlblock_end(curtag, rndr, data[i-1:])
1518
1519			if j > 0 {
1520				i += j - 1
1521				found = true
1522				break
1523			}
1524		}
1525	}
1526
1527	if !found {
1528		return 0
1529	}
1530
1531	// the end of the block has been found
1532	if do_render && rndr.mk.blockhtml != nil {
1533		rndr.mk.blockhtml(ob, data[:i], rndr.mk.opaque)
1534	}
1535
1536	return i
1537}
1538
1539func find_block_tag(data []byte) (string, bool) {
1540	i := 0
1541	for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) {
1542		i++
1543	}
1544	if i >= len(data) {
1545		return "", false
1546	}
1547	key := string(data[:i])
1548	if _, ok := block_tags[key]; ok {
1549		return key, true
1550	}
1551	return "", false
1552}
1553
1554func htmlblock_end(tag string, rndr *render, data []byte) int {
1555	// assume data[0] == '<' && data[1] == '/' already tested
1556
1557	// check if tag is a match
1558	if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' {
1559		return 0
1560	}
1561
1562	// check white lines
1563	i := len(tag) + 3
1564	w := 0
1565	if i < len(data) {
1566		if w = is_empty(data[i:]); w == 0 {
1567			return 0 // non-blank after tag
1568		}
1569	}
1570	i += w
1571	w = 0
1572
1573	if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
1574		if i < len(data) {
1575			w = is_empty(data[i:])
1576		}
1577	} else {
1578		if i < len(data) {
1579			if w = is_empty(data[i:]); w == 0 {
1580				return 0 // non-blank line after tag line
1581			}
1582		}
1583	}
1584
1585	return i + w
1586}
1587
// is_empty reports whether the first line of data holds only spaces and
// tabs. It returns 0 for a non-blank line; otherwise the offset just past
// the terminating newline. When the line is blank but unterminated the
// result is len(data)+1, so callers must not use it as a slice bound
// without checking.
func is_empty(data []byte) int {
	for pos, c := range data {
		switch c {
		case '\n':
			return pos + 1
		case ' ', '\t':
			// still blank, keep scanning
		default:
			return 0
		}
	}
	return len(data) + 1
}
1597
// is_hrule reports whether the first line of data is a horizontal rule:
// up to three leading spaces, then at least three of the same character
// out of '*', '-' or '_', with only spaces and tabs in between.
func is_hrule(data []byte) bool {
	if len(data) < 3 {
		return false
	}

	// skip initial spaces (at most three; len(data) >= 3 keeps this in range)
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	// look at the hrule char
	if i+2 >= len(data) {
		return false
	}
	marker := data[i]
	if marker != '*' && marker != '-' && marker != '_' {
		return false
	}

	// the whole line must be the char or whitespace
	count := 0
	for _, c := range data[i:] {
		if c == '\n' {
			break
		}
		if c == marker {
			count++
			continue
		}
		if c != ' ' && c != '\t' {
			return false
		}
	}

	return count >= 3
}
1634
1635func is_codefence(data []byte, syntax **string) int {
1636	i, n := 0, 0
1637
1638	// skip initial spaces
1639	if len(data) < 3 {
1640		return 0
1641	}
1642	if data[0] == ' ' {
1643		i++
1644		if data[1] == ' ' {
1645			i++
1646			if data[2] == ' ' {
1647				i++
1648			}
1649		}
1650	}
1651
1652	// look at the hrule char
1653	if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') {
1654		return 0
1655	}
1656
1657	c := data[i]
1658
1659	// the whole line must be the char or whitespace
1660	for i < len(data) && data[i] == c {
1661		n++
1662		i++
1663	}
1664
1665	if n < 3 {
1666		return 0
1667	}
1668
1669	if syntax != nil {
1670		syn := 0
1671
1672		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
1673			i++
1674		}
1675
1676		syntax_start := i
1677
1678		if i < len(data) && data[i] == '{' {
1679			i++
1680			syntax_start++
1681
1682			for i < len(data) && data[i] != '}' && data[i] != '\n' {
1683				syn++
1684				i++
1685			}
1686
1687			if i == len(data) || data[i] != '}' {
1688				return 0
1689			}
1690
1691			// string all whitespace at the beginning and the end
1692			// of the {} block
1693			for syn > 0 && isspace(data[syntax_start]) {
1694				syntax_start++
1695				syn--
1696			}
1697
1698			for syn > 0 && isspace(data[syntax_start+syn-1]) {
1699				syn--
1700			}
1701
1702			i++
1703		} else {
1704			for i < len(data) && !isspace(data[i]) {
1705				syn++
1706				i++
1707			}
1708		}
1709
1710		language := string(data[syntax_start : syntax_start+syn])
1711		*syntax = &language
1712	}
1713
1714	for i < len(data) && data[i] != '\n' {
1715		if !isspace(data[i]) {
1716			return 0
1717		}
1718		i++
1719	}
1720
1721	return i + 1
1722}
1723
1724func parse_fencedcode(ob *bytes.Buffer, rndr *render, data []byte) int {
1725	var lang *string
1726	beg := is_codefence(data, &lang)
1727	if beg == 0 {
1728		return 0
1729	}
1730
1731	work := bytes.NewBuffer(nil)
1732
1733	for beg < len(data) {
1734		fence_end := is_codefence(data[beg:], nil)
1735		if fence_end != 0 {
1736			beg += fence_end
1737			break
1738		}
1739
1740		var end int
1741		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1742		}
1743
1744		if beg < end {
1745			// verbatim copy to the working buffer, escaping entities
1746			if is_empty(data[beg:]) > 0 {
1747				work.WriteByte('\n')
1748			} else {
1749				work.Write(data[beg:end])
1750			}
1751		}
1752		beg = end
1753	}
1754
1755	if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
1756		work.WriteByte('\n')
1757	}
1758
1759	if rndr.mk.blockcode != nil {
1760		syntax := ""
1761		if lang != nil {
1762			syntax = *lang
1763		}
1764
1765		rndr.mk.blockcode(ob, work.Bytes(), syntax, rndr.mk.opaque)
1766	}
1767
1768	return beg
1769}
1770
1771func parse_table(ob *bytes.Buffer, rndr *render, data []byte) int {
1772	header_work := bytes.NewBuffer(nil)
1773	i, columns, col_data := parse_table_header(header_work, rndr, data)
1774	if i > 0 {
1775		body_work := bytes.NewBuffer(nil)
1776
1777		for i < len(data) {
1778			pipes, row_start := 0, i
1779			for ; i < len(data) && data[i] != '\n'; i++ {
1780				if data[i] == '|' {
1781					pipes++
1782				}
1783			}
1784
1785			if pipes == 0 || i == len(data) {
1786				i = row_start
1787				break
1788			}
1789
1790			parse_table_row(body_work, rndr, data[row_start:i], columns, col_data)
1791			i++
1792		}
1793
1794		if rndr.mk.table != nil {
1795			rndr.mk.table(ob, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque)
1796		}
1797	}
1798
1799	return i
1800}
1801
1802func parse_table_header(ob *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) {
1803	i, pipes := 0, 0
1804	column_data = []int{}
1805	for i = 0; i < len(data) && data[i] != '\n'; i++ {
1806		if data[i] == '|' {
1807			pipes++
1808		}
1809	}
1810
1811	if i == len(data) || pipes == 0 {
1812		return 0, 0, column_data
1813	}
1814
1815	header_end := i
1816
1817	if data[0] == '|' {
1818		pipes--
1819	}
1820
1821	if i > 2 && data[i-1] == '|' {
1822		pipes--
1823	}
1824
1825	columns = pipes + 1
1826	column_data = make([]int, columns)
1827
1828	// parse the header underline
1829	i++
1830	if i < len(data) && data[i] == '|' {
1831		i++
1832	}
1833
1834	under_end := i
1835	for under_end < len(data) && data[under_end] != '\n' {
1836		under_end++
1837	}
1838
1839	col := 0
1840	for ; col < columns && i < under_end; col++ {
1841		dashes := 0
1842
1843		for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1844			i++
1845		}
1846
1847		if data[i] == ':' {
1848			i++
1849			column_data[col] |= MKD_TABLE_ALIGN_L
1850			dashes++
1851		}
1852
1853		for i < under_end && data[i] == '-' {
1854			i++
1855			dashes++
1856		}
1857
1858		if i < under_end && data[i] == ':' {
1859			i++
1860			column_data[col] |= MKD_TABLE_ALIGN_R
1861			dashes++
1862		}
1863
1864		for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1865			i++
1866		}
1867
1868		if i < under_end && data[i] != '|' {
1869			break
1870		}
1871
1872		if dashes < 3 {
1873			break
1874		}
1875
1876		i++
1877	}
1878
1879	if col < columns {
1880		return 0, 0, column_data
1881	}
1882
1883	parse_table_row(ob, rndr, data[:header_end], columns, column_data)
1884	size = under_end + 1
1885	return
1886}
1887
1888func parse_table_row(ob *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) {
1889	i, col := 0, 0
1890	row_work := bytes.NewBuffer(nil)
1891
1892	if i < len(data) && data[i] == '|' {
1893		i++
1894	}
1895
1896	for col = 0; col < columns && i < len(data); col++ {
1897		for i < len(data) && isspace(data[i]) {
1898			i++
1899		}
1900
1901		cell_start := i
1902
1903		for i < len(data) && data[i] != '|' {
1904			i++
1905		}
1906
1907		cell_end := i - 1
1908
1909		for cell_end > cell_start && isspace(data[cell_end]) {
1910			cell_end--
1911		}
1912
1913		cell_work := bytes.NewBuffer(nil)
1914		parse_inline(cell_work, rndr, data[cell_start:cell_end+1])
1915
1916		if rndr.mk.table_cell != nil {
1917			cdata := 0
1918			if col < len(col_data) {
1919				cdata = col_data[col]
1920			}
1921			rndr.mk.table_cell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque)
1922		}
1923
1924		i++
1925	}
1926
1927	for ; col < columns; col++ {
1928		empty_cell := []byte{}
1929		if rndr.mk.table_cell != nil {
1930			cdata := 0
1931			if col < len(col_data) {
1932				cdata = col_data[col]
1933			}
1934			rndr.mk.table_cell(row_work, empty_cell, cdata, rndr.mk.opaque)
1935		}
1936	}
1937
1938	if rndr.mk.table_row != nil {
1939		rndr.mk.table_row(ob, row_work.Bytes(), rndr.mk.opaque)
1940	}
1941}
1942
// returns blockquote prefix length: up to three spaces, a '>' and one
// optional space or tab after it; 0 when data does not start a quote line
func prefix_quote(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}
	if n >= len(data) || data[n] != '>' {
		return 0
	}
	n++ // the '>' itself

	if n < len(data) && (data[n] == ' ' || data[n] == '\t') {
		n++
	}
	return n
}
1957
1958// handles parsing of a blockquote fragment
1959func parse_blockquote(ob *bytes.Buffer, rndr *render, data []byte) int {
1960	out := bytes.NewBuffer(nil)
1961	work := bytes.NewBuffer(nil)
1962	beg, end := 0, 0
1963	for beg < len(data) {
1964		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1965		}
1966
1967		if pre := prefix_quote(data[beg:]); pre > 0 {
1968			beg += pre // skip prefix
1969		} else {
1970			// empty line followed by non-quote line
1971			if is_empty(data[beg:]) > 0 && (end >= len(data) || (prefix_quote(data[end:]) == 0 && is_empty(data[end:]) == 0)) {
1972				break
1973			}
1974		}
1975
1976		if beg < end { // copy into the in-place working buffer
1977			work.Write(data[beg:end])
1978		}
1979		beg = end
1980	}
1981
1982	parse_block(out, rndr, work.Bytes())
1983	if rndr.mk.blockquote != nil {
1984		rndr.mk.blockquote(ob, out.Bytes(), rndr.mk.opaque)
1985	}
1986	return end
1987}
1988
// returns prefix length for block code: 1 for a leading tab, 4 for four
// leading spaces, 0 when data is not indented as code
func prefix_code(data []byte) int {
	switch {
	case len(data) > 0 && data[0] == '\t':
		return 1
	case len(data) > 3 && string(data[:4]) == "    ":
		return 4
	}
	return 0
}
1999
2000func parse_blockcode(ob *bytes.Buffer, rndr *render, data []byte) int {
2001	work := bytes.NewBuffer(nil)
2002
2003	beg, end := 0, 0
2004	for beg < len(data) {
2005		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
2006		}
2007
2008		if pre := prefix_code(data[beg:end]); pre > 0 {
2009			beg += pre
2010		} else {
2011			if is_empty(data[beg:end]) == 0 {
2012				// non-empty non-prefixed line breaks the pre
2013				break
2014			}
2015		}
2016
2017		if beg < end {
2018			// verbatim copy to the working buffer, escaping entities
2019			if is_empty(data[beg:end]) > 0 {
2020				work.WriteByte('\n')
2021			} else {
2022				work.Write(data[beg:end])
2023			}
2024		}
2025		beg = end
2026	}
2027
2028	// trim all the \n off the end of work
2029	workbytes := work.Bytes()
2030	n := 0
2031	for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' {
2032		n++
2033	}
2034	if n > 0 {
2035		work = bytes.NewBuffer(workbytes[:len(workbytes)-n])
2036	}
2037
2038	work.WriteByte('\n')
2039
2040	if rndr.mk.blockcode != nil {
2041		rndr.mk.blockcode(ob, work.Bytes(), "", rndr.mk.opaque)
2042	}
2043
2044	return beg
2045}
2046
// returns unordered list item prefix length: up to three spaces, one of
// '*', '+' or '-', and the space or tab after it; 0 when data does not
// start an unordered item
func prefix_uli(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}

	// need room for the bullet and the blank after it
	if n+1 >= len(data) {
		return 0
	}
	switch data[n] {
	case '*', '+', '-':
	default:
		return 0
	}
	if data[n+1] != ' ' && data[n+1] != '\t' {
		return 0
	}
	return n + 2
}
2058
// returns ordered list item prefix length: up to three spaces, one or more
// digits, a '.', and the space or tab after it; 0 when data does not start
// an ordered item
func prefix_oli(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}

	// consume the number
	first_digit := n
	for n < len(data) && data[n] >= '0' && data[n] <= '9' {
		n++
	}
	if n == first_digit {
		return 0
	}

	// need room for the dot and the blank after it
	if n+1 >= len(data) || data[n] != '.' {
		return 0
	}
	if data[n+1] != ' ' && data[n+1] != '\t' {
		return 0
	}
	return n + 2
}
2076
2077// parsing ordered or unordered list block
2078func parse_list(ob *bytes.Buffer, rndr *render, data []byte, flags int) int {
2079	work := bytes.NewBuffer(nil)
2080
2081	i, j := 0, 0
2082	for i < len(data) {
2083		j = parse_listitem(work, rndr, data[i:], &flags)
2084		i += j
2085
2086		if j == 0 || flags&MKD_LI_END != 0 {
2087			break
2088		}
2089	}
2090
2091	if rndr.mk.list != nil {
2092		rndr.mk.list(ob, work.Bytes(), flags, rndr.mk.opaque)
2093	}
2094	return i
2095}
2096
2097// parse a single list item
2098// assumes initial prefix is already removed
2099func parse_listitem(ob *bytes.Buffer, rndr *render, data []byte, flags *int) int {
2100	// keep track of the first indentation prefix
2101	beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0
2102
2103	for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' {
2104		orgpre++
2105	}
2106
2107	beg = prefix_uli(data)
2108	if beg == 0 {
2109		beg = prefix_oli(data)
2110	}
2111	if beg == 0 {
2112		return 0
2113	}
2114
2115	// skip to the beginning of the following line
2116	end = beg
2117	for end < len(data) && data[end-1] != '\n' {
2118		end++
2119	}
2120
2121	// get working buffers
2122	work := bytes.NewBuffer(nil)
2123	inter := bytes.NewBuffer(nil)
2124
2125	// put the first line into the working buffer
2126	work.Write(data[beg:end])
2127	beg = end
2128
2129	// process the following lines
2130	in_empty, has_inside_empty := false, false
2131	for beg < len(data) {
2132		end++
2133
2134		for end < len(data) && data[end-1] != '\n' {
2135			end++
2136		}
2137
2138		// process an empty line
2139		if is_empty(data[beg:end]) > 0 {
2140			in_empty = true
2141			beg = end
2142			continue
2143		}
2144
2145		// calculate the indentation
2146		i = 0
2147		for i < 4 && beg+i < end && data[beg+i] == ' ' {
2148			i++
2149		}
2150
2151		pre = i
2152		if data[beg] == '\t' {
2153			i = 1
2154			pre = 8
2155		}
2156
2157		// check for a new item
2158		chunk := data[beg+i : end]
2159		if (prefix_uli(chunk) > 0 && !is_hrule(chunk)) || prefix_oli(chunk) > 0 {
2160			if in_empty {
2161				has_inside_empty = true
2162			}
2163
2164			if pre == orgpre { // the following item must have the same indentation
2165				break
2166			}
2167
2168			if sublist == 0 {
2169				sublist = work.Len()
2170			}
2171		} else {
2172			// only join indented stuff after empty lines
2173			if in_empty && i < 4 && data[beg] != '\t' {
2174				*flags |= MKD_LI_END
2175				break
2176			} else {
2177				if in_empty {
2178					work.WriteByte('\n')
2179					has_inside_empty = true
2180				}
2181			}
2182		}
2183
2184		in_empty = false
2185
2186		// add the line into the working buffer without prefix
2187		work.Write(data[beg+i : end])
2188		beg = end
2189	}
2190
2191	// render li contents
2192	if has_inside_empty {
2193		*flags |= MKD_LI_BLOCK
2194	}
2195
2196	workbytes := work.Bytes()
2197	if *flags&MKD_LI_BLOCK != 0 {
2198		// intermediate render of block li
2199		if sublist > 0 && sublist < len(workbytes) {
2200			parse_block(inter, rndr, workbytes[:sublist])
2201			parse_block(inter, rndr, workbytes[sublist:])
2202		} else {
2203			parse_block(inter, rndr, workbytes)
2204		}
2205	} else {
2206		// intermediate render of inline li
2207		if sublist > 0 && sublist < len(workbytes) {
2208			parse_inline(inter, rndr, workbytes[:sublist])
2209			parse_block(inter, rndr, workbytes[sublist:])
2210		} else {
2211			parse_inline(inter, rndr, workbytes)
2212		}
2213	}
2214
2215	// render li itself
2216	if rndr.mk.listitem != nil {
2217		rndr.mk.listitem(ob, inter.Bytes(), *flags, rndr.mk.opaque)
2218	}
2219
2220	return beg
2221}
2222
2223func parse_paragraph(ob *bytes.Buffer, rndr *render, data []byte) int {
2224	i, end, level := 0, 0, 0
2225
2226	for i < len(data) {
2227		for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ {
2228		}
2229
2230		if is_empty(data[i:]) > 0 {
2231			break
2232		}
2233		if level = is_headerline(data[i:]); level > 0 {
2234			break
2235		}
2236
2237		if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
2238			if data[i] == '<' && rndr.mk.blockhtml != nil && parse_htmlblock(ob, rndr, data[i:], false) > 0 {
2239				end = i
2240				break
2241			}
2242		}
2243
2244		if is_atxheader(rndr, data[i:]) || is_hrule(data[i:]) {
2245			end = i
2246			break
2247		}
2248
2249		i = end
2250	}
2251
2252	work := data
2253	size := i
2254	for size > 0 && work[size-1] == '\n' {
2255		size--
2256	}
2257
2258	if level == 0 {
2259		tmp := bytes.NewBuffer(nil)
2260		parse_inline(tmp, rndr, work[:size])
2261		if rndr.mk.paragraph != nil {
2262			rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
2263		}
2264	} else {
2265		if size > 0 {
2266			beg := 0
2267			i = size
2268			size--
2269
2270			for size > 0 && work[size] != '\n' {
2271				size--
2272			}
2273
2274			beg = size + 1
2275			for size > 0 && work[size-1] == '\n' {
2276				size--
2277			}
2278
2279			if size > 0 {
2280				tmp := bytes.NewBuffer(nil)
2281				parse_inline(tmp, rndr, work[:size])
2282				if rndr.mk.paragraph != nil {
2283					rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
2284				}
2285
2286				work = work[beg:]
2287				size = i - beg
2288			} else {
2289				size = i
2290			}
2291		}
2292
2293		header_work := bytes.NewBuffer(nil)
2294		parse_inline(header_work, rndr, work[:size])
2295
2296		if rndr.mk.header != nil {
2297			rndr.mk.header(ob, header_work.Bytes(), level, rndr.mk.opaque)
2298		}
2299	}
2300
2301	return end
2302}
2303
2304
2305//
2306//
2307// HTML rendering
2308//
2309//
2310
// Option bits for html_renderopts.flags. Each flag occupies its own bit
// so they can be or-ed together.
const (
	HTML_SKIP_HTML        = 1 << 0 // drop every raw inline HTML tag
	HTML_SKIP_STYLE       = 1 << 1 // drop raw <style> tags
	HTML_SKIP_IMAGES      = 1 << 2 // drop raw <img> tags
	HTML_SKIP_LINKS       = 1 << 3 // drop raw <a> tags
	HTML_EXPAND_TABS      = 1 << 4 // NOTE(review): not consulted by the renderers in this part of the file
	HTML_SAFELINK         = 1 << 5 // only render links that is_safe_link accepts
	HTML_TOC              = 1 << 6 // give headers sequential "toc_N" ids
	HTML_HARD_WRAP        = 1 << 7 // turn newlines inside paragraphs into line breaks
	HTML_GITHUB_BLOCKCODE = 1 << 8 // NOTE(review): not consulted by the renderers in this part of the file
	HTML_USE_XHTML        = 1 << 9 // NOTE(review): presumably selects the XHTML close_tag; confirm where options are built
)
2323
// html_renderopts is the state shared by the HTML rendering callbacks.
// The callbacks receive it as their opaque argument and type-assert it
// back to *html_renderopts.
type html_renderopts struct {
	// toc_data holds table-of-contents bookkeeping. header_count is the
	// number used for the next header's "toc_N" id.
	// NOTE(review): current_level is not used by the renderers in this
	// part of the file.
	toc_data struct {
		header_count, current_level int
	}

	// flags is a bitmask of the HTML_* options.
	flags uint32

	// close_tag is the text written to finish a void element, e.g. after
	// "<br" or "<hr".
	close_tag string
}
2332
// attr_escape appends src to ob with the characters '<', '>', '&' and '"'
// replaced by their HTML entities. All other bytes are copied unchanged.
func attr_escape(ob *bytes.Buffer, src []byte) {
	// start marks the beginning of the pending run of plain bytes
	start := 0
	for i, c := range src {
		var entity string
		switch c {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// flush the plain run, then the entity
		if i > start {
			ob.Write(src[start:i])
		}
		ob.WriteString(entity)
		start = i + 1
	}

	if start < len(src) {
		ob.Write(src[start:])
	}
}
2360
// unscape_text appends src to ob with backslash escapes resolved: every
// backslash is dropped and the byte after it is copied literally. A lone
// backslash at the very end of src is discarded.
func unscape_text(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		bs := bytes.IndexByte(src, '\\')
		if bs < 0 {
			// no more escapes
			ob.Write(src)
			return
		}

		if bs > 0 {
			ob.Write(src[:bs])
		}
		if bs+1 >= len(src) {
			// trailing backslash with nothing to escape
			return
		}

		ob.WriteByte(src[bs+1])
		src = src[bs+2:]
	}
}
2381
2382func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
2383	options := opaque.(*html_renderopts)
2384
2385	if ob.Len() > 0 {
2386		ob.WriteByte('\n')
2387	}
2388
2389	if options.flags&HTML_TOC != 0 {
2390		ob.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
2391		options.toc_data.header_count++
2392	} else {
2393		ob.WriteString(fmt.Sprintf("<h%d>", level))
2394	}
2395
2396	ob.Write(text)
2397	ob.WriteString(fmt.Sprintf("</h%d>\n", level))
2398}
2399
// rndr_raw_block writes a raw HTML block verbatim. Newlines at both ends
// of text are removed and a single one is appended; a block that is empty
// after trimming writes nothing. A separating newline is written first
// when ob already has content.
func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}
	ob.Write(body)
	ob.WriteByte('\n')
}
2418
2419func rndr_hrule(ob *bytes.Buffer, opaque interface{}) {
2420	options := opaque.(*html_renderopts)
2421
2422	if ob.Len() > 0 {
2423		ob.WriteByte('\n')
2424	}
2425	ob.WriteString("<hr")
2426	ob.WriteString(options.close_tag)
2427}
2428
2429func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
2430	if ob.Len() > 0 {
2431		ob.WriteByte('\n')
2432	}
2433
2434	if lang != "" {
2435		ob.WriteString("<pre><code class=\"")
2436
2437		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
2438			for i < len(lang) && isspace(lang[i]) {
2439				i++
2440			}
2441
2442			if i < len(lang) {
2443				org := i
2444				for i < len(lang) && !isspace(lang[i]) {
2445					i++
2446				}
2447
2448				if lang[org] == '.' {
2449					org++
2450				}
2451
2452				if cls > 0 {
2453					ob.WriteByte(' ')
2454				}
2455				attr_escape(ob, []byte(lang[org:]))
2456			}
2457		}
2458
2459		ob.WriteString("\">")
2460	} else {
2461		ob.WriteString("<pre><code>")
2462	}
2463
2464	if len(text) > 0 {
2465		attr_escape(ob, text)
2466	}
2467
2468	ob.WriteString("</code></pre>\n")
2469}
2470
// rndr_blockquote wraps the already rendered text in a <blockquote>
// element. No newline is written after the closing tag.
func rndr_blockquote(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		open_tag  = "<blockquote>\n"
		close_tag = "</blockquote>"
	)

	ob.Grow(len(open_tag) + len(text) + len(close_tag))
	ob.WriteString(open_tag)
	ob.Write(text)
	ob.WriteString(close_tag)
}
2476
// rndr_table writes a <table> element from the already rendered header
// and body rows, preceded by a newline when ob already has content. No
// newline is written after the closing tag.
func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) {
	const (
		head_open = "<table><thead>\n"
		body_open = "\n</thead><tbody>\n"
		table_end = "\n</tbody></table>"
	)

	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}

	ob.Grow(len(head_open) + len(header) + len(body_open) + len(body) + len(table_end))
	ob.WriteString(head_open)
	ob.Write(header)
	ob.WriteString(body_open)
	ob.Write(body)
	ob.WriteString(table_end)
}
2487
// rndr_tablerow wraps the already rendered cells in a <tr> element,
// preceded by a newline when ob already has content.
func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		open_tag  = "<tr>\n"
		close_tag = "\n</tr>"
	)

	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}

	ob.Grow(len(open_tag) + len(text) + len(close_tag))
	ob.WriteString(open_tag)
	ob.Write(text)
	ob.WriteString(close_tag)
}
2496
2497func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) {
2498	if ob.Len() > 0 {
2499		ob.WriteByte('\n')
2500	}
2501	switch align {
2502	case MKD_TABLE_ALIGN_L:
2503		ob.WriteString("<td align=\"left\">")
2504	case MKD_TABLE_ALIGN_R:
2505		ob.WriteString("<td align=\"right\">")
2506	case MKD_TABLE_ALIGN_CENTER:
2507		ob.WriteString("<td align=\"center\">")
2508	default:
2509		ob.WriteString("<td>")
2510	}
2511
2512	ob.Write(text)
2513	ob.WriteString("</td>")
2514}
2515
2516func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
2517	if ob.Len() > 0 {
2518		ob.WriteByte('\n')
2519	}
2520	if flags&MKD_LIST_ORDERED != 0 {
2521		ob.WriteString("<ol>\n")
2522	} else {
2523		ob.WriteString("<ul>\n")
2524	}
2525	ob.Write(text)
2526	if flags&MKD_LIST_ORDERED != 0 {
2527		ob.WriteString("</ol>\n")
2528	} else {
2529		ob.WriteString("</ul>\n")
2530	}
2531}
2532
// rndr_listitem wraps the already rendered text in an <li> element.
// Trailing newlines are removed from text so the closing tag follows the
// content directly.
func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	ob.WriteString("<li>")
	ob.Write(bytes.TrimRight(text, "\n"))
	ob.WriteString("</li>\n")
}
2542
2543func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) {
2544	options := opaque.(*html_renderopts)
2545	i := 0
2546
2547	if ob.Len() > 0 {
2548		ob.WriteByte('\n')
2549	}
2550
2551	if len(text) == 0 {
2552		return
2553	}
2554
2555	for i < len(text) && isspace(text[i]) {
2556		i++
2557	}
2558
2559	if i == len(text) {
2560		return
2561	}
2562
2563	ob.WriteString("<p>")
2564	if options.flags&HTML_HARD_WRAP != 0 {
2565		for i < len(text) {
2566			org := i
2567			for i < len(text) && text[i] != '\n' {
2568				i++
2569			}
2570
2571			if i > org {
2572				ob.Write(text[org:i])
2573			}
2574
2575			if i >= len(text) {
2576				break
2577			}
2578
2579			ob.WriteString("<br>")
2580			ob.WriteString(options.close_tag)
2581			i++
2582		}
2583	} else {
2584		ob.Write(text[i:])
2585	}
2586	ob.WriteString("</p>\n")
2587}
2588
2589func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
2590	options := opaque.(*html_renderopts)
2591
2592	if len(link) == 0 {
2593		return 0
2594	}
2595	if options.flags&HTML_SAFELINK != 0 && !is_safe_link(link) && kind != MKDA_EMAIL {
2596		return 0
2597	}
2598
2599	ob.WriteString("<a href=\"")
2600	if kind == MKDA_EMAIL {
2601		ob.WriteString("mailto:")
2602	}
2603	ob.Write(link)
2604	ob.WriteString("\">")
2605
2606	/*
2607	 * Pretty printing: if we get an email address as
2608	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
2609	 * want to print the `mailto:` prefix
2610	 */
2611	if bytes.HasPrefix(link, []byte("mailto:")) {
2612		attr_escape(ob, link[7:])
2613	} else {
2614		attr_escape(ob, link)
2615	}
2616
2617	ob.WriteString("</a>")
2618
2619	return 1
2620}
2621
2622func rndr_codespan(ob *bytes.Buffer, text []byte, opaque interface{}) int {
2623	ob.WriteString("<code>")
2624	attr_escape(ob, text)
2625	ob.WriteString("</code>")
2626	return 1
2627}
2628
// rndr_double_emphasis wraps the already rendered text in a <strong>
// element and returns 1. Empty text writes nothing and returns 0.
func rndr_double_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		open_tag  = "<strong>"
		close_tag = "</strong>"
	)

	if len(text) == 0 {
		return 0
	}

	ob.Grow(len(open_tag) + len(text) + len(close_tag))
	ob.WriteString(open_tag)
	ob.Write(text)
	ob.WriteString(close_tag)
	return 1
}
2638
// rndr_emphasis wraps the already rendered text in an <em> element and
// returns 1. Empty text writes nothing and returns 0.
func rndr_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	const (
		open_tag  = "<em>"
		close_tag = "</em>"
	)

	if len(text) == 0 {
		return 0
	}

	ob.Grow(len(open_tag) + len(text) + len(close_tag))
	ob.WriteString(open_tag)
	ob.Write(text)
	ob.WriteString(close_tag)
	return 1
}
2648
2649func rndr_image(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
2650	options := opaque.(*html_renderopts)
2651	if len(link) == 0 {
2652		return 0
2653	}
2654	ob.WriteString("<img src=\"")
2655	attr_escape(ob, link)
2656	ob.WriteString("\" alt=\"")
2657	if len(alt) > 0 {
2658		attr_escape(ob, alt)
2659	}
2660	if len(title) > 0 {
2661		ob.WriteString("\" title=\"")
2662		attr_escape(ob, title)
2663	}
2664
2665	ob.WriteByte('"')
2666	ob.WriteString(options.close_tag)
2667	return 1
2668}
2669
2670func rndr_linebreak(ob *bytes.Buffer, opaque interface{}) int {
2671	options := opaque.(*html_renderopts)
2672	ob.WriteString("<br")
2673	ob.WriteString(options.close_tag)
2674	return 1
2675}
2676
2677func rndr_link(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
2678	options := opaque.(*html_renderopts)
2679
2680	if options.flags&HTML_SAFELINK != 0 && !is_safe_link(link) {
2681		return 0
2682	}
2683
2684	ob.WriteString("<a href=\"")
2685	if len(link) > 0 {
2686		ob.Write(link)
2687	}
2688	if len(title) > 0 {
2689		ob.WriteString("\" title=\"")
2690		attr_escape(ob, title)
2691	}
2692	ob.WriteString("\">")
2693	if len(content) > 0 {
2694		ob.Write(content)
2695	}
2696	ob.WriteString("</a>")
2697	return 1
2698}
2699
2700func rndr_raw_html_tag(ob *bytes.Buffer, text []byte, opaque interface{}) int {
2701	options := opaque.(*html_renderopts)
2702	if options.flags&HTML_SKIP_HTML != 0 {
2703		return 1
2704	}
2705	if options.flags&HTML_SKIP_STYLE != 0 && is_html_tag(text, "style") {
2706		return 1
2707	}
2708	if options.flags&HTML_SKIP_LINKS != 0 && is_html_tag(text, "a") {
2709		return 1
2710	}
2711	if options.flags&HTML_SKIP_IMAGES != 0 && is_html_tag(text, "img") {
2712		return 1
2713	}
2714	ob.Write(text)
2715	return 1
2716}
2717
// rndr_triple_emphasis appends text to ob wrapped in
// <strong><em>...</em></strong>. text is written verbatim; no escaping is
// applied here. opaque is not used.
// It returns 0 without writing anything when text is empty, and 1 otherwise.
func rndr_triple_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) == 0 {
		return 0
	}

	// opening tags, payload, closing tags -- emitted in that order
	pieces := [][]byte{
		[]byte("<strong><em>"),
		text,
		[]byte("</em></strong>"),
	}
	for _, piece := range pieces {
		ob.Write(piece)
	}
	return 1
}
2727
// rndr_strikethrough appends text to ob wrapped in <del>...</del>.
// text is written verbatim; no escaping is applied here. opaque is not used.
// It returns 0 without writing anything when text is empty, and 1 otherwise.
func rndr_strikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	rendered := 0

	if len(text) != 0 {
		ob.WriteString("<del>")
		ob.Write(text)
		ob.WriteString("</del>")
		rendered = 1
	}

	return rendered
}
2737
2738func rndr_normal_text(ob *bytes.Buffer, text []byte, opaque interface{}) {
2739	attr_escape(ob, text)
2740}
2741
2742func is_html_tag(tag []byte, tagname string) bool {
2743	i := 0
2744	if i < len(tag) && tag[0] != '<' {
2745		return false
2746	}
2747	i++
2748	for i < len(tag) && isspace(tag[i]) {
2749		i++
2750	}
2751
2752	if i < len(tag) && tag[i] == '/' {
2753		i++
2754	}
2755
2756	for i < len(tag) && isspace(tag[i]) {
2757		i++
2758	}
2759
2760	tag_i := i
2761	for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
2762		if tag_i >= len(tagname) {
2763			break
2764		}
2765
2766		if tag[i] != tagname[tag_i] {
2767			return false
2768		}
2769	}
2770
2771	if i == len(tag) {
2772		return false
2773	}
2774
2775	return isspace(tag[i]) || tag[i] == '>'
2776}
2777
2778
2779//
2780//
2781// Public interface
2782//
2783//
2784
// expand_tabs copies line to ob, replacing every tab with one to four spaces
// so that the text following it starts at the next column that is a multiple
// of four.
//
// Columns are counted in characters, not bytes: UTF-8 continuation bytes
// (those of the form 10xxxxxx) do not advance the column, so a tab that
// follows multi-byte text still lands on the correct tab stop. Every
// character is treated as one column wide. All non-tab bytes are copied
// through unchanged.
func expand_tabs(ob *bytes.Buffer, line []byte) {
	const tabWidth = 4

	col := 0
	for i := 0; i < len(line); {
		// copy the run of non-tab bytes, tracking the column as we go
		start := i
		for i < len(line) && line[i] != '\t' {
			if line[i]&0xC0 != 0x80 { // not a UTF-8 continuation byte
				col++
			}
			i++
		}
		if i > start {
			ob.Write(line[start:i])
		}

		if i >= len(line) {
			break
		}

		// pad to the next tab stop; a tab always yields at least one space
		for pad := tabWidth - col%tabWidth; pad > 0; pad-- {
			ob.WriteByte(' ')
			col++
		}

		i++ // step over the tab itself
	}
}
2814
2815func Markdown(ob *bytes.Buffer, ib []byte, rndrer *mkd_renderer, extensions uint32) {
2816	// no point in parsing if we can't render
2817	if rndrer == nil {
2818		return
2819	}
2820
2821	// fill in the character-level parsers
2822	markdown_char_ptrs[MD_CHAR_NONE] = nil
2823	markdown_char_ptrs[MD_CHAR_EMPHASIS] = char_emphasis
2824	markdown_char_ptrs[MD_CHAR_CODESPAN] = char_codespan
2825	markdown_char_ptrs[MD_CHAR_LINEBREAK] = char_linebreak
2826	markdown_char_ptrs[MD_CHAR_LINK] = char_link
2827	markdown_char_ptrs[MD_CHAR_LANGLE] = char_langle_tag
2828	markdown_char_ptrs[MD_CHAR_ESCAPE] = char_escape
2829	markdown_char_ptrs[MD_CHAR_ENTITITY] = char_entity
2830	markdown_char_ptrs[MD_CHAR_AUTOLINK] = char_autolink
2831
2832	// fill in the render structure
2833	rndr := new(render)
2834	rndr.mk = rndrer
2835	rndr.ext_flags = extensions
2836	rndr.max_nesting = 16
2837
2838	if rndr.mk.emphasis != nil || rndr.mk.double_emphasis != nil || rndr.mk.triple_emphasis != nil {
2839		rndr.active_char['*'] = MD_CHAR_EMPHASIS
2840		rndr.active_char['_'] = MD_CHAR_EMPHASIS
2841		if extensions&MKDEXT_STRIKETHROUGH != 0 {
2842			rndr.active_char['~'] = MD_CHAR_EMPHASIS
2843		}
2844	}
2845	if rndr.mk.codespan != nil {
2846		rndr.active_char['`'] = MD_CHAR_CODESPAN
2847	}
2848	if rndr.mk.linebreak != nil {
2849		rndr.active_char['\n'] = MD_CHAR_LINEBREAK
2850	}
2851	if rndr.mk.image != nil || rndr.mk.link != nil {
2852		rndr.active_char['['] = MD_CHAR_LINK
2853	}
2854	rndr.active_char['<'] = MD_CHAR_LANGLE
2855	rndr.active_char['\\'] = MD_CHAR_ESCAPE
2856	rndr.active_char['&'] = MD_CHAR_ENTITITY
2857
2858	if extensions&MKDEXT_AUTOLINK != 0 {
2859		rndr.active_char['h'] = MD_CHAR_AUTOLINK // http, https
2860		rndr.active_char['H'] = MD_CHAR_AUTOLINK
2861
2862		rndr.active_char['f'] = MD_CHAR_AUTOLINK // ftp
2863		rndr.active_char['F'] = MD_CHAR_AUTOLINK
2864
2865		rndr.active_char['m'] = MD_CHAR_AUTOLINK // mailto
2866		rndr.active_char['M'] = MD_CHAR_AUTOLINK
2867	}
2868
2869	// first pass: look for references, copying everything else
2870	text := bytes.NewBuffer(nil)
2871	beg, end := 0, 0
2872	for beg < len(ib) { // iterate over lines
2873		if is_ref(ib, beg, &end, rndr) {
2874			beg = end
2875		} else { // skip to the next line
2876			end = beg
2877			for end < len(ib) && ib[end] != '\n' && ib[end] != '\r' {
2878				end++
2879			}
2880
2881			// add the line body if present
2882			if end > beg {
2883				expand_tabs(text, ib[beg:end])
2884			}
2885
2886			for end < len(ib) && (ib[end] == '\n' || ib[end] == '\r') {
2887				// add one \n per newline
2888				if ib[end] == '\n' || (end+1 < len(ib) && ib[end+1] != '\n') {
2889					text.WriteByte('\n')
2890				}
2891				end++
2892			}
2893
2894			beg = end
2895		}
2896	}
2897
2898	// sort the reference array
2899	if len(rndr.refs) > 1 {
2900		sort.Sort(rndr.refs)
2901	}
2902
2903	// second pass: actual rendering
2904	if rndr.mk.doc_header != nil {
2905		rndr.mk.doc_header(ob, rndr.mk.opaque)
2906	}
2907
2908	if text.Len() > 0 {
2909		// add a final newline if not already present
2910		finalchar := text.Bytes()[text.Len()-1]
2911		if finalchar != '\n' && finalchar != '\r' {
2912			text.WriteByte('\n')
2913		}
2914		parse_block(ob, rndr, text.Bytes())
2915	}
2916
2917	if rndr.mk.doc_footer != nil {
2918		rndr.mk.doc_footer(ob, rndr.mk.opaque)
2919	}
2920
2921	if rndr.nesting != 0 {
2922		panic("Nesting level did not end at zero")
2923	}
2924}
2925
2926func main() {
2927	// configure the rendering engine
2928	rndrer := new(mkd_renderer)
2929	rndrer.blockcode = rndr_blockcode
2930	rndrer.blockquote = rndr_blockquote
2931	rndrer.blockhtml = rndr_raw_block
2932	rndrer.header = rndr_header
2933	rndrer.hrule = rndr_hrule
2934	rndrer.list = rndr_list
2935	rndrer.listitem = rndr_listitem
2936	rndrer.paragraph = rndr_paragraph
2937	rndrer.table = rndr_table
2938	rndrer.table_row = rndr_tablerow
2939	rndrer.table_cell = rndr_tablecell
2940
2941	rndrer.autolink = rndr_autolink
2942	rndrer.codespan = rndr_codespan
2943	rndrer.double_emphasis = rndr_double_emphasis
2944	rndrer.emphasis = rndr_emphasis
2945	rndrer.image = rndr_image
2946	rndrer.linebreak = rndr_linebreak
2947	rndrer.link = rndr_link
2948	rndrer.raw_html_tag = rndr_raw_html_tag
2949	rndrer.triple_emphasis = rndr_triple_emphasis
2950	rndrer.strikethrough = rndr_strikethrough
2951
2952	rndrer.normal_text = rndr_normal_text
2953
2954	rndrer.opaque = &html_renderopts{close_tag: ">\n"}
2955
2956	var extensions uint32
2957	extensions |= MKDEXT_NO_INTRA_EMPHASIS
2958	extensions |= MKDEXT_TABLES
2959	extensions |= MKDEXT_FENCED_CODE
2960	extensions |= MKDEXT_AUTOLINK
2961	extensions |= MKDEXT_STRIKETHROUGH
2962	extensions |= MKDEXT_LAX_HTML_BLOCKS
2963	extensions |= MKDEXT_SPACE_HEADERS
2964
2965	// read the input
2966	var ib []byte
2967	var err os.Error
2968	switch len(os.Args) {
2969	case 1:
2970		if ib, err = ioutil.ReadAll(os.Stdin); err != nil {
2971			fmt.Fprintln(os.Stderr, "Error reading from Stdin:", err)
2972			os.Exit(-1)
2973		}
2974	case 2, 3:
2975		if ib, err = ioutil.ReadFile(os.Args[1]); err != nil {
2976			fmt.Fprintln(os.Stderr, "Error reading from", os.Args[1], ":", err)
2977			os.Exit(-1)
2978		}
2979	default:
2980		fmt.Fprintln(os.Stderr, "Usage:", os.Args[0], "[inputfile [outputfile]]")
2981		os.Exit(-1)
2982	}
2983
2984	// call the main renderer function
2985	ob := bytes.NewBuffer(nil)
2986	Markdown(ob, ib, rndrer, extensions)
2987
2988	// output the result
2989	if len(os.Args) == 3 {
2990		if err = ioutil.WriteFile(os.Args[2], ob.Bytes(), 0644); err != nil {
2991			fmt.Fprintln(os.Stderr, "Error writing to", os.Args[2], ":", err)
2992			os.Exit(-1)
2993		}
2994	} else {
2995		if _, err = os.Stdout.Write(ob.Bytes()); err != nil {
2996			fmt.Fprintln(os.Stderr, "Error writing to Stdout:", err)
2997			os.Exit(-1)
2998		}
2999	}
3000}