all repos — grayfriday @ b117dcc9eca29c0c1787507bbdb5e3b8b0710dac

blackfriday fork with a few changes

markdown.go (view raw)

   1//
   2// Black Friday Markdown Processor
   3// Ported to Go from http://github.com/tanoku/upskirt
   4// by Russ Ross <russ@russross.com>
   5//
   6
   7package main
   8
   9import (
  10	"bytes"
  11	"fmt"
  12	"io/ioutil"
  13	"os"
  14	"sort"
  15	"unicode"
  16)
  17
// Autolink kinds. tag_length reports one of these through its autolink
// argument, and the value is handed to the autolink render callback.
const (
	MKDA_NOT_AUTOLINK = iota // not an autolink (zero value)
	MKDA_NORMAL              // link introduced by a "scheme:" prefix
	MKDA_EMAIL               // e-mail address
)
  23
// Extension flags. Each constant is a distinct bit that is tested against
// render.ext_flags.
const (
	MKDEXT_NO_INTRA_EMPHASIS = 1 << iota // closing emphasis must be followed by end of input, whitespace or punctuation
	MKDEXT_TABLES                        // parse_block tries parse_table
	MKDEXT_FENCED_CODE                   // parse_block tries parse_fencedcode
	MKDEXT_AUTOLINK
	MKDEXT_STRIKETHROUGH
	MKDEXT_LAX_HTML_BLOCKS
	MKDEXT_SPACE_HEADERS // the '#' run of an ATX header must be followed by a space or tab
)
  33
// Flags for lists and list items. parse_block passes MKD_LIST_ORDERED to
// parse_list when an ordered list prefix is found.
const (
	_ = iota // skip zero so that 0 means "no flags"
	MKD_LIST_ORDERED
	MKD_LI_BLOCK // <li> containing block data
	MKD_LI_END   = 8 // explicit value, not derived from iota
)
  40
// Table cell alignment flags. Left and right are separate bits; center is
// the combination of both.
const (
	MKD_TABLE_ALIGN_L = 1 << iota
	MKD_TABLE_ALIGN_R
	MKD_TABLE_ALIGN_CENTER = (MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R)
)
  46
// block_tags is a set of lower-case HTML tag names, keyed by name with the
// value true for every member.
// NOTE(review): presumably consulted by the HTML block parser to decide
// which tags open a block -- that code is not in this part of the file.
var block_tags = map[string]bool{
	"p":          true,
	"dl":         true,
	"h1":         true,
	"h2":         true,
	"h3":         true,
	"h4":         true,
	"h5":         true,
	"h6":         true,
	"ol":         true,
	"ul":         true,
	"del":        true,
	"div":        true,
	"ins":        true,
	"pre":        true,
	"form":       true,
	"math":       true,
	"table":      true,
	"iframe":     true,
	"script":     true,
	"fieldset":   true,
	"noscript":   true,
	"blockquote": true,
}
  71
// mkd_renderer is the table of callbacks used to render parsed data.
// Every callback writes its output to ob and receives the opaque field
// as its last argument. The span-level callbacks return an int; the
// parsing functions treat a zero return as "nothing was rendered".
type mkd_renderer struct {
	// block-level callbacks---nil skips the block
	blockcode  func(ob *bytes.Buffer, text []byte, lang string, opaque interface{})
	blockquote func(ob *bytes.Buffer, text []byte, opaque interface{})
	blockhtml  func(ob *bytes.Buffer, text []byte, opaque interface{})
	header     func(ob *bytes.Buffer, text []byte, level int, opaque interface{})
	hrule      func(ob *bytes.Buffer, opaque interface{})
	list       func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	listitem   func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	paragraph  func(ob *bytes.Buffer, text []byte, opaque interface{})
	table      func(ob *bytes.Buffer, header []byte, body []byte, opaque interface{})
	table_row  func(ob *bytes.Buffer, text []byte, opaque interface{})
	table_cell func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})

	// span-level callbacks---nil or return 0 prints the span verbatim
	autolink        func(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int
	codespan        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	double_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	emphasis        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	image           func(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int
	linebreak       func(ob *bytes.Buffer, opaque interface{}) int
	link            func(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int
	raw_html_tag    func(ob *bytes.Buffer, tag []byte, opaque interface{}) int
	triple_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	strikethrough   func(ob *bytes.Buffer, text []byte, opaque interface{}) int

	// low-level callbacks---nil copies input directly into the output
	entity      func(ob *bytes.Buffer, entity []byte, opaque interface{})
	normal_text func(ob *bytes.Buffer, text []byte, opaque interface{})

	// header and footer
	doc_header func(ob *bytes.Buffer, opaque interface{})
	doc_footer func(ob *bytes.Buffer, opaque interface{})

	// user data---passed back to every callback
	opaque interface{}
}
 110
// link_ref is one link reference definition as recorded by is_ref.
// All three fields are sub-slices of the parsed input, not copies.
type link_ref struct {
	id    []byte // text between the brackets
	link  []byte // destination, without surrounding angle brackets
	title []byte // optional title; empty when none was given
}
 116
// link_ref_array is a list of link references. Its Len, Less and Swap
// methods order it by id, ignoring case.
type link_ref_array []*link_ref
 118
// implement the sorting interface

// Len returns the number of references in the array.
func (elt link_ref_array) Len() int {
	return len(elt)
}
 123
// Less reports whether the id of element i sorts before the id of
// element j, using the case-insensitive ordering of byteslice_less.
func (elt link_ref_array) Less(i, j int) bool {
	return byteslice_less(elt[i].id, elt[j].id)
}
 127
 128func byteslice_less(a []byte, b []byte) bool {
 129	// adapted from bytes.Compare in stdlib
 130	m := len(a)
 131	if m > len(b) {
 132		m = len(b)
 133	}
 134	for i, ac := range a[0:m] {
 135		// do a case-insensitive comparison
 136		ai, bi := unicode.ToLower(int(ac)), unicode.ToLower(int(b[i]))
 137		switch {
 138		case ai > bi:
 139			return false
 140		case ai < bi:
 141			return true
 142		}
 143	}
 144	switch {
 145	case len(a) < len(b):
 146		return true
 147	case len(a) > len(b):
 148		return false
 149	}
 150	return false
 151}
 152
// Swap exchanges the references at positions i and j.
func (elt link_ref_array) Swap(i, j int) {
	elt[i], elt[j] = elt[j], elt[i]
}
 156
 157// returns whether or not a line is a reference
 158func is_ref(data []byte, beg int, last *int, rndr *render) bool {
 159	// up to 3 optional leading spaces
 160	if beg+3 > len(data) {
 161		return false
 162	}
 163	i := 0
 164	if data[beg] == ' ' {
 165		i++
 166		if data[beg+1] == ' ' {
 167			i++
 168			if data[beg+2] == ' ' {
 169				i++
 170				if data[beg+3] == ' ' {
 171					return false
 172				}
 173			}
 174		}
 175	}
 176	i += beg
 177
 178	// id part: anything but a newline between brackets
 179	if data[i] != '[' {
 180		return false
 181	}
 182	i++
 183	id_offset := i
 184	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
 185		i++
 186	}
 187	if i >= len(data) || data[i] != ']' {
 188		return false
 189	}
 190	id_end := i
 191
 192	// spacer: colon (space | tab)* newline? (space | tab)*
 193	i++
 194	if i >= len(data) || data[i] != ':' {
 195		return false
 196	}
 197	i++
 198	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 199		i++
 200	}
 201	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
 202		i++
 203		if i < len(data) && data[i] == '\r' && data[i-1] == '\n' {
 204			i++
 205		}
 206	}
 207	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 208		i++
 209	}
 210	if i >= len(data) {
 211		return false
 212	}
 213
 214	// link: whitespace-free sequence, optionally between angle brackets
 215	if data[i] == '<' {
 216		i++
 217	}
 218	link_offset := i
 219	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
 220		i++
 221	}
 222	var link_end int
 223	if data[i-1] == '>' {
 224		link_end = i - 1
 225	} else {
 226		link_end = i
 227	}
 228
 229	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
 230	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 231		i++
 232	}
 233	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
 234		return false
 235	}
 236
 237	// compute end-of-line
 238	line_end := 0
 239	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
 240		line_end = i
 241	}
 242	if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
 243		line_end = i + 1
 244	}
 245
 246	// optional (space|tab)* spacer after a newline
 247	if line_end > 0 {
 248		i = line_end + 1
 249		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
 250			i++
 251		}
 252	}
 253
 254	// optional title: any non-newline sequence enclosed in '"() alone on its line
 255	title_offset, title_end := 0, 0
 256	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
 257		i++
 258		title_offset = i
 259
 260		// look for EOL
 261		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
 262			i++
 263		}
 264		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
 265			title_end = i + 1
 266		} else {
 267			title_end = i
 268		}
 269
 270		// step back
 271		i--
 272		for i > title_offset && (data[i] == ' ' || data[i] == '\t') {
 273			i--
 274		}
 275		if i > title_offset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
 276			line_end = title_end
 277			title_end = i
 278		}
 279	}
 280	if line_end == 0 { // garbage after the link
 281		return false
 282	}
 283
 284	// a valid ref has been found; fill in return structures
 285	if last != nil {
 286		*last = line_end
 287	}
 288	if rndr == nil {
 289		return true
 290	}
 291	item := &link_ref{id: data[id_offset:id_end], link: data[link_offset:link_end], title: data[title_offset:title_end]}
 292	rndr.refs = append(rndr.refs, item)
 293
 294	return true
 295}
 296
// render is the parser state handed to every parsing function.
type render struct {
	mk          *mkd_renderer  // output callbacks
	refs        link_ref_array // link references; is_ref appends to it
	active_char [256]int       // per-byte MD_CHAR_* action read by parse_inline; 0 means plain text
	ext_flags   uint32         // MKDEXT_* bits
	nesting     int            // current depth; parse_inline and parse_block increment it on entry and decrement it on exit
	max_nesting int            // once nesting reaches this value those functions return without output
}
 305
// Actions stored in render.active_char. parse_inline uses the value found
// for a byte as an index into markdown_char_ptrs; MD_CHAR_NONE (zero)
// marks a byte that is copied to the output as plain text.
const (
	MD_CHAR_NONE = iota
	MD_CHAR_EMPHASIS
	MD_CHAR_CODESPAN
	MD_CHAR_LINEBREAK
	MD_CHAR_LINK
	MD_CHAR_LANGLE
	MD_CHAR_ESCAPE
	MD_CHAR_ENTITITY // (sic) the spelling is part of the identifier
	MD_CHAR_AUTOLINK
)
 317
// closures to render active chars, each:
//   returns the number of chars taken care of
//   data is the complete block being rendered
//   offset is the number of valid chars before the data
//
// The table is indexed by the MD_CHAR_* value that parse_inline reads from
// render.active_char.
//
// Note: this is filled in by Markdown to prevent an initialization loop
var markdown_char_ptrs [9]func(ob *bytes.Buffer, rndr *render, data []byte, offset int) int
 325
 326func parse_inline(ob *bytes.Buffer, rndr *render, data []byte) {
 327	if rndr.nesting >= rndr.max_nesting {
 328		return
 329	}
 330	rndr.nesting++
 331
 332	i, end := 0, 0
 333	for i < len(data) {
 334		// copy inactive chars into the output
 335		for end < len(data) && rndr.active_char[data[end]] == 0 {
 336			end++
 337		}
 338
 339		if rndr.mk.normal_text != nil {
 340			rndr.mk.normal_text(ob, data[i:end], rndr.mk.opaque)
 341		} else {
 342			ob.Write(data[i:end])
 343		}
 344
 345		if end >= len(data) {
 346			break
 347		}
 348		i = end
 349
 350		// call the trigger
 351		action := rndr.active_char[data[end]]
 352		end = markdown_char_ptrs[action](ob, rndr, data, i)
 353
 354		if end == 0 { // no action from the callback
 355			end = i + 1
 356		} else {
 357			i += end
 358			end = i
 359		}
 360	}
 361
 362	rndr.nesting--
 363}
 364
 365// single and double emphasis parsing
 366func char_emphasis(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 367	data = data[offset:]
 368	c := data[0]
 369	ret := 0
 370
 371	if len(data) > 2 && data[1] != c {
 372		// whitespace cannot follow an opening emphasis;
 373		// strikethrough only takes two characters '~~'
 374		if c == '~' || isspace(data[1]) {
 375			return 0
 376		}
 377		if ret = parse_emph1(ob, rndr, data[1:], c); ret == 0 {
 378			return 0
 379		}
 380
 381		return ret + 1
 382	}
 383
 384	if len(data) > 3 && data[1] == c && data[2] != c {
 385		if isspace(data[2]) {
 386			return 0
 387		}
 388		if ret = parse_emph2(ob, rndr, data[2:], c); ret == 0 {
 389			return 0
 390		}
 391
 392		return ret + 2
 393	}
 394
 395	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
 396		if c == '~' || isspace(data[3]) {
 397			return 0
 398		}
 399		if ret = parse_emph3(ob, rndr, data, 3, c); ret == 0 {
 400			return 0
 401		}
 402
 403		return ret + 3
 404	}
 405
 406	return 0
 407}
 408
 409func char_codespan(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 410	data = data[offset:]
 411
 412	nb := 0
 413
 414	// count the number of backticks in the delimiter
 415	for nb < len(data) && data[nb] == '`' {
 416		nb++
 417	}
 418
 419	// find the next delimiter
 420	i, end := 0, 0
 421	for end = nb; end < len(data) && i < nb; end++ {
 422		if data[end] == '`' {
 423			i++
 424		} else {
 425			i = 0
 426		}
 427	}
 428
 429	if i < nb && end >= len(data) {
 430		return 0 // no matching delimiter
 431	}
 432
 433	// trim outside whitespace
 434	f_begin := nb
 435	for f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t') {
 436		f_begin++
 437	}
 438
 439	f_end := end - nb
 440	for f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t') {
 441		f_end--
 442	}
 443
 444	// real code span
 445	if rndr.mk.codespan == nil {
 446		return 0
 447	}
 448	if f_begin < f_end {
 449		if rndr.mk.codespan(ob, data[f_begin:f_end], rndr.mk.opaque) == 0 {
 450			end = 0
 451		}
 452	} else {
 453		if rndr.mk.codespan(ob, nil, rndr.mk.opaque) == 0 {
 454			end = 0
 455		}
 456	}
 457
 458	return end
 459
 460}
 461
 462// '\n' preceded by two spaces
 463func char_linebreak(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 464	if offset < 2 || data[offset-1] != ' ' || data[offset-2] != ' ' {
 465		return 0
 466	}
 467
 468	// remove trailing spaces from ob and render
 469	ob_bytes := ob.Bytes()
 470	end := len(ob_bytes)
 471	for end > 0 && ob_bytes[end-1] == ' ' {
 472		end--
 473	}
 474	ob.Truncate(end)
 475
 476	if rndr.mk.linebreak == nil {
 477		return 0
 478	}
 479	if rndr.mk.linebreak(ob, rndr.mk.opaque) > 0 {
 480		return 1
 481	} else {
 482		return 0
 483	}
 484
 485	return 0
 486}
 487
 488// '[': parse a link or an image
 489func char_link(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 490	is_img := offset > 0 && data[offset-1] == '!'
 491
 492	data = data[offset:]
 493
 494	i := 1
 495	var title, link []byte
 496	text_has_nl := false
 497
 498	// check whether the correct renderer exists
 499	if (is_img && rndr.mk.image == nil) || (!is_img && rndr.mk.link == nil) {
 500		return 0
 501	}
 502
 503	// look for the matching closing bracket
 504	for level := 1; level > 0 && i < len(data); i++ {
 505		switch {
 506		case data[i] == '\n':
 507			text_has_nl = true
 508
 509		case data[i-1] == '\\':
 510			continue
 511
 512		case data[i] == '[':
 513			level++
 514
 515		case data[i] == ']':
 516			level--
 517			if level <= 0 {
 518				i-- // compensate for extra i++ in for loop
 519			}
 520		}
 521	}
 522
 523	if i >= len(data) {
 524		return 0
 525	}
 526
 527	txt_e := i
 528	i++
 529
 530	// skip any amount of whitespace or newline
 531	// (this is much more lax than original markdown syntax)
 532	for i < len(data) && isspace(data[i]) {
 533		i++
 534	}
 535
 536	// inline style link
 537	switch {
 538	case i < len(data) && data[i] == '(':
 539		// skip initial whitespace
 540		i++
 541
 542		for i < len(data) && isspace(data[i]) {
 543			i++
 544		}
 545
 546		link_b := i
 547
 548		// look for link end: ' " )
 549		for i < len(data) {
 550			if data[i] == '\\' {
 551				i += 2
 552			} else {
 553				if data[i] == ')' || data[i] == '\'' || data[i] == '"' {
 554					break
 555				}
 556				i++
 557			}
 558		}
 559
 560		if i >= len(data) {
 561			return 0
 562		}
 563		link_e := i
 564
 565		// look for title end if present
 566		title_b, title_e := 0, 0
 567		if data[i] == '\'' || data[i] == '"' {
 568			i++
 569			title_b = i
 570
 571			for i < len(data) {
 572				if data[i] == '\\' {
 573					i += 2
 574				} else {
 575					if data[i] == ')' {
 576						break
 577					}
 578					i++
 579				}
 580			}
 581
 582			if i >= len(data) {
 583				return 0
 584			}
 585
 586			// skip whitespace after title
 587			title_e = i - 1
 588			for title_e > title_b && isspace(data[title_e]) {
 589				title_e--
 590			}
 591
 592			// check for closing quote presence
 593			if data[title_e] != '\'' && data[title_e] != '"' {
 594				title_b, title_e = 0, 0
 595				link_e = i
 596			}
 597		}
 598
 599		// remove whitespace at the end of the link
 600		for link_e > link_b && isspace(data[link_e-1]) {
 601			link_e--
 602		}
 603
 604		// remove optional angle brackets around the link
 605		if data[link_b] == '<' {
 606			link_b++
 607		}
 608		if data[link_e-1] == '>' {
 609			link_e--
 610		}
 611
 612		// build escaped link and title
 613		if link_e > link_b {
 614			link = data[link_b:link_e]
 615		}
 616
 617		if title_e > title_b {
 618			title = data[title_b:title_e]
 619		}
 620
 621		i++
 622
 623	// reference style link
 624	case i < len(data) && data[i] == '[':
 625		var id []byte
 626
 627		// look for the id
 628		i++
 629		link_b := i
 630		for i < len(data) && data[i] != ']' {
 631			i++
 632		}
 633		if i >= len(data) {
 634			return 0
 635		}
 636		link_e := i
 637
 638		// find the link_ref
 639		if link_b == link_e {
 640			if text_has_nl {
 641				b := bytes.NewBuffer(nil)
 642
 643				for j := 1; j < txt_e; j++ {
 644					switch {
 645					case data[j] != '\n':
 646						b.WriteByte(data[j])
 647					case data[j-1] != ' ':
 648						b.WriteByte(' ')
 649					}
 650				}
 651
 652				id = b.Bytes()
 653			} else {
 654				id = data[1:txt_e]
 655			}
 656		} else {
 657			id = data[link_b:link_e]
 658		}
 659
 660		// find the link_ref with matching id
 661		index := sortDotSearch(len(rndr.refs), func(i int) bool {
 662			return !byteslice_less(rndr.refs[i].id, id)
 663		})
 664		if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
 665			return 0
 666		}
 667		lr := rndr.refs[index]
 668
 669		// keep link and title from link_ref
 670		link = lr.link
 671		title = lr.title
 672		i++
 673
 674	// shortcut reference style link
 675	default:
 676		var id []byte
 677
 678		// craft the id
 679		if text_has_nl {
 680			b := bytes.NewBuffer(nil)
 681
 682			for j := 1; j < txt_e; j++ {
 683				switch {
 684				case data[j] != '\n':
 685					b.WriteByte(data[j])
 686				case data[j-1] != ' ':
 687					b.WriteByte(' ')
 688				}
 689			}
 690
 691			id = b.Bytes()
 692		} else {
 693			id = data[1:txt_e]
 694		}
 695
 696		// find the link_ref with matching id
 697		index := sortDotSearch(len(rndr.refs), func(i int) bool {
 698			return !byteslice_less(rndr.refs[i].id, id)
 699		})
 700		if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
 701			return 0
 702		}
 703		lr := rndr.refs[index]
 704
 705		// keep link and title from link_ref
 706		link = lr.link
 707		title = lr.title
 708
 709		// rewind the whitespace
 710		i = txt_e + 1
 711	}
 712
 713	// build content: img alt is escaped, link content is parsed
 714	content := bytes.NewBuffer(nil)
 715	if txt_e > 1 {
 716		if is_img {
 717			content.Write(data[1:txt_e])
 718		} else {
 719			parse_inline(content, rndr, data[1:txt_e])
 720		}
 721	}
 722
 723	var u_link []byte
 724	if len(link) > 0 {
 725		u_link_buf := bytes.NewBuffer(nil)
 726		unescape_text(u_link_buf, link)
 727		u_link = u_link_buf.Bytes()
 728	}
 729
 730	// call the relevant rendering function
 731	ret := 0
 732	if is_img {
 733		ob_size := ob.Len()
 734		ob_bytes := ob.Bytes()
 735		if ob_size > 0 && ob_bytes[ob_size-1] == '!' {
 736			ob.Truncate(ob_size - 1)
 737		}
 738
 739		ret = rndr.mk.image(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
 740	} else {
 741		ret = rndr.mk.link(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
 742	}
 743
 744	if ret > 0 {
 745		return i
 746	}
 747	return 0
 748}
 749
 750// '<' when tags or autolinks are allowed
 751func char_langle_tag(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 752	data = data[offset:]
 753	altype := MKDA_NOT_AUTOLINK
 754	end := tag_length(data, &altype)
 755	ret := 0
 756
 757	if end > 2 {
 758		switch {
 759		case rndr.mk.autolink != nil && altype != MKDA_NOT_AUTOLINK:
 760			u_link := bytes.NewBuffer(nil)
 761			unescape_text(u_link, data[1:end+1-2])
 762			ret = rndr.mk.autolink(ob, u_link.Bytes(), altype, rndr.mk.opaque)
 763		case rndr.mk.raw_html_tag != nil:
 764			ret = rndr.mk.raw_html_tag(ob, data[:end], rndr.mk.opaque)
 765		}
 766	}
 767
 768	if ret == 0 {
 769		return 0
 770	}
 771	return end
 772}
 773
 774// '\\' backslash escape
 775var escape_chars = []byte("\\`*_{}[]()#+-.!:|&<>")
 776
 777func char_escape(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 778	data = data[offset:]
 779
 780	if len(data) > 1 {
 781		if bytes.IndexByte(escape_chars, data[1]) < 0 {
 782			return 0
 783		}
 784
 785		if rndr.mk.normal_text != nil {
 786			rndr.mk.normal_text(ob, data[1:2], rndr.mk.opaque)
 787		} else {
 788			ob.WriteByte(data[1])
 789		}
 790	}
 791
 792	return 2
 793}
 794
 795// '&' escaped when it doesn't belong to an entity
 796// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 797func char_entity(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 798	data = data[offset:]
 799
 800	end := 1
 801
 802	if end < len(data) && data[end] == '#' {
 803		end++
 804	}
 805
 806	for end < len(data) && isalnum(data[end]) {
 807		end++
 808	}
 809
 810	if end < len(data) && data[end] == ';' {
 811		end++ // real entity
 812	} else {
 813		return 0 // lone '&'
 814	}
 815
 816	if rndr.mk.entity != nil {
 817		rndr.mk.entity(ob, data[:end], rndr.mk.opaque)
 818	} else {
 819		ob.Write(data[:end])
 820	}
 821
 822	return end
 823}
 824
 825func char_autolink(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
 826	orig_data := data
 827	data = data[offset:]
 828
 829	if offset > 0 {
 830		if !isspace(orig_data[offset-1]) && !ispunct(orig_data[offset-1]) {
 831			return 0
 832		}
 833	}
 834
 835	if !is_safe_link(data) {
 836		return 0
 837	}
 838
 839	link_end := 0
 840	for link_end < len(data) && !isspace(data[link_end]) {
 841		link_end++
 842	}
 843
 844	// Skip punctuation at the end of the link
 845	if (data[link_end-1] == '.' || data[link_end-1] == ',' || data[link_end-1] == ';') && data[link_end-2] != '\\' {
 846		link_end--
 847	}
 848
 849	// See if the link finishes with a punctuation sign that can be closed.
 850	var copen byte
 851	switch data[link_end-1] {
 852	case '"':
 853		copen = '"'
 854	case '\'':
 855		copen = '\''
 856	case ')':
 857		copen = '('
 858	case ']':
 859		copen = '['
 860	case '}':
 861		copen = '{'
 862	default:
 863		copen = 0
 864	}
 865
 866	if copen != 0 {
 867		buf_end := offset + link_end - 2
 868
 869		open_delim := 1
 870
 871		/* Try to close the final punctuation sign in this same line;
 872		 * if we managed to close it outside of the URL, that means that it's
 873		 * not part of the URL. If it closes inside the URL, that means it
 874		 * is part of the URL.
 875		 *
 876		 * Examples:
 877		 *
 878		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
 879		 *              => http://www.pokemon.com/Pikachu_(Electric)
 880		 *
 881		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
 882		 *              => http://www.pokemon.com/Pikachu_(Electric)
 883		 *
 884		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
 885		 *              => http://www.pokemon.com/Pikachu_(Electric))
 886		 *
 887		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
 888		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
 889		 */
 890
 891		for buf_end >= 0 && orig_data[buf_end] != '\n' && open_delim != 0 {
 892			if orig_data[buf_end] == data[link_end-1] {
 893				open_delim++
 894			}
 895
 896			if orig_data[buf_end] == copen {
 897				open_delim--
 898			}
 899
 900			buf_end--
 901		}
 902
 903		if open_delim == 0 {
 904			link_end--
 905		}
 906	}
 907
 908	if rndr.mk.autolink != nil {
 909		u_link := bytes.NewBuffer(nil)
 910		unescape_text(u_link, data[:link_end])
 911
 912		rndr.mk.autolink(ob, u_link.Bytes(), MKDA_NORMAL, rndr.mk.opaque)
 913	}
 914
 915	return link_end
 916}
 917
 918var valid_uris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
 919
 920func is_safe_link(link []byte) bool {
 921	for _, prefix := range valid_uris {
 922		if len(link) > len(prefix) && !byteslice_less(link[:len(prefix)], prefix) && !byteslice_less(prefix, link[:len(prefix)]) && isalnum(link[len(prefix)]) {
 923			return true
 924		}
 925	}
 926
 927	return false
 928}
 929
 930
 931// taken from regexp in the stdlib
 932func ispunct(c byte) bool {
 933	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
 934		if c == r {
 935			return true
 936		}
 937	}
 938	return false
 939}
 940
 941// this is sort.Search, reproduced here because an older
 942// version of the library had a bug
 943func sortDotSearch(n int, f func(int) bool) int {
 944	// Define f(-1) == false and f(n) == true.
 945	// Invariant: f(i-1) == false, f(j) == true.
 946	i, j := 0, n
 947	for i < j {
 948		h := i + (j-i)/2 // avoid overflow when computing h
 949		// i ≤ h < j
 950		if !f(h) {
 951			i = h + 1 // preserves f(i-1) == false
 952		} else {
 953			j = h // preserves f(j) == true
 954		}
 955	}
 956	// i == j, f(i-1) == false, and f(j) (= f(i)) == true  =>  answer is i.
 957	return i
 958}
 959
 960func isspace(c byte) bool {
 961	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
 962}
 963
 964func isalnum(c byte) bool {
 965	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 966}
 967
 968// return the length of the given tag, or 0 is it's not valid
 969func tag_length(data []byte, autolink *int) int {
 970	var i, j int
 971
 972	// a valid tag can't be shorter than 3 chars
 973	if len(data) < 3 {
 974		return 0
 975	}
 976
 977	// begins with a '<' optionally followed by '/', followed by letter or number
 978	if data[0] != '<' {
 979		return 0
 980	}
 981	if data[1] == '/' {
 982		i = 2
 983	} else {
 984		i = 1
 985	}
 986
 987	if !isalnum(data[i]) {
 988		return 0
 989	}
 990
 991	// scheme test
 992	*autolink = MKDA_NOT_AUTOLINK
 993
 994	// try to find the beggining of an URI
 995	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
 996		i++
 997	}
 998
 999	if i > 1 && data[i] == '@' {
1000		if j = is_mail_autolink(data[i:]); j != 0 {
1001			*autolink = MKDA_EMAIL
1002			return i + j
1003		}
1004	}
1005
1006	if i > 2 && data[i] == ':' {
1007		*autolink = MKDA_NORMAL
1008		i++
1009	}
1010
1011	// complete autolink test: no whitespace or ' or "
1012	switch {
1013	case i >= len(data):
1014		*autolink = MKDA_NOT_AUTOLINK
1015	case *autolink != 0:
1016		j = i
1017
1018		for i < len(data) {
1019			if data[i] == '\\' {
1020				i += 2
1021			} else {
1022				if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
1023					break
1024				} else {
1025					i++
1026				}
1027			}
1028
1029		}
1030
1031		if i >= len(data) {
1032			return 0
1033		}
1034		if i > j && data[i] == '>' {
1035			return i + 1
1036		}
1037
1038		// one of the forbidden chars has been found
1039		*autolink = MKDA_NOT_AUTOLINK
1040	}
1041
1042	// look for something looking like a tag end
1043	for i < len(data) && data[i] != '>' {
1044		i++
1045	}
1046	if i >= len(data) {
1047		return 0
1048	}
1049	return i + 1
1050}
1051
1052// look for the address part of a mail autolink and '>'
1053// this is less strict than the original markdown e-mail address matching
1054func is_mail_autolink(data []byte) int {
1055	nb := 0
1056
1057	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1058	for i := 0; i < len(data); i++ {
1059		if isalnum(data[i]) {
1060			continue
1061		}
1062
1063		switch data[i] {
1064		case '@':
1065			nb++
1066
1067		case '-', '.', '_':
1068			break
1069
1070		case '>':
1071			if nb == 1 {
1072				return i + 1
1073			} else {
1074				return 0
1075			}
1076		default:
1077			return 0
1078		}
1079	}
1080
1081	return 0
1082}
1083
// look for the next emph char, skipping other constructs
//
// find_emph_char scans data from index 1 for the delimiter c and returns
// its index, or 0 when none is found. Code spans and bracketed links are
// stepped over; a delimiter seen inside such a construct is remembered
// (tmp_i) and returned only when the construct turns out not to be
// terminated before the end of data, or when the brackets are not
// followed by '[' or '('.
func find_emph_char(data []byte, c byte) int {
	i := 1

	for i < len(data) {
		// advance to the next delimiter, backtick or opening bracket
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		if data[i] == c {
			return i
		}

		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}

		if data[i] == '`' {
			// skip a code span
			tmp_i := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmp_i == 0 && data[i] == c {
					tmp_i = i
				}
				i++
			}
			if i >= len(data) {
				// unterminated span: fall back to the delimiter seen inside
				return tmp_i
			}
			i++
		} else {
			if data[i] == '[' {
				// skip a link
				tmp_i := 0
				i++
				for i < len(data) && data[i] != ']' {
					if tmp_i == 0 && data[i] == c {
						tmp_i = i
					}
					i++
				}
				i++
				for i < len(data) && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n') {
					i++
				}
				if i >= len(data) {
					return tmp_i
				}
				if data[i] != '[' && data[i] != '(' { // not a link
					if tmp_i > 0 {
						return tmp_i
					} else {
						continue
					}
				}
				// NOTE(review): the scan below stops at the next occurrence
				// of the opening byte itself ('[' or '('), not at the
				// matching ']' or ')' -- confirm whether that is intended.
				cc := data[i]
				i++
				for i < len(data) && data[i] != cc {
					if tmp_i == 0 && data[i] == c {
						tmp_i = i
					}
					i++
				}
				if i >= len(data) {
					return tmp_i
				}
				i++
			}
		}
	}
	return 0
}
1161
1162func parse_emph1(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1163	i := 0
1164
1165	if rndr.mk.emphasis == nil {
1166		return 0
1167	}
1168
1169	// skip one symbol if coming from emph3
1170	if len(data) > 1 && data[0] == c && data[1] == c {
1171		i = 1
1172	}
1173
1174	for i < len(data) {
1175		length := find_emph_char(data[i:], c)
1176		if length == 0 {
1177			return 0
1178		}
1179		i += length
1180		if i >= len(data) {
1181			return 0
1182		}
1183
1184		if i+1 < len(data) && data[i+1] == c {
1185			i++
1186			continue
1187		}
1188
1189		if data[i] == c && !isspace(data[i-1]) {
1190
1191			if rndr.ext_flags&MKDEXT_NO_INTRA_EMPHASIS != 0 {
1192				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1193					continue
1194				}
1195			}
1196
1197			work := bytes.NewBuffer(nil)
1198			parse_inline(work, rndr, data[:i])
1199			r := rndr.mk.emphasis(ob, work.Bytes(), rndr.mk.opaque)
1200			if r > 0 {
1201				return i + 1
1202			} else {
1203				return 0
1204			}
1205		}
1206	}
1207
1208	return 0
1209}
1210
1211func parse_emph2(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1212	render_method := rndr.mk.double_emphasis
1213	if c == '~' {
1214		render_method = rndr.mk.strikethrough
1215	}
1216
1217	if render_method == nil {
1218		return 0
1219	}
1220
1221	i := 0
1222
1223	for i < len(data) {
1224		length := find_emph_char(data[i:], c)
1225		if length == 0 {
1226			return 0
1227		}
1228		i += length
1229
1230		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1231			work := bytes.NewBuffer(nil)
1232			parse_inline(work, rndr, data[:i])
1233			r := render_method(ob, work.Bytes(), rndr.mk.opaque)
1234			if r > 0 {
1235				return i + 2
1236			} else {
1237				return 0
1238			}
1239		}
1240		i++
1241	}
1242	return 0
1243}
1244
1245func parse_emph3(ob *bytes.Buffer, rndr *render, data []byte, offset int, c byte) int {
1246	i := 0
1247	orig_data := data
1248	data = data[offset:]
1249
1250	for i < len(data) {
1251		length := find_emph_char(data[i:], c)
1252		if length == 0 {
1253			return 0
1254		}
1255		i += length
1256
1257		// skip whitespace preceded symbols
1258		if data[i] != c || isspace(data[i-1]) {
1259			continue
1260		}
1261
1262		switch {
1263		case (i+2 < len(data) && data[i+1] == c && data[i+2] == c && rndr.mk.triple_emphasis != nil):
1264			// triple symbol found
1265			work := bytes.NewBuffer(nil)
1266
1267			parse_inline(work, rndr, data[:i])
1268			r := rndr.mk.triple_emphasis(ob, work.Bytes(), rndr.mk.opaque)
1269			if r > 0 {
1270				return i + 3
1271			} else {
1272				return 0
1273			}
1274		case (i+1 < len(data) && data[i+1] == c):
1275			// double symbol found, hand over to emph1
1276			length = parse_emph1(ob, rndr, orig_data[offset-2:], c)
1277			if length == 0 {
1278				return 0
1279			} else {
1280				return length - 2
1281			}
1282		default:
1283			// single symbol found, hand over to emph2
1284			length = parse_emph2(ob, rndr, orig_data[offset-1:], c)
1285			if length == 0 {
1286				return 0
1287			} else {
1288				return length - 1
1289			}
1290		}
1291	}
1292	return 0
1293}
1294
// parse block-level data
//
// parse_block consumes data from the front, one block at a time. At each
// position the recognizers are tried in a fixed order -- ATX header, HTML
// block, empty line, horizontal rule, fenced code and table (when their
// extensions are enabled), blockquote, indented code, unordered list,
// ordered list -- and anything that matches none of them is parsed as a
// paragraph. Output goes to ob. Nothing is rendered once rndr.nesting has
// reached rndr.max_nesting.
func parse_block(ob *bytes.Buffer, rndr *render, data []byte) {
	if rndr.nesting >= rndr.max_nesting {
		return
	}
	rndr.nesting++

	for len(data) > 0 {
		if is_atxheader(rndr, data) {
			data = data[parse_atxheader(ob, rndr, data):]
			continue
		}
		// HTML blocks are only attempted when a blockhtml callback exists
		if data[0] == '<' && rndr.mk.blockhtml != nil {
			if i := parse_htmlblock(ob, rndr, data, true); i > 0 {
				data = data[i:]
				continue
			}
		}
		if i := is_empty(data); i > 0 {
			data = data[i:]
			continue
		}
		if is_hrule(data) {
			if rndr.mk.hrule != nil {
				rndr.mk.hrule(ob, rndr.mk.opaque)
			}
			// skip to the end of the line; the newline itself is left in
			// data for the next iteration
			var i int
			for i = 0; i < len(data) && data[i] != '\n'; i++ {
			}
			data = data[i:]
			continue
		}
		if rndr.ext_flags&MKDEXT_FENCED_CODE != 0 {
			if i := parse_fencedcode(ob, rndr, data); i > 0 {
				data = data[i:]
				continue
			}
		}
		if rndr.ext_flags&MKDEXT_TABLES != 0 {
			if i := parse_table(ob, rndr, data); i > 0 {
				data = data[i:]
				continue
			}
		}
		if prefix_quote(data) > 0 {
			data = data[parse_blockquote(ob, rndr, data):]
			continue
		}
		if prefix_code(data) > 0 {
			data = data[parse_blockcode(ob, rndr, data):]
			continue
		}
		if prefix_uli(data) > 0 {
			data = data[parse_list(ob, rndr, data, 0):]
			continue
		}
		if prefix_oli(data) > 0 {
			data = data[parse_list(ob, rndr, data, MKD_LIST_ORDERED):]
			continue
		}

		// nothing else matched: treat as a paragraph
		data = data[parse_paragraph(ob, rndr, data):]
	}

	rndr.nesting--
}
1361
1362func is_atxheader(rndr *render, data []byte) bool {
1363	if data[0] != '#' {
1364		return false
1365	}
1366
1367	if rndr.ext_flags&MKDEXT_SPACE_HEADERS != 0 {
1368		level := 0
1369		for level < len(data) && level < 6 && data[level] == '#' {
1370			level++
1371		}
1372		if level < len(data) && data[level] != ' ' && data[level] != '\t' {
1373			return false
1374		}
1375	}
1376	return true
1377}
1378
1379func parse_atxheader(ob *bytes.Buffer, rndr *render, data []byte) int {
1380	level := 0
1381	for level < len(data) && level < 6 && data[level] == '#' {
1382		level++
1383	}
1384	i, end := 0, 0
1385	for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ {
1386	}
1387	for end = i; end < len(data) && data[end] != '\n'; end++ {
1388	}
1389	skip := end
1390	for end > 0 && data[end-1] == '#' {
1391		end--
1392	}
1393	for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') {
1394		end--
1395	}
1396	if end > i {
1397		work := bytes.NewBuffer(nil)
1398		parse_inline(work, rndr, data[i:end])
1399		if rndr.mk.header != nil {
1400			rndr.mk.header(ob, work.Bytes(), level, rndr.mk.opaque)
1401		}
1402	}
1403	return skip
1404}
1405
// is_headerline reports whether the first line of data is a setext header
// underline. It returns 1 for a line of '=' characters, 2 for a line of '-'
// characters, and 0 otherwise. The run may be followed only by spaces or tabs
// up to the newline or the end of data. Empty input returns 0.
func is_headerline(data []byte) int {
	if len(data) == 0 {
		return 0
	}

	c := data[0]
	level := 0
	switch c {
	case '=':
		level = 1
	case '-':
		level = 2
	default:
		return 0
	}

	// run of the underline character
	i := 1
	for i < len(data) && data[i] == c {
		i++
	}
	// optional trailing blanks
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i >= len(data) || data[i] == '\n' {
		return level
	}
	return 0
}
1439
1440func parse_htmlblock(ob *bytes.Buffer, rndr *render, data []byte, do_render bool) int {
1441	var i, j int
1442
1443	// identify the opening tag
1444	if len(data) < 2 || data[0] != '<' {
1445		return 0
1446	}
1447	curtag, tagfound := find_block_tag(data[1:])
1448
1449	// handle special cases
1450	if !tagfound {
1451
1452		// HTML comment, laxist form
1453		if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' {
1454			i = 5
1455
1456			for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
1457				i++
1458			}
1459			i++
1460
1461			if i < len(data) {
1462				j = is_empty(data[i:])
1463			}
1464
1465			if j > 0 {
1466				size := i + j
1467				if do_render && rndr.mk.blockhtml != nil {
1468					rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1469				}
1470				return size
1471			}
1472		}
1473
1474		// HR, which is the only self-closing block tag considered
1475		if len(data) > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') {
1476			i = 3
1477			for i < len(data) && data[i] != '>' {
1478				i++
1479			}
1480
1481			if i+1 < len(data) {
1482				i++
1483				j = is_empty(data[i:])
1484				if j > 0 {
1485					size := i + j
1486					if do_render && rndr.mk.blockhtml != nil {
1487						rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1488					}
1489					return size
1490				}
1491			}
1492		}
1493
1494		// no special case recognized
1495		return 0
1496	}
1497
1498	// look for an unindented matching closing tag
1499	//      followed by a blank line
1500	i = 1
1501	found := false
1502
1503	// if not found, try a second pass looking for indented match
1504	// but not if tag is "ins" or "del" (following original Markdown.pl)
1505	if curtag != "ins" && curtag != "del" {
1506		i = 1
1507		for i < len(data) {
1508			i++
1509			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
1510				i++
1511			}
1512
1513			if i+2+len(curtag) >= len(data) {
1514				break
1515			}
1516
1517			j = htmlblock_end(curtag, rndr, data[i-1:])
1518
1519			if j > 0 {
1520				i += j - 1
1521				found = true
1522				break
1523			}
1524		}
1525	}
1526
1527	if !found {
1528		return 0
1529	}
1530
1531	// the end of the block has been found
1532	if do_render && rndr.mk.blockhtml != nil {
1533		rndr.mk.blockhtml(ob, data[:i], rndr.mk.opaque)
1534	}
1535
1536	return i
1537}
1538
1539func find_block_tag(data []byte) (string, bool) {
1540	i := 0
1541	for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) {
1542		i++
1543	}
1544	if i >= len(data) {
1545		return "", false
1546	}
1547	key := string(data[:i])
1548	if block_tags[key] {
1549		return key, true
1550	}
1551	return "", false
1552}
1553
1554func htmlblock_end(tag string, rndr *render, data []byte) int {
1555	// assume data[0] == '<' && data[1] == '/' already tested
1556
1557	// check if tag is a match
1558	if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' {
1559		return 0
1560	}
1561
1562	// check white lines
1563	i := len(tag) + 3
1564	w := 0
1565	if i < len(data) {
1566		if w = is_empty(data[i:]); w == 0 {
1567			return 0 // non-blank after tag
1568		}
1569	}
1570	i += w
1571	w = 0
1572
1573	if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
1574		if i < len(data) {
1575			w = is_empty(data[i:])
1576		}
1577	} else {
1578		if i < len(data) {
1579			if w = is_empty(data[i:]); w == 0 {
1580				return 0 // non-blank line after tag line
1581			}
1582		}
1583	}
1584
1585	return i + w
1586}
1587
// is_empty checks whether the first line of data holds only spaces and tabs.
// It returns 0 if any other byte precedes the newline; otherwise it returns
// the number of blank bytes plus one for the newline. The "plus one" is added
// even when no newline is present, so the result can exceed len(data) (an
// empty slice yields 1).
func is_empty(data []byte) int {
	blanks := 0
	for _, ch := range data {
		if ch == '\n' {
			break
		}
		if ch != ' ' && ch != '\t' {
			return 0
		}
		blanks++
	}
	return blanks + 1
}
1597
1598func is_hrule(data []byte) bool {
1599	// skip initial spaces
1600	if len(data) < 3 {
1601		return false
1602	}
1603	i := 0
1604	if data[0] == ' ' {
1605		i++
1606		if data[1] == ' ' {
1607			i++
1608			if data[2] == ' ' {
1609				i++
1610			}
1611		}
1612	}
1613
1614	// look at the hrule char
1615	if i+2 >= len(data) || (data[i] != '*' && data[i] != '-' && data[i] != '_') {
1616		return false
1617	}
1618	c := data[i]
1619
1620	// the whole line must be the char or whitespace
1621	n := 0
1622	for i < len(data) && data[i] != '\n' {
1623		switch {
1624		case data[i] == c:
1625			n++
1626		case data[i] != ' ' && data[i] != '\t':
1627			return false
1628		}
1629		i++
1630	}
1631
1632	return n >= 3
1633}
1634
1635func is_codefence(data []byte, syntax **string) int {
1636	i, n := 0, 0
1637
1638	// skip initial spaces
1639	if len(data) < 3 {
1640		return 0
1641	}
1642	if data[0] == ' ' {
1643		i++
1644		if data[1] == ' ' {
1645			i++
1646			if data[2] == ' ' {
1647				i++
1648			}
1649		}
1650	}
1651
1652	// look at the hrule char
1653	if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') {
1654		return 0
1655	}
1656
1657	c := data[i]
1658
1659	// the whole line must be the char or whitespace
1660	for i < len(data) && data[i] == c {
1661		n++
1662		i++
1663	}
1664
1665	if n < 3 {
1666		return 0
1667	}
1668
1669	if syntax != nil {
1670		syn := 0
1671
1672		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
1673			i++
1674		}
1675
1676		syntax_start := i
1677
1678		if i < len(data) && data[i] == '{' {
1679			i++
1680			syntax_start++
1681
1682			for i < len(data) && data[i] != '}' && data[i] != '\n' {
1683				syn++
1684				i++
1685			}
1686
1687			if i == len(data) || data[i] != '}' {
1688				return 0
1689			}
1690
1691			// string all whitespace at the beginning and the end
1692			// of the {} block
1693			for syn > 0 && isspace(data[syntax_start]) {
1694				syntax_start++
1695				syn--
1696			}
1697
1698			for syn > 0 && isspace(data[syntax_start+syn-1]) {
1699				syn--
1700			}
1701
1702			i++
1703		} else {
1704			for i < len(data) && !isspace(data[i]) {
1705				syn++
1706				i++
1707			}
1708		}
1709
1710		language := string(data[syntax_start : syntax_start+syn])
1711		*syntax = &language
1712	}
1713
1714	for i < len(data) && data[i] != '\n' {
1715		if !isspace(data[i]) {
1716			return 0
1717		}
1718		i++
1719	}
1720
1721	return i + 1
1722}
1723
1724func parse_fencedcode(ob *bytes.Buffer, rndr *render, data []byte) int {
1725	var lang *string
1726	beg := is_codefence(data, &lang)
1727	if beg == 0 {
1728		return 0
1729	}
1730
1731	work := bytes.NewBuffer(nil)
1732
1733	for beg < len(data) {
1734		fence_end := is_codefence(data[beg:], nil)
1735		if fence_end != 0 {
1736			beg += fence_end
1737			break
1738		}
1739
1740		var end int
1741		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1742		}
1743
1744		if beg < end {
1745			// verbatim copy to the working buffer, escaping entities
1746			if is_empty(data[beg:]) > 0 {
1747				work.WriteByte('\n')
1748			} else {
1749				work.Write(data[beg:end])
1750			}
1751		}
1752		beg = end
1753	}
1754
1755	if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
1756		work.WriteByte('\n')
1757	}
1758
1759	if rndr.mk.blockcode != nil {
1760		syntax := ""
1761		if lang != nil {
1762			syntax = *lang
1763		}
1764
1765		rndr.mk.blockcode(ob, work.Bytes(), syntax, rndr.mk.opaque)
1766	}
1767
1768	return beg
1769}
1770
1771func parse_table(ob *bytes.Buffer, rndr *render, data []byte) int {
1772	header_work := bytes.NewBuffer(nil)
1773	i, columns, col_data := parse_table_header(header_work, rndr, data)
1774	if i > 0 {
1775		body_work := bytes.NewBuffer(nil)
1776
1777		for i < len(data) {
1778			pipes, row_start := 0, i
1779			for ; i < len(data) && data[i] != '\n'; i++ {
1780				if data[i] == '|' {
1781					pipes++
1782				}
1783			}
1784
1785			if pipes == 0 || i == len(data) {
1786				i = row_start
1787				break
1788			}
1789
1790			parse_table_row(body_work, rndr, data[row_start:i], columns, col_data)
1791			i++
1792		}
1793
1794		if rndr.mk.table != nil {
1795			rndr.mk.table(ob, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque)
1796		}
1797	}
1798
1799	return i
1800}
1801
1802func parse_table_header(ob *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) {
1803	i, pipes := 0, 0
1804	column_data = []int{}
1805	for i = 0; i < len(data) && data[i] != '\n'; i++ {
1806		if data[i] == '|' {
1807			pipes++
1808		}
1809	}
1810
1811	if i == len(data) || pipes == 0 {
1812		return 0, 0, column_data
1813	}
1814
1815	header_end := i
1816
1817	if data[0] == '|' {
1818		pipes--
1819	}
1820
1821	if i > 2 && data[i-1] == '|' {
1822		pipes--
1823	}
1824
1825	columns = pipes + 1
1826	column_data = make([]int, columns)
1827
1828	// parse the header underline
1829	i++
1830	if i < len(data) && data[i] == '|' {
1831		i++
1832	}
1833
1834	under_end := i
1835	for under_end < len(data) && data[under_end] != '\n' {
1836		under_end++
1837	}
1838
1839	col := 0
1840	for ; col < columns && i < under_end; col++ {
1841		dashes := 0
1842
1843		for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1844			i++
1845		}
1846
1847		if data[i] == ':' {
1848			i++
1849			column_data[col] |= MKD_TABLE_ALIGN_L
1850			dashes++
1851		}
1852
1853		for i < under_end && data[i] == '-' {
1854			i++
1855			dashes++
1856		}
1857
1858		if i < under_end && data[i] == ':' {
1859			i++
1860			column_data[col] |= MKD_TABLE_ALIGN_R
1861			dashes++
1862		}
1863
1864		for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1865			i++
1866		}
1867
1868		if i < under_end && data[i] != '|' {
1869			break
1870		}
1871
1872		if dashes < 3 {
1873			break
1874		}
1875
1876		i++
1877	}
1878
1879	if col < columns {
1880		return 0, 0, column_data
1881	}
1882
1883	parse_table_row(ob, rndr, data[:header_end], columns, column_data)
1884	size = under_end + 1
1885	return
1886}
1887
1888func parse_table_row(ob *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) {
1889	i, col := 0, 0
1890	row_work := bytes.NewBuffer(nil)
1891
1892	if i < len(data) && data[i] == '|' {
1893		i++
1894	}
1895
1896	for col = 0; col < columns && i < len(data); col++ {
1897		for i < len(data) && isspace(data[i]) {
1898			i++
1899		}
1900
1901		cell_start := i
1902
1903		for i < len(data) && data[i] != '|' {
1904			i++
1905		}
1906
1907		cell_end := i - 1
1908
1909		for cell_end > cell_start && isspace(data[cell_end]) {
1910			cell_end--
1911		}
1912
1913		cell_work := bytes.NewBuffer(nil)
1914		parse_inline(cell_work, rndr, data[cell_start:cell_end+1])
1915
1916		if rndr.mk.table_cell != nil {
1917			cdata := 0
1918			if col < len(col_data) {
1919				cdata = col_data[col]
1920			}
1921			rndr.mk.table_cell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque)
1922		}
1923
1924		i++
1925	}
1926
1927	for ; col < columns; col++ {
1928		empty_cell := []byte{}
1929		if rndr.mk.table_cell != nil {
1930			cdata := 0
1931			if col < len(col_data) {
1932				cdata = col_data[col]
1933			}
1934			rndr.mk.table_cell(row_work, empty_cell, cdata, rndr.mk.opaque)
1935		}
1936	}
1937
1938	if rndr.mk.table_row != nil {
1939		rndr.mk.table_row(ob, row_work.Bytes(), rndr.mk.opaque)
1940	}
1941}
1942
// prefix_quote returns the length of the blockquote marker at the start of
// data: up to three spaces, '>', and one optional space or tab. It returns 0
// when data does not start with such a marker.
func prefix_quote(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}
	if n >= len(data) || data[n] != '>' {
		return 0
	}
	if n+1 < len(data) && (data[n+1] == ' ' || data[n+1] == '\t') {
		return n + 2
	}
	return n + 1
}
1957
1958// parse a blockquote fragment
1959func parse_blockquote(ob *bytes.Buffer, rndr *render, data []byte) int {
1960	out := bytes.NewBuffer(nil)
1961	work := bytes.NewBuffer(nil)
1962	beg, end := 0, 0
1963	for beg < len(data) {
1964		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1965		}
1966
1967		if pre := prefix_quote(data[beg:]); pre > 0 {
1968			beg += pre // skip prefix
1969		} else {
1970			// empty line followed by non-quote line
1971			if is_empty(data[beg:]) > 0 && (end >= len(data) || (prefix_quote(data[end:]) == 0 && is_empty(data[end:]) == 0)) {
1972				break
1973			}
1974		}
1975
1976		if beg < end { // copy into the in-place working buffer
1977			work.Write(data[beg:end])
1978		}
1979		beg = end
1980	}
1981
1982	parse_block(out, rndr, work.Bytes())
1983	if rndr.mk.blockquote != nil {
1984		rndr.mk.blockquote(ob, out.Bytes(), rndr.mk.opaque)
1985	}
1986	return end
1987}
1988
// prefix_code returns the length of the indented-code prefix at the start of
// data: 1 for a leading tab, 4 for four leading spaces, 0 otherwise.
func prefix_code(data []byte) int {
	switch {
	case len(data) > 0 && data[0] == '\t':
		return 1
	case len(data) > 3 && string(data[:4]) == "    ":
		return 4
	}
	return 0
}
1999
2000func parse_blockcode(ob *bytes.Buffer, rndr *render, data []byte) int {
2001	work := bytes.NewBuffer(nil)
2002
2003	beg, end := 0, 0
2004	for beg < len(data) {
2005		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
2006		}
2007
2008		if pre := prefix_code(data[beg:end]); pre > 0 {
2009			beg += pre
2010		} else {
2011			if is_empty(data[beg:end]) == 0 {
2012				// non-empty non-prefixed line breaks the pre
2013				break
2014			}
2015		}
2016
2017		if beg < end {
2018			// verbatim copy to the working buffer, escaping entities
2019			if is_empty(data[beg:end]) > 0 {
2020				work.WriteByte('\n')
2021			} else {
2022				work.Write(data[beg:end])
2023			}
2024		}
2025		beg = end
2026	}
2027
2028	// trim all the \n off the end of work
2029	workbytes := work.Bytes()
2030	n := 0
2031	for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' {
2032		n++
2033	}
2034	if n > 0 {
2035		work = bytes.NewBuffer(workbytes[:len(workbytes)-n])
2036	}
2037
2038	work.WriteByte('\n')
2039
2040	if rndr.mk.blockcode != nil {
2041		rndr.mk.blockcode(ob, work.Bytes(), "", rndr.mk.opaque)
2042	}
2043
2044	return beg
2045}
2046
// prefix_uli returns the length of an unordered list marker at the start of
// data: up to three spaces, one of '*', '+' or '-', then a space or tab.
// It returns 0 when data does not start with such a marker.
func prefix_uli(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}
	if n+1 >= len(data) {
		return 0
	}
	switch data[n] {
	case '*', '+', '-':
		// bullet character found
	default:
		return 0
	}
	if data[n+1] != ' ' && data[n+1] != '\t' {
		return 0
	}
	return n + 2
}
2058
// prefix_oli returns the length of an ordered list marker at the start of
// data: up to three spaces, one or more digits, '.', then a space or tab.
// It returns 0 when data does not start with such a marker.
func prefix_oli(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}

	firstDigit := n
	for n < len(data) && data[n] >= '0' && data[n] <= '9' {
		n++
	}
	if n == firstDigit {
		return 0
	}

	if n+1 >= len(data) || data[n] != '.' {
		return 0
	}
	if data[n+1] != ' ' && data[n+1] != '\t' {
		return 0
	}
	return n + 2
}
2076
2077// parse ordered or unordered list block
2078func parse_list(ob *bytes.Buffer, rndr *render, data []byte, flags int) int {
2079	work := bytes.NewBuffer(nil)
2080
2081	i, j := 0, 0
2082	for i < len(data) {
2083		j = parse_listitem(work, rndr, data[i:], &flags)
2084		i += j
2085
2086		if j == 0 || flags&MKD_LI_END != 0 {
2087			break
2088		}
2089	}
2090
2091	if rndr.mk.list != nil {
2092		rndr.mk.list(ob, work.Bytes(), flags, rndr.mk.opaque)
2093	}
2094	return i
2095}
2096
2097// parse a single list item
2098// assumes initial prefix is already removed
2099func parse_listitem(ob *bytes.Buffer, rndr *render, data []byte, flags *int) int {
2100	// keep track of the first indentation prefix
2101	beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0
2102
2103	for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' {
2104		orgpre++
2105	}
2106
2107	beg = prefix_uli(data)
2108	if beg == 0 {
2109		beg = prefix_oli(data)
2110	}
2111	if beg == 0 {
2112		return 0
2113	}
2114
2115	// skip leading whitespace on first line
2116	for beg < len(data) && data[beg] == ' ' {
2117		beg++
2118	}
2119
2120	// skip to the beginning of the following line
2121	end = beg
2122	for end < len(data) && data[end-1] != '\n' {
2123		end++
2124	}
2125
2126	// get working buffers
2127	work := bytes.NewBuffer(nil)
2128	inter := bytes.NewBuffer(nil)
2129
2130	// put the first line into the working buffer
2131	work.Write(data[beg:end])
2132	beg = end
2133
2134	// process the following lines
2135	in_empty, has_inside_empty := false, false
2136	for beg < len(data) {
2137		end++
2138
2139		for end < len(data) && data[end-1] != '\n' {
2140			end++
2141		}
2142
2143		// process an empty line
2144		if is_empty(data[beg:end]) > 0 {
2145			in_empty = true
2146			beg = end
2147			continue
2148		}
2149
2150		// calculate the indentation
2151		i = 0
2152		for i < 4 && beg+i < end && data[beg+i] == ' ' {
2153			i++
2154		}
2155
2156		pre = i
2157		if data[beg] == '\t' {
2158			i = 1
2159			pre = 8
2160		}
2161
2162		// check for a new item
2163		chunk := data[beg+i : end]
2164		if (prefix_uli(chunk) > 0 && !is_hrule(chunk)) || prefix_oli(chunk) > 0 {
2165			if in_empty {
2166				has_inside_empty = true
2167			}
2168
2169			if pre == orgpre { // the following item must have the same indentation
2170				break
2171			}
2172
2173			if sublist == 0 {
2174				sublist = work.Len()
2175			}
2176		} else {
2177			// only join indented stuff after empty lines
2178			if in_empty && i < 4 && data[beg] != '\t' {
2179				*flags |= MKD_LI_END
2180				break
2181			} else {
2182				if in_empty {
2183					work.WriteByte('\n')
2184					has_inside_empty = true
2185				}
2186			}
2187		}
2188
2189		in_empty = false
2190
2191		// add the line into the working buffer without prefix
2192		work.Write(data[beg+i : end])
2193		beg = end
2194	}
2195
2196	// render li contents
2197	if has_inside_empty {
2198		*flags |= MKD_LI_BLOCK
2199	}
2200
2201	workbytes := work.Bytes()
2202	if *flags&MKD_LI_BLOCK != 0 {
2203		// intermediate render of block li
2204		if sublist > 0 && sublist < len(workbytes) {
2205			parse_block(inter, rndr, workbytes[:sublist])
2206			parse_block(inter, rndr, workbytes[sublist:])
2207		} else {
2208			parse_block(inter, rndr, workbytes)
2209		}
2210	} else {
2211		// intermediate render of inline li
2212		if sublist > 0 && sublist < len(workbytes) {
2213			parse_inline(inter, rndr, workbytes[:sublist])
2214			parse_block(inter, rndr, workbytes[sublist:])
2215		} else {
2216			parse_inline(inter, rndr, workbytes)
2217		}
2218	}
2219
2220	// render li itself
2221	if rndr.mk.listitem != nil {
2222		rndr.mk.listitem(ob, inter.Bytes(), *flags, rndr.mk.opaque)
2223	}
2224
2225	return beg
2226}
2227
2228func parse_paragraph(ob *bytes.Buffer, rndr *render, data []byte) int {
2229	i, end, level := 0, 0, 0
2230
2231	for i < len(data) {
2232		for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ {
2233		}
2234
2235		if is_empty(data[i:]) > 0 {
2236			break
2237		}
2238		if level = is_headerline(data[i:]); level > 0 {
2239			break
2240		}
2241
2242		if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
2243			if data[i] == '<' && rndr.mk.blockhtml != nil && parse_htmlblock(ob, rndr, data[i:], false) > 0 {
2244				end = i
2245				break
2246			}
2247		}
2248
2249		if is_atxheader(rndr, data[i:]) || is_hrule(data[i:]) {
2250			end = i
2251			break
2252		}
2253
2254		i = end
2255	}
2256
2257	work := data
2258	size := i
2259	for size > 0 && work[size-1] == '\n' {
2260		size--
2261	}
2262
2263	if level == 0 {
2264		tmp := bytes.NewBuffer(nil)
2265		parse_inline(tmp, rndr, work[:size])
2266		if rndr.mk.paragraph != nil {
2267			rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
2268		}
2269	} else {
2270		if size > 0 {
2271			beg := 0
2272			i = size
2273			size--
2274
2275			for size > 0 && work[size] != '\n' {
2276				size--
2277			}
2278
2279			beg = size + 1
2280			for size > 0 && work[size-1] == '\n' {
2281				size--
2282			}
2283
2284			if size > 0 {
2285				tmp := bytes.NewBuffer(nil)
2286				parse_inline(tmp, rndr, work[:size])
2287				if rndr.mk.paragraph != nil {
2288					rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
2289				}
2290
2291				work = work[beg:]
2292				size = i - beg
2293			} else {
2294				size = i
2295			}
2296		}
2297
2298		header_work := bytes.NewBuffer(nil)
2299		parse_inline(header_work, rndr, work[:size])
2300
2301		if rndr.mk.header != nil {
2302			rndr.mk.header(ob, header_work.Bytes(), level, rndr.mk.opaque)
2303		}
2304	}
2305
2306	return end
2307}
2308
2309
2310//
2311//
2312// HTML rendering
2313//
2314//
2315
// Option bits for the HTML renderer; they are tested against
// html_renderopts.flags by the rndr_* callbacks.
const (
	HTML_SKIP_HTML = 1 << iota // rndr_raw_html_tag emits no raw inline tags at all
	HTML_SKIP_STYLE // rndr_raw_html_tag drops tags matched by is_html_tag(text, "style")
	HTML_SKIP_IMAGES // rndr_raw_html_tag drops tags matched by is_html_tag(text, "img")
	HTML_SKIP_LINKS // rndr_raw_html_tag drops tags matched by is_html_tag(text, "a")
	HTML_EXPAND_TABS // not referenced in this part of the file
	HTML_SAFELINK // rndr_link / rndr_autolink refuse links rejected by is_safe_link
	HTML_TOC // rndr_header adds an id="toc_N" attribute to headers
	HTML_HARD_WRAP // rndr_paragraph turns newlines inside a paragraph into line breaks
	HTML_GITHUB_BLOCKCODE // not referenced in this part of the file
	HTML_USE_XHTML // not referenced here; presumably selects an XHTML-style close_tag (TODO confirm)
)
2328
// html_renderopts is the renderer state the HTML callbacks obtain by
// type-asserting their opaque argument.
type html_renderopts struct {
	// table-of-contents bookkeeping
	toc_data struct {
		header_count  int // used for the next "toc_N" id; incremented by rndr_header under HTML_TOC
		current_level int // not used in this part of the file
	}
	flags     uint32 // bitmask of HTML_* options
	close_tag string // written right after an unterminated tag ("<hr", "<br", an <img ...") to finish it
}
2337
// attr_escape copies src to ob, replacing '<', '>', '&' and '"' with the
// entities &lt;, &gt;, &amp; and &quot;. All other bytes are copied as is.
func attr_escape(ob *bytes.Buffer, src []byte) {
	pending := 0 // start of the run of bytes not yet copied
	for pos, ch := range src {
		var entity string
		switch ch {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// flush the unescaped run, then the entity
		if pos > pending {
			ob.Write(src[pending:pos])
		}
		ob.WriteString(entity)
		pending = pos + 1
	}
	if pending < len(src) {
		ob.Write(src[pending:])
	}
}
2365
// unescape_text copies src to ob with backslash escapes resolved: each
// backslash is dropped and the byte after it is copied literally. A lone
// backslash at the very end of src is dropped.
func unescape_text(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		slash := bytes.IndexByte(src, '\\')
		if slash < 0 {
			// no more escapes: copy the rest
			ob.Write(src)
			return
		}
		if slash > 0 {
			ob.Write(src[:slash])
		}
		if slash+1 >= len(src) {
			return
		}
		ob.WriteByte(src[slash+1])
		src = src[slash+2:]
	}
}
2386
2387func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
2388	options := opaque.(*html_renderopts)
2389
2390	if ob.Len() > 0 {
2391		ob.WriteByte('\n')
2392	}
2393
2394	if options.flags&HTML_TOC != 0 {
2395		ob.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
2396		options.toc_data.header_count++
2397	} else {
2398		ob.WriteString(fmt.Sprintf("<h%d>", level))
2399	}
2400
2401	ob.Write(text)
2402	ob.WriteString(fmt.Sprintf("</h%d>\n", level))
2403}
2404
// rndr_raw_block writes a raw HTML block to ob with leading and trailing
// newlines removed and exactly one newline appended. A separating newline is
// written first when ob already holds output. Text consisting only of
// newlines (or nothing) writes nothing at all. opaque is unused.
func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) {
	block := bytes.Trim(text, "\n")
	if len(block) == 0 {
		return
	}
	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}
	ob.Write(block)
	ob.WriteByte('\n')
}
2423
2424func rndr_hrule(ob *bytes.Buffer, opaque interface{}) {
2425	options := opaque.(*html_renderopts)
2426
2427	if ob.Len() > 0 {
2428		ob.WriteByte('\n')
2429	}
2430	ob.WriteString("<hr")
2431	ob.WriteString(options.close_tag)
2432}
2433
2434func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
2435	if ob.Len() > 0 {
2436		ob.WriteByte('\n')
2437	}
2438
2439	if lang != "" {
2440		ob.WriteString("<pre><code class=\"")
2441
2442		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
2443			for i < len(lang) && isspace(lang[i]) {
2444				i++
2445			}
2446
2447			if i < len(lang) {
2448				org := i
2449				for i < len(lang) && !isspace(lang[i]) {
2450					i++
2451				}
2452
2453				if lang[org] == '.' {
2454					org++
2455				}
2456
2457				if cls > 0 {
2458					ob.WriteByte(' ')
2459				}
2460				attr_escape(ob, []byte(lang[org:]))
2461			}
2462		}
2463
2464		ob.WriteString("\">")
2465	} else {
2466		ob.WriteString("<pre><code>")
2467	}
2468
2469	if len(text) > 0 {
2470		attr_escape(ob, text)
2471	}
2472
2473	ob.WriteString("</code></pre>\n")
2474}
2475
// rndr_blockquote wraps the already rendered text in a <blockquote> element.
// No newline is written after the closing tag. opaque is unused.
func rndr_blockquote(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)
	ob.WriteString(openTag)
	ob.Write(text)
	ob.WriteString(closeTag)
}
2481
// rndr_table writes a <table> made of the already rendered header and body
// rows, preceded by a newline when ob already holds output. opaque is unused.
func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	// thead section
	ob.WriteString("<table><thead>\n")
	ob.Write(header)

	// tbody section
	ob.WriteString("\n</thead><tbody>\n")
	ob.Write(body)

	ob.WriteString("\n</tbody></table>")
}
2492
// rndr_tablerow wraps the already rendered cells in a <tr> element, preceded
// by a newline when ob already holds output. opaque is unused.
func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	const (
		rowOpen  = "<tr>\n"
		rowClose = "\n</tr>"
	)
	ob.WriteString(rowOpen)
	ob.Write(text)
	ob.WriteString(rowClose)
}
2501
2502func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) {
2503	if ob.Len() > 0 {
2504		ob.WriteByte('\n')
2505	}
2506	switch align {
2507	case MKD_TABLE_ALIGN_L:
2508		ob.WriteString("<td align=\"left\">")
2509	case MKD_TABLE_ALIGN_R:
2510		ob.WriteString("<td align=\"right\">")
2511	case MKD_TABLE_ALIGN_CENTER:
2512		ob.WriteString("<td align=\"center\">")
2513	default:
2514		ob.WriteString("<td>")
2515	}
2516
2517	ob.Write(text)
2518	ob.WriteString("</td>")
2519}
2520
2521func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
2522	if ob.Len() > 0 {
2523		ob.WriteByte('\n')
2524	}
2525	if flags&MKD_LIST_ORDERED != 0 {
2526		ob.WriteString("<ol>\n")
2527	} else {
2528		ob.WriteString("<ul>\n")
2529	}
2530	ob.Write(text)
2531	if flags&MKD_LIST_ORDERED != 0 {
2532		ob.WriteString("</ol>\n")
2533	} else {
2534		ob.WriteString("</ul>\n")
2535	}
2536}
2537
// rndr_listitem writes text as an <li> element with trailing newlines of the
// content removed. flags and opaque are unused.
func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	ob.WriteString("<li>")
	ob.Write(bytes.TrimRight(text, "\n"))
	ob.WriteString("</li>\n")
}
2547
2548func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) {
2549	options := opaque.(*html_renderopts)
2550	i := 0
2551
2552	if ob.Len() > 0 {
2553		ob.WriteByte('\n')
2554	}
2555
2556	if len(text) == 0 {
2557		return
2558	}
2559
2560	for i < len(text) && isspace(text[i]) {
2561		i++
2562	}
2563
2564	if i == len(text) {
2565		return
2566	}
2567
2568	ob.WriteString("<p>")
2569	if options.flags&HTML_HARD_WRAP != 0 {
2570		for i < len(text) {
2571			org := i
2572			for i < len(text) && text[i] != '\n' {
2573				i++
2574			}
2575
2576			if i > org {
2577				ob.Write(text[org:i])
2578			}
2579
2580			if i >= len(text) {
2581				break
2582			}
2583
2584			ob.WriteString("<br>")
2585			ob.WriteString(options.close_tag)
2586			i++
2587		}
2588	} else {
2589		ob.Write(text[i:])
2590	}
2591	ob.WriteString("</p>\n")
2592}
2593
2594func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
2595	options := opaque.(*html_renderopts)
2596
2597	if len(link) == 0 {
2598		return 0
2599	}
2600	if options.flags&HTML_SAFELINK != 0 && !is_safe_link(link) && kind != MKDA_EMAIL {
2601		return 0
2602	}
2603
2604	ob.WriteString("<a href=\"")
2605	if kind == MKDA_EMAIL {
2606		ob.WriteString("mailto:")
2607	}
2608	ob.Write(link)
2609	ob.WriteString("\">")
2610
2611	/*
2612	 * Pretty print: if we get an email address as
2613	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
2614	 * want to print the `mailto:` prefix
2615	 */
2616	if bytes.HasPrefix(link, []byte("mailto:")) {
2617		attr_escape(ob, link[7:])
2618	} else {
2619		attr_escape(ob, link)
2620	}
2621
2622	ob.WriteString("</a>")
2623
2624	return 1
2625}
2626
2627func rndr_codespan(ob *bytes.Buffer, text []byte, opaque interface{}) int {
2628	ob.WriteString("<code>")
2629	attr_escape(ob, text)
2630	ob.WriteString("</code>")
2631	return 1
2632}
2633
// rndr_double_emphasis wraps the already rendered text in <strong> tags.
// Returns 1 on output and 0, writing nothing, when text is empty.
// opaque is unused.
func rndr_double_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<strong>")
		ob.Write(text)
		ob.WriteString("</strong>")
		return 1
	}
	return 0
}
2643
// rndr_emphasis wraps the already rendered text in <em> tags.
// Returns 1 on output and 0, writing nothing, when text is empty.
// opaque is unused.
func rndr_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<em>")
		ob.Write(text)
		ob.WriteString("</em>")
		return 1
	}
	return 0
}
2653
2654func rndr_image(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
2655	options := opaque.(*html_renderopts)
2656	if len(link) == 0 {
2657		return 0
2658	}
2659	ob.WriteString("<img src=\"")
2660	attr_escape(ob, link)
2661	ob.WriteString("\" alt=\"")
2662	if len(alt) > 0 {
2663		attr_escape(ob, alt)
2664	}
2665	if len(title) > 0 {
2666		ob.WriteString("\" title=\"")
2667		attr_escape(ob, title)
2668	}
2669
2670	ob.WriteByte('"')
2671	ob.WriteString(options.close_tag)
2672	return 1
2673}
2674
2675func rndr_linebreak(ob *bytes.Buffer, opaque interface{}) int {
2676	options := opaque.(*html_renderopts)
2677	ob.WriteString("<br")
2678	ob.WriteString(options.close_tag)
2679	return 1
2680}
2681
2682func rndr_link(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
2683	options := opaque.(*html_renderopts)
2684
2685	if options.flags&HTML_SAFELINK != 0 && !is_safe_link(link) {
2686		return 0
2687	}
2688
2689	ob.WriteString("<a href=\"")
2690	if len(link) > 0 {
2691		ob.Write(link)
2692	}
2693	if len(title) > 0 {
2694		ob.WriteString("\" title=\"")
2695		attr_escape(ob, title)
2696	}
2697	ob.WriteString("\">")
2698	if len(content) > 0 {
2699		ob.Write(content)
2700	}
2701	ob.WriteString("</a>")
2702	return 1
2703}
2704
2705func rndr_raw_html_tag(ob *bytes.Buffer, text []byte, opaque interface{}) int {
2706	options := opaque.(*html_renderopts)
2707	if options.flags&HTML_SKIP_HTML != 0 {
2708		return 1
2709	}
2710	if options.flags&HTML_SKIP_STYLE != 0 && is_html_tag(text, "style") {
2711		return 1
2712	}
2713	if options.flags&HTML_SKIP_LINKS != 0 && is_html_tag(text, "a") {
2714		return 1
2715	}
2716	if options.flags&HTML_SKIP_IMAGES != 0 && is_html_tag(text, "img") {
2717		return 1
2718	}
2719	ob.Write(text)
2720	return 1
2721}
2722
// rndr_triple_emphasis writes text to ob wrapped in
// <strong><em>...</em></strong>. The text is copied verbatim (no escaping
// is applied here). The opaque argument is not used.
//
// It returns 1 after writing, or 0 without touching ob when text is empty.
func rndr_triple_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<strong><em>")
		ob.Write(text)
		ob.WriteString("</em></strong>")
		return 1
	}
	return 0
}
2732
// rndr_strikethrough writes text to ob wrapped in <del>...</del>. The text
// is copied verbatim (no escaping is applied here). The opaque argument is
// not used.
//
// It returns 1 after writing, or 0 without touching ob when text is empty.
func rndr_strikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<del>")
		ob.Write(text)
		ob.WriteString("</del>")
		return 1
	}
	return 0
}
2742
2743func rndr_normal_text(ob *bytes.Buffer, text []byte, opaque interface{}) {
2744	attr_escape(ob, text)
2745}
2746
2747func is_html_tag(tag []byte, tagname string) bool {
2748	i := 0
2749	if i < len(tag) && tag[0] != '<' {
2750		return false
2751	}
2752	i++
2753	for i < len(tag) && isspace(tag[i]) {
2754		i++
2755	}
2756
2757	if i < len(tag) && tag[i] == '/' {
2758		i++
2759	}
2760
2761	for i < len(tag) && isspace(tag[i]) {
2762		i++
2763	}
2764
2765	tag_i := i
2766	for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
2767		if tag_i >= len(tagname) {
2768			break
2769		}
2770
2771		if tag[i] != tagname[tag_i] {
2772			return false
2773		}
2774	}
2775
2776	if i == len(tag) {
2777		return false
2778	}
2779
2780	return isspace(tag[i]) || tag[i] == '>'
2781}
2782
2783
2784//
2785//
2786// Public interface
2787//
2788//
2789
// expand_tabs copies line to ob, replacing every tab character with one to
// four spaces so that the following byte lands on the next multiple-of-four
// column. Columns are counted in bytes from the start of line.
func expand_tabs(ob *bytes.Buffer, line []byte) {
	const tabWidth = 4

	col := 0
	for rest := line; len(rest) > 0; {
		n := bytes.IndexByte(rest, '\t')
		if n < 0 {
			// no more tabs: flush the remainder and stop
			ob.Write(rest)
			return
		}

		// literal run in front of the tab
		if n > 0 {
			ob.Write(rest[:n])
			col += n
		}

		// pad up to the next tab stop; a tab sitting exactly on a stop
		// still advances a full tabWidth
		pad := tabWidth - col%tabWidth
		for k := 0; k < pad; k++ {
			ob.WriteByte(' ')
		}
		col += pad

		rest = rest[n+1:]
	}
}
2819
2820func Markdown(ob *bytes.Buffer, ib []byte, rndrer *mkd_renderer, extensions uint32) {
2821	// no point in parsing if we can't render
2822	if rndrer == nil {
2823		return
2824	}
2825
2826	// fill in the character-level parsers
2827	markdown_char_ptrs[MD_CHAR_NONE] = nil
2828	markdown_char_ptrs[MD_CHAR_EMPHASIS] = char_emphasis
2829	markdown_char_ptrs[MD_CHAR_CODESPAN] = char_codespan
2830	markdown_char_ptrs[MD_CHAR_LINEBREAK] = char_linebreak
2831	markdown_char_ptrs[MD_CHAR_LINK] = char_link
2832	markdown_char_ptrs[MD_CHAR_LANGLE] = char_langle_tag
2833	markdown_char_ptrs[MD_CHAR_ESCAPE] = char_escape
2834	markdown_char_ptrs[MD_CHAR_ENTITITY] = char_entity
2835	markdown_char_ptrs[MD_CHAR_AUTOLINK] = char_autolink
2836
2837	// fill in the render structure
2838	rndr := new(render)
2839	rndr.mk = rndrer
2840	rndr.ext_flags = extensions
2841	rndr.max_nesting = 16
2842
2843	if rndr.mk.emphasis != nil || rndr.mk.double_emphasis != nil || rndr.mk.triple_emphasis != nil {
2844		rndr.active_char['*'] = MD_CHAR_EMPHASIS
2845		rndr.active_char['_'] = MD_CHAR_EMPHASIS
2846		if extensions&MKDEXT_STRIKETHROUGH != 0 {
2847			rndr.active_char['~'] = MD_CHAR_EMPHASIS
2848		}
2849	}
2850	if rndr.mk.codespan != nil {
2851		rndr.active_char['`'] = MD_CHAR_CODESPAN
2852	}
2853	if rndr.mk.linebreak != nil {
2854		rndr.active_char['\n'] = MD_CHAR_LINEBREAK
2855	}
2856	if rndr.mk.image != nil || rndr.mk.link != nil {
2857		rndr.active_char['['] = MD_CHAR_LINK
2858	}
2859	rndr.active_char['<'] = MD_CHAR_LANGLE
2860	rndr.active_char['\\'] = MD_CHAR_ESCAPE
2861	rndr.active_char['&'] = MD_CHAR_ENTITITY
2862
2863	if extensions&MKDEXT_AUTOLINK != 0 {
2864		rndr.active_char['h'] = MD_CHAR_AUTOLINK // http, https
2865		rndr.active_char['H'] = MD_CHAR_AUTOLINK
2866
2867		rndr.active_char['f'] = MD_CHAR_AUTOLINK // ftp
2868		rndr.active_char['F'] = MD_CHAR_AUTOLINK
2869
2870		rndr.active_char['m'] = MD_CHAR_AUTOLINK // mailto
2871		rndr.active_char['M'] = MD_CHAR_AUTOLINK
2872	}
2873
2874	// first pass: look for references, copy everything else
2875	text := bytes.NewBuffer(nil)
2876	beg, end := 0, 0
2877	for beg < len(ib) { // iterate over lines
2878		if is_ref(ib, beg, &end, rndr) {
2879			beg = end
2880		} else { // skip to the next line
2881			end = beg
2882			for end < len(ib) && ib[end] != '\n' && ib[end] != '\r' {
2883				end++
2884			}
2885
2886			// add the line body if present
2887			if end > beg {
2888				expand_tabs(text, ib[beg:end])
2889			}
2890
2891			for end < len(ib) && (ib[end] == '\n' || ib[end] == '\r') {
2892				// add one \n per newline
2893				if ib[end] == '\n' || (end+1 < len(ib) && ib[end+1] != '\n') {
2894					text.WriteByte('\n')
2895				}
2896				end++
2897			}
2898
2899			beg = end
2900		}
2901	}
2902
2903	// sort the reference array
2904	if len(rndr.refs) > 1 {
2905		sort.Sort(rndr.refs)
2906	}
2907
2908	// second pass: actual rendering
2909	if rndr.mk.doc_header != nil {
2910		rndr.mk.doc_header(ob, rndr.mk.opaque)
2911	}
2912
2913	if text.Len() > 0 {
2914		// add a final newline if not already present
2915		finalchar := text.Bytes()[text.Len()-1]
2916		if finalchar != '\n' && finalchar != '\r' {
2917			text.WriteByte('\n')
2918		}
2919		parse_block(ob, rndr, text.Bytes())
2920	}
2921
2922	if rndr.mk.doc_footer != nil {
2923		rndr.mk.doc_footer(ob, rndr.mk.opaque)
2924	}
2925
2926	if rndr.nesting != 0 {
2927		panic("Nesting level did not end at zero")
2928	}
2929}
2930
2931func Config_html() *mkd_renderer {
2932	// configure the rendering engine
2933	rndrer := new(mkd_renderer)
2934	rndrer.blockcode = rndr_blockcode
2935	rndrer.blockquote = rndr_blockquote
2936	rndrer.blockhtml = rndr_raw_block
2937	rndrer.header = rndr_header
2938	rndrer.hrule = rndr_hrule
2939	rndrer.list = rndr_list
2940	rndrer.listitem = rndr_listitem
2941	rndrer.paragraph = rndr_paragraph
2942	rndrer.table = rndr_table
2943	rndrer.table_row = rndr_tablerow
2944	rndrer.table_cell = rndr_tablecell
2945
2946	rndrer.autolink = rndr_autolink
2947	rndrer.codespan = rndr_codespan
2948	rndrer.double_emphasis = rndr_double_emphasis
2949	rndrer.emphasis = rndr_emphasis
2950	rndrer.image = rndr_image
2951	rndrer.linebreak = rndr_linebreak
2952	rndrer.link = rndr_link
2953	rndrer.raw_html_tag = rndr_raw_html_tag
2954	rndrer.triple_emphasis = rndr_triple_emphasis
2955	rndrer.strikethrough = rndr_strikethrough
2956
2957	rndrer.normal_text = rndr_normal_text
2958
2959	rndrer.opaque = &html_renderopts{close_tag: " />\n"}
2960	return rndrer
2961}
2962
2963func main() {
2964	// read the input
2965	var ib []byte
2966	var err os.Error
2967	switch len(os.Args) {
2968	case 1:
2969		if ib, err = ioutil.ReadAll(os.Stdin); err != nil {
2970			fmt.Fprintln(os.Stderr, "Error reading from Stdin:", err)
2971			os.Exit(-1)
2972		}
2973	case 2, 3:
2974		if ib, err = ioutil.ReadFile(os.Args[1]); err != nil {
2975			fmt.Fprintln(os.Stderr, "Error reading from", os.Args[1], ":", err)
2976			os.Exit(-1)
2977		}
2978	default:
2979		fmt.Fprintln(os.Stderr, "Usage:", os.Args[0], "[inputfile [outputfile]]")
2980		os.Exit(-1)
2981	}
2982
2983	// call the main renderer function
2984	ob := bytes.NewBuffer(nil)
2985	var extensions uint32
2986	extensions |= MKDEXT_NO_INTRA_EMPHASIS
2987	extensions |= MKDEXT_TABLES
2988	extensions |= MKDEXT_FENCED_CODE
2989	extensions |= MKDEXT_AUTOLINK
2990	extensions |= MKDEXT_STRIKETHROUGH
2991	extensions |= MKDEXT_LAX_HTML_BLOCKS
2992	extensions |= MKDEXT_SPACE_HEADERS
2993	extensions = 0
2994
2995	Markdown(ob, ib, Config_html(), extensions)
2996
2997	// output the result
2998	if len(os.Args) == 3 {
2999		if err = ioutil.WriteFile(os.Args[2], ob.Bytes(), 0644); err != nil {
3000			fmt.Fprintln(os.Stderr, "Error writing to", os.Args[2], ":", err)
3001			os.Exit(-1)
3002		}
3003	} else {
3004		if _, err = os.Stdout.Write(ob.Bytes()); err != nil {
3005			fmt.Fprintln(os.Stderr, "Error writing to Stdout:", err)
3006			os.Exit(-1)
3007		}
3008	}
3009}