all repos — grayfriday @ 41159b38743bae579538f5af3ecfb4df1002e0de

blackfriday fork with a few changes

markdown.go (view raw)

  1// Blackfriday Markdown Processor
  2// Available at http://github.com/russross/blackfriday
  3//
  4// Copyright © 2011 Russ Ross <russ@russross.com>.
  5// Distributed under the Simplified BSD License.
  6// See README.md for details.
  7
  8package blackfriday
  9
 10import (
 11	"bytes"
 12	"fmt"
 13	"io"
 14	"strings"
 15	"unicode/utf8"
 16)
 17
 18//
 19// Markdown parsing and processing
 20//
 21
 22// Version string of the package.
 23const Version = "2.0"
 24
 25// Extensions is a bitwise or'ed collection of enabled Blackfriday's
 26// extensions.
 27type Extensions int
 28
 29// These are the supported markdown parsing extensions.
 30// OR these values together to select multiple extensions.
 31const (
 32	NoExtensions           Extensions = 0
 33	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
 34	Tables                                        // Render tables
 35	FencedCode                                    // Render fenced code blocks
 36	Autolink                                      // Detect embedded URLs that are not explicitly marked
 37	Strikethrough                                 // Strikethrough text using ~~test~~
 38	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
 39	SpaceHeadings                                 // Be strict about prefix heading rules
 40	HardLineBreak                                 // Translate newlines into line breaks
 41	TabSizeEight                                  // Expand tabs to eight spaces instead of four
 42	Footnotes                                     // Pandoc-style footnotes
 43	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
 44	HeadingIDs                                    // specify heading IDs  with {#id}
 45	Titleblock                                    // Titleblock ala pandoc
 46	AutoHeadingIDs                                // Create the heading ID from the text
 47	BackslashLineBreak                            // Translate trailing backslashes into line breaks
 48	DefinitionLists                               // Render definition lists
 49
 50	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
 51		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
 52
 53	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
 54		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
 55		BackslashLineBreak | DefinitionLists
 56)
 57
 58// ListType contains bitwise or'ed flags for list and list item objects.
 59type ListType int
 60
 61// These are the possible flag values for the ListItem renderer.
 62// Multiple flag values may be ORed together.
 63// These are mostly of interest if you are writing a new output format.
 64const (
 65	ListTypeOrdered ListType = 1 << iota
 66	ListTypeDefinition
 67	ListTypeTerm
 68
 69	ListItemContainsBlock
 70	ListItemBeginningOfList // TODO: figure out if this is of any use now
 71	ListItemEndOfList
 72)
 73
 74// CellAlignFlags holds a type of alignment in a table cell.
 75type CellAlignFlags int
 76
 77// These are the possible flag values for the table cell renderer.
 78// Only a single one of these values will be used; they are not ORed together.
 79// These are mostly of interest if you are writing a new output format.
 80const (
 81	TableAlignmentLeft CellAlignFlags = 1 << iota
 82	TableAlignmentRight
 83	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
 84)
 85
 86// The size of a tab stop.
 87const (
 88	TabSizeDefault = 4
 89	TabSizeDouble  = 8
 90)
 91
 92// blockTags is a set of tags that are recognized as HTML block tags.
 93// Any of these can be included in markdown text without special escaping.
 94var blockTags = map[string]struct{}{
 95	"blockquote": struct{}{},
 96	"del":        struct{}{},
 97	"div":        struct{}{},
 98	"dl":         struct{}{},
 99	"fieldset":   struct{}{},
100	"form":       struct{}{},
101	"h1":         struct{}{},
102	"h2":         struct{}{},
103	"h3":         struct{}{},
104	"h4":         struct{}{},
105	"h5":         struct{}{},
106	"h6":         struct{}{},
107	"iframe":     struct{}{},
108	"ins":        struct{}{},
109	"math":       struct{}{},
110	"noscript":   struct{}{},
111	"ol":         struct{}{},
112	"pre":        struct{}{},
113	"p":          struct{}{},
114	"script":     struct{}{},
115	"style":      struct{}{},
116	"table":      struct{}{},
117	"ul":         struct{}{},
118
119	// HTML5
120	"address":    struct{}{},
121	"article":    struct{}{},
122	"aside":      struct{}{},
123	"canvas":     struct{}{},
124	"figcaption": struct{}{},
125	"figure":     struct{}{},
126	"footer":     struct{}{},
127	"header":     struct{}{},
128	"hgroup":     struct{}{},
129	"main":       struct{}{},
130	"nav":        struct{}{},
131	"output":     struct{}{},
132	"progress":   struct{}{},
133	"section":    struct{}{},
134	"video":      struct{}{},
135}
136
137// Renderer is the rendering interface.
138// This is mostly of interest if you are implementing a new rendering format.
139//
140// When a byte slice is provided, it contains the (rendered) contents of the
141// element.
142//
143// When a callback is provided instead, it will write the contents of the
144// respective element directly to the output buffer and return true on success.
145// If the callback returns false, the rendering function should reset the
146// output buffer as though it had never been called.
147//
148// Only an HTML implementation is provided in this repository,
149// see the README for external implementations.
150type Renderer interface {
151	Render(ast *Node) []byte
152	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
153}
154
155// Callback functions for inline parsing. One such function is defined
156// for each character that triggers a response when parsing inline data.
157type inlineParser func(p *Parser, data []byte, offset int) (int, *Node)
158
159// Parser holds runtime state used by the parser.
160// This is constructed by the Markdown function.
161type Parser struct {
162	refOverride    ReferenceOverrideFunc
163	refs           map[string]*reference
164	inlineCallback [256]inlineParser
165	flags          Extensions
166	nesting        int
167	maxNesting     int
168	insideLink     bool
169
170	// Footnotes need to be ordered as well as available to quickly check for
171	// presence. If a ref is also a footnote, it's stored both in refs and here
172	// in notes. Slice is nil if footnotes not enabled.
173	notes []*reference
174
175	doc                  *Node
176	tip                  *Node // = doc
177	oldTip               *Node
178	lastMatchedContainer *Node // = doc
179	allClosed            bool
180}
181
182func (p *Parser) getRef(refid string) (ref *reference, found bool) {
183	if p.refOverride != nil {
184		r, overridden := p.refOverride(refid)
185		if overridden {
186			if r == nil {
187				return nil, false
188			}
189			return &reference{
190				link:     []byte(r.Link),
191				title:    []byte(r.Title),
192				noteID:   0,
193				hasBlock: false,
194				text:     []byte(r.Text)}, true
195		}
196	}
197	// refs are case insensitive
198	ref, found = p.refs[strings.ToLower(refid)]
199	return ref, found
200}
201
202func (p *Parser) finalize(block *Node) {
203	above := block.Parent
204	block.open = false
205	p.tip = above
206}
207
208func (p *Parser) addChild(node NodeType, offset uint32) *Node {
209	return p.addExistingChild(NewNode(node), offset)
210}
211
212func (p *Parser) addExistingChild(node *Node, offset uint32) *Node {
213	for !p.tip.canContain(node.Type) {
214		p.finalize(p.tip)
215	}
216	p.tip.AppendChild(node)
217	p.tip = node
218	return node
219}
220
221func (p *Parser) closeUnmatchedBlocks() {
222	if !p.allClosed {
223		for p.oldTip != p.lastMatchedContainer {
224			parent := p.oldTip.Parent
225			p.finalize(p.oldTip)
226			p.oldTip = parent
227		}
228		p.allClosed = true
229	}
230}
231
232//
233//
234// Public interface
235//
236//
237
238// Reference represents the details of a link.
239// See the documentation in Options for more details on use-case.
240type Reference struct {
241	// Link is usually the URL the reference points to.
242	Link string
243	// Title is the alternate text describing the link in more detail.
244	Title string
245	// Text is the optional text to override the ref with if the syntax used was
246	// [refid][]
247	Text string
248}
249
250// ReferenceOverrideFunc is expected to be called with a reference string and
251// return either a valid Reference type that the reference string maps to or
252// nil. If overridden is false, the default reference logic will be executed.
253// See the documentation in Options for more details on use-case.
254type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
255
256// Processor contains all the state necessary for Blackfriday to operate.
257type Processor struct {
258	r                 Renderer
259	extensions        Extensions
260	referenceOverride ReferenceOverrideFunc
261}
262
263// DefaultProcessor creates the processor tuned to the most common behavior.
264func DefaultProcessor() *Processor {
265	return &Processor{
266		r: NewHTMLRenderer(HTMLRendererParameters{
267			Flags: CommonHTMLFlags,
268		}),
269		extensions: CommonExtensions,
270	}
271}
272
273// NewParser constructs a Parser. You can use the same With* functions as for
274// Markdown() to customize parser's behavior.
275func (proc *Processor) NewParser(opts ...Option) *Parser {
276	for _, opt := range opts {
277		opt(proc)
278	}
279	var p Parser
280	p.flags = proc.extensions
281	p.refOverride = proc.referenceOverride
282	p.refs = make(map[string]*reference)
283	p.maxNesting = 16
284	p.insideLink = false
285	docNode := NewNode(Document)
286	p.doc = docNode
287	p.tip = docNode
288	p.oldTip = docNode
289	p.lastMatchedContainer = docNode
290	p.allClosed = true
291	// register inline parsers
292	p.inlineCallback[' '] = maybeLineBreak
293	p.inlineCallback['*'] = emphasis
294	p.inlineCallback['_'] = emphasis
295	if proc.extensions&Strikethrough != 0 {
296		p.inlineCallback['~'] = emphasis
297	}
298	p.inlineCallback['`'] = codeSpan
299	p.inlineCallback['\n'] = lineBreak
300	p.inlineCallback['['] = link
301	p.inlineCallback['<'] = leftAngle
302	p.inlineCallback['\\'] = escape
303	p.inlineCallback['&'] = entity
304	p.inlineCallback['!'] = maybeImage
305	p.inlineCallback['^'] = maybeInlineFootnote
306	if proc.extensions&Autolink != 0 {
307		p.inlineCallback['h'] = maybeAutoLink
308		p.inlineCallback['m'] = maybeAutoLink
309		p.inlineCallback['f'] = maybeAutoLink
310		p.inlineCallback['H'] = maybeAutoLink
311		p.inlineCallback['M'] = maybeAutoLink
312		p.inlineCallback['F'] = maybeAutoLink
313	}
314	if proc.extensions&Footnotes != 0 {
315		p.notes = make([]*reference, 0)
316	}
317	return &p
318}
319
320// Option customizes Processor's default behavior.
321type Option func(*Processor)
322
323// WithRenderer allows you to override the default renderer.
324func WithRenderer(r Renderer) Option {
325	return func(p *Processor) {
326		p.r = r
327	}
328}
329
330// WithExtensions allows you to pick some of the many extensions provided by
331// Blackfriday. You can bitwise OR them.
332func WithExtensions(e Extensions) Option {
333	return func(p *Processor) {
334		p.extensions = e
335	}
336}
337
338// WithNoExtensions turns off all extensions and custom behavior.
339func WithNoExtensions() Option {
340	return func(p *Processor) {
341		p.extensions = NoExtensions
342		p.r = NewHTMLRenderer(HTMLRendererParameters{
343			Flags: HTMLFlagsNone,
344		})
345	}
346}
347
348// WithRefOverride sets an optional function callback that is called every
349// time a reference is resolved.
350//
351// In Markdown, the link reference syntax can be made to resolve a link to
352// a reference instead of an inline URL, in one of the following ways:
353//
354//  * [link text][refid]
355//  * [refid][]
356//
357// Usually, the refid is defined at the bottom of the Markdown document. If
358// this override function is provided, the refid is passed to the override
359// function first, before consulting the defined refids at the bottom. If
360// the override function indicates an override did not occur, the refids at
361// the bottom will be used to fill in the link details.
362func WithRefOverride(o ReferenceOverrideFunc) Option {
363	return func(p *Processor) {
364		p.referenceOverride = o
365	}
366}
367
368// Markdown is the main entry point to Blackfriday. It parses and renders a
369// block of markdown-encoded text.
370//
371// The simplest invocation of Markdown takes one argument, input:
372//     output := Markdown(input)
373// This will parse the input with CommonExtensions enabled and render it with
374// the default HTMLRenderer (with CommonHTMLFlags).
375//
376// Variadic arguments opts can customize the default behavior. Since Processor
377// type does not contain exported fields, you can not use it directly. Instead,
378// use the With* functions. For example, this will call the most basic
379// functionality, with no extensions:
380//     output := Markdown(input, WithNoExtensions())
381//
382// You can use any number of With* arguments, even contradicting ones. They
383// will be applied in order of appearance and the latter will override the
384// former:
385//     output := Markdown(input, WithNoExtensions(), WithExtensions(exts),
386//         WithRenderer(yourRenderer))
387func Markdown(input []byte, opts ...Option) []byte {
388	p := DefaultProcessor()
389	parser := p.NewParser(opts...)
390	return p.r.Render(parser.Parse(input))
391}
392
393// Parse is an entry point to the parsing part of Blackfriday. It takes an
394// input markdown document and produces a syntax tree for its contents. This
395// tree can then be rendered with a default or custom renderer, or
396// analyzed/transformed by the caller to whatever non-standard needs they have.
397func (p *Parser) Parse(input []byte) *Node {
398	p.block(input)
399	// Walk the tree and finish up some of unfinished blocks
400	for p.tip != nil {
401		p.finalize(p.tip)
402	}
403	// Walk the tree again and process inline markdown in each block
404	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
405		if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
406			p.inline(node, node.content)
407			node.content = nil
408		}
409		return GoToNext
410	})
411	p.parseRefsToAST()
412	return p.doc
413}
414
415func (p *Parser) parseRefsToAST() {
416	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
417		return
418	}
419	p.tip = p.doc
420	block := p.addBlock(List, nil)
421	block.IsFootnotesList = true
422	block.ListFlags = ListTypeOrdered
423	flags := ListItemBeginningOfList
424	// Note: this loop is intentionally explicit, not range-form. This is
425	// because the body of the loop will append nested footnotes to p.notes and
426	// we need to process those late additions. Range form would only walk over
427	// the fixed initial set.
428	for i := 0; i < len(p.notes); i++ {
429		ref := p.notes[i]
430		p.addExistingChild(ref.footnote, 0)
431		block := ref.footnote
432		block.ListFlags = flags | ListTypeOrdered
433		block.RefLink = ref.link
434		if ref.hasBlock {
435			flags |= ListItemContainsBlock
436			p.block(ref.title)
437		} else {
438			p.inline(block, ref.title)
439		}
440		flags &^= ListItemBeginningOfList | ListItemContainsBlock
441	}
442	above := block.Parent
443	finalizeList(block)
444	p.tip = above
445	block.Walk(func(node *Node, entering bool) WalkStatus {
446		if node.Type == Paragraph || node.Type == Heading {
447			p.inline(node, node.content)
448			node.content = nil
449		}
450		return GoToNext
451	})
452}
453
454//
455// Link references
456//
457// This section implements support for references that (usually) appear
458// as footnotes in a document, and can be referenced anywhere in the document.
459// The basic format is:
460//
461//    [1]: http://www.google.com/ "Google"
462//    [2]: http://www.github.com/ "Github"
463//
464// Anywhere in the document, the reference can be linked by referring to its
465// label, i.e., 1 and 2 in this example, as in:
466//
467//    This library is hosted on [Github][2], a git hosting site.
468//
469// Actual footnotes as specified in Pandoc and supported by some other Markdown
470// libraries such as php-markdown are also taken care of. They look like this:
471//
472//    This sentence needs a bit of further explanation.[^note]
473//
474//    [^note]: This is the explanation.
475//
476// Footnotes should be placed at the end of the document in an ordered list.
477// Inline footnotes such as:
478//
479//    Inline footnotes^[Not supported.] also exist.
480//
481// are not yet supported.
482
483// reference holds all information necessary for a reference-style links or
484// footnotes.
485//
486// Consider this markdown with reference-style links:
487//
488//     [link][ref]
489//
490//     [ref]: /url/ "tooltip title"
491//
492// It will be ultimately converted to this HTML:
493//
494//     <p><a href=\"/url/\" title=\"title\">link</a></p>
495//
496// And a reference structure will be populated as follows:
497//
498//     p.refs["ref"] = &reference{
499//         link: "/url/",
500//         title: "tooltip title",
501//     }
502//
503// Alternatively, reference can contain information about a footnote. Consider
504// this markdown:
505//
506//     Text needing a footnote.[^a]
507//
508//     [^a]: This is the note
509//
510// A reference structure will be populated as follows:
511//
512//     p.refs["a"] = &reference{
513//         link: "a",
514//         title: "This is the note",
515//         noteID: <some positive int>,
516//     }
517//
518// TODO: As you can see, it begs for splitting into two dedicated structures
519// for refs and for footnotes.
520type reference struct {
521	link     []byte
522	title    []byte
523	noteID   int // 0 if not a footnote ref
524	hasBlock bool
525	footnote *Node // a link to the Item node within a list of footnotes
526
527	text []byte // only gets populated by refOverride feature with Reference.Text
528}
529
530func (r *reference) String() string {
531	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
532		r.link, r.title, r.text, r.noteID, r.hasBlock)
533}
534
535// Check whether or not data starts with a reference link.
536// If so, it is parsed and stored in the list of references
537// (in the render struct).
538// Returns the number of bytes to skip to move past it,
539// or zero if the first line is not a reference.
540func isReference(p *Parser, data []byte, tabSize int) int {
541	// up to 3 optional leading spaces
542	if len(data) < 4 {
543		return 0
544	}
545	i := 0
546	for i < 3 && data[i] == ' ' {
547		i++
548	}
549
550	noteID := 0
551
552	// id part: anything but a newline between brackets
553	if data[i] != '[' {
554		return 0
555	}
556	i++
557	if p.flags&Footnotes != 0 {
558		if i < len(data) && data[i] == '^' {
559			// we can set it to anything here because the proper noteIds will
560			// be assigned later during the second pass. It just has to be != 0
561			noteID = 1
562			i++
563		}
564	}
565	idOffset := i
566	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
567		i++
568	}
569	if i >= len(data) || data[i] != ']' {
570		return 0
571	}
572	idEnd := i
573	// footnotes can have empty ID, like this: [^], but a reference can not be
574	// empty like this: []. Break early if it's not a footnote and there's no ID
575	if noteID == 0 && idOffset == idEnd {
576		return 0
577	}
578	// spacer: colon (space | tab)* newline? (space | tab)*
579	i++
580	if i >= len(data) || data[i] != ':' {
581		return 0
582	}
583	i++
584	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
585		i++
586	}
587	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
588		i++
589		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
590			i++
591		}
592	}
593	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
594		i++
595	}
596	if i >= len(data) {
597		return 0
598	}
599
600	var (
601		linkOffset, linkEnd   int
602		titleOffset, titleEnd int
603		lineEnd               int
604		raw                   []byte
605		hasBlock              bool
606	)
607
608	if p.flags&Footnotes != 0 && noteID != 0 {
609		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
610		lineEnd = linkEnd
611	} else {
612		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
613	}
614	if lineEnd == 0 {
615		return 0
616	}
617
618	// a valid ref has been found
619
620	ref := &reference{
621		noteID:   noteID,
622		hasBlock: hasBlock,
623	}
624
625	if noteID > 0 {
626		// reusing the link field for the id since footnotes don't have links
627		ref.link = data[idOffset:idEnd]
628		// if footnote, it's not really a title, it's the contained text
629		ref.title = raw
630	} else {
631		ref.link = data[linkOffset:linkEnd]
632		ref.title = data[titleOffset:titleEnd]
633	}
634
635	// id matches are case-insensitive
636	id := string(bytes.ToLower(data[idOffset:idEnd]))
637
638	p.refs[id] = ref
639
640	return lineEnd
641}
642
643func scanLinkRef(p *Parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
644	// link: whitespace-free sequence, optionally between angle brackets
645	if data[i] == '<' {
646		i++
647	}
648	linkOffset = i
649	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
650		i++
651	}
652	linkEnd = i
653	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
654		linkOffset++
655		linkEnd--
656	}
657
658	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
659	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
660		i++
661	}
662	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
663		return
664	}
665
666	// compute end-of-line
667	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
668		lineEnd = i
669	}
670	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
671		lineEnd++
672	}
673
674	// optional (space|tab)* spacer after a newline
675	if lineEnd > 0 {
676		i = lineEnd + 1
677		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
678			i++
679		}
680	}
681
682	// optional title: any non-newline sequence enclosed in '"() alone on its line
683	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
684		i++
685		titleOffset = i
686
687		// look for EOL
688		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
689			i++
690		}
691		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
692			titleEnd = i + 1
693		} else {
694			titleEnd = i
695		}
696
697		// step back
698		i--
699		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
700			i--
701		}
702		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
703			lineEnd = titleEnd
704			titleEnd = i
705		}
706	}
707
708	return
709}
710
711// The first bit of this logic is the same as Parser.listItem, but the rest
712// is much simpler. This function simply finds the entire block and shifts it
713// over by one tab if it is indeed a block (just returns the line if it's not).
714// blockEnd is the end of the section in the input buffer, and contents is the
715// extracted text that was shifted over one tab. It will need to be rendered at
716// the end of the document.
717func scanFootnote(p *Parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
718	if i == 0 || len(data) == 0 {
719		return
720	}
721
722	// skip leading whitespace on first line
723	for i < len(data) && data[i] == ' ' {
724		i++
725	}
726
727	blockStart = i
728
729	// find the end of the line
730	blockEnd = i
731	for i < len(data) && data[i-1] != '\n' {
732		i++
733	}
734
735	// get working buffer
736	var raw bytes.Buffer
737
738	// put the first line into the working buffer
739	raw.Write(data[blockEnd:i])
740	blockEnd = i
741
742	// process the following lines
743	containsBlankLine := false
744
745gatherLines:
746	for blockEnd < len(data) {
747		i++
748
749		// find the end of this line
750		for i < len(data) && data[i-1] != '\n' {
751			i++
752		}
753
754		// if it is an empty line, guess that it is part of this item
755		// and move on to the next line
756		if p.isEmpty(data[blockEnd:i]) > 0 {
757			containsBlankLine = true
758			blockEnd = i
759			continue
760		}
761
762		n := 0
763		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
764			// this is the end of the block.
765			// we don't want to include this last line in the index.
766			break gatherLines
767		}
768
769		// if there were blank lines before this one, insert a new one now
770		if containsBlankLine {
771			raw.WriteByte('\n')
772			containsBlankLine = false
773		}
774
775		// get rid of that first tab, write to buffer
776		raw.Write(data[blockEnd+n : i])
777		hasBlock = true
778
779		blockEnd = i
780	}
781
782	if data[blockEnd-1] != '\n' {
783		raw.WriteByte('\n')
784	}
785
786	contents = raw.Bytes()
787
788	return
789}
790
791//
792//
793// Miscellaneous helper functions
794//
795//
796
797// Test if a character is a punctuation symbol.
798// Taken from a private function in regexp in the stdlib.
799func ispunct(c byte) bool {
800	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
801		if c == r {
802			return true
803		}
804	}
805	return false
806}
807
808// Test if a character is a whitespace character.
809func isspace(c byte) bool {
810	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
811}
812
813// Test if a character is letter.
814func isletter(c byte) bool {
815	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
816}
817
818// Test if a character is a letter or a digit.
819// TODO: check when this is looking for ASCII alnum and when it should use unicode
820func isalnum(c byte) bool {
821	return (c >= '0' && c <= '9') || isletter(c)
822}
823
824// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
825// always ends output with a newline
826func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
827	// first, check for common cases: no tabs, or only tabs at beginning of line
828	i, prefix := 0, 0
829	slowcase := false
830	for i = 0; i < len(line); i++ {
831		if line[i] == '\t' {
832			if prefix == i {
833				prefix++
834			} else {
835				slowcase = true
836				break
837			}
838		}
839	}
840
841	// no need to decode runes if all tabs are at the beginning of the line
842	if !slowcase {
843		for i = 0; i < prefix*tabSize; i++ {
844			out.WriteByte(' ')
845		}
846		out.Write(line[prefix:])
847		return
848	}
849
850	// the slow case: we need to count runes to figure out how
851	// many spaces to insert for each tab
852	column := 0
853	i = 0
854	for i < len(line) {
855		start := i
856		for i < len(line) && line[i] != '\t' {
857			_, size := utf8.DecodeRune(line[i:])
858			i += size
859			column++
860		}
861
862		if i > start {
863			out.Write(line[start:i])
864		}
865
866		if i >= len(line) {
867			break
868		}
869
870		for {
871			out.WriteByte(' ')
872			column++
873			if column%tabSize == 0 {
874				break
875			}
876		}
877
878		i++
879	}
880}
881
882// Find if a line counts as indented or not.
883// Returns number of characters the indent is (0 = not indented).
884func isIndented(data []byte, indentSize int) int {
885	if len(data) == 0 {
886		return 0
887	}
888	if data[0] == '\t' {
889		return 1
890	}
891	if len(data) < indentSize {
892		return 0
893	}
894	for i := 0; i < indentSize; i++ {
895		if data[i] != ' ' {
896			return 0
897		}
898	}
899	return indentSize
900}
901
902// Create a url-safe slug for fragments
903func slugify(in []byte) []byte {
904	if len(in) == 0 {
905		return in
906	}
907	out := make([]byte, 0, len(in))
908	sym := false
909
910	for _, ch := range in {
911		if isalnum(ch) {
912			sym = false
913			out = append(out, ch)
914		} else if sym {
915			continue
916		} else {
917			out = append(out, '-')
918			sym = true
919		}
920	}
921	var a, b int
922	var ch byte
923	for a, ch = range out {
924		if ch != '-' {
925			break
926		}
927	}
928	for b = len(out) - 1; b > 0; b-- {
929		if out[b] != '-' {
930			break
931		}
932	}
933	return out[a : b+1]
934}