all repos — grayfriday @ b5ff8e0286f7dd5ba40cf0b26001b1dcdfec3993

blackfriday fork with a few changes

markdown.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// Markdown parsing and processing
 13//
 14//
 15
 16// Package blackfriday is a markdown processor.
 17//
 18// Translates plain text with simple formatting rules into HTML or LaTeX.
 19package blackfriday
 20
 21import (
 22	"bytes"
 23	"fmt"
 24	"io"
 25	"strings"
 26	"unicode/utf8"
 27)
 28
// Version is the version string of the package.
const Version = "2.0"
 31
// Extensions is a bitwise or'ed collection of enabled Blackfriday's
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// NoExtensions occupies the iota == 0 slot with an explicit value of 0, so
// the first real flag (NoIntraEmphasis) is 1 << 1 and every following flag
// takes the next higher bit.
const (
	NoExtensions            Extensions = 0
	NoIntraEmphasis         Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                         // Render tables
	FencedCode                                     // Render fenced code blocks
	Autolink                                       // Detect embedded URLs that are not explicitly marked
	Strikethrough                                  // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                  // Loosen up HTML block parsing rules
	SpaceHeaders                                   // Be strict about prefix header rules
	HardLineBreak                                  // Translate newlines into line breaks
	TabSizeEight                                   // Expand tabs to eight spaces instead of four
	Footnotes                                      // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                         // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeaderIDs                                      // specify header IDs  with {#id}
	Titleblock                                     // Titleblock ala pandoc
	AutoHeaderIDs                                  // Create the header ID from the text
	BackslashLineBreak                             // Translate trailing backslashes into line breaks
	DefinitionLists                                // Render definition lists
	Smartypants                                    // Enable smart punctuation substitutions
	SmartypantsFractions                           // Enable smart fractions (with Smartypants)
	SmartypantsDashes                              // Enable smart dashes (with Smartypants)
	SmartypantsLatexDashes                         // Enable LaTeX-style dashes (with Smartypants)
	SmartypantsAngledQuotes                        // Enable angled double quotes (with Smartypants) for double quotes rendering
	TOC                                            // Generate a table of contents
	OmitContents                                   // Skip the main contents (for a standalone table of contents)

	// CommonHTMLFlags is the HTML renderer flag set used by MarkdownCommon.
	CommonHTMLFlags HTMLFlags = UseXHTML

	// CommonExtensions is the extension set used by MarkdownCommon and by
	// DefaultOptions.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
		BackslashLineBreak | DefinitionLists | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
)
 71
// DefaultOptions is a convenience variable with all the options that are
// enabled by default: Extensions is set to CommonExtensions and no
// ReferenceOverride callback is installed.
var DefaultOptions = Options{
	Extensions: CommonExtensions,
}
 77
// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// Each constant takes the next bit (1, 2, 4, ...); the blank line between
// the two groups does not consume an iota value.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList
	ListItemEndOfList
)
 93
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// Left is 1, Right is 2 and Center is the combination of both (3).
// NOTE(review): the constants are declared without a type, so they are
// untyped integer constants rather than CellAlignFlags values; confirm whether
// that is intended before tightening it.
const (
	TableAlignmentLeft = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
105
// The size of a tab stop, in columns.
// firstPass uses TabSizeDefault unless the TabSizeEight extension is set, in
// which case it uses TabSizeDouble.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
111
// blockTags is the set of tag names treated as HTML block-level tags.
// Such tags can appear in markdown text without any special escaping.
// The map is used purely as a set, so the values carry no information.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// Tags introduced with HTML5.
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
156
// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// NOTE(review): the two paragraphs above talk about byte-slice and callback
// arguments, which neither method below takes; they look like leftovers from
// an earlier callback-based interface and should be confirmed or removed.
//
// Currently HTML and Latex implementations are provided
type Renderer interface {
	// Render turns a whole syntax tree into output bytes. Markdown calls it
	// with the tree returned by Parse.
	Render(ast *Node) []byte
	// RenderNode writes output for a single node to w. Its node/entering
	// arguments and WalkStatus result mirror the Node.Walk callback shape;
	// presumably it is invoked once on entering and once on leaving a node
	// (confirm against the renderer implementations).
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
}
173
// inlineParser is the callback type for inline parsing. One such function is
// registered (in parser.inlineCallback, indexed by byte value) for each
// character that triggers a response when parsing inline data.
// It receives the parser, the data being parsed and an offset into it, and
// returns an int; presumably the number of bytes consumed (confirm against
// the inline parsing code).
type inlineParser func(p *parser, data []byte, offset int) int
177
// parser holds runtime state used by the parser.
// A fresh instance is constructed by Parse on every call.
type parser struct {
	// refOverride is the optional Options.ReferenceOverride hook; getRef
	// consults it before refs. refs maps lowercased reference ids to their
	// definitions. inlineCallback is indexed by the triggering byte.
	// flags is the enabled extension set.
	refOverride    ReferenceOverrideFunc
	refs           map[string]*reference
	inlineCallback [256]inlineParser
	flags          Extensions
	nesting        int
	maxNesting     int
	insideLink     bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	// Tree-building state: doc is the document root and tip is the block
	// that new children are currently appended to (see addChild).
	doc                  *Node
	tip                  *Node // = doc
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	allClosed            bool
	currBlock            *Node // a block node currently being parsed by inline parser
}
201
202func (p *parser) getRef(refid string) (ref *reference, found bool) {
203	if p.refOverride != nil {
204		r, overridden := p.refOverride(refid)
205		if overridden {
206			if r == nil {
207				return nil, false
208			}
209			return &reference{
210				link:     []byte(r.Link),
211				title:    []byte(r.Title),
212				noteID:   0,
213				hasBlock: false,
214				text:     []byte(r.Text)}, true
215		}
216	}
217	// refs are case insensitive
218	ref, found = p.refs[strings.ToLower(refid)]
219	return ref, found
220}
221
222func (p *parser) finalize(block *Node) {
223	above := block.Parent
224	block.open = false
225	p.tip = above
226}
227
228func (p *parser) addChild(node NodeType, offset uint32) *Node {
229	for !p.tip.canContain(node) {
230		p.finalize(p.tip)
231	}
232	newNode := NewNode(node)
233	newNode.content = []byte{}
234	p.tip.appendChild(newNode)
235	p.tip = newNode
236	return newNode
237}
238
239func (p *parser) closeUnmatchedBlocks() {
240	if !p.allClosed {
241		for p.oldTip != p.lastMatchedContainer {
242			parent := p.oldTip.Parent
243			p.finalize(p.oldTip)
244			p.oldTip = parent
245		}
246		p.allClosed = true
247	}
248}
249
250//
251//
252// Public interface
253//
254//
255
// Reference represents the details of a link.
// It is the value a ReferenceOverrideFunc returns for a reference id.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
267
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is reported as not
// found without consulting the document's own definitions.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
273
// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
// The zero value enables no extensions and installs no override.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// Extensions flags defined in this package.
	Extensions Extensions

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	//  * [link text][refid]
	//  * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}
297
298// MarkdownBasic is a convenience function for simple rendering.
299// It processes markdown input with no extensions enabled.
300func MarkdownBasic(input []byte) []byte {
301	// set up the HTML renderer
302	renderer := NewHTMLRenderer(HTMLRendererParameters{
303		Flags:      UseXHTML,
304		Extensions: CommonExtensions,
305	})
306
307	// set up the parser
308	return Markdown(input, renderer, Options{})
309}
310
311// MarkdownCommon is a convenience function for simple rendering. It calls
312// Markdown with most useful extensions enabled, including:
313//
314// * Smartypants processing with smart fractions and LaTeX dashes
315//
316// * Intra-word emphasis suppression
317//
318// * Tables
319//
320// * Fenced code blocks
321//
322// * Autolinking
323//
324// * Strikethrough support
325//
326// * Strict header parsing
327//
328// * Custom Header IDs
329func MarkdownCommon(input []byte) []byte {
330	// set up the HTML renderer
331	renderer := NewHTMLRenderer(HTMLRendererParameters{
332		Flags:      CommonHTMLFlags,
333		Extensions: CommonExtensions,
334	})
335	return Markdown(input, renderer, DefaultOptions)
336}
337
338// Markdown is the main rendering function.
339// It parses and renders a block of markdown-encoded text.
340// The supplied Renderer is used to format the output, and extensions dictates
341// which non-standard extensions are enabled.
342//
343// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
344// NewLatexRenderer, respectively.
345func Markdown(input []byte, renderer Renderer, options Options) []byte {
346	if renderer == nil {
347		return nil
348	}
349	return renderer.Render(Parse(input, options))
350}
351
352// Parse is an entry point to the parsing part of Blackfriday. It takes an
353// input markdown document and produces a syntax tree for its contents. This
354// tree can then be rendered with a default or custom renderer, or
355// analyzed/transformed by the caller to whatever non-standard needs they have.
356func Parse(input []byte, opts Options) *Node {
357	extensions := opts.Extensions
358
359	// fill in the render structure
360	p := new(parser)
361	p.flags = extensions
362	p.refOverride = opts.ReferenceOverride
363	p.refs = make(map[string]*reference)
364	p.maxNesting = 16
365	p.insideLink = false
366
367	docNode := NewNode(Document)
368	p.doc = docNode
369	p.tip = docNode
370	p.oldTip = docNode
371	p.lastMatchedContainer = docNode
372	p.allClosed = true
373
374	// register inline parsers
375	p.inlineCallback['*'] = emphasis
376	p.inlineCallback['_'] = emphasis
377	if extensions&Strikethrough != 0 {
378		p.inlineCallback['~'] = emphasis
379	}
380	p.inlineCallback['`'] = codeSpan
381	p.inlineCallback['\n'] = lineBreak
382	p.inlineCallback['['] = link
383	p.inlineCallback['<'] = leftAngle
384	p.inlineCallback['\\'] = escape
385	p.inlineCallback['&'] = entity
386	p.inlineCallback['!'] = maybeImage
387	p.inlineCallback['^'] = maybeInlineFootnote
388
389	if extensions&Autolink != 0 {
390		p.inlineCallback['h'] = maybeAutoLink
391		p.inlineCallback['m'] = maybeAutoLink
392		p.inlineCallback['f'] = maybeAutoLink
393		p.inlineCallback['H'] = maybeAutoLink
394		p.inlineCallback['M'] = maybeAutoLink
395		p.inlineCallback['F'] = maybeAutoLink
396	}
397
398	if extensions&Footnotes != 0 {
399		p.notes = make([]*reference, 0)
400	}
401
402	first := firstPass(p, input)
403	secondPass(p, first)
404	// Walk the tree and finish up some of unfinished blocks
405	for p.tip != nil {
406		p.finalize(p.tip)
407	}
408	// Walk the tree again and process inline markdown in each block
409	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
410		if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
411			p.currBlock = node
412			p.inline(node.content)
413			node.content = nil
414		}
415		return GoToNext
416	})
417	p.parseRefsToAST()
418	return p.doc
419}
420
421func (p *parser) parseRefsToAST() {
422	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
423		return
424	}
425	p.tip = p.doc
426	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
427	p.addBlock(HorizontalRule, nil)
428	block := p.addBlock(List, nil)
429	block.ListFlags = ListTypeOrdered
430	flags := ListItemBeginningOfList
431	// Note: this loop is intentionally explicit, not range-form. This is
432	// because the body of the loop will append nested footnotes to p.notes and
433	// we need to process those late additions. Range form would only walk over
434	// the fixed initial set.
435	for i := 0; i < len(p.notes); i++ {
436		ref := p.notes[i]
437		block := p.addBlock(Item, nil)
438		block.ListFlags = ListTypeOrdered
439		block.RefLink = ref.link
440		if ref.hasBlock {
441			flags |= ListItemContainsBlock
442			p.block(ref.title)
443		} else {
444			p.currBlock = block
445			p.inline(ref.title)
446		}
447		flags &^= ListItemBeginningOfList | ListItemContainsBlock
448	}
449	above := block.Parent
450	finalizeList(block)
451	p.tip = above
452	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte("</div>")))
453	block.Walk(func(node *Node, entering bool) WalkStatus {
454		if node.Type == Paragraph || node.Type == Header {
455			p.currBlock = node
456			p.inline(node.content)
457			node.content = nil
458		}
459		return GoToNext
460	})
461}
462
463// first pass:
464// - normalize newlines
465// - extract references (outside of fenced code blocks)
466// - expand tabs (outside of fenced code blocks)
467// - copy everything else
468func firstPass(p *parser, input []byte) []byte {
469	var out bytes.Buffer
470	tabSize := TabSizeDefault
471	if p.flags&TabSizeEight != 0 {
472		tabSize = TabSizeDouble
473	}
474	beg := 0
475	lastFencedCodeBlockEnd := 0
476	for beg < len(input) {
477		// Find end of this line, then process the line.
478		end := beg
479		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
480			end++
481		}
482
483		if p.flags&FencedCode != 0 {
484			// track fenced code block boundaries to suppress tab expansion
485			// and reference extraction inside them:
486			if beg >= lastFencedCodeBlockEnd {
487				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
488					lastFencedCodeBlockEnd = beg + i
489				}
490			}
491		}
492
493		// add the line body if present
494		if end > beg {
495			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
496				out.Write(input[beg:end])
497			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
498				beg += refEnd
499				continue
500			} else {
501				expandTabs(&out, input[beg:end], tabSize)
502			}
503		}
504
505		if end < len(input) && input[end] == '\r' {
506			end++
507		}
508		if end < len(input) && input[end] == '\n' {
509			end++
510		}
511		out.WriteByte('\n')
512
513		beg = end
514	}
515
516	// empty input?
517	if out.Len() == 0 {
518		out.WriteByte('\n')
519	}
520
521	return out.Bytes()
522}
523
524// second pass: actual rendering
525func secondPass(p *parser, input []byte) {
526	p.block(input)
527
528	if p.flags&Footnotes != 0 && len(p.notes) > 0 {
529		flags := ListItemBeginningOfList
530		for i := 0; i < len(p.notes); i++ {
531			ref := p.notes[i]
532			if ref.hasBlock {
533				flags |= ListItemContainsBlock
534				p.block(ref.title)
535			} else {
536				p.inline(ref.title)
537			}
538			flags &^= ListItemBeginningOfList | ListItemContainsBlock
539		}
540	}
541
542	if p.nesting != 0 {
543		panic("Nesting level did not end at zero")
544	}
545}
546
547//
548// Link references
549//
550// This section implements support for references that (usually) appear
551// as footnotes in a document, and can be referenced anywhere in the document.
552// The basic format is:
553//
554//    [1]: http://www.google.com/ "Google"
555//    [2]: http://www.github.com/ "Github"
556//
557// Anywhere in the document, the reference can be linked by referring to its
558// label, i.e., 1 and 2 in this example, as in:
559//
560//    This library is hosted on [Github][2], a git hosting site.
561//
562// Actual footnotes as specified in Pandoc and supported by some other Markdown
563// libraries such as php-markdown are also taken care of. They look like this:
564//
565//    This sentence needs a bit of further explanation.[^note]
566//
567//    [^note]: This is the explanation.
568//
569// Footnotes should be placed at the end of the document in an ordered list.
570// Inline footnotes such as:
571//
572//    Inline footnotes^[Not supported.] also exist.
573//
574// are not yet supported.
575
// References are parsed and stored in this struct.
// It is used both for link references and for footnotes.
type reference struct {
	// link is the link target; for footnotes isReference stores the footnote
	// id here instead. title is the link title; for footnotes it holds the
	// footnote's text. hasBlock is reported by scanFootnote when indented
	// continuation lines were gathered. text is only filled in from a
	// ReferenceOverrideFunc result (see getRef).
	link     []byte
	title    []byte
	noteID   int // 0 if not a footnote ref
	hasBlock bool
	text     []byte
}
584
585func (r *reference) String() string {
586	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
587		r.link, r.title, r.text, r.noteID, r.hasBlock)
588}
589
// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (p.refs in the parser struct), keyed by its lowercased id.
// When the Footnotes extension is enabled, a definition whose id starts with
// '^' is treated as a footnote and scanned with scanFootnote; everything else
// is scanned with scanLinkRef. tabSize is passed on to scanFootnote as the
// indent width.
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
func isReference(p *parser, data []byte, tabSize int) int {
	// up to 3 optional leading spaces
	// (the length check also keeps data[i] below in range, since i <= 3)
	if len(data) < 4 {
		return 0
	}
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	noteID := 0

	// id part: anything but a newline between brackets
	if data[i] != '[' {
		return 0
	}
	i++
	if p.flags&Footnotes != 0 {
		if i < len(data) && data[i] == '^' {
			// we can set it to anything here because the proper noteIds will
			// be assigned later during the second pass. It just has to be != 0
			noteID = 1
			i++
		}
	}
	idOffset := i
	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
		i++
	}
	if i >= len(data) || data[i] != ']' {
		return 0
	}
	idEnd := i

	// spacer: colon (space | tab)* newline? (space | tab)*
	i++
	if i >= len(data) || data[i] != ':' {
		return 0
	}
	i++
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
		// treat "\r\n" as a single line break
		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
			i++
		}
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	// nothing follows the spacer: not a reference
	if i >= len(data) {
		return 0
	}

	var (
		linkOffset, linkEnd   int
		titleOffset, titleEnd int
		lineEnd               int
		raw                   []byte
		hasBlock              bool
	)

	if p.flags&Footnotes != 0 && noteID != 0 {
		// for footnotes the end of the gathered block doubles as the line end
		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
		lineEnd = linkEnd
	} else {
		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
	}
	// a zero lineEnd means the scanner rejected the definition
	if lineEnd == 0 {
		return 0
	}

	// a valid ref has been found

	ref := &reference{
		noteID:   noteID,
		hasBlock: hasBlock,
	}

	if noteID > 0 {
		// reusing the link field for the id since footnotes don't have links
		ref.link = data[idOffset:idEnd]
		// if footnote, it's not really a title, it's the contained text
		ref.title = raw
	} else {
		ref.link = data[linkOffset:linkEnd]
		ref.title = data[titleOffset:titleEnd]
	}

	// id matches are case-insensitive
	id := string(bytes.ToLower(data[idOffset:idEnd]))

	p.refs[id] = ref

	return lineEnd
}
693
// scanLinkRef scans the link target and the optional title of a reference
// definition, starting at data[i] (the first byte after the "[id]:" spacer).
// It returns the bounds of the link (linkOffset, linkEnd) and of the title
// (titleOffset, titleEnd) as offsets into data, plus lineEnd, the offset at
// which the definition ends. A lineEnd of zero tells the caller (isReference)
// that no valid definition was found. The p parameter is not used.
func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
	// link: whitespace-free sequence, optionally between angle brackets
	if data[i] == '<' {
		i++
	}
	linkOffset = i
	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
		i++
	}
	// the link ran to the very end of data: reject (all results are zero)
	if i == len(data) {
		return
	}
	linkEnd = i
	// NOTE(review): linkOffset was already moved past a leading '<' above, so
	// data[linkOffset] is '<' only when the link starts with "<<"; for a plain
	// "<url>" this branch does not run and the trailing '>' stays in the link.
	// Confirm whether that is intended.
	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
		linkOffset++
		linkEnd--
	}

	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	// anything else after the link means this is not a reference definition
	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
		return
	}

	// compute end-of-line
	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
		lineEnd = i
	}
	// for "\r\n" move lineEnd onto the '\n'
	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
		lineEnd++
	}

	// optional (space|tab)* spacer after a newline
	if lineEnd > 0 {
		i = lineEnd + 1
		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
			i++
		}
	}

	// optional title: any non-newline sequence enclosed in '"() alone on its line
	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
		i++
		titleOffset = i

		// look for EOL
		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
			i++
		}
		// NOTE(review): this matches "\n\r", not "\r\n"; since the loop above
		// stops at the first '\r' or '\n', a "\r\n" ending always takes the
		// else branch. Possibly a transposition; confirm.
		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
			titleEnd = i + 1
		} else {
			titleEnd = i
		}

		// step back
		i--
		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
			i--
		}
		// only a properly closed title extends the definition to this line;
		// titleEnd then points at the closing delimiter
		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
			lineEnd = titleEnd
			titleEnd = i
		}
	}

	return
}
764
// The first bit of this logic is the same as (*parser).listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
//
// blockStart is the offset of the first non-space byte of the footnote text,
// and hasBlock reports whether at least one indented continuation line was
// gathered. indentSize is the number of spaces that count as one indent (a
// single tab also counts, see isIndented). All results are zero when i is 0
// or data is empty.
func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line
	// (the loop tests data[i-1], so i ends up just past the newline)
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	// make sure the extracted contents end with a newline
	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}
844
845//
846//
847// Miscellaneous helper functions
848//
849//
850
// ispunct reports whether c is an ASCII punctuation symbol, i.e. one of
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
// The set is taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	return strings.IndexByte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", c) >= 0
}
861
// isspace reports whether c is an ASCII whitespace character: space, tab,
// newline, carriage return, form feed or vertical tab.
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
866
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	// Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z'; no other byte is mapped
	// into that range by the fold.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
871
872// Test if a character is a letter or a digit.
873// TODO: check when this is looking for ASCII alnum and when it should use unicode
874func isalnum(c byte) bool {
875	return (c >= '0' && c <= '9') || isletter(c)
876}
877
// expandTabs writes line to out with every tab character replaced by spaces,
// padding up to the next multiple of tabSize columns. Columns are counted in
// runes, not bytes. Nothing but the expanded line is written; in particular
// no line terminator is appended.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Measure the run of tabs at the very beginning of the line.
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}

	// Fast path: no tab occurs after the leading run, so each leading tab is
	// worth exactly tabSize spaces and no rune decoding is needed.
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for n := leading * tabSize; n > 0; n-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// Slow path: track the current column rune by rune to know how many
	// spaces each tab stands for.
	column := 0
	for pos := 0; pos < len(line); {
		if line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			out.Write(line[pos : pos+width])
			pos += width
			column++
			continue
		}

		// A tab always produces at least one space, then pads to the stop.
		for {
			out.WriteByte(' ')
			column++
			if column%tabSize == 0 {
				break
			}
		}
		pos++
	}
}
935
// isIndented reports whether a line counts as indented.
// It returns the number of leading bytes that make up the indent: 1 for a
// leading tab, indentSize for a prefix of indentSize spaces, and 0 when the
// line is not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) < indentSize:
		return 0
	}

	// Count leading spaces, looking no further than indentSize bytes in.
	spaces := 0
	for spaces < indentSize && data[spaces] == ' ' {
		spaces++
	}
	if spaces < indentSize {
		return 0
	}
	return indentSize
}
955
956// Create a url-safe slug for fragments
957func slugify(in []byte) []byte {
958	if len(in) == 0 {
959		return in
960	}
961	out := make([]byte, 0, len(in))
962	sym := false
963
964	for _, ch := range in {
965		if isalnum(ch) {
966			sym = false
967			out = append(out, ch)
968		} else if sym {
969			continue
970		} else {
971			out = append(out, '-')
972			sym = true
973		}
974	}
975	var a, b int
976	var ch byte
977	for a, ch = range out {
978		if ch != '-' {
979			break
980		}
981	}
982	for b = len(out) - 1; b > 0; b-- {
983		if out[b] != '-' {
984			break
985		}
986	}
987	return out[a : b+1]
988}