markdown.go
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// Markdown parsing and processing
//
//

// Package blackfriday is a markdown processor.
//
// Translates plain text with simple formatting rules into HTML or LaTeX.
package blackfriday

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"
)

// Version string of the package.
const Version = "2.0"

// Extensions is a bitwise or'ed collection of enabled Blackfriday's
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
const (
	NoExtensions Extensions = 0
	NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables // Render tables
	FencedCode // Render fenced code blocks
	Autolink // Detect embedded URLs that are not explicitly marked
	Strikethrough // Strikethrough text using ~~test~~
	LaxHTMLBlocks // Loosen up HTML block parsing rules
	SpaceHeaders // Be strict about prefix header rules
	HardLineBreak // Translate newlines into line breaks
	TabSizeEight // Expand tabs to eight spaces instead of four
	Footnotes // Pandoc-style footnotes
	NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeaderIDs // Specify header IDs with {#id}
	Titleblock // Titleblock ala pandoc
	AutoHeaderIDs // Create the header ID from the text
	BackslashLineBreak // Translate trailing backslashes into line breaks
	DefinitionLists // Render definition lists
	Smartypants // Enable smart punctuation substitutions
	SmartypantsFractions // Enable smart fractions (with Smartypants)
	SmartypantsDashes // Enable smart dashes (with Smartypants)
	SmartypantsLatexDashes // Enable LaTeX-style dashes (with Smartypants)
	SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for double quotes rendering
	TOC // Generate a table of contents
	OmitContents // Skip the main contents (for a standalone table of contents)

	CommonHTMLFlags HTMLFlags = UseXHTML

	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
		BackslashLineBreak | DefinitionLists | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
)
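
// For example, several of the extension bits above can be combined and passed
// to Markdown through Options (a usage sketch; the particular combination and
// input are chosen only for illustration):
//
//	opts := Options{Extensions: Tables | FencedCode | Autolink | Strikethrough}
//	renderer := NewHTMLRenderer(HTMLRendererParameters{Flags: CommonHTMLFlags})
//	output := Markdown([]byte("A | B\n--|--\n1 | 2\n"), renderer, opts)
//	fmt.Printf("%s", output)
//
//	// Individual bits are tested the same way the parser does it internally:
//	if opts.Extensions&Tables != 0 {
//		// the Tables extension is enabled
//	}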

// DefaultOptions is a convenience variable with all the options that are
// enabled by default.
var DefaultOptions = Options{
	Extensions: CommonExtensions,
}

// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList
	ListItemEndOfList
)

// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)

// The size of a tab stop.
const (
	TabSizeDefault = 4
	TabSizeDouble = 8
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	"blockquote": struct{}{},
	"del": struct{}{},
	"div": struct{}{},
	"dl": struct{}{},
	"fieldset": struct{}{},
	"form": struct{}{},
	"h1": struct{}{},
	"h2": struct{}{},
	"h3": struct{}{},
	"h4": struct{}{},
	"h5": struct{}{},
	"h6": struct{}{},
	"iframe": struct{}{},
	"ins": struct{}{},
	"math": struct{}{},
	"noscript": struct{}{},
	"ol": struct{}{},
	"pre": struct{}{},
	"p": struct{}{},
	"script": struct{}{},
	"style": struct{}{},
	"table": struct{}{},
	"ul": struct{}{},

	// HTML5
	"address": struct{}{},
	"article": struct{}{},
	"aside": struct{}{},
	"canvas": struct{}{},
	"figcaption": struct{}{},
	"figure": struct{}{},
	"footer": struct{}{},
	"header": struct{}{},
	"hgroup": struct{}{},
	"main": struct{}{},
	"nav": struct{}{},
	"output": struct{}{},
	"progress": struct{}{},
	"section": struct{}{},
	"video": struct{}{},
}

// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// Render renders a complete parsed document (the AST produced by Parse) and
// returns the result as a byte slice.
//
// RenderNode is invoked as the tree is walked, writing the output for a
// single node directly to w. The entering flag reports whether the walk is
// entering or leaving the node, and the returned WalkStatus controls how the
// walk proceeds.
//
// Currently HTML and LaTeX implementations are provided.
type Renderer interface {
	Render(ast *Node) []byte
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
}
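
// What follows is a minimal custom-renderer sketch, not part of this package:
// an illustrative type that dumps node types as they are visited. It relies
// only on Node.Walk, WalkStatus and GoToNext, which are used elsewhere in
// this file.
//
//	type nodeDumper struct{}
//
//	func (d *nodeDumper) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {
//		if entering {
//			fmt.Fprintf(w, "%v\n", node.Type) // one line per node on entry
//		}
//		return GoToNext
//	}
//
//	func (d *nodeDumper) Render(ast *Node) []byte {
//		var buf bytes.Buffer
//		ast.Walk(func(node *Node, entering bool) WalkStatus {
//			return d.RenderNode(&buf, node, entering)
//		})
//		return buf.Bytes()
//	}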

// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
type inlineParser func(p *parser, data []byte, offset int) int

// Parser holds runtime state used by the parser.
// This is constructed by the Markdown function.
type parser struct {
	refOverride ReferenceOverrideFunc
	refs map[string]*reference
	inlineCallback [256]inlineParser
	flags Extensions
	nesting int
	maxNesting int
	insideLink bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	doc *Node
	tip *Node // = doc
	oldTip *Node
	lastMatchedContainer *Node // = doc
	allClosed bool
	currBlock *Node // a block node currently being parsed by inline parser
}

func (p *parser) getRef(refid string) (ref *reference, found bool) {
	if p.refOverride != nil {
		r, overridden := p.refOverride(refid)
		if overridden {
			if r == nil {
				return nil, false
			}
			return &reference{
				link: []byte(r.Link),
				title: []byte(r.Title),
				noteID: 0,
				hasBlock: false,
				text: []byte(r.Text)}, true
		}
	}
	// refs are case insensitive
	ref, found = p.refs[strings.ToLower(refid)]
	return ref, found
}

func (p *parser) finalize(block *Node) {
	above := block.Parent
	block.open = false
	p.tip = above
}

func (p *parser) addChild(node NodeType, offset uint32) *Node {
	for !p.tip.canContain(node) {
		p.finalize(p.tip)
	}
	newNode := NewNode(node)
	newNode.content = []byte{}
	p.tip.appendChild(newNode)
	p.tip = newNode
	return newNode
}

func (p *parser) closeUnmatchedBlocks() {
	if !p.allClosed {
		for p.oldTip != p.lastMatchedContainer {
			parent := p.oldTip.Parent
			p.finalize(p.oldTip)
			p.oldTip = parent
		}
		p.allClosed = true
	}
}

//
//
// Public interface
//
//

// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}

// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)

// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// Extensions flags defined in this package.
	Extensions Extensions

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	// * [link text][refid]
	// * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}
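
// A hedged usage sketch for ReferenceOverride: the refid "manual" and its
// target URL below are invented for illustration only.
//
//	opts := Options{
//		Extensions: CommonExtensions,
//		ReferenceOverride: func(reference string) (*Reference, bool) {
//			if reference == "manual" {
//				return &Reference{
//					Link:  "https://example.com/manual.html",
//					Title: "The manual",
//				}, true
//			}
//			return nil, false // fall back to refids defined in the document
//		},
//	}
//	output := Markdown([]byte("See [the manual][manual]."), NewHTMLRenderer(HTMLRendererParameters{}), opts)
//	fmt.Printf("%s\n", output)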

// MarkdownBasic is a convenience function for simple rendering.
// It processes markdown input with no extensions enabled.
func MarkdownBasic(input []byte) []byte {
	// set up the HTML renderer
	renderer := NewHTMLRenderer(HTMLRendererParameters{
		Flags: UseXHTML,
		Extensions: CommonExtensions,
	})

	// set up the parser
	return Markdown(input, renderer, Options{})
}

// MarkdownCommon is a convenience function for simple rendering. It calls
// Markdown with most useful extensions enabled, including:
//
// * Smartypants processing with smart fractions and LaTeX dashes
//
// * Intra-word emphasis suppression
//
// * Tables
//
// * Fenced code blocks
//
// * Autolinking
//
// * Strikethrough support
//
// * Strict header parsing
//
// * Custom Header IDs
func MarkdownCommon(input []byte) []byte {
	// set up the HTML renderer
	renderer := NewHTMLRenderer(HTMLRendererParameters{
		Flags: CommonHTMLFlags,
		Extensions: CommonExtensions,
	})
	return Markdown(input, renderer, DefaultOptions)
}
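
// A minimal end-to-end sketch of calling MarkdownCommon from another program.
// The import path matches the repository URL above; the sample input is
// invented for illustration.
//
//	package main
//
//	import (
//		"fmt"
//
//		"github.com/russross/blackfriday"
//	)
//
//	func main() {
//		input := []byte("# Title\n\nSome *emphasized* text and a [link](http://example.com/).\n")
//		output := blackfriday.MarkdownCommon(input)
//		fmt.Println(string(output))
//	}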

// Markdown is the main rendering function.
// It parses and renders a block of markdown-encoded text.
// The supplied Renderer is used to format the output, and options.Extensions
// dictates which non-standard extensions are enabled.
//
// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
// NewLatexRenderer, respectively.
func Markdown(input []byte, renderer Renderer, options Options) []byte {
	if renderer == nil {
		return nil
	}
	return renderer.Render(Parse(input, options))
}

// Parse is an entry point to the parsing part of Blackfriday. It takes an
// input markdown document and produces a syntax tree for its contents. This
// tree can then be rendered with a default or custom renderer, or
// analyzed/transformed by the caller to whatever non-standard needs they have.
func Parse(input []byte, opts Options) *Node {
	extensions := opts.Extensions

	// fill in the render structure
	p := new(parser)
	p.flags = extensions
	p.refOverride = opts.ReferenceOverride
	p.refs = make(map[string]*reference)
	p.maxNesting = 16
	p.insideLink = false

	docNode := NewNode(Document)
	p.doc = docNode
	p.tip = docNode
	p.oldTip = docNode
	p.lastMatchedContainer = docNode
	p.allClosed = true

	// register inline parsers
	p.inlineCallback['*'] = emphasis
	p.inlineCallback['_'] = emphasis
	if extensions&Strikethrough != 0 {
		p.inlineCallback['~'] = emphasis
	}
	p.inlineCallback['`'] = codeSpan
	p.inlineCallback['\n'] = lineBreak
	p.inlineCallback['['] = link
	p.inlineCallback['<'] = leftAngle
	p.inlineCallback['\\'] = escape
	p.inlineCallback['&'] = entity
	p.inlineCallback['!'] = maybeImage
	p.inlineCallback['^'] = maybeInlineFootnote

	if extensions&Autolink != 0 {
		p.inlineCallback['h'] = maybeAutoLink
		p.inlineCallback['m'] = maybeAutoLink
		p.inlineCallback['f'] = maybeAutoLink
		p.inlineCallback['H'] = maybeAutoLink
		p.inlineCallback['M'] = maybeAutoLink
		p.inlineCallback['F'] = maybeAutoLink
	}

	if extensions&Footnotes != 0 {
		p.notes = make([]*reference, 0)
	}

	first := firstPass(p, input)
	secondPass(p, first)
	// Walk the tree and finish up any unfinished blocks
	for p.tip != nil {
		p.finalize(p.tip)
	}
	// Walk the tree again and process inline markdown in each block
	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
			p.currBlock = node
			p.inline(node.content)
			node.content = nil
		}
		return GoToNext
	})
	p.parseRefsToAST()
	p.generateTOC()
	return p.doc
}
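
// A sketch of using Parse directly and inspecting the resulting tree. The
// heading-collecting logic below is invented for illustration; it relies only
// on Parse, Node.Walk, the Header node type and GoToNext, all used in this
// file.
//
//	doc := Parse([]byte("# One\n\n## Two\n"), DefaultOptions)
//	var headings []string
//	doc.Walk(func(node *Node, entering bool) WalkStatus {
//		if entering && node.Type == Header && node.FirstChild != nil {
//			headings = append(headings, string(node.FirstChild.Literal))
//		}
//		return GoToNext
//	})
//	// headings should now hold the heading texts, e.g. "One" and "Two".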

func (p *parser) generateTOC() {
	if p.flags&TOC == 0 && p.flags&OmitContents == 0 {
		return
	}
	navNode := NewNode(HTMLBlock)
	navNode.Literal = []byte("<nav>")
	navNode.open = false

	var topList *Node
	var listNode *Node
	var lastItem *Node
	headerCount := 0
	currentLevel := 0
	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
		if entering && node.Type == Header {
			if node.Level > currentLevel {
				currentLevel++
				newList := NewNode(List)
				if lastItem != nil {
					lastItem.appendChild(newList)
					listNode = newList
				} else {
					listNode = newList
					topList = listNode
				}
			}
			if node.Level < currentLevel {
				finalizeList(listNode)
				lastItem = listNode.Parent
				listNode = lastItem.Parent
			}
			node.HeaderID = fmt.Sprintf("toc_%d", headerCount)
			headerCount++
			lastItem = NewNode(Item)
			listNode.appendChild(lastItem)
			anchorNode := NewNode(Link)
			anchorNode.Destination = []byte("#" + node.HeaderID)
			lastItem.appendChild(anchorNode)
			anchorNode.appendChild(text(node.FirstChild.Literal))
		}
		return GoToNext
	})
	firstChild := p.doc.FirstChild
	// Insert TOC only if there is anything to insert
	if topList != nil {
		finalizeList(topList)
		firstChild.insertBefore(navNode)
		firstChild.insertBefore(topList)
		navCloseNode := NewNode(HTMLBlock)
		navCloseNode.Literal = []byte("</nav>")
		navCloseNode.open = false
		firstChild.insertBefore(navCloseNode)
	}
	// Drop everything after the TOC if OmitContents was requested
	if p.flags&OmitContents != 0 {
		for firstChild != nil {
			next := firstChild.Next
			firstChild.unlink()
			firstChild = next
		}
	}
}

func (p *parser) parseRefsToAST() {
	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
		return
	}
	p.tip = p.doc
	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
	p.addBlock(HorizontalRule, nil)
	block := p.addBlock(List, nil)
	block.ListFlags = ListTypeOrdered
	flags := ListItemBeginningOfList
	// Note: this loop is intentionally explicit, not range-form. This is
	// because the body of the loop will append nested footnotes to p.notes and
	// we need to process those late additions. Range form would only walk over
	// the fixed initial set.
	for i := 0; i < len(p.notes); i++ {
		ref := p.notes[i]
		block := p.addBlock(Item, nil)
		block.ListFlags = ListTypeOrdered
		block.RefLink = ref.link
		if ref.hasBlock {
			flags |= ListItemContainsBlock
			p.block(ref.title)
		} else {
			p.currBlock = block
			p.inline(ref.title)
		}
		flags &^= ListItemBeginningOfList | ListItemContainsBlock
	}
	above := block.Parent
	finalizeList(block)
	p.tip = above
	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte("</div>")))
	block.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Header {
			p.currBlock = node
			p.inline(node.content)
			node.content = nil
		}
		return GoToNext
	})
}

// first pass:
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
func firstPass(p *parser, input []byte) []byte {
	var out bytes.Buffer
	tabSize := TabSizeDefault
	if p.flags&TabSizeEight != 0 {
		tabSize = TabSizeDouble
	}
	beg := 0
	lastFencedCodeBlockEnd := 0
	for beg < len(input) {
		// Find end of this line, then process the line.
		end := beg
		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
			end++
		}

		if p.flags&FencedCode != 0 {
			// track fenced code block boundaries to suppress tab expansion
			// and reference extraction inside them:
			if beg >= lastFencedCodeBlockEnd {
				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
					lastFencedCodeBlockEnd = beg + i
				}
			}
		}

		// add the line body if present
		if end > beg {
			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
				out.Write(input[beg:end])
			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
				beg += refEnd
				continue
			} else {
				expandTabs(&out, input[beg:end], tabSize)
			}
		}

		if end < len(input) && input[end] == '\r' {
			end++
		}
		if end < len(input) && input[end] == '\n' {
			end++
		}
		out.WriteByte('\n')

		beg = end
	}

	// empty input?
	if out.Len() == 0 {
		out.WriteByte('\n')
	}

	return out.Bytes()
}

// second pass: parse the block structure
func secondPass(p *parser, input []byte) {
	p.block(input)

	if p.flags&Footnotes != 0 && len(p.notes) > 0 {
		flags := ListItemBeginningOfList
		for i := 0; i < len(p.notes); i++ {
			ref := p.notes[i]
			if ref.hasBlock {
				flags |= ListItemContainsBlock
				p.block(ref.title)
			} else {
				p.inline(ref.title)
			}
			flags &^= ListItemBeginningOfList | ListItemContainsBlock
		}
	}

	if p.nesting != 0 {
		panic("Nesting level did not end at zero")
	}
}

//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
// [1]: http://www.google.com/ "Google"
// [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
// This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
// This sentence needs a bit of further explanation.[^note]
//
// [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
// Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.

// References are parsed and stored in this struct.
type reference struct {
	link []byte
	title []byte
	noteID int // 0 if not a footnote ref
	hasBlock bool
	text []byte
}

func (r *reference) String() string {
	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
		r.link, r.title, r.text, r.noteID, r.hasBlock)
}

// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the render struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
func isReference(p *parser, data []byte, tabSize int) int {
	// up to 3 optional leading spaces
	if len(data) < 4 {
		return 0
	}
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	noteID := 0

	// id part: anything but a newline between brackets
	if data[i] != '[' {
		return 0
	}
	i++
	if p.flags&Footnotes != 0 {
		if i < len(data) && data[i] == '^' {
			// we can set it to anything here because the proper noteIds will
			// be assigned later during the second pass. It just has to be != 0
			noteID = 1
			i++
		}
	}
	idOffset := i
	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
		i++
	}
	if i >= len(data) || data[i] != ']' {
		return 0
	}
	idEnd := i

	// spacer: colon (space | tab)* newline? (space | tab)*
	i++
	if i >= len(data) || data[i] != ':' {
		return 0
	}
	i++
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
			i++
		}
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i >= len(data) {
		return 0
	}

	var (
		linkOffset, linkEnd int
		titleOffset, titleEnd int
		lineEnd int
		raw []byte
		hasBlock bool
	)

	if p.flags&Footnotes != 0 && noteID != 0 {
		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
		lineEnd = linkEnd
	} else {
		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
	}
	if lineEnd == 0 {
		return 0
	}

	// a valid ref has been found

	ref := &reference{
		noteID: noteID,
		hasBlock: hasBlock,
	}

	if noteID > 0 {
		// reusing the link field for the id since footnotes don't have links
		ref.link = data[idOffset:idEnd]
		// if footnote, it's not really a title, it's the contained text
		ref.title = raw
	} else {
		ref.link = data[linkOffset:linkEnd]
		ref.title = data[titleOffset:titleEnd]
	}

	// id matches are case-insensitive
	id := string(bytes.ToLower(data[idOffset:idEnd]))

	p.refs[id] = ref

	return lineEnd
}

func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
	// link: whitespace-free sequence, optionally between angle brackets
	if data[i] == '<' {
		i++
	}
	linkOffset = i
	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
		i++
	}
	if i == len(data) {
		return
	}
	linkEnd = i
	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
		linkOffset++
		linkEnd--
	}

	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
		return
	}

	// compute end-of-line
	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
		lineEnd = i
	}
	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
		lineEnd++
	}

	// optional (space|tab)* spacer after a newline
	if lineEnd > 0 {
		i = lineEnd + 1
		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
			i++
		}
	}

	// optional title: any non-newline sequence enclosed in '"() alone on its line
	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
		i++
		titleOffset = i

		// look for EOL
		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
			i++
		}
		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
			titleEnd = i + 1
		} else {
			titleEnd = i
		}

		// step back
		i--
		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
			i--
		}
		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
			lineEnd = titleEnd
			titleEnd = i
		}
	}

	return
}

// The first bit of this logic is the same as (*parser).listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}

//
//
// Miscellaneous helper functions
//
//

// Test if a character is a punctuation symbol.
// Taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
		if c == r {
			return true
		}
	}
	return false
}

// Test if a character is a whitespace character.
func isspace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
}

// Test if a character is a letter.
func isletter(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

// Test if a character is a letter or a digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	return (c >= '0' && c <= '9') || isletter(c)
}

// Replace tab characters with spaces, aligning to the next tabSize column.
// The caller is expected to append the trailing newline for the line.
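// For example, with tabSize 4 the following call (a worked illustration,
// assuming an existing bytes.Buffer named buf) writes "a   bc  d" to buf:
//
//	expandTabs(&buf, []byte("a\tbc\td"), 4)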
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// first, check for common cases: no tabs, or only tabs at beginning of line
	i, prefix := 0, 0
	slowcase := false
	for i = 0; i < len(line); i++ {
		if line[i] == '\t' {
			if prefix == i {
				prefix++
			} else {
				slowcase = true
				break
			}
		}
	}

	// no need to decode runes if all tabs are at the beginning of the line
	if !slowcase {
		for i = 0; i < prefix*tabSize; i++ {
			out.WriteByte(' ')
		}
		out.Write(line[prefix:])
		return
	}

	// the slow case: we need to count runes to figure out how
	// many spaces to insert for each tab
	column := 0
	i = 0
	for i < len(line) {
		start := i
		for i < len(line) && line[i] != '\t' {
			_, size := utf8.DecodeRune(line[i:])
			i += size
			column++
		}

		if i > start {
			out.Write(line[start:i])
		}

		if i >= len(line) {
			break
		}

		for {
			out.WriteByte(' ')
			column++
			if column%tabSize == 0 {
				break
			}
		}

		i++
	}
}

// Find if a line counts as indented or not.
// Returns number of characters the indent is (0 = not indented).
func isIndented(data []byte, indentSize int) int {
	if len(data) == 0 {
		return 0
	}
	if data[0] == '\t' {
		return 1
	}
	if len(data) < indentSize {
		return 0
	}
	for i := 0; i < indentSize; i++ {
		if data[i] != ' ' {
			return 0
		}
	}
	return indentSize
}

// Create a url-safe slug for fragments
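// For example (a worked illustration of the loop below):
//
//	slugify([]byte("Hello, World!")) // yields []byte("Hello-World")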
func slugify(in []byte) []byte {
	if len(in) == 0 {
		return in
	}
	out := make([]byte, 0, len(in))
	sym := false

	for _, ch := range in {
		if isalnum(ch) {
			sym = false
			out = append(out, ch)
		} else if sym {
			continue
		} else {
			out = append(out, '-')
			sym = true
		}
	}
	var a, b int
	var ch byte
	for a, ch = range out {
		if ch != '-' {
			break
		}
	}
	for b = len(out) - 1; b > 0; b-- {
		if out[b] != '-' {
			break
		}
	}
	return out[a : b+1]
}