markdown.go
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// Markdown parsing and processing
//
//

// Package blackfriday is a markdown processor.
//
// Translates plain text with simple formatting rules into HTML or LaTeX.
package blackfriday

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"
)

// Version string of the package.
const Version = "2.0"

// Extensions is a bitwise or'ed collection of enabled Blackfriday
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
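//
// For example, a caller of this package might enable a custom subset like
// this (an illustrative sketch, not part of this file; input is assumed to be
// the caller's raw markdown and the renderer parameters are only one choice):
//
//	exts := blackfriday.Tables | blackfriday.FencedCode | blackfriday.Autolink
//	output := blackfriday.Markdown(input,
//		blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{Flags: blackfriday.UseXHTML}),
//		blackfriday.Options{Extensions: exts})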
const (
	NoExtensions            Extensions = 0
	NoIntraEmphasis         Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                  // Render tables
	FencedCode              // Render fenced code blocks
	Autolink                // Detect embedded URLs that are not explicitly marked
	Strikethrough           // Strikethrough text using ~~test~~
	LaxHTMLBlocks           // Loosen up HTML block parsing rules
	SpaceHeaders            // Be strict about prefix header rules
	HardLineBreak           // Translate newlines into line breaks
	TabSizeEight            // Expand tabs to eight spaces instead of four
	Footnotes               // Pandoc-style footnotes
	NoEmptyLineBeforeBlock  // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeaderIDs               // specify header IDs with {#id}
	Titleblock              // Titleblock ala pandoc
	AutoHeaderIDs           // Create the header ID from the text
	BackslashLineBreak      // Translate trailing backslashes into line breaks
	DefinitionLists         // Render definition lists
	Smartypants             // Enable smart punctuation substitutions
	SmartypantsFractions    // Enable smart fractions (with Smartypants)
	SmartypantsDashes       // Enable smart dashes (with Smartypants)
	SmartypantsLatexDashes  // Enable LaTeX-style dashes (with Smartypants)
	SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for double quotes rendering
	TOC                     // Generate a table of contents
	OmitContents            // Skip the main contents (for a standalone table of contents)

	CommonHTMLFlags HTMLFlags = UseXHTML

	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
		BackslashLineBreak | DefinitionLists | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
)

// DefaultOptions is a convenience variable with all the options that are
// enabled by default.
var DefaultOptions = Options{
	Extensions: CommonExtensions,
}

// TODO: this should probably be unexported. Or moved to node.go
type LinkType int

// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LinkTypeNotAutolink LinkType = iota
	LinkTypeNormal
	LinkTypeEmail
)

// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList
	ListItemEndOfList
)

// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)

// The size of a tab stop.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	"blockquote": struct{}{},
	"del":        struct{}{},
	"div":        struct{}{},
	"dl":         struct{}{},
	"fieldset":   struct{}{},
	"form":       struct{}{},
	"h1":         struct{}{},
	"h2":         struct{}{},
	"h3":         struct{}{},
	"h4":         struct{}{},
	"h5":         struct{}{},
	"h6":         struct{}{},
	"iframe":     struct{}{},
	"ins":        struct{}{},
	"math":       struct{}{},
	"noscript":   struct{}{},
	"ol":         struct{}{},
	"pre":        struct{}{},
	"p":          struct{}{},
	"script":     struct{}{},
	"style":      struct{}{},
	"table":      struct{}{},
	"ul":         struct{}{},

	// HTML5
	"address":    struct{}{},
	"article":    struct{}{},
	"aside":      struct{}{},
	"canvas":     struct{}{},
	"figcaption": struct{}{},
	"figure":     struct{}{},
	"footer":     struct{}{},
	"header":     struct{}{},
	"hgroup":     struct{}{},
	"main":       struct{}{},
	"nav":        struct{}{},
	"output":     struct{}{},
	"progress":   struct{}{},
	"section":    struct{}{},
	"video":      struct{}{},
}

// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently, HTML and LaTeX implementations are provided.
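//
// A custom renderer only needs to satisfy the two methods below. A minimal
// sketch (illustrative only, not part of this package; it simply discards
// all output):
//
//	type nopRenderer struct{}
//
//	func (nopRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
//		return blackfriday.GoToNext
//	}
//
//	func (nopRenderer) Render(ast *blackfriday.Node) []byte {
//		return nil
//	}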
type Renderer interface {
	Render(ast *Node) []byte
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
}

// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
type inlineParser func(p *parser, data []byte, offset int) int

// Parser holds runtime state used by the parser.
// This is constructed by the Markdown function.
type parser struct {
	refOverride    ReferenceOverrideFunc
	refs           map[string]*reference
	inlineCallback [256]inlineParser
	flags          Extensions
	nesting        int
	maxNesting     int
	insideLink     bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	doc                  *Node
	tip                  *Node // = doc
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	allClosed            bool
	currBlock            *Node // a block node currently being parsed by inline parser
}

func (p *parser) getRef(refid string) (ref *reference, found bool) {
	if p.refOverride != nil {
		r, overridden := p.refOverride(refid)
		if overridden {
			if r == nil {
				return nil, false
			}
			return &reference{
				link:     []byte(r.Link),
				title:    []byte(r.Title),
				noteID:   0,
				hasBlock: false,
				text:     []byte(r.Text)}, true
		}
	}
	// refs are case insensitive
	ref, found = p.refs[strings.ToLower(refid)]
	return ref, found
}

func (p *parser) finalize(block *Node) {
	above := block.Parent
	block.open = false
	p.tip = above
}

func (p *parser) addChild(node NodeType, offset uint32) *Node {
	for !p.tip.canContain(node) {
		p.finalize(p.tip)
	}
	newNode := NewNode(node)
	newNode.content = []byte{}
	p.tip.appendChild(newNode)
	p.tip = newNode
	return newNode
}

func (p *parser) closeUnmatchedBlocks() {
	if !p.allClosed {
		for p.oldTip != p.lastMatchedContainer {
			parent := p.oldTip.Parent
			p.finalize(p.oldTip)
			p.oldTip = parent
		}
		p.allClosed = true
	}
}

//
//
// Public interface
//
//

// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}

// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// See the documentation in Options for more details on use-case.
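//
// A minimal sketch of such a function (illustrative only; the refid and URL
// used here are made up):
//
//	func myOverride(refid string) (*blackfriday.Reference, bool) {
//		if refid == "home" {
//			return &blackfriday.Reference{Link: "https://example.com/", Title: "Home"}, true
//		}
//		return nil, false
//	}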
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)

// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// Extensions flags defined in this package.
	Extensions Extensions

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	//  * [link text][refid]
	//  * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}

// MarkdownBasic is a convenience function for simple rendering.
// It processes markdown input with no extensions enabled.
func MarkdownBasic(input []byte) []byte {
	// set up the HTML renderer
	renderer := NewHTMLRenderer(HTMLRendererParameters{
		Flags:      UseXHTML,
		Extensions: CommonExtensions,
	})

	// set up the parser
	return Markdown(input, renderer, Options{})
}

// MarkdownCommon is a convenience function for simple rendering. It calls
// Markdown with most useful extensions enabled, including:
//
// * Smartypants processing with smart fractions and LaTeX dashes
//
// * Intra-word emphasis suppression
//
// * Tables
//
// * Fenced code blocks
//
// * Autolinking
//
// * Strikethrough support
//
// * Strict header parsing
//
// * Custom Header IDs
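//
// A typical invocation looks like the following sketch (the sample input is
// made up; fmt is from the standard library):
//
//	md := []byte("# Title\n\nSome *markdown* text.\n")
//	fmt.Println(string(blackfriday.MarkdownCommon(md)))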
func MarkdownCommon(input []byte) []byte {
	// set up the HTML renderer
	renderer := NewHTMLRenderer(HTMLRendererParameters{
		Flags:      CommonHTMLFlags,
		Extensions: CommonExtensions,
	})
	return Markdown(input, renderer, DefaultOptions)
}

// Markdown is the main rendering function.
// It parses and renders a block of markdown-encoded text.
// The supplied Renderer is used to format the output, and extensions dictates
// which non-standard extensions are enabled.
//
// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
// NewLatexRenderer, respectively.
func Markdown(input []byte, renderer Renderer, options Options) []byte {
	if renderer == nil {
		return nil
	}
	return renderer.Render(Parse(input, options))
}

// Parse is an entry point to the parsing part of Blackfriday. It takes an
// input markdown document and produces a syntax tree for its contents. This
// tree can then be rendered with a default or custom renderer, or
// analyzed/transformed by the caller to whatever non-standard needs they have.
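//
// For example, a caller could count the headers in a document with a walk
// like this (a sketch; the callback signature matches Node.Walk as used
// elsewhere in this file):
//
//	doc := blackfriday.Parse(input, blackfriday.DefaultOptions)
//	headers := 0
//	doc.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
//		if entering && node.Type == blackfriday.Header {
//			headers++
//		}
//		return blackfriday.GoToNext
//	})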
func Parse(input []byte, opts Options) *Node {
	extensions := opts.Extensions

	// fill in the render structure
	p := new(parser)
	p.flags = extensions
	p.refOverride = opts.ReferenceOverride
	p.refs = make(map[string]*reference)
	p.maxNesting = 16
	p.insideLink = false

	docNode := NewNode(Document)
	p.doc = docNode
	p.tip = docNode
	p.oldTip = docNode
	p.lastMatchedContainer = docNode
	p.allClosed = true

	// register inline parsers
	p.inlineCallback['*'] = emphasis
	p.inlineCallback['_'] = emphasis
	if extensions&Strikethrough != 0 {
		p.inlineCallback['~'] = emphasis
	}
	p.inlineCallback['`'] = codeSpan
	p.inlineCallback['\n'] = lineBreak
	p.inlineCallback['['] = link
	p.inlineCallback['<'] = leftAngle
	p.inlineCallback['\\'] = escape
	p.inlineCallback['&'] = entity
	p.inlineCallback['!'] = maybeImage
	p.inlineCallback['^'] = maybeInlineFootnote

	if extensions&Autolink != 0 {
		p.inlineCallback['h'] = maybeAutoLink
		p.inlineCallback['m'] = maybeAutoLink
		p.inlineCallback['f'] = maybeAutoLink
		p.inlineCallback['H'] = maybeAutoLink
		p.inlineCallback['M'] = maybeAutoLink
		p.inlineCallback['F'] = maybeAutoLink
	}

	if extensions&Footnotes != 0 {
		p.notes = make([]*reference, 0)
	}

	first := firstPass(p, input)
	secondPass(p, first)
	// Walk the tree and finish up any unfinished blocks
	for p.tip != nil {
		p.finalize(p.tip)
	}
	// Walk the tree again and process inline markdown in each block
	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
			p.currBlock = node
			p.inline(node.content)
			node.content = nil
		}
		return GoToNext
	})
	p.parseRefsToAST()
	p.generateTOC()
	return p.doc
}

func (p *parser) generateTOC() {
	if p.flags&TOC == 0 && p.flags&OmitContents == 0 {
		return
	}
	navNode := NewNode(HTMLBlock)
	navNode.Literal = []byte("<nav>")
	navNode.open = false

	var topList *Node
	var listNode *Node
	var lastItem *Node
	headerCount := 0
	currentLevel := 0
	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
		if entering && node.Type == Header {
			if node.Level > currentLevel {
				currentLevel++
				newList := NewNode(List)
				if lastItem != nil {
					lastItem.appendChild(newList)
					listNode = newList
				} else {
					listNode = newList
					topList = listNode
				}
			}
			if node.Level < currentLevel {
				finalizeList(listNode)
				lastItem = listNode.Parent
				listNode = lastItem.Parent
			}
			node.HeaderID = fmt.Sprintf("toc_%d", headerCount)
			headerCount++
			lastItem = NewNode(Item)
			listNode.appendChild(lastItem)
			anchorNode := NewNode(Link)
			anchorNode.Destination = []byte("#" + node.HeaderID)
			lastItem.appendChild(anchorNode)
			anchorNode.appendChild(text(node.FirstChild.Literal))
		}
		return GoToNext
	})
	firstChild := p.doc.FirstChild
	// Insert TOC only if there is anything to insert
	if topList != nil {
		finalizeList(topList)
		firstChild.insertBefore(navNode)
		firstChild.insertBefore(topList)
		navCloseNode := NewNode(HTMLBlock)
		navCloseNode.Literal = []byte("</nav>")
		navCloseNode.open = false
		firstChild.insertBefore(navCloseNode)
	}
	// Drop everything after the TOC if OmitContents was requested
	if p.flags&OmitContents != 0 {
		for firstChild != nil {
			next := firstChild.Next
			firstChild.unlink()
			firstChild = next
		}
	}
}

func (p *parser) parseRefsToAST() {
	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
		return
	}
	p.tip = p.doc
	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
	p.addBlock(HorizontalRule, nil)
	block := p.addBlock(List, nil)
	block.ListFlags = ListTypeOrdered
	flags := ListItemBeginningOfList
	// Note: this loop is intentionally explicit, not range-form. This is
	// because the body of the loop will append nested footnotes to p.notes and
	// we need to process those late additions. Range form would only walk over
	// the fixed initial set.
	for i := 0; i < len(p.notes); i++ {
		ref := p.notes[i]
		block := p.addBlock(Item, nil)
		block.ListFlags = ListTypeOrdered
		block.RefLink = ref.link
		if ref.hasBlock {
			flags |= ListItemContainsBlock
			p.block(ref.title)
		} else {
			p.currBlock = block
			p.inline(ref.title)
		}
		flags &^= ListItemBeginningOfList | ListItemContainsBlock
	}
	above := block.Parent
	finalizeList(block)
	p.tip = above
	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte("</div>")))
	block.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Header {
			p.currBlock = node
			p.inline(node.content)
			node.content = nil
		}
		return GoToNext
	})
}

// first pass:
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
func firstPass(p *parser, input []byte) []byte {
	var out bytes.Buffer
	tabSize := TabSizeDefault
	if p.flags&TabSizeEight != 0 {
		tabSize = TabSizeDouble
	}
	beg := 0
	lastFencedCodeBlockEnd := 0
	for beg < len(input) {
		// Find end of this line, then process the line.
		end := beg
		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
			end++
		}

		if p.flags&FencedCode != 0 {
			// track fenced code block boundaries to suppress tab expansion
			// and reference extraction inside them:
			if beg >= lastFencedCodeBlockEnd {
				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
					lastFencedCodeBlockEnd = beg + i
				}
			}
		}

		// add the line body if present
		if end > beg {
			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
				out.Write(input[beg:end])
			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
				beg += refEnd
				continue
			} else {
				expandTabs(&out, input[beg:end], tabSize)
			}
		}

		if end < len(input) && input[end] == '\r' {
			end++
		}
		if end < len(input) && input[end] == '\n' {
			end++
		}
		out.WriteByte('\n')

		beg = end
	}

	// empty input?
	if out.Len() == 0 {
		out.WriteByte('\n')
	}

	return out.Bytes()
}

// second pass: actual rendering
func secondPass(p *parser, input []byte) {
	p.block(input)

	if p.flags&Footnotes != 0 && len(p.notes) > 0 {
		flags := ListItemBeginningOfList
		for i := 0; i < len(p.notes); i++ {
			ref := p.notes[i]
			if ref.hasBlock {
				flags |= ListItemContainsBlock
				p.block(ref.title)
			} else {
				p.inline(ref.title)
			}
			flags &^= ListItemBeginningOfList | ListItemContainsBlock
		}
	}

	if p.nesting != 0 {
		panic("Nesting level did not end at zero")
	}
}
622
623//
624// Link references
625//
626// This section implements support for references that (usually) appear
627// as footnotes in a document, and can be referenced anywhere in the document.
628// The basic format is:
629//
630// [1]: http://www.google.com/ "Google"
631// [2]: http://www.github.com/ "Github"
632//
633// Anywhere in the document, the reference can be linked by referring to its
634// label, i.e., 1 and 2 in this example, as in:
635//
636// This library is hosted on [Github][2], a git hosting site.
637//
638// Actual footnotes as specified in Pandoc and supported by some other Markdown
639// libraries such as php-markdown are also taken care of. They look like this:
640//
641// This sentence needs a bit of further explanation.[^note]
642//
643// [^note]: This is the explanation.
644//
645// Footnotes should be placed at the end of the document in an ordered list.
646// Inline footnotes such as:
647//
648// Inline footnotes^[Not supported.] also exist.
649//
650// are not yet supported.
651
652// References are parsed and stored in this struct.
653type reference struct {
654 link []byte
655 title []byte
656 noteID int // 0 if not a footnote ref
657 hasBlock bool
658 text []byte
659}
660
661func (r *reference) String() string {
662 return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
663 r.link, r.title, r.text, r.noteID, r.hasBlock)
664}

// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the parser struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
func isReference(p *parser, data []byte, tabSize int) int {
	// up to 3 optional leading spaces
	if len(data) < 4 {
		return 0
	}
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	noteID := 0

	// id part: anything but a newline between brackets
	if data[i] != '[' {
		return 0
	}
	i++
	if p.flags&Footnotes != 0 {
		if i < len(data) && data[i] == '^' {
			// we can set it to anything here because the proper noteIds will
			// be assigned later during the second pass. It just has to be != 0
			noteID = 1
			i++
		}
	}
	idOffset := i
	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
		i++
	}
	if i >= len(data) || data[i] != ']' {
		return 0
	}
	idEnd := i

	// spacer: colon (space | tab)* newline? (space | tab)*
	i++
	if i >= len(data) || data[i] != ':' {
		return 0
	}
	i++
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
			i++
		}
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i >= len(data) {
		return 0
	}

	var (
		linkOffset, linkEnd   int
		titleOffset, titleEnd int
		lineEnd               int
		raw                   []byte
		hasBlock              bool
	)

	if p.flags&Footnotes != 0 && noteID != 0 {
		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
		lineEnd = linkEnd
	} else {
		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
	}
	if lineEnd == 0 {
		return 0
	}

	// a valid ref has been found

	ref := &reference{
		noteID:   noteID,
		hasBlock: hasBlock,
	}

	if noteID > 0 {
		// reusing the link field for the id since footnotes don't have links
		ref.link = data[idOffset:idEnd]
		// if footnote, it's not really a title, it's the contained text
		ref.title = raw
	} else {
		ref.link = data[linkOffset:linkEnd]
		ref.title = data[titleOffset:titleEnd]
	}

	// id matches are case-insensitive
	id := string(bytes.ToLower(data[idOffset:idEnd]))

	p.refs[id] = ref

	return lineEnd
}

func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
	// link: whitespace-free sequence, optionally between angle brackets
	if data[i] == '<' {
		i++
	}
	linkOffset = i
	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
		i++
	}
	if i == len(data) {
		return
	}
	linkEnd = i
	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
		linkOffset++
		linkEnd--
	}

	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
		return
	}

	// compute end-of-line
	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
		lineEnd = i
	}
	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
		lineEnd++
	}

	// optional (space|tab)* spacer after a newline
	if lineEnd > 0 {
		i = lineEnd + 1
		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
			i++
		}
	}

	// optional title: any non-newline sequence enclosed in '"() alone on its line
	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
		i++
		titleOffset = i

		// look for EOL
		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
			i++
		}
		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
			titleEnd = i + 1
		} else {
			titleEnd = i
		}

		// step back
		i--
		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
			i--
		}
		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
			lineEnd = titleEnd
			titleEnd = i
		}
	}

	return
}

// The first bit of this logic is the same as (*parser).listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}

//
//
// Miscellaneous helper functions
//
//

// Test if a character is a punctuation symbol.
// Taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
		if c == r {
			return true
		}
	}
	return false
}

// Test if a character is a whitespace character.
func isspace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
}

// Test if a character is a letter.
func isletter(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

// Test if a character is a letter or a digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	return (c >= '0' && c <= '9') || isletter(c)
}

// Replace tab characters with spaces, aligning to the next tabSize column.
// It does not append a trailing newline; the caller (firstPass) does that.
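//
// For example, with tabSize 4 (an illustrative trace of the code below):
//
//	expandTabs(&buf, []byte("a\tbc\td"), 4) // buf now holds "a   bc  d"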
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// first, check for common cases: no tabs, or only tabs at beginning of line
	i, prefix := 0, 0
	slowcase := false
	for i = 0; i < len(line); i++ {
		if line[i] == '\t' {
			if prefix == i {
				prefix++
			} else {
				slowcase = true
				break
			}
		}
	}

	// no need to decode runes if all tabs are at the beginning of the line
	if !slowcase {
		for i = 0; i < prefix*tabSize; i++ {
			out.WriteByte(' ')
		}
		out.Write(line[prefix:])
		return
	}

	// the slow case: we need to count runes to figure out how
	// many spaces to insert for each tab
	column := 0
	i = 0
	for i < len(line) {
		start := i
		for i < len(line) && line[i] != '\t' {
			_, size := utf8.DecodeRune(line[i:])
			i += size
			column++
		}

		if i > start {
			out.Write(line[start:i])
		}

		if i >= len(line) {
			break
		}

		for {
			out.WriteByte(' ')
			column++
			if column%tabSize == 0 {
				break
			}
		}

		i++
	}
}

// Check whether a line counts as indented or not.
// Returns the number of characters in the indent (0 = not indented).
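// For example (an illustrative trace of the code below):
//
//	isIndented([]byte("    code"), 4) // returns 4
//	isIndented([]byte("\tcode"), 4)   // returns 1
//	isIndented([]byte("  code"), 4)   // returns 0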
func isIndented(data []byte, indentSize int) int {
	if len(data) == 0 {
		return 0
	}
	if data[0] == '\t' {
		return 1
	}
	if len(data) < indentSize {
		return 0
	}
	for i := 0; i < indentSize; i++ {
		if data[i] != ' ' {
			return 0
		}
	}
	return indentSize
}

// Create a URL-safe slug for fragments.
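// Runs of non-alphanumeric characters collapse to a single '-', leading and
// trailing dashes are trimmed, and case is preserved. For example (an
// illustrative trace of the code below):
//
//	slugify([]byte("Hello, World!")) // returns "Hello-World"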
func slugify(in []byte) []byte {
	if len(in) == 0 {
		return in
	}
	out := make([]byte, 0, len(in))
	sym := false

	for _, ch := range in {
		if isalnum(ch) {
			sym = false
			out = append(out, ch)
		} else if sym {
			continue
		} else {
			out = append(out, '-')
			sym = true
		}
	}
	var a, b int
	var ch byte
	for a, ch = range out {
		if ch != '-' {
			break
		}
	}
	for b = len(out) - 1; b > 0; b-- {
		if out[b] != '-' {
			break
		}
	}
	return out[a : b+1]
}