markdown.go
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// Markdown parsing and processing
//
//

// Package blackfriday is a markdown processor.
//
// Translates plain text with simple formatting rules into HTML or LaTeX.
package blackfriday

import (
    "bytes"
    "fmt"
    "io"
    "strings"
    "unicode/utf8"
)

// Version string of the package.
const Version = "2.0"

// Extensions is a bitwise or'ed collection of enabled Blackfriday
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
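//
// As an illustrative sketch (not a fixed recipe), a caller that wants only a
// few of these extensions can OR them together and hand the result to the
// parser through Options:
//
//    opts := Options{Extensions: Tables | FencedCode | Strikethrough}
//    output := Markdown(input, renderer, opts)
//
// where input ([]byte of markdown source) and renderer (e.g. obtained from
// NewHTMLRenderer) are assumed to be set up elsewhere.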
const (
    NoExtensions Extensions = 0
    NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words
    Tables // Render tables
    FencedCode // Render fenced code blocks
    Autolink // Detect embedded URLs that are not explicitly marked
    Strikethrough // Strikethrough text using ~~text~~
    LaxHTMLBlocks // Loosen up HTML block parsing rules
    SpaceHeaders // Be strict about prefix header rules
    HardLineBreak // Translate newlines into line breaks
    TabSizeEight // Expand tabs to eight spaces instead of four
    Footnotes // Pandoc-style footnotes
    NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
    HeaderIDs // specify header IDs with {#id}
    Titleblock // Titleblock, à la pandoc
    AutoHeaderIDs // Create the header ID from the text
    BackslashLineBreak // Translate trailing backslashes into line breaks
    DefinitionLists // Render definition lists
    Smartypants // Enable smart punctuation substitutions
    SmartypantsFractions // Enable smart fractions (with Smartypants)
    SmartypantsDashes // Enable smart dashes (with Smartypants)
    SmartypantsLatexDashes // Enable LaTeX-style dashes (with Smartypants)
    SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for double quotes rendering
    TOC // Generate a table of contents
    OmitContents // Skip the main contents (for a standalone table of contents)

    CommonHTMLFlags HTMLFlags = UseXHTML

    CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
        Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
        BackslashLineBreak | DefinitionLists | Smartypants |
        SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
)

// DefaultOptions is a convenience variable with all the options that are
// enabled by default.
var DefaultOptions = Options{
    Extensions: CommonExtensions,
}

// TODO: this should probably be unexported. Or moved to node.go
type LinkType int

// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
    LinkTypeNotAutolink LinkType = iota
    LinkTypeNormal
    LinkTypeEmail
)

// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
    ListTypeOrdered ListType = 1 << iota
    ListTypeDefinition
    ListTypeTerm

    ListItemContainsBlock
    ListItemBeginningOfList
    ListItemEndOfList
)

// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
    TableAlignmentLeft = 1 << iota
    TableAlignmentRight
    TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)

// The size of a tab stop.
const (
    TabSizeDefault = 4
    TabSizeDouble = 8
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
    "blockquote": struct{}{},
    "del": struct{}{},
    "div": struct{}{},
    "dl": struct{}{},
    "fieldset": struct{}{},
    "form": struct{}{},
    "h1": struct{}{},
    "h2": struct{}{},
    "h3": struct{}{},
    "h4": struct{}{},
    "h5": struct{}{},
    "h6": struct{}{},
    "iframe": struct{}{},
    "ins": struct{}{},
    "math": struct{}{},
    "noscript": struct{}{},
    "ol": struct{}{},
    "pre": struct{}{},
    "p": struct{}{},
    "script": struct{}{},
    "style": struct{}{},
    "table": struct{}{},
    "ul": struct{}{},

    // HTML5
    "address": struct{}{},
    "article": struct{}{},
    "aside": struct{}{},
    "canvas": struct{}{},
    "figcaption": struct{}{},
    "figure": struct{}{},
    "footer": struct{}{},
    "header": struct{}{},
    "hgroup": struct{}{},
    "main": struct{}{},
    "nav": struct{}{},
    "output": struct{}{},
    "progress": struct{}{},
    "section": struct{}{},
    "video": struct{}{},
}

// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently, HTML and LaTeX implementations are provided.
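//
// A minimal custom Renderer sketch (illustrative only, not part of this
// package) that writes every node literal as plain text:
//
//    type plainTextRenderer struct{}
//
//    func (r *plainTextRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {
//        if entering && len(node.Literal) > 0 {
//            w.Write(node.Literal)
//        }
//        return GoToNext
//    }
//
//    func (r *plainTextRenderer) Render(ast *Node) []byte {
//        var buf bytes.Buffer
//        ast.Walk(func(node *Node, entering bool) WalkStatus {
//            return r.RenderNode(&buf, node, entering)
//        })
//        return buf.Bytes()
//    }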
type Renderer interface {
    Render(ast *Node) []byte
    RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
}

// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
type inlineParser func(p *parser, data []byte, offset int) int

// Parser holds runtime state used by the parser.
// This is constructed by the Markdown function.
type parser struct {
    refOverride ReferenceOverrideFunc
    refs map[string]*reference
    inlineCallback [256]inlineParser
    flags Extensions
    nesting int
    maxNesting int
    insideLink bool

    // Footnotes need to be ordered as well as available to quickly check for
    // presence. If a ref is also a footnote, it's stored both in refs and here
    // in notes. Slice is nil if footnotes not enabled.
    notes []*reference

    doc *Node
    tip *Node // = doc
    oldTip *Node
    lastMatchedContainer *Node // = doc
    allClosed bool
    currBlock *Node // a block node currently being parsed by inline parser
}

func (p *parser) getRef(refid string) (ref *reference, found bool) {
    if p.refOverride != nil {
        r, overridden := p.refOverride(refid)
        if overridden {
            if r == nil {
                return nil, false
            }
            return &reference{
                link: []byte(r.Link),
                title: []byte(r.Title),
                noteID: 0,
                hasBlock: false,
                text: []byte(r.Text)}, true
        }
    }
    // refs are case insensitive
    ref, found = p.refs[strings.ToLower(refid)]
    return ref, found
}

func (p *parser) finalize(block *Node) {
    above := block.Parent
    block.open = false
    p.tip = above
}

func (p *parser) addChild(node NodeType, offset uint32) *Node {
    for !p.tip.canContain(node) {
        p.finalize(p.tip)
    }
    newNode := NewNode(node)
    newNode.content = []byte{}
    p.tip.appendChild(newNode)
    p.tip = newNode
    return newNode
}

func (p *parser) closeUnmatchedBlocks() {
    if !p.allClosed {
        for p.oldTip != p.lastMatchedContainer {
            parent := p.oldTip.Parent
            p.finalize(p.oldTip)
            p.oldTip = parent
        }
        p.allClosed = true
    }
}

//
//
// Public interface
//
//

// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
    // Link is usually the URL the reference points to.
    Link string
    // Title is the alternate text describing the link in more detail.
    Title string
    // Text is the optional text to override the ref with if the syntax used was
    // [refid][]
    Text string
}

// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)

// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
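//
// As an illustrative sketch (the refid "docs" and its URL are invented, not
// part of this package), a ReferenceOverride callback can resolve one refid
// itself and let every other refid fall through to the definitions found in
// the document:
//
//    override := func(reference string) (*Reference, bool) {
//        if reference == "docs" {
//            return &Reference{
//                Link:  "https://example.com/docs",
//                Title: "Project documentation",
//            }, true
//        }
//        return nil, false
//    }
//    output := Markdown(input, renderer, Options{
//        Extensions:        CommonExtensions,
//        ReferenceOverride: override,
//    })
//
// Here input and renderer stand in for a markdown []byte and a Renderer
// configured elsewhere.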
type Options struct {
    // Extensions is a flag set of bit-wise ORed extension bits. See the
    // Extensions flags defined in this package.
    Extensions Extensions

    // ReferenceOverride is an optional function callback that is called every
    // time a reference is resolved.
    //
    // In Markdown, the link reference syntax can be made to resolve a link to
    // a reference instead of an inline URL, in one of the following ways:
    //
    //    * [link text][refid]
    //    * [refid][]
    //
    // Usually, the refid is defined at the bottom of the Markdown document. If
    // this override function is provided, the refid is passed to the override
    // function first, before consulting the defined refids at the bottom. If
    // the override function indicates an override did not occur, the refids at
    // the bottom will be used to fill in the link details.
    ReferenceOverride ReferenceOverrideFunc
}

// MarkdownBasic is a convenience function for simple rendering.
// It processes markdown input with no extensions enabled.
func MarkdownBasic(input []byte) []byte {
    // set up the HTML renderer
    renderer := NewHTMLRenderer(HTMLRendererParameters{
        Flags: UseXHTML,
        Extensions: CommonExtensions,
    })

    // set up the parser
    return Markdown(input, renderer, Options{})
}

// MarkdownCommon is a convenience function for simple rendering. It calls
// Markdown with most useful extensions enabled, including:
//
// * Smartypants processing with smart fractions and LaTeX dashes
// * Intra-word emphasis suppression
// * Tables
// * Fenced code blocks
// * Autolinking
// * Strikethrough support
// * Strict header parsing
// * Custom Header IDs
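//
// A minimal usage sketch (the input string is arbitrary):
//
//    output := MarkdownCommon([]byte("Hello, *Markdown*!\n"))
//    fmt.Println(string(output))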
func MarkdownCommon(input []byte) []byte {
    // set up the HTML renderer
    renderer := NewHTMLRenderer(HTMLRendererParameters{
        Flags: CommonHTMLFlags,
        Extensions: CommonExtensions,
    })
    return Markdown(input, renderer, DefaultOptions)
}

// Markdown is the main rendering function.
// It parses and renders a block of markdown-encoded text.
// The supplied Renderer is used to format the output, and the supplied
// Options dictate which non-standard extensions are enabled.
//
// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
// NewLatexRenderer, respectively.
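//
// Sketch of a typical call with an explicitly configured HTML renderer (only
// parameters already used elsewhere in this file are set; input holds the
// markdown source):
//
//    renderer := NewHTMLRenderer(HTMLRendererParameters{
//        Flags:      CommonHTMLFlags,
//        Extensions: CommonExtensions,
//    })
//    output := Markdown(input, renderer, Options{Extensions: CommonExtensions})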
func Markdown(input []byte, renderer Renderer, options Options) []byte {
    if renderer == nil {
        return nil
    }
    return renderer.Render(Parse(input, options))
}

// Parse is an entry point to the parsing part of Blackfriday. It takes an
// input markdown document and produces a syntax tree for its contents. This
// tree can then be rendered with a default or custom renderer, or
// analyzed/transformed by the caller to whatever non-standard needs they have.
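//
// Sketch of parsing without rendering, then walking the resulting tree
// (input is assumed to hold the markdown source):
//
//    doc := Parse(input, DefaultOptions)
//    doc.Walk(func(node *Node, entering bool) WalkStatus {
//        if entering && node.Type == Header {
//            fmt.Printf("header at level %d\n", node.Level)
//        }
//        return GoToNext
//    })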
func Parse(input []byte, opts Options) *Node {
    extensions := opts.Extensions

    // fill in the parser structure
    p := new(parser)
    p.flags = extensions
    p.refOverride = opts.ReferenceOverride
    p.refs = make(map[string]*reference)
    p.maxNesting = 16
    p.insideLink = false

    docNode := NewNode(Document)
    p.doc = docNode
    p.tip = docNode
    p.oldTip = docNode
    p.lastMatchedContainer = docNode
    p.allClosed = true

    // register inline parsers
    p.inlineCallback['*'] = emphasis
    p.inlineCallback['_'] = emphasis
    if extensions&Strikethrough != 0 {
        p.inlineCallback['~'] = emphasis
    }
    p.inlineCallback['`'] = codeSpan
    p.inlineCallback['\n'] = lineBreak
    p.inlineCallback['['] = link
    p.inlineCallback['<'] = leftAngle
    p.inlineCallback['\\'] = escape
    p.inlineCallback['&'] = entity
    p.inlineCallback['!'] = maybeImage
    p.inlineCallback['^'] = maybeInlineFootnote

    if extensions&Autolink != 0 {
        p.inlineCallback['h'] = maybeAutoLink
        p.inlineCallback['m'] = maybeAutoLink
        p.inlineCallback['f'] = maybeAutoLink
        p.inlineCallback['H'] = maybeAutoLink
        p.inlineCallback['M'] = maybeAutoLink
        p.inlineCallback['F'] = maybeAutoLink
    }

    if extensions&Footnotes != 0 {
        p.notes = make([]*reference, 0)
    }

    first := firstPass(p, input)
    secondPass(p, first)
    // Walk the tree and finish up any unfinished blocks
    for p.tip != nil {
        p.finalize(p.tip)
    }
    // Walk the tree again and process inline markdown in each block
    p.doc.Walk(func(node *Node, entering bool) WalkStatus {
        if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
            p.currBlock = node
            p.inline(node.content)
            node.content = nil
        }
        return GoToNext
    })
    p.parseRefsToAST()
    p.generateTOC()
    return p.doc
}

func (p *parser) generateTOC() {
    if p.flags&TOC == 0 && p.flags&OmitContents == 0 {
        return
    }
    navNode := NewNode(HTMLBlock)
    navNode.Literal = []byte("<nav>")
    navNode.open = false

    var topList *Node
    var listNode *Node
    var lastItem *Node
    headerCount := 0
    currentLevel := 0
    p.doc.Walk(func(node *Node, entering bool) WalkStatus {
        if entering && node.Type == Header {
            if node.Level > currentLevel {
                currentLevel++
                newList := NewNode(List)
                if lastItem != nil {
                    lastItem.appendChild(newList)
                    listNode = newList
                } else {
                    listNode = newList
                    topList = listNode
                }
            }
            if node.Level < currentLevel {
                finalizeList(listNode)
                lastItem = listNode.Parent
                listNode = lastItem.Parent
            }
            node.HeaderID = fmt.Sprintf("toc_%d", headerCount)
            headerCount++
            lastItem = NewNode(Item)
            listNode.appendChild(lastItem)
            anchorNode := NewNode(Link)
            anchorNode.Destination = []byte("#" + node.HeaderID)
            lastItem.appendChild(anchorNode)
            anchorNode.appendChild(text(node.FirstChild.Literal))
        }
        return GoToNext
    })
    firstChild := p.doc.FirstChild
    // Insert TOC only if there is anything to insert
    if topList != nil {
        finalizeList(topList)
        firstChild.insertBefore(navNode)
        firstChild.insertBefore(topList)
        navCloseNode := NewNode(HTMLBlock)
        navCloseNode.Literal = []byte("</nav>")
        navCloseNode.open = false
        firstChild.insertBefore(navCloseNode)
    }
    // Drop everything after the TOC if OmitContents was requested
    if p.flags&OmitContents != 0 {
        for firstChild != nil {
            next := firstChild.Next
            firstChild.unlink()
            firstChild = next
        }
    }
}

func (p *parser) parseRefsToAST() {
    if p.flags&Footnotes == 0 || len(p.notes) == 0 {
        return
    }
    p.tip = p.doc
    finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
    p.addBlock(HorizontalRule, nil)
    block := p.addBlock(List, nil)
    block.ListFlags = ListTypeOrdered
    flags := ListItemBeginningOfList
    // Note: this loop is intentionally explicit, not range-form. This is
    // because the body of the loop will append nested footnotes to p.notes and
    // we need to process those late additions. Range form would only walk over
    // the fixed initial set.
    for i := 0; i < len(p.notes); i++ {
        ref := p.notes[i]
        block := p.addBlock(Item, nil)
        block.ListFlags = ListTypeOrdered
        block.RefLink = ref.link
        if ref.hasBlock {
            flags |= ListItemContainsBlock
            p.block(ref.title)
        } else {
            p.currBlock = block
            p.inline(ref.title)
        }
        flags &^= ListItemBeginningOfList | ListItemContainsBlock
    }
    above := block.Parent
    finalizeList(block)
    p.tip = above
    finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte("</div>")))
    block.Walk(func(node *Node, entering bool) WalkStatus {
        if node.Type == Paragraph || node.Type == Header {
            p.currBlock = node
            p.inline(node.content)
            node.content = nil
        }
        return GoToNext
    })
}

// first pass:
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
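//
// For example, with the default tab size the input "a\tb\r\nc" comes out as
// "a   b\nc\n": the tab is expanded to the next four-column stop, the CRLF is
// normalized to a bare newline, and a trailing newline is guaranteed.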
func firstPass(p *parser, input []byte) []byte {
    var out bytes.Buffer
    tabSize := TabSizeDefault
    if p.flags&TabSizeEight != 0 {
        tabSize = TabSizeDouble
    }
    beg := 0
    lastFencedCodeBlockEnd := 0
    for beg < len(input) {
        // Find end of this line, then process the line.
        end := beg
        for end < len(input) && input[end] != '\n' && input[end] != '\r' {
            end++
        }

        if p.flags&FencedCode != 0 {
            // track fenced code block boundaries to suppress tab expansion
            // and reference extraction inside them:
            if beg >= lastFencedCodeBlockEnd {
                if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
                    lastFencedCodeBlockEnd = beg + i
                }
            }
        }

        // add the line body if present
        if end > beg {
            if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
                out.Write(input[beg:end])
            } else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
                beg += refEnd
                continue
            } else {
                expandTabs(&out, input[beg:end], tabSize)
            }
        }

        if end < len(input) && input[end] == '\r' {
            end++
        }
        if end < len(input) && input[end] == '\n' {
            end++
        }
        out.WriteByte('\n')

        beg = end
    }

    // empty input?
    if out.Len() == 0 {
        out.WriteByte('\n')
    }

    return out.Bytes()
}

// second pass: actual rendering
func secondPass(p *parser, input []byte) {
    p.block(input)

    if p.flags&Footnotes != 0 && len(p.notes) > 0 {
        flags := ListItemBeginningOfList
        for i := 0; i < len(p.notes); i++ {
            ref := p.notes[i]
            if ref.hasBlock {
                flags |= ListItemContainsBlock
                p.block(ref.title)
            } else {
                p.inline(ref.title)
            }
            flags &^= ListItemBeginningOfList | ListItemContainsBlock
        }
    }

    if p.nesting != 0 {
        panic("Nesting level did not end at zero")
    }
}

//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
//    Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.

// References are parsed and stored in this struct.
type reference struct {
    link []byte
    title []byte
    noteID int // 0 if not a footnote ref
    hasBlock bool
    text []byte
}

func (r *reference) String() string {
    return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
        r.link, r.title, r.text, r.noteID, r.hasBlock)
}

// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the parser struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
func isReference(p *parser, data []byte, tabSize int) int {
    // up to 3 optional leading spaces
    if len(data) < 4 {
        return 0
    }
    i := 0
    for i < 3 && data[i] == ' ' {
        i++
    }

    noteID := 0

    // id part: anything but a newline between brackets
    if data[i] != '[' {
        return 0
    }
    i++
    if p.flags&Footnotes != 0 {
        if i < len(data) && data[i] == '^' {
            // we can set it to anything here because the proper noteIds will
            // be assigned later during the second pass. It just has to be != 0
            noteID = 1
            i++
        }
    }
    idOffset := i
    for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
        i++
    }
    if i >= len(data) || data[i] != ']' {
        return 0
    }
    idEnd := i

    // spacer: colon (space | tab)* newline? (space | tab)*
    i++
    if i >= len(data) || data[i] != ':' {
        return 0
    }
    i++
    for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
        i++
    }
    if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
        i++
        if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
            i++
        }
    }
    for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
        i++
    }
    if i >= len(data) {
        return 0
    }

    var (
        linkOffset, linkEnd int
        titleOffset, titleEnd int
        lineEnd int
        raw []byte
        hasBlock bool
    )

    if p.flags&Footnotes != 0 && noteID != 0 {
        linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
        lineEnd = linkEnd
    } else {
        linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
    }
    if lineEnd == 0 {
        return 0
    }

    // a valid ref has been found

    ref := &reference{
        noteID: noteID,
        hasBlock: hasBlock,
    }

    if noteID > 0 {
        // reusing the link field for the id since footnotes don't have links
        ref.link = data[idOffset:idEnd]
        // if footnote, it's not really a title, it's the contained text
        ref.title = raw
    } else {
        ref.link = data[linkOffset:linkEnd]
        ref.title = data[titleOffset:titleEnd]
    }

    // id matches are case-insensitive
    id := string(bytes.ToLower(data[idOffset:idEnd]))

    p.refs[id] = ref

    return lineEnd
}

func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
    // link: whitespace-free sequence, optionally between angle brackets
    if data[i] == '<' {
        i++
    }
    linkOffset = i
    for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
        i++
    }
    if i == len(data) {
        return
    }
    linkEnd = i
    if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
        linkOffset++
        linkEnd--
    }

    // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
    for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
        i++
    }
    if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
        return
    }

    // compute end-of-line
    if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
        lineEnd = i
    }
    if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
        lineEnd++
    }

    // optional (space|tab)* spacer after a newline
    if lineEnd > 0 {
        i = lineEnd + 1
        for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
            i++
        }
    }

    // optional title: any non-newline sequence enclosed in '"() alone on its line
    if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
        i++
        titleOffset = i

        // look for EOL
        for i < len(data) && data[i] != '\n' && data[i] != '\r' {
            i++
        }
        if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
            titleEnd = i + 1
        } else {
            titleEnd = i
        }

        // step back
        i--
        for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
            i--
        }
        if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
            lineEnd = titleEnd
            titleEnd = i
        }
    }

    return
}

// The first bit of this logic is the same as (*parser).listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
    if i == 0 || len(data) == 0 {
        return
    }

    // skip leading whitespace on first line
    for i < len(data) && data[i] == ' ' {
        i++
    }

    blockStart = i

    // find the end of the line
    blockEnd = i
    for i < len(data) && data[i-1] != '\n' {
        i++
    }

    // get working buffer
    var raw bytes.Buffer

    // put the first line into the working buffer
    raw.Write(data[blockEnd:i])
    blockEnd = i

    // process the following lines
    containsBlankLine := false

gatherLines:
    for blockEnd < len(data) {
        i++

        // find the end of this line
        for i < len(data) && data[i-1] != '\n' {
            i++
        }

        // if it is an empty line, guess that it is part of this item
        // and move on to the next line
        if p.isEmpty(data[blockEnd:i]) > 0 {
            containsBlankLine = true
            blockEnd = i
            continue
        }

        n := 0
        if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
            // this is the end of the block.
            // we don't want to include this last line in the index.
            break gatherLines
        }

        // if there were blank lines before this one, insert a new one now
        if containsBlankLine {
            raw.WriteByte('\n')
            containsBlankLine = false
        }

        // get rid of that first tab, write to buffer
        raw.Write(data[blockEnd+n : i])
        hasBlock = true

        blockEnd = i
    }

    if data[blockEnd-1] != '\n' {
        raw.WriteByte('\n')
    }

    contents = raw.Bytes()

    return
}

//
//
// Miscellaneous helper functions
//
//

// Test if a character is a punctuation symbol.
// Taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
    for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
        if c == r {
            return true
        }
    }
    return false
}

// Test if a character is a whitespace character.
func isspace(c byte) bool {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
}

// Test if a character is a letter.
func isletter(c byte) bool {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

// Test if a character is a letter or a digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
    return (c >= '0' && c <= '9') || isletter(c)
}

// Replace tab characters with spaces, aligning to the next tabSize column.
// The caller is expected to append the trailing newline.
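//
// For example, with tabSize 4 the line "a\tbc\td" is written out as
// "a   bc  d" (three spaces after "a", two after "bc").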
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
    // first, check for common cases: no tabs, or only tabs at beginning of line
    i, prefix := 0, 0
    slowcase := false
    for i = 0; i < len(line); i++ {
        if line[i] == '\t' {
            if prefix == i {
                prefix++
            } else {
                slowcase = true
                break
            }
        }
    }

    // no need to decode runes if all tabs are at the beginning of the line
    if !slowcase {
        for i = 0; i < prefix*tabSize; i++ {
            out.WriteByte(' ')
        }
        out.Write(line[prefix:])
        return
    }

    // the slow case: we need to count runes to figure out how
    // many spaces to insert for each tab
    column := 0
    i = 0
    for i < len(line) {
        start := i
        for i < len(line) && line[i] != '\t' {
            _, size := utf8.DecodeRune(line[i:])
            i += size
            column++
        }

        if i > start {
            out.Write(line[start:i])
        }

        if i >= len(line) {
            break
        }

        for {
            out.WriteByte(' ')
            column++
            if column%tabSize == 0 {
                break
            }
        }

        i++
    }
}

// Find if a line counts as indented or not.
// Returns number of characters the indent is (0 = not indented).
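//
// For example, isIndented([]byte("    code"), 4) returns 4 and
// isIndented([]byte("\tcode"), 4) returns 1.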
func isIndented(data []byte, indentSize int) int {
    if len(data) == 0 {
        return 0
    }
    if data[0] == '\t' {
        return 1
    }
    if len(data) < indentSize {
        return 0
    }
    for i := 0; i < indentSize; i++ {
        if data[i] != ' ' {
            return 0
        }
    }
    return indentSize
}

// Create a URL-safe slug for fragments.
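//
// For example (illustrative), slugify([]byte("Hello, World!")) returns
// "Hello-World": runs of non-alphanumeric bytes collapse into single dashes,
// and leading/trailing dashes are trimmed.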
func slugify(in []byte) []byte {
    if len(in) == 0 {
        return in
    }
    out := make([]byte, 0, len(in))
    sym := false

    for _, ch := range in {
        if isalnum(ch) {
            sym = false
            out = append(out, ch)
        } else if sym {
            continue
        } else {
            out = append(out, '-')
            sym = true
        }
    }
    var a, b int
    var ch byte
    for a, ch = range out {
        if ch != '-' {
            break
        }
    }
    for b = len(out) - 1; b > 0; b-- {
        if out[b] != '-' {
            break
        }
    }
    return out[a : b+1]
1057}