markdown.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// Markdown parsing and processing
13//
14//
15
// Package blackfriday is a Markdown processor.
//
// It translates plain text with simple formatting rules into HTML or LaTeX.
19package blackfriday
20
21import (
22 "bytes"
23 "fmt"
24 "strings"
25 "unicode/utf8"
26)
27
// VERSION is the version string exposed by this package.
const VERSION = "1.4"
29
// Extensions is a bit set selecting which optional parsing extensions are
// enabled. It is stored in the parser and tested with bitwise AND.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
const (
	// NoExtensions is the empty set. It occupies iota 0 in this block, so the
	// first real flag below is 1 << 1; bit 0 is not assigned to any extension.
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeaders                                  // Be strict about prefix header rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeaderIDs                                     // specify header IDs with {#id}
	Titleblock                                    // Titleblock ala pandoc
	AutoHeaderIDs                                 // Create the header ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists

	// CommonHtmlFlags is the renderer flag set that MarkdownCommon passes to
	// HtmlRenderer.
	CommonHtmlFlags HtmlFlags = UseXHTML | UseSmartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	// CommonExtensions is the extension set used by DefaultOptions.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
		BackslashLineBreak | DefinitionLists
)
60
// DefaultOptions is the Options value used by MarkdownCommon. It enables
// CommonExtensions and sets no reference override.
var DefaultOptions = Options{
	Extensions: CommonExtensions,
}
64
// LinkType is the kind of link handed to Renderer.AutoLink.
type LinkType int

// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LinkTypeNotAutolink LinkType = iota // zero value
	LinkTypeNormal
	LinkTypeEmail
)
75
// ListType is a bit set describing a list or a list item; it is passed to the
// list-related Renderer callbacks.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	// Flags describing the kind of list. Values start at 1 << 0.
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	// Flags describing an individual item. The iota sequence continues, so
	// these occupy the next three bits.
	ListItemContainsBlock
	ListItemBeginningOfList
	ListItemEndOfList
)
90
// TableFlags is declared for table cell alignment flags. Note that the
// constants below are untyped: they do not carry the TableFlags type, and the
// Renderer table cell callbacks receive their flags as a plain int.
type TableFlags int

// These are the possible flag values for the table cell renderer.
// A cell carries a single alignment value; TableAlignmentCenter is defined as
// the combination of the left and right bits.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
101
// The size of a tab stop.
const (
	// TabSizeDefault is the tab stop used by firstPass unless the TabSizeEight
	// extension is set.
	TabSizeDefault = 4
	// TabSizeDouble is the tab stop used by firstPass when the TabSizeEight
	// extension is set.
	TabSizeDouble = 8
)
107
// blockTags is the set of tag names that are treated as HTML block tags.
// Any of these can be included in markdown text without special escaping.
// The map is used purely as a set; the values carry no information.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
152
// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html and Latex implementations are provided (see HtmlRenderer and
// LatexRenderer).
type Renderer interface {
	// block-level callbacks
	BlockCode(text []byte, lang string)
	BlockQuote(text []byte)
	BlockHtml(text []byte)
	BeginHeader(level int, id string)
	EndHeader(level int, id string, header []byte)
	HRule()
	BeginList(flags ListType)
	EndList(flags ListType)
	ListItem(text []byte, flags ListType)
	BeginParagraph()
	EndParagraph()
	Table(header []byte, body []byte, columnData []int)
	TableRow(text []byte)
	// The table cell callbacks take an explicit output buffer; flags is a
	// plain int (presumably one of the TableAlignment* constants — confirm
	// against the table parser).
	TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
	TableCell(out *bytes.Buffer, text []byte, flags int)
	BeginFootnotes()
	EndFootnotes()
	FootnoteItem(name, text []byte, flags ListType)
	TitleBlock(text []byte)

	// Span-level callbacks
	AutoLink(link []byte, kind LinkType)
	CodeSpan(text []byte)
	DoubleEmphasis(text []byte)
	Emphasis(text []byte)
	Image(link []byte, title []byte, alt []byte)
	LineBreak()
	Link(link []byte, title []byte, content []byte)
	RawHtmlTag(tag []byte)
	TripleEmphasis(text []byte)
	StrikeThrough(text []byte)
	FootnoteRef(ref []byte, id int)

	// Low-level callbacks
	Entity(entity []byte)
	NormalText(text []byte)

	// Header and footer
	DocumentHeader()
	DocumentFooter()

	GetFlags() HtmlFlags
	Write(b []byte) (int, error)

	// Render is called by MarkdownOptions with the root of the parsed
	// document tree; its return value is returned to the caller unchanged.
	Render(ast *Node) []byte
}
213
// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
// The callbacks are registered in parser.inlineCallback, indexed by the
// trigger byte.
// NOTE(review): the int result is presumably the number of bytes consumed
// starting at offset — confirm against the inline parser.
type inlineParser func(p *parser, data []byte, offset int) int
217
// parser holds runtime state used by the parser.
// This is constructed by the MarkdownOptions function.
type parser struct {
	refOverride    ReferenceOverrideFunc // optional; consulted by getRef before refs
	refs           map[string]*reference // reference definitions keyed by lower-cased id
	inlineCallback [256]inlineParser     // inline parser per trigger byte; nil when none is registered
	flags          Extensions            // enabled extensions
	nesting        int                   // must be back at zero when secondPass finishes
	maxNesting     int                   // set to 16 by MarkdownOptions
	insideLink     bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	doc                  *Node // root Document node
	tip                  *Node // = doc; node that addChild appends new children to
	oldTip               *Node // start point for closeUnmatchedBlocks
	lastMatchedContainer *Node // = doc; closeUnmatchedBlocks stops finalizing here
	allClosed            bool  // true when closeUnmatchedBlocks has nothing to do
	currBlock            *Node // a block node currently being parsed by inline parser
}
241
242func (p *parser) getRef(refid string) (ref *reference, found bool) {
243 if p.refOverride != nil {
244 r, overridden := p.refOverride(refid)
245 if overridden {
246 if r == nil {
247 return nil, false
248 }
249 return &reference{
250 link: []byte(r.Link),
251 title: []byte(r.Title),
252 noteId: 0,
253 hasBlock: false,
254 text: []byte(r.Text)}, true
255 }
256 }
257 // refs are case insensitive
258 ref, found = p.refs[strings.ToLower(refid)]
259 return ref, found
260}
261
262func (p *parser) finalize(block *Node) {
263 above := block.Parent
264 block.open = false
265 p.tip = above
266}
267
268func (p *parser) addChild(node NodeType, offset uint32) *Node {
269 for !p.tip.canContain(node) {
270 p.finalize(p.tip)
271 }
272 newNode := NewNode(node)
273 newNode.content = []byte{}
274 p.tip.appendChild(newNode)
275 p.tip = newNode
276 return newNode
277}
278
279func (p *parser) closeUnmatchedBlocks() {
280 if !p.allClosed {
281 for p.oldTip != p.lastMatchedContainer {
282 parent := p.oldTip.Parent
283 p.finalize(p.oldTip)
284 p.oldTip = parent
285 }
286 p.allClosed = true
287 }
288}
289
290//
291//
292// Public interface
293//
294//
295
// Reference represents the details of a link.
// It is the value a ReferenceOverrideFunc returns; the parser copies its
// fields into its internal reference representation.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
307
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is treated as not found
// and the references defined in the document are not consulted.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
313
// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
// It is passed by value to MarkdownOptions.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// Extensions flags defined in this package.
	Extensions Extensions

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved. It may be left nil.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	//  * [link text][refid]
	//  * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}
337
338// MarkdownBasic is a convenience function for simple rendering.
339// It processes markdown input with no extensions enabled.
340func MarkdownBasic(input []byte) []byte {
341 // set up the HTML renderer
342 htmlFlags := UseXHTML
343 renderer := HtmlRenderer(htmlFlags, "", "")
344
345 // set up the parser
346 return MarkdownOptions(input, renderer, Options{Extensions: 0})
347}
348
349// Call Markdown with most useful extensions enabled
350// MarkdownCommon is a convenience function for simple rendering.
351// It processes markdown input with common extensions enabled, including:
352//
353// * Smartypants processing with smart fractions and LaTeX dashes
354//
355// * Intra-word emphasis suppression
356//
357// * Tables
358//
359// * Fenced code blocks
360//
361// * Autolinking
362//
363// * Strikethrough support
364//
365// * Strict header parsing
366//
367// * Custom Header IDs
368func MarkdownCommon(input []byte) []byte {
369 // set up the HTML renderer
370 renderer := HtmlRenderer(CommonHtmlFlags, "", "")
371 return MarkdownOptions(input, renderer, DefaultOptions)
372}
373
374// Markdown is the main rendering function.
375// It parses and renders a block of markdown-encoded text.
376// The supplied Renderer is used to format the output, and extensions dictates
377// which non-standard extensions are enabled.
378//
379// To use the supplied Html or LaTeX renderers, see HtmlRenderer and
380// LatexRenderer, respectively.
381func Markdown(input []byte, renderer Renderer, extensions Extensions) []byte {
382 return MarkdownOptions(input, renderer, Options{
383 Extensions: extensions})
384}
385
386// MarkdownOptions is just like Markdown but takes additional options through
387// the Options struct.
388func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
389 // no point in parsing if we can't render
390 if renderer == nil {
391 return nil
392 }
393
394 extensions := opts.Extensions
395
396 // fill in the render structure
397 p := new(parser)
398 p.flags = extensions
399 p.refOverride = opts.ReferenceOverride
400 p.refs = make(map[string]*reference)
401 p.maxNesting = 16
402 p.insideLink = false
403
404 docNode := NewNode(Document)
405 p.doc = docNode
406 p.tip = docNode
407 p.oldTip = docNode
408 p.lastMatchedContainer = docNode
409 p.allClosed = true
410
411 // register inline parsers
412 p.inlineCallback['*'] = emphasis
413 p.inlineCallback['_'] = emphasis
414 if extensions&Strikethrough != 0 {
415 p.inlineCallback['~'] = emphasis
416 }
417 p.inlineCallback['`'] = codeSpan
418 p.inlineCallback['\n'] = lineBreak
419 p.inlineCallback['['] = link
420 p.inlineCallback['<'] = leftAngle
421 p.inlineCallback['\\'] = escape
422 p.inlineCallback['&'] = entity
423 p.inlineCallback['!'] = maybeImage
424 p.inlineCallback['^'] = maybeInlineFootnote
425
426 if extensions&Autolink != 0 {
427 p.inlineCallback['h'] = maybeAutoLink
428 p.inlineCallback['m'] = maybeAutoLink
429 p.inlineCallback['f'] = maybeAutoLink
430 p.inlineCallback['H'] = maybeAutoLink
431 p.inlineCallback['M'] = maybeAutoLink
432 p.inlineCallback['F'] = maybeAutoLink
433 }
434
435 if extensions&Footnotes != 0 {
436 p.notes = make([]*reference, 0)
437 }
438
439 first := firstPass(p, input)
440 secondPass(p, first)
441 // walk the tree and finish up some of unfinished blocks:
442 for p.tip != nil {
443 p.finalize(p.tip)
444 }
445 ForEachNode(p.doc, func(node *Node, entering bool) {
446 if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
447 p.currBlock = node
448 p.inline(node.content)
449 node.content = nil
450 }
451 })
452 p.parseRefsToAST()
453 return renderer.Render(p.doc)
454}
455
456func (p *parser) parseRefsToAST() {
457 if p.flags&Footnotes == 0 || len(p.notes) == 0 {
458 return
459 }
460 p.tip = p.doc
461 finalizeHtmlBlock(p.addBlock(HtmlBlock, []byte(`<div class="footnotes">`)))
462 p.addBlock(HorizontalRule, nil)
463 block := p.addBlock(List, nil)
464 block.ListData = &ListData{ // TODO: fill in the real ListData
465 Flags: ListTypeOrdered,
466 Tight: false,
467 BulletChar: '*',
468 Delimiter: 0,
469 }
470 flags := ListItemBeginningOfList
471 // Note: this loop is intentionally explicit, not range-form. This is
472 // because the body of the loop will append nested footnotes to p.notes and
473 // we need to process those late additions. Range form would only walk over
474 // the fixed initial set.
475 for i := 0; i < len(p.notes); i++ {
476 ref := p.notes[i]
477 block := p.addBlock(Item, nil)
478 block.ListData = &ListData{ // TODO: fill in the real ListData
479 Flags: ListTypeOrdered,
480 Tight: false,
481 BulletChar: '*',
482 Delimiter: 0,
483 RefLink: ref.link,
484 }
485 if ref.hasBlock {
486 flags |= ListItemContainsBlock
487 p.block(ref.title)
488 } else {
489 p.currBlock = block
490 p.inline(ref.title)
491 }
492 flags &^= ListItemBeginningOfList | ListItemContainsBlock
493 }
494 above := block.Parent
495 finalizeList(block)
496 p.tip = above
497 finalizeHtmlBlock(p.addBlock(HtmlBlock, []byte("</div>")))
498 ForEachNode(block, func(node *Node, entering bool) {
499 if node.Type == Paragraph || node.Type == Header {
500 p.currBlock = node
501 p.inline(node.content)
502 node.content = nil
503 }
504 })
505}
506
507// first pass:
508// - extract references
509// - expand tabs
510// - normalize newlines
511// - copy everything else
512func firstPass(p *parser, input []byte) []byte {
513 var out bytes.Buffer
514 tabSize := TabSizeDefault
515 if p.flags&TabSizeEight != 0 {
516 tabSize = TabSizeDouble
517 }
518 beg, end := 0, 0
519 lastFencedCodeBlockEnd := 0
520 for beg < len(input) { // iterate over lines
521 if end = isReference(p, input[beg:], tabSize); end > 0 {
522 beg += end
523 } else { // skip to the next line
524 end = beg
525 for end < len(input) && input[end] != '\n' && input[end] != '\r' {
526 end++
527 }
528
529 if p.flags&FencedCode != 0 {
530 // track fenced code block boundaries to suppress tab expansion
531 // inside them:
532 if beg >= lastFencedCodeBlockEnd {
533 if i := p.fencedCode(input[beg:], false); i > 0 {
534 lastFencedCodeBlockEnd = beg + i
535 }
536 }
537 }
538
539 // add the line body if present
540 if end > beg {
541 if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
542 out.Write(input[beg:end])
543 } else {
544 expandTabs(&out, input[beg:end], tabSize)
545 }
546 }
547 out.WriteByte('\n')
548
549 if end < len(input) && input[end] == '\r' {
550 end++
551 }
552 if end < len(input) && input[end] == '\n' {
553 end++
554 }
555
556 beg = end
557 }
558 }
559
560 // empty input?
561 if out.Len() == 0 {
562 out.WriteByte('\n')
563 }
564
565 return out.Bytes()
566}
567
568// second pass: actual rendering
569func secondPass(p *parser, input []byte) {
570 p.block(input)
571
572 if p.flags&Footnotes != 0 && len(p.notes) > 0 {
573 flags := ListItemBeginningOfList
574 for i := 0; i < len(p.notes); i += 1 {
575 ref := p.notes[i]
576 if ref.hasBlock {
577 flags |= ListItemContainsBlock
578 p.block(ref.title)
579 } else {
580 p.inline(ref.title)
581 }
582 flags &^= ListItemBeginningOfList | ListItemContainsBlock
583 }
584 }
585
586 if p.nesting != 0 {
587 panic("Nesting level did not end at zero")
588 }
589}
590
591//
592// Link references
593//
594// This section implements support for references that (usually) appear
595// as footnotes in a document, and can be referenced anywhere in the document.
596// The basic format is:
597//
598// [1]: http://www.google.com/ "Google"
599// [2]: http://www.github.com/ "Github"
600//
601// Anywhere in the document, the reference can be linked by referring to its
602// label, i.e., 1 and 2 in this example, as in:
603//
604// This library is hosted on [Github][2], a git hosting site.
605//
606// Actual footnotes as specified in Pandoc and supported by some other Markdown
607// libraries such as php-markdown are also taken care of. They look like this:
608//
609// This sentence needs a bit of further explanation.[^note]
610//
611// [^note]: This is the explanation.
612//
613// Footnotes should be placed at the end of the document in an ordered list.
614// Inline footnotes such as:
615//
616// Inline footnotes^[Not supported.] also exist.
617//
618// are not yet supported.
619
// reference is the parsed form of a link reference definition or footnote.
//
// For a link reference, link is the destination and title the optional title.
// For a footnote, link holds the footnote id and title holds the footnote
// text.
type reference struct {
	link, title []byte
	noteId      int // 0 if not a footnote ref
	hasBlock    bool
	text        []byte
}

// String returns a debugging representation of r that lists all fields.
func (r *reference) String() string {
	const layout = "{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}"
	return fmt.Sprintf(layout, r.link, r.title, r.text, r.noteId, r.hasBlock)
}
633
634// Check whether or not data starts with a reference link.
635// If so, it is parsed and stored in the list of references
636// (in the render struct).
637// Returns the number of bytes to skip to move past it,
638// or zero if the first line is not a reference.
639func isReference(p *parser, data []byte, tabSize int) int {
640 // up to 3 optional leading spaces
641 if len(data) < 4 {
642 return 0
643 }
644 i := 0
645 for i < 3 && data[i] == ' ' {
646 i++
647 }
648
649 noteId := 0
650
651 // id part: anything but a newline between brackets
652 if data[i] != '[' {
653 return 0
654 }
655 i++
656 if p.flags&Footnotes != 0 {
657 if i < len(data) && data[i] == '^' {
658 // we can set it to anything here because the proper noteIds will
659 // be assigned later during the second pass. It just has to be != 0
660 noteId = 1
661 i++
662 }
663 }
664 idOffset := i
665 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
666 i++
667 }
668 if i >= len(data) || data[i] != ']' {
669 return 0
670 }
671 idEnd := i
672
673 // spacer: colon (space | tab)* newline? (space | tab)*
674 i++
675 if i >= len(data) || data[i] != ':' {
676 return 0
677 }
678 i++
679 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
680 i++
681 }
682 if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
683 i++
684 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
685 i++
686 }
687 }
688 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
689 i++
690 }
691 if i >= len(data) {
692 return 0
693 }
694
695 var (
696 linkOffset, linkEnd int
697 titleOffset, titleEnd int
698 lineEnd int
699 raw []byte
700 hasBlock bool
701 )
702
703 if p.flags&Footnotes != 0 && noteId != 0 {
704 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
705 lineEnd = linkEnd
706 } else {
707 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
708 }
709 if lineEnd == 0 {
710 return 0
711 }
712
713 // a valid ref has been found
714
715 ref := &reference{
716 noteId: noteId,
717 hasBlock: hasBlock,
718 }
719
720 if noteId > 0 {
721 // reusing the link field for the id since footnotes don't have links
722 ref.link = data[idOffset:idEnd]
723 // if footnote, it's not really a title, it's the contained text
724 ref.title = raw
725 } else {
726 ref.link = data[linkOffset:linkEnd]
727 ref.title = data[titleOffset:titleEnd]
728 }
729
730 // id matches are case-insensitive
731 id := string(bytes.ToLower(data[idOffset:idEnd]))
732
733 p.refs[id] = ref
734
735 return lineEnd
736}
737
738func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
739 // link: whitespace-free sequence, optionally between angle brackets
740 if data[i] == '<' {
741 i++
742 }
743 linkOffset = i
744 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
745 i++
746 }
747 if i == len(data) {
748 return
749 }
750 linkEnd = i
751 if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
752 linkOffset++
753 linkEnd--
754 }
755
756 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
757 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
758 i++
759 }
760 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
761 return
762 }
763
764 // compute end-of-line
765 if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
766 lineEnd = i
767 }
768 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
769 lineEnd++
770 }
771
772 // optional (space|tab)* spacer after a newline
773 if lineEnd > 0 {
774 i = lineEnd + 1
775 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
776 i++
777 }
778 }
779
780 // optional title: any non-newline sequence enclosed in '"() alone on its line
781 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
782 i++
783 titleOffset = i
784
785 // look for EOL
786 for i < len(data) && data[i] != '\n' && data[i] != '\r' {
787 i++
788 }
789 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
790 titleEnd = i + 1
791 } else {
792 titleEnd = i
793 }
794
795 // step back
796 i--
797 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
798 i--
799 }
800 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
801 lineEnd = titleEnd
802 titleEnd = i
803 }
804 }
805
806 return
807}
808
809// The first bit of this logic is the same as (*parser).listItem, but the rest
810// is much simpler. This function simply finds the entire block and shifts it
811// over by one tab if it is indeed a block (just returns the line if it's not).
812// blockEnd is the end of the section in the input buffer, and contents is the
813// extracted text that was shifted over one tab. It will need to be rendered at
814// the end of the document.
815func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
816 if i == 0 || len(data) == 0 {
817 return
818 }
819
820 // skip leading whitespace on first line
821 for i < len(data) && data[i] == ' ' {
822 i++
823 }
824
825 blockStart = i
826
827 // find the end of the line
828 blockEnd = i
829 for i < len(data) && data[i-1] != '\n' {
830 i++
831 }
832
833 // get working buffer
834 var raw bytes.Buffer
835
836 // put the first line into the working buffer
837 raw.Write(data[blockEnd:i])
838 blockEnd = i
839
840 // process the following lines
841 containsBlankLine := false
842
843gatherLines:
844 for blockEnd < len(data) {
845 i++
846
847 // find the end of this line
848 for i < len(data) && data[i-1] != '\n' {
849 i++
850 }
851
852 // if it is an empty line, guess that it is part of this item
853 // and move on to the next line
854 if p.isEmpty(data[blockEnd:i]) > 0 {
855 containsBlankLine = true
856 blockEnd = i
857 continue
858 }
859
860 n := 0
861 if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
862 // this is the end of the block.
863 // we don't want to include this last line in the index.
864 break gatherLines
865 }
866
867 // if there were blank lines before this one, insert a new one now
868 if containsBlankLine {
869 raw.WriteByte('\n')
870 containsBlankLine = false
871 }
872
873 // get rid of that first tab, write to buffer
874 raw.Write(data[blockEnd+n : i])
875 hasBlock = true
876
877 blockEnd = i
878 }
879
880 if data[blockEnd-1] != '\n' {
881 raw.WriteByte('\n')
882 }
883
884 contents = raw.Bytes()
885
886 return
887}
888
889//
890//
891// Miscellaneous helper functions
892//
893//
894
// ispunct reports whether c is an ASCII punctuation symbol, i.e. one of
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
//
// Those 32 characters form four contiguous ranges in ASCII, which is what the
// comparisons below test.
func ispunct(c byte) bool {
	switch {
	case '!' <= c && c <= '/':
		return true
	case ':' <= c && c <= '@':
		return true
	case '[' <= c && c <= '`':
		return true
	case '{' <= c && c <= '~':
		return true
	}
	return false
}
905
// isspace reports whether c is an ASCII whitespace character: space, tab,
// newline, carriage return, form feed or vertical tab.
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
910
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}

// isalnum reports whether c is an ASCII letter or an ASCII digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	return isletter(c)
}
921
// expandTabs writes line to out with every tab replaced by spaces up to the
// next multiple of tabSize columns. Columns are counted in runes, not bytes.
// Nothing but the expanded line is written; in particular no newline is
// appended.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Common cases first: no tabs at all, or tabs only at the very beginning
	// of the line. Neither needs rune decoding.
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}
	innerTab := false
	for _, c := range line[leading:] {
		if c == '\t' {
			innerTab = true
			break
		}
	}
	if !innerTab {
		for n := leading * tabSize; n > 0; n-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// The slow case: count runes to figure out how many spaces each tab
	// stands for.
	col := 0
	for pos := 0; pos < len(line); pos++ {
		// copy the run of non-tab runes starting at pos
		run := pos
		for pos < len(line) && line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			pos += width
			col++
		}
		out.Write(line[run:pos])

		if pos >= len(line) {
			break
		}

		// line[pos] is a tab: emit at least one space, then pad up to the
		// next tab stop. The loop's post statement steps over the tab.
		out.WriteByte(' ')
		col++
		for col%tabSize != 0 {
			out.WriteByte(' ')
			col++
		}
	}
}
979
// isIndented reports how many leading bytes of data make up one level of
// indentation: 1 if data starts with a tab, indentSize if it starts with at
// least indentSize spaces, and 0 if the line is not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) < indentSize:
		return 0
	}
	for i := indentSize - 1; i >= 0; i-- {
		if data[i] != ' ' {
			return 0
		}
	}
	return indentSize
}
999
1000// Create a url-safe slug for fragments
1001func slugify(in []byte) []byte {
1002 if len(in) == 0 {
1003 return in
1004 }
1005 out := make([]byte, 0, len(in))
1006 sym := false
1007
1008 for _, ch := range in {
1009 if isalnum(ch) {
1010 sym = false
1011 out = append(out, ch)
1012 } else if sym {
1013 continue
1014 } else {
1015 out = append(out, '-')
1016 sym = true
1017 }
1018 }
1019 var a, b int
1020 var ch byte
1021 for a, ch = range out {
1022 if ch != '-' {
1023 break
1024 }
1025 }
1026 for b = len(out) - 1; b > 0; b-- {
1027 if out[b] != '-' {
1028 break
1029 }
1030 }
1031 return out[a : b+1]
1032}