markdown.go
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// Markdown parsing and processing
//
//

// Package blackfriday is a markdown processor.
//
// Translates plain text with simple formatting rules into HTML or LaTeX.
package blackfriday

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"
)

// Version string of the package.
const Version = "2.0"

// Extensions is a bitwise or'ed collection of Blackfriday's enabled
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
const (
	NoExtensions Extensions = 0
	NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables // Render tables
	FencedCode // Render fenced code blocks
	Autolink // Detect embedded URLs that are not explicitly marked
	Strikethrough // Strikethrough text using ~~text~~
	LaxHTMLBlocks // Loosen up HTML block parsing rules
	SpaceHeaders // Be strict about prefix header rules (require a space after the leading #)
	HardLineBreak // Translate newlines into line breaks
	TabSizeEight // Expand tabs to eight spaces instead of four
	Footnotes // Pandoc-style footnotes
	NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeaderIDs // Specify header IDs with {#id}
	Titleblock // Titleblock, à la Pandoc
	AutoHeaderIDs // Create the header ID from the text
	BackslashLineBreak // Translate trailing backslashes into line breaks
	DefinitionLists // Render definition lists
	Smartypants // Enable smart punctuation substitutions
	SmartypantsFractions // Enable smart fractions (with Smartypants)
	SmartypantsDashes // Enable smart dashes (with Smartypants)
	SmartypantsLatexDashes // Enable LaTeX-style dashes (with Smartypants)
	SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for rendering double quotes
	TOC // Generate a table of contents
	OmitContents // Skip the main contents (for a standalone table of contents)

	CommonHTMLFlags HTMLFlags = UseXHTML

	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
		BackslashLineBreak | DefinitionLists | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
)
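
// As a hypothetical illustration (this particular combination is not one
// defined by the package), a caller that wants footnotes and hard line breaks
// on top of the common set can build its own flag set with a bitwise OR:
//
//	exts := CommonExtensions | Footnotes | HardLineBreak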

// DefaultOptions is a convenience variable with all the options that are
// enabled by default.
var DefaultOptions = Options{
	Extensions: CommonExtensions,
}

// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList
	ListItemEndOfList
)

// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)

// The sizes of tab stops: the default, and the doubled size selected by the
// TabSizeEight extension.
const (
	TabSizeDefault = 4
	TabSizeDouble = 8
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	"blockquote": struct{}{},
	"del": struct{}{},
	"div": struct{}{},
	"dl": struct{}{},
	"fieldset": struct{}{},
	"form": struct{}{},
	"h1": struct{}{},
	"h2": struct{}{},
	"h3": struct{}{},
	"h4": struct{}{},
	"h5": struct{}{},
	"h6": struct{}{},
	"iframe": struct{}{},
	"ins": struct{}{},
	"math": struct{}{},
	"noscript": struct{}{},
	"ol": struct{}{},
	"pre": struct{}{},
	"p": struct{}{},
	"script": struct{}{},
	"style": struct{}{},
	"table": struct{}{},
	"ul": struct{}{},

	// HTML5
	"address": struct{}{},
	"article": struct{}{},
	"aside": struct{}{},
	"canvas": struct{}{},
	"figcaption": struct{}{},
	"figure": struct{}{},
	"footer": struct{}{},
	"header": struct{}{},
	"hgroup": struct{}{},
	"main": struct{}{},
	"nav": struct{}{},
	"output": struct{}{},
	"progress": struct{}{},
	"section": struct{}{},
	"video": struct{}{},
}

// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently, HTML and LaTeX implementations are provided.
type Renderer interface {
	Render(ast *Node) []byte
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
}
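
// The sketch below is a hypothetical, minimal Renderer implementation (it is
// not part of the original API): it satisfies the interface but emits no
// output, which can serve as a skeleton when writing a new output format.
type nopRenderer struct{}

// RenderNode writes nothing and simply tells the walker to continue.
func (nopRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {
	return GoToNext
}

// Render walks the whole tree, delegating every node to RenderNode.
func (r nopRenderer) Render(ast *Node) []byte {
	var buf bytes.Buffer
	ast.Walk(func(node *Node, entering bool) WalkStatus {
		return r.RenderNode(&buf, node, entering)
	})
	return buf.Bytes()
}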

// inlineParser is the type of the callback functions used for inline parsing.
// One such function is defined for each character that triggers a response
// when parsing inline data.
type inlineParser func(p *parser, data []byte, offset int) int

// parser holds the runtime state used while parsing a document.
// It is constructed by the Markdown function.
type parser struct {
	refOverride ReferenceOverrideFunc
	refs map[string]*reference
	inlineCallback [256]inlineParser
	flags Extensions
	nesting int
	maxNesting int
	insideLink bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	doc *Node
	tip *Node // = doc
	oldTip *Node
	lastMatchedContainer *Node // = doc
	allClosed bool
	currBlock *Node // a block node currently being parsed by inline parser
}

func (p *parser) getRef(refid string) (ref *reference, found bool) {
	if p.refOverride != nil {
		r, overridden := p.refOverride(refid)
		if overridden {
			if r == nil {
				return nil, false
			}
			return &reference{
				link: []byte(r.Link),
				title: []byte(r.Title),
				noteID: 0,
				hasBlock: false,
				text: []byte(r.Text)}, true
		}
	}
	// refs are case insensitive
	ref, found = p.refs[strings.ToLower(refid)]
	return ref, found
}

func (p *parser) finalize(block *Node) {
	above := block.Parent
	block.open = false
	p.tip = above
}

func (p *parser) addChild(node NodeType, offset uint32) *Node {
	for !p.tip.canContain(node) {
		p.finalize(p.tip)
	}
	newNode := NewNode(node)
	newNode.content = []byte{}
	p.tip.appendChild(newNode)
	p.tip = newNode
	return newNode
}

func (p *parser) closeUnmatchedBlocks() {
	if !p.allClosed {
		for p.oldTip != p.lastMatchedContainer {
			parent := p.oldTip.Parent
			p.finalize(p.oldTip)
			p.oldTip = parent
		}
		p.allClosed = true
	}
}

//
//
// Public interface
//
//

// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}

// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)

// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// Extensions flags defined in this package.
	Extensions Extensions

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	//  * [link text][refid]
	//  * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}
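
// The function below is a hypothetical sketch (it is not part of the original
// API): it shows how a ReferenceOverrideFunc plugged into Options can redirect
// a particular refid, here the made-up id "docs", to a fixed URL while leaving
// every other refid to the default resolution logic.
func exampleReferenceOverride(input []byte) []byte {
	opts := Options{
		Extensions: CommonExtensions,
		ReferenceOverride: func(refid string) (*Reference, bool) {
			if refid == "docs" {
				// Resolve [docs][] and [link text][docs] to a fixed target.
				return &Reference{
					Link: "https://example.com/docs",
					Title: "Project documentation",
				}, true
			}
			// Fall back to the refids defined in the document itself.
			return nil, false
		},
	}
	renderer := NewHTMLRenderer(HTMLRendererParameters{Flags: CommonHTMLFlags})
	return Markdown(input, renderer, opts)
}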

// MarkdownBasic is a convenience function for simple rendering.
// It processes markdown input with no parser extensions enabled.
func MarkdownBasic(input []byte) []byte {
	// set up the HTML renderer
	renderer := NewHTMLRenderer(HTMLRendererParameters{
		Flags: UseXHTML,
		Extensions: CommonExtensions,
	})

	// set up the parser
	return Markdown(input, renderer, Options{})
}

// MarkdownCommon is a convenience function for simple rendering. It calls
// Markdown with most useful extensions enabled, including:
//
// * Smartypants processing with smart fractions and LaTeX dashes
//
// * Intra-word emphasis suppression
//
// * Tables
//
// * Fenced code blocks
//
// * Autolinking
//
// * Strikethrough support
//
// * Strict header parsing
//
// * Custom Header IDs
func MarkdownCommon(input []byte) []byte {
	// set up the HTML renderer
	renderer := NewHTMLRenderer(HTMLRendererParameters{
		Flags: CommonHTMLFlags,
		Extensions: CommonExtensions,
	})
	return Markdown(input, renderer, DefaultOptions)
}

// Markdown is the main rendering function.
// It parses and renders a block of markdown-encoded text.
// The supplied Renderer is used to format the output, and the extensions in
// options dictate which non-standard extensions are enabled.
//
// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
// NewLatexRenderer, respectively.
func Markdown(input []byte, renderer Renderer, options Options) []byte {
	if renderer == nil {
		return nil
	}
	return renderer.Render(Parse(input, options))
}
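
// A hypothetical usage sketch (not part of the original API): the typical
// pipeline builds a renderer, picks an extension set, and hands both to
// Markdown. The extension choice here is only an illustration.
func exampleRenderWithFootnotes(input []byte) []byte {
	exts := CommonExtensions | Footnotes
	renderer := NewHTMLRenderer(HTMLRendererParameters{
		Flags: CommonHTMLFlags,
		Extensions: exts,
	})
	return Markdown(input, renderer, Options{Extensions: exts})
}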

// Parse is an entry point to the parsing part of Blackfriday. It takes an
// input markdown document and produces a syntax tree for its contents. This
// tree can then be rendered with a default or custom renderer, or
// analyzed/transformed by the caller to whatever non-standard needs they have.
func Parse(input []byte, opts Options) *Node {
	extensions := opts.Extensions

	// fill in the parser structure
	p := new(parser)
	p.flags = extensions
	p.refOverride = opts.ReferenceOverride
	p.refs = make(map[string]*reference)
	p.maxNesting = 16
	p.insideLink = false

	docNode := NewNode(Document)
	p.doc = docNode
	p.tip = docNode
	p.oldTip = docNode
	p.lastMatchedContainer = docNode
	p.allClosed = true

	// register inline parsers
	p.inlineCallback['*'] = emphasis
	p.inlineCallback['_'] = emphasis
	if extensions&Strikethrough != 0 {
		p.inlineCallback['~'] = emphasis
	}
	p.inlineCallback['`'] = codeSpan
	p.inlineCallback['\n'] = lineBreak
	p.inlineCallback['['] = link
	p.inlineCallback['<'] = leftAngle
	p.inlineCallback['\\'] = escape
	p.inlineCallback['&'] = entity
	p.inlineCallback['!'] = maybeImage
	p.inlineCallback['^'] = maybeInlineFootnote

	if extensions&Autolink != 0 {
		p.inlineCallback['h'] = maybeAutoLink
		p.inlineCallback['m'] = maybeAutoLink
		p.inlineCallback['f'] = maybeAutoLink
		p.inlineCallback['H'] = maybeAutoLink
		p.inlineCallback['M'] = maybeAutoLink
		p.inlineCallback['F'] = maybeAutoLink
	}

	if extensions&Footnotes != 0 {
		p.notes = make([]*reference, 0)
	}

	first := firstPass(p, input)
	secondPass(p, first)
	// Walk the tree and finish up any unfinished blocks
	for p.tip != nil {
		p.finalize(p.tip)
	}
	// Walk the tree again and process inline markdown in each block
	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Header || node.Type == TableCell {
			p.currBlock = node
			p.inline(node.content)
			node.content = nil
		}
		return GoToNext
	})
	p.parseRefsToAST()
	return p.doc
}
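
// A hypothetical sketch (not part of the original API) of working with the
// tree that Parse returns directly, without rendering: it walks the AST and
// counts Header nodes.
func exampleCountHeaders(input []byte) int {
	count := 0
	Parse(input, DefaultOptions).Walk(func(node *Node, entering bool) WalkStatus {
		if entering && node.Type == Header {
			count++
		}
		return GoToNext
	})
	return count
}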

func (p *parser) parseRefsToAST() {
	if p.flags&Footnotes == 0 || len(p.notes) == 0 {
		return
	}
	p.tip = p.doc
	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte(`<div class="footnotes">`)))
	p.addBlock(HorizontalRule, nil)
	block := p.addBlock(List, nil)
	block.ListFlags = ListTypeOrdered
	flags := ListItemBeginningOfList
	// Note: this loop is intentionally explicit, not range-form. This is
	// because the body of the loop will append nested footnotes to p.notes and
	// we need to process those late additions. Range form would only walk over
	// the fixed initial set.
	for i := 0; i < len(p.notes); i++ {
		ref := p.notes[i]
		block := p.addBlock(Item, nil)
		block.ListFlags = ListTypeOrdered
		block.RefLink = ref.link
		if ref.hasBlock {
			flags |= ListItemContainsBlock
			p.block(ref.title)
		} else {
			p.currBlock = block
			p.inline(ref.title)
		}
		flags &^= ListItemBeginningOfList | ListItemContainsBlock
	}
	above := block.Parent
	finalizeList(block)
	p.tip = above
	finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte("</div>")))
	block.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Header {
			p.currBlock = node
			p.inline(node.content)
			node.content = nil
		}
		return GoToNext
	})
}

// first pass:
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
func firstPass(p *parser, input []byte) []byte {
	var out bytes.Buffer
	tabSize := TabSizeDefault
	if p.flags&TabSizeEight != 0 {
		tabSize = TabSizeDouble
	}
	beg := 0
	lastFencedCodeBlockEnd := 0
	for beg < len(input) {
		// Find end of this line, then process the line.
		end := beg
		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
			end++
		}

		if p.flags&FencedCode != 0 {
			// track fenced code block boundaries to suppress tab expansion
			// and reference extraction inside them:
			if beg >= lastFencedCodeBlockEnd {
				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
					lastFencedCodeBlockEnd = beg + i
				}
			}
		}

		// add the line body if present
		if end > beg {
			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
				out.Write(input[beg:end])
			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
				beg += refEnd
				continue
			} else {
				expandTabs(&out, input[beg:end], tabSize)
			}
		}

		if end < len(input) && input[end] == '\r' {
			end++
		}
		if end < len(input) && input[end] == '\n' {
			end++
		}
		out.WriteByte('\n')

		beg = end
	}

	// empty input?
	if out.Len() == 0 {
		out.WriteByte('\n')
	}

	return out.Bytes()
}

// second pass: actual parsing of the block structure
func secondPass(p *parser, input []byte) {
	p.block(input)

	if p.flags&Footnotes != 0 && len(p.notes) > 0 {
		flags := ListItemBeginningOfList
		for i := 0; i < len(p.notes); i++ {
			ref := p.notes[i]
			if ref.hasBlock {
				flags |= ListItemContainsBlock
				p.block(ref.title)
			} else {
				p.inline(ref.title)
			}
			flags &^= ListItemBeginningOfList | ListItemContainsBlock
		}
	}

	if p.nesting != 0 {
		panic("Nesting level did not end at zero")
	}
}

//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//	[1]: http://www.google.com/ "Google"
//	[2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//	This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//	This sentence needs a bit of further explanation.[^note]
//
//	[^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
//	Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.

// References are parsed and stored in this struct.
type reference struct {
	link []byte
	title []byte
	noteID int // 0 if not a footnote ref
	hasBlock bool
	text []byte
}

func (r *reference) String() string {
	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
		r.link, r.title, r.text, r.noteID, r.hasBlock)
}

// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the parser's refs map).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
func isReference(p *parser, data []byte, tabSize int) int {
	// up to 3 optional leading spaces
	if len(data) < 4 {
		return 0
	}
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	noteID := 0

	// id part: anything but a newline between brackets
	if data[i] != '[' {
		return 0
	}
	i++
	if p.flags&Footnotes != 0 {
		if i < len(data) && data[i] == '^' {
			// we can set it to anything here because the proper noteIDs will
			// be assigned later during the second pass. It just has to be != 0
			noteID = 1
			i++
		}
	}
	idOffset := i
	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
		i++
	}
	if i >= len(data) || data[i] != ']' {
		return 0
	}
	idEnd := i

	// spacer: colon (space | tab)* newline? (space | tab)*
	i++
	if i >= len(data) || data[i] != ':' {
		return 0
	}
	i++
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
			i++
		}
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i >= len(data) {
		return 0
	}

	var (
		linkOffset, linkEnd int
		titleOffset, titleEnd int
		lineEnd int
		raw []byte
		hasBlock bool
	)

	if p.flags&Footnotes != 0 && noteID != 0 {
		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
		lineEnd = linkEnd
	} else {
		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
	}
	if lineEnd == 0 {
		return 0
	}

	// a valid ref has been found

	ref := &reference{
		noteID: noteID,
		hasBlock: hasBlock,
	}

	if noteID > 0 {
		// reusing the link field for the id since footnotes don't have links
		ref.link = data[idOffset:idEnd]
		// if footnote, it's not really a title, it's the contained text
		ref.title = raw
	} else {
		ref.link = data[linkOffset:linkEnd]
		ref.title = data[titleOffset:titleEnd]
	}

	// id matches are case-insensitive
	id := string(bytes.ToLower(data[idOffset:idEnd]))

	p.refs[id] = ref

	return lineEnd
}

func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
	// link: whitespace-free sequence, optionally between angle brackets
	if data[i] == '<' {
		i++
	}
	linkOffset = i
	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
		i++
	}
	if i == len(data) {
		return
	}
	linkEnd = i
	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
		linkOffset++
		linkEnd--
	}

	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
		return
	}

	// compute end-of-line
	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
		lineEnd = i
	}
	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
		lineEnd++
	}

	// optional (space|tab)* spacer after a newline
	if lineEnd > 0 {
		i = lineEnd + 1
		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
			i++
		}
	}

	// optional title: any non-newline sequence enclosed in '"() alone on its line
	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
		i++
		titleOffset = i

		// look for EOL
		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
			i++
		}
		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
			titleEnd = i + 1
		} else {
			titleEnd = i
		}

		// step back
		i--
		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
			i--
		}
		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
			lineEnd = titleEnd
			titleEnd = i
		}
	}

	return
}

// The first bit of this logic is the same as (*parser).listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}

//
//
// Miscellaneous helper functions
//
//

// Test if a character is a punctuation symbol.
// Taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
		if c == r {
			return true
		}
	}
	return false
}

// Test if a character is a whitespace character.
func isspace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
}

// Test if a character is a letter.
func isletter(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

// Test if a character is a letter or a digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	return (c >= '0' && c <= '9') || isletter(c)
}

// Replace tab characters with spaces, aligning to the next tabSize column.
// The trailing newline is written by the caller, not by this function.
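//
// For illustration (a sketch of the expected behaviour, not an authoritative
// specification), with tabSize = 4 the line "a\tbc\td" expands to
// "a   bc  d": the first tab advances the column from 1 to 4, the second
// from 6 to 8.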
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// first, check for common cases: no tabs, or only tabs at beginning of line
	i, prefix := 0, 0
	slowcase := false
	for i = 0; i < len(line); i++ {
		if line[i] == '\t' {
			if prefix == i {
				prefix++
			} else {
				slowcase = true
				break
			}
		}
	}

	// no need to decode runes if all tabs are at the beginning of the line
	if !slowcase {
		for i = 0; i < prefix*tabSize; i++ {
			out.WriteByte(' ')
		}
		out.Write(line[prefix:])
		return
	}

	// the slow case: we need to count runes to figure out how
	// many spaces to insert for each tab
	column := 0
	i = 0
	for i < len(line) {
		start := i
		for i < len(line) && line[i] != '\t' {
			_, size := utf8.DecodeRune(line[i:])
			i += size
			column++
		}

		if i > start {
			out.Write(line[start:i])
		}

		if i >= len(line) {
			break
		}

		for {
			out.WriteByte(' ')
			column++
			if column%tabSize == 0 {
				break
			}
		}

		i++
	}
}

// Find out whether a line counts as indented.
// Returns the number of characters the indent occupies (0 = not indented).
func isIndented(data []byte, indentSize int) int {
	if len(data) == 0 {
		return 0
	}
	if data[0] == '\t' {
		return 1
	}
	if len(data) < indentSize {
		return 0
	}
	for i := 0; i < indentSize; i++ {
		if data[i] != ' ' {
			return 0
		}
	}
	return indentSize
}

// Create a URL-safe slug for fragments.
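//
// For example (a sketch of the expected behaviour, not an authoritative
// specification), slugify([]byte("Header Title!")) yields "Header-Title":
// runs of non-alphanumeric bytes collapse into single dashes, and leading
// and trailing dashes are trimmed.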
func slugify(in []byte) []byte {
	if len(in) == 0 {
		return in
	}
	out := make([]byte, 0, len(in))
	sym := false

	for _, ch := range in {
		if isalnum(ch) {
			sym = false
			out = append(out, ch)
		} else if sym {
			continue
		} else {
			out = append(out, '-')
			sym = true
		}
	}
	var a, b int
	var ch byte
	for a, ch = range out {
		if ch != '-' {
			break
		}
	}
	for b = len(out) - 1; b > 0; b-- {
		if out[b] != '-' {
			break
		}
	}
	return out[a : b+1]
}