markdown.go (view raw)
1// Blackfriday Markdown Processor
2// Available at http://github.com/russross/blackfriday
3//
4// Copyright © 2011 Russ Ross <russ@russross.com>.
5// Distributed under the Simplified BSD License.
6// See README.md for details.
7
8package blackfriday
9
10import (
11 "bytes"
12 "fmt"
13 "io"
14 "strings"
15 "unicode/utf8"
16)
17
18//
19// Markdown parsing and processing
20//
21
// Version is the version string of the package. Appears in the rendered
// document when CompletePage flag is on.
const Version = "2.0"
25
// Extensions is a bitwise OR'ed collection of enabled Blackfriday
// extensions. Individual flags are tested with a bit mask, e.g.
// extensions&Footnotes != 0.
type Extensions int
29
// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// NoIntraEmphasis sits on the second line of the block, where iota is
// already 1, so the flag values start at 1<<1 and double on every following
// line. Do not reorder or insert lines: that would change the flag values.
const (
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeadings                                 // Be strict about prefix heading rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeadingIDs                                    // Specify heading IDs with {#id}
	Titleblock                                    // Titleblock ala pandoc
	AutoHeadingIDs                                // Create the heading ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists

	// CommonHTMLFlags is the set of HTML renderer flags that Run passes to
	// its default HTML renderer.
	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	// CommonExtensions is the set of extensions that Run enables unless the
	// caller overrides it with WithExtensions or WithNoExtensions.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
		BackslashLineBreak | DefinitionLists
)
58
// ListType contains bitwise OR'ed flags for list and list item objects.
type ListType int
61
// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// The values are consecutive powers of two generated by iota (the blank line
// does not consume a value), so the declaration order must not change.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
74
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int
77
// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// TableAlignmentCenter is defined as the combination of the left and right
// bits rather than as a bit of its own.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
86
// The size of a tab stop, in columns.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
92
// blockTags is the set of tag names treated as HTML block-level tags.
// Any of these can be included in markdown text without special escaping.
// The map is used purely as a set, hence the empty-struct values.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
137
// Renderer is the rendering interface. This is mostly of interest if you are
// implementing a new rendering format.
//
// Only an HTML implementation is provided in this repository, see the README
// for external implementations.
//
// Run drives a Renderer in this order: RenderHeader once, RenderNode for
// every step of the tree walk, then RenderFooter once.
type Renderer interface {
	// RenderNode is the main rendering method. It will be called once for
	// every leaf node and twice for every non-leaf node (first with
	// entering=true, then with entering=false). The method should write its
	// rendition of the node to the supplied writer w. The returned
	// WalkStatus is handed back to the tree walk that invoked the method.
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus

	// RenderHeader is a method that allows the renderer to produce some
	// content preceding the main body of the output document. The header is
	// understood in the broad sense here. For example, the default HTML
	// renderer will write not only the HTML document preamble, but also the
	// table of contents if it was requested.
	//
	// The method will be passed an entire document tree, in case a particular
	// implementation needs to inspect it to produce output.
	//
	// The output should be written to the supplied writer w. If your
	// implementation has no header to write, supply an empty implementation.
	RenderHeader(w io.Writer, ast *Node)

	// RenderFooter is a symmetric counterpart of RenderHeader: it is called
	// after the main body has been rendered.
	RenderFooter(w io.Writer, ast *Node)
}
166
// inlineParser is the callback type for inline parsing. One such function is
// registered (see New) for each byte that triggers a response when parsing
// inline data. It receives the parser, the data being parsed and the offset
// of the trigger byte, and returns an int together with a node.
// NOTE(review): the int is presumably the number of bytes consumed and the
// node may be nil; confirm against the inline parser implementation.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
170
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
type Markdown struct {
	renderer          Renderer              // set through WithRenderer; used by Run to produce output
	referenceOverride ReferenceOverrideFunc // optional hook consulted by getRef before refs
	refs              map[string]*reference // reference definitions keyed by lower-cased id
	inlineCallback    [256]inlineParser     // inline parser per trigger byte, registered in New
	extensions        Extensions            // enabled extension flags
	nesting           int
	maxNesting        int // New sets this to 16
	insideLink        bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	doc                  *Node // root Document node of the tree being built
	tip                  *Node // = doc
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	allClosed            bool
}
194
195func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
196 if p.referenceOverride != nil {
197 r, overridden := p.referenceOverride(refid)
198 if overridden {
199 if r == nil {
200 return nil, false
201 }
202 return &reference{
203 link: []byte(r.Link),
204 title: []byte(r.Title),
205 noteID: 0,
206 hasBlock: false,
207 text: []byte(r.Text)}, true
208 }
209 }
210 // refs are case insensitive
211 ref, found = p.refs[strings.ToLower(refid)]
212 return ref, found
213}
214
215func (p *Markdown) finalize(block *Node) {
216 above := block.Parent
217 block.open = false
218 p.tip = above
219}
220
221func (p *Markdown) addChild(node NodeType, offset uint32) *Node {
222 return p.addExistingChild(NewNode(node), offset)
223}
224
225func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node {
226 for !p.tip.canContain(node.Type) {
227 p.finalize(p.tip)
228 }
229 p.tip.AppendChild(node)
230 p.tip = node
231 return node
232}
233
234func (p *Markdown) closeUnmatchedBlocks() {
235 if !p.allClosed {
236 for p.oldTip != p.lastMatchedContainer {
237 parent := p.oldTip.Parent
238 p.finalize(p.oldTip)
239 p.oldTip = parent
240 }
241 p.allClosed = true
242 }
243}
244
245//
246//
247// Public interface
248//
249//
250
// Reference represents the details of a link.
// It is the value returned by a ReferenceOverrideFunc; see WithRefOverride
// for more details on the use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
262
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is treated as not
// found. Install such a function with WithRefOverride.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
268
269// New constructs a Markdown processor. You can use the same With* functions as
270// for Run() to customize parser's behavior and the renderer.
271func New(opts ...Option) *Markdown {
272 var p Markdown
273 for _, opt := range opts {
274 opt(&p)
275 }
276 p.refs = make(map[string]*reference)
277 p.maxNesting = 16
278 p.insideLink = false
279 docNode := NewNode(Document)
280 p.doc = docNode
281 p.tip = docNode
282 p.oldTip = docNode
283 p.lastMatchedContainer = docNode
284 p.allClosed = true
285 // register inline parsers
286 p.inlineCallback[' '] = maybeLineBreak
287 p.inlineCallback['*'] = emphasis
288 p.inlineCallback['_'] = emphasis
289 if p.extensions&Strikethrough != 0 {
290 p.inlineCallback['~'] = emphasis
291 }
292 p.inlineCallback['`'] = codeSpan
293 p.inlineCallback['\n'] = lineBreak
294 p.inlineCallback['['] = link
295 p.inlineCallback['<'] = leftAngle
296 p.inlineCallback['\\'] = escape
297 p.inlineCallback['&'] = entity
298 p.inlineCallback['!'] = maybeImage
299 p.inlineCallback['^'] = maybeInlineFootnote
300 if p.extensions&Autolink != 0 {
301 p.inlineCallback['h'] = maybeAutoLink
302 p.inlineCallback['m'] = maybeAutoLink
303 p.inlineCallback['f'] = maybeAutoLink
304 p.inlineCallback['H'] = maybeAutoLink
305 p.inlineCallback['M'] = maybeAutoLink
306 p.inlineCallback['F'] = maybeAutoLink
307 }
308 if p.extensions&Footnotes != 0 {
309 p.notes = make([]*reference, 0)
310 }
311 return &p
312}
313
// Option customizes the Markdown processor's default behavior. Options are
// plain functions that mutate the processor; New applies them in the order
// they are passed.
type Option func(*Markdown)
316
317// WithRenderer allows you to override the default renderer.
318func WithRenderer(r Renderer) Option {
319 return func(p *Markdown) {
320 p.renderer = r
321 }
322}
323
324// WithExtensions allows you to pick some of the many extensions provided by
325// Blackfriday. You can bitwise OR them.
326func WithExtensions(e Extensions) Option {
327 return func(p *Markdown) {
328 p.extensions = e
329 }
330}
331
332// WithNoExtensions turns off all extensions and custom behavior.
333func WithNoExtensions() Option {
334 return func(p *Markdown) {
335 p.extensions = NoExtensions
336 p.renderer = NewHTMLRenderer(HTMLRendererParameters{
337 Flags: HTMLFlagsNone,
338 })
339 }
340}
341
342// WithRefOverride sets an optional function callback that is called every
343// time a reference is resolved.
344//
345// In Markdown, the link reference syntax can be made to resolve a link to
346// a reference instead of an inline URL, in one of the following ways:
347//
348// * [link text][refid]
349// * [refid][]
350//
351// Usually, the refid is defined at the bottom of the Markdown document. If
352// this override function is provided, the refid is passed to the override
353// function first, before consulting the defined refids at the bottom. If
354// the override function indicates an override did not occur, the refids at
355// the bottom will be used to fill in the link details.
356func WithRefOverride(o ReferenceOverrideFunc) Option {
357 return func(p *Markdown) {
358 p.referenceOverride = o
359 }
360}
361
362// Run is the main entry point to Blackfriday. It parses and renders a
363// block of markdown-encoded text.
364//
365// The simplest invocation of Run takes one argument, input:
366// output := Run(input)
367// This will parse the input with CommonExtensions enabled and render it with
368// the default HTMLRenderer (with CommonHTMLFlags).
369//
370// Variadic arguments opts can customize the default behavior. Since Markdown
371// type does not contain exported fields, you can not use it directly. Instead,
372// use the With* functions. For example, this will call the most basic
373// functionality, with no extensions:
374// output := Run(input, WithNoExtensions())
375//
376// You can use any number of With* arguments, even contradicting ones. They
377// will be applied in order of appearance and the latter will override the
378// former:
379// output := Run(input, WithNoExtensions(), WithExtensions(exts),
380// WithRenderer(yourRenderer))
381func Run(input []byte, opts ...Option) []byte {
382 r := NewHTMLRenderer(HTMLRendererParameters{
383 Flags: CommonHTMLFlags,
384 })
385 optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
386 optList = append(optList, opts...)
387 parser := New(optList...)
388 ast := parser.Parse(input)
389 var buf bytes.Buffer
390 parser.renderer.RenderHeader(&buf, ast)
391 ast.Walk(func(node *Node, entering bool) WalkStatus {
392 return parser.renderer.RenderNode(&buf, node, entering)
393 })
394 parser.renderer.RenderFooter(&buf, ast)
395 return buf.Bytes()
396}
397
398// Parse is an entry point to the parsing part of Blackfriday. It takes an
399// input markdown document and produces a syntax tree for its contents. This
400// tree can then be rendered with a default or custom renderer, or
401// analyzed/transformed by the caller to whatever non-standard needs they have.
402// The return value is the root node of the syntax tree.
403func (p *Markdown) Parse(input []byte) *Node {
404 p.block(input)
405 // Walk the tree and finish up some of unfinished blocks
406 for p.tip != nil {
407 p.finalize(p.tip)
408 }
409 // Walk the tree again and process inline markdown in each block
410 p.doc.Walk(func(node *Node, entering bool) WalkStatus {
411 if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
412 p.inline(node, node.content)
413 node.content = nil
414 }
415 return GoToNext
416 })
417 p.parseRefsToAST()
418 return p.doc
419}
420
421func (p *Markdown) parseRefsToAST() {
422 if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
423 return
424 }
425 p.tip = p.doc
426 block := p.addBlock(List, nil)
427 block.IsFootnotesList = true
428 block.ListFlags = ListTypeOrdered
429 flags := ListItemBeginningOfList
430 // Note: this loop is intentionally explicit, not range-form. This is
431 // because the body of the loop will append nested footnotes to p.notes and
432 // we need to process those late additions. Range form would only walk over
433 // the fixed initial set.
434 for i := 0; i < len(p.notes); i++ {
435 ref := p.notes[i]
436 p.addExistingChild(ref.footnote, 0)
437 block := ref.footnote
438 block.ListFlags = flags | ListTypeOrdered
439 block.RefLink = ref.link
440 if ref.hasBlock {
441 flags |= ListItemContainsBlock
442 p.block(ref.title)
443 } else {
444 p.inline(block, ref.title)
445 }
446 flags &^= ListItemBeginningOfList | ListItemContainsBlock
447 }
448 above := block.Parent
449 finalizeList(block)
450 p.tip = above
451 block.Walk(func(node *Node, entering bool) WalkStatus {
452 if node.Type == Paragraph || node.Type == Heading {
453 p.inline(node, node.content)
454 node.content = nil
455 }
456 return GoToNext
457 })
458}
459
//
// Link references
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
//    Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.
488
// reference holds all information necessary for a reference-style link or
// a footnote.
//
// Consider this markdown with reference-style links:
//
//     [link][ref]
//
//     [ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//     <p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//     p.refs["ref"] = &reference{
//         link: "/url/",
//         title: "tooltip title",
//     }
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
//     Text needing a footnote.[^a]
//
//     [^a]: This is the note
//
// A reference structure will be populated as follows:
//
//     p.refs["a"] = &reference{
//         link: "a",
//         title: "This is the note",
//         noteID: <some positive int>,
//     }
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
type reference struct {
	link     []byte // link target; for footnotes, the footnote id
	title    []byte // link title; for footnotes, the footnote text
	noteID   int    // 0 if not a footnote ref
	hasBlock bool   // footnote text spans indented continuation lines
	footnote *Node  // a link to the Item node within a list of footnotes

	text []byte // only gets populated by refOverride feature with Reference.Text
}
535
536func (r *reference) String() string {
537 return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
538 r.link, r.title, r.text, r.noteID, r.hasBlock)
539}
540
// isReference checks whether or not data starts with a reference definition
// (a link reference, or a footnote when the Footnotes extension is on).
// If so, it is parsed and stored in p.refs under its lower-cased id.
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
//
// tabSize is only used for footnotes, where it is passed on to scanFootnote
// as the indent size of continuation lines.
func isReference(p *Markdown, data []byte, tabSize int) int {
	// up to 3 optional leading spaces
	if len(data) < 4 {
		return 0
	}
	i := 0
	for i < 3 && data[i] == ' ' {
		i++
	}

	noteID := 0

	// id part: anything but a newline between brackets
	if data[i] != '[' {
		return 0
	}
	i++
	if p.extensions&Footnotes != 0 {
		if i < len(data) && data[i] == '^' {
			// we can set it to anything here because the proper noteIds will
			// be assigned later during the second pass. It just has to be != 0
			noteID = 1
			i++
		}
	}
	idOffset := i
	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
		i++
	}
	if i >= len(data) || data[i] != ']' {
		return 0
	}
	idEnd := i
	// footnotes can have empty ID, like this: [^], but a reference can not be
	// empty like this: []. Break early if it's not a footnote and there's no ID
	if noteID == 0 && idOffset == idEnd {
		return 0
	}
	// spacer: colon (space | tab)* newline? (space | tab)*
	i++
	if i >= len(data) || data[i] != ':' {
		return 0
	}
	i++
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	// at most one line break (LF, CR or CRLF) may separate the id from the
	// rest of the definition
	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
			i++
		}
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}
	if i >= len(data) {
		return 0
	}

	var (
		linkOffset, linkEnd   int
		titleOffset, titleEnd int
		lineEnd               int
		raw                   []byte
		hasBlock              bool
	)

	// footnotes and link references have different bodies; both scanners
	// report failure by leaving the end offset at zero
	if p.extensions&Footnotes != 0 && noteID != 0 {
		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
		lineEnd = linkEnd
	} else {
		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
	}
	if lineEnd == 0 {
		return 0
	}

	// a valid ref has been found

	ref := &reference{
		noteID:   noteID,
		hasBlock: hasBlock,
	}

	if noteID > 0 {
		// reusing the link field for the id since footnotes don't have links
		ref.link = data[idOffset:idEnd]
		// if footnote, it's not really a title, it's the contained text
		ref.title = raw
	} else {
		ref.link = data[linkOffset:linkEnd]
		ref.title = data[titleOffset:titleEnd]
	}

	// id matches are case-insensitive
	id := string(bytes.ToLower(data[idOffset:idEnd]))

	// a later definition with the same id replaces an earlier one
	p.refs[id] = ref

	return lineEnd
}
648
649func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
650 // link: whitespace-free sequence, optionally between angle brackets
651 if data[i] == '<' {
652 i++
653 }
654 linkOffset = i
655 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
656 i++
657 }
658 linkEnd = i
659 if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
660 linkOffset++
661 linkEnd--
662 }
663
664 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
665 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
666 i++
667 }
668 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
669 return
670 }
671
672 // compute end-of-line
673 if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
674 lineEnd = i
675 }
676 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
677 lineEnd++
678 }
679
680 // optional (space|tab)* spacer after a newline
681 if lineEnd > 0 {
682 i = lineEnd + 1
683 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
684 i++
685 }
686 }
687
688 // optional title: any non-newline sequence enclosed in '"() alone on its line
689 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
690 i++
691 titleOffset = i
692
693 // look for EOL
694 for i < len(data) && data[i] != '\n' && data[i] != '\r' {
695 i++
696 }
697 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
698 titleEnd = i + 1
699 } else {
700 titleEnd = i
701 }
702
703 // step back
704 i--
705 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
706 i--
707 }
708 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
709 lineEnd = titleEnd
710 titleEnd = i
711 }
712 }
713
714 return
715}
716
// scanFootnote extracts the text of a footnote definition that starts at
// data[i]. It finds the entire block and shifts it over by one indent if it
// is indeed a block (just returns the line if it's not).
//
// blockStart is where the footnote text begins (after leading spaces),
// blockEnd is the end of the section in the input buffer, and contents is
// the extracted text with one level of indentation removed from every
// continuation line. It will need to be rendered at the end of the document.
// hasBlock reports whether at least one indented continuation line was
// found. All results are zero when i is 0 or data is empty.
//
// NOTE(review): the original comment says the first part mirrors the list
// item scanner; that function is not visible here.
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line; i stops just past the newline
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		// step past the previous newline so that the scan below does not
		// stop immediately
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		// (a run of blank lines collapses into a single one)
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	// make sure the extracted text is newline-terminated when the input
	// ended without one
	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}
796
797//
798//
799// Miscellaneous helper functions
800//
801//
802
// ispunct reports whether c is one of the 32 ASCII punctuation characters.
// The character set is taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	const punctuation = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	return strings.IndexByte(punctuation, c) >= 0
}
813
// isspace reports whether c is an ASCII whitespace character: space, tab,
// newline, carriage return, form feed or vertical tab.
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
818
// isletter reports whether c is an ASCII letter (a-z or A-Z).
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
823
824// Test if a character is a letter or a digit.
825// TODO: check when this is looking for ASCII alnum and when it should use unicode
826func isalnum(c byte) bool {
827 return (c >= '0' && c <= '9') || isletter(c)
828}
829
// expandTabs writes line to out with every tab replaced by spaces up to the
// next multiple of tabSize columns. Columns are counted in runes, not bytes.
// Nothing else is added or removed; in particular no newline is appended.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Common cases first: no tabs at all, or tabs only at the beginning of
	// the line. Neither requires decoding runes.
	leading := 0
	for leading < len(line) && line[leading] == '\t' {
		leading++
	}
	if bytes.IndexByte(line[leading:], '\t') < 0 {
		for n := leading * tabSize; n > 0; n-- {
			out.WriteByte(' ')
		}
		out.Write(line[leading:])
		return
	}

	// The slow case: a tab follows other text, so the current column has to
	// be tracked to know how many spaces each tab stands for.
	column := 0
	for pos := 0; pos < len(line); {
		if line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			out.Write(line[pos : pos+width])
			pos += width
			column++
			continue
		}

		// a tab always yields at least one space
		for {
			out.WriteByte(' ')
			column++
			if column%tabSize == 0 {
				break
			}
		}
		pos++
	}
}
887
// isIndented reports whether a line counts as indented. It returns the
// number of bytes that make up the indent: 1 for a leading tab, indentSize
// for a run of indentSize leading spaces, and 0 if the line is not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) < indentSize:
		return 0
	}
	// every one of the first indentSize bytes has to be a space
	for k := indentSize - 1; k >= 0; k-- {
		if data[k] != ' ' {
			return 0
		}
	}
	return indentSize
}
907
908// Create a url-safe slug for fragments
909func slugify(in []byte) []byte {
910 if len(in) == 0 {
911 return in
912 }
913 out := make([]byte, 0, len(in))
914 sym := false
915
916 for _, ch := range in {
917 if isalnum(ch) {
918 sym = false
919 out = append(out, ch)
920 } else if sym {
921 continue
922 } else {
923 out = append(out, '-')
924 sym = true
925 }
926 }
927 var a, b int
928 var ch byte
929 for a, ch = range out {
930 if ch != '-' {
931 break
932 }
933 }
934 for b = len(out) - 1; b > 0; b-- {
935 if out[b] != '-' {
936 break
937 }
938 }
939 return out[a : b+1]
940}