markdown.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// Markdown parsing and processing
13//
14//
15
16// Blackfriday markdown processor.
17//
18// Translates plain text with simple formatting rules into HTML or LaTeX.
19package blackfriday
20
21import (
22 "bytes"
23 "fmt"
24 "strings"
25 "unicode/utf8"
26)
27
// VERSION is the version string of this package.
const VERSION = "1.4"
29
30type Extensions int
31
32// These are the supported markdown parsing extensions.
33// OR these values together to select multiple extensions.
34const (
35 NoExtensions Extensions = 0
36 NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words
37 Tables // Render tables
38 FencedCode // Render fenced code blocks
39 Autolink // Detect embedded URLs that are not explicitly marked
40 Strikethrough // Strikethrough text using ~~test~~
41 LaxHTMLBlocks // Loosen up HTML block parsing rules
42 SpaceHeaders // Be strict about prefix header rules
43 HardLineBreak // Translate newlines into line breaks
44 TabSizeEight // Expand tabs to eight spaces instead of four
45 Footnotes // Pandoc-style footnotes
46 NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
47 HeaderIDs // specify header IDs with {#id}
48 Titleblock // Titleblock ala pandoc
49 AutoHeaderIDs // Create the header ID from the text
50 BackslashLineBreak // Translate trailing backslashes into line breaks
51 DefinitionLists // Render definition lists
52
53 commonHtmlFlags HtmlFlags = UseXHTML | UseSmartypants |
54 SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
55
56 commonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
57 Autolink | Strikethrough | SpaceHeaders | HeaderIDs |
58 BackslashLineBreak | DefinitionLists
59)
60
// LinkType is the kind of link handed to Renderer.AutoLink.
type LinkType int

// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LinkTypeNotAutolink LinkType = iota
	LinkTypeNormal
	LinkTypeEmail
)
71
// ListType is the flag set passed to the list-related Renderer callbacks
// (BeginList, EndList, ListItem and FootnoteItem).
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	// Flags describing the kind of list or item; each is a distinct bit.
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	// Flags describing an individual item; the bit sequence continues from
	// the group above.
	ListItemContainsBlock
	ListItemBeginningOfList
	ListItemEndOfList
)
86
// TableFlags is declared for table cell flags. Note that the alignment
// constants below are untyped, and the Renderer cell callbacks in this file
// take their flags as a plain int.
type TableFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TableAlignmentLeft = 1 << iota
	TableAlignmentRight
	// TableAlignmentCenter is a value of its own that happens to be encoded
	// as the combination of the left and right bits.
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
97
// The size of a tab stop.
// firstPass expands tabs to TabSizeDefault columns unless the TabSizeEight
// extension is set, in which case it uses TabSizeDouble.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
103
// blockTags is the set of tag names treated as HTML block-level tags.
// Markdown text may contain any of these without special escaping.
// The map is used purely as a set, so the values carry no data.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
148
// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html and Latex implementations are provided.
type Renderer interface {
	// block-level callbacks
	BlockCode(text []byte, lang string)
	BlockQuote(text []byte)
	BlockHtml(text []byte)
	BeginHeader(level int, id string)
	EndHeader(level int, id string, header []byte)
	HRule()
	BeginList(flags ListType)
	EndList(flags ListType)
	ListItem(text []byte, flags ListType)
	BeginParagraph()
	EndParagraph()
	Table(header []byte, body []byte, columnData []int)
	TableRow(text []byte)
	TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
	TableCell(out *bytes.Buffer, text []byte, flags int)
	BeginFootnotes()
	EndFootnotes()
	// FootnoteItem receives the footnote's id in name and its already
	// rendered contents in text (see secondPass).
	FootnoteItem(name, text []byte, flags ListType)
	TitleBlock(text []byte)

	// Span-level callbacks
	AutoLink(link []byte, kind LinkType)
	CodeSpan(text []byte)
	DoubleEmphasis(text []byte)
	Emphasis(text []byte)
	Image(link []byte, title []byte, alt []byte)
	LineBreak()
	Link(link []byte, title []byte, content []byte)
	RawHtmlTag(tag []byte)
	TripleEmphasis(text []byte)
	StrikeThrough(text []byte)
	FootnoteRef(ref []byte, id int)

	// Low-level callbacks
	Entity(entity []byte)
	NormalText(text []byte)

	// Header and footer
	DocumentHeader()
	DocumentFooter()

	// Output management. secondPass uses CaptureWrites to obtain the bytes
	// produced while processor runs, and GetResult to fetch the finished
	// document.
	// NOTE(review): how CopyWrites differs from CaptureWrites is not visible
	// in this file; confirm against the renderer implementations.
	GetFlags() HtmlFlags
	CaptureWrites(processor func()) []byte
	CopyWrites(processor func()) []byte
	Write(b []byte) (int, error)
	GetResult() []byte
}
210
// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
// The callbacks are registered in parser.inlineCallback, indexed by the
// triggering byte.
// NOTE(review): the int result is presumably the number of bytes consumed
// from data, with 0 meaning "not handled" — confirm against the inline parser.
type inlineParser func(p *parser, data []byte, offset int) int
214
// parser holds runtime state used by the parser.
// This is constructed by the MarkdownOptions function.
type parser struct {
	r              Renderer              // receives every rendering callback
	refOverride    ReferenceOverrideFunc // optional; consulted by getRef before refs
	refs           map[string]*reference // reference definitions, keyed by lower-cased id
	inlineCallback [256]inlineParser     // inline handlers, indexed by triggering byte
	flags          Extensions            // enabled parsing extensions
	nesting        int                   // must be back at zero when secondPass finishes
	maxNesting     int                   // set to 16 by MarkdownOptions
	insideLink     bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference
}
232
233func (p *parser) getRef(refid string) (ref *reference, found bool) {
234 if p.refOverride != nil {
235 r, overridden := p.refOverride(refid)
236 if overridden {
237 if r == nil {
238 return nil, false
239 }
240 return &reference{
241 link: []byte(r.Link),
242 title: []byte(r.Title),
243 noteId: 0,
244 hasBlock: false,
245 text: []byte(r.Text)}, true
246 }
247 }
248 // refs are case insensitive
249 ref, found = p.refs[strings.ToLower(refid)]
250 return ref, found
251}
252
253//
254//
255// Public interface
256//
257//
258
// Reference represents the details of a link.
// It is the value a ReferenceOverrideFunc returns to supply link details for
// a reference id. See the documentation in Options for more details on
// use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
270
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is treated as not
// found and the document's own definitions are not consulted.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
276
// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
// The zero value selects no extensions and no reference override.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// Extensions flags defined in this package.
	Extensions Extensions

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	//  * [link text][refid]
	//  * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}
300
301// MarkdownBasic is a convenience function for simple rendering.
302// It processes markdown input with no extensions enabled.
303func MarkdownBasic(input []byte) []byte {
304 // set up the HTML renderer
305 htmlFlags := UseXHTML
306 renderer := HtmlRenderer(htmlFlags, "", "")
307
308 // set up the parser
309 return MarkdownOptions(input, renderer, Options{Extensions: 0})
310}
311
312// Call Markdown with most useful extensions enabled
313// MarkdownCommon is a convenience function for simple rendering.
314// It processes markdown input with common extensions enabled, including:
315//
316// * Smartypants processing with smart fractions and LaTeX dashes
317//
318// * Intra-word emphasis suppression
319//
320// * Tables
321//
322// * Fenced code blocks
323//
324// * Autolinking
325//
326// * Strikethrough support
327//
328// * Strict header parsing
329//
330// * Custom Header IDs
331func MarkdownCommon(input []byte) []byte {
332 // set up the HTML renderer
333 renderer := HtmlRenderer(commonHtmlFlags, "", "")
334 return MarkdownOptions(input, renderer, Options{
335 Extensions: commonExtensions})
336}
337
338// Markdown is the main rendering function.
339// It parses and renders a block of markdown-encoded text.
340// The supplied Renderer is used to format the output, and extensions dictates
341// which non-standard extensions are enabled.
342//
343// To use the supplied Html or LaTeX renderers, see HtmlRenderer and
344// LatexRenderer, respectively.
345func Markdown(input []byte, renderer Renderer, extensions Extensions) []byte {
346 return MarkdownOptions(input, renderer, Options{
347 Extensions: extensions})
348}
349
350// MarkdownOptions is just like Markdown but takes additional options through
351// the Options struct.
352func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
353 // no point in parsing if we can't render
354 if renderer == nil {
355 return nil
356 }
357
358 extensions := opts.Extensions
359
360 // fill in the render structure
361 p := new(parser)
362 p.r = renderer
363 p.flags = extensions
364 p.refOverride = opts.ReferenceOverride
365 p.refs = make(map[string]*reference)
366 p.maxNesting = 16
367 p.insideLink = false
368
369 // register inline parsers
370 p.inlineCallback['*'] = emphasis
371 p.inlineCallback['_'] = emphasis
372 if extensions&Strikethrough != 0 {
373 p.inlineCallback['~'] = emphasis
374 }
375 p.inlineCallback['`'] = codeSpan
376 p.inlineCallback['\n'] = lineBreak
377 p.inlineCallback['['] = link
378 p.inlineCallback['<'] = leftAngle
379 p.inlineCallback['\\'] = escape
380 p.inlineCallback['&'] = entity
381 p.inlineCallback['!'] = maybeImage
382 p.inlineCallback['^'] = maybeInlineFootnote
383
384 if extensions&Autolink != 0 {
385 p.inlineCallback['h'] = maybeAutoLink
386 p.inlineCallback['m'] = maybeAutoLink
387 p.inlineCallback['f'] = maybeAutoLink
388 p.inlineCallback['H'] = maybeAutoLink
389 p.inlineCallback['M'] = maybeAutoLink
390 p.inlineCallback['F'] = maybeAutoLink
391 }
392
393 if extensions&Footnotes != 0 {
394 p.notes = make([]*reference, 0)
395 }
396
397 first := firstPass(p, input)
398 second := secondPass(p, first)
399 return second
400}
401
402// first pass:
403// - extract references
404// - expand tabs
405// - normalize newlines
406// - copy everything else
407func firstPass(p *parser, input []byte) []byte {
408 var out bytes.Buffer
409 tabSize := TabSizeDefault
410 if p.flags&TabSizeEight != 0 {
411 tabSize = TabSizeDouble
412 }
413 beg, end := 0, 0
414 lastFencedCodeBlockEnd := 0
415 for beg < len(input) { // iterate over lines
416 if end = isReference(p, input[beg:], tabSize); end > 0 {
417 beg += end
418 } else { // skip to the next line
419 end = beg
420 for end < len(input) && input[end] != '\n' && input[end] != '\r' {
421 end++
422 }
423
424 if p.flags&FencedCode != 0 {
425 // track fenced code block boundaries to suppress tab expansion
426 // inside them:
427 if beg >= lastFencedCodeBlockEnd {
428 if i := p.fencedCode(input[beg:], false); i > 0 {
429 lastFencedCodeBlockEnd = beg + i
430 }
431 }
432 }
433
434 // add the line body if present
435 if end > beg {
436 if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
437 out.Write(input[beg:end])
438 } else {
439 expandTabs(&out, input[beg:end], tabSize)
440 }
441 }
442 out.WriteByte('\n')
443
444 if end < len(input) && input[end] == '\r' {
445 end++
446 }
447 if end < len(input) && input[end] == '\n' {
448 end++
449 }
450
451 beg = end
452 }
453 }
454
455 // empty input?
456 if out.Len() == 0 {
457 out.WriteByte('\n')
458 }
459
460 return out.Bytes()
461}
462
463// second pass: actual rendering
464func secondPass(p *parser, input []byte) []byte {
465 p.r.DocumentHeader()
466 p.block(input)
467
468 if p.flags&Footnotes != 0 && len(p.notes) > 0 {
469 p.r.BeginFootnotes()
470 flags := ListItemBeginningOfList
471 for i := 0; i < len(p.notes); i += 1 {
472 ref := p.notes[i]
473 var buf bytes.Buffer
474 if ref.hasBlock {
475 flags |= ListItemContainsBlock
476 buf.Write(p.r.CaptureWrites(func() {
477 p.block(ref.title)
478 }))
479 } else {
480 buf.Write(p.r.CaptureWrites(func() {
481 p.inline(ref.title)
482 }))
483 }
484 p.r.FootnoteItem(ref.link, buf.Bytes(), flags)
485 flags &^= ListItemBeginningOfList | ListItemContainsBlock
486 }
487 p.r.EndFootnotes()
488 }
489
490 p.r.DocumentFooter()
491
492 if p.nesting != 0 {
493 panic("Nesting level did not end at zero")
494 }
495
496 return p.r.GetResult()
497}
498
499//
500// Link references
501//
502// This section implements support for references that (usually) appear
503// as footnotes in a document, and can be referenced anywhere in the document.
504// The basic format is:
505//
506// [1]: http://www.google.com/ "Google"
507// [2]: http://www.github.com/ "Github"
508//
509// Anywhere in the document, the reference can be linked by referring to its
510// label, i.e., 1 and 2 in this example, as in:
511//
512// This library is hosted on [Github][2], a git hosting site.
513//
514// Actual footnotes as specified in Pandoc and supported by some other Markdown
515// libraries such as php-markdown are also taken care of. They look like this:
516//
517// This sentence needs a bit of further explanation.[^note]
518//
519// [^note]: This is the explanation.
520//
521// Footnotes should be placed at the end of the document in an ordered list.
522// Inline footnotes such as:
523//
524// Inline footnotes^[Not supported.] also exist.
525//
526// are not yet supported.
527
// reference is the parsed form of a link reference definition or of a
// footnote definition.
type reference struct {
	link     []byte // link target; for footnotes, the footnote id
	title    []byte // link title; for footnotes, the footnote text
	noteId   int    // 0 if not a footnote ref
	hasBlock bool   // footnote text spans indented continuation lines
	text     []byte // replacement text supplied by a reference override
}

// String formats every field of the reference for debugging output.
func (r *reference) String() string {
	const layout = "{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}"
	return fmt.Sprintf(layout, r.link, r.title, r.text, r.noteId, r.hasBlock)
}
541
542// Check whether or not data starts with a reference link.
543// If so, it is parsed and stored in the list of references
544// (in the render struct).
545// Returns the number of bytes to skip to move past it,
546// or zero if the first line is not a reference.
547func isReference(p *parser, data []byte, tabSize int) int {
548 // up to 3 optional leading spaces
549 if len(data) < 4 {
550 return 0
551 }
552 i := 0
553 for i < 3 && data[i] == ' ' {
554 i++
555 }
556
557 noteId := 0
558
559 // id part: anything but a newline between brackets
560 if data[i] != '[' {
561 return 0
562 }
563 i++
564 if p.flags&Footnotes != 0 {
565 if i < len(data) && data[i] == '^' {
566 // we can set it to anything here because the proper noteIds will
567 // be assigned later during the second pass. It just has to be != 0
568 noteId = 1
569 i++
570 }
571 }
572 idOffset := i
573 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
574 i++
575 }
576 if i >= len(data) || data[i] != ']' {
577 return 0
578 }
579 idEnd := i
580
581 // spacer: colon (space | tab)* newline? (space | tab)*
582 i++
583 if i >= len(data) || data[i] != ':' {
584 return 0
585 }
586 i++
587 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
588 i++
589 }
590 if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
591 i++
592 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
593 i++
594 }
595 }
596 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
597 i++
598 }
599 if i >= len(data) {
600 return 0
601 }
602
603 var (
604 linkOffset, linkEnd int
605 titleOffset, titleEnd int
606 lineEnd int
607 raw []byte
608 hasBlock bool
609 )
610
611 if p.flags&Footnotes != 0 && noteId != 0 {
612 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
613 lineEnd = linkEnd
614 } else {
615 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
616 }
617 if lineEnd == 0 {
618 return 0
619 }
620
621 // a valid ref has been found
622
623 ref := &reference{
624 noteId: noteId,
625 hasBlock: hasBlock,
626 }
627
628 if noteId > 0 {
629 // reusing the link field for the id since footnotes don't have links
630 ref.link = data[idOffset:idEnd]
631 // if footnote, it's not really a title, it's the contained text
632 ref.title = raw
633 } else {
634 ref.link = data[linkOffset:linkEnd]
635 ref.title = data[titleOffset:titleEnd]
636 }
637
638 // id matches are case-insensitive
639 id := string(bytes.ToLower(data[idOffset:idEnd]))
640
641 p.refs[id] = ref
642
643 return lineEnd
644}
645
646func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
647 // link: whitespace-free sequence, optionally between angle brackets
648 if data[i] == '<' {
649 i++
650 }
651 linkOffset = i
652 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
653 i++
654 }
655 if i == len(data) {
656 return
657 }
658 linkEnd = i
659 if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
660 linkOffset++
661 linkEnd--
662 }
663
664 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
665 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
666 i++
667 }
668 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
669 return
670 }
671
672 // compute end-of-line
673 if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
674 lineEnd = i
675 }
676 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
677 lineEnd++
678 }
679
680 // optional (space|tab)* spacer after a newline
681 if lineEnd > 0 {
682 i = lineEnd + 1
683 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
684 i++
685 }
686 }
687
688 // optional title: any non-newline sequence enclosed in '"() alone on its line
689 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
690 i++
691 titleOffset = i
692
693 // look for EOL
694 for i < len(data) && data[i] != '\n' && data[i] != '\r' {
695 i++
696 }
697 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
698 titleEnd = i + 1
699 } else {
700 titleEnd = i
701 }
702
703 // step back
704 i--
705 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
706 i--
707 }
708 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
709 lineEnd = titleEnd
710 titleEnd = i
711 }
712 }
713
714 return
715}
716
717// The first bit of this logic is the same as (*parser).listItem, but the rest
718// is much simpler. This function simply finds the entire block and shifts it
719// over by one tab if it is indeed a block (just returns the line if it's not).
720// blockEnd is the end of the section in the input buffer, and contents is the
721// extracted text that was shifted over one tab. It will need to be rendered at
722// the end of the document.
723func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
724 if i == 0 || len(data) == 0 {
725 return
726 }
727
728 // skip leading whitespace on first line
729 for i < len(data) && data[i] == ' ' {
730 i++
731 }
732
733 blockStart = i
734
735 // find the end of the line
736 blockEnd = i
737 for i < len(data) && data[i-1] != '\n' {
738 i++
739 }
740
741 // get working buffer
742 var raw bytes.Buffer
743
744 // put the first line into the working buffer
745 raw.Write(data[blockEnd:i])
746 blockEnd = i
747
748 // process the following lines
749 containsBlankLine := false
750
751gatherLines:
752 for blockEnd < len(data) {
753 i++
754
755 // find the end of this line
756 for i < len(data) && data[i-1] != '\n' {
757 i++
758 }
759
760 // if it is an empty line, guess that it is part of this item
761 // and move on to the next line
762 if p.isEmpty(data[blockEnd:i]) > 0 {
763 containsBlankLine = true
764 blockEnd = i
765 continue
766 }
767
768 n := 0
769 if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
770 // this is the end of the block.
771 // we don't want to include this last line in the index.
772 break gatherLines
773 }
774
775 // if there were blank lines before this one, insert a new one now
776 if containsBlankLine {
777 raw.WriteByte('\n')
778 containsBlankLine = false
779 }
780
781 // get rid of that first tab, write to buffer
782 raw.Write(data[blockEnd+n : i])
783 hasBlock = true
784
785 blockEnd = i
786 }
787
788 if data[blockEnd-1] != '\n' {
789 raw.WriteByte('\n')
790 }
791
792 contents = raw.Bytes()
793
794 return
795}
796
797//
798//
799// Miscellaneous helper functions
800//
801//
802
// ispunct reports whether c is an ASCII punctuation symbol.
// The character set is taken from a private function in regexp in the stdlib.
func ispunct(c byte) bool {
	const punctuation = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	for i := 0; i < len(punctuation); i++ {
		if punctuation[i] == c {
			return true
		}
	}
	return false
}
813
// isspace reports whether c is an ASCII whitespace character: space, tab,
// newline, carriage return, form feed or vertical tab.
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
818
// isletter reports whether c is an ASCII letter.
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}

// isalnum reports whether c is an ASCII letter or digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	return isletter(c)
}
829
// expandTabs writes line to out with every tab replaced by spaces up to the
// next multiple of tabSize columns. Columns are counted in runes, not bytes.
// Nothing besides the expanded line is written; in particular no newline is
// appended.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// common cases first: no tabs at all, or tabs only at the start of the
	// line. Then no rune decoding is needed.
	prefix := 0
	for prefix < len(line) && line[prefix] == '\t' {
		prefix++
	}
	if bytes.IndexByte(line[prefix:], '\t') < 0 {
		for n := prefix * tabSize; n > 0; n-- {
			out.WriteByte(' ')
		}
		out.Write(line[prefix:])
		return
	}

	// the slow case: count runes to learn how many spaces each tab needs
	column, i := 0, 0
	for {
		// copy the run of non-tab text, advancing one column per rune
		start := i
		for i < len(line) && line[i] != '\t' {
			_, size := utf8.DecodeRune(line[i:])
			i += size
			column++
		}
		if i > start {
			out.Write(line[start:i])
		}
		if i >= len(line) {
			return
		}

		// a tab always yields at least one space, then pads to the tab stop
		for pad := true; pad; pad = column%tabSize != 0 {
			out.WriteByte(' ')
			column++
		}
		i++
	}
}
887
// isIndented reports whether a line counts as indented: it must start with
// a tab or with indentSize spaces. It returns the number of bytes that make
// up the indent (1 for a tab, indentSize for spaces), or 0 if the line is
// not indented.
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) < indentSize:
		return 0
	}
	// every one of the first indentSize bytes has to be a space
	for i := indentSize - 1; i >= 0; i-- {
		if data[i] != ' ' {
			return 0
		}
	}
	return indentSize
}
907
908// Create a url-safe slug for fragments
909func slugify(in []byte) []byte {
910 if len(in) == 0 {
911 return in
912 }
913 out := make([]byte, 0, len(in))
914 sym := false
915
916 for _, ch := range in {
917 if isalnum(ch) {
918 sym = false
919 out = append(out, ch)
920 } else if sym {
921 continue
922 } else {
923 out = append(out, '-')
924 sym = true
925 }
926 }
927 var a, b int
928 var ch byte
929 for a, ch = range out {
930 if ch != '-' {
931 break
932 }
933 }
934 for b = len(out) - 1; b > 0; b-- {
935 if out[b] != '-' {
936 break
937 }
938 }
939 return out[a : b+1]
940}