block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "html"
19 "regexp"
20 "strings"
21 "unicode"
22)
23
24const (
25 charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
26 escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
27)
28
29var (
30 reBackslashOrAmp = regexp.MustCompile("[\\&]")
31 reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
32)
33
34// Parse block-level data.
35// Note: this function and many that it calls assume that
36// the input buffer ends with a newline.
37func (p *Markdown) block(data []byte) {
38 // this is called recursively: enforce a maximum depth
39 if p.nesting >= p.maxNesting {
40 return
41 }
42 p.nesting++
43
44 // parse out one block-level construct at a time
45 for len(data) > 0 {
46 // prefixed heading:
47 //
48 // # Heading 1
49 // ## Heading 2
50 // ...
51 // ###### Heading 6
52 if p.isPrefixHeading(data) {
53 data = data[p.prefixHeading(data):]
54 continue
55 }
56
57 // block of preformatted HTML:
58 //
59 // <div>
60 // ...
61 // </div>
62 if data[0] == '<' {
63 if i := p.html(data, true); i > 0 {
64 data = data[i:]
65 continue
66 }
67 }
68
69 // title block
70 //
71 // % stuff
72 // % more stuff
73 // % even more stuff
74 if p.extensions&Titleblock != 0 {
75 if data[0] == '%' {
76 if i := p.titleBlock(data, true); i > 0 {
77 data = data[i:]
78 continue
79 }
80 }
81 }
82
83 // blank lines. note: returns the # of bytes to skip
84 if i := p.isEmpty(data); i > 0 {
85 data = data[i:]
86 continue
87 }
88
89 // indented code block:
90 //
91 // func max(a, b int) int {
92 // if a > b {
93 // return a
94 // }
95 // return b
96 // }
97 if p.codePrefix(data) > 0 {
98 data = data[p.code(data):]
99 continue
100 }
101
102 // fenced code block:
103 //
104 // ``` go
105 // func fact(n int) int {
106 // if n <= 1 {
107 // return n
108 // }
109 // return n * fact(n-1)
110 // }
111 // ```
112 if p.extensions&FencedCode != 0 {
113 if i := p.fencedCodeBlock(data, true); i > 0 {
114 data = data[i:]
115 continue
116 }
117 }
118
119 // horizontal rule:
120 //
121 // ------
122 // or
123 // ******
124 // or
125 // ______
126 if p.isHRule(data) {
127 p.addBlock(HorizontalRule, nil)
128 var i int
129 for i = 0; i < len(data) && data[i] != '\n'; i++ {
130 }
131 data = data[i:]
132 continue
133 }
134
135 // block quote:
136 //
137 // > A big quote I found somewhere
138 // > on the web
139 if p.quotePrefix(data) > 0 {
140 data = data[p.quote(data):]
141 continue
142 }
143
144 // table:
145 //
146 // Name | Age | Phone
147 // ------|-----|---------
148 // Bob | 31 | 555-1234
149 // Alice | 27 | 555-4321
150 if p.extensions&Tables != 0 {
151 if i := p.table(data); i > 0 {
152 data = data[i:]
153 continue
154 }
155 }
156
157 // an itemized/unordered list:
158 //
159 // * Item 1
160 // * Item 2
161 //
162 // also works with + or -
163 if p.uliPrefix(data) > 0 {
164 data = data[p.list(data, 0):]
165 continue
166 }
167
168 // a numbered/ordered list:
169 //
170 // 1. Item 1
171 // 2. Item 2
172 if p.oliPrefix(data) > 0 {
173 data = data[p.list(data, ListTypeOrdered):]
174 continue
175 }
176
177 // definition lists:
178 //
179 // Term 1
180 // : Definition a
181 // : Definition b
182 //
183 // Term 2
184 // : Definition c
185 if p.extensions&DefinitionLists != 0 {
186 if p.dliPrefix(data) > 0 {
187 data = data[p.list(data, ListTypeDefinition):]
188 continue
189 }
190 }
191
192 // anything else must look like a normal paragraph
193 // note: this finds underlined headings, too
194 data = data[p.paragraph(data):]
195 }
196
197 p.nesting--
198}
199
200func (p *Markdown) addBlock(typ NodeType, content []byte) *Node {
201 p.closeUnmatchedBlocks()
202 container := p.addChild(typ, 0)
203 container.content = content
204 return container
205}
206
207func (p *Markdown) isPrefixHeading(data []byte) bool {
208 if data[0] != '#' {
209 return false
210 }
211
212 if p.extensions&SpaceHeadings != 0 {
213 level := 0
214 for level < 6 && level < len(data) && data[level] == '#' {
215 level++
216 }
217 if level == len(data) || data[level] != ' ' {
218 return false
219 }
220 }
221 return true
222}
223
224func (p *Markdown) prefixHeading(data []byte) int {
225 level := 0
226 for level < 6 && level < len(data) && data[level] == '#' {
227 level++
228 }
229 i := skipChar(data, level, ' ')
230 end := skipUntilChar(data, i, '\n')
231 skip := end
232 id := ""
233 if p.extensions&HeadingIDs != 0 {
234 j, k := 0, 0
235 // find start/end of heading id
236 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
237 }
238 for k = j + 1; k < end && data[k] != '}'; k++ {
239 }
240 // extract heading id iff found
241 if j < end && k < end {
242 id = string(data[j+2 : k])
243 end = j
244 skip = k + 1
245 for end > 0 && data[end-1] == ' ' {
246 end--
247 }
248 }
249 }
250 for end > 0 && data[end-1] == '#' {
251 if isBackslashEscaped(data, end-1) {
252 break
253 }
254 end--
255 }
256 for end > 0 && data[end-1] == ' ' {
257 end--
258 }
259 if end > i {
260 if id == "" && p.extensions&AutoHeadingIDs != 0 {
261 id = SanitizedAnchorName(string(data[i:end]))
262 }
263 block := p.addBlock(Heading, data[i:end])
264 block.HeadingID = id
265 block.Level = level
266 }
267 return skip
268}
269
270func (p *Markdown) isUnderlinedHeading(data []byte) int {
271 // test of level 1 heading
272 if data[0] == '=' {
273 i := skipChar(data, 1, '=')
274 i = skipChar(data, i, ' ')
275 if i < len(data) && data[i] == '\n' {
276 return 1
277 }
278 return 0
279 }
280
281 // test of level 2 heading
282 if data[0] == '-' {
283 i := skipChar(data, 1, '-')
284 i = skipChar(data, i, ' ')
285 if i < len(data) && data[i] == '\n' {
286 return 2
287 }
288 return 0
289 }
290
291 return 0
292}
293
294func (p *Markdown) titleBlock(data []byte, doRender bool) int {
295 if data[0] != '%' {
296 return 0
297 }
298 splitData := bytes.Split(data, []byte("\n"))
299 var i int
300 for idx, b := range splitData {
301 if !bytes.HasPrefix(b, []byte("%")) {
302 i = idx // - 1
303 break
304 }
305 }
306
307 data = bytes.Join(splitData[0:i], []byte("\n"))
308 consumed := len(data)
309 data = bytes.TrimPrefix(data, []byte("% "))
310 data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
311 block := p.addBlock(Heading, data)
312 block.Level = 1
313 block.IsTitleblock = true
314
315 return consumed
316}
317
318func (p *Markdown) html(data []byte, doRender bool) int {
319 var i, j int
320
321 // identify the opening tag
322 if data[0] != '<' {
323 return 0
324 }
325 curtag, tagfound := p.htmlFindTag(data[1:])
326
327 // handle special cases
328 if !tagfound {
329 // check for an HTML comment
330 if size := p.htmlComment(data, doRender); size > 0 {
331 return size
332 }
333
334 // check for an <hr> tag
335 if size := p.htmlHr(data, doRender); size > 0 {
336 return size
337 }
338
339 // no special case recognized
340 return 0
341 }
342
343 // look for an unindented matching closing tag
344 // followed by a blank line
345 found := false
346 /*
347 closetag := []byte("\n</" + curtag + ">")
348 j = len(curtag) + 1
349 for !found {
350 // scan for a closing tag at the beginning of a line
351 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
352 j += skip + len(closetag)
353 } else {
354 break
355 }
356
357 // see if it is the only thing on the line
358 if skip := p.isEmpty(data[j:]); skip > 0 {
359 // see if it is followed by a blank line/eof
360 j += skip
361 if j >= len(data) {
362 found = true
363 i = j
364 } else {
365 if skip := p.isEmpty(data[j:]); skip > 0 {
366 j += skip
367 found = true
368 i = j
369 }
370 }
371 }
372 }
373 */
374
375 // if not found, try a second pass looking for indented match
376 // but not if tag is "ins" or "del" (following original Markdown.pl)
377 if !found && curtag != "ins" && curtag != "del" {
378 i = 1
379 for i < len(data) {
380 i++
381 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
382 i++
383 }
384
385 if i+2+len(curtag) >= len(data) {
386 break
387 }
388
389 j = p.htmlFindEnd(curtag, data[i-1:])
390
391 if j > 0 {
392 i += j - 1
393 found = true
394 break
395 }
396 }
397 }
398
399 if !found {
400 return 0
401 }
402
403 // the end of the block has been found
404 if doRender {
405 // trim newlines
406 end := i
407 for end > 0 && data[end-1] == '\n' {
408 end--
409 }
410 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
411 }
412
413 return i
414}
415
416func finalizeHTMLBlock(block *Node) {
417 block.Literal = block.content
418 block.content = nil
419}
420
421// HTML comment, lax form
422func (p *Markdown) htmlComment(data []byte, doRender bool) int {
423 i := p.inlineHTMLComment(data)
424 // needs to end with a blank line
425 if j := p.isEmpty(data[i:]); j > 0 {
426 size := i + j
427 if doRender {
428 // trim trailing newlines
429 end := size
430 for end > 0 && data[end-1] == '\n' {
431 end--
432 }
433 block := p.addBlock(HTMLBlock, data[:end])
434 finalizeHTMLBlock(block)
435 }
436 return size
437 }
438 return 0
439}
440
441// HR, which is the only self-closing block tag considered
442func (p *Markdown) htmlHr(data []byte, doRender bool) int {
443 if len(data) < 4 {
444 return 0
445 }
446 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
447 return 0
448 }
449 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
450 // not an <hr> tag after all; at least not a valid one
451 return 0
452 }
453 i := 3
454 for i < len(data) && data[i] != '>' && data[i] != '\n' {
455 i++
456 }
457 if i < len(data) && data[i] == '>' {
458 i++
459 if j := p.isEmpty(data[i:]); j > 0 {
460 size := i + j
461 if doRender {
462 // trim newlines
463 end := size
464 for end > 0 && data[end-1] == '\n' {
465 end--
466 }
467 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
468 }
469 return size
470 }
471 }
472 return 0
473}
474
475func (p *Markdown) htmlFindTag(data []byte) (string, bool) {
476 i := 0
477 for i < len(data) && isalnum(data[i]) {
478 i++
479 }
480 key := string(data[:i])
481 if _, ok := blockTags[key]; ok {
482 return key, true
483 }
484 return "", false
485}
486
487func (p *Markdown) htmlFindEnd(tag string, data []byte) int {
488 // assume data[0] == '<' && data[1] == '/' already tested
489 if tag == "hr" {
490 return 2
491 }
492 // check if tag is a match
493 closetag := []byte("</" + tag + ">")
494 if !bytes.HasPrefix(data, closetag) {
495 return 0
496 }
497 i := len(closetag)
498
499 // check that the rest of the line is blank
500 skip := 0
501 if skip = p.isEmpty(data[i:]); skip == 0 {
502 return 0
503 }
504 i += skip
505 skip = 0
506
507 if i >= len(data) {
508 return i
509 }
510
511 if p.extensions&LaxHTMLBlocks != 0 {
512 return i
513 }
514 if skip = p.isEmpty(data[i:]); skip == 0 {
515 // following line must be blank
516 return 0
517 }
518
519 return i + skip
520}
521
522func (*Markdown) isEmpty(data []byte) int {
523 // it is okay to call isEmpty on an empty buffer
524 if len(data) == 0 {
525 return 0
526 }
527
528 var i int
529 for i = 0; i < len(data) && data[i] != '\n'; i++ {
530 if data[i] != ' ' && data[i] != '\t' {
531 return 0
532 }
533 }
534 if i < len(data) && data[i] == '\n' {
535 i++
536 }
537 return i
538}
539
540func (*Markdown) isHRule(data []byte) bool {
541 i := 0
542
543 // skip up to three spaces
544 for i < 3 && data[i] == ' ' {
545 i++
546 }
547
548 // look at the hrule char
549 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
550 return false
551 }
552 c := data[i]
553
554 // the whole line must be the char or whitespace
555 n := 0
556 for i < len(data) && data[i] != '\n' {
557 switch {
558 case data[i] == c:
559 n++
560 case data[i] != ' ':
561 return false
562 }
563 i++
564 }
565
566 return n >= 3
567}
568
569// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
570// and returns the end index if so, or 0 otherwise. It also returns the marker found.
571// If info is not nil, it gets set to the syntax specified in the fence line.
572func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker string) {
573 i, size := 0, 0
574
575 // skip up to three spaces
576 for i < len(data) && i < 3 && data[i] == ' ' {
577 i++
578 }
579
580 // check for the marker characters: ~ or `
581 if i >= len(data) {
582 return 0, ""
583 }
584 if data[i] != '~' && data[i] != '`' {
585 return 0, ""
586 }
587
588 c := data[i]
589
590 // the whole line must be the same char or whitespace
591 for i < len(data) && data[i] == c {
592 size++
593 i++
594 }
595
596 // the marker char must occur at least 3 times
597 if size < 3 {
598 return 0, ""
599 }
600 marker = string(data[i-size : i])
601
602 // if this is the end marker, it must match the beginning marker
603 if oldmarker != "" && marker != oldmarker {
604 return 0, ""
605 }
606
607 // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
608 // into one, always get the info string, and discard it if the caller doesn't care.
609 if info != nil {
610 infoLength := 0
611 i = skipChar(data, i, ' ')
612
613 if i >= len(data) {
614 if i == len(data) {
615 return i, marker
616 }
617 return 0, ""
618 }
619
620 infoStart := i
621
622 if data[i] == '{' {
623 i++
624 infoStart++
625
626 for i < len(data) && data[i] != '}' && data[i] != '\n' {
627 infoLength++
628 i++
629 }
630
631 if i >= len(data) || data[i] != '}' {
632 return 0, ""
633 }
634
635 // strip all whitespace at the beginning and the end
636 // of the {} block
637 for infoLength > 0 && isspace(data[infoStart]) {
638 infoStart++
639 infoLength--
640 }
641
642 for infoLength > 0 && isspace(data[infoStart+infoLength-1]) {
643 infoLength--
644 }
645 i++
646 i = skipChar(data, i, ' ')
647 } else {
648 for i < len(data) && !isverticalspace(data[i]) {
649 infoLength++
650 i++
651 }
652 }
653
654 *info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength]))
655 }
656
657 if i == len(data) {
658 return i, marker
659 }
660 if i > len(data) || data[i] != '\n' {
661 return 0, ""
662 }
663 return i + 1, marker // Take newline into account.
664}
665
666// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
667// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
668// If doRender is true, a final newline is mandatory to recognize the fenced code block.
669func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int {
670 var info string
671 beg, marker := isFenceLine(data, &info, "")
672 if beg == 0 || beg >= len(data) {
673 return 0
674 }
675 fenceLength := beg - 1
676
677 var work bytes.Buffer
678 work.Write([]byte(info))
679 work.WriteByte('\n')
680
681 for {
682 // safe to assume beg < len(data)
683
684 // check for the end of the code block
685 fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
686 if fenceEnd != 0 {
687 beg += fenceEnd
688 break
689 }
690
691 // copy the current line
692 end := skipUntilChar(data, beg, '\n') + 1
693
694 // did we reach the end of the buffer without a closing marker?
695 if end >= len(data) {
696 return 0
697 }
698
699 // verbatim copy to the working buffer
700 if doRender {
701 work.Write(data[beg:end])
702 }
703 beg = end
704 }
705
706 if doRender {
707 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
708 block.IsFenced = true
709 block.FenceLength = fenceLength
710 finalizeCodeBlock(block)
711 }
712
713 return beg
714}
715
716func unescapeChar(str []byte) []byte {
717 if str[0] == '\\' {
718 return []byte{str[1]}
719 }
720 return []byte(html.UnescapeString(string(str)))
721}
722
723func unescapeString(str []byte) []byte {
724 if reBackslashOrAmp.Match(str) {
725 return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
726 }
727 return str
728}
729
730func finalizeCodeBlock(block *Node) {
731 if block.IsFenced {
732 newlinePos := bytes.IndexByte(block.content, '\n')
733 firstLine := block.content[:newlinePos]
734 rest := block.content[newlinePos+1:]
735 block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
736 block.Literal = rest
737 } else {
738 block.Literal = block.content
739 }
740 block.content = nil
741}
742
743func (p *Markdown) table(data []byte) int {
744 table := p.addBlock(Table, nil)
745 i, columns := p.tableHeader(data)
746 if i == 0 {
747 p.tip = table.Parent
748 table.Unlink()
749 return 0
750 }
751
752 p.addBlock(TableBody, nil)
753
754 for i < len(data) {
755 pipes, rowStart := 0, i
756 for ; i < len(data) && data[i] != '\n'; i++ {
757 if data[i] == '|' {
758 pipes++
759 }
760 }
761
762 if pipes == 0 {
763 i = rowStart
764 break
765 }
766
767 // include the newline in data sent to tableRow
768 if i < len(data) && data[i] == '\n' {
769 i++
770 }
771 p.tableRow(data[rowStart:i], columns, false)
772 }
773
774 return i
775}
776
777// check if the specified position is preceded by an odd number of backslashes
778func isBackslashEscaped(data []byte, i int) bool {
779 backslashes := 0
780 for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
781 backslashes++
782 }
783 return backslashes&1 == 1
784}
785
786func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
787 i := 0
788 colCount := 1
789 for i = 0; i < len(data) && data[i] != '\n'; i++ {
790 if data[i] == '|' && !isBackslashEscaped(data, i) {
791 colCount++
792 }
793 }
794
795 // doesn't look like a table header
796 if colCount == 1 {
797 return
798 }
799
800 // include the newline in the data sent to tableRow
801 j := i
802 if j < len(data) && data[j] == '\n' {
803 j++
804 }
805 header := data[:j]
806
807 // column count ignores pipes at beginning or end of line
808 if data[0] == '|' {
809 colCount--
810 }
811 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
812 colCount--
813 }
814
815 columns = make([]CellAlignFlags, colCount)
816
817 // move on to the header underline
818 i++
819 if i >= len(data) {
820 return
821 }
822
823 if data[i] == '|' && !isBackslashEscaped(data, i) {
824 i++
825 }
826 i = skipChar(data, i, ' ')
827
828 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
829 // and trailing | optional on last column
830 col := 0
831 for i < len(data) && data[i] != '\n' {
832 dashes := 0
833
834 if data[i] == ':' {
835 i++
836 columns[col] |= TableAlignmentLeft
837 dashes++
838 }
839 for i < len(data) && data[i] == '-' {
840 i++
841 dashes++
842 }
843 if i < len(data) && data[i] == ':' {
844 i++
845 columns[col] |= TableAlignmentRight
846 dashes++
847 }
848 for i < len(data) && data[i] == ' ' {
849 i++
850 }
851 if i == len(data) {
852 return
853 }
854 // end of column test is messy
855 switch {
856 case dashes < 3:
857 // not a valid column
858 return
859
860 case data[i] == '|' && !isBackslashEscaped(data, i):
861 // marker found, now skip past trailing whitespace
862 col++
863 i++
864 for i < len(data) && data[i] == ' ' {
865 i++
866 }
867
868 // trailing junk found after last column
869 if col >= colCount && i < len(data) && data[i] != '\n' {
870 return
871 }
872
873 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
874 // something else found where marker was required
875 return
876
877 case data[i] == '\n':
878 // marker is optional for the last column
879 col++
880
881 default:
882 // trailing junk found after last column
883 return
884 }
885 }
886 if col != colCount {
887 return
888 }
889
890 p.addBlock(TableHead, nil)
891 p.tableRow(header, columns, true)
892 size = i
893 if size < len(data) && data[size] == '\n' {
894 size++
895 }
896 return
897}
898
899func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) {
900 p.addBlock(TableRow, nil)
901 i, col := 0, 0
902
903 if data[i] == '|' && !isBackslashEscaped(data, i) {
904 i++
905 }
906
907 for col = 0; col < len(columns) && i < len(data); col++ {
908 for i < len(data) && data[i] == ' ' {
909 i++
910 }
911
912 cellStart := i
913
914 for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
915 i++
916 }
917
918 cellEnd := i
919
920 // skip the end-of-cell marker, possibly taking us past end of buffer
921 i++
922
923 for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
924 cellEnd--
925 }
926
927 cell := p.addBlock(TableCell, data[cellStart:cellEnd])
928 cell.IsHeader = header
929 cell.Align = columns[col]
930 }
931
932 // pad it out with empty columns to get the right number
933 for ; col < len(columns); col++ {
934 cell := p.addBlock(TableCell, nil)
935 cell.IsHeader = header
936 cell.Align = columns[col]
937 }
938
939 // silently ignore rows with too many cells
940}
941
942// returns blockquote prefix length
943func (p *Markdown) quotePrefix(data []byte) int {
944 i := 0
945 for i < 3 && i < len(data) && data[i] == ' ' {
946 i++
947 }
948 if i < len(data) && data[i] == '>' {
949 if i+1 < len(data) && data[i+1] == ' ' {
950 return i + 2
951 }
952 return i + 1
953 }
954 return 0
955}
956
957// blockquote ends with at least one blank line
958// followed by something without a blockquote prefix
959func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool {
960 if p.isEmpty(data[beg:]) <= 0 {
961 return false
962 }
963 if end >= len(data) {
964 return true
965 }
966 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
967}
968
969// parse a blockquote fragment
970func (p *Markdown) quote(data []byte) int {
971 block := p.addBlock(BlockQuote, nil)
972 var raw bytes.Buffer
973 beg, end := 0, 0
974 for beg < len(data) {
975 end = beg
976 // Step over whole lines, collecting them. While doing that, check for
977 // fenced code and if one's found, incorporate it altogether,
978 // irregardless of any contents inside it
979 for end < len(data) && data[end] != '\n' {
980 if p.extensions&FencedCode != 0 {
981 if i := p.fencedCodeBlock(data[end:], false); i > 0 {
982 // -1 to compensate for the extra end++ after the loop:
983 end += i - 1
984 break
985 }
986 }
987 end++
988 }
989 if end < len(data) && data[end] == '\n' {
990 end++
991 }
992 if pre := p.quotePrefix(data[beg:]); pre > 0 {
993 // skip the prefix
994 beg += pre
995 } else if p.terminateBlockquote(data, beg, end) {
996 break
997 }
998 // this line is part of the blockquote
999 raw.Write(data[beg:end])
1000 beg = end
1001 }
1002 p.block(raw.Bytes())
1003 p.finalize(block)
1004 return end
1005}
1006
1007// returns prefix length for block code
1008func (p *Markdown) codePrefix(data []byte) int {
1009 if len(data) >= 1 && data[0] == '\t' {
1010 return 1
1011 }
1012 if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1013 return 4
1014 }
1015 return 0
1016}
1017
1018func (p *Markdown) code(data []byte) int {
1019 var work bytes.Buffer
1020
1021 i := 0
1022 for i < len(data) {
1023 beg := i
1024 for i < len(data) && data[i] != '\n' {
1025 i++
1026 }
1027 if i < len(data) && data[i] == '\n' {
1028 i++
1029 }
1030
1031 blankline := p.isEmpty(data[beg:i]) > 0
1032 if pre := p.codePrefix(data[beg:i]); pre > 0 {
1033 beg += pre
1034 } else if !blankline {
1035 // non-empty, non-prefixed line breaks the pre
1036 i = beg
1037 break
1038 }
1039
1040 // verbatim copy to the working buffer
1041 if blankline {
1042 work.WriteByte('\n')
1043 } else {
1044 work.Write(data[beg:i])
1045 }
1046 }
1047
1048 // trim all the \n off the end of work
1049 workbytes := work.Bytes()
1050 eol := len(workbytes)
1051 for eol > 0 && workbytes[eol-1] == '\n' {
1052 eol--
1053 }
1054 if eol != len(workbytes) {
1055 work.Truncate(eol)
1056 }
1057
1058 work.WriteByte('\n')
1059
1060 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1061 block.IsFenced = false
1062 finalizeCodeBlock(block)
1063
1064 return i
1065}
1066
1067// returns unordered list item prefix
1068func (p *Markdown) uliPrefix(data []byte) int {
1069 i := 0
1070 // start with up to 3 spaces
1071 for i < len(data) && i < 3 && data[i] == ' ' {
1072 i++
1073 }
1074 if i >= len(data)-1 {
1075 return 0
1076 }
1077 // need one of {'*', '+', '-'} followed by a space or a tab
1078 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1079 (data[i+1] != ' ' && data[i+1] != '\t') {
1080 return 0
1081 }
1082 return i + 2
1083}
1084
1085// returns ordered list item prefix
1086func (p *Markdown) oliPrefix(data []byte) int {
1087 i := 0
1088
1089 // start with up to 3 spaces
1090 for i < 3 && i < len(data) && data[i] == ' ' {
1091 i++
1092 }
1093
1094 // count the digits
1095 start := i
1096 for i < len(data) && data[i] >= '0' && data[i] <= '9' {
1097 i++
1098 }
1099 if start == i || i >= len(data)-1 {
1100 return 0
1101 }
1102
1103 // we need >= 1 digits followed by a dot and a space or a tab
1104 if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1105 return 0
1106 }
1107 return i + 2
1108}
1109
1110// returns definition list item prefix
1111func (p *Markdown) dliPrefix(data []byte) int {
1112 if len(data) < 2 {
1113 return 0
1114 }
1115 i := 0
1116 // need a ':' followed by a space or a tab
1117 if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1118 return 0
1119 }
1120 for i < len(data) && data[i] == ' ' {
1121 i++
1122 }
1123 return i + 2
1124}
1125
1126// parse ordered or unordered list block
1127func (p *Markdown) list(data []byte, flags ListType) int {
1128 i := 0
1129 flags |= ListItemBeginningOfList
1130 block := p.addBlock(List, nil)
1131 block.ListFlags = flags
1132 block.Tight = true
1133
1134 for i < len(data) {
1135 skip := p.listItem(data[i:], &flags)
1136 if flags&ListItemContainsBlock != 0 {
1137 block.ListData.Tight = false
1138 }
1139 i += skip
1140 if skip == 0 || flags&ListItemEndOfList != 0 {
1141 break
1142 }
1143 flags &= ^ListItemBeginningOfList
1144 }
1145
1146 above := block.Parent
1147 finalizeList(block)
1148 p.tip = above
1149 return i
1150}
1151
1152// Returns true if the list item is not the same type as its parent list
1153func (p *Markdown) listTypeChanged(data []byte, flags *ListType) bool {
1154 if p.dliPrefix(data) > 0 && *flags&ListTypeDefinition == 0 {
1155 return true
1156 } else if p.oliPrefix(data) > 0 && *flags&ListTypeOrdered == 0 {
1157 return true
1158 } else if p.uliPrefix(data) > 0 && (*flags&ListTypeOrdered != 0 || *flags&ListTypeDefinition != 0) {
1159 return true
1160 }
1161 return false
1162}
1163
1164// Returns true if block ends with a blank line, descending if needed
1165// into lists and sublists.
1166func endsWithBlankLine(block *Node) bool {
1167 // TODO: figure this out. Always false now.
1168 for block != nil {
1169 //if block.lastLineBlank {
1170 //return true
1171 //}
1172 t := block.Type
1173 if t == List || t == Item {
1174 block = block.LastChild
1175 } else {
1176 break
1177 }
1178 }
1179 return false
1180}
1181
1182func finalizeList(block *Node) {
1183 block.open = false
1184 item := block.FirstChild
1185 for item != nil {
1186 // check for non-final list item ending with blank line:
1187 if endsWithBlankLine(item) && item.Next != nil {
1188 block.ListData.Tight = false
1189 break
1190 }
1191 // recurse into children of list item, to see if there are spaces
1192 // between any of them:
1193 subItem := item.FirstChild
1194 for subItem != nil {
1195 if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
1196 block.ListData.Tight = false
1197 break
1198 }
1199 subItem = subItem.Next
1200 }
1201 item = item.Next
1202 }
1203}
1204
1205// Parse a single list item.
1206// Assumes initial prefix is already removed if this is a sublist.
1207func (p *Markdown) listItem(data []byte, flags *ListType) int {
1208 // keep track of the indentation of the first line
1209 itemIndent := 0
1210 if data[0] == '\t' {
1211 itemIndent += 4
1212 } else {
1213 for itemIndent < 3 && data[itemIndent] == ' ' {
1214 itemIndent++
1215 }
1216 }
1217
1218 var bulletChar byte = '*'
1219 i := p.uliPrefix(data)
1220 if i == 0 {
1221 i = p.oliPrefix(data)
1222 } else {
1223 bulletChar = data[i-2]
1224 }
1225 if i == 0 {
1226 i = p.dliPrefix(data)
1227 // reset definition term flag
1228 if i > 0 {
1229 *flags &= ^ListTypeTerm
1230 }
1231 }
1232 if i == 0 {
1233 // if in definition list, set term flag and continue
1234 if *flags&ListTypeDefinition != 0 {
1235 *flags |= ListTypeTerm
1236 } else {
1237 return 0
1238 }
1239 }
1240
1241 // skip leading whitespace on first line
1242 for i < len(data) && data[i] == ' ' {
1243 i++
1244 }
1245
1246 // find the end of the line
1247 line := i
1248 for i > 0 && i < len(data) && data[i-1] != '\n' {
1249 i++
1250 }
1251
1252 // get working buffer
1253 var raw bytes.Buffer
1254
1255 // put the first line into the working buffer
1256 raw.Write(data[line:i])
1257 line = i
1258
1259 // process the following lines
1260 containsBlankLine := false
1261 sublist := 0
1262 codeBlockMarker := ""
1263
1264gatherlines:
1265 for line < len(data) {
1266 i++
1267
1268 // find the end of this line
1269 for i < len(data) && data[i-1] != '\n' {
1270 i++
1271 }
1272
1273 // if it is an empty line, guess that it is part of this item
1274 // and move on to the next line
1275 if p.isEmpty(data[line:i]) > 0 {
1276 containsBlankLine = true
1277 line = i
1278 continue
1279 }
1280
1281 // calculate the indentation
1282 indent := 0
1283 indentIndex := 0
1284 if data[line] == '\t' {
1285 indentIndex++
1286 indent += 4
1287 } else {
1288 for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1289 indent++
1290 indentIndex++
1291 }
1292 }
1293
1294 chunk := data[line+indentIndex : i]
1295
1296 if p.extensions&FencedCode != 0 {
1297 // determine if in or out of codeblock
1298 // if in codeblock, ignore normal list processing
1299 _, marker := isFenceLine(chunk, nil, codeBlockMarker)
1300 if marker != "" {
1301 if codeBlockMarker == "" {
1302 // start of codeblock
1303 codeBlockMarker = marker
1304 } else {
1305 // end of codeblock.
1306 codeBlockMarker = ""
1307 }
1308 }
1309 // we are in a codeblock, write line, and continue
1310 if codeBlockMarker != "" || marker != "" {
1311 raw.Write(data[line+indentIndex : i])
1312 line = i
1313 continue gatherlines
1314 }
1315 }
1316
1317 // evaluate how this line fits in
1318 switch {
1319 // is this a nested list item?
1320 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1321 p.oliPrefix(chunk) > 0 ||
1322 p.dliPrefix(chunk) > 0:
1323
1324 // to be a nested list, it must be indented more
1325 // if not, it is either a different kind of list
1326 // or the next item in the same list
1327 if indent <= itemIndent {
1328 if p.listTypeChanged(chunk, flags) {
1329 *flags |= ListItemEndOfList
1330 } else if containsBlankLine {
1331 *flags |= ListItemContainsBlock
1332 }
1333
1334 break gatherlines
1335 }
1336
1337 if containsBlankLine {
1338 *flags |= ListItemContainsBlock
1339 }
1340
1341 // is this the first item in the nested list?
1342 if sublist == 0 {
1343 sublist = raw.Len()
1344 }
1345
1346 // is this a nested prefix heading?
1347 case p.isPrefixHeading(chunk):
1348 // if the heading is not indented, it is not nested in the list
1349 // and thus ends the list
1350 if containsBlankLine && indent < 4 {
1351 *flags |= ListItemEndOfList
1352 break gatherlines
1353 }
1354 *flags |= ListItemContainsBlock
1355
1356 // anything following an empty line is only part
1357 // of this item if it is indented 4 spaces
1358 // (regardless of the indentation of the beginning of the item)
1359 case containsBlankLine && indent < 4:
1360 if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
1361 // is the next item still a part of this list?
1362 next := i
1363 for next < len(data) && data[next] != '\n' {
1364 next++
1365 }
1366 for next < len(data)-1 && data[next] == '\n' {
1367 next++
1368 }
1369 if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1370 *flags |= ListItemEndOfList
1371 }
1372 } else {
1373 *flags |= ListItemEndOfList
1374 }
1375 break gatherlines
1376
1377 // a blank line means this should be parsed as a block
1378 case containsBlankLine:
1379 raw.WriteByte('\n')
1380 *flags |= ListItemContainsBlock
1381 }
1382
1383 // if this line was preceded by one or more blanks,
1384 // re-introduce the blank into the buffer
1385 if containsBlankLine {
1386 containsBlankLine = false
1387 raw.WriteByte('\n')
1388 }
1389
1390 // add the line into the working buffer without prefix
1391 raw.Write(data[line+indentIndex : i])
1392
1393 line = i
1394 }
1395
1396 rawBytes := raw.Bytes()
1397
1398 block := p.addBlock(Item, nil)
1399 block.ListFlags = *flags
1400 block.Tight = false
1401 block.BulletChar = bulletChar
1402 block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
1403
1404 // render the contents of the list item
1405 if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
1406 // intermediate render of block item, except for definition term
1407 if sublist > 0 {
1408 p.block(rawBytes[:sublist])
1409 p.block(rawBytes[sublist:])
1410 } else {
1411 p.block(rawBytes)
1412 }
1413 } else {
1414 // intermediate render of inline item
1415 if sublist > 0 {
1416 child := p.addChild(Paragraph, 0)
1417 child.content = rawBytes[:sublist]
1418 p.block(rawBytes[sublist:])
1419 } else {
1420 child := p.addChild(Paragraph, 0)
1421 child.content = rawBytes
1422 }
1423 }
1424 return line
1425}
1426
1427// render a single paragraph that has already been parsed out
1428func (p *Markdown) renderParagraph(data []byte) {
1429 if len(data) == 0 {
1430 return
1431 }
1432
1433 // trim leading spaces
1434 beg := 0
1435 for data[beg] == ' ' {
1436 beg++
1437 }
1438
1439 end := len(data)
1440 // trim trailing newline
1441 if data[len(data)-1] == '\n' {
1442 end--
1443 }
1444
1445 // trim trailing spaces
1446 for end > beg && data[end-1] == ' ' {
1447 end--
1448 }
1449
1450 p.addBlock(Paragraph, data[beg:end])
1451}
1452
1453func (p *Markdown) paragraph(data []byte) int {
1454 // prev: index of 1st char of previous line
1455 // line: index of 1st char of current line
1456 // i: index of cursor/end of current line
1457 var prev, line, i int
1458 tabSize := TabSizeDefault
1459 if p.extensions&TabSizeEight != 0 {
1460 tabSize = TabSizeDouble
1461 }
1462 // keep going until we find something to mark the end of the paragraph
1463 for i < len(data) {
1464 // mark the beginning of the current line
1465 prev = line
1466 current := data[i:]
1467 line = i
1468
1469 // did we find a reference or a footnote? If so, end a paragraph
1470 // preceding it and report that we have consumed up to the end of that
1471 // reference:
1472 if refEnd := isReference(p, current, tabSize); refEnd > 0 {
1473 p.renderParagraph(data[:i])
1474 return i + refEnd
1475 }
1476
1477 // did we find a blank line marking the end of the paragraph?
1478 if n := p.isEmpty(current); n > 0 {
1479 // did this blank line followed by a definition list item?
1480 if p.extensions&DefinitionLists != 0 {
1481 if i < len(data)-1 && data[i+1] == ':' {
1482 return p.list(data[prev:], ListTypeDefinition)
1483 }
1484 }
1485
1486 p.renderParagraph(data[:i])
1487 return i + n
1488 }
1489
1490 // an underline under some text marks a heading, so our paragraph ended on prev line
1491 if i > 0 {
1492 if level := p.isUnderlinedHeading(current); level > 0 {
1493 // render the paragraph
1494 p.renderParagraph(data[:prev])
1495
1496 // ignore leading and trailing whitespace
1497 eol := i - 1
1498 for prev < eol && data[prev] == ' ' {
1499 prev++
1500 }
1501 for eol > prev && data[eol-1] == ' ' {
1502 eol--
1503 }
1504
1505 id := ""
1506 if p.extensions&AutoHeadingIDs != 0 {
1507 id = SanitizedAnchorName(string(data[prev:eol]))
1508 }
1509
1510 block := p.addBlock(Heading, data[prev:eol])
1511 block.Level = level
1512 block.HeadingID = id
1513
1514 // find the end of the underline
1515 for i < len(data) && data[i] != '\n' {
1516 i++
1517 }
1518 return i
1519 }
1520 }
1521
1522 // if the next line starts a block of HTML, then the paragraph ends here
1523 if p.extensions&LaxHTMLBlocks != 0 {
1524 if data[i] == '<' && p.html(current, false) > 0 {
1525 // rewind to before the HTML block
1526 p.renderParagraph(data[:i])
1527 return i
1528 }
1529 }
1530
1531 // if there's a prefixed heading or a horizontal rule after this, paragraph is over
1532 if p.isPrefixHeading(current) || p.isHRule(current) {
1533 p.renderParagraph(data[:i])
1534 return i
1535 }
1536
1537 // if there's a fenced code block, paragraph is over
1538 if p.extensions&FencedCode != 0 {
1539 if p.fencedCodeBlock(current, false) > 0 {
1540 p.renderParagraph(data[:i])
1541 return i
1542 }
1543 }
1544
1545 // if there's a definition list item, prev line is a definition term
1546 if p.extensions&DefinitionLists != 0 {
1547 if p.dliPrefix(current) != 0 {
1548 ret := p.list(data[prev:], ListTypeDefinition)
1549 return ret
1550 }
1551 }
1552
1553 // if there's a list after this, paragraph is over
1554 if p.extensions&NoEmptyLineBeforeBlock != 0 {
1555 if p.uliPrefix(current) != 0 ||
1556 p.oliPrefix(current) != 0 ||
1557 p.quotePrefix(current) != 0 ||
1558 p.codePrefix(current) != 0 {
1559 p.renderParagraph(data[:i])
1560 return i
1561 }
1562 }
1563
1564 // otherwise, scan to the beginning of the next line
1565 nl := bytes.IndexByte(data[i:], '\n')
1566 if nl >= 0 {
1567 i += nl + 1
1568 } else {
1569 i += len(data[i:])
1570 }
1571 }
1572
1573 p.renderParagraph(data[:i])
1574 return i
1575}
1576
1577func skipChar(data []byte, start int, char byte) int {
1578 i := start
1579 for i < len(data) && data[i] == char {
1580 i++
1581 }
1582 return i
1583}
1584
1585func skipUntilChar(text []byte, start int, char byte) int {
1586 i := start
1587 for i < len(text) && text[i] != char {
1588 i++
1589 }
1590 return i
1591}
1592
1593// SanitizedAnchorName returns a sanitized anchor name for the given text.
1594//
1595// It implements the algorithm specified in the package comment.
1596func SanitizedAnchorName(text string) string {
1597 var anchorName []rune
1598 futureDash := false
1599 for _, r := range text {
1600 switch {
1601 case unicode.IsLetter(r) || unicode.IsNumber(r):
1602 if futureDash && len(anchorName) > 0 {
1603 anchorName = append(anchorName, '-')
1604 }
1605 futureDash = false
1606 anchorName = append(anchorName, unicode.ToLower(r))
1607 default:
1608 futureDash = true
1609 }
1610 }
1611 return string(anchorName)
1612}