block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "html"
19 "regexp"
20 "strings"
21
22 "github.com/shurcooL/sanitized_anchor_name"
23)
24
// Regular-expression fragments describing the sequences that unescapeString
// rewrites.
const (
	// charEntity matches an HTML character reference: hexadecimal
	// ("&#x1f;"), decimal ("&#123;") or named ("&amp;").
	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// escapable is a character class holding the punctuation characters that
	// may follow a backslash in a backslash escape.
	escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is a cheap pre-check matching any input that contains
	// a backslash or an ampersand. It is written as a raw string on purpose:
	// the interpreted literal "[\\&]" yields the pattern [\&], in which the
	// backslash merely escapes '&', so backslashes themselves never matched.
	reBackslashOrAmp = regexp.MustCompile(`[\\&]`)
	// reEntityOrEscapedChar matches, case-insensitively, either a backslash
	// followed by an escapable character or a character entity.
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
)
34
35// Parse block-level data.
36// Note: this function and many that it calls assume that
37// the input buffer ends with a newline.
38func (p *Markdown) block(data []byte) {
39 // this is called recursively: enforce a maximum depth
40 if p.nesting >= p.maxNesting {
41 return
42 }
43 p.nesting++
44
45 // parse out one block-level construct at a time
46 for len(data) > 0 {
47 // prefixed heading:
48 //
49 // # Heading 1
50 // ## Heading 2
51 // ...
52 // ###### Heading 6
53 if p.isPrefixHeading(data) {
54 data = data[p.prefixHeading(data):]
55 continue
56 }
57
58 // block of preformatted HTML:
59 //
60 // <div>
61 // ...
62 // </div>
63 if data[0] == '<' {
64 if i := p.html(data, true); i > 0 {
65 data = data[i:]
66 continue
67 }
68 }
69
70 // title block
71 //
72 // % stuff
73 // % more stuff
74 // % even more stuff
75 if p.extensions&Titleblock != 0 {
76 if data[0] == '%' {
77 if i := p.titleBlock(data, true); i > 0 {
78 data = data[i:]
79 continue
80 }
81 }
82 }
83
84 // blank lines. note: returns the # of bytes to skip
85 if i := p.isEmpty(data); i > 0 {
86 data = data[i:]
87 continue
88 }
89
90 // indented code block:
91 //
92 // func max(a, b int) int {
93 // if a > b {
94 // return a
95 // }
96 // return b
97 // }
98 if p.codePrefix(data) > 0 {
99 data = data[p.code(data):]
100 continue
101 }
102
103 // fenced code block:
104 //
105 // ``` go
106 // func fact(n int) int {
107 // if n <= 1 {
108 // return n
109 // }
110 // return n * fact(n-1)
111 // }
112 // ```
113 if p.extensions&FencedCode != 0 {
114 if i := p.fencedCodeBlock(data, true); i > 0 {
115 data = data[i:]
116 continue
117 }
118 }
119
120 // horizontal rule:
121 //
122 // ------
123 // or
124 // ******
125 // or
126 // ______
127 if p.isHRule(data) {
128 p.addBlock(HorizontalRule, nil)
129 var i int
130 for i = 0; i < len(data) && data[i] != '\n'; i++ {
131 }
132 data = data[i:]
133 continue
134 }
135
136 // block quote:
137 //
138 // > A big quote I found somewhere
139 // > on the web
140 if p.quotePrefix(data) > 0 {
141 data = data[p.quote(data):]
142 continue
143 }
144
145 // table:
146 //
147 // Name | Age | Phone
148 // ------|-----|---------
149 // Bob | 31 | 555-1234
150 // Alice | 27 | 555-4321
151 if p.extensions&Tables != 0 {
152 if i := p.table(data); i > 0 {
153 data = data[i:]
154 continue
155 }
156 }
157
158 // an itemized/unordered list:
159 //
160 // * Item 1
161 // * Item 2
162 //
163 // also works with + or -
164 if p.uliPrefix(data) > 0 {
165 data = data[p.list(data, 0):]
166 continue
167 }
168
169 // a numbered/ordered list:
170 //
171 // 1. Item 1
172 // 2. Item 2
173 if p.oliPrefix(data) > 0 {
174 data = data[p.list(data, ListTypeOrdered):]
175 continue
176 }
177
178 // definition lists:
179 //
180 // Term 1
181 // : Definition a
182 // : Definition b
183 //
184 // Term 2
185 // : Definition c
186 if p.extensions&DefinitionLists != 0 {
187 if p.dliPrefix(data) > 0 {
188 data = data[p.list(data, ListTypeDefinition):]
189 continue
190 }
191 }
192
193 // anything else must look like a normal paragraph
194 // note: this finds underlined headings, too
195 data = data[p.paragraph(data):]
196 }
197
198 p.nesting--
199}
200
201func (p *Markdown) addBlock(typ NodeType, content []byte) *Node {
202 p.closeUnmatchedBlocks()
203 container := p.addChild(typ, 0)
204 container.content = content
205 return container
206}
207
208func (p *Markdown) isPrefixHeading(data []byte) bool {
209 if data[0] != '#' {
210 return false
211 }
212
213 if p.extensions&SpaceHeadings != 0 {
214 level := 0
215 for level < 6 && level < len(data) && data[level] == '#' {
216 level++
217 }
218 if level == len(data) || data[level] != ' ' {
219 return false
220 }
221 }
222 return true
223}
224
225func (p *Markdown) prefixHeading(data []byte) int {
226 level := 0
227 for level < 6 && level < len(data) && data[level] == '#' {
228 level++
229 }
230 i := skipChar(data, level, ' ')
231 end := skipUntilChar(data, i, '\n')
232 skip := end
233 id := ""
234 if p.extensions&HeadingIDs != 0 {
235 j, k := 0, 0
236 // find start/end of heading id
237 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
238 }
239 for k = j + 1; k < end && data[k] != '}'; k++ {
240 }
241 // extract heading id iff found
242 if j < end && k < end {
243 id = string(data[j+2 : k])
244 end = j
245 skip = k + 1
246 for end > 0 && data[end-1] == ' ' {
247 end--
248 }
249 }
250 }
251 for end > 0 && data[end-1] == '#' {
252 if isBackslashEscaped(data, end-1) {
253 break
254 }
255 end--
256 }
257 for end > 0 && data[end-1] == ' ' {
258 end--
259 }
260 if end > i {
261 if id == "" && p.extensions&AutoHeadingIDs != 0 {
262 id = sanitized_anchor_name.Create(string(data[i:end]))
263 }
264 block := p.addBlock(Heading, data[i:end])
265 block.HeadingID = id
266 block.Level = level
267 }
268 return skip
269}
270
271func (p *Markdown) isUnderlinedHeading(data []byte) int {
272 // test of level 1 heading
273 if data[0] == '=' {
274 i := skipChar(data, 1, '=')
275 i = skipChar(data, i, ' ')
276 if i < len(data) && data[i] == '\n' {
277 return 1
278 }
279 return 0
280 }
281
282 // test of level 2 heading
283 if data[0] == '-' {
284 i := skipChar(data, 1, '-')
285 i = skipChar(data, i, ' ')
286 if i < len(data) && data[i] == '\n' {
287 return 2
288 }
289 return 0
290 }
291
292 return 0
293}
294
295func (p *Markdown) titleBlock(data []byte, doRender bool) int {
296 if data[0] != '%' {
297 return 0
298 }
299 splitData := bytes.Split(data, []byte("\n"))
300 var i int
301 for idx, b := range splitData {
302 if !bytes.HasPrefix(b, []byte("%")) {
303 i = idx // - 1
304 break
305 }
306 }
307
308 data = bytes.Join(splitData[0:i], []byte("\n"))
309 consumed := len(data)
310 data = bytes.TrimPrefix(data, []byte("% "))
311 data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
312 block := p.addBlock(Heading, data)
313 block.Level = 1
314 block.IsTitleblock = true
315
316 return consumed
317}
318
319func (p *Markdown) html(data []byte, doRender bool) int {
320 var i, j int
321
322 // identify the opening tag
323 if data[0] != '<' {
324 return 0
325 }
326 curtag, tagfound := p.htmlFindTag(data[1:])
327
328 // handle special cases
329 if !tagfound {
330 // check for an HTML comment
331 if size := p.htmlComment(data, doRender); size > 0 {
332 return size
333 }
334
335 // check for an <hr> tag
336 if size := p.htmlHr(data, doRender); size > 0 {
337 return size
338 }
339
340 // no special case recognized
341 return 0
342 }
343
344 // look for an unindented matching closing tag
345 // followed by a blank line
346 found := false
347 /*
348 closetag := []byte("\n</" + curtag + ">")
349 j = len(curtag) + 1
350 for !found {
351 // scan for a closing tag at the beginning of a line
352 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
353 j += skip + len(closetag)
354 } else {
355 break
356 }
357
358 // see if it is the only thing on the line
359 if skip := p.isEmpty(data[j:]); skip > 0 {
360 // see if it is followed by a blank line/eof
361 j += skip
362 if j >= len(data) {
363 found = true
364 i = j
365 } else {
366 if skip := p.isEmpty(data[j:]); skip > 0 {
367 j += skip
368 found = true
369 i = j
370 }
371 }
372 }
373 }
374 */
375
376 // if not found, try a second pass looking for indented match
377 // but not if tag is "ins" or "del" (following original Markdown.pl)
378 if !found && curtag != "ins" && curtag != "del" {
379 i = 1
380 for i < len(data) {
381 i++
382 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
383 i++
384 }
385
386 if i+2+len(curtag) >= len(data) {
387 break
388 }
389
390 j = p.htmlFindEnd(curtag, data[i-1:])
391
392 if j > 0 {
393 i += j - 1
394 found = true
395 break
396 }
397 }
398 }
399
400 if !found {
401 return 0
402 }
403
404 // the end of the block has been found
405 if doRender {
406 // trim newlines
407 end := i
408 for end > 0 && data[end-1] == '\n' {
409 end--
410 }
411 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
412 }
413
414 return i
415}
416
417func finalizeHTMLBlock(block *Node) {
418 block.Literal = block.content
419 block.content = nil
420}
421
422// HTML comment, lax form
423func (p *Markdown) htmlComment(data []byte, doRender bool) int {
424 i := p.inlineHTMLComment(data)
425 // needs to end with a blank line
426 if j := p.isEmpty(data[i:]); j > 0 {
427 size := i + j
428 if doRender {
429 // trim trailing newlines
430 end := size
431 for end > 0 && data[end-1] == '\n' {
432 end--
433 }
434 block := p.addBlock(HTMLBlock, data[:end])
435 finalizeHTMLBlock(block)
436 }
437 return size
438 }
439 return 0
440}
441
442// HR, which is the only self-closing block tag considered
443func (p *Markdown) htmlHr(data []byte, doRender bool) int {
444 if len(data) < 4 {
445 return 0
446 }
447 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
448 return 0
449 }
450 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
451 // not an <hr> tag after all; at least not a valid one
452 return 0
453 }
454 i := 3
455 for i < len(data) && data[i] != '>' && data[i] != '\n' {
456 i++
457 }
458 if i < len(data) && data[i] == '>' {
459 i++
460 if j := p.isEmpty(data[i:]); j > 0 {
461 size := i + j
462 if doRender {
463 // trim newlines
464 end := size
465 for end > 0 && data[end-1] == '\n' {
466 end--
467 }
468 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
469 }
470 return size
471 }
472 }
473 return 0
474}
475
476func (p *Markdown) htmlFindTag(data []byte) (string, bool) {
477 i := 0
478 for i < len(data) && isalnum(data[i]) {
479 i++
480 }
481 key := string(data[:i])
482 if _, ok := blockTags[key]; ok {
483 return key, true
484 }
485 return "", false
486}
487
488func (p *Markdown) htmlFindEnd(tag string, data []byte) int {
489 // assume data[0] == '<' && data[1] == '/' already tested
490 if tag == "hr" {
491 return 2
492 }
493 // check if tag is a match
494 closetag := []byte("</" + tag + ">")
495 if !bytes.HasPrefix(data, closetag) {
496 return 0
497 }
498 i := len(closetag)
499
500 // check that the rest of the line is blank
501 skip := 0
502 if skip = p.isEmpty(data[i:]); skip == 0 {
503 return 0
504 }
505 i += skip
506 skip = 0
507
508 if i >= len(data) {
509 return i
510 }
511
512 if p.extensions&LaxHTMLBlocks != 0 {
513 return i
514 }
515 if skip = p.isEmpty(data[i:]); skip == 0 {
516 // following line must be blank
517 return 0
518 }
519
520 return i + skip
521}
522
523func (*Markdown) isEmpty(data []byte) int {
524 // it is okay to call isEmpty on an empty buffer
525 if len(data) == 0 {
526 return 0
527 }
528
529 var i int
530 for i = 0; i < len(data) && data[i] != '\n'; i++ {
531 if data[i] != ' ' && data[i] != '\t' {
532 return 0
533 }
534 }
535 if i < len(data) && data[i] == '\n' {
536 i++
537 }
538 return i
539}
540
541func (*Markdown) isHRule(data []byte) bool {
542 i := 0
543
544 // skip up to three spaces
545 for i < 3 && data[i] == ' ' {
546 i++
547 }
548
549 // look at the hrule char
550 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
551 return false
552 }
553 c := data[i]
554
555 // the whole line must be the char or whitespace
556 n := 0
557 for i < len(data) && data[i] != '\n' {
558 switch {
559 case data[i] == c:
560 n++
561 case data[i] != ' ':
562 return false
563 }
564 i++
565 }
566
567 return n >= 3
568}
569
570// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
571// and returns the end index if so, or 0 otherwise. It also returns the marker found.
572// If info is not nil, it gets set to the syntax specified in the fence line.
573func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker string) {
574 i, size := 0, 0
575
576 // skip up to three spaces
577 for i < len(data) && i < 3 && data[i] == ' ' {
578 i++
579 }
580
581 // check for the marker characters: ~ or `
582 if i >= len(data) {
583 return 0, ""
584 }
585 if data[i] != '~' && data[i] != '`' {
586 return 0, ""
587 }
588
589 c := data[i]
590
591 // the whole line must be the same char or whitespace
592 for i < len(data) && data[i] == c {
593 size++
594 i++
595 }
596
597 // the marker char must occur at least 3 times
598 if size < 3 {
599 return 0, ""
600 }
601 marker = string(data[i-size : i])
602
603 // if this is the end marker, it must match the beginning marker
604 if oldmarker != "" && marker != oldmarker {
605 return 0, ""
606 }
607
608 // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
609 // into one, always get the info string, and discard it if the caller doesn't care.
610 if info != nil {
611 infoLength := 0
612 i = skipChar(data, i, ' ')
613
614 if i >= len(data) {
615 if i == len(data) {
616 return i, marker
617 }
618 return 0, ""
619 }
620
621 infoStart := i
622
623 if data[i] == '{' {
624 i++
625 infoStart++
626
627 for i < len(data) && data[i] != '}' && data[i] != '\n' {
628 infoLength++
629 i++
630 }
631
632 if i >= len(data) || data[i] != '}' {
633 return 0, ""
634 }
635
636 // strip all whitespace at the beginning and the end
637 // of the {} block
638 for infoLength > 0 && isspace(data[infoStart]) {
639 infoStart++
640 infoLength--
641 }
642
643 for infoLength > 0 && isspace(data[infoStart+infoLength-1]) {
644 infoLength--
645 }
646 i++
647 i = skipChar(data, i, ' ')
648 } else {
649 for i < len(data) && !isverticalspace(data[i]) {
650 infoLength++
651 i++
652 }
653 }
654
655 *info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength]))
656 }
657
658 if i == len(data) {
659 return i, marker
660 }
661 if i > len(data) || data[i] != '\n' {
662 return 0, ""
663 }
664 return i + 1, marker // Take newline into account.
665}
666
667// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
668// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
669// If doRender is true, a final newline is mandatory to recognize the fenced code block.
670func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int {
671 var info string
672 beg, marker := isFenceLine(data, &info, "")
673 if beg == 0 || beg >= len(data) {
674 return 0
675 }
676 fenceLength := beg - 1
677
678 var work bytes.Buffer
679 work.Write([]byte(info))
680 work.WriteByte('\n')
681
682 for {
683 // safe to assume beg < len(data)
684
685 // check for the end of the code block
686 fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
687 if fenceEnd != 0 {
688 beg += fenceEnd
689 break
690 }
691
692 // copy the current line
693 end := skipUntilChar(data, beg, '\n') + 1
694
695 // did we reach the end of the buffer without a closing marker?
696 if end >= len(data) {
697 return 0
698 }
699
700 // verbatim copy to the working buffer
701 if doRender {
702 work.Write(data[beg:end])
703 }
704 beg = end
705 }
706
707 if doRender {
708 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
709 block.IsFenced = true
710 block.FenceLength = fenceLength
711 finalizeCodeBlock(block)
712 }
713
714 return beg
715}
716
// unescapeChar converts a single escape sequence to the text it stands for.
// A sequence starting with a backslash yields the byte that follows it;
// anything else is treated as an HTML entity and decoded with
// html.UnescapeString. str must not be empty.
func unescapeChar(str []byte) []byte {
	if str[0] != '\\' {
		return []byte(html.UnescapeString(string(str)))
	}
	return []byte{str[1]}
}
723
724func unescapeString(str []byte) []byte {
725 if reBackslashOrAmp.Match(str) {
726 return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
727 }
728 return str
729}
730
731func finalizeCodeBlock(block *Node) {
732 if block.IsFenced {
733 newlinePos := bytes.IndexByte(block.content, '\n')
734 firstLine := block.content[:newlinePos]
735 rest := block.content[newlinePos+1:]
736 block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
737 block.Literal = rest
738 } else {
739 block.Literal = block.content
740 }
741 block.content = nil
742}
743
744func (p *Markdown) table(data []byte) int {
745 table := p.addBlock(Table, nil)
746 i, columns := p.tableHeader(data)
747 if i == 0 {
748 p.tip = table.Parent
749 table.Unlink()
750 return 0
751 }
752
753 p.addBlock(TableBody, nil)
754
755 for i < len(data) {
756 pipes, rowStart := 0, i
757 for ; i < len(data) && data[i] != '\n'; i++ {
758 if data[i] == '|' {
759 pipes++
760 }
761 }
762
763 if pipes == 0 {
764 i = rowStart
765 break
766 }
767
768 // include the newline in data sent to tableRow
769 if i < len(data) && data[i] == '\n' {
770 i++
771 }
772 p.tableRow(data[rowStart:i], columns, false)
773 }
774
775 return i
776}
777
// isBackslashEscaped reports whether the byte at index i is preceded by an
// odd number of consecutive backslashes, i.e. whether it is escaped.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// each backslash walking left from i flips the parity
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		escaped = !escaped
	}
	return escaped
}
786
787func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
788 i := 0
789 colCount := 1
790 for i = 0; i < len(data) && data[i] != '\n'; i++ {
791 if data[i] == '|' && !isBackslashEscaped(data, i) {
792 colCount++
793 }
794 }
795
796 // doesn't look like a table header
797 if colCount == 1 {
798 return
799 }
800
801 // include the newline in the data sent to tableRow
802 j := i
803 if j < len(data) && data[j] == '\n' {
804 j++
805 }
806 header := data[:j]
807
808 // column count ignores pipes at beginning or end of line
809 if data[0] == '|' {
810 colCount--
811 }
812 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
813 colCount--
814 }
815
816 columns = make([]CellAlignFlags, colCount)
817
818 // move on to the header underline
819 i++
820 if i >= len(data) {
821 return
822 }
823
824 if data[i] == '|' && !isBackslashEscaped(data, i) {
825 i++
826 }
827 i = skipChar(data, i, ' ')
828
829 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
830 // and trailing | optional on last column
831 col := 0
832 for i < len(data) && data[i] != '\n' {
833 dashes := 0
834
835 if data[i] == ':' {
836 i++
837 columns[col] |= TableAlignmentLeft
838 dashes++
839 }
840 for i < len(data) && data[i] == '-' {
841 i++
842 dashes++
843 }
844 if i < len(data) && data[i] == ':' {
845 i++
846 columns[col] |= TableAlignmentRight
847 dashes++
848 }
849 for i < len(data) && data[i] == ' ' {
850 i++
851 }
852 if i == len(data) {
853 return
854 }
855 // end of column test is messy
856 switch {
857 case dashes < 3:
858 // not a valid column
859 return
860
861 case data[i] == '|' && !isBackslashEscaped(data, i):
862 // marker found, now skip past trailing whitespace
863 col++
864 i++
865 for i < len(data) && data[i] == ' ' {
866 i++
867 }
868
869 // trailing junk found after last column
870 if col >= colCount && i < len(data) && data[i] != '\n' {
871 return
872 }
873
874 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
875 // something else found where marker was required
876 return
877
878 case data[i] == '\n':
879 // marker is optional for the last column
880 col++
881
882 default:
883 // trailing junk found after last column
884 return
885 }
886 }
887 if col != colCount {
888 return
889 }
890
891 p.addBlock(TableHead, nil)
892 p.tableRow(header, columns, true)
893 size = i
894 if size < len(data) && data[size] == '\n' {
895 size++
896 }
897 return
898}
899
900func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) {
901 p.addBlock(TableRow, nil)
902 i, col := 0, 0
903
904 if data[i] == '|' && !isBackslashEscaped(data, i) {
905 i++
906 }
907
908 for col = 0; col < len(columns) && i < len(data); col++ {
909 for i < len(data) && data[i] == ' ' {
910 i++
911 }
912
913 cellStart := i
914
915 for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
916 i++
917 }
918
919 cellEnd := i
920
921 // skip the end-of-cell marker, possibly taking us past end of buffer
922 i++
923
924 for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
925 cellEnd--
926 }
927
928 cell := p.addBlock(TableCell, data[cellStart:cellEnd])
929 cell.IsHeader = header
930 cell.Align = columns[col]
931 }
932
933 // pad it out with empty columns to get the right number
934 for ; col < len(columns); col++ {
935 cell := p.addBlock(TableCell, nil)
936 cell.IsHeader = header
937 cell.Align = columns[col]
938 }
939
940 // silently ignore rows with too many cells
941}
942
943// returns blockquote prefix length
944func (p *Markdown) quotePrefix(data []byte) int {
945 i := 0
946 for i < 3 && i < len(data) && data[i] == ' ' {
947 i++
948 }
949 if i < len(data) && data[i] == '>' {
950 if i+1 < len(data) && data[i+1] == ' ' {
951 return i + 2
952 }
953 return i + 1
954 }
955 return 0
956}
957
958// blockquote ends with at least one blank line
959// followed by something without a blockquote prefix
960func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool {
961 if p.isEmpty(data[beg:]) <= 0 {
962 return false
963 }
964 if end >= len(data) {
965 return true
966 }
967 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
968}
969
970// parse a blockquote fragment
971func (p *Markdown) quote(data []byte) int {
972 block := p.addBlock(BlockQuote, nil)
973 var raw bytes.Buffer
974 beg, end := 0, 0
975 for beg < len(data) {
976 end = beg
977 // Step over whole lines, collecting them. While doing that, check for
978 // fenced code and if one's found, incorporate it altogether,
979 // irregardless of any contents inside it
980 for end < len(data) && data[end] != '\n' {
981 if p.extensions&FencedCode != 0 {
982 if i := p.fencedCodeBlock(data[end:], false); i > 0 {
983 // -1 to compensate for the extra end++ after the loop:
984 end += i - 1
985 break
986 }
987 }
988 end++
989 }
990 if end < len(data) && data[end] == '\n' {
991 end++
992 }
993 if pre := p.quotePrefix(data[beg:]); pre > 0 {
994 // skip the prefix
995 beg += pre
996 } else if p.terminateBlockquote(data, beg, end) {
997 break
998 }
999 // this line is part of the blockquote
1000 raw.Write(data[beg:end])
1001 beg = end
1002 }
1003 p.block(raw.Bytes())
1004 p.finalize(block)
1005 return end
1006}
1007
1008// returns prefix length for block code
1009func (p *Markdown) codePrefix(data []byte) int {
1010 if len(data) >= 1 && data[0] == '\t' {
1011 return 1
1012 }
1013 if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1014 return 4
1015 }
1016 return 0
1017}
1018
1019func (p *Markdown) code(data []byte) int {
1020 var work bytes.Buffer
1021
1022 i := 0
1023 for i < len(data) {
1024 beg := i
1025 for i < len(data) && data[i] != '\n' {
1026 i++
1027 }
1028 if i < len(data) && data[i] == '\n' {
1029 i++
1030 }
1031
1032 blankline := p.isEmpty(data[beg:i]) > 0
1033 if pre := p.codePrefix(data[beg:i]); pre > 0 {
1034 beg += pre
1035 } else if !blankline {
1036 // non-empty, non-prefixed line breaks the pre
1037 i = beg
1038 break
1039 }
1040
1041 // verbatim copy to the working buffer
1042 if blankline {
1043 work.WriteByte('\n')
1044 } else {
1045 work.Write(data[beg:i])
1046 }
1047 }
1048
1049 // trim all the \n off the end of work
1050 workbytes := work.Bytes()
1051 eol := len(workbytes)
1052 for eol > 0 && workbytes[eol-1] == '\n' {
1053 eol--
1054 }
1055 if eol != len(workbytes) {
1056 work.Truncate(eol)
1057 }
1058
1059 work.WriteByte('\n')
1060
1061 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1062 block.IsFenced = false
1063 finalizeCodeBlock(block)
1064
1065 return i
1066}
1067
1068// returns unordered list item prefix
1069func (p *Markdown) uliPrefix(data []byte) int {
1070 i := 0
1071 // start with up to 3 spaces
1072 for i < len(data) && i < 3 && data[i] == ' ' {
1073 i++
1074 }
1075 if i >= len(data)-1 {
1076 return 0
1077 }
1078 // need one of {'*', '+', '-'} followed by a space or a tab
1079 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1080 (data[i+1] != ' ' && data[i+1] != '\t') {
1081 return 0
1082 }
1083 return i + 2
1084}
1085
1086// returns ordered list item prefix
1087func (p *Markdown) oliPrefix(data []byte) int {
1088 i := 0
1089
1090 // start with up to 3 spaces
1091 for i < 3 && i < len(data) && data[i] == ' ' {
1092 i++
1093 }
1094
1095 // count the digits
1096 start := i
1097 for i < len(data) && data[i] >= '0' && data[i] <= '9' {
1098 i++
1099 }
1100 if start == i || i >= len(data)-1 {
1101 return 0
1102 }
1103
1104 // we need >= 1 digits followed by a dot and a space or a tab
1105 if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1106 return 0
1107 }
1108 return i + 2
1109}
1110
1111// returns definition list item prefix
1112func (p *Markdown) dliPrefix(data []byte) int {
1113 if len(data) < 2 {
1114 return 0
1115 }
1116 i := 0
1117 // need a ':' followed by a space or a tab
1118 if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1119 return 0
1120 }
1121 for i < len(data) && data[i] == ' ' {
1122 i++
1123 }
1124 return i + 2
1125}
1126
1127// parse ordered or unordered list block
1128func (p *Markdown) list(data []byte, flags ListType) int {
1129 i := 0
1130 flags |= ListItemBeginningOfList
1131 block := p.addBlock(List, nil)
1132 block.ListFlags = flags
1133 block.Tight = true
1134
1135 for i < len(data) {
1136 skip := p.listItem(data[i:], &flags)
1137 if flags&ListItemContainsBlock != 0 {
1138 block.ListData.Tight = false
1139 }
1140 i += skip
1141 if skip == 0 || flags&ListItemEndOfList != 0 {
1142 break
1143 }
1144 flags &= ^ListItemBeginningOfList
1145 }
1146
1147 above := block.Parent
1148 finalizeList(block)
1149 p.tip = above
1150 return i
1151}
1152
1153// Returns true if the list item is not the same type as its parent list
1154func (p *Markdown) listTypeChanged(data []byte, flags *ListType) bool {
1155 if p.dliPrefix(data) > 0 && *flags&ListTypeDefinition == 0 {
1156 return true
1157 } else if p.oliPrefix(data) > 0 && *flags&ListTypeOrdered == 0 {
1158 return true
1159 } else if p.uliPrefix(data) > 0 && (*flags&ListTypeOrdered != 0 || *flags&ListTypeDefinition != 0) {
1160 return true
1161 }
1162 return false
1163}
1164
1165// Returns true if block ends with a blank line, descending if needed
1166// into lists and sublists.
1167func endsWithBlankLine(block *Node) bool {
1168 // TODO: figure this out. Always false now.
1169 for block != nil {
1170 //if block.lastLineBlank {
1171 //return true
1172 //}
1173 t := block.Type
1174 if t == List || t == Item {
1175 block = block.LastChild
1176 } else {
1177 break
1178 }
1179 }
1180 return false
1181}
1182
1183func finalizeList(block *Node) {
1184 block.open = false
1185 item := block.FirstChild
1186 for item != nil {
1187 // check for non-final list item ending with blank line:
1188 if endsWithBlankLine(item) && item.Next != nil {
1189 block.ListData.Tight = false
1190 break
1191 }
1192 // recurse into children of list item, to see if there are spaces
1193 // between any of them:
1194 subItem := item.FirstChild
1195 for subItem != nil {
1196 if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
1197 block.ListData.Tight = false
1198 break
1199 }
1200 subItem = subItem.Next
1201 }
1202 item = item.Next
1203 }
1204}
1205
1206// Parse a single list item.
1207// Assumes initial prefix is already removed if this is a sublist.
1208func (p *Markdown) listItem(data []byte, flags *ListType) int {
1209 // keep track of the indentation of the first line
1210 itemIndent := 0
1211 if data[0] == '\t' {
1212 itemIndent += 4
1213 } else {
1214 for itemIndent < 3 && data[itemIndent] == ' ' {
1215 itemIndent++
1216 }
1217 }
1218
1219 var bulletChar byte = '*'
1220 i := p.uliPrefix(data)
1221 if i == 0 {
1222 i = p.oliPrefix(data)
1223 } else {
1224 bulletChar = data[i-2]
1225 }
1226 if i == 0 {
1227 i = p.dliPrefix(data)
1228 // reset definition term flag
1229 if i > 0 {
1230 *flags &= ^ListTypeTerm
1231 }
1232 }
1233 if i == 0 {
1234 // if in definition list, set term flag and continue
1235 if *flags&ListTypeDefinition != 0 {
1236 *flags |= ListTypeTerm
1237 } else {
1238 return 0
1239 }
1240 }
1241
1242 // skip leading whitespace on first line
1243 for i < len(data) && data[i] == ' ' {
1244 i++
1245 }
1246
1247 // find the end of the line
1248 line := i
1249 for i > 0 && i < len(data) && data[i-1] != '\n' {
1250 i++
1251 }
1252
1253 // get working buffer
1254 var raw bytes.Buffer
1255
1256 // put the first line into the working buffer
1257 raw.Write(data[line:i])
1258 line = i
1259
1260 // process the following lines
1261 containsBlankLine := false
1262 sublist := 0
1263 codeBlockMarker := ""
1264
1265gatherlines:
1266 for line < len(data) {
1267 i++
1268
1269 // find the end of this line
1270 for i < len(data) && data[i-1] != '\n' {
1271 i++
1272 }
1273
1274 // if it is an empty line, guess that it is part of this item
1275 // and move on to the next line
1276 if p.isEmpty(data[line:i]) > 0 {
1277 containsBlankLine = true
1278 line = i
1279 continue
1280 }
1281
1282 // calculate the indentation
1283 indent := 0
1284 indentIndex := 0
1285 if data[line] == '\t' {
1286 indentIndex++
1287 indent += 4
1288 } else {
1289 for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1290 indent++
1291 indentIndex++
1292 }
1293 }
1294
1295 chunk := data[line+indentIndex : i]
1296
1297 if p.extensions&FencedCode != 0 {
1298 // determine if in or out of codeblock
1299 // if in codeblock, ignore normal list processing
1300 _, marker := isFenceLine(chunk, nil, codeBlockMarker)
1301 if marker != "" {
1302 if codeBlockMarker == "" {
1303 // start of codeblock
1304 codeBlockMarker = marker
1305 } else {
1306 // end of codeblock.
1307 codeBlockMarker = ""
1308 }
1309 }
1310 // we are in a codeblock, write line, and continue
1311 if codeBlockMarker != "" || marker != "" {
1312 raw.Write(data[line+indentIndex : i])
1313 line = i
1314 continue gatherlines
1315 }
1316 }
1317
1318 // evaluate how this line fits in
1319 switch {
1320 // is this a nested list item?
1321 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1322 p.oliPrefix(chunk) > 0 ||
1323 p.dliPrefix(chunk) > 0:
1324
1325 // to be a nested list, it must be indented more
1326 // if not, it is either a different kind of list
1327 // or the next item in the same list
1328 if indent <= itemIndent {
1329 if p.listTypeChanged(chunk, flags) {
1330 *flags |= ListItemEndOfList
1331 } else if containsBlankLine {
1332 *flags |= ListItemContainsBlock
1333 }
1334
1335 break gatherlines
1336 }
1337
1338 if containsBlankLine {
1339 *flags |= ListItemContainsBlock
1340 }
1341
1342 // is this the first item in the nested list?
1343 if sublist == 0 {
1344 sublist = raw.Len()
1345 }
1346
1347 // is this a nested prefix heading?
1348 case p.isPrefixHeading(chunk):
1349 // if the heading is not indented, it is not nested in the list
1350 // and thus ends the list
1351 if containsBlankLine && indent < 4 {
1352 *flags |= ListItemEndOfList
1353 break gatherlines
1354 }
1355 *flags |= ListItemContainsBlock
1356
1357 // anything following an empty line is only part
1358 // of this item if it is indented 4 spaces
1359 // (regardless of the indentation of the beginning of the item)
1360 case containsBlankLine && indent < 4:
1361 if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
1362 // is the next item still a part of this list?
1363 next := i
1364 for next < len(data) && data[next] != '\n' {
1365 next++
1366 }
1367 for next < len(data)-1 && data[next] == '\n' {
1368 next++
1369 }
1370 if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1371 *flags |= ListItemEndOfList
1372 }
1373 } else {
1374 *flags |= ListItemEndOfList
1375 }
1376 break gatherlines
1377
1378 // a blank line means this should be parsed as a block
1379 case containsBlankLine:
1380 raw.WriteByte('\n')
1381 *flags |= ListItemContainsBlock
1382 }
1383
1384 // if this line was preceded by one or more blanks,
1385 // re-introduce the blank into the buffer
1386 if containsBlankLine {
1387 containsBlankLine = false
1388 raw.WriteByte('\n')
1389 }
1390
1391 // add the line into the working buffer without prefix
1392 raw.Write(data[line+indentIndex : i])
1393
1394 line = i
1395 }
1396
1397 rawBytes := raw.Bytes()
1398
1399 block := p.addBlock(Item, nil)
1400 block.ListFlags = *flags
1401 block.Tight = false
1402 block.BulletChar = bulletChar
1403 block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
1404
1405 // render the contents of the list item
1406 if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
1407 // intermediate render of block item, except for definition term
1408 if sublist > 0 {
1409 p.block(rawBytes[:sublist])
1410 p.block(rawBytes[sublist:])
1411 } else {
1412 p.block(rawBytes)
1413 }
1414 } else {
1415 // intermediate render of inline item
1416 if sublist > 0 {
1417 child := p.addChild(Paragraph, 0)
1418 child.content = rawBytes[:sublist]
1419 p.block(rawBytes[sublist:])
1420 } else {
1421 child := p.addChild(Paragraph, 0)
1422 child.content = rawBytes
1423 }
1424 }
1425 return line
1426}
1427
1428// render a single paragraph that has already been parsed out
1429func (p *Markdown) renderParagraph(data []byte) {
1430 if len(data) == 0 {
1431 return
1432 }
1433
1434 // trim leading spaces
1435 beg := 0
1436 for data[beg] == ' ' {
1437 beg++
1438 }
1439
1440 end := len(data)
1441 // trim trailing newline
1442 if data[len(data)-1] == '\n' {
1443 end--
1444 }
1445
1446 // trim trailing spaces
1447 for end > beg && data[end-1] == ' ' {
1448 end--
1449 }
1450
1451 p.addBlock(Paragraph, data[beg:end])
1452}
1453
1454func (p *Markdown) paragraph(data []byte) int {
1455 // prev: index of 1st char of previous line
1456 // line: index of 1st char of current line
1457 // i: index of cursor/end of current line
1458 var prev, line, i int
1459 tabSize := TabSizeDefault
1460 if p.extensions&TabSizeEight != 0 {
1461 tabSize = TabSizeDouble
1462 }
1463 // keep going until we find something to mark the end of the paragraph
1464 for i < len(data) {
1465 // mark the beginning of the current line
1466 prev = line
1467 current := data[i:]
1468 line = i
1469
1470 // did we find a reference or a footnote? If so, end a paragraph
1471 // preceding it and report that we have consumed up to the end of that
1472 // reference:
1473 if refEnd := isReference(p, current, tabSize); refEnd > 0 {
1474 p.renderParagraph(data[:i])
1475 return i + refEnd
1476 }
1477
1478 // did we find a blank line marking the end of the paragraph?
1479 if n := p.isEmpty(current); n > 0 {
1480 // did this blank line followed by a definition list item?
1481 if p.extensions&DefinitionLists != 0 {
1482 if i < len(data)-1 && data[i+1] == ':' {
1483 return p.list(data[prev:], ListTypeDefinition)
1484 }
1485 }
1486
1487 p.renderParagraph(data[:i])
1488 return i + n
1489 }
1490
1491 // an underline under some text marks a heading, so our paragraph ended on prev line
1492 if i > 0 {
1493 if level := p.isUnderlinedHeading(current); level > 0 {
1494 // render the paragraph
1495 p.renderParagraph(data[:prev])
1496
1497 // ignore leading and trailing whitespace
1498 eol := i - 1
1499 for prev < eol && data[prev] == ' ' {
1500 prev++
1501 }
1502 for eol > prev && data[eol-1] == ' ' {
1503 eol--
1504 }
1505
1506 id := ""
1507 if p.extensions&AutoHeadingIDs != 0 {
1508 id = sanitized_anchor_name.Create(string(data[prev:eol]))
1509 }
1510
1511 block := p.addBlock(Heading, data[prev:eol])
1512 block.Level = level
1513 block.HeadingID = id
1514
1515 // find the end of the underline
1516 for i < len(data) && data[i] != '\n' {
1517 i++
1518 }
1519 return i
1520 }
1521 }
1522
1523 // if the next line starts a block of HTML, then the paragraph ends here
1524 if p.extensions&LaxHTMLBlocks != 0 {
1525 if data[i] == '<' && p.html(current, false) > 0 {
1526 // rewind to before the HTML block
1527 p.renderParagraph(data[:i])
1528 return i
1529 }
1530 }
1531
1532 // if there's a prefixed heading or a horizontal rule after this, paragraph is over
1533 if p.isPrefixHeading(current) || p.isHRule(current) {
1534 p.renderParagraph(data[:i])
1535 return i
1536 }
1537
1538 // if there's a fenced code block, paragraph is over
1539 if p.extensions&FencedCode != 0 {
1540 if p.fencedCodeBlock(current, false) > 0 {
1541 p.renderParagraph(data[:i])
1542 return i
1543 }
1544 }
1545
1546 // if there's a definition list item, prev line is a definition term
1547 if p.extensions&DefinitionLists != 0 {
1548 if p.dliPrefix(current) != 0 {
1549 ret := p.list(data[prev:], ListTypeDefinition)
1550 return ret
1551 }
1552 }
1553
1554 // if there's a list after this, paragraph is over
1555 if p.extensions&NoEmptyLineBeforeBlock != 0 {
1556 if p.uliPrefix(current) != 0 ||
1557 p.oliPrefix(current) != 0 ||
1558 p.quotePrefix(current) != 0 ||
1559 p.codePrefix(current) != 0 {
1560 p.renderParagraph(data[:i])
1561 return i
1562 }
1563 }
1564
1565 // otherwise, scan to the beginning of the next line
1566 nl := bytes.IndexByte(data[i:], '\n')
1567 if nl >= 0 {
1568 i += nl + 1
1569 } else {
1570 i += len(data[i:])
1571 }
1572 }
1573
1574 p.renderParagraph(data[:i])
1575 return i
1576}
1577
// skipChar returns the index of the first byte at or after start that is not
// char. If every remaining byte equals char, or start is already at or past
// the end of data, the result is the larger of start and len(data).
func skipChar(data []byte, start int, char byte) int {
	for ; start < len(data); start++ {
		if data[start] != char {
			break
		}
	}
	return start
}
1585
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. When char does not occur there, the result is len(text), or
// start itself if start is already past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for pos < len(text) {
		if text[pos] == char {
			return pos
		}
		pos++
	}
	return pos
}