block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18
19 "github.com/shurcooL/sanitized_anchor_name"
20)
21
22// Parse block-level data.
23// Note: this function and many that it calls assume that
24// the input buffer ends with a newline.
25func (p *parser) block(out *bytes.Buffer, data []byte) {
26 if len(data) == 0 || data[len(data)-1] != '\n' {
27 panic("block input is missing terminating newline")
28 }
29
30 // this is called recursively: enforce a maximum depth
31 if p.nesting >= p.maxNesting {
32 return
33 }
34 p.nesting++
35
36 // parse out one block-level construct at a time
37 for len(data) > 0 {
38 // prefixed header:
39 //
40 // # Header 1
41 // ## Header 2
42 // ...
43 // ###### Header 6
44 if p.isPrefixHeader(data) {
45 data = data[p.prefixHeader(out, data):]
46 continue
47 }
48
49 // block of preformatted HTML:
50 //
51 // <div>
52 // ...
53 // </div>
54 if data[0] == '<' {
55 if i := p.html(out, data, true); i > 0 {
56 data = data[i:]
57 continue
58 }
59 }
60
61 // title block
62 //
63 // % stuff
64 // % more stuff
65 // % even more stuff
66 if p.flags&EXTENSION_TITLEBLOCK != 0 {
67 if data[0] == '%' {
68 if i := p.titleBlock(out, data, true); i > 0 {
69 data = data[i:]
70 continue
71 }
72 }
73 }
74
75 // blank lines. note: returns the # of bytes to skip
76 if i := p.isEmpty(data); i > 0 {
77 data = data[i:]
78 continue
79 }
80
81 // indented code block:
82 //
83 // func max(a, b int) int {
84 // if a > b {
85 // return a
86 // }
87 // return b
88 // }
89 if p.codePrefix(data) > 0 {
90 data = data[p.code(out, data):]
91 continue
92 }
93
94 // fenced code block:
95 //
96 // ``` go
97 // func fact(n int) int {
98 // if n <= 1 {
99 // return n
100 // }
101 // return n * fact(n-1)
102 // }
103 // ```
104 if p.flags&EXTENSION_FENCED_CODE != 0 {
105 if i := p.fencedCode(out, data, true); i > 0 {
106 data = data[i:]
107 continue
108 }
109 }
110
111 // horizontal rule:
112 //
113 // ------
114 // or
115 // ******
116 // or
117 // ______
118 if p.isHRule(data) {
119 p.r.HRule(out)
120 var i int
121 for i = 0; data[i] != '\n'; i++ {
122 }
123 data = data[i:]
124 continue
125 }
126
127 // block quote:
128 //
129 // > A big quote I found somewhere
130 // > on the web
131 if p.quotePrefix(data) > 0 {
132 data = data[p.quote(out, data):]
133 continue
134 }
135
136 // table:
137 //
138 // Name | Age | Phone
139 // ------|-----|---------
140 // Bob | 31 | 555-1234
141 // Alice | 27 | 555-4321
142 if p.flags&EXTENSION_TABLES != 0 {
143 if i := p.table(out, data); i > 0 {
144 data = data[i:]
145 continue
146 }
147 }
148
149 // an itemized/unordered list:
150 //
151 // * Item 1
152 // * Item 2
153 //
154 // also works with + or -
155 if p.uliPrefix(data) > 0 {
156 data = data[p.list(out, data, 0):]
157 continue
158 }
159
160 // a numbered/ordered list:
161 //
162 // 1. Item 1
163 // 2. Item 2
164 if p.oliPrefix(data) > 0 {
165 data = data[p.list(out, data, LIST_TYPE_ORDERED):]
166 continue
167 }
168
169 // definition lists:
170 //
171 // Term 1
172 // : Definition a
173 // : Definition b
174 //
175 // Term 2
176 // : Definition c
177 if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
178 if p.dliPrefix(data) > 0 {
179 data = data[p.list(out, data, LIST_TYPE_DEFINITION):]
180 continue
181 }
182 }
183
184 // anything else must look like a normal paragraph
185 // note: this finds underlined headers, too
186 data = data[p.paragraph(out, data):]
187 }
188
189 p.nesting--
190}
191
192func (p *parser) isPrefixHeader(data []byte) bool {
193 if data[0] != '#' {
194 return false
195 }
196
197 if p.flags&EXTENSION_SPACE_HEADERS != 0 {
198 level := 0
199 for level < 6 && data[level] == '#' {
200 level++
201 }
202 if data[level] != ' ' {
203 return false
204 }
205 }
206 return true
207}
208
209func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
210 level := 0
211 for level < 6 && data[level] == '#' {
212 level++
213 }
214 i := skipChar(data, level, ' ')
215 end := skipUntilChar(data, i, '\n')
216 skip := end
217 id := ""
218 if p.flags&EXTENSION_HEADER_IDS != 0 {
219 j, k := 0, 0
220 // find start/end of header id
221 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
222 }
223 for k = j + 1; k < end && data[k] != '}'; k++ {
224 }
225 // extract header id iff found
226 if j < end && k < end {
227 id = string(data[j+2 : k])
228 end = j
229 skip = k + 1
230 for end > 0 && data[end-1] == ' ' {
231 end--
232 }
233 }
234 }
235 for end > 0 && data[end-1] == '#' {
236 if isBackslashEscaped(data, end-1) {
237 break
238 }
239 end--
240 }
241 for end > 0 && data[end-1] == ' ' {
242 end--
243 }
244 if end > i {
245 if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
246 id = sanitized_anchor_name.Create(string(data[i:end]))
247 }
248 work := func() bool {
249 p.inline(out, data[i:end])
250 return true
251 }
252 p.r.Header(out, work, level, id)
253 }
254 return skip
255}
256
257func (p *parser) isUnderlinedHeader(data []byte) int {
258 // test of level 1 header
259 if data[0] == '=' {
260 i := skipChar(data, 1, '=')
261 i = skipChar(data, i, ' ')
262 if data[i] == '\n' {
263 return 1
264 } else {
265 return 0
266 }
267 }
268
269 // test of level 2 header
270 if data[0] == '-' {
271 i := skipChar(data, 1, '-')
272 i = skipChar(data, i, ' ')
273 if data[i] == '\n' {
274 return 2
275 } else {
276 return 0
277 }
278 }
279
280 return 0
281}
282
283func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
284 if data[0] != '%' {
285 return 0
286 }
287 splitData := bytes.Split(data, []byte("\n"))
288 var i int
289 for idx, b := range splitData {
290 if !bytes.HasPrefix(b, []byte("%")) {
291 i = idx // - 1
292 break
293 }
294 }
295
296 data = bytes.Join(splitData[0:i], []byte("\n"))
297 p.r.TitleBlock(out, data)
298
299 return len(data)
300}
301
302func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
303 var i, j int
304
305 // identify the opening tag
306 if data[0] != '<' {
307 return 0
308 }
309 curtag, tagfound := p.htmlFindTag(data[1:])
310
311 // handle special cases
312 if !tagfound {
313 // check for an HTML comment
314 if size := p.htmlComment(out, data, doRender); size > 0 {
315 return size
316 }
317
318 // check for an <hr> tag
319 if size := p.htmlHr(out, data, doRender); size > 0 {
320 return size
321 }
322
323 // no special case recognized
324 return 0
325 }
326
327 // look for an unindented matching closing tag
328 // followed by a blank line
329 found := false
330 /*
331 closetag := []byte("\n</" + curtag + ">")
332 j = len(curtag) + 1
333 for !found {
334 // scan for a closing tag at the beginning of a line
335 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
336 j += skip + len(closetag)
337 } else {
338 break
339 }
340
341 // see if it is the only thing on the line
342 if skip := p.isEmpty(data[j:]); skip > 0 {
343 // see if it is followed by a blank line/eof
344 j += skip
345 if j >= len(data) {
346 found = true
347 i = j
348 } else {
349 if skip := p.isEmpty(data[j:]); skip > 0 {
350 j += skip
351 found = true
352 i = j
353 }
354 }
355 }
356 }
357 */
358
359 // if not found, try a second pass looking for indented match
360 // but not if tag is "ins" or "del" (following original Markdown.pl)
361 if !found && curtag != "ins" && curtag != "del" {
362 i = 1
363 for i < len(data) {
364 i++
365 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
366 i++
367 }
368
369 if i+2+len(curtag) >= len(data) {
370 break
371 }
372
373 j = p.htmlFindEnd(curtag, data[i-1:])
374
375 if j > 0 {
376 i += j - 1
377 found = true
378 break
379 }
380 }
381 }
382
383 if !found {
384 return 0
385 }
386
387 // the end of the block has been found
388 if doRender {
389 // trim newlines
390 end := i
391 for end > 0 && data[end-1] == '\n' {
392 end--
393 }
394 p.r.BlockHtml(out, data[:end])
395 }
396
397 return i
398}
399
400// HTML comment, lax form
401func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
402 i := p.inlineHtmlComment(out, data)
403 // needs to end with a blank line
404 if j := p.isEmpty(data[i:]); j > 0 {
405 size := i + j
406 if doRender {
407 // trim trailing newlines
408 end := size
409 for end > 0 && data[end-1] == '\n' {
410 end--
411 }
412 p.r.BlockHtml(out, data[:end])
413 }
414 return size
415 }
416 return 0
417}
418
419// HR, which is the only self-closing block tag considered
420func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
421 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
422 return 0
423 }
424 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
425 // not an <hr> tag after all; at least not a valid one
426 return 0
427 }
428
429 i := 3
430 for data[i] != '>' && data[i] != '\n' {
431 i++
432 }
433
434 if data[i] == '>' {
435 i++
436 if j := p.isEmpty(data[i:]); j > 0 {
437 size := i + j
438 if doRender {
439 // trim newlines
440 end := size
441 for end > 0 && data[end-1] == '\n' {
442 end--
443 }
444 p.r.BlockHtml(out, data[:end])
445 }
446 return size
447 }
448 }
449
450 return 0
451}
452
453func (p *parser) htmlFindTag(data []byte) (string, bool) {
454 i := 0
455 for isalnum(data[i]) {
456 i++
457 }
458 key := string(data[:i])
459 if blockTags[key] {
460 return key, true
461 }
462 return "", false
463}
464
465func (p *parser) htmlFindEnd(tag string, data []byte) int {
466 // assume data[0] == '<' && data[1] == '/' already tested
467
468 // check if tag is a match
469 closetag := []byte("</" + tag + ">")
470 if !bytes.HasPrefix(data, closetag) {
471 return 0
472 }
473 i := len(closetag)
474
475 // check that the rest of the line is blank
476 skip := 0
477 if skip = p.isEmpty(data[i:]); skip == 0 {
478 return 0
479 }
480 i += skip
481 skip = 0
482
483 if i >= len(data) {
484 return i
485 }
486
487 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
488 return i
489 }
490 if skip = p.isEmpty(data[i:]); skip == 0 {
491 // following line must be blank
492 return 0
493 }
494
495 return i + skip
496}
497
498func (p *parser) isEmpty(data []byte) int {
499 // it is okay to call isEmpty on an empty buffer
500 if len(data) == 0 {
501 return 0
502 }
503
504 var i int
505 for i = 0; i < len(data) && data[i] != '\n'; i++ {
506 if data[i] != ' ' && data[i] != '\t' {
507 return 0
508 }
509 }
510 return i + 1
511}
512
513func (p *parser) isHRule(data []byte) bool {
514 i := 0
515
516 // skip up to three spaces
517 for i < 3 && data[i] == ' ' {
518 i++
519 }
520
521 // look at the hrule char
522 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
523 return false
524 }
525 c := data[i]
526
527 // the whole line must be the char or whitespace
528 n := 0
529 for data[i] != '\n' {
530 switch {
531 case data[i] == c:
532 n++
533 case data[i] != ' ':
534 return false
535 }
536 i++
537 }
538
539 return n >= 3
540}
541
542func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
543 i, size := 0, 0
544 skip = 0
545
546 // skip up to three spaces
547 for i < len(data) && i < 3 && data[i] == ' ' {
548 i++
549 }
550 if i >= len(data) {
551 return
552 }
553
554 // check for the marker characters: ~ or `
555 if data[i] != '~' && data[i] != '`' {
556 return
557 }
558
559 c := data[i]
560
561 // the whole line must be the same char or whitespace
562 for i < len(data) && data[i] == c {
563 size++
564 i++
565 }
566
567 if i >= len(data) {
568 return
569 }
570
571 // the marker char must occur at least 3 times
572 if size < 3 {
573 return
574 }
575 marker = string(data[i-size : i])
576
577 // if this is the end marker, it must match the beginning marker
578 if oldmarker != "" && marker != oldmarker {
579 return
580 }
581
582 if syntax != nil {
583 syn := 0
584 i = skipChar(data, i, ' ')
585
586 if i >= len(data) {
587 return
588 }
589
590 syntaxStart := i
591
592 if data[i] == '{' {
593 i++
594 syntaxStart++
595
596 for i < len(data) && data[i] != '}' && data[i] != '\n' {
597 syn++
598 i++
599 }
600
601 if i >= len(data) || data[i] != '}' {
602 return
603 }
604
605 // strip all whitespace at the beginning and the end
606 // of the {} block
607 for syn > 0 && isspace(data[syntaxStart]) {
608 syntaxStart++
609 syn--
610 }
611
612 for syn > 0 && isspace(data[syntaxStart+syn-1]) {
613 syn--
614 }
615
616 i++
617 } else {
618 for i < len(data) && !isspace(data[i]) {
619 syn++
620 i++
621 }
622 }
623
624 language := string(data[syntaxStart : syntaxStart+syn])
625 *syntax = &language
626 }
627
628 i = skipChar(data, i, ' ')
629 if i >= len(data) || data[i] != '\n' {
630 return
631 }
632
633 skip = i + 1
634 return
635}
636
637func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
638 var lang *string
639 beg, marker := p.isFencedCode(data, &lang, "")
640 if beg == 0 || beg >= len(data) {
641 return 0
642 }
643
644 var work bytes.Buffer
645
646 for {
647 // safe to assume beg < len(data)
648
649 // check for the end of the code block
650 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
651 if fenceEnd != 0 {
652 beg += fenceEnd
653 break
654 }
655
656 // copy the current line
657 end := skipUntilChar(data, beg, '\n') + 1
658
659 // did we reach the end of the buffer without a closing marker?
660 if end >= len(data) {
661 return 0
662 }
663
664 // verbatim copy to the working buffer
665 if doRender {
666 work.Write(data[beg:end])
667 }
668 beg = end
669 }
670
671 syntax := ""
672 if lang != nil {
673 syntax = *lang
674 }
675
676 if doRender {
677 p.r.BlockCode(out, work.Bytes(), syntax)
678 }
679
680 return beg
681}
682
683func (p *parser) table(out *bytes.Buffer, data []byte) int {
684 var header bytes.Buffer
685 i, columns := p.tableHeader(&header, data)
686 if i == 0 {
687 return 0
688 }
689
690 var body bytes.Buffer
691
692 for i < len(data) {
693 pipes, rowStart := 0, i
694 for ; data[i] != '\n'; i++ {
695 if data[i] == '|' {
696 pipes++
697 }
698 }
699
700 if pipes == 0 {
701 i = rowStart
702 break
703 }
704
705 // include the newline in data sent to tableRow
706 i++
707 p.tableRow(&body, data[rowStart:i], columns, false)
708 }
709
710 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
711
712 return i
713}
714
// isBackslashEscaped reports whether the byte at position i of data is
// preceded by an odd number of consecutive backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// each backslash found walking backwards flips the parity
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		escaped = !escaped
	}
	return escaped
}
723
724func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
725 i := 0
726 colCount := 1
727 for i = 0; data[i] != '\n'; i++ {
728 if data[i] == '|' && !isBackslashEscaped(data, i) {
729 colCount++
730 }
731 }
732
733 // doesn't look like a table header
734 if colCount == 1 {
735 return
736 }
737
738 // include the newline in the data sent to tableRow
739 header := data[:i+1]
740
741 // column count ignores pipes at beginning or end of line
742 if data[0] == '|' {
743 colCount--
744 }
745 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
746 colCount--
747 }
748
749 columns = make([]int, colCount)
750
751 // move on to the header underline
752 i++
753 if i >= len(data) {
754 return
755 }
756
757 if data[i] == '|' && !isBackslashEscaped(data, i) {
758 i++
759 }
760 i = skipChar(data, i, ' ')
761
762 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
763 // and trailing | optional on last column
764 col := 0
765 for data[i] != '\n' {
766 dashes := 0
767
768 if data[i] == ':' {
769 i++
770 columns[col] |= TABLE_ALIGNMENT_LEFT
771 dashes++
772 }
773 for data[i] == '-' {
774 i++
775 dashes++
776 }
777 if data[i] == ':' {
778 i++
779 columns[col] |= TABLE_ALIGNMENT_RIGHT
780 dashes++
781 }
782 for data[i] == ' ' {
783 i++
784 }
785
786 // end of column test is messy
787 switch {
788 case dashes < 3:
789 // not a valid column
790 return
791
792 case data[i] == '|' && !isBackslashEscaped(data, i):
793 // marker found, now skip past trailing whitespace
794 col++
795 i++
796 for data[i] == ' ' {
797 i++
798 }
799
800 // trailing junk found after last column
801 if col >= colCount && data[i] != '\n' {
802 return
803 }
804
805 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
806 // something else found where marker was required
807 return
808
809 case data[i] == '\n':
810 // marker is optional for the last column
811 col++
812
813 default:
814 // trailing junk found after last column
815 return
816 }
817 }
818 if col != colCount {
819 return
820 }
821
822 p.tableRow(out, header, columns, true)
823 size = i + 1
824 return
825}
826
827func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
828 i, col := 0, 0
829 var rowWork bytes.Buffer
830
831 if data[i] == '|' && !isBackslashEscaped(data, i) {
832 i++
833 }
834
835 for col = 0; col < len(columns) && i < len(data); col++ {
836 for data[i] == ' ' {
837 i++
838 }
839
840 cellStart := i
841
842 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
843 i++
844 }
845
846 cellEnd := i
847
848 // skip the end-of-cell marker, possibly taking us past end of buffer
849 i++
850
851 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
852 cellEnd--
853 }
854
855 var cellWork bytes.Buffer
856 p.inline(&cellWork, data[cellStart:cellEnd])
857
858 if header {
859 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
860 } else {
861 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
862 }
863 }
864
865 // pad it out with empty columns to get the right number
866 for ; col < len(columns); col++ {
867 if header {
868 p.r.TableHeaderCell(&rowWork, nil, columns[col])
869 } else {
870 p.r.TableCell(&rowWork, nil, columns[col])
871 }
872 }
873
874 // silently ignore rows with too many cells
875
876 p.r.TableRow(out, rowWork.Bytes())
877}
878
879// returns blockquote prefix length
880func (p *parser) quotePrefix(data []byte) int {
881 i := 0
882 for i < 3 && data[i] == ' ' {
883 i++
884 }
885 if data[i] == '>' {
886 if data[i+1] == ' ' {
887 return i + 2
888 }
889 return i + 1
890 }
891 return 0
892}
893
894// blockquote ends with at least one blank line
895// followed by something without a blockquote prefix
896func terminateBlockquote(p *parser, data []byte, beg, end int) bool {
897 if p.isEmpty(data[beg:]) <= 0 {
898 return false
899 }
900 if end >= len(data) {
901 return true
902 }
903 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
904}
905
906// parse a blockquote fragment
907func (p *parser) quote(out *bytes.Buffer, data []byte) int {
908 var raw bytes.Buffer
909 beg, end := 0, 0
910 for beg < len(data) {
911 end = beg
912 // Step over whole lines, collecting them. While doing that, check for
913 // fenced code and if one's found, incorporate it altogether,
914 // irregardless of any contents inside it
915 for data[end] != '\n' {
916 if p.flags&EXTENSION_FENCED_CODE != 0 {
917 if i := p.fencedCode(out, data[end:], false); i > 0 {
918 // -1 to compensate for the extra end++ after the loop:
919 end += i - 1
920 break
921 }
922 }
923 end++
924 }
925 end++
926
927 if pre := p.quotePrefix(data[beg:]); pre > 0 {
928 // skip the prefix
929 beg += pre
930 } else if terminateBlockquote(p, data, beg, end) {
931 break
932 }
933
934 // this line is part of the blockquote
935 raw.Write(data[beg:end])
936 beg = end
937 }
938
939 var cooked bytes.Buffer
940 p.block(&cooked, raw.Bytes())
941 p.r.BlockQuote(out, cooked.Bytes())
942 return end
943}
944
945// returns prefix length for block code
946func (p *parser) codePrefix(data []byte) int {
947 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
948 return 4
949 }
950 return 0
951}
952
953func (p *parser) code(out *bytes.Buffer, data []byte) int {
954 var work bytes.Buffer
955
956 i := 0
957 for i < len(data) {
958 beg := i
959 for data[i] != '\n' {
960 i++
961 }
962 i++
963
964 blankline := p.isEmpty(data[beg:i]) > 0
965 if pre := p.codePrefix(data[beg:i]); pre > 0 {
966 beg += pre
967 } else if !blankline {
968 // non-empty, non-prefixed line breaks the pre
969 i = beg
970 break
971 }
972
973 // verbatim copy to the working buffeu
974 if blankline {
975 work.WriteByte('\n')
976 } else {
977 work.Write(data[beg:i])
978 }
979 }
980
981 // trim all the \n off the end of work
982 workbytes := work.Bytes()
983 eol := len(workbytes)
984 for eol > 0 && workbytes[eol-1] == '\n' {
985 eol--
986 }
987 if eol != len(workbytes) {
988 work.Truncate(eol)
989 }
990
991 work.WriteByte('\n')
992
993 p.r.BlockCode(out, work.Bytes(), "")
994
995 return i
996}
997
998// returns unordered list item prefix
999func (p *parser) uliPrefix(data []byte) int {
1000 i := 0
1001
1002 // start with up to 3 spaces
1003 for i < 3 && data[i] == ' ' {
1004 i++
1005 }
1006
1007 // need a *, +, or - followed by a space
1008 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1009 data[i+1] != ' ' {
1010 return 0
1011 }
1012 return i + 2
1013}
1014
1015// returns ordered list item prefix
1016func (p *parser) oliPrefix(data []byte) int {
1017 i := 0
1018
1019 // start with up to 3 spaces
1020 for i < 3 && data[i] == ' ' {
1021 i++
1022 }
1023
1024 // count the digits
1025 start := i
1026 for data[i] >= '0' && data[i] <= '9' {
1027 i++
1028 }
1029
1030 // we need >= 1 digits followed by a dot and a space
1031 if start == i || data[i] != '.' || data[i+1] != ' ' {
1032 return 0
1033 }
1034 return i + 2
1035}
1036
1037// returns definition list item prefix
1038func (p *parser) dliPrefix(data []byte) int {
1039 i := 0
1040
1041 // need a : followed by a spaces
1042 if data[i] != ':' || data[i+1] != ' ' {
1043 return 0
1044 }
1045 for data[i] == ' ' {
1046 i++
1047 }
1048 return i + 2
1049}
1050
1051// parse ordered or unordered list block
1052func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1053 i := 0
1054 flags |= LIST_ITEM_BEGINNING_OF_LIST
1055 work := func() bool {
1056 for i < len(data) {
1057 skip := p.listItem(out, data[i:], &flags)
1058 i += skip
1059
1060 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1061 break
1062 }
1063 flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1064 }
1065 return true
1066 }
1067
1068 p.r.List(out, work, flags)
1069 return i
1070}
1071
1072// Parse a single list item.
1073// Assumes initial prefix is already removed if this is a sublist.
1074func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
1075 // keep track of the indentation of the first line
1076 itemIndent := 0
1077 for itemIndent < 3 && data[itemIndent] == ' ' {
1078 itemIndent++
1079 }
1080
1081 i := p.uliPrefix(data)
1082 if i == 0 {
1083 i = p.oliPrefix(data)
1084 }
1085 if i == 0 {
1086 i = p.dliPrefix(data)
1087 // reset definition term flag
1088 if i > 0 {
1089 *flags &= ^LIST_TYPE_TERM
1090 }
1091 }
1092 if i == 0 {
1093 // if in defnition list, set term flag and continue
1094 if *flags&LIST_TYPE_DEFINITION != 0 {
1095 *flags |= LIST_TYPE_TERM
1096 } else {
1097 return 0
1098 }
1099 }
1100
1101 // skip leading whitespace on first line
1102 for data[i] == ' ' {
1103 i++
1104 }
1105
1106 // find the end of the line
1107 line := i
1108 for i > 0 && data[i-1] != '\n' {
1109 i++
1110 }
1111
1112 // get working buffer
1113 var raw bytes.Buffer
1114
1115 // put the first line into the working buffer
1116 raw.Write(data[line:i])
1117 line = i
1118
1119 // process the following lines
1120 containsBlankLine := false
1121 sublist := 0
1122
1123gatherlines:
1124 for line < len(data) {
1125 i++
1126
1127 // find the end of this line
1128 for data[i-1] != '\n' {
1129 i++
1130 }
1131
1132 // if it is an empty line, guess that it is part of this item
1133 // and move on to the next line
1134 if p.isEmpty(data[line:i]) > 0 {
1135 containsBlankLine = true
1136 line = i
1137 continue
1138 }
1139
1140 // calculate the indentation
1141 indent := 0
1142 for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1143 indent++
1144 }
1145
1146 chunk := data[line+indent : i]
1147
1148 // evaluate how this line fits in
1149 switch {
1150 // is this a nested list item?
1151 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1152 p.oliPrefix(chunk) > 0 ||
1153 p.dliPrefix(chunk) > 0:
1154
1155 if containsBlankLine {
1156 *flags |= LIST_ITEM_CONTAINS_BLOCK
1157 }
1158
1159 // to be a nested list, it must be indented more
1160 // if not, it is the next item in the same list
1161 if indent <= itemIndent {
1162 break gatherlines
1163 }
1164
1165 // is this the first item in the nested list?
1166 if sublist == 0 {
1167 sublist = raw.Len()
1168 }
1169
1170 // is this a nested prefix header?
1171 case p.isPrefixHeader(chunk):
1172 // if the header is not indented, it is not nested in the list
1173 // and thus ends the list
1174 if containsBlankLine && indent < 4 {
1175 *flags |= LIST_ITEM_END_OF_LIST
1176 break gatherlines
1177 }
1178 *flags |= LIST_ITEM_CONTAINS_BLOCK
1179
1180 // anything following an empty line is only part
1181 // of this item if it is indented 4 spaces
1182 // (regardless of the indentation of the beginning of the item)
1183 case containsBlankLine && indent < 4:
1184 if *flags&LIST_TYPE_DEFINITION != 0 && i < len(data)-1 {
1185 // is the next item still a part of this list?
1186 next := i
1187 for data[next] != '\n' {
1188 next++
1189 }
1190 for next < len(data)-1 && data[next] == '\n' {
1191 next++
1192 }
1193 if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1194 *flags |= LIST_ITEM_END_OF_LIST
1195 }
1196 } else {
1197 *flags |= LIST_ITEM_END_OF_LIST
1198 }
1199 break gatherlines
1200
1201 // a blank line means this should be parsed as a block
1202 case containsBlankLine:
1203 raw.WriteByte('\n')
1204 *flags |= LIST_ITEM_CONTAINS_BLOCK
1205 }
1206
1207 // if this line was preceeded by one or more blanks,
1208 // re-introduce the blank into the buffer
1209 if containsBlankLine {
1210 containsBlankLine = false
1211 raw.WriteByte('\n')
1212
1213 }
1214
1215 // add the line into the working buffer without prefix
1216 raw.Write(data[line+indent : i])
1217
1218 line = i
1219 }
1220
1221 rawBytes := raw.Bytes()
1222
1223 // render the contents of the list item
1224 var cooked bytes.Buffer
1225 if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 && *flags&LIST_TYPE_TERM == 0 {
1226 // intermediate render of block item, except for definition term
1227 if sublist > 0 {
1228 p.block(&cooked, rawBytes[:sublist])
1229 p.block(&cooked, rawBytes[sublist:])
1230 } else {
1231 p.block(&cooked, rawBytes)
1232 }
1233 } else {
1234 // intermediate render of inline item
1235 if sublist > 0 {
1236 p.inline(&cooked, rawBytes[:sublist])
1237 p.block(&cooked, rawBytes[sublist:])
1238 } else {
1239 p.inline(&cooked, rawBytes)
1240 }
1241 }
1242
1243 // render the actual list item
1244 cookedBytes := cooked.Bytes()
1245 parsedEnd := len(cookedBytes)
1246
1247 // strip trailing newlines
1248 for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
1249 parsedEnd--
1250 }
1251 p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)
1252
1253 return line
1254}
1255
1256// render a single paragraph that has already been parsed out
1257func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1258 if len(data) == 0 {
1259 return
1260 }
1261
1262 // trim leading spaces
1263 beg := 0
1264 for data[beg] == ' ' {
1265 beg++
1266 }
1267
1268 // trim trailing newline
1269 end := len(data) - 1
1270
1271 // trim trailing spaces
1272 for end > beg && data[end-1] == ' ' {
1273 end--
1274 }
1275
1276 work := func() bool {
1277 p.inline(out, data[beg:end])
1278 return true
1279 }
1280 p.r.Paragraph(out, work)
1281}
1282
1283func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1284 // prev: index of 1st char of previous line
1285 // line: index of 1st char of current line
1286 // i: index of cursor/end of current line
1287 var prev, line, i int
1288
1289 // keep going until we find something to mark the end of the paragraph
1290 for i < len(data) {
1291 // mark the beginning of the current line
1292 prev = line
1293 current := data[i:]
1294 line = i
1295
1296 // did we find a blank line marking the end of the paragraph?
1297 if n := p.isEmpty(current); n > 0 {
1298 // did this blank line followed by a definition list item?
1299 if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
1300 if i < len(data)-1 && data[i+1] == ':' {
1301 return p.list(out, data[prev:], LIST_TYPE_DEFINITION)
1302 }
1303 }
1304
1305 p.renderParagraph(out, data[:i])
1306 return i + n
1307 }
1308
1309 // an underline under some text marks a header, so our paragraph ended on prev line
1310 if i > 0 {
1311 if level := p.isUnderlinedHeader(current); level > 0 {
1312 // render the paragraph
1313 p.renderParagraph(out, data[:prev])
1314
1315 // ignore leading and trailing whitespace
1316 eol := i - 1
1317 for prev < eol && data[prev] == ' ' {
1318 prev++
1319 }
1320 for eol > prev && data[eol-1] == ' ' {
1321 eol--
1322 }
1323
1324 // render the header
1325 // this ugly double closure avoids forcing variables onto the heap
1326 work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
1327 return func() bool {
1328 pp.inline(o, d)
1329 return true
1330 }
1331 }(out, p, data[prev:eol])
1332
1333 id := ""
1334 if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
1335 id = sanitized_anchor_name.Create(string(data[prev:eol]))
1336 }
1337
1338 p.r.Header(out, work, level, id)
1339
1340 // find the end of the underline
1341 for data[i] != '\n' {
1342 i++
1343 }
1344 return i
1345 }
1346 }
1347
1348 // if the next line starts a block of HTML, then the paragraph ends here
1349 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1350 if data[i] == '<' && p.html(out, current, false) > 0 {
1351 // rewind to before the HTML block
1352 p.renderParagraph(out, data[:i])
1353 return i
1354 }
1355 }
1356
1357 // if there's a prefixed header or a horizontal rule after this, paragraph is over
1358 if p.isPrefixHeader(current) || p.isHRule(current) {
1359 p.renderParagraph(out, data[:i])
1360 return i
1361 }
1362
1363 // if there's a fenced code block, paragraph is over
1364 if p.flags&EXTENSION_FENCED_CODE != 0 {
1365 if p.fencedCode(out, current, false) > 0 {
1366 p.renderParagraph(out, data[:i])
1367 return i
1368 }
1369 }
1370
1371 // if there's a definition list item, prev line is a definition term
1372 if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
1373 if p.dliPrefix(current) != 0 {
1374 return p.list(out, data[prev:], LIST_TYPE_DEFINITION)
1375 }
1376 }
1377
1378 // if there's a list after this, paragraph is over
1379 if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
1380 if p.uliPrefix(current) != 0 ||
1381 p.oliPrefix(current) != 0 ||
1382 p.quotePrefix(current) != 0 ||
1383 p.codePrefix(current) != 0 {
1384 p.renderParagraph(out, data[:i])
1385 return i
1386 }
1387 }
1388
1389 // otherwise, scan to the beginning of the next line
1390 for data[i] != '\n' {
1391 i++
1392 }
1393 i++
1394 }
1395
1396 p.renderParagraph(out, data[:i])
1397 return i
1398}