block.go (view raw)
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
// Functions to parse block-level elements.
//
13
14package blackfriday
15
16import "bytes"
17
18// Parse block-level data.
19// Note: this function and many that it calls assume that
20// the input buffer ends with a newline.
21func (p *parser) block(out *bytes.Buffer, data []byte) {
22 if len(data) == 0 || data[len(data)-1] != '\n' {
23 panic("block input is missing terminating newline")
24 }
25
26 // this is called recursively: enforce a maximum depth
27 if p.nesting >= p.maxNesting {
28 return
29 }
30 p.nesting++
31
32 // parse out one block-level construct at a time
33 for len(data) > 0 {
34 // prefixed header:
35 //
36 // # Header 1
37 // ## Header 2
38 // ...
39 // ###### Header 6
40 if p.isPrefixHeader(data) {
41 data = data[p.prefixHeader(out, data):]
42 continue
43 }
44
45 // block of preformatted HTML:
46 //
47 // <div>
48 // ...
49 // </div>
50 if data[0] == '<' {
51 if i := p.html(out, data, true); i > 0 {
52 data = data[i:]
53 continue
54 }
55 }
56
57 // title block
58 //
59 // % stuff
60 // % more stuff
61 // % even more stuff
62 if p.flags&EXTENSION_TITLEBLOCK != 0 {
63 if data[0] == '%' {
64 if i := p.titleBlock(out, data, true); i > 0 {
65 data = data[i:]
66 continue
67 }
68 }
69 }
70
71 // blank lines. note: returns the # of bytes to skip
72 if i := p.isEmpty(data); i > 0 {
73 data = data[i:]
74 continue
75 }
76
77 // indented code block:
78 //
79 // func max(a, b int) int {
80 // if a > b {
81 // return a
82 // }
83 // return b
84 // }
85 if p.codePrefix(data) > 0 {
86 data = data[p.code(out, data):]
87 continue
88 }
89
90 // fenced code block:
91 //
92 // ``` go
93 // func fact(n int) int {
94 // if n <= 1 {
95 // return n
96 // }
97 // return n * fact(n-1)
98 // }
99 // ```
100 if p.flags&EXTENSION_FENCED_CODE != 0 {
101 if i := p.fencedCode(out, data, true); i > 0 {
102 data = data[i:]
103 continue
104 }
105 }
106
107 // horizontal rule:
108 //
109 // ------
110 // or
111 // ******
112 // or
113 // ______
114 if p.isHRule(data) {
115 p.r.HRule(out)
116 var i int
117 for i = 0; data[i] != '\n'; i++ {
118 }
119 data = data[i:]
120 continue
121 }
122
123 // block quote:
124 //
125 // > A big quote I found somewhere
126 // > on the web
127 if p.quotePrefix(data) > 0 {
128 data = data[p.quote(out, data):]
129 continue
130 }
131
132 // table:
133 //
134 // Name | Age | Phone
135 // ------|-----|---------
136 // Bob | 31 | 555-1234
137 // Alice | 27 | 555-4321
138 if p.flags&EXTENSION_TABLES != 0 {
139 if i := p.table(out, data); i > 0 {
140 data = data[i:]
141 continue
142 }
143 }
144
145 // an itemized/unordered list:
146 //
147 // * Item 1
148 // * Item 2
149 //
150 // also works with + or -
151 if p.uliPrefix(data) > 0 {
152 data = data[p.list(out, data, 0):]
153 continue
154 }
155
156 // a numbered/ordered list:
157 //
158 // 1. Item 1
159 // 2. Item 2
160 if p.oliPrefix(data) > 0 {
161 data = data[p.list(out, data, LIST_TYPE_ORDERED):]
162 continue
163 }
164
165 // anything else must look like a normal paragraph
166 // note: this finds underlined headers, too
167 data = data[p.paragraph(out, data):]
168 }
169
170 p.nesting--
171}
172
173func (p *parser) isPrefixHeader(data []byte) bool {
174 if data[0] != '#' {
175 return false
176 }
177
178 if p.flags&EXTENSION_SPACE_HEADERS != 0 {
179 level := 0
180 for level < 6 && data[level] == '#' {
181 level++
182 }
183 if data[level] != ' ' {
184 return false
185 }
186 }
187 return true
188}
189
190func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
191 level := 0
192 for level < 6 && data[level] == '#' {
193 level++
194 }
195 i, end := 0, 0
196 for i = level; data[i] == ' '; i++ {
197 }
198 for end = i; data[end] != '\n'; end++ {
199 }
200 skip := end
201 id := ""
202 if p.flags&EXTENSION_HEADER_IDS != 0 {
203 j, k := 0, 0
204 // find start/end of header id
205 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
206 }
207 for k = j + 1; k < end && data[k] != '}'; k++ {
208 }
209 // extract header id iff found
210 if j < end && k < end {
211 id = string(data[j+2 : k])
212 end = j
213 skip = k + 1
214 for end > 0 && data[end-1] == ' ' {
215 end--
216 }
217 }
218 }
219 for end > 0 && data[end-1] == '#' {
220 end--
221 }
222 for end > 0 && data[end-1] == ' ' {
223 end--
224 }
225 if end > i {
226 work := func() bool {
227 p.inline(out, data[i:end])
228 return true
229 }
230 p.r.Header(out, work, level, id)
231 }
232 return skip
233}
234
235func (p *parser) isUnderlinedHeader(data []byte) int {
236 // test of level 1 header
237 if data[0] == '=' {
238 i := 1
239 for data[i] == '=' {
240 i++
241 }
242 for data[i] == ' ' {
243 i++
244 }
245 if data[i] == '\n' {
246 return 1
247 } else {
248 return 0
249 }
250 }
251
252 // test of level 2 header
253 if data[0] == '-' {
254 i := 1
255 for data[i] == '-' {
256 i++
257 }
258 for data[i] == ' ' {
259 i++
260 }
261 if data[i] == '\n' {
262 return 2
263 } else {
264 return 0
265 }
266 }
267
268 return 0
269}
270
271func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
272 if data[0] != '%' {
273 return 0
274 }
275 splitData := bytes.Split(data, []byte("\n"))
276 var i int
277 for idx, b := range splitData {
278 if !bytes.HasPrefix(b, []byte("%")) {
279 i = idx // - 1
280 break
281 }
282 }
283
284 data = bytes.Join(splitData[0:i], []byte("\n"))
285 p.r.TitleBlock(out, data)
286
287 return len(data)
288}
289
290func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
291 var i, j int
292
293 // identify the opening tag
294 if data[0] != '<' {
295 return 0
296 }
297 curtag, tagfound := p.htmlFindTag(data[1:])
298
299 // handle special cases
300 if !tagfound {
301 // check for an HTML comment
302 if size := p.htmlComment(out, data, doRender); size > 0 {
303 return size
304 }
305
306 // check for an <hr> tag
307 if size := p.htmlHr(out, data, doRender); size > 0 {
308 return size
309 }
310
311 // no special case recognized
312 return 0
313 }
314
315 // look for an unindented matching closing tag
316 // followed by a blank line
317 found := false
318 /*
319 closetag := []byte("\n</" + curtag + ">")
320 j = len(curtag) + 1
321 for !found {
322 // scan for a closing tag at the beginning of a line
323 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
324 j += skip + len(closetag)
325 } else {
326 break
327 }
328
329 // see if it is the only thing on the line
330 if skip := p.isEmpty(data[j:]); skip > 0 {
331 // see if it is followed by a blank line/eof
332 j += skip
333 if j >= len(data) {
334 found = true
335 i = j
336 } else {
337 if skip := p.isEmpty(data[j:]); skip > 0 {
338 j += skip
339 found = true
340 i = j
341 }
342 }
343 }
344 }
345 */
346
347 // if not found, try a second pass looking for indented match
348 // but not if tag is "ins" or "del" (following original Markdown.pl)
349 if !found && curtag != "ins" && curtag != "del" {
350 i = 1
351 for i < len(data) {
352 i++
353 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
354 i++
355 }
356
357 if i+2+len(curtag) >= len(data) {
358 break
359 }
360
361 j = p.htmlFindEnd(curtag, data[i-1:])
362
363 if j > 0 {
364 i += j - 1
365 found = true
366 break
367 }
368 }
369 }
370
371 if !found {
372 return 0
373 }
374
375 // the end of the block has been found
376 if doRender {
377 // trim newlines
378 end := i
379 for end > 0 && data[end-1] == '\n' {
380 end--
381 }
382 p.r.BlockHtml(out, data[:end])
383 }
384
385 return i
386}
387
388// HTML comment, lax form
389func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
390 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
391 return 0
392 }
393
394 i := 5
395
396 // scan for an end-of-comment marker, across lines if necessary
397 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
398 i++
399 }
400 i++
401
402 // no end-of-comment marker
403 if i >= len(data) {
404 return 0
405 }
406
407 // needs to end with a blank line
408 if j := p.isEmpty(data[i:]); j > 0 {
409 size := i + j
410 if doRender {
411 // trim trailing newlines
412 end := size
413 for end > 0 && data[end-1] == '\n' {
414 end--
415 }
416 p.r.BlockHtml(out, data[:end])
417 }
418 return size
419 }
420
421 return 0
422}
423
424// HR, which is the only self-closing block tag considered
425func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
426 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
427 return 0
428 }
429 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
430 // not an <hr> tag after all; at least not a valid one
431 return 0
432 }
433
434 i := 3
435 for data[i] != '>' && data[i] != '\n' {
436 i++
437 }
438
439 if data[i] == '>' {
440 i++
441 if j := p.isEmpty(data[i:]); j > 0 {
442 size := i + j
443 if doRender {
444 // trim newlines
445 end := size
446 for end > 0 && data[end-1] == '\n' {
447 end--
448 }
449 p.r.BlockHtml(out, data[:end])
450 }
451 return size
452 }
453 }
454
455 return 0
456}
457
458func (p *parser) htmlFindTag(data []byte) (string, bool) {
459 i := 0
460 for isalnum(data[i]) {
461 i++
462 }
463 key := string(data[:i])
464 if blockTags[key] {
465 return key, true
466 }
467 return "", false
468}
469
470func (p *parser) htmlFindEnd(tag string, data []byte) int {
471 // assume data[0] == '<' && data[1] == '/' already tested
472
473 // check if tag is a match
474 closetag := []byte("</" + tag + ">")
475 if !bytes.HasPrefix(data, closetag) {
476 return 0
477 }
478 i := len(closetag)
479
480 // check that the rest of the line is blank
481 skip := 0
482 if skip = p.isEmpty(data[i:]); skip == 0 {
483 return 0
484 }
485 i += skip
486 skip = 0
487
488 if i >= len(data) {
489 return i
490 }
491
492 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
493 return i
494 }
495 if skip = p.isEmpty(data[i:]); skip == 0 {
496 // following line must be blank
497 return 0
498 }
499
500 return i + skip
501}
502
503func (p *parser) isEmpty(data []byte) int {
504 // it is okay to call isEmpty on an empty buffer
505 if len(data) == 0 {
506 return 0
507 }
508
509 var i int
510 for i = 0; i < len(data) && data[i] != '\n'; i++ {
511 if data[i] != ' ' && data[i] != '\t' {
512 return 0
513 }
514 }
515 return i + 1
516}
517
518func (p *parser) isHRule(data []byte) bool {
519 i := 0
520
521 // skip up to three spaces
522 for i < 3 && data[i] == ' ' {
523 i++
524 }
525
526 // look at the hrule char
527 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
528 return false
529 }
530 c := data[i]
531
532 // the whole line must be the char or whitespace
533 n := 0
534 for data[i] != '\n' {
535 switch {
536 case data[i] == c:
537 n++
538 case data[i] != ' ':
539 return false
540 }
541 i++
542 }
543
544 return n >= 3
545}
546
547func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
548 i, size := 0, 0
549 skip = 0
550
551 // skip up to three spaces
552 for i < 3 && data[i] == ' ' {
553 i++
554 }
555
556 // check for the marker characters: ~ or `
557 if data[i] != '~' && data[i] != '`' {
558 return
559 }
560
561 c := data[i]
562
563 // the whole line must be the same char or whitespace
564 for data[i] == c {
565 size++
566 i++
567 }
568
569 // the marker char must occur at least 3 times
570 if size < 3 {
571 return
572 }
573 marker = string(data[i-size : i])
574
575 // if this is the end marker, it must match the beginning marker
576 if oldmarker != "" && marker != oldmarker {
577 return
578 }
579
580 if syntax != nil {
581 syn := 0
582
583 for data[i] == ' ' {
584 i++
585 }
586
587 syntaxStart := i
588
589 if data[i] == '{' {
590 i++
591 syntaxStart++
592
593 for data[i] != '}' && data[i] != '\n' {
594 syn++
595 i++
596 }
597
598 if data[i] != '}' {
599 return
600 }
601
602 // strip all whitespace at the beginning and the end
603 // of the {} block
604 for syn > 0 && isspace(data[syntaxStart]) {
605 syntaxStart++
606 syn--
607 }
608
609 for syn > 0 && isspace(data[syntaxStart+syn-1]) {
610 syn--
611 }
612
613 i++
614 } else {
615 for !isspace(data[i]) {
616 syn++
617 i++
618 }
619 }
620
621 language := string(data[syntaxStart : syntaxStart+syn])
622 *syntax = &language
623 }
624
625 for data[i] == ' ' {
626 i++
627 }
628 if data[i] != '\n' {
629 return
630 }
631
632 skip = i + 1
633 return
634}
635
636func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
637 var lang *string
638 beg, marker := p.isFencedCode(data, &lang, "")
639 if beg == 0 || beg >= len(data) {
640 return 0
641 }
642
643 var work bytes.Buffer
644
645 for {
646 // safe to assume beg < len(data)
647
648 // check for the end of the code block
649 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
650 if fenceEnd != 0 {
651 beg += fenceEnd
652 break
653 }
654
655 // copy the current line
656 end := beg
657 for data[end] != '\n' {
658 end++
659 }
660 end++
661
662 // did we reach the end of the buffer without a closing marker?
663 if end >= len(data) {
664 return 0
665 }
666
667 // verbatim copy to the working buffer
668 if doRender {
669 work.Write(data[beg:end])
670 }
671 beg = end
672 }
673
674 syntax := ""
675 if lang != nil {
676 syntax = *lang
677 }
678
679 if doRender {
680 p.r.BlockCode(out, work.Bytes(), syntax)
681 }
682
683 return beg
684}
685
686func (p *parser) table(out *bytes.Buffer, data []byte) int {
687 var header bytes.Buffer
688 i, columns := p.tableHeader(&header, data)
689 if i == 0 {
690 return 0
691 }
692
693 var body bytes.Buffer
694
695 for i < len(data) {
696 pipes, rowStart := 0, i
697 for ; data[i] != '\n'; i++ {
698 if data[i] == '|' {
699 pipes++
700 }
701 }
702
703 if pipes == 0 {
704 i = rowStart
705 break
706 }
707
708 // include the newline in data sent to tableRow
709 i++
710 p.tableRow(&body, data[rowStart:i], columns, false)
711 }
712
713 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
714
715 return i
716}
717
// isBackslashEscaped reports whether the byte at position i of data is
// preceded by an odd number of consecutive backslashes, i.e. whether it is
// escaped.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// every backslash found walking left flips the parity
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		escaped = !escaped
	}
	return escaped
}
726
727func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
728 i := 0
729 colCount := 1
730 for i = 0; data[i] != '\n'; i++ {
731 if data[i] == '|' && !isBackslashEscaped(data, i) {
732 colCount++
733 }
734 }
735
736 // doesn't look like a table header
737 if colCount == 1 {
738 return
739 }
740
741 // include the newline in the data sent to tableRow
742 header := data[:i+1]
743
744 // column count ignores pipes at beginning or end of line
745 if data[0] == '|' {
746 colCount--
747 }
748 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
749 colCount--
750 }
751
752 columns = make([]int, colCount)
753
754 // move on to the header underline
755 i++
756 if i >= len(data) {
757 return
758 }
759
760 if data[i] == '|' && !isBackslashEscaped(data, i) {
761 i++
762 }
763 for data[i] == ' ' {
764 i++
765 }
766
767 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
768 // and trailing | optional on last column
769 col := 0
770 for data[i] != '\n' {
771 dashes := 0
772
773 if data[i] == ':' {
774 i++
775 columns[col] |= TABLE_ALIGNMENT_LEFT
776 dashes++
777 }
778 for data[i] == '-' {
779 i++
780 dashes++
781 }
782 if data[i] == ':' {
783 i++
784 columns[col] |= TABLE_ALIGNMENT_RIGHT
785 dashes++
786 }
787 for data[i] == ' ' {
788 i++
789 }
790
791 // end of column test is messy
792 switch {
793 case dashes < 3:
794 // not a valid column
795 return
796
797 case data[i] == '|' && !isBackslashEscaped(data, i):
798 // marker found, now skip past trailing whitespace
799 col++
800 i++
801 for data[i] == ' ' {
802 i++
803 }
804
805 // trailing junk found after last column
806 if col >= colCount && data[i] != '\n' {
807 return
808 }
809
810 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
811 // something else found where marker was required
812 return
813
814 case data[i] == '\n':
815 // marker is optional for the last column
816 col++
817
818 default:
819 // trailing junk found after last column
820 return
821 }
822 }
823 if col != colCount {
824 return
825 }
826
827 p.tableRow(out, header, columns, true)
828 size = i + 1
829 return
830}
831
832func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
833 i, col := 0, 0
834 var rowWork bytes.Buffer
835
836 if data[i] == '|' && !isBackslashEscaped(data, i) {
837 i++
838 }
839
840 for col = 0; col < len(columns) && i < len(data); col++ {
841 for data[i] == ' ' {
842 i++
843 }
844
845 cellStart := i
846
847 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
848 i++
849 }
850
851 cellEnd := i
852
853 // skip the end-of-cell marker, possibly taking us past end of buffer
854 i++
855
856 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
857 cellEnd--
858 }
859
860 var cellWork bytes.Buffer
861 p.inline(&cellWork, data[cellStart:cellEnd])
862
863 if header {
864 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
865 } else {
866 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
867 }
868 }
869
870 // pad it out with empty columns to get the right number
871 for ; col < len(columns); col++ {
872 if header {
873 p.r.TableHeaderCell(&rowWork, nil, columns[col])
874 } else {
875 p.r.TableCell(&rowWork, nil, columns[col])
876 }
877 }
878
879 // silently ignore rows with too many cells
880
881 p.r.TableRow(out, rowWork.Bytes())
882}
883
884// returns blockquote prefix length
885func (p *parser) quotePrefix(data []byte) int {
886 i := 0
887 for i < 3 && data[i] == ' ' {
888 i++
889 }
890 if data[i] == '>' {
891 if data[i+1] == ' ' {
892 return i + 2
893 }
894 return i + 1
895 }
896 return 0
897}
898
899// parse a blockquote fragment
900func (p *parser) quote(out *bytes.Buffer, data []byte) int {
901 var raw bytes.Buffer
902 beg, end := 0, 0
903 for beg < len(data) {
904 end = beg
905 for data[end] != '\n' {
906 end++
907 }
908 end++
909
910 if pre := p.quotePrefix(data[beg:]); pre > 0 {
911 // skip the prefix
912 beg += pre
913 } else if p.isEmpty(data[beg:]) > 0 &&
914 (end >= len(data) ||
915 (p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
916 // blockquote ends with at least one blank line
917 // followed by something without a blockquote prefix
918 break
919 }
920
921 // this line is part of the blockquote
922 raw.Write(data[beg:end])
923 beg = end
924 }
925
926 var cooked bytes.Buffer
927 p.block(&cooked, raw.Bytes())
928 p.r.BlockQuote(out, cooked.Bytes())
929 return end
930}
931
932// returns prefix length for block code
933func (p *parser) codePrefix(data []byte) int {
934 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
935 return 4
936 }
937 return 0
938}
939
940func (p *parser) code(out *bytes.Buffer, data []byte) int {
941 var work bytes.Buffer
942
943 i := 0
944 for i < len(data) {
945 beg := i
946 for data[i] != '\n' {
947 i++
948 }
949 i++
950
951 blankline := p.isEmpty(data[beg:i]) > 0
952 if pre := p.codePrefix(data[beg:i]); pre > 0 {
953 beg += pre
954 } else if !blankline {
955 // non-empty, non-prefixed line breaks the pre
956 i = beg
957 break
958 }
959
960 // verbatim copy to the working buffeu
961 if blankline {
962 work.WriteByte('\n')
963 } else {
964 work.Write(data[beg:i])
965 }
966 }
967
968 // trim all the \n off the end of work
969 workbytes := work.Bytes()
970 eol := len(workbytes)
971 for eol > 0 && workbytes[eol-1] == '\n' {
972 eol--
973 }
974 if eol != len(workbytes) {
975 work.Truncate(eol)
976 }
977
978 work.WriteByte('\n')
979
980 p.r.BlockCode(out, work.Bytes(), "")
981
982 return i
983}
984
985// returns unordered list item prefix
986func (p *parser) uliPrefix(data []byte) int {
987 i := 0
988
989 // start with up to 3 spaces
990 for i < 3 && data[i] == ' ' {
991 i++
992 }
993
994 // need a *, +, or - followed by a space
995 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
996 data[i+1] != ' ' {
997 return 0
998 }
999 return i + 2
1000}
1001
1002// returns ordered list item prefix
1003func (p *parser) oliPrefix(data []byte) int {
1004 i := 0
1005
1006 // start with up to 3 spaces
1007 for i < 3 && data[i] == ' ' {
1008 i++
1009 }
1010
1011 // count the digits
1012 start := i
1013 for data[i] >= '0' && data[i] <= '9' {
1014 i++
1015 }
1016
1017 // we need >= 1 digits followed by a dot and a space
1018 if start == i || data[i] != '.' || data[i+1] != ' ' {
1019 return 0
1020 }
1021 return i + 2
1022}
1023
1024// parse ordered or unordered list block
1025func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1026 i := 0
1027 flags |= LIST_ITEM_BEGINNING_OF_LIST
1028 work := func() bool {
1029 for i < len(data) {
1030 skip := p.listItem(out, data[i:], &flags)
1031 i += skip
1032
1033 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1034 break
1035 }
1036 flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1037 }
1038 return true
1039 }
1040
1041 p.r.List(out, work, flags)
1042 return i
1043}
1044
1045// Parse a single list item.
1046// Assumes initial prefix is already removed if this is a sublist.
1047func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
1048 // keep track of the indentation of the first line
1049 itemIndent := 0
1050 for itemIndent < 3 && data[itemIndent] == ' ' {
1051 itemIndent++
1052 }
1053
1054 i := p.uliPrefix(data)
1055 if i == 0 {
1056 i = p.oliPrefix(data)
1057 }
1058 if i == 0 {
1059 return 0
1060 }
1061
1062 // skip leading whitespace on first line
1063 for data[i] == ' ' {
1064 i++
1065 }
1066
1067 // find the end of the line
1068 line := i
1069 for data[i-1] != '\n' {
1070 i++
1071 }
1072
1073 // get working buffer
1074 var raw bytes.Buffer
1075
1076 // put the first line into the working buffer
1077 raw.Write(data[line:i])
1078 line = i
1079
1080 // process the following lines
1081 containsBlankLine := false
1082 sublist := 0
1083
1084gatherlines:
1085 for line < len(data) {
1086 i++
1087
1088 // find the end of this line
1089 for data[i-1] != '\n' {
1090 i++
1091 }
1092
1093 // if it is an empty line, guess that it is part of this item
1094 // and move on to the next line
1095 if p.isEmpty(data[line:i]) > 0 {
1096 containsBlankLine = true
1097 line = i
1098 continue
1099 }
1100
1101 // calculate the indentation
1102 indent := 0
1103 for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1104 indent++
1105 }
1106
1107 chunk := data[line+indent : i]
1108
1109 // evaluate how this line fits in
1110 switch {
1111 // is this a nested list item?
1112 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1113 p.oliPrefix(chunk) > 0:
1114
1115 if containsBlankLine {
1116 *flags |= LIST_ITEM_CONTAINS_BLOCK
1117 }
1118
1119 // to be a nested list, it must be indented more
1120 // if not, it is the next item in the same list
1121 if indent <= itemIndent {
1122 break gatherlines
1123 }
1124
1125 // is this the first item in the the nested list?
1126 if sublist == 0 {
1127 sublist = raw.Len()
1128 }
1129
1130 // is this a nested prefix header?
1131 case p.isPrefixHeader(chunk):
1132 // if the header is not indented, it is not nested in the list
1133 // and thus ends the list
1134 if containsBlankLine && indent < 4 {
1135 *flags |= LIST_ITEM_END_OF_LIST
1136 break gatherlines
1137 }
1138 *flags |= LIST_ITEM_CONTAINS_BLOCK
1139
1140 // anything following an empty line is only part
1141 // of this item if it is indented 4 spaces
1142 // (regardless of the indentation of the beginning of the item)
1143 case containsBlankLine && indent < 4:
1144 *flags |= LIST_ITEM_END_OF_LIST
1145 break gatherlines
1146
1147 // a blank line means this should be parsed as a block
1148 case containsBlankLine:
1149 raw.WriteByte('\n')
1150 *flags |= LIST_ITEM_CONTAINS_BLOCK
1151 }
1152
1153 // if this line was preceeded by one or more blanks,
1154 // re-introduce the blank into the buffer
1155 if containsBlankLine {
1156 containsBlankLine = false
1157 raw.WriteByte('\n')
1158 }
1159
1160 // add the line into the working buffer without prefix
1161 raw.Write(data[line+indent : i])
1162
1163 line = i
1164 }
1165
1166 rawBytes := raw.Bytes()
1167
1168 // render the contents of the list item
1169 var cooked bytes.Buffer
1170 if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
1171 // intermediate render of block li
1172 if sublist > 0 {
1173 p.block(&cooked, rawBytes[:sublist])
1174 p.block(&cooked, rawBytes[sublist:])
1175 } else {
1176 p.block(&cooked, rawBytes)
1177 }
1178 } else {
1179 // intermediate render of inline li
1180 if sublist > 0 {
1181 p.inline(&cooked, rawBytes[:sublist])
1182 p.block(&cooked, rawBytes[sublist:])
1183 } else {
1184 p.inline(&cooked, rawBytes)
1185 }
1186 }
1187
1188 // render the actual list item
1189 cookedBytes := cooked.Bytes()
1190 parsedEnd := len(cookedBytes)
1191
1192 // strip trailing newlines
1193 for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
1194 parsedEnd--
1195 }
1196 p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)
1197
1198 return line
1199}
1200
1201// render a single paragraph that has already been parsed out
1202func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1203 if len(data) == 0 {
1204 return
1205 }
1206
1207 // trim leading spaces
1208 beg := 0
1209 for data[beg] == ' ' {
1210 beg++
1211 }
1212
1213 // trim trailing newline
1214 end := len(data) - 1
1215
1216 // trim trailing spaces
1217 for end > beg && data[end-1] == ' ' {
1218 end--
1219 }
1220
1221 work := func() bool {
1222 p.inline(out, data[beg:end])
1223 return true
1224 }
1225 p.r.Paragraph(out, work)
1226}
1227
1228func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1229 // prev: index of 1st char of previous line
1230 // line: index of 1st char of current line
1231 // i: index of cursor/end of current line
1232 var prev, line, i int
1233
1234 // keep going until we find something to mark the end of the paragraph
1235 for i < len(data) {
1236 // mark the beginning of the current line
1237 prev = line
1238 current := data[i:]
1239 line = i
1240
1241 // did we find a blank line marking the end of the paragraph?
1242 if n := p.isEmpty(current); n > 0 {
1243 p.renderParagraph(out, data[:i])
1244 return i + n
1245 }
1246
1247 // an underline under some text marks a header, so our paragraph ended on prev line
1248 if i > 0 {
1249 if level := p.isUnderlinedHeader(current); level > 0 {
1250 // render the paragraph
1251 p.renderParagraph(out, data[:prev])
1252
1253 // ignore leading and trailing whitespace
1254 eol := i - 1
1255 for prev < eol && data[prev] == ' ' {
1256 prev++
1257 }
1258 for eol > prev && data[eol-1] == ' ' {
1259 eol--
1260 }
1261
1262 // render the header
1263 // this ugly double closure avoids forcing variables onto the heap
1264 work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
1265 return func() bool {
1266 pp.inline(o, d)
1267 return true
1268 }
1269 }(out, p, data[prev:eol])
1270 p.r.Header(out, work, level, "")
1271
1272 // find the end of the underline
1273 for data[i] != '\n' {
1274 i++
1275 }
1276 return i
1277 }
1278 }
1279
1280 // if the next line starts a block of HTML, then the paragraph ends here
1281 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1282 if data[i] == '<' && p.html(out, current, false) > 0 {
1283 // rewind to before the HTML block
1284 p.renderParagraph(out, data[:i])
1285 return i
1286 }
1287 }
1288
1289 // if there's a prefixed header or a horizontal rule after this, paragraph is over
1290 if p.isPrefixHeader(current) || p.isHRule(current) {
1291 p.renderParagraph(out, data[:i])
1292 return i
1293 }
1294
1295 // if there's a list after this, paragraph is over
1296 if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
1297 if p.uliPrefix(current) != 0 ||
1298 p.oliPrefix(current) != 0 ||
1299 p.quotePrefix(current) != 0 ||
1300 p.codePrefix(current) != 0 {
1301 p.renderParagraph(out, data[:i])
1302 return i
1303 }
1304 }
1305
1306 // otherwise, scan to the beginning of the next line
1307 for data[i] != '\n' {
1308 i++
1309 }
1310 i++
1311 }
1312
1313 p.renderParagraph(out, data[:i])
1314 return i
1315}