block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18)
19
20// Parse block-level data.
21// Note: this function and many that it calls assume that
22// the input buffer ends with a newline.
23func (p *parser) block(out *bytes.Buffer, data []byte) {
24 if len(data) == 0 || data[len(data)-1] != '\n' {
25 panic("block input is missing terminating newline")
26 }
27
28 // this is called recursively: enforce a maximum depth
29 if p.nesting >= p.maxNesting {
30 return
31 }
32 p.nesting++
33
34 lastLen := 0
35 sameLenCount := 0
36
37 // parse out one block-level construct at a time
38 for len(data) > 0 {
39 curLen := len(data)
40 if curLen == lastLen {
41 sameLenCount += 1
42 if sameLenCount >= 3 {
43 // infinity loop detection
44 return
45 }
46 } else {
47 sameLenCount = 0
48 }
49 lastLen = curLen
50
51 // prefixed header:
52 //
53 // # Header 1
54 // ## Header 2
55 // ...
56 // ###### Header 6
57 if p.isPrefixHeader(data) {
58 data = data[p.prefixHeader(out, data):]
59 continue
60 }
61
62 // block of preformatted HTML:
63 //
64 // <div>
65 // ...
66 // </div>
67 if data[0] == '<' {
68 if i := p.html(out, data, true); i > 0 {
69 data = data[i:]
70 continue
71 }
72 }
73
74 // blank lines. note: returns the # of bytes to skip
75 if i := p.isEmpty(data); i > 0 {
76 data = data[i:]
77 continue
78 }
79
80 // indented code block:
81 //
82 // func max(a, b int) int {
83 // if a > b {
84 // return a
85 // }
86 // return b
87 // }
88 if p.codePrefix(data) > 0 {
89 data = data[p.code(out, data):]
90 continue
91 }
92
93 // fenced code block:
94 //
95 // ``` go
96 // func fact(n int) int {
97 // if n <= 1 {
98 // return n
99 // }
100 // return n * fact(n-1)
101 // }
102 // ```
103 if p.flags&EXTENSION_FENCED_CODE != 0 {
104 if i := p.fencedCode(out, data, true); i > 0 {
105 data = data[i:]
106 continue
107 }
108 }
109
110 // horizontal rule:
111 //
112 // ------
113 // or
114 // ******
115 // or
116 // ______
117 if p.isHRule(data) {
118 p.r.HRule(out)
119 var i int
120 for i = 0; data[i] != '\n'; i++ {
121 }
122 data = data[i:]
123 continue
124 }
125
126 // block quote:
127 //
128 // > A big quote I found somewhere
129 // > on the web
130 if p.quotePrefix(data) > 0 {
131 data = data[p.quote(out, data):]
132 continue
133 }
134
135 // table:
136 //
137 // Name | Age | Phone
138 // ------|-----|---------
139 // Bob | 31 | 555-1234
140 // Alice | 27 | 555-4321
141 if p.flags&EXTENSION_TABLES != 0 {
142 if i := p.table(out, data); i > 0 {
143 data = data[i:]
144 continue
145 }
146 }
147
148 // an itemized/unordered list:
149 //
150 // * Item 1
151 // * Item 2
152 //
153 // also works with + or -
154 if p.uliPrefix(data) > 0 {
155 data = data[p.list(out, data, 0):]
156 continue
157 }
158
159 // a numbered/ordered list:
160 //
161 // 1. Item 1
162 // 2. Item 2
163 if p.oliPrefix(data) > 0 {
164 data = data[p.list(out, data, LIST_TYPE_ORDERED):]
165 continue
166 }
167
168 // anything else must look like a normal paragraph
169 // note: this finds underlined headers, too
170 data = data[p.paragraph(out, data):]
171 }
172
173 p.nesting--
174}
175
176func (p *parser) isPrefixHeader(data []byte) bool {
177 if data[0] != '#' {
178 return false
179 }
180
181 if p.flags&EXTENSION_SPACE_HEADERS != 0 {
182 level := 0
183 for level < 6 && data[level] == '#' {
184 level++
185 }
186 if data[level] != ' ' {
187 return false
188 }
189 }
190 return true
191}
192
193func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
194 level := 0
195 for level < 6 && data[level] == '#' {
196 level++
197 }
198 i, end := 0, 0
199 for i = level; data[i] == ' '; i++ {
200 }
201 for end = i; data[end] != '\n'; end++ {
202 }
203 skip := end
204 for end > 0 && data[end-1] == '#' {
205 end--
206 }
207 for end > 0 && data[end-1] == ' ' {
208 end--
209 }
210 if end > i {
211 work := func() bool {
212 p.inline(out, data[i:end])
213 return true
214 }
215 p.r.Header(out, work, level)
216 }
217 return skip
218}
219
220func (p *parser) isUnderlinedHeader(data []byte) int {
221 // test of level 1 header
222 if data[0] == '=' {
223 i := 1
224 for data[i] == '=' {
225 i++
226 }
227 for data[i] == ' ' {
228 i++
229 }
230 if data[i] == '\n' {
231 return 1
232 } else {
233 return 0
234 }
235 }
236
237 // test of level 2 header
238 if data[0] == '-' {
239 i := 1
240 for data[i] == '-' {
241 i++
242 }
243 for data[i] == ' ' {
244 i++
245 }
246 if data[i] == '\n' {
247 return 2
248 } else {
249 return 0
250 }
251 }
252
253 return 0
254}
255
// html tries to recognize a block of preformatted HTML at the start of data.
//
// data must begin with '<'.  The opening tag name is looked up with
// htmlFindTag; when it is not a known block tag, the HTML comment and <hr>
// special cases are tried instead.  Otherwise the input is scanned for a
// closing tag accepted by htmlFindEnd.
//
// It returns the number of bytes that make up the block, or 0 when no HTML
// block was recognized.  When doRender is true the block, with trailing
// newlines trimmed, is passed to the renderer's BlockHtml; when it is false
// this function itself writes nothing (paragraph uses that form as a probe).
func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
	var i, j int

	// identify the opening tag
	if data[0] != '<' {
		return 0
	}
	curtag, tagfound := p.htmlFindTag(data[1:])

	// handle special cases
	if !tagfound {
		// check for an HTML comment
		if size := p.htmlComment(out, data, doRender); size > 0 {
			return size
		}

		// check for an <hr> tag
		if size := p.htmlHr(out, data, doRender); size > 0 {
			return size
		}

		// no special case recognized
		return 0
	}

	// look for an unindented matching closing tag
	// followed by a blank line
	//
	// NOTE: this first pass is currently disabled (commented out), so found
	// is always false when the second pass below is reached.
	found := false
	/*
		closetag := []byte("\n</" + curtag + ">")
		j = len(curtag) + 1
		for !found {
			// scan for a closing tag at the beginning of a line
			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
				j += skip + len(closetag)
			} else {
				break
			}

			// see if it is the only thing on the line
			if skip := p.isEmpty(data[j:]); skip > 0 {
				// see if it is followed by a blank line/eof
				j += skip
				if j >= len(data) {
					found = true
					i = j
				} else {
					if skip := p.isEmpty(data[j:]); skip > 0 {
						j += skip
						found = true
						i = j
					}
				}
			}
		}
	*/

	// if not found, try a second pass looking for indented match
	// but not if tag is "ins" or "del" (following original Markdown.pl)
	if !found && curtag != "ins" && curtag != "del" {
		i = 1
		for i < len(data) {
			// advance i until data[i-1:] starts with "</"
			i++
			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
				i++
			}

			// not enough input left to hold "</" + curtag + ">"
			if i+2+len(curtag) >= len(data) {
				break
			}

			// j is the length of the closing tag plus the blank text after
			// it, or 0 if this is not an acceptable closing tag
			j = p.htmlFindEnd(curtag, data[i-1:])

			if j > 0 {
				// data[i-1] is the '<' of the closing tag, hence the -1
				i += j - 1
				found = true
				break
			}
		}
	}

	if !found {
		return 0
	}

	// the end of the block has been found
	if doRender {
		// trim newlines
		end := i
		for end > 0 && data[end-1] == '\n' {
			end--
		}
		p.r.BlockHtml(out, data[:end])
	}

	return i
}
353
354// HTML comment, lax form
355func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
356 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
357 return 0
358 }
359
360 i := 5
361
362 // scan for an end-of-comment marker, across lines if necessary
363 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
364 i++
365 }
366 i++
367
368 // no end-of-comment marker
369 if i >= len(data) {
370 return 0
371 }
372
373 // needs to end with a blank line
374 if j := p.isEmpty(data[i:]); j > 0 {
375 size := i + j
376 if doRender {
377 // trim trailing newlines
378 end := size
379 for end > 0 && data[end-1] == '\n' {
380 end--
381 }
382 p.r.BlockHtml(out, data[:end])
383 }
384 return size
385 }
386
387 return 0
388}
389
390// HR, which is the only self-closing block tag considered
391func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
392 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
393 return 0
394 }
395 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
396 // not an <hr> tag after all; at least not a valid one
397 return 0
398 }
399
400 i := 3
401 for data[i] != '>' && data[i] != '\n' {
402 i++
403 }
404
405 if data[i] == '>' {
406 i++
407 if j := p.isEmpty(data[i:]); j > 0 {
408 size := i + j
409 if doRender {
410 // trim newlines
411 end := size
412 for end > 0 && data[end-1] == '\n' {
413 end--
414 }
415 p.r.BlockHtml(out, data[:end])
416 }
417 return size
418 }
419 }
420
421 return 0
422}
423
424func (p *parser) htmlFindTag(data []byte) (string, bool) {
425 i := 0
426 for isalnum(data[i]) {
427 i++
428 }
429 key := string(data[:i])
430 if blockTags[key] {
431 return key, true
432 }
433 return "", false
434}
435
436func (p *parser) htmlFindEnd(tag string, data []byte) int {
437 // assume data[0] == '<' && data[1] == '/' already tested
438
439 // check if tag is a match
440 closetag := []byte("</" + tag + ">")
441 if !bytes.HasPrefix(data, closetag) {
442 return 0
443 }
444 i := len(closetag)
445
446 // check that the rest of the line is blank
447 skip := 0
448 if skip = p.isEmpty(data[i:]); skip == 0 {
449 return 0
450 }
451 i += skip
452 skip = 0
453
454 if i >= len(data) {
455 return i
456 }
457
458 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
459 return i
460 }
461 if skip = p.isEmpty(data[i:]); skip == 0 {
462 // following line must be blank
463 return 0
464 }
465
466 return i + skip
467}
468
469func (p *parser) isEmpty(data []byte) int {
470 // it is okay to call isEmpty on an empty buffer
471 if len(data) == 0 {
472 return 0
473 }
474
475 var i int
476 for i = 0; i < len(data) && data[i] != '\n'; i++ {
477 if data[i] != ' ' && data[i] != '\t' {
478 return 0
479 }
480 }
481 return i + 1
482}
483
484func (p *parser) isHRule(data []byte) bool {
485 i := 0
486
487 // skip up to three spaces
488 for i < 3 && data[i] == ' ' {
489 i++
490 }
491
492 // look at the hrule char
493 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
494 return false
495 }
496 c := data[i]
497
498 // the whole line must be the char or whitespace
499 n := 0
500 for data[i] != '\n' {
501 switch {
502 case data[i] == c:
503 n++
504 case data[i] != ' ':
505 return false
506 }
507 i++
508 }
509
510 return n >= 3
511}
512
513func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
514 i, size := 0, 0
515 skip = 0
516
517 // skip up to three spaces
518 for i < 3 && data[i] == ' ' {
519 i++
520 }
521
522 // check for the marker characters: ~ or `
523 if data[i] != '~' && data[i] != '`' {
524 return
525 }
526
527 c := data[i]
528
529 // the whole line must be the same char or whitespace
530 for data[i] == c {
531 size++
532 i++
533 }
534
535 // the marker char must occur at least 3 times
536 if size < 3 {
537 return
538 }
539 marker = string(data[i-size : i])
540
541 // if this is the end marker, it must match the beginning marker
542 if oldmarker != "" && marker != oldmarker {
543 return
544 }
545
546 if syntax != nil {
547 syn := 0
548
549 for data[i] == ' ' {
550 i++
551 }
552
553 syntaxStart := i
554
555 if data[i] == '{' {
556 i++
557 syntaxStart++
558
559 for data[i] != '}' && data[i] != '\n' {
560 syn++
561 i++
562 }
563
564 if data[i] != '}' {
565 return
566 }
567
568 // strip all whitespace at the beginning and the end
569 // of the {} block
570 for syn > 0 && isspace(data[syntaxStart]) {
571 syntaxStart++
572 syn--
573 }
574
575 for syn > 0 && isspace(data[syntaxStart+syn-1]) {
576 syn--
577 }
578
579 i++
580 } else {
581 for !isspace(data[i]) {
582 syn++
583 i++
584 }
585 }
586
587 language := string(data[syntaxStart : syntaxStart+syn])
588 *syntax = &language
589 }
590
591 for data[i] == ' ' {
592 i++
593 }
594 if data[i] != '\n' {
595 return
596 }
597
598 skip = i + 1
599 return
600}
601
602func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
603 var lang *string
604 beg, marker := p.isFencedCode(data, &lang, "")
605 if beg == 0 || beg >= len(data) {
606 return 0
607 }
608
609 var work bytes.Buffer
610
611 for {
612 // safe to assume beg < len(data)
613
614 // check for the end of the code block
615 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
616 if fenceEnd != 0 {
617 beg += fenceEnd
618 break
619 }
620
621 // copy the current line
622 end := beg
623 for data[end] != '\n' {
624 end++
625 }
626 end++
627
628 // did we reach the end of the buffer without a closing marker?
629 if end >= len(data) {
630 return 0
631 }
632
633 // verbatim copy to the working buffer
634 if doRender {
635 work.Write(data[beg:end])
636 }
637 beg = end
638 }
639
640 syntax := ""
641 if lang != nil {
642 syntax = *lang
643 }
644
645 if doRender {
646 p.r.BlockCode(out, work.Bytes(), syntax)
647 }
648
649 return beg
650}
651
652func (p *parser) table(out *bytes.Buffer, data []byte) int {
653 var header bytes.Buffer
654 i, columns := p.tableHeader(&header, data)
655 if i == 0 {
656 return 0
657 }
658
659 var body bytes.Buffer
660
661 for i < len(data) {
662 pipes, rowStart := 0, i
663 for ; data[i] != '\n'; i++ {
664 if data[i] == '|' {
665 pipes++
666 }
667 }
668
669 if pipes == 0 {
670 i = rowStart
671 break
672 }
673
674 // include the newline in data sent to tableRow
675 i++
676 p.tableRow(&body, data[rowStart:i], columns, false)
677 }
678
679 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
680
681 return i
682}
683
// isBackslashEscaped reports whether the byte at position i is preceded by
// an odd number of consecutive backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// walk backwards over the run of backslashes, flipping parity each step
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		escaped = !escaped
	}
	return escaped
}
692
// tableHeader tries to parse a table header at the start of data: a header
// row containing at least one unescaped '|', followed by an underline row
// that defines one column per header column.
//
// On success the header row is rendered into out via tableRow, size is the
// number of bytes covered by the two lines (including both newlines) and
// columns holds one alignment flag set per column.  On failure size is 0;
// note that columns may already have been allocated in that case, so callers
// must test size, not columns.
func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
	i := 0
	colCount := 1
	// each unescaped pipe on the first line separates two columns
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and leading spaces are skipped
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	for data[i] == ' ' {
		i++
	}

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		dashes := 0

		// a leading colon requests left alignment (and counts as a dash)
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_LEFT
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		// a trailing colon requests right alignment (and counts as a dash)
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_RIGHT
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must define exactly as many columns as the header row
	if col != colCount {
		return
	}

	p.tableRow(out, header, columns, true)
	size = i + 1
	return
}
797
798func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
799 i, col := 0, 0
800 var rowWork bytes.Buffer
801
802 if data[i] == '|' && !isBackslashEscaped(data, i) {
803 i++
804 }
805
806 for col = 0; col < len(columns) && i < len(data); col++ {
807 for data[i] == ' ' {
808 i++
809 }
810
811 cellStart := i
812
813 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
814 i++
815 }
816
817 cellEnd := i
818
819 // skip the end-of-cell marker, possibly taking us past end of buffer
820 i++
821
822 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
823 cellEnd--
824 }
825
826 var cellWork bytes.Buffer
827 p.inline(&cellWork, data[cellStart:cellEnd])
828
829 if header {
830 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
831 } else {
832 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
833 }
834 }
835
836 // pad it out with empty columns to get the right number
837 for ; col < len(columns); col++ {
838 if header {
839 p.r.TableHeaderCell(&rowWork, nil, columns[col])
840 } else {
841 p.r.TableCell(&rowWork, nil, columns[col])
842 }
843 }
844
845 // silently ignore rows with too many cells
846
847 p.r.TableRow(out, rowWork.Bytes())
848}
849
850// returns blockquote prefix length
851func (p *parser) quotePrefix(data []byte) int {
852 i := 0
853 for i < 3 && data[i] == ' ' {
854 i++
855 }
856 if data[i] == '>' {
857 if data[i+1] == ' ' {
858 return i + 2
859 }
860 return i + 1
861 }
862 return 0
863}
864
865// parse a blockquote fragment
866func (p *parser) quote(out *bytes.Buffer, data []byte) int {
867 var raw bytes.Buffer
868 beg, end := 0, 0
869 for beg < len(data) {
870 end = beg
871 for data[end] != '\n' {
872 end++
873 }
874 end++
875
876 if pre := p.quotePrefix(data[beg:]); pre > 0 {
877 // skip the prefix
878 beg += pre
879 } else if p.isEmpty(data[beg:]) > 0 &&
880 (end >= len(data) ||
881 (p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
882 // blockquote ends with at least one blank line
883 // followed by something without a blockquote prefix
884 break
885 }
886
887 // this line is part of the blockquote
888 raw.Write(data[beg:end])
889 beg = end
890 }
891
892 var cooked bytes.Buffer
893 p.block(&cooked, raw.Bytes())
894 p.r.BlockQuote(out, cooked.Bytes())
895 return end
896}
897
898// returns prefix length for block code
899func (p *parser) codePrefix(data []byte) int {
900 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
901 return 4
902 }
903 return 0
904}
905
906func (p *parser) code(out *bytes.Buffer, data []byte) int {
907 var work bytes.Buffer
908
909 i := 0
910 for i < len(data) {
911 beg := i
912 for data[i] != '\n' {
913 i++
914 }
915 i++
916
917 blankline := p.isEmpty(data[beg:i]) > 0
918 if pre := p.codePrefix(data[beg:i]); pre > 0 {
919 beg += pre
920 } else if !blankline {
921 // non-empty, non-prefixed line breaks the pre
922 i = beg
923 break
924 }
925
926 // verbatim copy to the working buffeu
927 if blankline {
928 work.WriteByte('\n')
929 } else {
930 work.Write(data[beg:i])
931 }
932 }
933
934 // trim all the \n off the end of work
935 workbytes := work.Bytes()
936 eol := len(workbytes)
937 for eol > 0 && workbytes[eol-1] == '\n' {
938 eol--
939 }
940 if eol != len(workbytes) {
941 work.Truncate(eol)
942 }
943
944 work.WriteByte('\n')
945
946 p.r.BlockCode(out, work.Bytes(), "")
947
948 return i
949}
950
951// returns unordered list item prefix
952func (p *parser) uliPrefix(data []byte) int {
953 i := 0
954
955 // start with up to 3 spaces
956 for i < 3 && data[i] == ' ' {
957 i++
958 }
959
960 // need a *, +, or - followed by a space
961 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
962 data[i+1] != ' ' {
963 return 0
964 }
965 return i + 2
966}
967
968// returns ordered list item prefix
969func (p *parser) oliPrefix(data []byte) int {
970 i := 0
971
972 // start with up to 3 spaces
973 for i < 3 && data[i] == ' ' {
974 i++
975 }
976
977 // count the digits
978 start := i
979 for data[i] >= '0' && data[i] <= '9' {
980 i++
981 }
982
983 // we need >= 1 digits followed by a dot and a space
984 if start == i || data[i] != '.' || data[i+1] != ' ' {
985 return 0
986 }
987 return i + 2
988}
989
990// parse ordered or unordered list block
991func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
992 i := 0
993 flags |= LIST_ITEM_BEGINNING_OF_LIST
994 work := func() bool {
995 for i < len(data) {
996 skip := p.listItem(out, data[i:], &flags)
997 i += skip
998
999 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1000 break
1001 }
1002 flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1003 }
1004 return true
1005 }
1006
1007 p.r.List(out, work, flags)
1008 return i
1009}
1010
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// The item's first line must carry an unordered or ordered list prefix;
// otherwise nothing is consumed and 0 is returned.  Following lines are
// gathered into a working buffer (with up to four spaces of indentation
// removed) until a line is found that starts the next item of the same list
// or ends the list.  The gathered text is rendered either inline or as
// block-level content, depending on LIST_ITEM_CONTAINS_BLOCK, and passed to
// the renderer's ListItem with trailing newlines stripped.
//
// flags is read and updated in place: LIST_ITEM_CONTAINS_BLOCK and
// LIST_ITEM_END_OF_LIST may be set here.  The return value is the number of
// bytes of data consumed by this item.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	// (i ends up just past the newline)
	line := i
	for data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	// containsBlankLine: one or more blank lines precede the current line
	// sublist: offset in raw where the first nested list item starts (0 = none)
	containsBlankLine := false
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		// (at most four spaces are counted and stripped)
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			*flags |= LIST_ITEM_END_OF_LIST
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item
	// (text before a nested list and the nested list itself are rendered
	// separately; the nested list is always parsed as block-level content)
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
		// intermediate render of block li
		if sublist > 0 {
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline li
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	return line
}
1166
1167// render a single paragraph that has already been parsed out
1168func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1169 if len(data) == 0 {
1170 return
1171 }
1172
1173 // trim leading spaces
1174 beg := 0
1175 for data[beg] == ' ' {
1176 beg++
1177 }
1178
1179 // trim trailing newline
1180 end := len(data) - 1
1181
1182 // trim trailing spaces
1183 for end > beg && data[end-1] == ' ' {
1184 end--
1185 }
1186
1187 work := func() bool {
1188 p.inline(out, data[beg:end])
1189 return true
1190 }
1191 p.r.Paragraph(out, work)
1192}
1193
1194func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1195 // prev: index of 1st char of previous line
1196 // line: index of 1st char of current line
1197 // i: index of cursor/end of current line
1198 var prev, line, i int
1199
1200 // keep going until we find something to mark the end of the paragraph
1201 for i < len(data) {
1202 // mark the beginning of the current line
1203 prev = line
1204 current := data[i:]
1205 line = i
1206
1207 // did we find a blank line marking the end of the paragraph?
1208 if n := p.isEmpty(current); n > 0 {
1209 p.renderParagraph(out, data[:i])
1210 return i + n
1211 }
1212
1213 // an underline under some text marks a header, so our paragraph ended on prev line
1214 if i > 0 {
1215 if level := p.isUnderlinedHeader(current); level > 0 {
1216 // render the paragraph
1217 p.renderParagraph(out, data[:prev])
1218
1219 // ignore leading and trailing whitespace
1220 eol := i - 1
1221 for prev < eol && data[prev] == ' ' {
1222 prev++
1223 }
1224 for eol > prev && data[eol-1] == ' ' {
1225 eol--
1226 }
1227
1228 // render the header
1229 // this ugly double closure avoids forcing variables onto the heap
1230 work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
1231 return func() bool {
1232 pp.inline(o, d)
1233 return true
1234 }
1235 }(out, p, data[prev:eol])
1236 p.r.Header(out, work, level)
1237
1238 // find the end of the underline
1239 for data[i] != '\n' {
1240 i++
1241 }
1242 return i
1243 }
1244 }
1245
1246 // if the next line starts a block of HTML, then the paragraph ends here
1247 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1248 if data[i] == '<' && p.html(out, current, false) > 0 {
1249 // rewind to before the HTML block
1250 p.renderParagraph(out, data[:i])
1251 return i
1252 }
1253 }
1254
1255 // if there's a prefixed header or a horizontal rule after this, paragraph is over
1256 if p.isPrefixHeader(current) || p.isHRule(current) {
1257 p.renderParagraph(out, data[:i])
1258 return i
1259 }
1260
1261 // if there's a list after this, paragraph is over
1262 if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
1263 if p.uliPrefix(current) != 0 ||
1264 p.oliPrefix(current) != 0 ||
1265 p.quotePrefix(current) != 0 ||
1266 p.codePrefix(current) != 0 {
1267 p.renderParagraph(out, data[:i])
1268 return i
1269 }
1270 }
1271
1272 // otherwise, scan to the beginning of the next line
1273 for data[i] != '\n' {
1274 i++
1275 }
1276 i++
1277 }
1278
1279 p.renderParagraph(out, data[:i])
1280 return i
1281}