block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18
19 "github.com/shurcooL/sanitized_anchor_name"
20)
21
// block parses block-level data, rendering one block-level construct at a
// time into out until data is exhausted.
//
// Note: this function and many that it calls assume that
// the input buffer ends with a newline; block panics when data is empty or
// does not end with '\n'.
func (p *parser) block(out *bytes.Buffer, data []byte) {
	if len(data) == 0 || data[len(data)-1] != '\n' {
		panic("block input is missing terminating newline")
	}

	// this is called recursively: enforce a maximum depth
	// (once the limit is reached the input is dropped without rendering)
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	// parse out one block-level construct at a time;
	// the order of the checks below decides which construct wins
	for len(data) > 0 {
		// prefixed header:
		//
		//     # Header 1
		//     ## Header 2
		//     ...
		//     ###### Header 6
		if p.isPrefixHeader(data) {
			data = data[p.prefixHeader(out, data):]
			continue
		}

		// block of preformatted HTML:
		//
		//     <div>
		//         ...
		//     </div>
		if data[0] == '<' {
			if i := p.html(out, data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// title block
		//
		//     % stuff
		//     % more stuff
		//     % even more stuff
		if p.flags&EXTENSION_TITLEBLOCK != 0 {
			if data[0] == '%' {
				if i := p.titleBlock(out, data, true); i > 0 {
					data = data[i:]
					continue
				}
			}
		}

		// blank lines.  note: returns the # of bytes to skip
		if i := p.isEmpty(data); i > 0 {
			data = data[i:]
			continue
		}

		// indented code block:
		//
		//     func max(a, b int) int {
		//         if a > b {
		//             return a
		//         }
		//         return b
		//      }
		if p.codePrefix(data) > 0 {
			data = data[p.code(out, data):]
			continue
		}

		// fenced code block:
		//
		//     ``` go
		//     func fact(n int) int {
		//         if n <= 1 {
		//             return n
		//         }
		//         return n * fact(n-1)
		//     }
		//     ```
		if p.flags&EXTENSION_FENCED_CODE != 0 {
			if i := p.fencedCode(out, data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// horizontal rule:
		//
		//     ------
		// or
		//     ******
		// or
		//     ______
		if p.isHRule(data) {
			p.r.HRule(out)
			// advance to (but not past) the newline ending the rule; the
			// newline is then skipped as a blank line on the next iteration
			var i int
			for i = 0; data[i] != '\n'; i++ {
			}
			data = data[i:]
			continue
		}

		// block quote:
		//
		//     > A big quote I found somewhere
		//     > on the web
		if p.quotePrefix(data) > 0 {
			data = data[p.quote(out, data):]
			continue
		}

		// table:
		//
		//     Name  | Age | Phone
		//     ------|-----|---------
		//     Bob   | 31  | 555-1234
		//     Alice | 27  | 555-4321
		if p.flags&EXTENSION_TABLES != 0 {
			if i := p.table(out, data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// an itemized/unordered list:
		//
		//     * Item 1
		//     * Item 2
		//
		// also works with + or -
		if p.uliPrefix(data) > 0 {
			data = data[p.list(out, data, 0):]
			continue
		}

		// a numbered/ordered list:
		//
		//     1. Item 1
		//     2. Item 2
		if p.oliPrefix(data) > 0 {
			data = data[p.list(out, data, LIST_TYPE_ORDERED):]
			continue
		}

		// anything else must look like a normal paragraph
		// note: this finds underlined headers, too
		data = data[p.paragraph(out, data):]
	}

	p.nesting--
}
176
177func (p *parser) isPrefixHeader(data []byte) bool {
178 if data[0] != '#' {
179 return false
180 }
181
182 if p.flags&EXTENSION_SPACE_HEADERS != 0 {
183 level := 0
184 for level < 6 && data[level] == '#' {
185 level++
186 }
187 if data[level] != ' ' {
188 return false
189 }
190 }
191 return true
192}
193
194func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
195 level := 0
196 for level < 6 && data[level] == '#' {
197 level++
198 }
199 i := skipChar(data, level, ' ')
200 end := skipUntilChar(data, i, '\n')
201 skip := end
202 id := ""
203 if p.flags&EXTENSION_HEADER_IDS != 0 {
204 j, k := 0, 0
205 // find start/end of header id
206 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
207 }
208 for k = j + 1; k < end && data[k] != '}'; k++ {
209 }
210 // extract header id iff found
211 if j < end && k < end {
212 id = string(data[j+2 : k])
213 end = j
214 skip = k + 1
215 for end > 0 && data[end-1] == ' ' {
216 end--
217 }
218 }
219 }
220 for end > 0 && data[end-1] == '#' {
221 if isBackslashEscaped(data, end-1) {
222 break
223 }
224 end--
225 }
226 for end > 0 && data[end-1] == ' ' {
227 end--
228 }
229 if end > i {
230 if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
231 id = sanitized_anchor_name.Create(string(data[i:end]))
232 }
233 work := func() bool {
234 p.inline(out, data[i:end])
235 return true
236 }
237 p.r.Header(out, work, level, id)
238 }
239 return skip
240}
241
242func (p *parser) isUnderlinedHeader(data []byte) int {
243 // test of level 1 header
244 if data[0] == '=' {
245 i := skipChar(data, 1, '=')
246 i = skipChar(data, i, ' ')
247 if data[i] == '\n' {
248 return 1
249 } else {
250 return 0
251 }
252 }
253
254 // test of level 2 header
255 if data[0] == '-' {
256 i := skipChar(data, 1, '-')
257 i = skipChar(data, i, ' ')
258 if data[i] == '\n' {
259 return 2
260 } else {
261 return 0
262 }
263 }
264
265 return 0
266}
267
268func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
269 if data[0] != '%' {
270 return 0
271 }
272 splitData := bytes.Split(data, []byte("\n"))
273 var i int
274 for idx, b := range splitData {
275 if !bytes.HasPrefix(b, []byte("%")) {
276 i = idx // - 1
277 break
278 }
279 }
280
281 data = bytes.Join(splitData[0:i], []byte("\n"))
282 p.r.TitleBlock(out, data)
283
284 return len(data)
285}
286
// html tries to parse a block of raw HTML starting at data[0].
//
// If the opening tag is not one of the recognized block tags, an HTML
// comment and an <hr> tag are tried as special cases. Otherwise the input
// is scanned for a matching closing tag (see htmlFindEnd); "ins" and "del"
// tags are never matched by this scan.
//
// When a block is found and doRender is true, the block is passed to the
// renderer's BlockHtml method with trailing newlines removed. The return
// value is the number of bytes consumed, or 0 if no HTML block was found.
func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
	var i, j int

	// identify the opening tag
	if data[0] != '<' {
		return 0
	}
	curtag, tagfound := p.htmlFindTag(data[1:])

	// handle special cases
	if !tagfound {
		// check for an HTML comment
		if size := p.htmlComment(out, data, doRender); size > 0 {
			return size
		}

		// check for an <hr> tag
		if size := p.htmlHr(out, data, doRender); size > 0 {
			return size
		}

		// no special case recognized
		return 0
	}

	// look for an unindented matching closing tag
	// followed by a blank line
	//
	// NOTE: this first pass is currently disabled (commented out), so found
	// is always false when the second pass below is reached.
	found := false
	/*
		closetag := []byte("\n</" + curtag + ">")
		j = len(curtag) + 1
		for !found {
			// scan for a closing tag at the beginning of a line
			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
				j += skip + len(closetag)
			} else {
				break
			}

			// see if it is the only thing on the line
			if skip := p.isEmpty(data[j:]); skip > 0 {
				// see if it is followed by a blank line/eof
				j += skip
				if j >= len(data) {
					found = true
					i = j
				} else {
					if skip := p.isEmpty(data[j:]); skip > 0 {
						j += skip
						found = true
						i = j
					}
				}
			}
		}
	*/

	// if not found, try a second pass looking for indented match
	// but not if tag is "ins" or "del" (following original Markdown.pl)
	if !found && curtag != "ins" && curtag != "del" {
		i = 1
		for i < len(data) {
			// advance i until data[i-1:i+1] is "</", i.e. i indexes the '/'
			i++
			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
				i++
			}

			// not enough room left for "</" + curtag + ">"
			if i+2+len(curtag) >= len(data) {
				break
			}

			// j is the length of the closing tag plus the blank text that
			// follows it, measured from the '<' at data[i-1]
			j = p.htmlFindEnd(curtag, data[i-1:])

			if j > 0 {
				i += j - 1
				found = true
				break
			}
		}
	}

	if !found {
		return 0
	}

	// the end of the block has been found
	if doRender {
		// trim newlines
		end := i
		for end > 0 && data[end-1] == '\n' {
			end--
		}
		p.r.BlockHtml(out, data[:end])
	}

	return i
}
384
385// HTML comment, lax form
386func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
387 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
388 return 0
389 }
390
391 i := 5
392
393 // scan for an end-of-comment marker, across lines if necessary
394 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
395 i++
396 }
397 i++
398
399 // no end-of-comment marker
400 if i >= len(data) {
401 return 0
402 }
403
404 // needs to end with a blank line
405 if j := p.isEmpty(data[i:]); j > 0 {
406 size := i + j
407 if doRender {
408 // trim trailing newlines
409 end := size
410 for end > 0 && data[end-1] == '\n' {
411 end--
412 }
413 p.r.BlockHtml(out, data[:end])
414 }
415 return size
416 }
417
418 return 0
419}
420
421// HR, which is the only self-closing block tag considered
422func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
423 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
424 return 0
425 }
426 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
427 // not an <hr> tag after all; at least not a valid one
428 return 0
429 }
430
431 i := 3
432 for data[i] != '>' && data[i] != '\n' {
433 i++
434 }
435
436 if data[i] == '>' {
437 i++
438 if j := p.isEmpty(data[i:]); j > 0 {
439 size := i + j
440 if doRender {
441 // trim newlines
442 end := size
443 for end > 0 && data[end-1] == '\n' {
444 end--
445 }
446 p.r.BlockHtml(out, data[:end])
447 }
448 return size
449 }
450 }
451
452 return 0
453}
454
455func (p *parser) htmlFindTag(data []byte) (string, bool) {
456 i := 0
457 for isalnum(data[i]) {
458 i++
459 }
460 key := string(data[:i])
461 if blockTags[key] {
462 return key, true
463 }
464 return "", false
465}
466
467func (p *parser) htmlFindEnd(tag string, data []byte) int {
468 // assume data[0] == '<' && data[1] == '/' already tested
469
470 // check if tag is a match
471 closetag := []byte("</" + tag + ">")
472 if !bytes.HasPrefix(data, closetag) {
473 return 0
474 }
475 i := len(closetag)
476
477 // check that the rest of the line is blank
478 skip := 0
479 if skip = p.isEmpty(data[i:]); skip == 0 {
480 return 0
481 }
482 i += skip
483 skip = 0
484
485 if i >= len(data) {
486 return i
487 }
488
489 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
490 return i
491 }
492 if skip = p.isEmpty(data[i:]); skip == 0 {
493 // following line must be blank
494 return 0
495 }
496
497 return i + skip
498}
499
500func (p *parser) isEmpty(data []byte) int {
501 // it is okay to call isEmpty on an empty buffer
502 if len(data) == 0 {
503 return 0
504 }
505
506 var i int
507 for i = 0; i < len(data) && data[i] != '\n'; i++ {
508 if data[i] != ' ' && data[i] != '\t' {
509 return 0
510 }
511 }
512 return i + 1
513}
514
515func (p *parser) isHRule(data []byte) bool {
516 i := 0
517
518 // skip up to three spaces
519 for i < 3 && data[i] == ' ' {
520 i++
521 }
522
523 // look at the hrule char
524 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
525 return false
526 }
527 c := data[i]
528
529 // the whole line must be the char or whitespace
530 n := 0
531 for data[i] != '\n' {
532 switch {
533 case data[i] == c:
534 n++
535 case data[i] != ' ':
536 return false
537 }
538 i++
539 }
540
541 return n >= 3
542}
543
// isFencedCode checks whether the first line of data is a code fence: up to
// three spaces, then a run of at least three '~' or '`' characters.
//
// oldmarker, when non-empty, is the opening fence; a closing fence must be
// exactly the same marker string.
//
// syntax, when non-nil, requests parsing of an optional language name after
// the marker, either bare ("``` go") or wrapped in braces ("``` {go}", with
// surrounding whitespace stripped). The name is stored through syntax.
// Note that it is stored before the final end-of-line check, so *syntax may
// be set even when the function ends up returning skip == 0.
//
// skip is the number of bytes in the fence line including its newline, or 0
// if the line is not a valid fence. marker is the run of fence characters;
// it is set as soon as a run of three or more is seen and so may be
// non-empty even when skip is 0.
func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
	i, size := 0, 0
	skip = 0

	// skip up to three spaces
	for i < len(data) && i < 3 && data[i] == ' ' {
		i++
	}
	if i >= len(data) {
		return
	}

	// check for the marker characters: ~ or `
	if data[i] != '~' && data[i] != '`' {
		return
	}

	c := data[i]

	// count the run of consecutive marker characters
	for i < len(data) && data[i] == c {
		size++
		i++
	}

	if i >= len(data) {
		return
	}

	// the marker char must occur at least 3 times
	if size < 3 {
		return
	}
	marker = string(data[i-size : i])

	// if this is the end marker, it must match the beginning marker
	if oldmarker != "" && marker != oldmarker {
		return
	}

	if syntax != nil {
		// syn counts the bytes of the language name starting at syntaxStart
		syn := 0
		i = skipChar(data, i, ' ')

		if i >= len(data) {
			return
		}

		syntaxStart := i

		if data[i] == '{' {
			// braced form: the name is everything up to the closing '}'
			i++
			syntaxStart++

			for i < len(data) && data[i] != '}' && data[i] != '\n' {
				syn++
				i++
			}

			// the closing brace must be on the same line
			if i >= len(data) || data[i] != '}' {
				return
			}

			// strip all whitespace at the beginning and the end
			// of the {} block
			for syn > 0 && isspace(data[syntaxStart]) {
				syntaxStart++
				syn--
			}

			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
				syn--
			}

			// step over the closing '}'
			i++
		} else {
			// bare form: the name runs up to the next whitespace
			for i < len(data) && !isspace(data[i]) {
				syn++
				i++
			}
		}

		language := string(data[syntaxStart : syntaxStart+syn])
		*syntax = &language
	}

	// only spaces may remain before the end of the line
	i = skipChar(data, i, ' ')
	if i >= len(data) || data[i] != '\n' {
		return
	}

	skip = i + 1
	return
}
638
639func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
640 var lang *string
641 beg, marker := p.isFencedCode(data, &lang, "")
642 if beg == 0 || beg >= len(data) {
643 return 0
644 }
645
646 var work bytes.Buffer
647
648 for {
649 // safe to assume beg < len(data)
650
651 // check for the end of the code block
652 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
653 if fenceEnd != 0 {
654 beg += fenceEnd
655 break
656 }
657
658 // copy the current line
659 end := skipUntilChar(data, beg, '\n') + 1
660
661 // did we reach the end of the buffer without a closing marker?
662 if end >= len(data) {
663 return 0
664 }
665
666 // verbatim copy to the working buffer
667 if doRender {
668 work.Write(data[beg:end])
669 }
670 beg = end
671 }
672
673 syntax := ""
674 if lang != nil {
675 syntax = *lang
676 }
677
678 if doRender {
679 p.r.BlockCode(out, work.Bytes(), syntax)
680 }
681
682 return beg
683}
684
685func (p *parser) table(out *bytes.Buffer, data []byte) int {
686 var header bytes.Buffer
687 i, columns := p.tableHeader(&header, data)
688 if i == 0 {
689 return 0
690 }
691
692 var body bytes.Buffer
693
694 for i < len(data) {
695 pipes, rowStart := 0, i
696 for ; data[i] != '\n'; i++ {
697 if data[i] == '|' {
698 pipes++
699 }
700 }
701
702 if pipes == 0 {
703 i = rowStart
704 break
705 }
706
707 // include the newline in data sent to tableRow
708 i++
709 p.tableRow(&body, data[rowStart:i], columns, false)
710 }
711
712 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
713
714 return i
715}
716
// isBackslashEscaped reports whether the byte at position i in data is
// escaped, i.e. directly preceded by an odd number of backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		count++
	}
	return count%2 == 1
}
725
// tableHeader parses the first two lines of a table: the header row and the
// underline row that follows it.
//
// The header row must contain at least one unescaped '|'. Each column of
// the underline row must look like / *:?-+:? *|/ with dashes plus colons
// totalling at least three; a leading ':' marks left alignment and a
// trailing ':' right alignment (both flags may be set on one column). The
// number of underline columns must equal the number of header columns.
//
// On success the header row is rendered into out via tableRow, size is the
// number of bytes consumed (both lines including their newlines) and
// columns holds one alignment flag set per column. On failure size is 0 and
// nothing is rendered; columns may still be non-nil in that case because it
// is allocated before the underline is validated.
func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
	// count unescaped pipes on the header line; i ends on its newline
	i := 0
	colCount := 1
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and spaces precede the first column
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	i = skipChar(data, i, ' ')

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		dashes := 0

		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_LEFT
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_RIGHT
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		// (the cases are order-dependent: the first matching one wins)
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the header
	if col != colCount {
		return
	}

	p.tableRow(out, header, columns, true)
	size = i + 1
	return
}
828
829func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
830 i, col := 0, 0
831 var rowWork bytes.Buffer
832
833 if data[i] == '|' && !isBackslashEscaped(data, i) {
834 i++
835 }
836
837 for col = 0; col < len(columns) && i < len(data); col++ {
838 for data[i] == ' ' {
839 i++
840 }
841
842 cellStart := i
843
844 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
845 i++
846 }
847
848 cellEnd := i
849
850 // skip the end-of-cell marker, possibly taking us past end of buffer
851 i++
852
853 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
854 cellEnd--
855 }
856
857 var cellWork bytes.Buffer
858 p.inline(&cellWork, data[cellStart:cellEnd])
859
860 if header {
861 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
862 } else {
863 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
864 }
865 }
866
867 // pad it out with empty columns to get the right number
868 for ; col < len(columns); col++ {
869 if header {
870 p.r.TableHeaderCell(&rowWork, nil, columns[col])
871 } else {
872 p.r.TableCell(&rowWork, nil, columns[col])
873 }
874 }
875
876 // silently ignore rows with too many cells
877
878 p.r.TableRow(out, rowWork.Bytes())
879}
880
881// returns blockquote prefix length
882func (p *parser) quotePrefix(data []byte) int {
883 i := 0
884 for i < 3 && data[i] == ' ' {
885 i++
886 }
887 if data[i] == '>' {
888 if data[i+1] == ' ' {
889 return i + 2
890 }
891 return i + 1
892 }
893 return 0
894}
895
896// parse a blockquote fragment
897func (p *parser) quote(out *bytes.Buffer, data []byte) int {
898 var raw bytes.Buffer
899 beg, end := 0, 0
900 for beg < len(data) {
901 end = beg
902 for data[end] != '\n' {
903 end++
904 }
905 end++
906
907 if pre := p.quotePrefix(data[beg:]); pre > 0 {
908 // skip the prefix
909 beg += pre
910 } else if p.isEmpty(data[beg:]) > 0 &&
911 (end >= len(data) ||
912 (p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
913 // blockquote ends with at least one blank line
914 // followed by something without a blockquote prefix
915 break
916 }
917
918 // this line is part of the blockquote
919 raw.Write(data[beg:end])
920 beg = end
921 }
922
923 var cooked bytes.Buffer
924 p.block(&cooked, raw.Bytes())
925 p.r.BlockQuote(out, cooked.Bytes())
926 return end
927}
928
929// returns prefix length for block code
930func (p *parser) codePrefix(data []byte) int {
931 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
932 return 4
933 }
934 return 0
935}
936
937func (p *parser) code(out *bytes.Buffer, data []byte) int {
938 var work bytes.Buffer
939
940 i := 0
941 for i < len(data) {
942 beg := i
943 for data[i] != '\n' {
944 i++
945 }
946 i++
947
948 blankline := p.isEmpty(data[beg:i]) > 0
949 if pre := p.codePrefix(data[beg:i]); pre > 0 {
950 beg += pre
951 } else if !blankline {
952 // non-empty, non-prefixed line breaks the pre
953 i = beg
954 break
955 }
956
957 // verbatim copy to the working buffeu
958 if blankline {
959 work.WriteByte('\n')
960 } else {
961 work.Write(data[beg:i])
962 }
963 }
964
965 // trim all the \n off the end of work
966 workbytes := work.Bytes()
967 eol := len(workbytes)
968 for eol > 0 && workbytes[eol-1] == '\n' {
969 eol--
970 }
971 if eol != len(workbytes) {
972 work.Truncate(eol)
973 }
974
975 work.WriteByte('\n')
976
977 p.r.BlockCode(out, work.Bytes(), "")
978
979 return i
980}
981
982// returns unordered list item prefix
983func (p *parser) uliPrefix(data []byte) int {
984 i := 0
985
986 // start with up to 3 spaces
987 for i < 3 && data[i] == ' ' {
988 i++
989 }
990
991 // need a *, +, or - followed by a space
992 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
993 data[i+1] != ' ' {
994 return 0
995 }
996 return i + 2
997}
998
999// returns ordered list item prefix
1000func (p *parser) oliPrefix(data []byte) int {
1001 i := 0
1002
1003 // start with up to 3 spaces
1004 for i < 3 && data[i] == ' ' {
1005 i++
1006 }
1007
1008 // count the digits
1009 start := i
1010 for data[i] >= '0' && data[i] <= '9' {
1011 i++
1012 }
1013
1014 // we need >= 1 digits followed by a dot and a space
1015 if start == i || data[i] != '.' || data[i+1] != ' ' {
1016 return 0
1017 }
1018 return i + 2
1019}
1020
1021// parse ordered or unordered list block
1022func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1023 i := 0
1024 flags |= LIST_ITEM_BEGINNING_OF_LIST
1025 work := func() bool {
1026 for i < len(data) {
1027 skip := p.listItem(out, data[i:], &flags)
1028 i += skip
1029
1030 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1031 break
1032 }
1033 flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1034 }
1035 return true
1036 }
1037
1038 p.r.List(out, work, flags)
1039 return i
1040}
1041
// listItem parses a single list item starting at data and renders it.
// Assumes initial prefix is already removed if this is a sublist.
//
// flags is both input and output: LIST_ITEM_CONTAINS_BLOCK is set when the
// item has to be rendered as block-level content, and LIST_ITEM_END_OF_LIST
// is set when the line following the item ends the whole list.
//
// It returns the number of bytes consumed (the offset of the first line
// that does not belong to this item), or 0 if data does not start with an
// unordered or ordered list prefix.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	// (i ends up just past the newline)
	line := i
	for data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	// containsBlankLine: a blank line was seen since the last content line
	// sublist: offset into raw where the first nested list item starts
	containsBlankLine := false
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		// (at most four leading spaces are counted and stripped)
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			*flags |= LIST_ITEM_END_OF_LIST
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item
	// (text before a nested list and the nested list itself are rendered
	// separately; the nested list is always parsed as block content)
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
		// intermediate render of block li
		if sublist > 0 {
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline li
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	return line
}
1197
1198// render a single paragraph that has already been parsed out
1199func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1200 if len(data) == 0 {
1201 return
1202 }
1203
1204 // trim leading spaces
1205 beg := 0
1206 for data[beg] == ' ' {
1207 beg++
1208 }
1209
1210 // trim trailing newline
1211 end := len(data) - 1
1212
1213 // trim trailing spaces
1214 for end > beg && data[end-1] == ' ' {
1215 end--
1216 }
1217
1218 work := func() bool {
1219 p.inline(out, data[beg:end])
1220 return true
1221 }
1222 p.r.Paragraph(out, work)
1223}
1224
// paragraph parses a paragraph starting at data, renders it, and returns
// the number of bytes consumed.
//
// Lines are accumulated until something ends the paragraph:
//   - a blank line (which is consumed as well);
//   - a header underline, in which case the previous line is rendered as a
//     header and any lines before it as a paragraph;
//   - with EXTENSION_LAX_HTML_BLOCKS, the start of an HTML block;
//   - a prefixed header or a horizontal rule;
//   - with EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK, a list item, blockquote or
//     indented code line;
//   - the end of data.
//
// Except for the blank-line and underline cases, the line that ended the
// paragraph is not consumed.
func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
	// prev: index of 1st char of previous line
	// line: index of 1st char of current line
	// i: index of cursor/end of current line
	var prev, line, i int

	// keep going until we find something to mark the end of the paragraph
	for i < len(data) {
		// mark the beginning of the current line
		prev = line
		current := data[i:]
		line = i

		// did we find a blank line marking the end of the paragraph?
		if n := p.isEmpty(current); n > 0 {
			p.renderParagraph(out, data[:i])
			return i + n
		}

		// an underline under some text marks a header, so our paragraph ended on prev line
		if i > 0 {
			if level := p.isUnderlinedHeader(current); level > 0 {
				// render the paragraph
				p.renderParagraph(out, data[:prev])

				// ignore leading and trailing whitespace
				// (eol starts at the newline that ends the header text line)
				eol := i - 1
				for prev < eol && data[prev] == ' ' {
					prev++
				}
				for eol > prev && data[eol-1] == ' ' {
					eol--
				}

				// render the header
				// this ugly double closure avoids forcing variables onto the heap
				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
					return func() bool {
						pp.inline(o, d)
						return true
					}
				}(out, p, data[prev:eol])

				id := ""
				if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
					id = sanitized_anchor_name.Create(string(data[prev:eol]))
				}

				p.r.Header(out, work, level, id)

				// find the end of the underline
				// (the returned offset points at its newline)
				for data[i] != '\n' {
					i++
				}
				return i
			}
		}

		// if the next line starts a block of HTML, then the paragraph ends here
		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
			// html is called with doRender=false: it only probes here
			if data[i] == '<' && p.html(out, current, false) > 0 {
				// rewind to before the HTML block
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// if there's a prefixed header or a horizontal rule after this, paragraph is over
		if p.isPrefixHeader(current) || p.isHRule(current) {
			p.renderParagraph(out, data[:i])
			return i
		}

		// if there's a list, blockquote or indented code line after this,
		// paragraph is over
		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
			if p.uliPrefix(current) != 0 ||
				p.oliPrefix(current) != 0 ||
				p.quotePrefix(current) != 0 ||
				p.codePrefix(current) != 0 {
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// otherwise, scan to the beginning of the next line
		for data[i] != '\n' {
			i++
		}
		i++
	}

	// reached the end of data without an explicit terminator
	p.renderParagraph(out, data[:i])
	return i
}