block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18
19 "github.com/shurcooL/sanitized_anchor_name"
20)
21
22// Parse block-level data.
23// Note: this function and many that it calls assume that
24// the input buffer ends with a newline.
25func (p *parser) block(out *bytes.Buffer, data []byte) {
26 if len(data) == 0 || data[len(data)-1] != '\n' {
27 panic("block input is missing terminating newline")
28 }
29
30 // this is called recursively: enforce a maximum depth
31 if p.nesting >= p.maxNesting {
32 return
33 }
34 p.nesting++
35
36 // parse out one block-level construct at a time
37 for len(data) > 0 {
38 // prefixed header:
39 //
40 // # Header 1
41 // ## Header 2
42 // ...
43 // ###### Header 6
44 if p.isPrefixHeader(data) {
45 data = data[p.prefixHeader(out, data):]
46 continue
47 }
48
49 // block of preformatted HTML:
50 //
51 // <div>
52 // ...
53 // </div>
54 if data[0] == '<' {
55 if i := p.html(out, data, true); i > 0 {
56 data = data[i:]
57 continue
58 }
59 }
60
61 // title block
62 //
63 // % stuff
64 // % more stuff
65 // % even more stuff
66 if p.flags&EXTENSION_TITLEBLOCK != 0 {
67 if data[0] == '%' {
68 if i := p.titleBlock(out, data, true); i > 0 {
69 data = data[i:]
70 continue
71 }
72 }
73 }
74
75 // blank lines. note: returns the # of bytes to skip
76 if i := p.isEmpty(data); i > 0 {
77 data = data[i:]
78 continue
79 }
80
81 // indented code block:
82 //
83 // func max(a, b int) int {
84 // if a > b {
85 // return a
86 // }
87 // return b
88 // }
89 if p.codePrefix(data) > 0 {
90 data = data[p.code(out, data):]
91 continue
92 }
93
94 // fenced code block:
95 //
96 // ``` go
97 // func fact(n int) int {
98 // if n <= 1 {
99 // return n
100 // }
101 // return n * fact(n-1)
102 // }
103 // ```
104 if p.flags&EXTENSION_FENCED_CODE != 0 {
105 if i := p.fencedCode(out, data, true); i > 0 {
106 data = data[i:]
107 continue
108 }
109 }
110
111 // horizontal rule:
112 //
113 // ------
114 // or
115 // ******
116 // or
117 // ______
118 if p.isHRule(data) {
119 p.r.HRule(out)
120 var i int
121 for i = 0; data[i] != '\n'; i++ {
122 }
123 data = data[i:]
124 continue
125 }
126
127 // block quote:
128 //
129 // > A big quote I found somewhere
130 // > on the web
131 if p.quotePrefix(data) > 0 {
132 data = data[p.quote(out, data):]
133 continue
134 }
135
136 // table:
137 //
138 // Name | Age | Phone
139 // ------|-----|---------
140 // Bob | 31 | 555-1234
141 // Alice | 27 | 555-4321
142 if p.flags&EXTENSION_TABLES != 0 {
143 if i := p.table(out, data); i > 0 {
144 data = data[i:]
145 continue
146 }
147 }
148
149 // an itemized/unordered list:
150 //
151 // * Item 1
152 // * Item 2
153 //
154 // also works with + or -
155 if p.uliPrefix(data) > 0 {
156 data = data[p.list(out, data, 0):]
157 continue
158 }
159
160 // a numbered/ordered list:
161 //
162 // 1. Item 1
163 // 2. Item 2
164 if p.oliPrefix(data) > 0 {
165 data = data[p.list(out, data, LIST_TYPE_ORDERED):]
166 continue
167 }
168
169 // definition lists:
170 //
171 // Term 1
172 // : Definition a
173 // : Definition b
174 //
175 // Term 2
176 // : Definition c
177 if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
178 if p.dliPrefix(data) > 0 {
179 data = data[p.list(out, data, LIST_TYPE_DEFINITION):]
180 continue
181 }
182 }
183
184 // anything else must look like a normal paragraph
185 // note: this finds underlined headers, too
186 data = data[p.paragraph(out, data):]
187 }
188
189 p.nesting--
190}
191
192func (p *parser) isPrefixHeader(data []byte) bool {
193 if data[0] != '#' {
194 return false
195 }
196
197 if p.flags&EXTENSION_SPACE_HEADERS != 0 {
198 level := 0
199 for level < 6 && data[level] == '#' {
200 level++
201 }
202 if data[level] != ' ' {
203 return false
204 }
205 }
206 return true
207}
208
209func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
210 level := 0
211 for level < 6 && data[level] == '#' {
212 level++
213 }
214 i := skipChar(data, level, ' ')
215 end := skipUntilChar(data, i, '\n')
216 skip := end
217 id := ""
218 if p.flags&EXTENSION_HEADER_IDS != 0 {
219 j, k := 0, 0
220 // find start/end of header id
221 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
222 }
223 for k = j + 1; k < end && data[k] != '}'; k++ {
224 }
225 // extract header id iff found
226 if j < end && k < end {
227 id = string(data[j+2 : k])
228 end = j
229 skip = k + 1
230 for end > 0 && data[end-1] == ' ' {
231 end--
232 }
233 }
234 }
235 for end > 0 && data[end-1] == '#' {
236 if isBackslashEscaped(data, end-1) {
237 break
238 }
239 end--
240 }
241 for end > 0 && data[end-1] == ' ' {
242 end--
243 }
244 if end > i {
245 if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
246 id = sanitized_anchor_name.Create(string(data[i:end]))
247 }
248 work := func() bool {
249 p.inline(out, data[i:end])
250 return true
251 }
252 p.r.Header(out, work, level, id)
253 }
254 return skip
255}
256
257func (p *parser) isUnderlinedHeader(data []byte) int {
258 // test of level 1 header
259 if data[0] == '=' {
260 i := skipChar(data, 1, '=')
261 i = skipChar(data, i, ' ')
262 if data[i] == '\n' {
263 return 1
264 } else {
265 return 0
266 }
267 }
268
269 // test of level 2 header
270 if data[0] == '-' {
271 i := skipChar(data, 1, '-')
272 i = skipChar(data, i, ' ')
273 if data[i] == '\n' {
274 return 2
275 } else {
276 return 0
277 }
278 }
279
280 return 0
281}
282
283func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
284 if data[0] != '%' {
285 return 0
286 }
287 splitData := bytes.Split(data, []byte("\n"))
288 var i int
289 for idx, b := range splitData {
290 if !bytes.HasPrefix(b, []byte("%")) {
291 i = idx // - 1
292 break
293 }
294 }
295
296 data = bytes.Join(splitData[0:i], []byte("\n"))
297 p.r.TitleBlock(out, data)
298
299 return len(data)
300}
301
302func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
303 var i, j int
304
305 // identify the opening tag
306 if data[0] != '<' {
307 return 0
308 }
309 curtag, tagfound := p.htmlFindTag(data[1:])
310
311 // handle special cases
312 if !tagfound {
313 // check for an HTML comment
314 if size := p.htmlComment(out, data, doRender); size > 0 {
315 return size
316 }
317
318 // check for an <hr> tag
319 if size := p.htmlHr(out, data, doRender); size > 0 {
320 return size
321 }
322
323 // no special case recognized
324 return 0
325 }
326
327 // look for an unindented matching closing tag
328 // followed by a blank line
329 found := false
330 /*
331 closetag := []byte("\n</" + curtag + ">")
332 j = len(curtag) + 1
333 for !found {
334 // scan for a closing tag at the beginning of a line
335 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
336 j += skip + len(closetag)
337 } else {
338 break
339 }
340
341 // see if it is the only thing on the line
342 if skip := p.isEmpty(data[j:]); skip > 0 {
343 // see if it is followed by a blank line/eof
344 j += skip
345 if j >= len(data) {
346 found = true
347 i = j
348 } else {
349 if skip := p.isEmpty(data[j:]); skip > 0 {
350 j += skip
351 found = true
352 i = j
353 }
354 }
355 }
356 }
357 */
358
359 // if not found, try a second pass looking for indented match
360 // but not if tag is "ins" or "del" (following original Markdown.pl)
361 if !found && curtag != "ins" && curtag != "del" {
362 i = 1
363 for i < len(data) {
364 i++
365 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
366 i++
367 }
368
369 if i+2+len(curtag) >= len(data) {
370 break
371 }
372
373 j = p.htmlFindEnd(curtag, data[i-1:])
374
375 if j > 0 {
376 i += j - 1
377 found = true
378 break
379 }
380 }
381 }
382
383 if !found {
384 return 0
385 }
386
387 // the end of the block has been found
388 if doRender {
389 // trim newlines
390 end := i
391 for end > 0 && data[end-1] == '\n' {
392 end--
393 }
394 p.r.BlockHtml(out, data[:end])
395 }
396
397 return i
398}
399
400// HTML comment, lax form
401func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
402 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
403 return 0
404 }
405
406 i := 5
407
408 // scan for an end-of-comment marker, across lines if necessary
409 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
410 i++
411 }
412 i++
413
414 // no end-of-comment marker
415 if i >= len(data) {
416 return 0
417 }
418
419 // needs to end with a blank line
420 if j := p.isEmpty(data[i:]); j > 0 {
421 size := i + j
422 if doRender {
423 // trim trailing newlines
424 end := size
425 for end > 0 && data[end-1] == '\n' {
426 end--
427 }
428 p.r.BlockHtml(out, data[:end])
429 }
430 return size
431 }
432
433 return 0
434}
435
436// HR, which is the only self-closing block tag considered
437func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
438 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
439 return 0
440 }
441 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
442 // not an <hr> tag after all; at least not a valid one
443 return 0
444 }
445
446 i := 3
447 for data[i] != '>' && data[i] != '\n' {
448 i++
449 }
450
451 if data[i] == '>' {
452 i++
453 if j := p.isEmpty(data[i:]); j > 0 {
454 size := i + j
455 if doRender {
456 // trim newlines
457 end := size
458 for end > 0 && data[end-1] == '\n' {
459 end--
460 }
461 p.r.BlockHtml(out, data[:end])
462 }
463 return size
464 }
465 }
466
467 return 0
468}
469
470func (p *parser) htmlFindTag(data []byte) (string, bool) {
471 i := 0
472 for isalnum(data[i]) {
473 i++
474 }
475 key := string(data[:i])
476 if blockTags[key] {
477 return key, true
478 }
479 return "", false
480}
481
482func (p *parser) htmlFindEnd(tag string, data []byte) int {
483 // assume data[0] == '<' && data[1] == '/' already tested
484
485 // check if tag is a match
486 closetag := []byte("</" + tag + ">")
487 if !bytes.HasPrefix(data, closetag) {
488 return 0
489 }
490 i := len(closetag)
491
492 // check that the rest of the line is blank
493 skip := 0
494 if skip = p.isEmpty(data[i:]); skip == 0 {
495 return 0
496 }
497 i += skip
498 skip = 0
499
500 if i >= len(data) {
501 return i
502 }
503
504 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
505 return i
506 }
507 if skip = p.isEmpty(data[i:]); skip == 0 {
508 // following line must be blank
509 return 0
510 }
511
512 return i + skip
513}
514
515func (p *parser) isEmpty(data []byte) int {
516 // it is okay to call isEmpty on an empty buffer
517 if len(data) == 0 {
518 return 0
519 }
520
521 var i int
522 for i = 0; i < len(data) && data[i] != '\n'; i++ {
523 if data[i] != ' ' && data[i] != '\t' {
524 return 0
525 }
526 }
527 return i + 1
528}
529
530func (p *parser) isHRule(data []byte) bool {
531 i := 0
532
533 // skip up to three spaces
534 for i < 3 && data[i] == ' ' {
535 i++
536 }
537
538 // look at the hrule char
539 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
540 return false
541 }
542 c := data[i]
543
544 // the whole line must be the char or whitespace
545 n := 0
546 for data[i] != '\n' {
547 switch {
548 case data[i] == c:
549 n++
550 case data[i] != ' ':
551 return false
552 }
553 i++
554 }
555
556 return n >= 3
557}
558
559func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
560 i, size := 0, 0
561 skip = 0
562
563 // skip up to three spaces
564 for i < len(data) && i < 3 && data[i] == ' ' {
565 i++
566 }
567 if i >= len(data) {
568 return
569 }
570
571 // check for the marker characters: ~ or `
572 if data[i] != '~' && data[i] != '`' {
573 return
574 }
575
576 c := data[i]
577
578 // the whole line must be the same char or whitespace
579 for i < len(data) && data[i] == c {
580 size++
581 i++
582 }
583
584 if i >= len(data) {
585 return
586 }
587
588 // the marker char must occur at least 3 times
589 if size < 3 {
590 return
591 }
592 marker = string(data[i-size : i])
593
594 // if this is the end marker, it must match the beginning marker
595 if oldmarker != "" && marker != oldmarker {
596 return
597 }
598
599 if syntax != nil {
600 syn := 0
601 i = skipChar(data, i, ' ')
602
603 if i >= len(data) {
604 return
605 }
606
607 syntaxStart := i
608
609 if data[i] == '{' {
610 i++
611 syntaxStart++
612
613 for i < len(data) && data[i] != '}' && data[i] != '\n' {
614 syn++
615 i++
616 }
617
618 if i >= len(data) || data[i] != '}' {
619 return
620 }
621
622 // strip all whitespace at the beginning and the end
623 // of the {} block
624 for syn > 0 && isspace(data[syntaxStart]) {
625 syntaxStart++
626 syn--
627 }
628
629 for syn > 0 && isspace(data[syntaxStart+syn-1]) {
630 syn--
631 }
632
633 i++
634 } else {
635 for i < len(data) && !isspace(data[i]) {
636 syn++
637 i++
638 }
639 }
640
641 language := string(data[syntaxStart : syntaxStart+syn])
642 *syntax = &language
643 }
644
645 i = skipChar(data, i, ' ')
646 if i >= len(data) || data[i] != '\n' {
647 return
648 }
649
650 skip = i + 1
651 return
652}
653
654func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
655 var lang *string
656 beg, marker := p.isFencedCode(data, &lang, "")
657 if beg == 0 || beg >= len(data) {
658 return 0
659 }
660
661 var work bytes.Buffer
662
663 for {
664 // safe to assume beg < len(data)
665
666 // check for the end of the code block
667 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
668 if fenceEnd != 0 {
669 beg += fenceEnd
670 break
671 }
672
673 // copy the current line
674 end := skipUntilChar(data, beg, '\n') + 1
675
676 // did we reach the end of the buffer without a closing marker?
677 if end >= len(data) {
678 return 0
679 }
680
681 // verbatim copy to the working buffer
682 if doRender {
683 work.Write(data[beg:end])
684 }
685 beg = end
686 }
687
688 syntax := ""
689 if lang != nil {
690 syntax = *lang
691 }
692
693 if doRender {
694 p.r.BlockCode(out, work.Bytes(), syntax)
695 }
696
697 return beg
698}
699
700func (p *parser) table(out *bytes.Buffer, data []byte) int {
701 var header bytes.Buffer
702 i, columns := p.tableHeader(&header, data)
703 if i == 0 {
704 return 0
705 }
706
707 var body bytes.Buffer
708
709 for i < len(data) {
710 pipes, rowStart := 0, i
711 for ; data[i] != '\n'; i++ {
712 if data[i] == '|' {
713 pipes++
714 }
715 }
716
717 if pipes == 0 {
718 i = rowStart
719 break
720 }
721
722 // include the newline in data sent to tableRow
723 i++
724 p.tableRow(&body, data[rowStart:i], columns, false)
725 }
726
727 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
728
729 return i
730}
731
// isBackslashEscaped reports whether the byte at position i is escaped,
// i.e. directly preceded by an odd number of backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// each backslash met while walking backwards flips the parity
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		escaped = !escaped
	}
	return escaped
}
740
// tableHeader tries to parse a table header at the start of data: a row of
// column titles followed by an underline row.
//
// On success the title row is rendered to out via tableRow, size is the
// number of bytes covered by both rows (including their newlines), and
// columns holds one alignment flag set per column. On failure size is 0;
// columns may already have been allocated by then, so callers must check
// size first.
func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
	// count the unescaped pipes on the first line; n pipes separate n+1 cells
	i := 0
	colCount := 1
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and any spaces before the first column
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	i = skipChar(data, i, ' ')

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		// counts dashes and alignment colons alike
		dashes := 0

		// a leading colon requests left alignment
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_LEFT
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		// a trailing colon requests right alignment; both flags may be set
		// on the same column
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_RIGHT
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the title row
	if col != colCount {
		return
	}

	p.tableRow(out, header, columns, true)
	// i is at the underline's newline; consume it as well
	size = i + 1
	return
}
843
844func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
845 i, col := 0, 0
846 var rowWork bytes.Buffer
847
848 if data[i] == '|' && !isBackslashEscaped(data, i) {
849 i++
850 }
851
852 for col = 0; col < len(columns) && i < len(data); col++ {
853 for data[i] == ' ' {
854 i++
855 }
856
857 cellStart := i
858
859 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
860 i++
861 }
862
863 cellEnd := i
864
865 // skip the end-of-cell marker, possibly taking us past end of buffer
866 i++
867
868 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
869 cellEnd--
870 }
871
872 var cellWork bytes.Buffer
873 p.inline(&cellWork, data[cellStart:cellEnd])
874
875 if header {
876 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
877 } else {
878 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
879 }
880 }
881
882 // pad it out with empty columns to get the right number
883 for ; col < len(columns); col++ {
884 if header {
885 p.r.TableHeaderCell(&rowWork, nil, columns[col])
886 } else {
887 p.r.TableCell(&rowWork, nil, columns[col])
888 }
889 }
890
891 // silently ignore rows with too many cells
892
893 p.r.TableRow(out, rowWork.Bytes())
894}
895
896// returns blockquote prefix length
897func (p *parser) quotePrefix(data []byte) int {
898 i := 0
899 for i < 3 && data[i] == ' ' {
900 i++
901 }
902 if data[i] == '>' {
903 if data[i+1] == ' ' {
904 return i + 2
905 }
906 return i + 1
907 }
908 return 0
909}
910
911// parse a blockquote fragment
912func (p *parser) quote(out *bytes.Buffer, data []byte) int {
913 var raw bytes.Buffer
914 beg, end := 0, 0
915 for beg < len(data) {
916 end = beg
917 for data[end] != '\n' {
918 end++
919 }
920 end++
921
922 if pre := p.quotePrefix(data[beg:]); pre > 0 {
923 // skip the prefix
924 beg += pre
925 } else if p.isEmpty(data[beg:]) > 0 &&
926 (end >= len(data) ||
927 (p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
928 // blockquote ends with at least one blank line
929 // followed by something without a blockquote prefix
930 break
931 }
932
933 // this line is part of the blockquote
934 raw.Write(data[beg:end])
935 beg = end
936 }
937
938 var cooked bytes.Buffer
939 p.block(&cooked, raw.Bytes())
940 p.r.BlockQuote(out, cooked.Bytes())
941 return end
942}
943
944// returns prefix length for block code
945func (p *parser) codePrefix(data []byte) int {
946 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
947 return 4
948 }
949 return 0
950}
951
952func (p *parser) code(out *bytes.Buffer, data []byte) int {
953 var work bytes.Buffer
954
955 i := 0
956 for i < len(data) {
957 beg := i
958 for data[i] != '\n' {
959 i++
960 }
961 i++
962
963 blankline := p.isEmpty(data[beg:i]) > 0
964 if pre := p.codePrefix(data[beg:i]); pre > 0 {
965 beg += pre
966 } else if !blankline {
967 // non-empty, non-prefixed line breaks the pre
968 i = beg
969 break
970 }
971
972 // verbatim copy to the working buffeu
973 if blankline {
974 work.WriteByte('\n')
975 } else {
976 work.Write(data[beg:i])
977 }
978 }
979
980 // trim all the \n off the end of work
981 workbytes := work.Bytes()
982 eol := len(workbytes)
983 for eol > 0 && workbytes[eol-1] == '\n' {
984 eol--
985 }
986 if eol != len(workbytes) {
987 work.Truncate(eol)
988 }
989
990 work.WriteByte('\n')
991
992 p.r.BlockCode(out, work.Bytes(), "")
993
994 return i
995}
996
997// returns unordered list item prefix
998func (p *parser) uliPrefix(data []byte) int {
999 i := 0
1000
1001 // start with up to 3 spaces
1002 for i < 3 && data[i] == ' ' {
1003 i++
1004 }
1005
1006 // need a *, +, or - followed by a space
1007 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1008 data[i+1] != ' ' {
1009 return 0
1010 }
1011 return i + 2
1012}
1013
1014// returns ordered list item prefix
1015func (p *parser) oliPrefix(data []byte) int {
1016 i := 0
1017
1018 // start with up to 3 spaces
1019 for i < 3 && data[i] == ' ' {
1020 i++
1021 }
1022
1023 // count the digits
1024 start := i
1025 for data[i] >= '0' && data[i] <= '9' {
1026 i++
1027 }
1028
1029 // we need >= 1 digits followed by a dot and a space
1030 if start == i || data[i] != '.' || data[i+1] != ' ' {
1031 return 0
1032 }
1033 return i + 2
1034}
1035
1036// returns definition list item prefix
1037func (p *parser) dliPrefix(data []byte) int {
1038 i := 0
1039
1040 // need a : followed by a spaces
1041 if data[i] != ':' || data[i+1] != ' ' {
1042 return 0
1043 }
1044 for data[i] == ' ' {
1045 i++
1046 }
1047 return i + 2
1048}
1049
1050// parse ordered or unordered list block
1051func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1052 i := 0
1053 flags |= LIST_ITEM_BEGINNING_OF_LIST
1054 work := func() bool {
1055 for i < len(data) {
1056 skip := p.listItem(out, data[i:], &flags)
1057 i += skip
1058
1059 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1060 break
1061 }
1062 flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1063 }
1064 return true
1065 }
1066
1067 p.r.List(out, work, flags)
1068 return i
1069}
1070
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// listItem gathers the lines that belong to the item starting at data[0],
// renders them through the renderer's ListItem, and returns the number of
// bytes consumed. It returns 0 when data does not start with a list item
// prefix and the list is not a definition list. flags is read and updated in
// place: listItem may set or clear LIST_TYPE_TERM and may set
// LIST_ITEM_CONTAINS_BLOCK and LIST_ITEM_END_OF_LIST.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// try each kind of item prefix in turn; i ends up just past the prefix,
	// or 0 if none matched
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		i = p.dliPrefix(data)
		// reset definition term flag
		if i > 0 {
			*flags &= ^LIST_TYPE_TERM
		}
	}
	if i == 0 {
		// if in definition list, set term flag and continue
		if *flags&LIST_TYPE_DEFINITION != 0 {
			*flags |= LIST_TYPE_TERM
		} else {
			return 0
		}
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	// (when i is 0 the loop does not run, so nothing is copied below and
	// the whole first line is handled by the gathering loop instead)
	line := i
	for i > 0 && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false
	// offset into raw at which the first nested list item starts; 0 if none
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0 ||
			p.dliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			if *flags&LIST_TYPE_DEFINITION != 0 && i < len(data)-1 {
				// is the next item still a part of this list?
				// (the list continues if the line after this one starts
				// with ':')
				next := i
				for data[next] != '\n' {
					next++
				}
				if next < len(data)-2 && data[next+1] != ':' {
					*flags |= LIST_ITEM_END_OF_LIST
				}
			} else {
				*flags |= LIST_ITEM_END_OF_LIST
			}
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')

		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 && *flags&LIST_TYPE_TERM == 0 {
		// intermediate render of block item, except for definition term
		if sublist > 0 {
			// the item's own text and its nested list are parsed separately
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline item
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	// line is the start of the first line that is not part of this item
	return line
}
1251
1252// render a single paragraph that has already been parsed out
1253func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1254 if len(data) == 0 {
1255 return
1256 }
1257
1258 // trim leading spaces
1259 beg := 0
1260 for data[beg] == ' ' {
1261 beg++
1262 }
1263
1264 // trim trailing newline
1265 end := len(data) - 1
1266
1267 // trim trailing spaces
1268 for end > beg && data[end-1] == ' ' {
1269 end--
1270 }
1271
1272 work := func() bool {
1273 p.inline(out, data[beg:end])
1274 return true
1275 }
1276 p.r.Paragraph(out, work)
1277}
1278
// paragraph parses a paragraph at the start of data, renders it, and returns
// the number of bytes consumed.
//
// The paragraph ends at a blank line, at a header underline (in which case
// the line before it is rendered as a header), at a prefixed header or
// horizontal rule, and, depending on the enabled extensions, at an HTML
// block, a definition list item, or a list, quote or code prefix. When a
// definition list is detected, parsing is handed over to list and its result
// is returned directly.
func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
	// prev: index of 1st char of previous line
	// line: index of 1st char of current line
	// i: index of cursor/end of current line
	var prev, line, i int

	// keep going until we find something to mark the end of the paragraph
	for i < len(data) {
		// mark the beginning of the current line
		prev = line
		current := data[i:]
		line = i

		// did we find a blank line marking the end of the paragraph?
		if n := p.isEmpty(current); n > 0 {
			// is this blank line followed by a definition list item?
			if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
				if i < len(data)-1 && data[i+1] == ':' {
					return p.list(out, data[prev:], LIST_TYPE_DEFINITION)
				}
			}

			p.renderParagraph(out, data[:i])
			return i + n
		}

		// an underline under some text marks a header, so our paragraph ended on prev line
		if i > 0 {
			if level := p.isUnderlinedHeader(current); level > 0 {
				// render the paragraph
				p.renderParagraph(out, data[:prev])

				// ignore leading and trailing whitespace
				eol := i - 1
				for prev < eol && data[prev] == ' ' {
					prev++
				}
				for eol > prev && data[eol-1] == ' ' {
					eol--
				}

				// render the header
				// this ugly double closure avoids forcing variables onto the heap
				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
					return func() bool {
						pp.inline(o, d)
						return true
					}
				}(out, p, data[prev:eol])

				id := ""
				if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
					id = sanitized_anchor_name.Create(string(data[prev:eol]))
				}

				p.r.Header(out, work, level, id)

				// find the end of the underline
				// (the newline itself is not consumed here)
				for data[i] != '\n' {
					i++
				}
				return i
			}
		}

		// if the next line starts a block of HTML, then the paragraph ends here
		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
			// html is called with doRender=false: this is only a probe
			if data[i] == '<' && p.html(out, current, false) > 0 {
				// rewind to before the HTML block
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// if there's a prefixed header or a horizontal rule after this, paragraph is over
		if p.isPrefixHeader(current) || p.isHRule(current) {
			p.renderParagraph(out, data[:i])
			return i
		}

		// if there's a definition list item, prev line is a definition term
		if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
			if p.dliPrefix(current) != 0 {
				return p.list(out, data[prev:], LIST_TYPE_DEFINITION)
			}
		}

		// if there's a list, quote or indented code after this, paragraph is over
		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
			if p.uliPrefix(current) != 0 ||
				p.oliPrefix(current) != 0 ||
				p.quotePrefix(current) != 0 ||
				p.codePrefix(current) != 0 {
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// otherwise, scan to the beginning of the next line
		for data[i] != '\n' {
			i++
		}
		i++
	}

	// the buffer ended without any terminator: everything is one paragraph
	p.renderParagraph(out, data[:i])
	return i
}