block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18
19 "github.com/shurcooL/sanitized_anchor_name"
20)
21
22// Parse block-level data.
23// Note: this function and many that it calls assume that
24// the input buffer ends with a newline.
25func (p *parser) block(out *bytes.Buffer, data []byte) {
26 if len(data) == 0 || data[len(data)-1] != '\n' {
27 panic("block input is missing terminating newline")
28 }
29
30 // this is called recursively: enforce a maximum depth
31 if p.nesting >= p.maxNesting {
32 return
33 }
34 p.nesting++
35
36 // parse out one block-level construct at a time
37 for len(data) > 0 {
38 // prefixed header:
39 //
40 // # Header 1
41 // ## Header 2
42 // ...
43 // ###### Header 6
44 if p.isPrefixHeader(data) {
45 data = data[p.prefixHeader(out, data):]
46 continue
47 }
48
49 // block of preformatted HTML:
50 //
51 // <div>
52 // ...
53 // </div>
54 if data[0] == '<' {
55 if i := p.html(out, data, true); i > 0 {
56 data = data[i:]
57 continue
58 }
59 }
60
61 // title block
62 //
63 // % stuff
64 // % more stuff
65 // % even more stuff
66 if p.flags&EXTENSION_TITLEBLOCK != 0 {
67 if data[0] == '%' {
68 if i := p.titleBlock(out, data, true); i > 0 {
69 data = data[i:]
70 continue
71 }
72 }
73 }
74
75 // blank lines. note: returns the # of bytes to skip
76 if i := p.isEmpty(data); i > 0 {
77 data = data[i:]
78 continue
79 }
80
81 // indented code block:
82 //
83 // func max(a, b int) int {
84 // if a > b {
85 // return a
86 // }
87 // return b
88 // }
89 if p.codePrefix(data) > 0 {
90 data = data[p.code(out, data):]
91 continue
92 }
93
94 // fenced code block:
95 //
96 // ``` go
97 // func fact(n int) int {
98 // if n <= 1 {
99 // return n
100 // }
101 // return n * fact(n-1)
102 // }
103 // ```
104 if p.flags&EXTENSION_FENCED_CODE != 0 {
105 if i := p.fencedCode(out, data, true); i > 0 {
106 data = data[i:]
107 continue
108 }
109 }
110
111 // horizontal rule:
112 //
113 // ------
114 // or
115 // ******
116 // or
117 // ______
118 if p.isHRule(data) {
119 p.r.HRule(out)
120 var i int
121 for i = 0; data[i] != '\n'; i++ {
122 }
123 data = data[i:]
124 continue
125 }
126
127 // block quote:
128 //
129 // > A big quote I found somewhere
130 // > on the web
131 if p.quotePrefix(data) > 0 {
132 data = data[p.quote(out, data):]
133 continue
134 }
135
136 // table:
137 //
138 // Name | Age | Phone
139 // ------|-----|---------
140 // Bob | 31 | 555-1234
141 // Alice | 27 | 555-4321
142 if p.flags&EXTENSION_TABLES != 0 {
143 if i := p.table(out, data); i > 0 {
144 data = data[i:]
145 continue
146 }
147 }
148
149 // an itemized/unordered list:
150 //
151 // * Item 1
152 // * Item 2
153 //
154 // also works with + or -
155 if p.uliPrefix(data) > 0 {
156 data = data[p.list(out, data, 0):]
157 continue
158 }
159
160 // a numbered/ordered list:
161 //
162 // 1. Item 1
163 // 2. Item 2
164 if p.oliPrefix(data) > 0 {
165 data = data[p.list(out, data, LIST_TYPE_ORDERED):]
166 continue
167 }
168
169 // anything else must look like a normal paragraph
170 // note: this finds underlined headers, too
171 data = data[p.paragraph(out, data):]
172 }
173
174 p.nesting--
175}
176
177func (p *parser) isPrefixHeader(data []byte) bool {
178 if data[0] != '#' {
179 return false
180 }
181
182 if p.flags&EXTENSION_SPACE_HEADERS != 0 {
183 level := 0
184 for level < 6 && data[level] == '#' {
185 level++
186 }
187 if data[level] != ' ' {
188 return false
189 }
190 }
191 return true
192}
193
194func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
195 level := 0
196 for level < 6 && data[level] == '#' {
197 level++
198 }
199 i, end := 0, 0
200 for i = level; data[i] == ' '; i++ {
201 }
202 for end = i; data[end] != '\n'; end++ {
203 }
204 skip := end
205 id := ""
206 if p.flags&EXTENSION_HEADER_IDS != 0 {
207 j, k := 0, 0
208 // find start/end of header id
209 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
210 }
211 for k = j + 1; k < end && data[k] != '}'; k++ {
212 }
213 // extract header id iff found
214 if j < end && k < end {
215 id = string(data[j+2 : k])
216 end = j
217 skip = k + 1
218 for end > 0 && data[end-1] == ' ' {
219 end--
220 }
221 }
222 }
223 for end > 0 && data[end-1] == '#' {
224 end--
225 }
226 for end > 0 && data[end-1] == ' ' {
227 end--
228 }
229 if end > i {
230 if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
231 id = sanitized_anchor_name.Create(string(data[i:end]))
232 }
233 work := func() bool {
234 p.inline(out, data[i:end])
235 return true
236 }
237 p.r.Header(out, work, level, id)
238 }
239 return skip
240}
241
242func (p *parser) isUnderlinedHeader(data []byte) int {
243 // test of level 1 header
244 if data[0] == '=' {
245 i := 1
246 for data[i] == '=' {
247 i++
248 }
249 for data[i] == ' ' {
250 i++
251 }
252 if data[i] == '\n' {
253 return 1
254 } else {
255 return 0
256 }
257 }
258
259 // test of level 2 header
260 if data[0] == '-' {
261 i := 1
262 for data[i] == '-' {
263 i++
264 }
265 for data[i] == ' ' {
266 i++
267 }
268 if data[i] == '\n' {
269 return 2
270 } else {
271 return 0
272 }
273 }
274
275 return 0
276}
277
278func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
279 if data[0] != '%' {
280 return 0
281 }
282 splitData := bytes.Split(data, []byte("\n"))
283 var i int
284 for idx, b := range splitData {
285 if !bytes.HasPrefix(b, []byte("%")) {
286 i = idx // - 1
287 break
288 }
289 }
290
291 data = bytes.Join(splitData[0:i], []byte("\n"))
292 p.r.TitleBlock(out, data)
293
294 return len(data)
295}
296
297func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
298 var i, j int
299
300 // identify the opening tag
301 if data[0] != '<' {
302 return 0
303 }
304 curtag, tagfound := p.htmlFindTag(data[1:])
305
306 // handle special cases
307 if !tagfound {
308 // check for an HTML comment
309 if size := p.htmlComment(out, data, doRender); size > 0 {
310 return size
311 }
312
313 // check for an <hr> tag
314 if size := p.htmlHr(out, data, doRender); size > 0 {
315 return size
316 }
317
318 // no special case recognized
319 return 0
320 }
321
322 // look for an unindented matching closing tag
323 // followed by a blank line
324 found := false
325 /*
326 closetag := []byte("\n</" + curtag + ">")
327 j = len(curtag) + 1
328 for !found {
329 // scan for a closing tag at the beginning of a line
330 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
331 j += skip + len(closetag)
332 } else {
333 break
334 }
335
336 // see if it is the only thing on the line
337 if skip := p.isEmpty(data[j:]); skip > 0 {
338 // see if it is followed by a blank line/eof
339 j += skip
340 if j >= len(data) {
341 found = true
342 i = j
343 } else {
344 if skip := p.isEmpty(data[j:]); skip > 0 {
345 j += skip
346 found = true
347 i = j
348 }
349 }
350 }
351 }
352 */
353
354 // if not found, try a second pass looking for indented match
355 // but not if tag is "ins" or "del" (following original Markdown.pl)
356 if !found && curtag != "ins" && curtag != "del" {
357 i = 1
358 for i < len(data) {
359 i++
360 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
361 i++
362 }
363
364 if i+2+len(curtag) >= len(data) {
365 break
366 }
367
368 j = p.htmlFindEnd(curtag, data[i-1:])
369
370 if j > 0 {
371 i += j - 1
372 found = true
373 break
374 }
375 }
376 }
377
378 if !found {
379 return 0
380 }
381
382 // the end of the block has been found
383 if doRender {
384 // trim newlines
385 end := i
386 for end > 0 && data[end-1] == '\n' {
387 end--
388 }
389 p.r.BlockHtml(out, data[:end])
390 }
391
392 return i
393}
394
395// HTML comment, lax form
396func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
397 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
398 return 0
399 }
400
401 i := 5
402
403 // scan for an end-of-comment marker, across lines if necessary
404 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
405 i++
406 }
407 i++
408
409 // no end-of-comment marker
410 if i >= len(data) {
411 return 0
412 }
413
414 // needs to end with a blank line
415 if j := p.isEmpty(data[i:]); j > 0 {
416 size := i + j
417 if doRender {
418 // trim trailing newlines
419 end := size
420 for end > 0 && data[end-1] == '\n' {
421 end--
422 }
423 p.r.BlockHtml(out, data[:end])
424 }
425 return size
426 }
427
428 return 0
429}
430
431// HR, which is the only self-closing block tag considered
432func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
433 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
434 return 0
435 }
436 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
437 // not an <hr> tag after all; at least not a valid one
438 return 0
439 }
440
441 i := 3
442 for data[i] != '>' && data[i] != '\n' {
443 i++
444 }
445
446 if data[i] == '>' {
447 i++
448 if j := p.isEmpty(data[i:]); j > 0 {
449 size := i + j
450 if doRender {
451 // trim newlines
452 end := size
453 for end > 0 && data[end-1] == '\n' {
454 end--
455 }
456 p.r.BlockHtml(out, data[:end])
457 }
458 return size
459 }
460 }
461
462 return 0
463}
464
465func (p *parser) htmlFindTag(data []byte) (string, bool) {
466 i := 0
467 for isalnum(data[i]) {
468 i++
469 }
470 key := string(data[:i])
471 if blockTags[key] {
472 return key, true
473 }
474 return "", false
475}
476
477func (p *parser) htmlFindEnd(tag string, data []byte) int {
478 // assume data[0] == '<' && data[1] == '/' already tested
479
480 // check if tag is a match
481 closetag := []byte("</" + tag + ">")
482 if !bytes.HasPrefix(data, closetag) {
483 return 0
484 }
485 i := len(closetag)
486
487 // check that the rest of the line is blank
488 skip := 0
489 if skip = p.isEmpty(data[i:]); skip == 0 {
490 return 0
491 }
492 i += skip
493 skip = 0
494
495 if i >= len(data) {
496 return i
497 }
498
499 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
500 return i
501 }
502 if skip = p.isEmpty(data[i:]); skip == 0 {
503 // following line must be blank
504 return 0
505 }
506
507 return i + skip
508}
509
510func (p *parser) isEmpty(data []byte) int {
511 // it is okay to call isEmpty on an empty buffer
512 if len(data) == 0 {
513 return 0
514 }
515
516 var i int
517 for i = 0; i < len(data) && data[i] != '\n'; i++ {
518 if data[i] != ' ' && data[i] != '\t' {
519 return 0
520 }
521 }
522 return i + 1
523}
524
525func (p *parser) isHRule(data []byte) bool {
526 i := 0
527
528 // skip up to three spaces
529 for i < 3 && data[i] == ' ' {
530 i++
531 }
532
533 // look at the hrule char
534 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
535 return false
536 }
537 c := data[i]
538
539 // the whole line must be the char or whitespace
540 n := 0
541 for data[i] != '\n' {
542 switch {
543 case data[i] == c:
544 n++
545 case data[i] != ' ':
546 return false
547 }
548 i++
549 }
550
551 return n >= 3
552}
553
554func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
555 i, size := 0, 0
556 skip = 0
557
558 // skip up to three spaces
559 for i < len(data) && i < 3 && data[i] == ' ' {
560 i++
561 }
562 if i >= len(data) {
563 return
564 }
565
566 // check for the marker characters: ~ or `
567 if data[i] != '~' && data[i] != '`' {
568 return
569 }
570
571 c := data[i]
572
573 // the whole line must be the same char or whitespace
574 for i < len(data) && data[i] == c {
575 size++
576 i++
577 }
578
579 if i >= len(data) {
580 return
581 }
582
583 // the marker char must occur at least 3 times
584 if size < 3 {
585 return
586 }
587 marker = string(data[i-size : i])
588
589 // if this is the end marker, it must match the beginning marker
590 if oldmarker != "" && marker != oldmarker {
591 return
592 }
593
594 if syntax != nil {
595 syn := 0
596
597 for i < len(data) && data[i] == ' ' {
598 i++
599 }
600
601 if i >= len(data) {
602 return
603 }
604
605 syntaxStart := i
606
607 if data[i] == '{' {
608 i++
609 syntaxStart++
610
611 for i < len(data) && data[i] != '}' && data[i] != '\n' {
612 syn++
613 i++
614 }
615
616 if i >= len(data) || data[i] != '}' {
617 return
618 }
619
620 // strip all whitespace at the beginning and the end
621 // of the {} block
622 for syn > 0 && isspace(data[syntaxStart]) {
623 syntaxStart++
624 syn--
625 }
626
627 for syn > 0 && isspace(data[syntaxStart+syn-1]) {
628 syn--
629 }
630
631 i++
632 } else {
633 for i < len(data) && !isspace(data[i]) {
634 syn++
635 i++
636 }
637 }
638
639 language := string(data[syntaxStart : syntaxStart+syn])
640 *syntax = &language
641 }
642
643 for i < len(data) && data[i] == ' ' {
644 i++
645 }
646 if i >= len(data) || data[i] != '\n' {
647 return
648 }
649
650 skip = i + 1
651 return
652}
653
654func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
655 var lang *string
656 beg, marker := p.isFencedCode(data, &lang, "")
657 if beg == 0 || beg >= len(data) {
658 return 0
659 }
660
661 var work bytes.Buffer
662
663 for {
664 // safe to assume beg < len(data)
665
666 // check for the end of the code block
667 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
668 if fenceEnd != 0 {
669 beg += fenceEnd
670 break
671 }
672
673 // copy the current line
674 end := beg
675 for end < len(data) && data[end] != '\n' {
676 end++
677 }
678 end++
679
680 // did we reach the end of the buffer without a closing marker?
681 if end >= len(data) {
682 return 0
683 }
684
685 // verbatim copy to the working buffer
686 if doRender {
687 work.Write(data[beg:end])
688 }
689 beg = end
690 }
691
692 syntax := ""
693 if lang != nil {
694 syntax = *lang
695 }
696
697 if doRender {
698 p.r.BlockCode(out, work.Bytes(), syntax)
699 }
700
701 return beg
702}
703
704func (p *parser) table(out *bytes.Buffer, data []byte) int {
705 var header bytes.Buffer
706 i, columns := p.tableHeader(&header, data)
707 if i == 0 {
708 return 0
709 }
710
711 var body bytes.Buffer
712
713 for i < len(data) {
714 pipes, rowStart := 0, i
715 for ; data[i] != '\n'; i++ {
716 if data[i] == '|' {
717 pipes++
718 }
719 }
720
721 if pipes == 0 {
722 i = rowStart
723 break
724 }
725
726 // include the newline in data sent to tableRow
727 i++
728 p.tableRow(&body, data[rowStart:i], columns, false)
729 }
730
731 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
732
733 return i
734}
735
// isBackslashEscaped reports whether the byte at position i in data is
// preceded by an odd number of consecutive backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	odd := false
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		odd = !odd
	}
	return odd
}
744
// tableHeader checks whether data starts with a table header: a line of
// pipe-separated cells followed by an underline row such as
//
//	Name  | Age
//	------|:---:
//
// On success the header row is rendered into out via tableRow, size is the
// number of bytes covered by both lines (including their newlines) and
// columns holds one alignment flag set per column. On failure size is 0;
// columns may still be non-nil in that case and must be ignored.
// data is expected to be newline-terminated.
func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
	i := 0
	// count unescaped pipes on the first line; n pipes separate n+1 columns
	colCount := 1
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	// the i > 2 guard keeps a very short line from having the same pipe
	// counted as both the leading and the trailing one
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// skip an optional leading pipe and any spaces after it
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	for data[i] == ' ' {
		i++
	}

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		dashes := 0

		// a leading colon requests left alignment and counts toward the minimum width
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_LEFT
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		// a trailing colon requests right alignment; both flags may be set on one column
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_RIGHT
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the header row
	if col != colCount {
		return
	}

	p.tableRow(out, header, columns, true)
	size = i + 1
	return
}
849
850func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
851 i, col := 0, 0
852 var rowWork bytes.Buffer
853
854 if data[i] == '|' && !isBackslashEscaped(data, i) {
855 i++
856 }
857
858 for col = 0; col < len(columns) && i < len(data); col++ {
859 for data[i] == ' ' {
860 i++
861 }
862
863 cellStart := i
864
865 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
866 i++
867 }
868
869 cellEnd := i
870
871 // skip the end-of-cell marker, possibly taking us past end of buffer
872 i++
873
874 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
875 cellEnd--
876 }
877
878 var cellWork bytes.Buffer
879 p.inline(&cellWork, data[cellStart:cellEnd])
880
881 if header {
882 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
883 } else {
884 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
885 }
886 }
887
888 // pad it out with empty columns to get the right number
889 for ; col < len(columns); col++ {
890 if header {
891 p.r.TableHeaderCell(&rowWork, nil, columns[col])
892 } else {
893 p.r.TableCell(&rowWork, nil, columns[col])
894 }
895 }
896
897 // silently ignore rows with too many cells
898
899 p.r.TableRow(out, rowWork.Bytes())
900}
901
902// returns blockquote prefix length
903func (p *parser) quotePrefix(data []byte) int {
904 i := 0
905 for i < 3 && data[i] == ' ' {
906 i++
907 }
908 if data[i] == '>' {
909 if data[i+1] == ' ' {
910 return i + 2
911 }
912 return i + 1
913 }
914 return 0
915}
916
917// parse a blockquote fragment
918func (p *parser) quote(out *bytes.Buffer, data []byte) int {
919 var raw bytes.Buffer
920 beg, end := 0, 0
921 for beg < len(data) {
922 end = beg
923 for data[end] != '\n' {
924 end++
925 }
926 end++
927
928 if pre := p.quotePrefix(data[beg:]); pre > 0 {
929 // skip the prefix
930 beg += pre
931 } else if p.isEmpty(data[beg:]) > 0 &&
932 (end >= len(data) ||
933 (p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
934 // blockquote ends with at least one blank line
935 // followed by something without a blockquote prefix
936 break
937 }
938
939 // this line is part of the blockquote
940 raw.Write(data[beg:end])
941 beg = end
942 }
943
944 var cooked bytes.Buffer
945 p.block(&cooked, raw.Bytes())
946 p.r.BlockQuote(out, cooked.Bytes())
947 return end
948}
949
950// returns prefix length for block code
951func (p *parser) codePrefix(data []byte) int {
952 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
953 return 4
954 }
955 return 0
956}
957
958func (p *parser) code(out *bytes.Buffer, data []byte) int {
959 var work bytes.Buffer
960
961 i := 0
962 for i < len(data) {
963 beg := i
964 for data[i] != '\n' {
965 i++
966 }
967 i++
968
969 blankline := p.isEmpty(data[beg:i]) > 0
970 if pre := p.codePrefix(data[beg:i]); pre > 0 {
971 beg += pre
972 } else if !blankline {
973 // non-empty, non-prefixed line breaks the pre
974 i = beg
975 break
976 }
977
978 // verbatim copy to the working buffeu
979 if blankline {
980 work.WriteByte('\n')
981 } else {
982 work.Write(data[beg:i])
983 }
984 }
985
986 // trim all the \n off the end of work
987 workbytes := work.Bytes()
988 eol := len(workbytes)
989 for eol > 0 && workbytes[eol-1] == '\n' {
990 eol--
991 }
992 if eol != len(workbytes) {
993 work.Truncate(eol)
994 }
995
996 work.WriteByte('\n')
997
998 p.r.BlockCode(out, work.Bytes(), "")
999
1000 return i
1001}
1002
1003// returns unordered list item prefix
1004func (p *parser) uliPrefix(data []byte) int {
1005 i := 0
1006
1007 // start with up to 3 spaces
1008 for i < 3 && data[i] == ' ' {
1009 i++
1010 }
1011
1012 // need a *, +, or - followed by a space
1013 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1014 data[i+1] != ' ' {
1015 return 0
1016 }
1017 return i + 2
1018}
1019
1020// returns ordered list item prefix
1021func (p *parser) oliPrefix(data []byte) int {
1022 i := 0
1023
1024 // start with up to 3 spaces
1025 for i < 3 && data[i] == ' ' {
1026 i++
1027 }
1028
1029 // count the digits
1030 start := i
1031 for data[i] >= '0' && data[i] <= '9' {
1032 i++
1033 }
1034
1035 // we need >= 1 digits followed by a dot and a space
1036 if start == i || data[i] != '.' || data[i+1] != ' ' {
1037 return 0
1038 }
1039 return i + 2
1040}
1041
1042// parse ordered or unordered list block
1043func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1044 i := 0
1045 flags |= LIST_ITEM_BEGINNING_OF_LIST
1046 work := func() bool {
1047 for i < len(data) {
1048 skip := p.listItem(out, data[i:], &flags)
1049 i += skip
1050
1051 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1052 break
1053 }
1054 flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1055 }
1056 return true
1057 }
1058
1059 p.r.List(out, work, flags)
1060 return i
1061}
1062
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// The item's lines are gathered into a working buffer with their
// indentation (up to four spaces) removed, then rendered either as inline
// content or, when LIST_ITEM_CONTAINS_BLOCK ends up set in *flags, as nested
// block content. *flags is updated in place: LIST_ITEM_CONTAINS_BLOCK and
// LIST_ITEM_END_OF_LIST may be set while scanning.
// Returns the number of bytes consumed, or 0 if data does not start with a
// list item prefix.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// accept either kind of list prefix
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	line := i
	for data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false
	// offset into raw where the first nested list item starts (0 = none seen)
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			*flags |= LIST_ITEM_END_OF_LIST
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
		// intermediate render of block li
		if sublist > 0 {
			// text before the nested list and the nested list itself
			// are parsed as separate block runs
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline li
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	// line is the start of the first line that is not part of this item
	return line
}
1218
1219// render a single paragraph that has already been parsed out
1220func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1221 if len(data) == 0 {
1222 return
1223 }
1224
1225 // trim leading spaces
1226 beg := 0
1227 for data[beg] == ' ' {
1228 beg++
1229 }
1230
1231 // trim trailing newline
1232 end := len(data) - 1
1233
1234 // trim trailing spaces
1235 for end > beg && data[end-1] == ' ' {
1236 end--
1237 }
1238
1239 work := func() bool {
1240 p.inline(out, data[beg:end])
1241 return true
1242 }
1243 p.r.Paragraph(out, work)
1244}
1245
// paragraph parses a paragraph at the start of data, renders it into out and
// returns the number of bytes consumed.
//
// Lines are scanned until something ends the paragraph: a blank line, a
// header underline (in which case the preceding line is rendered as a
// header and any earlier lines as a paragraph), a prefixed header or
// horizontal rule, an HTML block when EXTENSION_LAX_HTML_BLOCKS is set, or a
// list, quote or code prefix when EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK is
// set. data is expected to be newline-terminated.
func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
	// prev: index of 1st char of previous line
	// line: index of 1st char of current line
	// i: index of cursor/end of current line
	var prev, line, i int

	// keep going until we find something to mark the end of the paragraph
	for i < len(data) {
		// mark the beginning of the current line
		prev = line
		current := data[i:]
		line = i

		// did we find a blank line marking the end of the paragraph?
		if n := p.isEmpty(current); n > 0 {
			p.renderParagraph(out, data[:i])
			// the blank line itself is consumed as well
			return i + n
		}

		// an underline under some text marks a header, so our paragraph ended on prev line
		if i > 0 {
			if level := p.isUnderlinedHeader(current); level > 0 {
				// render the paragraph
				p.renderParagraph(out, data[:prev])

				// ignore leading and trailing whitespace
				eol := i - 1
				for prev < eol && data[prev] == ' ' {
					prev++
				}
				for eol > prev && data[eol-1] == ' ' {
					eol--
				}

				// render the header
				// this ugly double closure avoids forcing variables onto the heap
				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
					return func() bool {
						pp.inline(o, d)
						return true
					}
				}(out, p, data[prev:eol])

				id := ""
				if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
					id = sanitized_anchor_name.Create(string(data[prev:eol]))
				}

				p.r.Header(out, work, level, id)

				// find the end of the underline
				for data[i] != '\n' {
					i++
				}
				// i stops on the underline's newline, which is left for the caller
				return i
			}
		}

		// if the next line starts a block of HTML, then the paragraph ends here
		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
			// html is called with doRender=false: this is only a probe
			if data[i] == '<' && p.html(out, current, false) > 0 {
				// rewind to before the HTML block
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// if there's a prefixed header or a horizontal rule after this, paragraph is over
		if p.isPrefixHeader(current) || p.isHRule(current) {
			p.renderParagraph(out, data[:i])
			return i
		}

		// if there's a list, quote or code block after this, paragraph is over
		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
			if p.uliPrefix(current) != 0 ||
				p.oliPrefix(current) != 0 ||
				p.quotePrefix(current) != 0 ||
				p.codePrefix(current) != 0 {
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// otherwise, scan to the beginning of the next line
		for data[i] != '\n' {
			i++
		}
		i++
	}

	// ran out of input: everything scanned is the paragraph
	p.renderParagraph(out, data[:i])
	return i
}