block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18
19 "github.com/shurcooL/sanitized_anchor_name"
20)
21
22// Parse block-level data.
23// Note: this function and many that it calls assume that
24// the input buffer ends with a newline.
25func (p *parser) block(out *bytes.Buffer, data []byte) {
26 if len(data) == 0 || data[len(data)-1] != '\n' {
27 panic("block input is missing terminating newline")
28 }
29
30 // this is called recursively: enforce a maximum depth
31 if p.nesting >= p.maxNesting {
32 return
33 }
34 p.nesting++
35
36 // parse out one block-level construct at a time
37 for len(data) > 0 {
38 // prefixed header:
39 //
40 // # Header 1
41 // ## Header 2
42 // ...
43 // ###### Header 6
44 if p.isPrefixHeader(data) {
45 data = data[p.prefixHeader(out, data):]
46 continue
47 }
48
49 // block of preformatted HTML:
50 //
51 // <div>
52 // ...
53 // </div>
54 if data[0] == '<' {
55 if i := p.html(out, data, true); i > 0 {
56 data = data[i:]
57 continue
58 }
59 }
60
61 // title block
62 //
63 // % stuff
64 // % more stuff
65 // % even more stuff
66 if p.flags&Titleblock != 0 {
67 if data[0] == '%' {
68 if i := p.titleBlock(out, data, true); i > 0 {
69 data = data[i:]
70 continue
71 }
72 }
73 }
74
75 // blank lines. note: returns the # of bytes to skip
76 if i := p.isEmpty(data); i > 0 {
77 data = data[i:]
78 continue
79 }
80
81 // indented code block:
82 //
83 // func max(a, b int) int {
84 // if a > b {
85 // return a
86 // }
87 // return b
88 // }
89 if p.codePrefix(data) > 0 {
90 data = data[p.code(out, data):]
91 continue
92 }
93
94 // fenced code block:
95 //
96 // ``` go
97 // func fact(n int) int {
98 // if n <= 1 {
99 // return n
100 // }
101 // return n * fact(n-1)
102 // }
103 // ```
104 if p.flags&FencedCode != 0 {
105 if i := p.fencedCode(out, data, true); i > 0 {
106 data = data[i:]
107 continue
108 }
109 }
110
111 // horizontal rule:
112 //
113 // ------
114 // or
115 // ******
116 // or
117 // ______
118 if p.isHRule(data) {
119 p.r.HRule(out)
120 var i int
121 for i = 0; data[i] != '\n'; i++ {
122 }
123 data = data[i:]
124 continue
125 }
126
127 // block quote:
128 //
129 // > A big quote I found somewhere
130 // > on the web
131 if p.quotePrefix(data) > 0 {
132 data = data[p.quote(out, data):]
133 continue
134 }
135
136 // table:
137 //
138 // Name | Age | Phone
139 // ------|-----|---------
140 // Bob | 31 | 555-1234
141 // Alice | 27 | 555-4321
142 if p.flags&Tables != 0 {
143 if i := p.table(out, data); i > 0 {
144 data = data[i:]
145 continue
146 }
147 }
148
149 // an itemized/unordered list:
150 //
151 // * Item 1
152 // * Item 2
153 //
154 // also works with + or -
155 if p.uliPrefix(data) > 0 {
156 data = data[p.list(out, data, 0):]
157 continue
158 }
159
160 // a numbered/ordered list:
161 //
162 // 1. Item 1
163 // 2. Item 2
164 if p.oliPrefix(data) > 0 {
165 data = data[p.list(out, data, ListTypeOrdered):]
166 continue
167 }
168
169 // definition lists:
170 //
171 // Term 1
172 // : Definition a
173 // : Definition b
174 //
175 // Term 2
176 // : Definition c
177 if p.flags&DefinitionLists != 0 {
178 if p.dliPrefix(data) > 0 {
179 data = data[p.list(out, data, ListTypeDefinition):]
180 continue
181 }
182 }
183
184 // anything else must look like a normal paragraph
185 // note: this finds underlined headers, too
186 data = data[p.paragraph(out, data):]
187 }
188
189 p.nesting--
190}
191
192func (p *parser) isPrefixHeader(data []byte) bool {
193 if data[0] != '#' {
194 return false
195 }
196
197 if p.flags&SpaceHeaders != 0 {
198 level := 0
199 for level < 6 && data[level] == '#' {
200 level++
201 }
202 if data[level] != ' ' {
203 return false
204 }
205 }
206 return true
207}
208
209func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
210 level := 0
211 for level < 6 && data[level] == '#' {
212 level++
213 }
214 i := skipChar(data, level, ' ')
215 end := skipUntilChar(data, i, '\n')
216 skip := end
217 id := ""
218 if p.flags&HeaderIDs != 0 {
219 j, k := 0, 0
220 // find start/end of header id
221 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
222 }
223 for k = j + 1; k < end && data[k] != '}'; k++ {
224 }
225 // extract header id iff found
226 if j < end && k < end {
227 id = string(data[j+2 : k])
228 end = j
229 skip = k + 1
230 for end > 0 && data[end-1] == ' ' {
231 end--
232 }
233 }
234 }
235 for end > 0 && data[end-1] == '#' {
236 if isBackslashEscaped(data, end-1) {
237 break
238 }
239 end--
240 }
241 for end > 0 && data[end-1] == ' ' {
242 end--
243 }
244 if end > i {
245 if id == "" && p.flags&AutoHeaderIDs != 0 {
246 id = sanitized_anchor_name.Create(string(data[i:end]))
247 }
248 tocMarker := p.r.BeginHeader(out, level, id)
249 p.inline(out, data[i:end])
250 p.r.EndHeader(out, level, id, tocMarker)
251 }
252 return skip
253}
254
255func (p *parser) isUnderlinedHeader(data []byte) int {
256 // test of level 1 header
257 if data[0] == '=' {
258 i := skipChar(data, 1, '=')
259 i = skipChar(data, i, ' ')
260 if data[i] == '\n' {
261 return 1
262 } else {
263 return 0
264 }
265 }
266
267 // test of level 2 header
268 if data[0] == '-' {
269 i := skipChar(data, 1, '-')
270 i = skipChar(data, i, ' ')
271 if data[i] == '\n' {
272 return 2
273 } else {
274 return 0
275 }
276 }
277
278 return 0
279}
280
281func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
282 if data[0] != '%' {
283 return 0
284 }
285 splitData := bytes.Split(data, []byte("\n"))
286 var i int
287 for idx, b := range splitData {
288 if !bytes.HasPrefix(b, []byte("%")) {
289 i = idx // - 1
290 break
291 }
292 }
293
294 data = bytes.Join(splitData[0:i], []byte("\n"))
295 p.r.TitleBlock(out, data)
296
297 return len(data)
298}
299
300func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
301 var i, j int
302
303 // identify the opening tag
304 if data[0] != '<' {
305 return 0
306 }
307 curtag, tagfound := p.htmlFindTag(data[1:])
308
309 // handle special cases
310 if !tagfound {
311 // check for an HTML comment
312 if size := p.htmlComment(out, data, doRender); size > 0 {
313 return size
314 }
315
316 // check for an <hr> tag
317 if size := p.htmlHr(out, data, doRender); size > 0 {
318 return size
319 }
320
321 // no special case recognized
322 return 0
323 }
324
325 // look for an unindented matching closing tag
326 // followed by a blank line
327 found := false
328 /*
329 closetag := []byte("\n</" + curtag + ">")
330 j = len(curtag) + 1
331 for !found {
332 // scan for a closing tag at the beginning of a line
333 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
334 j += skip + len(closetag)
335 } else {
336 break
337 }
338
339 // see if it is the only thing on the line
340 if skip := p.isEmpty(data[j:]); skip > 0 {
341 // see if it is followed by a blank line/eof
342 j += skip
343 if j >= len(data) {
344 found = true
345 i = j
346 } else {
347 if skip := p.isEmpty(data[j:]); skip > 0 {
348 j += skip
349 found = true
350 i = j
351 }
352 }
353 }
354 }
355 */
356
357 // if not found, try a second pass looking for indented match
358 // but not if tag is "ins" or "del" (following original Markdown.pl)
359 if !found && curtag != "ins" && curtag != "del" {
360 i = 1
361 for i < len(data) {
362 i++
363 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
364 i++
365 }
366
367 if i+2+len(curtag) >= len(data) {
368 break
369 }
370
371 j = p.htmlFindEnd(curtag, data[i-1:])
372
373 if j > 0 {
374 i += j - 1
375 found = true
376 break
377 }
378 }
379 }
380
381 if !found {
382 return 0
383 }
384
385 // the end of the block has been found
386 if doRender {
387 // trim newlines
388 end := i
389 for end > 0 && data[end-1] == '\n' {
390 end--
391 }
392 p.r.BlockHtml(out, data[:end])
393 }
394
395 return i
396}
397
398// HTML comment, lax form
399func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
400 i := p.inlineHtmlComment(out, data)
401 // needs to end with a blank line
402 if j := p.isEmpty(data[i:]); j > 0 {
403 size := i + j
404 if doRender {
405 // trim trailing newlines
406 end := size
407 for end > 0 && data[end-1] == '\n' {
408 end--
409 }
410 p.r.BlockHtml(out, data[:end])
411 }
412 return size
413 }
414 return 0
415}
416
417// HR, which is the only self-closing block tag considered
418func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
419 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
420 return 0
421 }
422 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
423 // not an <hr> tag after all; at least not a valid one
424 return 0
425 }
426
427 i := 3
428 for data[i] != '>' && data[i] != '\n' {
429 i++
430 }
431
432 if data[i] == '>' {
433 i++
434 if j := p.isEmpty(data[i:]); j > 0 {
435 size := i + j
436 if doRender {
437 // trim newlines
438 end := size
439 for end > 0 && data[end-1] == '\n' {
440 end--
441 }
442 p.r.BlockHtml(out, data[:end])
443 }
444 return size
445 }
446 }
447
448 return 0
449}
450
451func (p *parser) htmlFindTag(data []byte) (string, bool) {
452 i := 0
453 for isalnum(data[i]) {
454 i++
455 }
456 key := string(data[:i])
457 if _, ok := blockTags[key]; ok {
458 return key, true
459 }
460 return "", false
461}
462
463func (p *parser) htmlFindEnd(tag string, data []byte) int {
464 // assume data[0] == '<' && data[1] == '/' already tested
465
466 // check if tag is a match
467 closetag := []byte("</" + tag + ">")
468 if !bytes.HasPrefix(data, closetag) {
469 return 0
470 }
471 i := len(closetag)
472
473 // check that the rest of the line is blank
474 skip := 0
475 if skip = p.isEmpty(data[i:]); skip == 0 {
476 return 0
477 }
478 i += skip
479 skip = 0
480
481 if i >= len(data) {
482 return i
483 }
484
485 if p.flags&LaxHTMLBlocks != 0 {
486 return i
487 }
488 if skip = p.isEmpty(data[i:]); skip == 0 {
489 // following line must be blank
490 return 0
491 }
492
493 return i + skip
494}
495
496func (p *parser) isEmpty(data []byte) int {
497 // it is okay to call isEmpty on an empty buffer
498 if len(data) == 0 {
499 return 0
500 }
501
502 var i int
503 for i = 0; i < len(data) && data[i] != '\n'; i++ {
504 if data[i] != ' ' && data[i] != '\t' {
505 return 0
506 }
507 }
508 return i + 1
509}
510
511func (p *parser) isHRule(data []byte) bool {
512 i := 0
513
514 // skip up to three spaces
515 for i < 3 && data[i] == ' ' {
516 i++
517 }
518
519 // look at the hrule char
520 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
521 return false
522 }
523 c := data[i]
524
525 // the whole line must be the char or whitespace
526 n := 0
527 for data[i] != '\n' {
528 switch {
529 case data[i] == c:
530 n++
531 case data[i] != ' ':
532 return false
533 }
534 i++
535 }
536
537 return n >= 3
538}
539
540func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
541 i, size := 0, 0
542 skip = 0
543
544 // skip up to three spaces
545 for i < len(data) && i < 3 && data[i] == ' ' {
546 i++
547 }
548 if i >= len(data) {
549 return
550 }
551
552 // check for the marker characters: ~ or `
553 if data[i] != '~' && data[i] != '`' {
554 return
555 }
556
557 c := data[i]
558
559 // the whole line must be the same char or whitespace
560 for i < len(data) && data[i] == c {
561 size++
562 i++
563 }
564
565 if i >= len(data) {
566 return
567 }
568
569 // the marker char must occur at least 3 times
570 if size < 3 {
571 return
572 }
573 marker = string(data[i-size : i])
574
575 // if this is the end marker, it must match the beginning marker
576 if oldmarker != "" && marker != oldmarker {
577 return
578 }
579
580 if syntax != nil {
581 syn := 0
582 i = skipChar(data, i, ' ')
583
584 if i >= len(data) {
585 return
586 }
587
588 syntaxStart := i
589
590 if data[i] == '{' {
591 i++
592 syntaxStart++
593
594 for i < len(data) && data[i] != '}' && data[i] != '\n' {
595 syn++
596 i++
597 }
598
599 if i >= len(data) || data[i] != '}' {
600 return
601 }
602
603 // strip all whitespace at the beginning and the end
604 // of the {} block
605 for syn > 0 && isspace(data[syntaxStart]) {
606 syntaxStart++
607 syn--
608 }
609
610 for syn > 0 && isspace(data[syntaxStart+syn-1]) {
611 syn--
612 }
613
614 i++
615 } else {
616 for i < len(data) && !isspace(data[i]) {
617 syn++
618 i++
619 }
620 }
621
622 language := string(data[syntaxStart : syntaxStart+syn])
623 *syntax = &language
624 }
625
626 i = skipChar(data, i, ' ')
627 if i >= len(data) || data[i] != '\n' {
628 return
629 }
630
631 skip = i + 1
632 return
633}
634
635func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
636 var lang *string
637 beg, marker := p.isFencedCode(data, &lang, "")
638 if beg == 0 || beg >= len(data) {
639 return 0
640 }
641
642 var work bytes.Buffer
643
644 for {
645 // safe to assume beg < len(data)
646
647 // check for the end of the code block
648 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
649 if fenceEnd != 0 {
650 beg += fenceEnd
651 break
652 }
653
654 // copy the current line
655 end := skipUntilChar(data, beg, '\n') + 1
656
657 // did we reach the end of the buffer without a closing marker?
658 if end >= len(data) {
659 return 0
660 }
661
662 // verbatim copy to the working buffer
663 if doRender {
664 work.Write(data[beg:end])
665 }
666 beg = end
667 }
668
669 syntax := ""
670 if lang != nil {
671 syntax = *lang
672 }
673
674 if doRender {
675 p.r.BlockCode(out, work.Bytes(), syntax)
676 }
677
678 return beg
679}
680
681func (p *parser) table(out *bytes.Buffer, data []byte) int {
682 var header bytes.Buffer
683 i, columns := p.tableHeader(&header, data)
684 if i == 0 {
685 return 0
686 }
687
688 var body bytes.Buffer
689
690 for i < len(data) {
691 pipes, rowStart := 0, i
692 for ; data[i] != '\n'; i++ {
693 if data[i] == '|' {
694 pipes++
695 }
696 }
697
698 if pipes == 0 {
699 i = rowStart
700 break
701 }
702
703 // include the newline in data sent to tableRow
704 i++
705 p.tableRow(&body, data[rowStart:i], columns, false)
706 }
707
708 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
709
710 return i
711}
712
// isBackslashEscaped reports whether the byte at data[i] is escaped, meaning
// it is immediately preceded by an odd number of backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// each backslash walking left from i flips the parity
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		escaped = !escaped
	}
	return escaped
}
721
722func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
723 i := 0
724 colCount := 1
725 for i = 0; data[i] != '\n'; i++ {
726 if data[i] == '|' && !isBackslashEscaped(data, i) {
727 colCount++
728 }
729 }
730
731 // doesn't look like a table header
732 if colCount == 1 {
733 return
734 }
735
736 // include the newline in the data sent to tableRow
737 header := data[:i+1]
738
739 // column count ignores pipes at beginning or end of line
740 if data[0] == '|' {
741 colCount--
742 }
743 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
744 colCount--
745 }
746
747 columns = make([]int, colCount)
748
749 // move on to the header underline
750 i++
751 if i >= len(data) {
752 return
753 }
754
755 if data[i] == '|' && !isBackslashEscaped(data, i) {
756 i++
757 }
758 i = skipChar(data, i, ' ')
759
760 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
761 // and trailing | optional on last column
762 col := 0
763 for data[i] != '\n' {
764 dashes := 0
765
766 if data[i] == ':' {
767 i++
768 columns[col] |= TableAlignmentLeft
769 dashes++
770 }
771 for data[i] == '-' {
772 i++
773 dashes++
774 }
775 if data[i] == ':' {
776 i++
777 columns[col] |= TableAlignmentRight
778 dashes++
779 }
780 for data[i] == ' ' {
781 i++
782 }
783
784 // end of column test is messy
785 switch {
786 case dashes < 3:
787 // not a valid column
788 return
789
790 case data[i] == '|' && !isBackslashEscaped(data, i):
791 // marker found, now skip past trailing whitespace
792 col++
793 i++
794 for data[i] == ' ' {
795 i++
796 }
797
798 // trailing junk found after last column
799 if col >= colCount && data[i] != '\n' {
800 return
801 }
802
803 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
804 // something else found where marker was required
805 return
806
807 case data[i] == '\n':
808 // marker is optional for the last column
809 col++
810
811 default:
812 // trailing junk found after last column
813 return
814 }
815 }
816 if col != colCount {
817 return
818 }
819
820 p.tableRow(out, header, columns, true)
821 size = i + 1
822 return
823}
824
825func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
826 i, col := 0, 0
827 var rowWork bytes.Buffer
828
829 if data[i] == '|' && !isBackslashEscaped(data, i) {
830 i++
831 }
832
833 for col = 0; col < len(columns) && i < len(data); col++ {
834 for data[i] == ' ' {
835 i++
836 }
837
838 cellStart := i
839
840 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
841 i++
842 }
843
844 cellEnd := i
845
846 // skip the end-of-cell marker, possibly taking us past end of buffer
847 i++
848
849 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
850 cellEnd--
851 }
852
853 var cellWork bytes.Buffer
854 p.inline(&cellWork, data[cellStart:cellEnd])
855
856 if header {
857 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
858 } else {
859 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
860 }
861 }
862
863 // pad it out with empty columns to get the right number
864 for ; col < len(columns); col++ {
865 if header {
866 p.r.TableHeaderCell(&rowWork, nil, columns[col])
867 } else {
868 p.r.TableCell(&rowWork, nil, columns[col])
869 }
870 }
871
872 // silently ignore rows with too many cells
873
874 p.r.TableRow(out, rowWork.Bytes())
875}
876
877// returns blockquote prefix length
878func (p *parser) quotePrefix(data []byte) int {
879 i := 0
880 for i < 3 && data[i] == ' ' {
881 i++
882 }
883 if data[i] == '>' {
884 if data[i+1] == ' ' {
885 return i + 2
886 }
887 return i + 1
888 }
889 return 0
890}
891
892// blockquote ends with at least one blank line
893// followed by something without a blockquote prefix
894func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
895 if p.isEmpty(data[beg:]) <= 0 {
896 return false
897 }
898 if end >= len(data) {
899 return true
900 }
901 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
902}
903
904// parse a blockquote fragment
905func (p *parser) quote(out *bytes.Buffer, data []byte) int {
906 var raw bytes.Buffer
907 beg, end := 0, 0
908 for beg < len(data) {
909 end = beg
910 // Step over whole lines, collecting them. While doing that, check for
911 // fenced code and if one's found, incorporate it altogether,
912 // irregardless of any contents inside it
913 for data[end] != '\n' {
914 if p.flags&FencedCode != 0 {
915 if i := p.fencedCode(out, data[end:], false); i > 0 {
916 // -1 to compensate for the extra end++ after the loop:
917 end += i - 1
918 break
919 }
920 }
921 end++
922 }
923 end++
924
925 if pre := p.quotePrefix(data[beg:]); pre > 0 {
926 // skip the prefix
927 beg += pre
928 } else if p.terminateBlockquote(data, beg, end) {
929 break
930 }
931
932 // this line is part of the blockquote
933 raw.Write(data[beg:end])
934 beg = end
935 }
936
937 var cooked bytes.Buffer
938 p.block(&cooked, raw.Bytes())
939 p.r.BlockQuote(out, cooked.Bytes())
940 return end
941}
942
943// returns prefix length for block code
944func (p *parser) codePrefix(data []byte) int {
945 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
946 return 4
947 }
948 return 0
949}
950
951func (p *parser) code(out *bytes.Buffer, data []byte) int {
952 var work bytes.Buffer
953
954 i := 0
955 for i < len(data) {
956 beg := i
957 for data[i] != '\n' {
958 i++
959 }
960 i++
961
962 blankline := p.isEmpty(data[beg:i]) > 0
963 if pre := p.codePrefix(data[beg:i]); pre > 0 {
964 beg += pre
965 } else if !blankline {
966 // non-empty, non-prefixed line breaks the pre
967 i = beg
968 break
969 }
970
971 // verbatim copy to the working buffeu
972 if blankline {
973 work.WriteByte('\n')
974 } else {
975 work.Write(data[beg:i])
976 }
977 }
978
979 // trim all the \n off the end of work
980 workbytes := work.Bytes()
981 eol := len(workbytes)
982 for eol > 0 && workbytes[eol-1] == '\n' {
983 eol--
984 }
985 if eol != len(workbytes) {
986 work.Truncate(eol)
987 }
988
989 work.WriteByte('\n')
990
991 p.r.BlockCode(out, work.Bytes(), "")
992
993 return i
994}
995
996// returns unordered list item prefix
997func (p *parser) uliPrefix(data []byte) int {
998 i := 0
999
1000 // start with up to 3 spaces
1001 for i < 3 && data[i] == ' ' {
1002 i++
1003 }
1004
1005 // need a *, +, or - followed by a space
1006 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1007 data[i+1] != ' ' {
1008 return 0
1009 }
1010 return i + 2
1011}
1012
1013// returns ordered list item prefix
1014func (p *parser) oliPrefix(data []byte) int {
1015 i := 0
1016
1017 // start with up to 3 spaces
1018 for i < 3 && data[i] == ' ' {
1019 i++
1020 }
1021
1022 // count the digits
1023 start := i
1024 for data[i] >= '0' && data[i] <= '9' {
1025 i++
1026 }
1027
1028 // we need >= 1 digits followed by a dot and a space
1029 if start == i || data[i] != '.' || data[i+1] != ' ' {
1030 return 0
1031 }
1032 return i + 2
1033}
1034
1035// returns definition list item prefix
1036func (p *parser) dliPrefix(data []byte) int {
1037 i := 0
1038
1039 // need a : followed by a spaces
1040 if data[i] != ':' || data[i+1] != ' ' {
1041 return 0
1042 }
1043 for data[i] == ' ' {
1044 i++
1045 }
1046 return i + 2
1047}
1048
1049// parse ordered or unordered list block
1050func (p *parser) list(out *bytes.Buffer, data []byte, flags ListType) int {
1051 i := 0
1052 flags |= ListItemBeginningOfList
1053 p.r.BeginList(out, flags)
1054
1055 for i < len(data) {
1056 skip := p.listItem(out, data[i:], &flags)
1057 i += skip
1058 if skip == 0 || flags&ListItemEndOfList != 0 {
1059 break
1060 }
1061 flags &= ^ListItemBeginningOfList
1062 }
1063
1064 p.r.EndList(out, flags)
1065 return i
1066}
1067
1068// Parse a single list item.
1069// Assumes initial prefix is already removed if this is a sublist.
1070func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *ListType) int {
1071 // keep track of the indentation of the first line
1072 itemIndent := 0
1073 for itemIndent < 3 && data[itemIndent] == ' ' {
1074 itemIndent++
1075 }
1076
1077 i := p.uliPrefix(data)
1078 if i == 0 {
1079 i = p.oliPrefix(data)
1080 }
1081 if i == 0 {
1082 i = p.dliPrefix(data)
1083 // reset definition term flag
1084 if i > 0 {
1085 *flags &= ^ListTypeTerm
1086 }
1087 }
1088 if i == 0 {
1089 // if in defnition list, set term flag and continue
1090 if *flags&ListTypeDefinition != 0 {
1091 *flags |= ListTypeTerm
1092 } else {
1093 return 0
1094 }
1095 }
1096
1097 // skip leading whitespace on first line
1098 for data[i] == ' ' {
1099 i++
1100 }
1101
1102 // find the end of the line
1103 line := i
1104 for i > 0 && data[i-1] != '\n' {
1105 i++
1106 }
1107
1108 // get working buffer
1109 var raw bytes.Buffer
1110
1111 // put the first line into the working buffer
1112 raw.Write(data[line:i])
1113 line = i
1114
1115 // process the following lines
1116 containsBlankLine := false
1117 sublist := 0
1118
1119gatherlines:
1120 for line < len(data) {
1121 i++
1122
1123 // find the end of this line
1124 for data[i-1] != '\n' {
1125 i++
1126 }
1127
1128 // if it is an empty line, guess that it is part of this item
1129 // and move on to the next line
1130 if p.isEmpty(data[line:i]) > 0 {
1131 containsBlankLine = true
1132 line = i
1133 continue
1134 }
1135
1136 // calculate the indentation
1137 indent := 0
1138 for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1139 indent++
1140 }
1141
1142 chunk := data[line+indent : i]
1143
1144 // evaluate how this line fits in
1145 switch {
1146 // is this a nested list item?
1147 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1148 p.oliPrefix(chunk) > 0 ||
1149 p.dliPrefix(chunk) > 0:
1150
1151 if containsBlankLine {
1152 *flags |= ListItemContainsBlock
1153 }
1154
1155 // to be a nested list, it must be indented more
1156 // if not, it is the next item in the same list
1157 if indent <= itemIndent {
1158 break gatherlines
1159 }
1160
1161 // is this the first item in the nested list?
1162 if sublist == 0 {
1163 sublist = raw.Len()
1164 }
1165
1166 // is this a nested prefix header?
1167 case p.isPrefixHeader(chunk):
1168 // if the header is not indented, it is not nested in the list
1169 // and thus ends the list
1170 if containsBlankLine && indent < 4 {
1171 *flags |= ListItemEndOfList
1172 break gatherlines
1173 }
1174 *flags |= ListItemContainsBlock
1175
1176 // anything following an empty line is only part
1177 // of this item if it is indented 4 spaces
1178 // (regardless of the indentation of the beginning of the item)
1179 case containsBlankLine && indent < 4:
1180 if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
1181 // is the next item still a part of this list?
1182 next := i
1183 for data[next] != '\n' {
1184 next++
1185 }
1186 for next < len(data)-1 && data[next] == '\n' {
1187 next++
1188 }
1189 if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1190 *flags |= ListItemEndOfList
1191 }
1192 } else {
1193 *flags |= ListItemEndOfList
1194 }
1195 break gatherlines
1196
1197 // a blank line means this should be parsed as a block
1198 case containsBlankLine:
1199 raw.WriteByte('\n')
1200 *flags |= ListItemContainsBlock
1201 }
1202
1203 // if this line was preceeded by one or more blanks,
1204 // re-introduce the blank into the buffer
1205 if containsBlankLine {
1206 containsBlankLine = false
1207 raw.WriteByte('\n')
1208
1209 }
1210
1211 // add the line into the working buffer without prefix
1212 raw.Write(data[line+indent : i])
1213
1214 line = i
1215 }
1216
1217 rawBytes := raw.Bytes()
1218
1219 // render the contents of the list item
1220 var cooked bytes.Buffer
1221 if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
1222 // intermediate render of block item, except for definition term
1223 if sublist > 0 {
1224 p.block(&cooked, rawBytes[:sublist])
1225 p.block(&cooked, rawBytes[sublist:])
1226 } else {
1227 p.block(&cooked, rawBytes)
1228 }
1229 } else {
1230 // intermediate render of inline item
1231 if sublist > 0 {
1232 p.inline(&cooked, rawBytes[:sublist])
1233 p.block(&cooked, rawBytes[sublist:])
1234 } else {
1235 p.inline(&cooked, rawBytes)
1236 }
1237 }
1238
1239 // render the actual list item
1240 cookedBytes := cooked.Bytes()
1241 parsedEnd := len(cookedBytes)
1242
1243 // strip trailing newlines
1244 for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
1245 parsedEnd--
1246 }
1247 p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)
1248
1249 return line
1250}
1251
1252// render a single paragraph that has already been parsed out
1253func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1254 if len(data) == 0 {
1255 return
1256 }
1257
1258 // trim leading spaces
1259 beg := 0
1260 for data[beg] == ' ' {
1261 beg++
1262 }
1263
1264 // trim trailing newline
1265 end := len(data) - 1
1266
1267 // trim trailing spaces
1268 for end > beg && data[end-1] == ' ' {
1269 end--
1270 }
1271
1272 p.r.BeginParagraph(out)
1273 p.inline(out, data[beg:end])
1274 p.r.EndParagraph(out)
1275}
1276
1277func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1278 // prev: index of 1st char of previous line
1279 // line: index of 1st char of current line
1280 // i: index of cursor/end of current line
1281 var prev, line, i int
1282
1283 // keep going until we find something to mark the end of the paragraph
1284 for i < len(data) {
1285 // mark the beginning of the current line
1286 prev = line
1287 current := data[i:]
1288 line = i
1289
1290 // did we find a blank line marking the end of the paragraph?
1291 if n := p.isEmpty(current); n > 0 {
1292 // did this blank line followed by a definition list item?
1293 if p.flags&DefinitionLists != 0 {
1294 if i < len(data)-1 && data[i+1] == ':' {
1295 return p.list(out, data[prev:], ListTypeDefinition)
1296 }
1297 }
1298
1299 p.renderParagraph(out, data[:i])
1300 return i + n
1301 }
1302
1303 // an underline under some text marks a header, so our paragraph ended on prev line
1304 if i > 0 {
1305 if level := p.isUnderlinedHeader(current); level > 0 {
1306 // render the paragraph
1307 p.renderParagraph(out, data[:prev])
1308
1309 // ignore leading and trailing whitespace
1310 eol := i - 1
1311 for prev < eol && data[prev] == ' ' {
1312 prev++
1313 }
1314 for eol > prev && data[eol-1] == ' ' {
1315 eol--
1316 }
1317
1318 id := ""
1319 if p.flags&AutoHeaderIDs != 0 {
1320 id = sanitized_anchor_name.Create(string(data[prev:eol]))
1321 }
1322
1323 tocMarker := p.r.BeginHeader(out, level, id)
1324 p.inline(out, data[prev:eol])
1325 p.r.EndHeader(out, level, id, tocMarker)
1326
1327 // find the end of the underline
1328 for data[i] != '\n' {
1329 i++
1330 }
1331 return i
1332 }
1333 }
1334
1335 // if the next line starts a block of HTML, then the paragraph ends here
1336 if p.flags&LaxHTMLBlocks != 0 {
1337 if data[i] == '<' && p.html(out, current, false) > 0 {
1338 // rewind to before the HTML block
1339 p.renderParagraph(out, data[:i])
1340 return i
1341 }
1342 }
1343
1344 // if there's a prefixed header or a horizontal rule after this, paragraph is over
1345 if p.isPrefixHeader(current) || p.isHRule(current) {
1346 p.renderParagraph(out, data[:i])
1347 return i
1348 }
1349
1350 // if there's a fenced code block, paragraph is over
1351 if p.flags&FencedCode != 0 {
1352 if p.fencedCode(out, current, false) > 0 {
1353 p.renderParagraph(out, data[:i])
1354 return i
1355 }
1356 }
1357
1358 // if there's a definition list item, prev line is a definition term
1359 if p.flags&DefinitionLists != 0 {
1360 if p.dliPrefix(current) != 0 {
1361 return p.list(out, data[prev:], ListTypeDefinition)
1362 }
1363 }
1364
1365 // if there's a list after this, paragraph is over
1366 if p.flags&NoEmptyLineBeforeBlock != 0 {
1367 if p.uliPrefix(current) != 0 ||
1368 p.oliPrefix(current) != 0 ||
1369 p.quotePrefix(current) != 0 ||
1370 p.codePrefix(current) != 0 {
1371 p.renderParagraph(out, data[:i])
1372 return i
1373 }
1374 }
1375
1376 // otherwise, scan to the beginning of the next line
1377 for data[i] != '\n' {
1378 i++
1379 }
1380 i++
1381 }
1382
1383 p.renderParagraph(out, data[:i])
1384 return i
1385}