block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "unicode"
19)
20
21// Parse block-level data.
22// Note: this function and many that it calls assume that
23// the input buffer ends with a newline.
24func (p *parser) block(out *bytes.Buffer, data []byte) {
25 if len(data) == 0 || data[len(data)-1] != '\n' {
26 panic("block input is missing terminating newline")
27 }
28
29 // this is called recursively: enforce a maximum depth
30 if p.nesting >= p.maxNesting {
31 return
32 }
33 p.nesting++
34
35 // parse out one block-level construct at a time
36 for len(data) > 0 {
37 // prefixed header:
38 //
39 // # Header 1
40 // ## Header 2
41 // ...
42 // ###### Header 6
43 if p.isPrefixHeader(data) {
44 data = data[p.prefixHeader(out, data):]
45 continue
46 }
47
48 // block of preformatted HTML:
49 //
50 // <div>
51 // ...
52 // </div>
53 if data[0] == '<' {
54 if i := p.html(out, data, true); i > 0 {
55 data = data[i:]
56 continue
57 }
58 }
59
60 // title block
61 //
62 // % stuff
63 // % more stuff
64 // % even more stuff
65 if p.flags&EXTENSION_TITLEBLOCK != 0 {
66 if data[0] == '%' {
67 if i := p.titleBlock(out, data, true); i > 0 {
68 data = data[i:]
69 continue
70 }
71 }
72 }
73
74 // blank lines. note: returns the # of bytes to skip
75 if i := p.isEmpty(data); i > 0 {
76 data = data[i:]
77 continue
78 }
79
80 // indented code block:
81 //
82 // func max(a, b int) int {
83 // if a > b {
84 // return a
85 // }
86 // return b
87 // }
88 if p.codePrefix(data) > 0 {
89 data = data[p.code(out, data):]
90 continue
91 }
92
93 // fenced code block:
94 //
95 // ``` go
96 // func fact(n int) int {
97 // if n <= 1 {
98 // return n
99 // }
100 // return n * fact(n-1)
101 // }
102 // ```
103 if p.flags&EXTENSION_FENCED_CODE != 0 {
104 if i := p.fencedCode(out, data, true); i > 0 {
105 data = data[i:]
106 continue
107 }
108 }
109
110 // horizontal rule:
111 //
112 // ------
113 // or
114 // ******
115 // or
116 // ______
117 if p.isHRule(data) {
118 p.r.HRule(out)
119 var i int
120 for i = 0; data[i] != '\n'; i++ {
121 }
122 data = data[i:]
123 continue
124 }
125
126 // block quote:
127 //
128 // > A big quote I found somewhere
129 // > on the web
130 if p.quotePrefix(data) > 0 {
131 data = data[p.quote(out, data):]
132 continue
133 }
134
135 // table:
136 //
137 // Name | Age | Phone
138 // ------|-----|---------
139 // Bob | 31 | 555-1234
140 // Alice | 27 | 555-4321
141 if p.flags&EXTENSION_TABLES != 0 {
142 if i := p.table(out, data); i > 0 {
143 data = data[i:]
144 continue
145 }
146 }
147
148 // an itemized/unordered list:
149 //
150 // * Item 1
151 // * Item 2
152 //
153 // also works with + or -
154 if p.uliPrefix(data) > 0 {
155 data = data[p.list(out, data, 0):]
156 continue
157 }
158
159 // a numbered/ordered list:
160 //
161 // 1. Item 1
162 // 2. Item 2
163 if p.oliPrefix(data) > 0 {
164 data = data[p.list(out, data, LIST_TYPE_ORDERED):]
165 continue
166 }
167
168 // anything else must look like a normal paragraph
169 // note: this finds underlined headers, too
170 data = data[p.paragraph(out, data):]
171 }
172
173 p.nesting--
174}
175
176func (p *parser) isPrefixHeader(data []byte) bool {
177 if data[0] != '#' {
178 return false
179 }
180
181 if p.flags&EXTENSION_SPACE_HEADERS != 0 {
182 level := 0
183 for level < 6 && data[level] == '#' {
184 level++
185 }
186 if data[level] != ' ' {
187 return false
188 }
189 }
190 return true
191}
192
193func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
194 level := 0
195 for level < 6 && data[level] == '#' {
196 level++
197 }
198 i, end := 0, 0
199 for i = level; data[i] == ' '; i++ {
200 }
201 for end = i; data[end] != '\n'; end++ {
202 }
203 skip := end
204 id := ""
205 if p.flags&EXTENSION_HEADER_IDS != 0 {
206 j, k := 0, 0
207 // find start/end of header id
208 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
209 }
210 for k = j + 1; k < end && data[k] != '}'; k++ {
211 }
212 // extract header id iff found
213 if j < end && k < end {
214 id = string(data[j+2 : k])
215 end = j
216 skip = k + 1
217 for end > 0 && data[end-1] == ' ' {
218 end--
219 }
220 }
221 }
222 for end > 0 && data[end-1] == '#' {
223 end--
224 }
225 for end > 0 && data[end-1] == ' ' {
226 end--
227 }
228 if end > i {
229 if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
230 id = createSanitizedAnchorName(string(data[i:end]))
231 }
232 work := func() bool {
233 p.inline(out, data[i:end])
234 return true
235 }
236 p.r.Header(out, work, level, id)
237 }
238 return skip
239}
240
241func (p *parser) isUnderlinedHeader(data []byte) int {
242 // test of level 1 header
243 if data[0] == '=' {
244 i := 1
245 for data[i] == '=' {
246 i++
247 }
248 for data[i] == ' ' {
249 i++
250 }
251 if data[i] == '\n' {
252 return 1
253 } else {
254 return 0
255 }
256 }
257
258 // test of level 2 header
259 if data[0] == '-' {
260 i := 1
261 for data[i] == '-' {
262 i++
263 }
264 for data[i] == ' ' {
265 i++
266 }
267 if data[i] == '\n' {
268 return 2
269 } else {
270 return 0
271 }
272 }
273
274 return 0
275}
276
277func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
278 if data[0] != '%' {
279 return 0
280 }
281 splitData := bytes.Split(data, []byte("\n"))
282 var i int
283 for idx, b := range splitData {
284 if !bytes.HasPrefix(b, []byte("%")) {
285 i = idx // - 1
286 break
287 }
288 }
289
290 data = bytes.Join(splitData[0:i], []byte("\n"))
291 p.r.TitleBlock(out, data)
292
293 return len(data)
294}
295
296func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
297 var i, j int
298
299 // identify the opening tag
300 if data[0] != '<' {
301 return 0
302 }
303 curtag, tagfound := p.htmlFindTag(data[1:])
304
305 // handle special cases
306 if !tagfound {
307 // check for an HTML comment
308 if size := p.htmlComment(out, data, doRender); size > 0 {
309 return size
310 }
311
312 // check for an <hr> tag
313 if size := p.htmlHr(out, data, doRender); size > 0 {
314 return size
315 }
316
317 // no special case recognized
318 return 0
319 }
320
321 // look for an unindented matching closing tag
322 // followed by a blank line
323 found := false
324 /*
325 closetag := []byte("\n</" + curtag + ">")
326 j = len(curtag) + 1
327 for !found {
328 // scan for a closing tag at the beginning of a line
329 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
330 j += skip + len(closetag)
331 } else {
332 break
333 }
334
335 // see if it is the only thing on the line
336 if skip := p.isEmpty(data[j:]); skip > 0 {
337 // see if it is followed by a blank line/eof
338 j += skip
339 if j >= len(data) {
340 found = true
341 i = j
342 } else {
343 if skip := p.isEmpty(data[j:]); skip > 0 {
344 j += skip
345 found = true
346 i = j
347 }
348 }
349 }
350 }
351 */
352
353 // if not found, try a second pass looking for indented match
354 // but not if tag is "ins" or "del" (following original Markdown.pl)
355 if !found && curtag != "ins" && curtag != "del" {
356 i = 1
357 for i < len(data) {
358 i++
359 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
360 i++
361 }
362
363 if i+2+len(curtag) >= len(data) {
364 break
365 }
366
367 j = p.htmlFindEnd(curtag, data[i-1:])
368
369 if j > 0 {
370 i += j - 1
371 found = true
372 break
373 }
374 }
375 }
376
377 if !found {
378 return 0
379 }
380
381 // the end of the block has been found
382 if doRender {
383 // trim newlines
384 end := i
385 for end > 0 && data[end-1] == '\n' {
386 end--
387 }
388 p.r.BlockHtml(out, data[:end])
389 }
390
391 return i
392}
393
394// HTML comment, lax form
395func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
396 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
397 return 0
398 }
399
400 i := 5
401
402 // scan for an end-of-comment marker, across lines if necessary
403 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
404 i++
405 }
406 i++
407
408 // no end-of-comment marker
409 if i >= len(data) {
410 return 0
411 }
412
413 // needs to end with a blank line
414 if j := p.isEmpty(data[i:]); j > 0 {
415 size := i + j
416 if doRender {
417 // trim trailing newlines
418 end := size
419 for end > 0 && data[end-1] == '\n' {
420 end--
421 }
422 p.r.BlockHtml(out, data[:end])
423 }
424 return size
425 }
426
427 return 0
428}
429
430// HR, which is the only self-closing block tag considered
431func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
432 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
433 return 0
434 }
435 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
436 // not an <hr> tag after all; at least not a valid one
437 return 0
438 }
439
440 i := 3
441 for data[i] != '>' && data[i] != '\n' {
442 i++
443 }
444
445 if data[i] == '>' {
446 i++
447 if j := p.isEmpty(data[i:]); j > 0 {
448 size := i + j
449 if doRender {
450 // trim newlines
451 end := size
452 for end > 0 && data[end-1] == '\n' {
453 end--
454 }
455 p.r.BlockHtml(out, data[:end])
456 }
457 return size
458 }
459 }
460
461 return 0
462}
463
464func (p *parser) htmlFindTag(data []byte) (string, bool) {
465 i := 0
466 for isalnum(data[i]) {
467 i++
468 }
469 key := string(data[:i])
470 if blockTags[key] {
471 return key, true
472 }
473 return "", false
474}
475
476func (p *parser) htmlFindEnd(tag string, data []byte) int {
477 // assume data[0] == '<' && data[1] == '/' already tested
478
479 // check if tag is a match
480 closetag := []byte("</" + tag + ">")
481 if !bytes.HasPrefix(data, closetag) {
482 return 0
483 }
484 i := len(closetag)
485
486 // check that the rest of the line is blank
487 skip := 0
488 if skip = p.isEmpty(data[i:]); skip == 0 {
489 return 0
490 }
491 i += skip
492 skip = 0
493
494 if i >= len(data) {
495 return i
496 }
497
498 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
499 return i
500 }
501 if skip = p.isEmpty(data[i:]); skip == 0 {
502 // following line must be blank
503 return 0
504 }
505
506 return i + skip
507}
508
509func (p *parser) isEmpty(data []byte) int {
510 // it is okay to call isEmpty on an empty buffer
511 if len(data) == 0 {
512 return 0
513 }
514
515 var i int
516 for i = 0; i < len(data) && data[i] != '\n'; i++ {
517 if data[i] != ' ' && data[i] != '\t' {
518 return 0
519 }
520 }
521 return i + 1
522}
523
524func (p *parser) isHRule(data []byte) bool {
525 i := 0
526
527 // skip up to three spaces
528 for i < 3 && data[i] == ' ' {
529 i++
530 }
531
532 // look at the hrule char
533 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
534 return false
535 }
536 c := data[i]
537
538 // the whole line must be the char or whitespace
539 n := 0
540 for data[i] != '\n' {
541 switch {
542 case data[i] == c:
543 n++
544 case data[i] != ' ':
545 return false
546 }
547 i++
548 }
549
550 return n >= 3
551}
552
553func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
554 i, size := 0, 0
555 skip = 0
556
557 // skip up to three spaces
558 for i < 3 && data[i] == ' ' {
559 i++
560 }
561
562 // check for the marker characters: ~ or `
563 if data[i] != '~' && data[i] != '`' {
564 return
565 }
566
567 c := data[i]
568
569 // the whole line must be the same char or whitespace
570 for data[i] == c {
571 size++
572 i++
573 }
574
575 // the marker char must occur at least 3 times
576 if size < 3 {
577 return
578 }
579 marker = string(data[i-size : i])
580
581 // if this is the end marker, it must match the beginning marker
582 if oldmarker != "" && marker != oldmarker {
583 return
584 }
585
586 if syntax != nil {
587 syn := 0
588
589 for data[i] == ' ' {
590 i++
591 }
592
593 syntaxStart := i
594
595 if data[i] == '{' {
596 i++
597 syntaxStart++
598
599 for data[i] != '}' && data[i] != '\n' {
600 syn++
601 i++
602 }
603
604 if data[i] != '}' {
605 return
606 }
607
608 // strip all whitespace at the beginning and the end
609 // of the {} block
610 for syn > 0 && isspace(data[syntaxStart]) {
611 syntaxStart++
612 syn--
613 }
614
615 for syn > 0 && isspace(data[syntaxStart+syn-1]) {
616 syn--
617 }
618
619 i++
620 } else {
621 for !isspace(data[i]) {
622 syn++
623 i++
624 }
625 }
626
627 language := string(data[syntaxStart : syntaxStart+syn])
628 *syntax = &language
629 }
630
631 for data[i] == ' ' {
632 i++
633 }
634 if data[i] != '\n' {
635 return
636 }
637
638 skip = i + 1
639 return
640}
641
642func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
643 var lang *string
644 beg, marker := p.isFencedCode(data, &lang, "")
645 if beg == 0 || beg >= len(data) {
646 return 0
647 }
648
649 var work bytes.Buffer
650
651 for {
652 // safe to assume beg < len(data)
653
654 // check for the end of the code block
655 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
656 if fenceEnd != 0 {
657 beg += fenceEnd
658 break
659 }
660
661 // copy the current line
662 end := beg
663 for data[end] != '\n' {
664 end++
665 }
666 end++
667
668 // did we reach the end of the buffer without a closing marker?
669 if end >= len(data) {
670 return 0
671 }
672
673 // verbatim copy to the working buffer
674 if doRender {
675 work.Write(data[beg:end])
676 }
677 beg = end
678 }
679
680 syntax := ""
681 if lang != nil {
682 syntax = *lang
683 }
684
685 if doRender {
686 p.r.BlockCode(out, work.Bytes(), syntax)
687 }
688
689 return beg
690}
691
692func (p *parser) table(out *bytes.Buffer, data []byte) int {
693 var header bytes.Buffer
694 i, columns := p.tableHeader(&header, data)
695 if i == 0 {
696 return 0
697 }
698
699 var body bytes.Buffer
700
701 for i < len(data) {
702 pipes, rowStart := 0, i
703 for ; data[i] != '\n'; i++ {
704 if data[i] == '|' {
705 pipes++
706 }
707 }
708
709 if pipes == 0 {
710 i = rowStart
711 break
712 }
713
714 // include the newline in data sent to tableRow
715 i++
716 p.tableRow(&body, data[rowStart:i], columns, false)
717 }
718
719 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
720
721 return i
722}
723
// isBackslashEscaped reports whether the byte at index i of data is
// escaped, meaning it is immediately preceded by an odd number of
// backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// walk left over the run of backslashes, flipping parity each step
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		escaped = !escaped
	}
	return escaped
}
732
733func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
734 i := 0
735 colCount := 1
736 for i = 0; data[i] != '\n'; i++ {
737 if data[i] == '|' && !isBackslashEscaped(data, i) {
738 colCount++
739 }
740 }
741
742 // doesn't look like a table header
743 if colCount == 1 {
744 return
745 }
746
747 // include the newline in the data sent to tableRow
748 header := data[:i+1]
749
750 // column count ignores pipes at beginning or end of line
751 if data[0] == '|' {
752 colCount--
753 }
754 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
755 colCount--
756 }
757
758 columns = make([]int, colCount)
759
760 // move on to the header underline
761 i++
762 if i >= len(data) {
763 return
764 }
765
766 if data[i] == '|' && !isBackslashEscaped(data, i) {
767 i++
768 }
769 for data[i] == ' ' {
770 i++
771 }
772
773 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
774 // and trailing | optional on last column
775 col := 0
776 for data[i] != '\n' {
777 dashes := 0
778
779 if data[i] == ':' {
780 i++
781 columns[col] |= TABLE_ALIGNMENT_LEFT
782 dashes++
783 }
784 for data[i] == '-' {
785 i++
786 dashes++
787 }
788 if data[i] == ':' {
789 i++
790 columns[col] |= TABLE_ALIGNMENT_RIGHT
791 dashes++
792 }
793 for data[i] == ' ' {
794 i++
795 }
796
797 // end of column test is messy
798 switch {
799 case dashes < 3:
800 // not a valid column
801 return
802
803 case data[i] == '|' && !isBackslashEscaped(data, i):
804 // marker found, now skip past trailing whitespace
805 col++
806 i++
807 for data[i] == ' ' {
808 i++
809 }
810
811 // trailing junk found after last column
812 if col >= colCount && data[i] != '\n' {
813 return
814 }
815
816 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
817 // something else found where marker was required
818 return
819
820 case data[i] == '\n':
821 // marker is optional for the last column
822 col++
823
824 default:
825 // trailing junk found after last column
826 return
827 }
828 }
829 if col != colCount {
830 return
831 }
832
833 p.tableRow(out, header, columns, true)
834 size = i + 1
835 return
836}
837
838func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
839 i, col := 0, 0
840 var rowWork bytes.Buffer
841
842 if data[i] == '|' && !isBackslashEscaped(data, i) {
843 i++
844 }
845
846 for col = 0; col < len(columns) && i < len(data); col++ {
847 for data[i] == ' ' {
848 i++
849 }
850
851 cellStart := i
852
853 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
854 i++
855 }
856
857 cellEnd := i
858
859 // skip the end-of-cell marker, possibly taking us past end of buffer
860 i++
861
862 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
863 cellEnd--
864 }
865
866 var cellWork bytes.Buffer
867 p.inline(&cellWork, data[cellStart:cellEnd])
868
869 if header {
870 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
871 } else {
872 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
873 }
874 }
875
876 // pad it out with empty columns to get the right number
877 for ; col < len(columns); col++ {
878 if header {
879 p.r.TableHeaderCell(&rowWork, nil, columns[col])
880 } else {
881 p.r.TableCell(&rowWork, nil, columns[col])
882 }
883 }
884
885 // silently ignore rows with too many cells
886
887 p.r.TableRow(out, rowWork.Bytes())
888}
889
890// returns blockquote prefix length
891func (p *parser) quotePrefix(data []byte) int {
892 i := 0
893 for i < 3 && data[i] == ' ' {
894 i++
895 }
896 if data[i] == '>' {
897 if data[i+1] == ' ' {
898 return i + 2
899 }
900 return i + 1
901 }
902 return 0
903}
904
905// parse a blockquote fragment
906func (p *parser) quote(out *bytes.Buffer, data []byte) int {
907 var raw bytes.Buffer
908 beg, end := 0, 0
909 for beg < len(data) {
910 end = beg
911 for data[end] != '\n' {
912 end++
913 }
914 end++
915
916 if pre := p.quotePrefix(data[beg:]); pre > 0 {
917 // skip the prefix
918 beg += pre
919 } else if p.isEmpty(data[beg:]) > 0 &&
920 (end >= len(data) ||
921 (p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0)) {
922 // blockquote ends with at least one blank line
923 // followed by something without a blockquote prefix
924 break
925 }
926
927 // this line is part of the blockquote
928 raw.Write(data[beg:end])
929 beg = end
930 }
931
932 var cooked bytes.Buffer
933 p.block(&cooked, raw.Bytes())
934 p.r.BlockQuote(out, cooked.Bytes())
935 return end
936}
937
938// returns prefix length for block code
939func (p *parser) codePrefix(data []byte) int {
940 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
941 return 4
942 }
943 return 0
944}
945
946func (p *parser) code(out *bytes.Buffer, data []byte) int {
947 var work bytes.Buffer
948
949 i := 0
950 for i < len(data) {
951 beg := i
952 for data[i] != '\n' {
953 i++
954 }
955 i++
956
957 blankline := p.isEmpty(data[beg:i]) > 0
958 if pre := p.codePrefix(data[beg:i]); pre > 0 {
959 beg += pre
960 } else if !blankline {
961 // non-empty, non-prefixed line breaks the pre
962 i = beg
963 break
964 }
965
966 // verbatim copy to the working buffeu
967 if blankline {
968 work.WriteByte('\n')
969 } else {
970 work.Write(data[beg:i])
971 }
972 }
973
974 // trim all the \n off the end of work
975 workbytes := work.Bytes()
976 eol := len(workbytes)
977 for eol > 0 && workbytes[eol-1] == '\n' {
978 eol--
979 }
980 if eol != len(workbytes) {
981 work.Truncate(eol)
982 }
983
984 work.WriteByte('\n')
985
986 p.r.BlockCode(out, work.Bytes(), "")
987
988 return i
989}
990
991// returns unordered list item prefix
992func (p *parser) uliPrefix(data []byte) int {
993 i := 0
994
995 // start with up to 3 spaces
996 for i < 3 && data[i] == ' ' {
997 i++
998 }
999
1000 // need a *, +, or - followed by a space
1001 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1002 data[i+1] != ' ' {
1003 return 0
1004 }
1005 return i + 2
1006}
1007
1008// returns ordered list item prefix
1009func (p *parser) oliPrefix(data []byte) int {
1010 i := 0
1011
1012 // start with up to 3 spaces
1013 for i < 3 && data[i] == ' ' {
1014 i++
1015 }
1016
1017 // count the digits
1018 start := i
1019 for data[i] >= '0' && data[i] <= '9' {
1020 i++
1021 }
1022
1023 // we need >= 1 digits followed by a dot and a space
1024 if start == i || data[i] != '.' || data[i+1] != ' ' {
1025 return 0
1026 }
1027 return i + 2
1028}
1029
1030// parse ordered or unordered list block
1031func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1032 i := 0
1033 flags |= LIST_ITEM_BEGINNING_OF_LIST
1034 work := func() bool {
1035 for i < len(data) {
1036 skip := p.listItem(out, data[i:], &flags)
1037 i += skip
1038
1039 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1040 break
1041 }
1042 flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1043 }
1044 return true
1045 }
1046
1047 p.r.List(out, work, flags)
1048 return i
1049}
1050
// listItem parses a single list item at the start of data, renders it into
// out and returns the number of bytes consumed. It returns 0 if data does
// not start with an unordered or ordered list prefix.
// Assumes initial prefix is already removed if this is a sublist.
//
// The item's lines are gathered into a working buffer with their list
// prefix or indentation removed. The buffer is then rendered either as
// inline text or, when LIST_ITEM_CONTAINS_BLOCK is set in *flags, as
// block-level content. A nested list found inside the item is always
// rendered as block-level content.
//
// *flags is both read and updated. LIST_ITEM_CONTAINS_BLOCK is set when a
// blank line or a nested header makes the item block-level.
// LIST_ITEM_END_OF_LIST is set when a line that follows a blank line is
// indented by fewer than four spaces and is not a list item.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// i becomes the length of the list prefix (unordered first, then ordered)
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line; i ends up just past the newline
	line := i
	for data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	// containsBlankLine: one or more blank lines were seen since the last
	//                    non-blank line
	// sublist: offset in raw where the first nested list item starts
	//          (0 means no nested list was found)
	containsBlankLine := false
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation (at most 4 spaces are counted)
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		// the line without its counted indentation
		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		// (a line that also qualifies as a horizontal rule is not treated
		// as an unordered item)
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			*flags |= LIST_ITEM_END_OF_LIST
			break gatherlines

		// a blank line means this should be parsed as a block
		// (this newline is written in addition to the one re-introduced
		// just below)
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
		// intermediate render of block li
		// (the item's own text and the nested list are parsed separately)
		if sublist > 0 {
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline li
		// (a nested list, if any, is still parsed as block-level content)
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	// line is the offset of the first line that is not part of this item
	return line
}
1206
1207// render a single paragraph that has already been parsed out
1208func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1209 if len(data) == 0 {
1210 return
1211 }
1212
1213 // trim leading spaces
1214 beg := 0
1215 for data[beg] == ' ' {
1216 beg++
1217 }
1218
1219 // trim trailing newline
1220 end := len(data) - 1
1221
1222 // trim trailing spaces
1223 for end > beg && data[end-1] == ' ' {
1224 end--
1225 }
1226
1227 work := func() bool {
1228 p.inline(out, data[beg:end])
1229 return true
1230 }
1231 p.r.Paragraph(out, work)
1232}
1233
// paragraph parses the paragraph at the start of data, renders it into out
// and returns the number of bytes consumed.
//
// Lines are scanned until something ends the paragraph:
//   - a blank line, which is consumed as well;
//   - an underline row ('=' or '-') on any line but the first: the line
//     above it becomes a header, and any lines before that are rendered as
//     a paragraph;
//   - with EXTENSION_LAX_HTML_BLOCKS, a line that starts an HTML block;
//   - a prefixed header or a horizontal rule;
//   - with EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK, a list item, blockquote
//     or indented code line;
//   - the end of data.
//
// Except for the blank-line case, the line that ended the paragraph is not
// consumed. After an underlined header the returned offset is the index of
// the newline that ends the underline.
func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
	// prev: index of 1st char of previous line
	// line: index of 1st char of current line
	// i: index of cursor/end of current line
	var prev, line, i int

	// keep going until we find something to mark the end of the paragraph
	for i < len(data) {
		// mark the beginning of the current line
		prev = line
		current := data[i:]
		line = i

		// did we find a blank line marking the end of the paragraph?
		if n := p.isEmpty(current); n > 0 {
			p.renderParagraph(out, data[:i])
			return i + n
		}

		// an underline under some text marks a header, so our paragraph ended on prev line
		if i > 0 {
			if level := p.isUnderlinedHeader(current); level > 0 {
				// render the paragraph (everything before the header line)
				p.renderParagraph(out, data[:prev])

				// ignore leading and trailing whitespace
				// (eol starts at the newline that ends the header line)
				eol := i - 1
				for prev < eol && data[prev] == ' ' {
					prev++
				}
				for eol > prev && data[eol-1] == ' ' {
					eol--
				}

				// render the header
				// this ugly double closure avoids forcing variables onto the heap
				// (per the original author; the values are passed in as
				// arguments instead of being captured)
				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
					return func() bool {
						pp.inline(o, d)
						return true
					}
				}(out, p, data[prev:eol])

				id := ""
				if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
					id = createSanitizedAnchorName(string(data[prev:eol]))
				}

				p.r.Header(out, work, level, id)

				// find the end of the underline
				// (the newline itself is not consumed)
				for data[i] != '\n' {
					i++
				}
				return i
			}
		}

		// if the next line starts a block of HTML, then the paragraph ends here
		// (html is called with doRender=false, so it only probes)
		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
			if data[i] == '<' && p.html(out, current, false) > 0 {
				// rewind to before the HTML block
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// if there's a prefixed header or a horizontal rule after this, paragraph is over
		if p.isPrefixHeader(current) || p.isHRule(current) {
			p.renderParagraph(out, data[:i])
			return i
		}

		// if there's a list, blockquote or indented code after this, paragraph is over
		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
			if p.uliPrefix(current) != 0 ||
				p.oliPrefix(current) != 0 ||
				p.quotePrefix(current) != 0 ||
				p.codePrefix(current) != 0 {
				p.renderParagraph(out, data[:i])
				return i
			}
		}

		// otherwise, scan to the beginning of the next line
		for data[i] != '\n' {
			i++
		}
		i++
	}

	// reached the end of data without finding a terminator
	p.renderParagraph(out, data[:i])
	return i
}
1328
// createSanitizedAnchorName derives an anchor name from text. Each space
// becomes '-', letters and numbers are kept in lower case, and every other
// character is dropped.
func createSanitizedAnchorName(text string) string {
	anchor := make([]rune, 0, len(text))
	for _, r := range text {
		if r == ' ' {
			anchor = append(anchor, '-')
			continue
		}
		if unicode.IsLetter(r) || unicode.IsNumber(r) {
			anchor = append(anchor, unicode.ToLower(r))
		}
	}
	return string(anchor)
}