block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "html"
19 "regexp"
20
21 "github.com/shurcooL/sanitized_anchor_name"
22)
23
const (
	// Entity is a regular-expression fragment matching an HTML character
	// reference: hexadecimal ("&#x1F;"), decimal ("&#123;") or named
	// ("&amp;"). It is written in lower case and is meant to be compiled
	// together with the (?i) flag, as reEntityOrEscapedChar does.
	Entity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// Escapable is a regular-expression character class listing the ASCII
	// punctuation characters that may be escaped with a backslash.
	Escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is a cheap pre-check: it matches when the input
	// contains a backslash or an ampersand, i.e. when there may be something
	// to unescape. The class must contain a literal backslash, hence the
	// doubled backslash inside the raw string; a single one would merely
	// escape the ampersand and the backslash would never match.
	reBackslashOrAmp = regexp.MustCompile(`[\\&]`)
	// reEntityOrEscapedChar matches either a backslash followed by an
	// escapable punctuation character or an HTML character reference.
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + Escapable + "|" + Entity)
	// reTrailingWhitespace matches one or more trailing groups made of a
	// newline followed by any number of spaces.
	reTrailingWhitespace = regexp.MustCompile("(\n *)+$")
)
34
35// Parse block-level data.
36// Note: this function and many that it calls assume that
37// the input buffer ends with a newline.
38func (p *parser) block(data []byte) {
39 if len(data) == 0 || data[len(data)-1] != '\n' {
40 panic("block input is missing terminating newline")
41 }
42
43 // this is called recursively: enforce a maximum depth
44 if p.nesting >= p.maxNesting {
45 return
46 }
47 p.nesting++
48
49 // parse out one block-level construct at a time
50 for len(data) > 0 {
51 // prefixed header:
52 //
53 // # Header 1
54 // ## Header 2
55 // ...
56 // ###### Header 6
57 if p.isPrefixHeader(data) {
58 data = data[p.prefixHeader(data):]
59 continue
60 }
61
62 // block of preformatted HTML:
63 //
64 // <div>
65 // ...
66 // </div>
67 if data[0] == '<' {
68 if i := p.html(data, true); i > 0 {
69 data = data[i:]
70 continue
71 }
72 }
73
74 // title block
75 //
76 // % stuff
77 // % more stuff
78 // % even more stuff
79 if p.flags&Titleblock != 0 {
80 if data[0] == '%' {
81 if i := p.titleBlock(data, true); i > 0 {
82 data = data[i:]
83 continue
84 }
85 }
86 }
87
88 // blank lines. note: returns the # of bytes to skip
89 if i := p.isEmpty(data); i > 0 {
90 data = data[i:]
91 continue
92 }
93
94 // indented code block:
95 //
96 // func max(a, b int) int {
97 // if a > b {
98 // return a
99 // }
100 // return b
101 // }
102 if p.codePrefix(data) > 0 {
103 data = data[p.code(data):]
104 continue
105 }
106
107 // fenced code block:
108 //
109 // ``` go
110 // func fact(n int) int {
111 // if n <= 1 {
112 // return n
113 // }
114 // return n * fact(n-1)
115 // }
116 // ```
117 if p.flags&FencedCode != 0 {
118 if i := p.fencedCode(data, true); i > 0 {
119 data = data[i:]
120 continue
121 }
122 }
123
124 // horizontal rule:
125 //
126 // ------
127 // or
128 // ******
129 // or
130 // ______
131 if p.isHRule(data) {
132 p.addBlock(HorizontalRule, nil)
133 var i int
134 for i = 0; data[i] != '\n'; i++ {
135 }
136 data = data[i:]
137 continue
138 }
139
140 // block quote:
141 //
142 // > A big quote I found somewhere
143 // > on the web
144 if p.quotePrefix(data) > 0 {
145 data = data[p.quote(data):]
146 continue
147 }
148
149 // table:
150 //
151 // Name | Age | Phone
152 // ------|-----|---------
153 // Bob | 31 | 555-1234
154 // Alice | 27 | 555-4321
155 if p.flags&Tables != 0 {
156 if i := p.table(data); i > 0 {
157 data = data[i:]
158 continue
159 }
160 }
161
162 // an itemized/unordered list:
163 //
164 // * Item 1
165 // * Item 2
166 //
167 // also works with + or -
168 if p.uliPrefix(data) > 0 {
169 data = data[p.list(data, 0):]
170 continue
171 }
172
173 // a numbered/ordered list:
174 //
175 // 1. Item 1
176 // 2. Item 2
177 if p.oliPrefix(data) > 0 {
178 data = data[p.list(data, ListTypeOrdered):]
179 continue
180 }
181
182 // definition lists:
183 //
184 // Term 1
185 // : Definition a
186 // : Definition b
187 //
188 // Term 2
189 // : Definition c
190 if p.flags&DefinitionLists != 0 {
191 if p.dliPrefix(data) > 0 {
192 data = data[p.list(data, ListTypeDefinition):]
193 continue
194 }
195 }
196
197 // anything else must look like a normal paragraph
198 // note: this finds underlined headers, too
199 data = data[p.paragraph(data):]
200 }
201
202 p.nesting--
203}
204
205func (p *parser) addBlock(typ NodeType, content []byte) *Node {
206 p.closeUnmatchedBlocks()
207 container := p.addChild(typ, 0)
208 container.content = content
209 return container
210}
211
212func (p *parser) isPrefixHeader(data []byte) bool {
213 if data[0] != '#' {
214 return false
215 }
216
217 if p.flags&SpaceHeaders != 0 {
218 level := 0
219 for level < 6 && data[level] == '#' {
220 level++
221 }
222 if data[level] != ' ' {
223 return false
224 }
225 }
226 return true
227}
228
229func (p *parser) prefixHeader(data []byte) int {
230 level := 0
231 for level < 6 && data[level] == '#' {
232 level++
233 }
234 i := skipChar(data, level, ' ')
235 end := skipUntilChar(data, i, '\n')
236 skip := end
237 id := ""
238 if p.flags&HeaderIDs != 0 {
239 j, k := 0, 0
240 // find start/end of header id
241 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
242 }
243 for k = j + 1; k < end && data[k] != '}'; k++ {
244 }
245 // extract header id iff found
246 if j < end && k < end {
247 id = string(data[j+2 : k])
248 end = j
249 skip = k + 1
250 for end > 0 && data[end-1] == ' ' {
251 end--
252 }
253 }
254 }
255 for end > 0 && data[end-1] == '#' {
256 if isBackslashEscaped(data, end-1) {
257 break
258 }
259 end--
260 }
261 for end > 0 && data[end-1] == ' ' {
262 end--
263 }
264 if end > i {
265 if id == "" && p.flags&AutoHeaderIDs != 0 {
266 id = sanitized_anchor_name.Create(string(data[i:end]))
267 }
268 block := p.addBlock(Header, data[i:end])
269 block.HeaderID = id
270 block.Level = uint32(level)
271 }
272 return skip
273}
274
275func (p *parser) isUnderlinedHeader(data []byte) int {
276 // test of level 1 header
277 if data[0] == '=' {
278 i := skipChar(data, 1, '=')
279 i = skipChar(data, i, ' ')
280 if data[i] == '\n' {
281 return 1
282 } else {
283 return 0
284 }
285 }
286
287 // test of level 2 header
288 if data[0] == '-' {
289 i := skipChar(data, 1, '-')
290 i = skipChar(data, i, ' ')
291 if data[i] == '\n' {
292 return 2
293 } else {
294 return 0
295 }
296 }
297
298 return 0
299}
300
301func (p *parser) titleBlock(data []byte, doRender bool) int {
302 if data[0] != '%' {
303 return 0
304 }
305 splitData := bytes.Split(data, []byte("\n"))
306 var i int
307 for idx, b := range splitData {
308 if !bytes.HasPrefix(b, []byte("%")) {
309 i = idx // - 1
310 break
311 }
312 }
313
314 data = bytes.Join(splitData[0:i], []byte("\n"))
315 consumed := len(data)
316 data = bytes.TrimPrefix(data, []byte("% "))
317 data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
318 block := p.addBlock(Header, data)
319 block.Level = 1
320 block.IsTitleblock = true
321
322 return consumed
323}
324
325func (p *parser) html(data []byte, doRender bool) int {
326 var i, j int
327
328 // identify the opening tag
329 if data[0] != '<' {
330 return 0
331 }
332 curtag, tagfound := p.htmlFindTag(data[1:])
333
334 // handle special cases
335 if !tagfound {
336 // check for an HTML comment
337 if size := p.htmlComment(data, doRender); size > 0 {
338 return size
339 }
340
341 // check for an <hr> tag
342 if size := p.htmlHr(data, doRender); size > 0 {
343 return size
344 }
345
346 // no special case recognized
347 return 0
348 }
349
350 // look for an unindented matching closing tag
351 // followed by a blank line
352 found := false
353 /*
354 closetag := []byte("\n</" + curtag + ">")
355 j = len(curtag) + 1
356 for !found {
357 // scan for a closing tag at the beginning of a line
358 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
359 j += skip + len(closetag)
360 } else {
361 break
362 }
363
364 // see if it is the only thing on the line
365 if skip := p.isEmpty(data[j:]); skip > 0 {
366 // see if it is followed by a blank line/eof
367 j += skip
368 if j >= len(data) {
369 found = true
370 i = j
371 } else {
372 if skip := p.isEmpty(data[j:]); skip > 0 {
373 j += skip
374 found = true
375 i = j
376 }
377 }
378 }
379 }
380 */
381
382 // if not found, try a second pass looking for indented match
383 // but not if tag is "ins" or "del" (following original Markdown.pl)
384 if !found && curtag != "ins" && curtag != "del" {
385 i = 1
386 for i < len(data) {
387 i++
388 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
389 i++
390 }
391
392 if i+2+len(curtag) >= len(data) {
393 break
394 }
395
396 j = p.htmlFindEnd(curtag, data[i-1:])
397
398 if j > 0 {
399 i += j - 1
400 found = true
401 break
402 }
403 }
404 }
405
406 if !found {
407 return 0
408 }
409
410 // the end of the block has been found
411 if doRender {
412 // trim newlines
413 end := i
414 for end > 0 && data[end-1] == '\n' {
415 end--
416 }
417 finalizeHtmlBlock(p.addBlock(HtmlBlock, data[:end]))
418 }
419
420 return i
421}
422
423func finalizeHtmlBlock(block *Node) {
424 block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{})
425 block.content = []byte{}
426}
427
428// HTML comment, lax form
429func (p *parser) htmlComment(data []byte, doRender bool) int {
430 i := p.inlineHtmlComment(data)
431 // needs to end with a blank line
432 if j := p.isEmpty(data[i:]); j > 0 {
433 size := i + j
434 if doRender {
435 // trim trailing newlines
436 end := size
437 for end > 0 && data[end-1] == '\n' {
438 end--
439 }
440 block := p.addBlock(HtmlBlock, data[:end])
441 finalizeHtmlBlock(block)
442 }
443 return size
444 }
445 return 0
446}
447
448// HR, which is the only self-closing block tag considered
449func (p *parser) htmlHr(data []byte, doRender bool) int {
450 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
451 return 0
452 }
453 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
454 // not an <hr> tag after all; at least not a valid one
455 return 0
456 }
457
458 i := 3
459 for data[i] != '>' && data[i] != '\n' {
460 i++
461 }
462
463 if data[i] == '>' {
464 i++
465 if j := p.isEmpty(data[i:]); j > 0 {
466 size := i + j
467 if doRender {
468 // trim newlines
469 end := size
470 for end > 0 && data[end-1] == '\n' {
471 end--
472 }
473 finalizeHtmlBlock(p.addBlock(HtmlBlock, data[:end]))
474 }
475 return size
476 }
477 }
478
479 return 0
480}
481
482func (p *parser) htmlFindTag(data []byte) (string, bool) {
483 i := 0
484 for isalnum(data[i]) {
485 i++
486 }
487 key := string(data[:i])
488 if _, ok := blockTags[key]; ok {
489 return key, true
490 }
491 return "", false
492}
493
494func (p *parser) htmlFindEnd(tag string, data []byte) int {
495 // assume data[0] == '<' && data[1] == '/' already tested
496 if tag == "hr" {
497 return 2
498 }
499 // check if tag is a match
500 closetag := []byte("</" + tag + ">")
501 if !bytes.HasPrefix(data, closetag) {
502 return 0
503 }
504 i := len(closetag)
505
506 // check that the rest of the line is blank
507 skip := 0
508 if skip = p.isEmpty(data[i:]); skip == 0 {
509 return 0
510 }
511 i += skip
512 skip = 0
513
514 if i >= len(data) {
515 return i
516 }
517
518 if p.flags&LaxHTMLBlocks != 0 {
519 return i
520 }
521 if skip = p.isEmpty(data[i:]); skip == 0 {
522 // following line must be blank
523 return 0
524 }
525
526 return i + skip
527}
528
529func (p *parser) isEmpty(data []byte) int {
530 // it is okay to call isEmpty on an empty buffer
531 if len(data) == 0 {
532 return 0
533 }
534
535 var i int
536 for i = 0; i < len(data) && data[i] != '\n'; i++ {
537 if data[i] != ' ' && data[i] != '\t' {
538 return 0
539 }
540 }
541 return i + 1
542}
543
544func (p *parser) isHRule(data []byte) bool {
545 i := 0
546
547 // skip up to three spaces
548 for i < 3 && data[i] == ' ' {
549 i++
550 }
551
552 // look at the hrule char
553 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
554 return false
555 }
556 c := data[i]
557
558 // the whole line must be the char or whitespace
559 n := 0
560 for data[i] != '\n' {
561 switch {
562 case data[i] == c:
563 n++
564 case data[i] != ' ':
565 return false
566 }
567 i++
568 }
569
570 return n >= 3
571}
572
// isFencedCode checks whether data begins with a code fence line: up to
// three leading spaces, a run of at least three '~' or '`' characters, an
// optional syntax word, optional spaces and a newline.
//
// When oldmarker is non-empty the line is tested as a closing fence, and its
// marker run must be exactly equal to oldmarker. When syntax is non-nil the
// text after the marker is parsed as the syntax name (either a bare word or
// a "{...}" group with surrounding whitespace stripped) and a pointer to it
// is stored through syntax.
//
// skip is the number of bytes up to and including the newline of the fence
// line, or 0 if the line is not a valid fence. marker is the run of fence
// characters; it is assigned as soon as a long enough run has been seen, so
// it may be non-empty even when skip is 0.
func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
	i, size := 0, 0
	skip = 0

	// skip up to three spaces
	for i < len(data) && i < 3 && data[i] == ' ' {
		i++
	}
	if i >= len(data) {
		return
	}

	// check for the marker characters: ~ or `
	if data[i] != '~' && data[i] != '`' {
		return
	}

	c := data[i]

	// count the run of consecutive marker characters
	for i < len(data) && data[i] == c {
		size++
		i++
	}

	// the marker run reached the end of the buffer: no newline, not a fence
	if i >= len(data) {
		return
	}

	// the marker char must occur at least 3 times
	if size < 3 {
		return
	}
	marker = string(data[i-size : i])

	// if this is the end marker, it must match the beginning marker
	if oldmarker != "" && marker != oldmarker {
		return
	}

	if syntax != nil {
		// syn counts the bytes of the syntax name starting at syntaxStart
		syn := 0
		i = skipChar(data, i, ' ')

		if i >= len(data) {
			return
		}

		syntaxStart := i

		if data[i] == '{' {
			// braced form: the name is everything up to the closing '}'
			i++
			syntaxStart++

			for i < len(data) && data[i] != '}' && data[i] != '\n' {
				syn++
				i++
			}

			// an unterminated brace group invalidates the fence
			if i >= len(data) || data[i] != '}' {
				return
			}

			// strip all whitespace at the beginning and the end
			// of the {} block
			for syn > 0 && isspace(data[syntaxStart]) {
				syntaxStart++
				syn--
			}

			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
				syn--
			}

			// step over the closing '}'
			i++
		} else {
			// bare form: the name runs up to the next whitespace character
			for i < len(data) && !isspace(data[i]) {
				syn++
				i++
			}
		}

		language := string(data[syntaxStart : syntaxStart+syn])
		*syntax = &language
	}

	// only spaces may follow before the end of the line
	i = skipChar(data, i, ' ')
	if i >= len(data) || data[i] != '\n' {
		return
	}

	skip = i + 1
	return
}
667
668func (p *parser) fencedCode(data []byte, doRender bool) int {
669 var lang *string
670 beg, marker := p.isFencedCode(data, &lang, "")
671 if beg == 0 || beg >= len(data) {
672 return 0
673 }
674
675 var work bytes.Buffer
676 if lang != nil {
677 work.Write([]byte(*lang))
678 work.WriteByte('\n')
679 }
680
681 for {
682 // safe to assume beg < len(data)
683
684 // check for the end of the code block
685 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
686 if fenceEnd != 0 {
687 beg += fenceEnd
688 break
689 }
690
691 // copy the current line
692 end := skipUntilChar(data, beg, '\n') + 1
693
694 // did we reach the end of the buffer without a closing marker?
695 if end >= len(data) {
696 return 0
697 }
698
699 // verbatim copy to the working buffer
700 if doRender {
701 work.Write(data[beg:end])
702 }
703 beg = end
704 }
705
706 //syntax := ""
707 //if lang != nil {
708 // syntax = *lang
709 //}
710
711 if doRender {
712 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
713 block.IsFenced = true
714 finalizeCodeBlock(block)
715 }
716
717 return beg
718}
719
// unescapeChar decodes a single escape sequence. For a backslash escape it
// returns the byte following the backslash; anything else is treated as an
// HTML entity and decoded with html.UnescapeString.
func unescapeChar(str []byte) []byte {
	if str[0] != '\\' {
		decoded := html.UnescapeString(string(str))
		return []byte(decoded)
	}
	return []byte{str[1]}
}
726
727func unescapeString(str []byte) []byte {
728 if reBackslashOrAmp.Match(str) {
729 return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
730 } else {
731 return str
732 }
733}
734
735func finalizeCodeBlock(block *Node) {
736 if block.IsFenced {
737 newlinePos := bytes.IndexByte(block.content, '\n')
738 firstLine := block.content[:newlinePos]
739 rest := block.content[newlinePos+1:]
740 block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
741 block.Literal = rest
742 } else {
743 block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'})
744 }
745 block.content = nil
746}
747
748func (p *parser) table(data []byte) int {
749 table := p.addBlock(Table, nil)
750 i, columns := p.tableHeader(data)
751 if i == 0 {
752 p.tip = table.Parent
753 table.unlink()
754 return 0
755 }
756
757 p.addBlock(TableBody, nil)
758
759 for i < len(data) {
760 pipes, rowStart := 0, i
761 for ; data[i] != '\n'; i++ {
762 if data[i] == '|' {
763 pipes++
764 }
765 }
766
767 if pipes == 0 {
768 i = rowStart
769 break
770 }
771
772 // include the newline in data sent to tableRow
773 i++
774 p.tableRow(data[rowStart:i], columns, false)
775 }
776
777 return i
778}
779
// isBackslashEscaped reports whether the byte at index i is preceded by an
// odd number of consecutive backslashes, i.e. whether it is escaped.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// walk backwards over the run of backslashes, flipping parity each step
	for pos := i - 1; pos >= 0 && data[pos] == '\\'; pos-- {
		escaped = !escaped
	}
	return escaped
}
788
// tableHeader checks whether data starts with a table header: a row with at
// least one unescaped '|' followed by an underline row in which every column
// has the form / *:?-+:? *|/.
//
// On success it adds a TableHead block, emits the header row through
// tableRow and returns the number of bytes consumed by both lines (size)
// together with the per-column alignment flags (columns). On failure size is
// 0; columns may still be non-nil in that case and must not be relied upon.
func (p *parser) tableHeader(data []byte) (size int, columns []int) {
	i := 0
	// count the columns of the first line: one more than the number of
	// unescaped pipes
	colCount := 1
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and leading spaces are skipped
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	i = skipChar(data, i, ' ')

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		dashes := 0

		// a leading colon requests left alignment and counts as a dash
		if data[i] == ':' {
			i++
			columns[col] |= TableAlignmentLeft
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		// a trailing colon requests right alignment and counts as a dash
		if data[i] == ':' {
			i++
			columns[col] |= TableAlignmentRight
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the header row
	if col != colCount {
		return
	}

	p.addBlock(TableHead, nil)
	p.tableRow(header, columns, true)
	// i sits on the underline's newline; consume it as well
	size = i + 1
	return
}
892
893func (p *parser) tableRow(data []byte, columns []int, header bool) {
894 p.addBlock(TableRow, nil)
895 i, col := 0, 0
896
897 if data[i] == '|' && !isBackslashEscaped(data, i) {
898 i++
899 }
900
901 for col = 0; col < len(columns) && i < len(data); col++ {
902 for data[i] == ' ' {
903 i++
904 }
905
906 cellStart := i
907
908 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
909 i++
910 }
911
912 cellEnd := i
913
914 // skip the end-of-cell marker, possibly taking us past end of buffer
915 i++
916
917 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
918 cellEnd--
919 }
920
921 cell := p.addBlock(TableCell, data[cellStart:cellEnd])
922 cell.IsHeader = header
923 cell.Align = columns[col]
924 }
925
926 // pad it out with empty columns to get the right number
927 for ; col < len(columns); col++ {
928 cell := p.addBlock(TableCell, nil)
929 cell.IsHeader = header
930 cell.Align = columns[col]
931 }
932
933 // silently ignore rows with too many cells
934}
935
936// returns blockquote prefix length
937func (p *parser) quotePrefix(data []byte) int {
938 i := 0
939 for i < 3 && data[i] == ' ' {
940 i++
941 }
942 if data[i] == '>' {
943 if data[i+1] == ' ' {
944 return i + 2
945 }
946 return i + 1
947 }
948 return 0
949}
950
951// blockquote ends with at least one blank line
952// followed by something without a blockquote prefix
953func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
954 if p.isEmpty(data[beg:]) <= 0 {
955 return false
956 }
957 if end >= len(data) {
958 return true
959 }
960 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
961}
962
963// parse a blockquote fragment
964func (p *parser) quote(data []byte) int {
965 block := p.addBlock(BlockQuote, nil)
966 var raw bytes.Buffer
967 beg, end := 0, 0
968 for beg < len(data) {
969 end = beg
970 // Step over whole lines, collecting them. While doing that, check for
971 // fenced code and if one's found, incorporate it altogether,
972 // irregardless of any contents inside it
973 for data[end] != '\n' {
974 if p.flags&FencedCode != 0 {
975 if i := p.fencedCode(data[end:], false); i > 0 {
976 // -1 to compensate for the extra end++ after the loop:
977 end += i - 1
978 break
979 }
980 }
981 end++
982 }
983 end++
984 if pre := p.quotePrefix(data[beg:]); pre > 0 {
985 // skip the prefix
986 beg += pre
987 } else if p.terminateBlockquote(data, beg, end) {
988 break
989 }
990 // this line is part of the blockquote
991 raw.Write(data[beg:end])
992 beg = end
993 }
994 p.block(raw.Bytes())
995 p.finalize(block)
996 return end
997}
998
999// returns prefix length for block code
1000func (p *parser) codePrefix(data []byte) int {
1001 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1002 return 4
1003 }
1004 return 0
1005}
1006
1007func (p *parser) code(data []byte) int {
1008 var work bytes.Buffer
1009
1010 i := 0
1011 for i < len(data) {
1012 beg := i
1013 for data[i] != '\n' {
1014 i++
1015 }
1016 i++
1017
1018 blankline := p.isEmpty(data[beg:i]) > 0
1019 if pre := p.codePrefix(data[beg:i]); pre > 0 {
1020 beg += pre
1021 } else if !blankline {
1022 // non-empty, non-prefixed line breaks the pre
1023 i = beg
1024 break
1025 }
1026
1027 // verbatim copy to the working buffeu
1028 if blankline {
1029 work.WriteByte('\n')
1030 } else {
1031 work.Write(data[beg:i])
1032 }
1033 }
1034
1035 // trim all the \n off the end of work
1036 workbytes := work.Bytes()
1037 eol := len(workbytes)
1038 for eol > 0 && workbytes[eol-1] == '\n' {
1039 eol--
1040 }
1041 if eol != len(workbytes) {
1042 work.Truncate(eol)
1043 }
1044
1045 work.WriteByte('\n')
1046
1047 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1048 block.IsFenced = false
1049 finalizeCodeBlock(block)
1050
1051 return i
1052}
1053
1054// returns unordered list item prefix
1055func (p *parser) uliPrefix(data []byte) int {
1056 i := 0
1057
1058 // start with up to 3 spaces
1059 for i < 3 && data[i] == ' ' {
1060 i++
1061 }
1062
1063 // need a *, +, or - followed by a space
1064 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1065 data[i+1] != ' ' {
1066 return 0
1067 }
1068 return i + 2
1069}
1070
1071// returns ordered list item prefix
1072func (p *parser) oliPrefix(data []byte) int {
1073 i := 0
1074
1075 // start with up to 3 spaces
1076 for i < 3 && data[i] == ' ' {
1077 i++
1078 }
1079
1080 // count the digits
1081 start := i
1082 for data[i] >= '0' && data[i] <= '9' {
1083 i++
1084 }
1085
1086 // we need >= 1 digits followed by a dot and a space
1087 if start == i || data[i] != '.' || data[i+1] != ' ' {
1088 return 0
1089 }
1090 return i + 2
1091}
1092
1093// returns definition list item prefix
1094func (p *parser) dliPrefix(data []byte) int {
1095 i := 0
1096
1097 // need a : followed by a spaces
1098 if data[i] != ':' || data[i+1] != ' ' {
1099 return 0
1100 }
1101 for data[i] == ' ' {
1102 i++
1103 }
1104 return i + 2
1105}
1106
1107// parse ordered or unordered list block
1108func (p *parser) list(data []byte, flags ListType) int {
1109 i := 0
1110 flags |= ListItemBeginningOfList
1111 block := p.addBlock(List, nil)
1112 block.ListData = &ListData{ // TODO: fill in the real ListData
1113 Flags: flags,
1114 Tight: true,
1115 BulletChar: '*',
1116 Delimiter: 0,
1117 }
1118
1119 for i < len(data) {
1120 skip := p.listItem(data[i:], &flags)
1121 if flags&ListItemContainsBlock != 0 {
1122 block.ListData.Tight = false
1123 }
1124 i += skip
1125 if skip == 0 || flags&ListItemEndOfList != 0 {
1126 break
1127 }
1128 flags &= ^ListItemBeginningOfList
1129 }
1130
1131 above := block.Parent
1132 finalizeList(block)
1133 p.tip = above
1134 return i
1135}
1136
1137// Returns true if block ends with a blank line, descending if needed
1138// into lists and sublists.
1139func endsWithBlankLine(block *Node) bool {
1140 // TODO: figure this out. Always false now.
1141 for block != nil {
1142 //if block.lastLineBlank {
1143 //return true
1144 //}
1145 t := block.Type
1146 if t == List || t == Item {
1147 block = block.LastChild
1148 } else {
1149 break
1150 }
1151 }
1152 return false
1153}
1154
1155func finalizeList(block *Node) {
1156 block.open = false
1157 item := block.FirstChild
1158 for item != nil {
1159 // check for non-final list item ending with blank line:
1160 if endsWithBlankLine(item) && item.Next != nil {
1161 block.ListData.Tight = false
1162 break
1163 }
1164 // recurse into children of list item, to see if there are spaces
1165 // between any of them:
1166 subItem := item.FirstChild
1167 for subItem != nil {
1168 if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
1169 block.ListData.Tight = false
1170 break
1171 }
1172 subItem = subItem.Next
1173 }
1174 item = item.Next
1175 }
1176}
1177
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// The item's lines are gathered into a working buffer with their indentation
// (up to four spaces) removed, an Item block is added, and the gathered text
// is then parsed either as block-level content or as a single paragraph,
// with any nested list parsed separately.
//
// flags is both input and output: ListTypeTerm is set or cleared depending
// on whether this item is a definition term, ListItemContainsBlock is set
// when the item has to be rendered as block content, and ListItemEndOfList
// is set when the list ends after this item.
//
// Returns the number of bytes consumed, or 0 if data does not start with a
// list item (and the list is not a definition list).
func (p *parser) listItem(data []byte, flags *ListType) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// try the item prefixes in turn: unordered, ordered, definition
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		i = p.dliPrefix(data)
		// reset definition term flag
		if i > 0 {
			*flags &= ^ListTypeTerm
		}
	}
	if i == 0 {
		// if in definition list, set term flag and continue
		if *flags&ListTypeDefinition != 0 {
			*flags |= ListTypeTerm
		} else {
			return 0
		}
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	// (when no prefix matched, i is 0 here, the loop does not run and the
	// first "line" written below is empty)
	line := i
	for i > 0 && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false
	// offset in raw at which the first nested list item starts, 0 if none
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0 ||
			p.dliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= ListItemContainsBlock
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= ListItemEndOfList
				break gatherlines
			}
			*flags |= ListItemContainsBlock

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
				// is the next item still a part of this list?
				next := i
				for data[next] != '\n' {
					next++
				}
				for next < len(data)-1 && data[next] == '\n' {
					next++
				}
				// the list ends unless a definition (':') starts at i or
				// at next
				if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
					*flags |= ListItemEndOfList
				}
			} else {
				*flags |= ListItemEndOfList
			}
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= ListItemContainsBlock
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')

		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	rawBytes := raw.Bytes()

	block := p.addBlock(Item, nil)
	block.ListData = &ListData{ // TODO: fill in the real ListData
		Flags:      *flags,
		Tight:      false,
		BulletChar: '*',
		Delimiter:  0,
	}

	// render the contents of the list item
	if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
		// intermediate render of block item, except for definition term
		if sublist > 0 {
			// the item's own text and its nested list are parsed separately
			p.block(rawBytes[:sublist])
			p.block(rawBytes[sublist:])
		} else {
			p.block(rawBytes)
		}
	} else {
		// intermediate render of inline item
		if sublist > 0 {
			child := p.addChild(Paragraph, 0)
			child.content = rawBytes[:sublist]
			p.block(rawBytes[sublist:])
		} else {
			child := p.addChild(Paragraph, 0)
			child.content = rawBytes
		}
	}
	return line
}
1359
1360// render a single paragraph that has already been parsed out
1361func (p *parser) renderParagraph(data []byte) {
1362 if len(data) == 0 {
1363 return
1364 }
1365
1366 // trim leading spaces
1367 beg := 0
1368 for data[beg] == ' ' {
1369 beg++
1370 }
1371
1372 // trim trailing newline
1373 end := len(data) - 1
1374
1375 // trim trailing spaces
1376 for end > beg && data[end-1] == ' ' {
1377 end--
1378 }
1379
1380 p.addBlock(Paragraph, data[beg:end])
1381}
1382
1383func (p *parser) paragraph(data []byte) int {
1384 // prev: index of 1st char of previous line
1385 // line: index of 1st char of current line
1386 // i: index of cursor/end of current line
1387 var prev, line, i int
1388
1389 // keep going until we find something to mark the end of the paragraph
1390 for i < len(data) {
1391 // mark the beginning of the current line
1392 prev = line
1393 current := data[i:]
1394 line = i
1395
1396 // did we find a blank line marking the end of the paragraph?
1397 if n := p.isEmpty(current); n > 0 {
1398 // did this blank line followed by a definition list item?
1399 if p.flags&DefinitionLists != 0 {
1400 if i < len(data)-1 && data[i+1] == ':' {
1401 return p.list(data[prev:], ListTypeDefinition)
1402 }
1403 }
1404
1405 p.renderParagraph(data[:i])
1406 return i + n
1407 }
1408
1409 // an underline under some text marks a header, so our paragraph ended on prev line
1410 if i > 0 {
1411 if level := p.isUnderlinedHeader(current); level > 0 {
1412 // render the paragraph
1413 p.renderParagraph(data[:prev])
1414
1415 // ignore leading and trailing whitespace
1416 eol := i - 1
1417 for prev < eol && data[prev] == ' ' {
1418 prev++
1419 }
1420 for eol > prev && data[eol-1] == ' ' {
1421 eol--
1422 }
1423
1424 id := ""
1425 if p.flags&AutoHeaderIDs != 0 {
1426 id = sanitized_anchor_name.Create(string(data[prev:eol]))
1427 }
1428
1429 block := p.addBlock(Header, data[prev:eol])
1430 block.Level = uint32(level)
1431 block.HeaderID = id
1432
1433 // find the end of the underline
1434 for data[i] != '\n' {
1435 i++
1436 }
1437 return i
1438 }
1439 }
1440
1441 // if the next line starts a block of HTML, then the paragraph ends here
1442 if p.flags&LaxHTMLBlocks != 0 {
1443 if data[i] == '<' && p.html(current, false) > 0 {
1444 // rewind to before the HTML block
1445 p.renderParagraph(data[:i])
1446 return i
1447 }
1448 }
1449
1450 // if there's a prefixed header or a horizontal rule after this, paragraph is over
1451 if p.isPrefixHeader(current) || p.isHRule(current) {
1452 p.renderParagraph(data[:i])
1453 return i
1454 }
1455
1456 // if there's a fenced code block, paragraph is over
1457 if p.flags&FencedCode != 0 {
1458 if p.fencedCode(current, false) > 0 {
1459 p.renderParagraph(data[:i])
1460 return i
1461 }
1462 }
1463
1464 // if there's a definition list item, prev line is a definition term
1465 if p.flags&DefinitionLists != 0 {
1466 if p.dliPrefix(current) != 0 {
1467 return p.list(data[prev:], ListTypeDefinition)
1468 }
1469 }
1470
1471 // if there's a list after this, paragraph is over
1472 if p.flags&NoEmptyLineBeforeBlock != 0 {
1473 if p.uliPrefix(current) != 0 ||
1474 p.oliPrefix(current) != 0 ||
1475 p.quotePrefix(current) != 0 ||
1476 p.codePrefix(current) != 0 {
1477 p.renderParagraph(data[:i])
1478 return i
1479 }
1480 }
1481
1482 // otherwise, scan to the beginning of the next line
1483 for data[i] != '\n' {
1484 i++
1485 }
1486 i++
1487 }
1488
1489 p.renderParagraph(data[:i])
1490 return i
1491}