block.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "html"
19 "regexp"
20
21 "github.com/shurcooL/sanitized_anchor_name"
22)
23
24const (
25 Entity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
26 Escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
27)
28
29var (
30 reBackslashOrAmp = regexp.MustCompile("[\\&]")
31 reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + Escapable + "|" + Entity)
32 reTrailingWhitespace = regexp.MustCompile("(\n *)+$")
33)
34
35// Parse block-level data.
36// Note: this function and many that it calls assume that
37// the input buffer ends with a newline.
38func (p *parser) block(data []byte) {
39 if len(data) == 0 || data[len(data)-1] != '\n' {
40 panic("block input is missing terminating newline")
41 }
42
43 // this is called recursively: enforce a maximum depth
44 if p.nesting >= p.maxNesting {
45 return
46 }
47 p.nesting++
48
49 // parse out one block-level construct at a time
50 for len(data) > 0 {
51 // prefixed header:
52 //
53 // # Header 1
54 // ## Header 2
55 // ...
56 // ###### Header 6
57 if p.isPrefixHeader(data) {
58 data = data[p.prefixHeader(data):]
59 continue
60 }
61
62 // block of preformatted HTML:
63 //
64 // <div>
65 // ...
66 // </div>
67 if data[0] == '<' {
68 if i := p.html(data, true); i > 0 {
69 data = data[i:]
70 continue
71 }
72 }
73
74 // title block
75 //
76 // % stuff
77 // % more stuff
78 // % even more stuff
79 if p.flags&Titleblock != 0 {
80 if data[0] == '%' {
81 if i := p.titleBlock(data, true); i > 0 {
82 data = data[i:]
83 continue
84 }
85 }
86 }
87
88 // blank lines. note: returns the # of bytes to skip
89 if i := p.isEmpty(data); i > 0 {
90 data = data[i:]
91 continue
92 }
93
94 // indented code block:
95 //
96 // func max(a, b int) int {
97 // if a > b {
98 // return a
99 // }
100 // return b
101 // }
102 if p.codePrefix(data) > 0 {
103 data = data[p.code(data):]
104 continue
105 }
106
107 // fenced code block:
108 //
109 // ``` go
110 // func fact(n int) int {
111 // if n <= 1 {
112 // return n
113 // }
114 // return n * fact(n-1)
115 // }
116 // ```
117 if p.flags&FencedCode != 0 {
118 if i := p.fencedCode(data, true); i > 0 {
119 data = data[i:]
120 continue
121 }
122 }
123
124 // horizontal rule:
125 //
126 // ------
127 // or
128 // ******
129 // or
130 // ______
131 if p.isHRule(data) {
132 p.addBlock(HorizontalRule, nil)
133 var i int
134 for i = 0; data[i] != '\n'; i++ {
135 }
136 data = data[i:]
137 continue
138 }
139
140 // block quote:
141 //
142 // > A big quote I found somewhere
143 // > on the web
144 if p.quotePrefix(data) > 0 {
145 data = data[p.quote(data):]
146 continue
147 }
148
149 // table:
150 //
151 // Name | Age | Phone
152 // ------|-----|---------
153 // Bob | 31 | 555-1234
154 // Alice | 27 | 555-4321
155 if p.flags&Tables != 0 {
156 if i := p.table(data); i > 0 {
157 data = data[i:]
158 continue
159 }
160 }
161
162 // an itemized/unordered list:
163 //
164 // * Item 1
165 // * Item 2
166 //
167 // also works with + or -
168 if p.uliPrefix(data) > 0 {
169 data = data[p.list(data, 0):]
170 continue
171 }
172
173 // a numbered/ordered list:
174 //
175 // 1. Item 1
176 // 2. Item 2
177 if p.oliPrefix(data) > 0 {
178 data = data[p.list(data, ListTypeOrdered):]
179 continue
180 }
181
182 // definition lists:
183 //
184 // Term 1
185 // : Definition a
186 // : Definition b
187 //
188 // Term 2
189 // : Definition c
190 if p.flags&DefinitionLists != 0 {
191 if p.dliPrefix(data) > 0 {
192 data = data[p.list(data, ListTypeDefinition):]
193 continue
194 }
195 }
196
197 // anything else must look like a normal paragraph
198 // note: this finds underlined headers, too
199 data = data[p.paragraph(data):]
200 }
201
202 p.nesting--
203}
204
205func (p *parser) addBlock(typ NodeType, content []byte) *Node {
206 p.closeUnmatchedBlocks()
207 container := p.addChild(typ, 0)
208 container.content = content
209 return container
210}
211
212func (p *parser) isPrefixHeader(data []byte) bool {
213 if data[0] != '#' {
214 return false
215 }
216
217 if p.flags&SpaceHeaders != 0 {
218 level := 0
219 for level < 6 && data[level] == '#' {
220 level++
221 }
222 if data[level] != ' ' {
223 return false
224 }
225 }
226 return true
227}
228
229func (p *parser) prefixHeader(data []byte) int {
230 level := 0
231 for level < 6 && data[level] == '#' {
232 level++
233 }
234 i := skipChar(data, level, ' ')
235 end := skipUntilChar(data, i, '\n')
236 skip := end
237 id := ""
238 if p.flags&HeaderIDs != 0 {
239 j, k := 0, 0
240 // find start/end of header id
241 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
242 }
243 for k = j + 1; k < end && data[k] != '}'; k++ {
244 }
245 // extract header id iff found
246 if j < end && k < end {
247 id = string(data[j+2 : k])
248 end = j
249 skip = k + 1
250 for end > 0 && data[end-1] == ' ' {
251 end--
252 }
253 }
254 }
255 for end > 0 && data[end-1] == '#' {
256 if isBackslashEscaped(data, end-1) {
257 break
258 }
259 end--
260 }
261 for end > 0 && data[end-1] == ' ' {
262 end--
263 }
264 if end > i {
265 if id == "" && p.flags&AutoHeaderIDs != 0 {
266 id = sanitized_anchor_name.Create(string(data[i:end]))
267 }
268 block := p.addBlock(Header, data[i:end])
269 block.HeaderID = id
270 block.Level = level
271 }
272 return skip
273}
274
275func (p *parser) isUnderlinedHeader(data []byte) int {
276 // test of level 1 header
277 if data[0] == '=' {
278 i := skipChar(data, 1, '=')
279 i = skipChar(data, i, ' ')
280 if data[i] == '\n' {
281 return 1
282 } else {
283 return 0
284 }
285 }
286
287 // test of level 2 header
288 if data[0] == '-' {
289 i := skipChar(data, 1, '-')
290 i = skipChar(data, i, ' ')
291 if data[i] == '\n' {
292 return 2
293 } else {
294 return 0
295 }
296 }
297
298 return 0
299}
300
301func (p *parser) titleBlock(data []byte, doRender bool) int {
302 if data[0] != '%' {
303 return 0
304 }
305 splitData := bytes.Split(data, []byte("\n"))
306 var i int
307 for idx, b := range splitData {
308 if !bytes.HasPrefix(b, []byte("%")) {
309 i = idx // - 1
310 break
311 }
312 }
313
314 data = bytes.Join(splitData[0:i], []byte("\n"))
315 consumed := len(data)
316 data = bytes.TrimPrefix(data, []byte("% "))
317 data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
318 block := p.addBlock(Header, data)
319 block.Level = 1
320 block.IsTitleblock = true
321
322 return consumed
323}
324
325func (p *parser) html(data []byte, doRender bool) int {
326 var i, j int
327
328 // identify the opening tag
329 if data[0] != '<' {
330 return 0
331 }
332 curtag, tagfound := p.htmlFindTag(data[1:])
333
334 // handle special cases
335 if !tagfound {
336 // check for an HTML comment
337 if size := p.htmlComment(data, doRender); size > 0 {
338 return size
339 }
340
341 // check for an <hr> tag
342 if size := p.htmlHr(data, doRender); size > 0 {
343 return size
344 }
345
346 // no special case recognized
347 return 0
348 }
349
350 // look for an unindented matching closing tag
351 // followed by a blank line
352 found := false
353 /*
354 closetag := []byte("\n</" + curtag + ">")
355 j = len(curtag) + 1
356 for !found {
357 // scan for a closing tag at the beginning of a line
358 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
359 j += skip + len(closetag)
360 } else {
361 break
362 }
363
364 // see if it is the only thing on the line
365 if skip := p.isEmpty(data[j:]); skip > 0 {
366 // see if it is followed by a blank line/eof
367 j += skip
368 if j >= len(data) {
369 found = true
370 i = j
371 } else {
372 if skip := p.isEmpty(data[j:]); skip > 0 {
373 j += skip
374 found = true
375 i = j
376 }
377 }
378 }
379 }
380 */
381
382 // if not found, try a second pass looking for indented match
383 // but not if tag is "ins" or "del" (following original Markdown.pl)
384 if !found && curtag != "ins" && curtag != "del" {
385 i = 1
386 for i < len(data) {
387 i++
388 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
389 i++
390 }
391
392 if i+2+len(curtag) >= len(data) {
393 break
394 }
395
396 j = p.htmlFindEnd(curtag, data[i-1:])
397
398 if j > 0 {
399 i += j - 1
400 found = true
401 break
402 }
403 }
404 }
405
406 if !found {
407 return 0
408 }
409
410 // the end of the block has been found
411 if doRender {
412 // trim newlines
413 end := i
414 for end > 0 && data[end-1] == '\n' {
415 end--
416 }
417 finalizeHtmlBlock(p.addBlock(HTMLBlock, data[:end]))
418 }
419
420 return i
421}
422
423func finalizeHtmlBlock(block *Node) {
424 block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{})
425 block.content = []byte{}
426}
427
428// HTML comment, lax form
429func (p *parser) htmlComment(data []byte, doRender bool) int {
430 i := p.inlineHtmlComment(data)
431 // needs to end with a blank line
432 if j := p.isEmpty(data[i:]); j > 0 {
433 size := i + j
434 if doRender {
435 // trim trailing newlines
436 end := size
437 for end > 0 && data[end-1] == '\n' {
438 end--
439 }
440 block := p.addBlock(HTMLBlock, data[:end])
441 finalizeHtmlBlock(block)
442 }
443 return size
444 }
445 return 0
446}
447
448// HR, which is the only self-closing block tag considered
449func (p *parser) htmlHr(data []byte, doRender bool) int {
450 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
451 return 0
452 }
453 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
454 // not an <hr> tag after all; at least not a valid one
455 return 0
456 }
457
458 i := 3
459 for data[i] != '>' && data[i] != '\n' {
460 i++
461 }
462
463 if data[i] == '>' {
464 i++
465 if j := p.isEmpty(data[i:]); j > 0 {
466 size := i + j
467 if doRender {
468 // trim newlines
469 end := size
470 for end > 0 && data[end-1] == '\n' {
471 end--
472 }
473 finalizeHtmlBlock(p.addBlock(HTMLBlock, data[:end]))
474 }
475 return size
476 }
477 }
478
479 return 0
480}
481
482func (p *parser) htmlFindTag(data []byte) (string, bool) {
483 i := 0
484 for isalnum(data[i]) {
485 i++
486 }
487 key := string(data[:i])
488 if _, ok := blockTags[key]; ok {
489 return key, true
490 }
491 return "", false
492}
493
494func (p *parser) htmlFindEnd(tag string, data []byte) int {
495 // assume data[0] == '<' && data[1] == '/' already tested
496 if tag == "hr" {
497 return 2
498 }
499 // check if tag is a match
500 closetag := []byte("</" + tag + ">")
501 if !bytes.HasPrefix(data, closetag) {
502 return 0
503 }
504 i := len(closetag)
505
506 // check that the rest of the line is blank
507 skip := 0
508 if skip = p.isEmpty(data[i:]); skip == 0 {
509 return 0
510 }
511 i += skip
512 skip = 0
513
514 if i >= len(data) {
515 return i
516 }
517
518 if p.flags&LaxHTMLBlocks != 0 {
519 return i
520 }
521 if skip = p.isEmpty(data[i:]); skip == 0 {
522 // following line must be blank
523 return 0
524 }
525
526 return i + skip
527}
528
529func (p *parser) isEmpty(data []byte) int {
530 // it is okay to call isEmpty on an empty buffer
531 if len(data) == 0 {
532 return 0
533 }
534
535 var i int
536 for i = 0; i < len(data) && data[i] != '\n'; i++ {
537 if data[i] != ' ' && data[i] != '\t' {
538 return 0
539 }
540 }
541 return i + 1
542}
543
544func (p *parser) isHRule(data []byte) bool {
545 i := 0
546
547 // skip up to three spaces
548 for i < 3 && data[i] == ' ' {
549 i++
550 }
551
552 // look at the hrule char
553 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
554 return false
555 }
556 c := data[i]
557
558 // the whole line must be the char or whitespace
559 n := 0
560 for data[i] != '\n' {
561 switch {
562 case data[i] == c:
563 n++
564 case data[i] != ' ':
565 return false
566 }
567 i++
568 }
569
570 return n >= 3
571}
572
573func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
574 i, size := 0, 0
575 skip = 0
576
577 // skip up to three spaces
578 for i < len(data) && i < 3 && data[i] == ' ' {
579 i++
580 }
581 if i >= len(data) {
582 return
583 }
584
585 // check for the marker characters: ~ or `
586 if data[i] != '~' && data[i] != '`' {
587 return
588 }
589
590 c := data[i]
591
592 // the whole line must be the same char or whitespace
593 for i < len(data) && data[i] == c {
594 size++
595 i++
596 }
597
598 if i >= len(data) {
599 return
600 }
601
602 // the marker char must occur at least 3 times
603 if size < 3 {
604 return
605 }
606 marker = string(data[i-size : i])
607
608 // if this is the end marker, it must match the beginning marker
609 if oldmarker != "" && marker != oldmarker {
610 return
611 }
612
613 if syntax != nil {
614 syn := 0
615 i = skipChar(data, i, ' ')
616
617 if i >= len(data) {
618 return
619 }
620
621 syntaxStart := i
622
623 if data[i] == '{' {
624 i++
625 syntaxStart++
626
627 for i < len(data) && data[i] != '}' && data[i] != '\n' {
628 syn++
629 i++
630 }
631
632 if i >= len(data) || data[i] != '}' {
633 return
634 }
635
636 // strip all whitespace at the beginning and the end
637 // of the {} block
638 for syn > 0 && isspace(data[syntaxStart]) {
639 syntaxStart++
640 syn--
641 }
642
643 for syn > 0 && isspace(data[syntaxStart+syn-1]) {
644 syn--
645 }
646
647 i++
648 } else {
649 for i < len(data) && !isspace(data[i]) {
650 syn++
651 i++
652 }
653 }
654
655 language := string(data[syntaxStart : syntaxStart+syn])
656 *syntax = &language
657 }
658
659 i = skipChar(data, i, ' ')
660 if i >= len(data) || data[i] != '\n' {
661 return
662 }
663
664 skip = i + 1
665 return
666}
667
668func (p *parser) fencedCode(data []byte, doRender bool) int {
669 var lang *string
670 beg, marker := p.isFencedCode(data, &lang, "")
671 if beg == 0 || beg >= len(data) {
672 return 0
673 }
674
675 var work bytes.Buffer
676 if lang != nil {
677 work.Write([]byte(*lang))
678 work.WriteByte('\n')
679 }
680
681 for {
682 // safe to assume beg < len(data)
683
684 // check for the end of the code block
685 fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
686 if fenceEnd != 0 {
687 beg += fenceEnd
688 break
689 }
690
691 // copy the current line
692 end := skipUntilChar(data, beg, '\n') + 1
693
694 // did we reach the end of the buffer without a closing marker?
695 if end >= len(data) {
696 return 0
697 }
698
699 // verbatim copy to the working buffer
700 if doRender {
701 work.Write(data[beg:end])
702 }
703 beg = end
704 }
705
706 //syntax := ""
707 //if lang != nil {
708 // syntax = *lang
709 //}
710
711 if doRender {
712 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
713 block.IsFenced = true
714 finalizeCodeBlock(block)
715 }
716
717 return beg
718}
719
720func unescapeChar(str []byte) []byte {
721 if str[0] == '\\' {
722 return []byte{str[1]}
723 }
724 return []byte(html.UnescapeString(string(str)))
725}
726
727func unescapeString(str []byte) []byte {
728 if reBackslashOrAmp.Match(str) {
729 return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
730 } else {
731 return str
732 }
733}
734
735func finalizeCodeBlock(block *Node) {
736 if block.IsFenced {
737 newlinePos := bytes.IndexByte(block.content, '\n')
738 firstLine := block.content[:newlinePos]
739 rest := block.content[newlinePos+1:]
740 block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
741 block.Literal = rest
742 } else {
743 block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'})
744 }
745 block.content = nil
746}
747
748func (p *parser) table(data []byte) int {
749 table := p.addBlock(Table, nil)
750 i, columns := p.tableHeader(data)
751 if i == 0 {
752 p.tip = table.Parent
753 table.unlink()
754 return 0
755 }
756
757 p.addBlock(TableBody, nil)
758
759 for i < len(data) {
760 pipes, rowStart := 0, i
761 for ; data[i] != '\n'; i++ {
762 if data[i] == '|' {
763 pipes++
764 }
765 }
766
767 if pipes == 0 {
768 i = rowStart
769 break
770 }
771
772 // include the newline in data sent to tableRow
773 i++
774 p.tableRow(data[rowStart:i], columns, false)
775 }
776
777 return i
778}
779
780// check if the specified position is preceded by an odd number of backslashes
781func isBackslashEscaped(data []byte, i int) bool {
782 backslashes := 0
783 for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
784 backslashes++
785 }
786 return backslashes&1 == 1
787}
788
789func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
790 i := 0
791 colCount := 1
792 for i = 0; data[i] != '\n'; i++ {
793 if data[i] == '|' && !isBackslashEscaped(data, i) {
794 colCount++
795 }
796 }
797
798 // doesn't look like a table header
799 if colCount == 1 {
800 return
801 }
802
803 // include the newline in the data sent to tableRow
804 header := data[:i+1]
805
806 // column count ignores pipes at beginning or end of line
807 if data[0] == '|' {
808 colCount--
809 }
810 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
811 colCount--
812 }
813
814 columns = make([]CellAlignFlags, colCount)
815
816 // move on to the header underline
817 i++
818 if i >= len(data) {
819 return
820 }
821
822 if data[i] == '|' && !isBackslashEscaped(data, i) {
823 i++
824 }
825 i = skipChar(data, i, ' ')
826
827 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
828 // and trailing | optional on last column
829 col := 0
830 for data[i] != '\n' {
831 dashes := 0
832
833 if data[i] == ':' {
834 i++
835 columns[col] |= TableAlignmentLeft
836 dashes++
837 }
838 for data[i] == '-' {
839 i++
840 dashes++
841 }
842 if data[i] == ':' {
843 i++
844 columns[col] |= TableAlignmentRight
845 dashes++
846 }
847 for data[i] == ' ' {
848 i++
849 }
850
851 // end of column test is messy
852 switch {
853 case dashes < 3:
854 // not a valid column
855 return
856
857 case data[i] == '|' && !isBackslashEscaped(data, i):
858 // marker found, now skip past trailing whitespace
859 col++
860 i++
861 for data[i] == ' ' {
862 i++
863 }
864
865 // trailing junk found after last column
866 if col >= colCount && data[i] != '\n' {
867 return
868 }
869
870 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
871 // something else found where marker was required
872 return
873
874 case data[i] == '\n':
875 // marker is optional for the last column
876 col++
877
878 default:
879 // trailing junk found after last column
880 return
881 }
882 }
883 if col != colCount {
884 return
885 }
886
887 p.addBlock(TableHead, nil)
888 p.tableRow(header, columns, true)
889 size = i + 1
890 return
891}
892
893func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
894 p.addBlock(TableRow, nil)
895 i, col := 0, 0
896
897 if data[i] == '|' && !isBackslashEscaped(data, i) {
898 i++
899 }
900
901 for col = 0; col < len(columns) && i < len(data); col++ {
902 for data[i] == ' ' {
903 i++
904 }
905
906 cellStart := i
907
908 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
909 i++
910 }
911
912 cellEnd := i
913
914 // skip the end-of-cell marker, possibly taking us past end of buffer
915 i++
916
917 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
918 cellEnd--
919 }
920
921 cell := p.addBlock(TableCell, data[cellStart:cellEnd])
922 cell.IsHeader = header
923 cell.Align = columns[col]
924 }
925
926 // pad it out with empty columns to get the right number
927 for ; col < len(columns); col++ {
928 cell := p.addBlock(TableCell, nil)
929 cell.IsHeader = header
930 cell.Align = columns[col]
931 }
932
933 // silently ignore rows with too many cells
934}
935
936// returns blockquote prefix length
937func (p *parser) quotePrefix(data []byte) int {
938 i := 0
939 for i < 3 && data[i] == ' ' {
940 i++
941 }
942 if data[i] == '>' {
943 if data[i+1] == ' ' {
944 return i + 2
945 }
946 return i + 1
947 }
948 return 0
949}
950
951// blockquote ends with at least one blank line
952// followed by something without a blockquote prefix
953func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
954 if p.isEmpty(data[beg:]) <= 0 {
955 return false
956 }
957 if end >= len(data) {
958 return true
959 }
960 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
961}
962
963// parse a blockquote fragment
964func (p *parser) quote(data []byte) int {
965 block := p.addBlock(BlockQuote, nil)
966 var raw bytes.Buffer
967 beg, end := 0, 0
968 for beg < len(data) {
969 end = beg
970 // Step over whole lines, collecting them. While doing that, check for
971 // fenced code and if one's found, incorporate it altogether,
972 // irregardless of any contents inside it
973 for data[end] != '\n' {
974 if p.flags&FencedCode != 0 {
975 if i := p.fencedCode(data[end:], false); i > 0 {
976 // -1 to compensate for the extra end++ after the loop:
977 end += i - 1
978 break
979 }
980 }
981 end++
982 }
983 end++
984 if pre := p.quotePrefix(data[beg:]); pre > 0 {
985 // skip the prefix
986 beg += pre
987 } else if p.terminateBlockquote(data, beg, end) {
988 break
989 }
990 // this line is part of the blockquote
991 raw.Write(data[beg:end])
992 beg = end
993 }
994 p.block(raw.Bytes())
995 p.finalize(block)
996 return end
997}
998
999// returns prefix length for block code
1000func (p *parser) codePrefix(data []byte) int {
1001 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
1002 return 4
1003 }
1004 return 0
1005}
1006
1007func (p *parser) code(data []byte) int {
1008 var work bytes.Buffer
1009
1010 i := 0
1011 for i < len(data) {
1012 beg := i
1013 for data[i] != '\n' {
1014 i++
1015 }
1016 i++
1017
1018 blankline := p.isEmpty(data[beg:i]) > 0
1019 if pre := p.codePrefix(data[beg:i]); pre > 0 {
1020 beg += pre
1021 } else if !blankline {
1022 // non-empty, non-prefixed line breaks the pre
1023 i = beg
1024 break
1025 }
1026
1027 // verbatim copy to the working buffeu
1028 if blankline {
1029 work.WriteByte('\n')
1030 } else {
1031 work.Write(data[beg:i])
1032 }
1033 }
1034
1035 // trim all the \n off the end of work
1036 workbytes := work.Bytes()
1037 eol := len(workbytes)
1038 for eol > 0 && workbytes[eol-1] == '\n' {
1039 eol--
1040 }
1041 if eol != len(workbytes) {
1042 work.Truncate(eol)
1043 }
1044
1045 work.WriteByte('\n')
1046
1047 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
1048 block.IsFenced = false
1049 finalizeCodeBlock(block)
1050
1051 return i
1052}
1053
1054// returns unordered list item prefix
1055func (p *parser) uliPrefix(data []byte) int {
1056 i := 0
1057
1058 // start with up to 3 spaces
1059 for i < 3 && data[i] == ' ' {
1060 i++
1061 }
1062
1063 // need a *, +, or - followed by a space
1064 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1065 data[i+1] != ' ' {
1066 return 0
1067 }
1068 return i + 2
1069}
1070
1071// returns ordered list item prefix
1072func (p *parser) oliPrefix(data []byte) int {
1073 i := 0
1074
1075 // start with up to 3 spaces
1076 for i < 3 && data[i] == ' ' {
1077 i++
1078 }
1079
1080 // count the digits
1081 start := i
1082 for data[i] >= '0' && data[i] <= '9' {
1083 i++
1084 }
1085
1086 // we need >= 1 digits followed by a dot and a space
1087 if start == i || data[i] != '.' || data[i+1] != ' ' {
1088 return 0
1089 }
1090 return i + 2
1091}
1092
1093// returns definition list item prefix
1094func (p *parser) dliPrefix(data []byte) int {
1095 i := 0
1096
1097 // need a : followed by a spaces
1098 if data[i] != ':' || data[i+1] != ' ' {
1099 return 0
1100 }
1101 for data[i] == ' ' {
1102 i++
1103 }
1104 return i + 2
1105}
1106
1107// parse ordered or unordered list block
1108func (p *parser) list(data []byte, flags ListType) int {
1109 i := 0
1110 flags |= ListItemBeginningOfList
1111 block := p.addBlock(List, nil)
1112 block.ListFlags = flags
1113 block.Tight = true
1114
1115 for i < len(data) {
1116 skip := p.listItem(data[i:], &flags)
1117 if flags&ListItemContainsBlock != 0 {
1118 block.ListData.Tight = false
1119 }
1120 i += skip
1121 if skip == 0 || flags&ListItemEndOfList != 0 {
1122 break
1123 }
1124 flags &= ^ListItemBeginningOfList
1125 }
1126
1127 above := block.Parent
1128 finalizeList(block)
1129 p.tip = above
1130 return i
1131}
1132
1133// Returns true if block ends with a blank line, descending if needed
1134// into lists and sublists.
1135func endsWithBlankLine(block *Node) bool {
1136 // TODO: figure this out. Always false now.
1137 for block != nil {
1138 //if block.lastLineBlank {
1139 //return true
1140 //}
1141 t := block.Type
1142 if t == List || t == Item {
1143 block = block.LastChild
1144 } else {
1145 break
1146 }
1147 }
1148 return false
1149}
1150
1151func finalizeList(block *Node) {
1152 block.open = false
1153 item := block.FirstChild
1154 for item != nil {
1155 // check for non-final list item ending with blank line:
1156 if endsWithBlankLine(item) && item.Next != nil {
1157 block.ListData.Tight = false
1158 break
1159 }
1160 // recurse into children of list item, to see if there are spaces
1161 // between any of them:
1162 subItem := item.FirstChild
1163 for subItem != nil {
1164 if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
1165 block.ListData.Tight = false
1166 break
1167 }
1168 subItem = subItem.Next
1169 }
1170 item = item.Next
1171 }
1172}
1173
1174// Parse a single list item.
1175// Assumes initial prefix is already removed if this is a sublist.
1176func (p *parser) listItem(data []byte, flags *ListType) int {
1177 // keep track of the indentation of the first line
1178 itemIndent := 0
1179 for itemIndent < 3 && data[itemIndent] == ' ' {
1180 itemIndent++
1181 }
1182
1183 var bulletChar byte = '*'
1184 i := p.uliPrefix(data)
1185 if i == 0 {
1186 i = p.oliPrefix(data)
1187 } else {
1188 bulletChar = data[i-2]
1189 }
1190 if i == 0 {
1191 i = p.dliPrefix(data)
1192 // reset definition term flag
1193 if i > 0 {
1194 *flags &= ^ListTypeTerm
1195 }
1196 }
1197 if i == 0 {
1198 // if in definition list, set term flag and continue
1199 if *flags&ListTypeDefinition != 0 {
1200 *flags |= ListTypeTerm
1201 } else {
1202 return 0
1203 }
1204 }
1205
1206 // skip leading whitespace on first line
1207 for data[i] == ' ' {
1208 i++
1209 }
1210
1211 // find the end of the line
1212 line := i
1213 for i > 0 && data[i-1] != '\n' {
1214 i++
1215 }
1216
1217 // get working buffer
1218 var raw bytes.Buffer
1219
1220 // put the first line into the working buffer
1221 raw.Write(data[line:i])
1222 line = i
1223
1224 // process the following lines
1225 containsBlankLine := false
1226 sublist := 0
1227
1228gatherlines:
1229 for line < len(data) {
1230 i++
1231
1232 // find the end of this line
1233 for data[i-1] != '\n' {
1234 i++
1235 }
1236
1237 // if it is an empty line, guess that it is part of this item
1238 // and move on to the next line
1239 if p.isEmpty(data[line:i]) > 0 {
1240 containsBlankLine = true
1241 line = i
1242 continue
1243 }
1244
1245 // calculate the indentation
1246 indent := 0
1247 for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1248 indent++
1249 }
1250
1251 chunk := data[line+indent : i]
1252
1253 // evaluate how this line fits in
1254 switch {
1255 // is this a nested list item?
1256 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1257 p.oliPrefix(chunk) > 0 ||
1258 p.dliPrefix(chunk) > 0:
1259
1260 if containsBlankLine {
1261 *flags |= ListItemContainsBlock
1262 }
1263
1264 // to be a nested list, it must be indented more
1265 // if not, it is the next item in the same list
1266 if indent <= itemIndent {
1267 break gatherlines
1268 }
1269
1270 // is this the first item in the nested list?
1271 if sublist == 0 {
1272 sublist = raw.Len()
1273 }
1274
1275 // is this a nested prefix header?
1276 case p.isPrefixHeader(chunk):
1277 // if the header is not indented, it is not nested in the list
1278 // and thus ends the list
1279 if containsBlankLine && indent < 4 {
1280 *flags |= ListItemEndOfList
1281 break gatherlines
1282 }
1283 *flags |= ListItemContainsBlock
1284
1285 // anything following an empty line is only part
1286 // of this item if it is indented 4 spaces
1287 // (regardless of the indentation of the beginning of the item)
1288 case containsBlankLine && indent < 4:
1289 if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
1290 // is the next item still a part of this list?
1291 next := i
1292 for data[next] != '\n' {
1293 next++
1294 }
1295 for next < len(data)-1 && data[next] == '\n' {
1296 next++
1297 }
1298 if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1299 *flags |= ListItemEndOfList
1300 }
1301 } else {
1302 *flags |= ListItemEndOfList
1303 }
1304 break gatherlines
1305
1306 // a blank line means this should be parsed as a block
1307 case containsBlankLine:
1308 raw.WriteByte('\n')
1309 *flags |= ListItemContainsBlock
1310 }
1311
1312 // if this line was preceded by one or more blanks,
1313 // re-introduce the blank into the buffer
1314 if containsBlankLine {
1315 containsBlankLine = false
1316 raw.WriteByte('\n')
1317 }
1318
1319 // add the line into the working buffer without prefix
1320 raw.Write(data[line+indent : i])
1321
1322 line = i
1323 }
1324
1325 rawBytes := raw.Bytes()
1326
1327 block := p.addBlock(Item, nil)
1328 block.ListFlags = *flags
1329 block.Tight = false
1330 block.BulletChar = bulletChar
1331 block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
1332
1333 // render the contents of the list item
1334 if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
1335 // intermediate render of block item, except for definition term
1336 if sublist > 0 {
1337 p.block(rawBytes[:sublist])
1338 p.block(rawBytes[sublist:])
1339 } else {
1340 p.block(rawBytes)
1341 }
1342 } else {
1343 // intermediate render of inline item
1344 if sublist > 0 {
1345 child := p.addChild(Paragraph, 0)
1346 child.content = rawBytes[:sublist]
1347 p.block(rawBytes[sublist:])
1348 } else {
1349 child := p.addChild(Paragraph, 0)
1350 child.content = rawBytes
1351 }
1352 }
1353 return line
1354}
1355
1356// render a single paragraph that has already been parsed out
1357func (p *parser) renderParagraph(data []byte) {
1358 if len(data) == 0 {
1359 return
1360 }
1361
1362 // trim leading spaces
1363 beg := 0
1364 for data[beg] == ' ' {
1365 beg++
1366 }
1367
1368 // trim trailing newline
1369 end := len(data) - 1
1370
1371 // trim trailing spaces
1372 for end > beg && data[end-1] == ' ' {
1373 end--
1374 }
1375
1376 p.addBlock(Paragraph, data[beg:end])
1377}
1378
1379func (p *parser) paragraph(data []byte) int {
1380 // prev: index of 1st char of previous line
1381 // line: index of 1st char of current line
1382 // i: index of cursor/end of current line
1383 var prev, line, i int
1384
1385 // keep going until we find something to mark the end of the paragraph
1386 for i < len(data) {
1387 // mark the beginning of the current line
1388 prev = line
1389 current := data[i:]
1390 line = i
1391
1392 // did we find a blank line marking the end of the paragraph?
1393 if n := p.isEmpty(current); n > 0 {
1394 // did this blank line followed by a definition list item?
1395 if p.flags&DefinitionLists != 0 {
1396 if i < len(data)-1 && data[i+1] == ':' {
1397 return p.list(data[prev:], ListTypeDefinition)
1398 }
1399 }
1400
1401 p.renderParagraph(data[:i])
1402 return i + n
1403 }
1404
1405 // an underline under some text marks a header, so our paragraph ended on prev line
1406 if i > 0 {
1407 if level := p.isUnderlinedHeader(current); level > 0 {
1408 // render the paragraph
1409 p.renderParagraph(data[:prev])
1410
1411 // ignore leading and trailing whitespace
1412 eol := i - 1
1413 for prev < eol && data[prev] == ' ' {
1414 prev++
1415 }
1416 for eol > prev && data[eol-1] == ' ' {
1417 eol--
1418 }
1419
1420 id := ""
1421 if p.flags&AutoHeaderIDs != 0 {
1422 id = sanitized_anchor_name.Create(string(data[prev:eol]))
1423 }
1424
1425 block := p.addBlock(Header, data[prev:eol])
1426 block.Level = level
1427 block.HeaderID = id
1428
1429 // find the end of the underline
1430 for data[i] != '\n' {
1431 i++
1432 }
1433 return i
1434 }
1435 }
1436
1437 // if the next line starts a block of HTML, then the paragraph ends here
1438 if p.flags&LaxHTMLBlocks != 0 {
1439 if data[i] == '<' && p.html(current, false) > 0 {
1440 // rewind to before the HTML block
1441 p.renderParagraph(data[:i])
1442 return i
1443 }
1444 }
1445
1446 // if there's a prefixed header or a horizontal rule after this, paragraph is over
1447 if p.isPrefixHeader(current) || p.isHRule(current) {
1448 p.renderParagraph(data[:i])
1449 return i
1450 }
1451
1452 // if there's a fenced code block, paragraph is over
1453 if p.flags&FencedCode != 0 {
1454 if p.fencedCode(current, false) > 0 {
1455 p.renderParagraph(data[:i])
1456 return i
1457 }
1458 }
1459
1460 // if there's a definition list item, prev line is a definition term
1461 if p.flags&DefinitionLists != 0 {
1462 if p.dliPrefix(current) != 0 {
1463 return p.list(data[prev:], ListTypeDefinition)
1464 }
1465 }
1466
1467 // if there's a list after this, paragraph is over
1468 if p.flags&NoEmptyLineBeforeBlock != 0 {
1469 if p.uliPrefix(current) != 0 ||
1470 p.oliPrefix(current) != 0 ||
1471 p.quotePrefix(current) != 0 ||
1472 p.codePrefix(current) != 0 {
1473 p.renderParagraph(data[:i])
1474 return i
1475 }
1476 }
1477
1478 // otherwise, scan to the beginning of the next line
1479 for data[i] != '\n' {
1480 i++
1481 }
1482 i++
1483 }
1484
1485 p.renderParagraph(data[:i])
1486 return i
1487}