markdown.go (view raw)
1package main
2
3import (
4 "bytes"
5 "fmt"
6 "html"
7 "sort"
8 "unicode"
9)
10
// Autolink kinds. tag_length reports one of these through its autolink
// out-parameter, and char_langle_tag forwards it to the renderer's autolink
// callback as the kind argument.
const (
	MKDA_NOT_AUTOLINK = iota // not an autolink
	MKDA_NORMAL              // set by tag_length when a "scheme:" prefix is found
	MKDA_EMAIL               // set by tag_length when is_mail_autolink matches
)
16
// Extension flags; they are tested as a bit mask against render.ext_flags.
const (
	MKDEXT_NO_INTRA_EMPHASIS = 1 << iota // parse_emph1: a closing delimiter must be followed by end of data, whitespace or punctuation
	MKDEXT_TABLES                        // parse_block tries parse_table
	MKDEXT_FENCED_CODE                   // parse_block tries parse_fencedcode
	MKDEXT_AUTOLINK                      // not referenced in this part of the file; presumably enables bare autolinks
	MKDEXT_STRIKETHROUGH                 // not referenced in this part of the file; presumably enables '~~' strikethrough
	MKDEXT_LAX_HTML_BLOCKS               // htmlblock_end: the line after the closing tag's line need not be blank
	MKDEXT_SPACE_HEADERS                 // is_atxheader: the '#' run must be followed by a space or tab
)
26
// List flags. parse_block passes MKD_LIST_ORDERED to parse_list for ordered
// lists and 0 for unordered ones; the renderer's list and listitem callbacks
// take an int flags argument.
const (
	_ = iota
	MKD_LIST_ORDERED // 1
	MKD_LI_BLOCK     // 2: <li> containing block data
	MKD_LI_END = 8   // not used in this part of the file; NOTE(review): presumably an internal end-of-list marker, confirm against parse_list
)
33
// Table column alignment flags, stored per column by parse_table_header.
const (
	MKD_TABLE_ALIGN_L = 1 << iota // underline cell starts with ':'
	MKD_TABLE_ALIGN_R             // underline cell ends with ':'
	MKD_TABLE_ALIGN_CENTER = (MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R) // both colons present
)
39
// block_tags is the set of HTML tag names that may open an HTML block.
// find_block_tag only tests key membership; each stored value equals the
// length of its key. Keys are lower-case and the lookup is case-sensitive.
var block_tags = map[string]int{
	"p": 1,
	"dl": 2,
	"h1": 2,
	"h2": 2,
	"h3": 2,
	"h4": 2,
	"h5": 2,
	"h6": 2,
	"ol": 2,
	"ul": 2,
	"del": 3, // parse_htmlblock never treats "del" as a block (see its ins/del check)
	"div": 3,
	"ins": 3, // parse_htmlblock never treats "ins" as a block (see its ins/del check)
	"pre": 3,
	"form": 4,
	"math": 4,
	"table": 5,
	"iframe": 6,
	"script": 6,
	"fieldset": 8,
	"noscript": 8,
	"blockquote": 10,
}
64
// mkd_renderer is the table of callbacks used to render parsed data.
//
// Every callback writes its output to ob and receives the opaque field back
// as its last argument. The parser checks each callback for nil before
// calling it; what happens for a nil callback is described per group below.
type mkd_renderer struct {
	// block-level callbacks---nil skips the block
	blockcode func(ob *bytes.Buffer, text []byte, lang string, opaque interface{})
	blockquote func(ob *bytes.Buffer, text []byte, opaque interface{})
	blockhtml func(ob *bytes.Buffer, text []byte, opaque interface{})
	header func(ob *bytes.Buffer, text []byte, level int, opaque interface{})
	hrule func(ob *bytes.Buffer, opaque interface{})
	list func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	listitem func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	paragraph func(ob *bytes.Buffer, text []byte, opaque interface{})
	table func(ob *bytes.Buffer, header []byte, body []byte, opaque interface{})
	table_row func(ob *bytes.Buffer, text []byte, opaque interface{})
	table_cell func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})

	// span-level callbacks---nil or return 0 prints the span verbatim
	// (the span parsers return 0 in that case, and parse_inline then copies
	// the triggering character as ordinary text)
	autolink func(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int
	codespan func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	double_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	image func(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int
	linebreak func(ob *bytes.Buffer, opaque interface{}) int
	link func(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int
	raw_html_tag func(ob *bytes.Buffer, tag []byte, opaque interface{}) int
	triple_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	strikethrough func(ob *bytes.Buffer, text []byte, opaque interface{}) int

	// low-level callbacks---nil copies input directly into the output
	entity func(ob *bytes.Buffer, entity []byte, opaque interface{})
	normal_text func(ob *bytes.Buffer, text []byte, opaque interface{})

	// header and footer
	doc_header func(ob *bytes.Buffer, opaque interface{})
	doc_footer func(ob *bytes.Buffer, opaque interface{})

	// user data---passed back to every callback
	opaque interface{}
}
103
// link_ref is one link reference definition collected by is_ref. The three
// fields are sub-slices of the document being parsed.
type link_ref struct {
	id    []byte // text between the brackets
	link  []byte // link target, without surrounding angle brackets
	title []byte // optional title, empty when absent
}

// link_ref_array is a list of references that implements sort.Interface,
// ordering entries by id without regard to case.
type link_ref_array []*link_ref

// Len returns the number of references.
func (elt link_ref_array) Len() int {
	return len(elt)
}

// Less reports whether the id of entry i sorts before the id of entry j.
//
// Ids are compared byte by byte after lower-casing each byte as if it were a
// code point; when one id is a prefix of the other, the shorter one sorts
// first. The per-byte folding is kept exactly as it was so that any lookup
// relying on the same ordering keeps working.
func (elt link_ref_array) Less(i, j int) bool {
	a, b := elt[i].id, elt[j].id

	// adapted from bytes.Compare in stdlib
	m := len(a)
	if m > len(b) {
		m = len(b)
	}
	for k, ac := range a[:m] {
		// unicode.ToLower takes a rune in Go 1; the old int conversion no
		// longer compiles
		ai, bi := unicode.ToLower(rune(ac)), unicode.ToLower(rune(b[k]))
		if ai != bi {
			return ai < bi
		}
	}
	return len(a) < len(b)
}

// Swap exchanges entries i and j.
func (elt link_ref_array) Swap(i, j int) {
	elt[i], elt[j] = elt[j], elt[i]
}
147
148// returns whether or not a line is a reference
149func is_ref(data []byte, beg int, last *int, rndr *render) bool {
150 // up to 3 optional leading spaces
151 if beg+3 > len(data) {
152 return false
153 }
154 i := 0
155 if data[beg] == ' ' {
156 i++
157 if data[beg+1] == ' ' {
158 i++
159 if data[beg+2] == ' ' {
160 i++
161 if data[beg+3] == ' ' {
162 return false
163 }
164 }
165 }
166 }
167 i += beg
168
169 // id part: anything but a newline between brackets
170 if data[i] != '[' {
171 return false
172 }
173 i++
174 id_offset := i
175 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
176 i++
177 }
178 if i >= len(data) || data[i] != ']' {
179 return false
180 }
181 id_end := i
182
183 // spacer: colon (space | tab)* newline? (space | tab)*
184 i++
185 if i >= len(data) || data[i] != ':' {
186 return false
187 }
188 i++
189 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
190 i++
191 }
192 if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
193 i++
194 if i < len(data) && data[i] == '\r' && data[i-1] == '\n' {
195 i++
196 }
197 }
198 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
199 i++
200 }
201 if i >= len(data) {
202 return false
203 }
204
205 // link: whitespace-free sequence, optionally between angle brackets
206 if data[i] == '<' {
207 i++
208 }
209 link_offset := i
210 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
211 i++
212 }
213 var link_end int
214 if data[i-1] == '>' {
215 link_end = i - 1
216 } else {
217 link_end = i
218 }
219
220 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
221 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
222 i++
223 }
224 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
225 return false
226 }
227
228 // compute end-of-line
229 line_end := 0
230 if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
231 line_end = i
232 }
233 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
234 line_end = i + 1
235 }
236
237 // optional (space|tab)* spacer after a newline
238 if line_end > 0 {
239 i = line_end + 1
240 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
241 i++
242 }
243 }
244
245 // optional title: any non-newline sequence enclosed in '"() alone on its line
246 title_offset, title_end := 0, 0
247 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
248 i++
249 title_offset = i
250
251 // looking for EOL
252 for i < len(data) && data[i] != '\n' && data[i] != '\r' {
253 i++
254 }
255 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
256 title_end = i + 1
257 } else {
258 title_end = i
259 }
260
261 // stepping back
262 i--
263 for i > title_offset && (data[i] == ' ' || data[i] == '\t') {
264 i--
265 }
266 if i > title_offset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
267 line_end = title_end
268 title_end = i
269 }
270 }
271 if line_end == 0 { // garbage after the link
272 return false
273 }
274
275 // a valid ref has been found; fill in return structures
276 if last != nil {
277 *last = line_end
278 }
279 if rndr == nil {
280 return true
281 }
282 item := &link_ref{id: data[id_offset:id_end], link: data[link_offset:link_end], title: data[title_offset:title_end]}
283 rndr.refs = append(rndr.refs, item)
284
285 return true
286}
287
// render carries the parser state shared by the block and inline parsers.
type render struct {
	mk *mkd_renderer // output callbacks
	refs link_ref_array // link references appended by is_ref
	active_char [256]int // per-byte MD_CHAR_* action; 0 marks an ordinary byte (see parse_inline)
	ext_flags uint32 // bit mask of MKDEXT_* flags
	nesting int // current depth of parse_block/parse_inline calls
	max_nesting int // parse_block and parse_inline return immediately once nesting reaches this
}
296
// Inline action codes. parse_inline reads the code for a byte from
// render.active_char and uses it as an index into markdown_char_ptrs;
// MD_CHAR_NONE (0) marks a byte that is copied as ordinary text.
const (
	MD_CHAR_NONE = iota
	MD_CHAR_EMPHASIS
	MD_CHAR_CODESPAN
	MD_CHAR_LINEBREAK
	MD_CHAR_LINK
	MD_CHAR_LANGLE
	MD_CHAR_ESCAPE
	MD_CHAR_ENTITITY // sic: the misspelled name is kept for compatibility
	MD_CHAR_AUTOLINK
)
308
// markdown_char_ptrs holds the handlers for active characters, indexed by
// MD_CHAR_* action code. Each handler:
//   - returns the number of chars taken care of (0 means "no action")
//   - receives data, the complete block being rendered
//   - receives offset, the number of valid chars before the active char
//
// Note: this is filled in in Markdown to prevent an initialization loop
var markdown_char_ptrs [9]func(ob *bytes.Buffer, rndr *render, data []byte, offset int) int
316
317func parse_inline(ob *bytes.Buffer, rndr *render, data []byte) {
318 if rndr.nesting >= rndr.max_nesting {
319 return
320 }
321 rndr.nesting++
322
323 i, end := 0, 0
324 for i < len(data) {
325 // copy inactive chars into the output
326 for end < len(data) && rndr.active_char[data[end]] == 0 {
327 end++
328 }
329
330 if rndr.mk.normal_text != nil {
331 rndr.mk.normal_text(ob, data[i:], rndr.mk.opaque)
332 } else {
333 ob.Write(data[i:])
334 }
335
336 if end >= len(data) {
337 break
338 }
339 i = end
340
341 // call the trigger
342 action := rndr.active_char[data[end]]
343 end = markdown_char_ptrs[action](ob, rndr, data, i)
344
345 if end == 0 { // no action from the callback
346 end = i + 1
347 } else {
348 i += end
349 end = i
350 }
351 }
352
353 rndr.nesting--
354}
355
356// single and double emphasis parsing
357func char_emphasis(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
358 data = data[offset:]
359 c := data[0]
360 ret := 0
361
362 if len(data) > 2 && data[1] != c {
363 // whitespace cannot follow an opening emphasis;
364 // strikethrough only takes two characters '~~'
365 if c == '~' || unicode.IsSpace(int(data[1])) {
366 return 0
367 }
368 if ret = parse_emph1(ob, rndr, data[1:], c); ret == 0 {
369 return 0
370 }
371
372 return ret + 1
373 }
374
375 if len(data) > 3 && data[1] == c && data[2] != c {
376 if unicode.IsSpace(int(data[2])) {
377 return 0
378 }
379 if ret = parse_emph2(ob, rndr, data[2:], c); ret == 0 {
380 return 0
381 }
382
383 return ret + 2
384 }
385
386 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
387 if c == '~' || unicode.IsSpace(int(data[3])) {
388 return 0
389 }
390 if ret = parse_emph3(ob, rndr, data, 3, c); ret == 0 {
391 return 0
392 }
393
394 return ret + 3
395 }
396
397 return 0
398}
399
400func char_codespan(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
401 data = data[offset:]
402
403 nb := 0
404
405 // counting the number of backticks in the delimiter
406 for nb < len(data) && data[nb] == '`' {
407 nb++
408 }
409
410 // finding the next delimiter
411 i, end := 0, 0
412 for end = nb; end < len(data) && i < nb; end++ {
413 if data[end] == '`' {
414 i++
415 } else {
416 i = 0
417 }
418 }
419
420 if i < nb && end >= len(data) {
421 return 0 // no matching delimiter
422 }
423
424 // trim outside whitespace
425 f_begin := nb
426 for f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t') {
427 f_begin++
428 }
429
430 f_end := end - nb
431 for f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t') {
432 f_end--
433 }
434
435 // real code span
436 if rndr.mk.codespan == nil {
437 return 0
438 }
439 if f_begin < f_end {
440 if rndr.mk.codespan(ob, data[f_end:f_end], rndr.mk.opaque) == 0 {
441 end = 0
442 }
443 } else {
444 if rndr.mk.codespan(ob, nil, rndr.mk.opaque) == 0 {
445 end = 0
446 }
447 }
448
449 return end
450
451}
452
453// '\n' preceded by two spaces
454func char_linebreak(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
455 if offset < 2 || data[offset-1] != ' ' || data[offset-2] != ' ' {
456 return 0
457 }
458
459 // remove trailing spaces from ob and render
460 ob_bytes := ob.Bytes()
461 end := len(ob_bytes)
462 for end > 0 && ob_bytes[end-1] == ' ' {
463 end--
464 }
465 ob.Truncate(end)
466
467 if rndr.mk.linebreak == nil {
468 return 0
469 }
470 if rndr.mk.linebreak(ob, rndr.mk.opaque) > 0 {
471 return 1
472 } else {
473 return 0
474 }
475
476 return 0
477}
478
479func char_link(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
480 data = data[offset:]
481 return 0
482}
483
484// '<' when tags or autolinks are allowed
485func char_langle_tag(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
486 data = data[offset:]
487 altype := MKDA_NOT_AUTOLINK
488 end := tag_length(data, &altype)
489 ret := 0
490
491 if end > 2 {
492 switch {
493 case rndr.mk.autolink != nil && altype != MKDA_NOT_AUTOLINK:
494 u_link := bytes.NewBuffer(nil)
495 unscape_text(u_link, data[1:end-2])
496 ret = rndr.mk.autolink(ob, u_link.Bytes(), altype, rndr.mk.opaque)
497 case rndr.mk.raw_html_tag != nil:
498 ret = rndr.mk.raw_html_tag(ob, data[:end], rndr.mk.opaque)
499 }
500 }
501
502 if ret == 0 {
503 return 0
504 }
505 return end
506}
507
508// '\\' backslash escape
509var escape_chars = []byte("\\`*_{}[]()#+-.!:|&<>")
510
511func char_escape(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
512 data = data[offset:]
513
514 if len(data) > 1 {
515 if bytes.IndexByte(escape_chars, data[1]) < 0 {
516 return 0
517 }
518
519 if rndr.mk.normal_text != nil {
520 rndr.mk.normal_text(ob, data[1:2], rndr.mk.opaque)
521 } else {
522 ob.WriteByte(data[1])
523 }
524 }
525
526 return 2
527}
528
529// '&' escaped when it doesn't belong to an entity
530// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
531func char_entity(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
532 data = data[offset:]
533
534 end := 1
535
536 if end < len(data) && data[end] == '#' {
537 end++
538 }
539
540 for end < len(data) && (unicode.IsDigit(int(data[end])) || unicode.IsLetter(int(data[end]))) {
541 end++
542 }
543
544 if end < len(data) && data[end] == ';' {
545 end++ // real entity
546 } else {
547 return 0 // lone '&'
548 }
549
550 if rndr.mk.entity != nil {
551 rndr.mk.entity(ob, data[:end], rndr.mk.opaque)
552 } else {
553 ob.Write(data[:end])
554 }
555
556 return end
557}
558
559func char_autolink(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
560 //orig_data := data
561 data = data[offset:]
562 return 0
563}
564
// ispunct reports whether c is an ASCII punctuation character, i.e. one of
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
//
// Those characters form four contiguous ASCII ranges, so they are tested by
// range. The previous loop compared the int argument with a rune loop
// variable, which does not compile under Go 1.
func ispunct(c int) bool {
	switch {
	case '!' <= c && c <= '/':
		return true
	case ':' <= c && c <= '@':
		return true
	case '[' <= c && c <= '`':
		return true
	case '{' <= c && c <= '~':
		return true
	}
	return false
}
574
575// return the length of the given tag, or 0 is it's not valid
576func tag_length(data []byte, autolink *int) int {
577 var i, j int
578
579 // a valid tag can't be shorter than 3 chars
580 if len(data) < 3 {
581 return 0
582 }
583
584 // begins with a '<' optionally followed by '/', followed by letter or number
585 if data[0] != '<' {
586 return 0
587 }
588 if data[1] == '/' {
589 i = 2
590 } else {
591 i = 1
592 }
593
594 if !unicode.IsDigit(int(data[i])) && !unicode.IsLetter(int(data[i])) {
595 return 0
596 }
597
598 // scheme test
599 *autolink = MKDA_NOT_AUTOLINK
600
601 // try to find the beggining of an URI
602 for i < len(data) && ((unicode.IsLetter(int(data[i])) || unicode.IsDigit(int(data[i]))) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
603 i++
604 }
605
606 if i > 1 && data[i] == '@' {
607 if j = is_mail_autolink(data[i:]); j != 0 {
608 *autolink = MKDA_EMAIL
609 return i + j
610 }
611 }
612
613 if i > 2 && data[i] == ':' {
614 *autolink = MKDA_NORMAL
615 i++
616 }
617
618 // complete autolink test: no whitespace or ' or "
619 switch {
620 case i >= len(data):
621 *autolink = MKDA_NOT_AUTOLINK
622 case *autolink != 0:
623 j = i
624
625 for i < len(data) {
626 if data[i] == '\\' {
627 i += 2
628 } else {
629 if data[i] == '>' || data[i] == '\'' || data[i] == '"' || unicode.IsSpace(int(data[i])) {
630 break
631 } else {
632 i++
633 }
634 }
635
636 }
637
638 if i >= len(data) {
639 return 0
640 }
641 if i > j && data[i] == '>' {
642 return i + 1
643 }
644
645 // one of the forbidden chars has been found
646 *autolink = MKDA_NOT_AUTOLINK
647 }
648
649 // looking for sometinhg looking like a tag end
650 for i < len(data) && data[i] != '>' {
651 i++
652 }
653 if i >= len(data) {
654 return 0
655 }
656 return i + 1
657}
658
// is_mail_autolink looks for the address part of a mail autolink followed by
// '>' and returns the length up to and including that '>', or 0 when data
// does not match.
//
// The address is assumed to be [-@._a-zA-Z0-9]+ with exactly one '@', which
// is less strict than the original markdown e-mail address matching. Only
// ASCII letters and digits are accepted: the unicode predicates applied to
// raw bytes also match some bytes of multi-byte UTF-8 sequences.
func is_mail_autolink(data []byte) int {
	nb := 0 // number of '@' seen so far

	for i, b := range data {
		switch {
		case '0' <= b && b <= '9', 'A' <= b && b <= 'Z', 'a' <= b && b <= 'z':
			// plain address character
		case b == '-' || b == '.' || b == '_':
			// allowed punctuation
		case b == '@':
			nb++
		case b == '>':
			if nb == 1 {
				return i + 1
			}
			return 0
		default:
			return 0
		}
	}

	return 0
}
690
// find_emph_char looks for the next occurrence of the emphasis char c in
// data, skipping code spans and links, and returns its index (0 when there
// is none). The search starts at index 1, so data[0] is never reported.
//
// When a code span or link is left unterminated, the first c seen inside it
// is returned instead (0 if there was none).
func find_emph_char(data []byte, c byte) int {
	i := 1

	for i < len(data) {
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		// nothing of interest up to the end: data[i] must not be read
		if i >= len(data) {
			return 0
		}
		if data[i] == c {
			return i
		}

		// do not count escaped chars
		if data[i-1] == '\\' {
			i++
			continue
		}

		if data[i] == '`' {
			// skip a code span, remembering the first c inside it
			tmp_i := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmp_i == 0 && data[i] == c {
					tmp_i = i
				}
				i++
			}
			if i >= len(data) {
				return tmp_i
			}
			i++
			continue
		}

		// data[i] == '[': skip a link, remembering the first c inside it
		tmp_i := 0
		i++
		for i < len(data) && data[i] != ']' {
			if tmp_i == 0 && data[i] == c {
				tmp_i = i
			}
			i++
		}
		i++
		for i < len(data) && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n') {
			i++
		}
		if i >= len(data) {
			return tmp_i
		}
		if data[i] != '[' && data[i] != '(' { // not a link
			if tmp_i > 0 {
				return tmp_i
			}
			continue
		}

		// skip the target up to its closing delimiter (the opening one was
		// searched for before, so the target was never actually skipped)
		cc := byte(']')
		if data[i] == '(' {
			cc = ')'
		}
		i++
		for i < len(data) && data[i] != cc {
			if tmp_i == 0 && data[i] == c {
				tmp_i = i
			}
			i++
		}
		if i >= len(data) {
			return tmp_i
		}
		i++
	}
	return 0
}
765
766func parse_emph1(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
767 i := 0
768
769 if rndr.mk.emphasis == nil {
770 return 0
771 }
772
773 // skip one symbol if coming from emph3
774 if len(data) > 1 && data[0] == c && data[1] == c {
775 i = 1
776 }
777
778 for i < len(data) {
779 length := find_emph_char(data[i:], c)
780 if length == 0 {
781 return 0
782 }
783 i += length
784 if i >= len(data) {
785 return 0
786 }
787
788 if i+1 < len(data) && data[i+1] == c {
789 i++
790 continue
791 }
792
793 if data[i] == c && !unicode.IsSpace(int(data[i-1])) {
794
795 if rndr.ext_flags&MKDEXT_NO_INTRA_EMPHASIS != 0 {
796 if !(i+1 == len(data) || unicode.IsSpace(int(data[i+1])) || ispunct(int(data[i+1]))) {
797 continue
798 }
799 }
800
801 work := bytes.NewBuffer(nil)
802 parse_inline(work, rndr, data[:i])
803 r := rndr.mk.emphasis(ob, work.Bytes(), rndr.mk.opaque)
804 if r > 0 {
805 return i + 1
806 } else {
807 return 0
808 }
809 }
810 }
811
812 return 0
813}
814
815func parse_emph2(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
816 render_method := rndr.mk.double_emphasis
817 if c == '~' {
818 render_method = rndr.mk.strikethrough
819 }
820
821 if render_method == nil {
822 return 0
823 }
824
825 i := 0
826
827 for i < len(data) {
828 length := find_emph_char(data[i:], c)
829 if length == 0 {
830 return 0
831 }
832 i += length
833
834 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !unicode.IsSpace(int(data[i-1])) {
835 work := bytes.NewBuffer(nil)
836 parse_inline(work, rndr, data[:i])
837 r := render_method(ob, work.Bytes(), rndr.mk.opaque)
838 if r > 0 {
839 return i + 2
840 } else {
841 return 0
842 }
843 }
844 i++
845 }
846 return 0
847}
848
849func parse_emph3(ob *bytes.Buffer, rndr *render, data []byte, offset int, c byte) int {
850 i := 0
851 orig_data := data
852 data = data[offset:]
853
854 for i < len(data) {
855 length := find_emph_char(data[i:], c)
856 if length == 0 {
857 return 0
858 }
859 i += length
860
861 // skip whitespace preceded symbols
862 if data[i] != c || unicode.IsSpace(int(data[i-1])) {
863 continue
864 }
865
866 switch {
867 case (i+2 < len(data) && data[i+1] == c && data[i+2] == c && rndr.mk.triple_emphasis != nil):
868 // triple symbol found
869 work := bytes.NewBuffer(nil)
870
871 parse_inline(work, rndr, data[:i])
872 r := rndr.mk.triple_emphasis(ob, work.Bytes(), rndr.mk.opaque)
873 if r > 0 {
874 return i + 3
875 } else {
876 return 0
877 }
878 case (i+1 < len(data) && data[i+1] == c):
879 // double symbol found, handing over to emph1
880 length = parse_emph1(ob, rndr, orig_data[offset-2:], c)
881 if length == 0 {
882 return 0
883 } else {
884 return length - 2
885 }
886 default:
887 // single symbol found, handing over to emph2
888 length = parse_emph2(ob, rndr, orig_data[offset-1:], c)
889 if length == 0 {
890 return 0
891 } else {
892 return length - 1
893 }
894 }
895 }
896 return 0
897}
898
899// parse block-level data
900func parse_block(ob *bytes.Buffer, rndr *render, data []byte) {
901 if rndr.nesting >= rndr.max_nesting {
902 return
903 }
904 rndr.nesting++
905
906 for len(data) > 0 {
907 if is_atxheader(rndr, data) {
908 data = data[parse_atxheader(ob, rndr, data):]
909 continue
910 }
911 if data[0] == '<' && rndr.mk.blockhtml != nil {
912 if i := parse_htmlblock(ob, rndr, data, true); i > 0 {
913 data = data[i:]
914 continue
915 }
916 }
917 if i := is_empty(data); i > 0 {
918 data = data[i:]
919 continue
920 }
921 if is_hrule(data) {
922 if rndr.mk.hrule != nil {
923 rndr.mk.hrule(ob, rndr.mk.opaque)
924 }
925 var i int
926 for i = 0; i < len(data) && data[i] != '\n'; i++ {
927 }
928 data = data[i:]
929 continue
930 }
931 if rndr.ext_flags&MKDEXT_FENCED_CODE != 0 {
932 if i := parse_fencedcode(ob, rndr, data); i > 0 {
933 data = data[i:]
934 continue
935 }
936 }
937 if rndr.ext_flags&MKDEXT_TABLES != 0 {
938 if i := parse_table(ob, rndr, data); i > 0 {
939 data = data[i:]
940 continue
941 }
942 }
943 if prefix_quote(data) > 0 {
944 data = data[parse_blockquote(ob, rndr, data):]
945 continue
946 }
947 if prefix_code(data) > 0 {
948 data = data[parse_blockcode(ob, rndr, data):]
949 continue
950 }
951 if prefix_uli(data) > 0 {
952 data = data[parse_list(ob, rndr, data, 0):]
953 continue
954 }
955 if prefix_oli(data) > 0 {
956 data = data[parse_list(ob, rndr, data, MKD_LIST_ORDERED):]
957 continue
958 }
959
960 data = data[parse_paragraph(ob, rndr, data):]
961 }
962
963 rndr.nesting--
964}
965
966func is_atxheader(rndr *render, data []byte) bool {
967 if data[0] != '#' {
968 return false
969 }
970
971 if rndr.ext_flags&MKDEXT_SPACE_HEADERS != 0 {
972 level := 0
973 for level < len(data) && level < 6 && data[level] == '#' {
974 level++
975 }
976 if level < len(data) && data[level] != ' ' && data[level] != '\t' {
977 return false
978 }
979 }
980 return true
981}
982
983func parse_atxheader(ob *bytes.Buffer, rndr *render, data []byte) int {
984 level := 0
985 for level < len(data) && level < 6 && data[level] == '#' {
986 level++
987 }
988 i, end := 0, 0
989 for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ {
990 }
991 for end = i; end < len(data) && data[end] != '\n'; end++ {
992 }
993 skip := end
994 for end > 0 && data[end-1] == '#' {
995 end--
996 }
997 for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') {
998 end--
999 }
1000 if end > i {
1001 work := bytes.NewBuffer(nil)
1002 parse_inline(work, rndr, data[i:end])
1003 if rndr.mk.header != nil {
1004 rndr.mk.header(ob, work.Bytes(), level, rndr.mk.opaque)
1005 }
1006 }
1007 return skip
1008}
1009
// is_headerline tests whether data starts with a setext header underline: a
// run of '=' (level 1) or '-' (level 2), optionally followed by spaces or
// tabs, up to a newline or the end of data. It returns the header level, or
// 0 when the line is not an underline. data must not be empty.
func is_headerline(data []byte) int {
	var level int
	switch data[0] {
	case '=':
		level = 1
	case '-':
		level = 2
	default:
		return 0
	}

	marker := data[0]
	pos := 1
	for pos < len(data) && data[pos] == marker {
		pos++
	}
	for pos < len(data) && (data[pos] == ' ' || data[pos] == '\t') {
		pos++
	}
	if pos < len(data) && data[pos] != '\n' {
		return 0
	}
	return level
}
1043
1044func parse_htmlblock(ob *bytes.Buffer, rndr *render, data []byte, do_render bool) int {
1045 var i, j int
1046
1047 // identify the opening tag
1048 if len(data) < 2 || data[0] != '<' {
1049 return 0
1050 }
1051 curtag, tagfound := find_block_tag(data[1:])
1052
1053 // handle special cases
1054 if !tagfound {
1055
1056 // HTML comment, laxist form
1057 if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' {
1058 i = 5
1059
1060 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
1061 i++
1062 }
1063 i++
1064
1065 if i < len(data) {
1066 j = is_empty(data[i:])
1067 }
1068
1069 if j > 0 {
1070 size := i + j
1071 if do_render && rndr.mk.blockhtml != nil {
1072 rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1073 }
1074 return size
1075 }
1076 }
1077
1078 // HR, which is the only self-closing block tag considered
1079 if len(data) > 4 && (data[i] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') {
1080 i = 3
1081 for i < len(data) && data[i] != '>' {
1082 i++
1083 }
1084
1085 if i+1 < len(data) {
1086 i++
1087 j = is_empty(data[i:])
1088 if j > 0 {
1089 size := i + j
1090 if do_render && rndr.mk.blockhtml != nil {
1091 rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1092 }
1093 return size
1094 }
1095 }
1096 }
1097
1098 // no special case recognized
1099 return 0
1100 }
1101
1102 // look for an unindented matching closing tag
1103 // followed by a blank line
1104 i = 1
1105 found := false
1106
1107 // if not found, try a second pass looking for indented match
1108 // but not if tag is "ins" or "del" (following original Markdown.pl)
1109 if curtag != "ins" && curtag != "del" {
1110 i = 1
1111 for i < len(data) {
1112 i++
1113 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
1114 i++
1115 }
1116
1117 if i+2+len(curtag) >= len(data) {
1118 break
1119 }
1120
1121 j = htmlblock_end(curtag, rndr, data[i-1:])
1122
1123 if j > 0 {
1124 i += j - 1
1125 found = true
1126 break
1127 }
1128 }
1129 }
1130
1131 if !found {
1132 return 0
1133 }
1134
1135 // the end of the block has been found
1136 if do_render && rndr.mk.blockhtml != nil {
1137 rndr.mk.blockhtml(ob, data[:i], rndr.mk.opaque)
1138 }
1139
1140 return i
1141}
1142
1143func find_block_tag(data []byte) (string, bool) {
1144 i := 0
1145 for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) {
1146 i++
1147 }
1148 if i >= len(data) {
1149 return "", false
1150 }
1151 key := string(data[:i])
1152 if _, ok := block_tags[key]; ok {
1153 return key, true
1154 }
1155 return "", false
1156}
1157
1158func htmlblock_end(tag string, rndr *render, data []byte) int {
1159 // assume data[0] == '<' && data[1] == '/' already tested
1160
1161 // check if tag is a match
1162 if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' {
1163 return 0
1164 }
1165
1166 // check white lines
1167 i := len(tag) + 3
1168 w := 0
1169 if i < len(data) {
1170 if w = is_empty(data[i:]); w == 0 {
1171 return 0 // non-blank after tag
1172 }
1173 }
1174 i += w
1175 w = 0
1176
1177 if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
1178 if i < len(data) {
1179 w = is_empty(data[i:])
1180 }
1181 } else {
1182 if i < len(data) {
1183 if w = is_empty(data[i:]); w == 0 {
1184 return 0 // non-blank line after tag line
1185 }
1186 }
1187 }
1188
1189 return i + w
1190}
1191
// is_empty checks whether the first line of data holds only spaces and tabs.
// It returns 0 when the line contains anything else; otherwise it returns
// the line length plus one for the terminator. The extra one is counted even
// when data ends without a '\n', so the result may be len(data)+1.
func is_empty(data []byte) int {
	for pos, ch := range data {
		switch ch {
		case '\n':
			return pos + 1
		case ' ', '\t':
			// still blank
		default:
			return 0
		}
	}
	return len(data) + 1
}
1201
// is_hrule reports whether the first line of data is a horizontal rule: up
// to three leading spaces, then at least three '*', '-' or '_' (all the
// same character) with nothing but spaces and tabs in between.
func is_hrule(data []byte) bool {
	if len(data) < 3 {
		return false
	}

	// skip up to three initial spaces
	start := 0
	for start < 3 && data[start] == ' ' {
		start++
	}

	// look at the hrule char
	if start+2 >= len(data) {
		return false
	}
	marker := data[start]
	if marker != '*' && marker != '-' && marker != '_' {
		return false
	}

	// the whole line must be the char or whitespace
	count := 0
	for _, ch := range data[start:] {
		if ch == '\n' {
			break
		}
		switch ch {
		case marker:
			count++
		case ' ', '\t':
			// separators are allowed
		default:
			return false
		}
	}

	return count >= 3
}
1238
// is_codefence checks whether the first line of data is a code fence: up to
// three leading spaces followed by at least three '~' or three '`'.
//
// When syntax is non-nil the fence may carry a language name, either bare
// ("```go") or in braces ("``` { go }", surrounding whitespace stripped), and
// *syntax is set to point at that name (possibly empty). When syntax is nil
// only whitespace may follow the fence characters.
//
// It returns the length of the fence line plus one for its terminator (the
// extra one is counted even when data ends without a '\n'), or 0 when the
// line is not a fence.
func is_codefence(data []byte, syntax **string) int {
	// ASCII-only whitespace test. unicode.IsSpace on a raw byte also matches
	// 0x85 and 0xA0, which are UTF-8 continuation bytes and would cut a
	// non-ASCII language name short.
	isspace := func(b byte) bool {
		return b == ' ' || ('\t' <= b && b <= '\r')
	}

	i, n := 0, 0

	// skip initial spaces
	if len(data) < 3 {
		return 0
	}
	for i < 3 && data[i] == ' ' {
		i++
	}

	// look at the fence char
	if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') {
		return 0
	}
	c := data[i]

	// count the run of fence chars
	for i < len(data) && data[i] == c {
		n++
		i++
	}
	if n < 3 {
		return 0
	}

	if syntax != nil {
		syn := 0

		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
			i++
		}

		syntax_start := i

		if i < len(data) && data[i] == '{' {
			i++
			syntax_start++

			for i < len(data) && data[i] != '}' && data[i] != '\n' {
				syn++
				i++
			}
			if i == len(data) || data[i] != '}' {
				return 0
			}

			// strip all whitespace at the beginning and the end
			// of the {} block
			for syn > 0 && isspace(data[syntax_start]) {
				syntax_start++
				syn--
			}
			for syn > 0 && isspace(data[syntax_start+syn-1]) {
				syn--
			}

			i++
		} else {
			for i < len(data) && !isspace(data[i]) {
				syn++
				i++
			}
		}

		language := string(data[syntax_start : syntax_start+syn])
		*syntax = &language
	}

	// only whitespace may remain on the line
	for i < len(data) && data[i] != '\n' {
		if !isspace(data[i]) {
			return 0
		}
		i++
	}

	return i + 1
}
1327
1328func parse_fencedcode(ob *bytes.Buffer, rndr *render, data []byte) int {
1329 var lang *string
1330 beg := is_codefence(data, &lang)
1331 if beg == 0 {
1332 return 0
1333 }
1334
1335 work := bytes.NewBuffer(nil)
1336
1337 for beg < len(data) {
1338 fence_end := is_codefence(data[beg:], nil)
1339 if fence_end != 0 {
1340 beg += fence_end
1341 break
1342 }
1343
1344 var end int
1345 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1346 }
1347
1348 if beg < end {
1349 // verbatim copy to the working buffer, escaping entities
1350 if is_empty(data[beg:]) > 0 {
1351 work.WriteByte('\n')
1352 } else {
1353 work.Write(data[beg:end])
1354 }
1355 }
1356 beg = end
1357 }
1358
1359 if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
1360 work.WriteByte('\n')
1361 }
1362
1363 if rndr.mk.blockcode != nil {
1364 syntax := ""
1365 if lang != nil {
1366 syntax = *lang
1367 }
1368
1369 rndr.mk.blockcode(ob, work.Bytes(), syntax, rndr.mk.opaque)
1370 }
1371
1372 return beg
1373}
1374
1375func parse_table(ob *bytes.Buffer, rndr *render, data []byte) int {
1376 header_work := bytes.NewBuffer(nil)
1377 i, columns, col_data := parse_table_header(header_work, rndr, data)
1378 if i > 0 {
1379 body_work := bytes.NewBuffer(nil)
1380
1381 for i < len(data) {
1382 pipes, row_start := 0, i
1383 for ; i < len(data) && data[i] != '\n'; i++ {
1384 if data[i] == '|' {
1385 pipes++
1386 }
1387 }
1388
1389 if pipes == 0 || i == len(data) {
1390 i = row_start
1391 break
1392 }
1393
1394 parse_table_row(body_work, rndr, data[row_start:i], columns, col_data)
1395 i++
1396 }
1397
1398 if rndr.mk.table != nil {
1399 rndr.mk.table(ob, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque)
1400 }
1401 }
1402
1403 return i
1404}
1405
1406func parse_table_header(ob *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) {
1407 i, pipes := 0, 0
1408 column_data = []int{}
1409 for i = 0; i < len(data) && data[i] != '\n'; i++ {
1410 if data[i] == '|' {
1411 pipes++
1412 }
1413 }
1414
1415 if i == len(data) || pipes == 0 {
1416 return 0, 0, column_data
1417 }
1418
1419 header_end := i
1420
1421 if data[0] == '|' {
1422 pipes--
1423 }
1424
1425 if i > 2 && data[i-1] == '|' {
1426 pipes--
1427 }
1428
1429 columns = pipes + 1
1430 column_data = make([]int, columns)
1431
1432 // parse the header underline
1433 i++
1434 if i < len(data) && data[i] == '|' {
1435 i++
1436 }
1437
1438 under_end := i
1439 for under_end < len(data) && data[under_end] != '\n' {
1440 under_end++
1441 }
1442
1443 col := 0
1444 for ; col < columns && i < under_end; col++ {
1445 dashes := 0
1446
1447 for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1448 i++
1449 }
1450
1451 if data[i] == ':' {
1452 i++
1453 column_data[col] |= MKD_TABLE_ALIGN_L
1454 dashes++
1455 }
1456
1457 for i < under_end && data[i] == '-' {
1458 i++
1459 dashes++
1460 }
1461
1462 if i < under_end && data[i] == ':' {
1463 i++
1464 column_data[col] |= MKD_TABLE_ALIGN_R
1465 dashes++
1466 }
1467
1468 for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1469 i++
1470 }
1471
1472 if i < under_end && data[i] != '|' {
1473 break
1474 }
1475
1476 if dashes < 3 {
1477 break
1478 }
1479
1480 i++
1481 }
1482
1483 if col < columns {
1484 return 0, 0, column_data
1485 }
1486
1487 parse_table_row(ob, rndr, data[:header_end], columns, column_data)
1488 size = under_end + 1
1489 return
1490}
1491
1492func parse_table_row(ob *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) {
1493 i, col := 0, 0
1494 row_work := bytes.NewBuffer(nil)
1495
1496 if i < len(data) && data[i] == '|' {
1497 i++
1498 }
1499
1500 for col = 0; col < columns && i < len(data); col++ {
1501 for i < len(data) && unicode.IsSpace(int(data[i])) {
1502 i++
1503 }
1504
1505 cell_start := i
1506
1507 for i < len(data) && data[i] != '|' {
1508 i++
1509 }
1510
1511 cell_end := i - 1
1512
1513 for cell_end > cell_start && unicode.IsSpace(int(data[cell_end])) {
1514 cell_end--
1515 }
1516
1517 cell_work := bytes.NewBuffer(nil)
1518 parse_inline(cell_work, rndr, data[cell_start:cell_end+1])
1519
1520 if rndr.mk.table_cell != nil {
1521 cdata := 0
1522 if col < len(col_data) {
1523 cdata = col_data[col]
1524 }
1525 rndr.mk.table_cell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque)
1526 }
1527
1528 i++
1529 }
1530
1531 for ; col < columns; col++ {
1532 empty_cell := []byte{}
1533 if rndr.mk.table_cell != nil {
1534 cdata := 0
1535 if col < len(col_data) {
1536 cdata = col_data[col]
1537 }
1538 rndr.mk.table_cell(row_work, empty_cell, cdata, rndr.mk.opaque)
1539 }
1540 }
1541
1542 if rndr.mk.table_row != nil {
1543 rndr.mk.table_row(ob, row_work.Bytes(), rndr.mk.opaque)
1544 }
1545}
1546
// prefix_quote returns the length of the blockquote marker that opens data:
// up to three spaces, a '>', and one optional space or tab. It returns 0
// when data does not start with such a marker.
func prefix_quote(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}

	if n >= len(data) || data[n] != '>' {
		return 0
	}
	n++ // the '>' itself

	// one space or tab after '>' belongs to the marker
	if n < len(data) && (data[n] == ' ' || data[n] == '\t') {
		n++
	}
	return n
}
1561
1562// handles parsing of a blockquote fragment
1563func parse_blockquote(ob *bytes.Buffer, rndr *render, data []byte) int {
1564 out := bytes.NewBuffer(nil)
1565 work := bytes.NewBuffer(nil)
1566 beg, end := 0, 0
1567 for beg < len(data) {
1568 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1569 }
1570
1571 if pre := prefix_quote(data[beg:]); pre > 0 {
1572 beg += pre // skip prefix
1573 } else {
1574 // empty line followed by non-quote line
1575 if is_empty(data[beg:]) > 0 && (end >= len(data) || (prefix_quote(data[end:]) == 0 && is_empty(data[end:]) == 0)) {
1576 break
1577 }
1578 }
1579
1580 if beg < end { // copy into the in-place working buffer
1581 work.Write(data[beg:end])
1582 }
1583 beg = end
1584 }
1585
1586 parse_block(out, rndr, work.Bytes())
1587 if rndr.mk.blockquote != nil {
1588 rndr.mk.blockquote(ob, out.Bytes(), rndr.mk.opaque)
1589 }
1590 return end
1591}
1592
// prefix_code returns the length of the indentation that marks a code-block
// line: 1 for a leading tab, 4 for four leading spaces, 0 otherwise.
func prefix_code(data []byte) int {
	switch {
	case len(data) > 0 && data[0] == '\t':
		return 1
	case len(data) > 3 && string(data[:4]) == "    ":
		return 4
	}
	return 0
}
1603
1604func parse_blockcode(ob *bytes.Buffer, rndr *render, data []byte) int {
1605 work := bytes.NewBuffer(nil)
1606
1607 beg, end := 0, 0
1608 for beg < len(data) {
1609 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1610 }
1611
1612 chunk := data[beg:end]
1613 if pre := prefix_code(chunk); pre > 0 {
1614 beg += pre
1615 } else {
1616 if is_empty(chunk) == 0 {
1617 // non-empty non-prefixed line breaks the pre
1618 break
1619 }
1620 }
1621
1622 if beg < end {
1623 // verbatim copy to the working buffer, escaping entities
1624 if is_empty(chunk) > 0 {
1625 work.WriteByte('\n')
1626 } else {
1627 work.Write(chunk)
1628 }
1629 }
1630 beg = end
1631 }
1632
1633 // trim all the \n off the end of work
1634 workbytes := work.Bytes()
1635 n := 0
1636 for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' {
1637 n++
1638 }
1639 if n > 0 {
1640 work = bytes.NewBuffer(workbytes[:len(workbytes)-n])
1641 }
1642
1643 work.WriteByte('\n')
1644
1645 if rndr.mk.blockcode != nil {
1646 rndr.mk.blockcode(ob, work.Bytes(), "", rndr.mk.opaque)
1647 }
1648
1649 return beg
1650}
1651
// prefix_uli returns the length of the unordered-list marker that opens
// data: up to three spaces, one of '*', '+' or '-', then a space or tab.
// It returns 0 when data does not start with such a marker.
func prefix_uli(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}

	// both the bullet and the byte after it must exist
	if n+1 >= len(data) {
		return 0
	}

	switch data[n] {
	case '*', '+', '-':
	default:
		return 0
	}

	if sep := data[n+1]; sep != ' ' && sep != '\t' {
		return 0
	}
	return n + 2
}
1663
// prefix_oli returns the length of the ordered-list marker that opens data:
// up to three spaces, one or more ASCII digits, a '.', then a space or tab.
// It returns 0 when data does not start with such a marker.
func prefix_oli(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}

	first_digit := n
	for n < len(data) && data[n] >= '0' && data[n] <= '9' {
		n++
	}
	if n == first_digit {
		return 0 // no number at all
	}

	// the '.' and the byte after it must both exist
	if n+1 >= len(data) || data[n] != '.' {
		return 0
	}
	if sep := data[n+1]; sep != ' ' && sep != '\t' {
		return 0
	}
	return n + 2
}
1681
1682// parsing ordered or unordered list block
1683func parse_list(ob *bytes.Buffer, rndr *render, data []byte, flags int) int {
1684 work := bytes.NewBuffer(nil)
1685
1686 i, j := 0, 0
1687 for i < len(data) {
1688 j = parse_listitem(work, rndr, data[i:], &flags)
1689 i += j
1690
1691 if j == 0 || flags&MKD_LI_END != 0 {
1692 break
1693 }
1694 }
1695
1696 if rndr.mk.list != nil {
1697 rndr.mk.list(ob, work.Bytes(), flags, rndr.mk.opaque)
1698 }
1699 return i
1700}
1701
1702// parse a single list item
1703// assumes initial prefix is already removed
1704func parse_listitem(ob *bytes.Buffer, rndr *render, data []byte, flags *int) int {
1705 // keep track of the first indentation prefix
1706 beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0
1707
1708 for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' {
1709 orgpre++
1710 }
1711
1712 beg = prefix_uli(data)
1713 if beg == 0 {
1714 beg = prefix_oli(data)
1715 }
1716 if beg == 0 {
1717 return 0
1718 }
1719
1720 // skip to the beginning of the following line
1721 end = beg
1722 for end < len(data) && data[end-1] != '\n' {
1723 end++
1724 }
1725
1726 // get working buffers
1727 work := bytes.NewBuffer(nil)
1728 inter := bytes.NewBuffer(nil)
1729
1730 // put the first line into the working buffer
1731 work.Write(data[beg:end])
1732 beg = end
1733
1734 // process the following lines
1735 in_empty, has_inside_empty := false, false
1736 for beg < len(data) {
1737 end++
1738
1739 for end < len(data) && data[end-1] != '\n' {
1740 end++
1741 }
1742
1743 // process an empty line
1744 if is_empty(data[beg:end]) > 0 {
1745 in_empty = true
1746 beg = end
1747 continue
1748 }
1749
1750 // calculate the indentation
1751 i = 0
1752 for i < 4 && beg+i < end && data[beg+i] == ' ' {
1753 i++
1754 }
1755
1756 pre = i
1757 if data[beg] == '\t' {
1758 i = 1
1759 pre = 8
1760 }
1761
1762 // check for a new item
1763 chunk := data[beg+i : end]
1764 if (prefix_uli(chunk) > 0 && !is_hrule(chunk)) || prefix_oli(chunk) > 0 {
1765 if in_empty {
1766 has_inside_empty = true
1767 }
1768
1769 if pre == orgpre { // the following item must have the same indentation
1770 break
1771 }
1772
1773 if sublist == 0 {
1774 sublist = work.Len()
1775 }
1776 } else {
1777 // only join indented stuff after empty lines
1778 if in_empty && i < 4 && data[beg] != '\t' {
1779 *flags |= MKD_LI_END
1780 break
1781 } else {
1782 if in_empty {
1783 work.WriteByte('\n')
1784 has_inside_empty = true
1785 }
1786 }
1787 }
1788
1789 in_empty = false
1790
1791 // add the line into the working buffer without prefix
1792 work.Write(data[beg+i : end])
1793 beg = end
1794 }
1795
1796 // render li contents
1797 if has_inside_empty {
1798 *flags |= MKD_LI_BLOCK
1799 }
1800
1801 workbytes := work.Bytes()
1802 if *flags&MKD_LI_BLOCK != 0 {
1803 // intermediate render of block li
1804 if sublist > 0 && sublist < len(workbytes) {
1805 parse_block(inter, rndr, workbytes[:sublist])
1806 parse_block(inter, rndr, workbytes[sublist:])
1807 } else {
1808 parse_block(inter, rndr, workbytes)
1809 }
1810 } else {
1811 // intermediate render of inline li
1812 if sublist > 0 && sublist < len(workbytes) {
1813 parse_inline(inter, rndr, workbytes[:sublist])
1814 parse_inline(inter, rndr, workbytes[sublist:])
1815 } else {
1816 parse_inline(inter, rndr, workbytes)
1817 }
1818 }
1819
1820 // render li itself
1821 if rndr.mk.listitem != nil {
1822 rndr.mk.listitem(ob, inter.Bytes(), *flags, rndr.mk.opaque)
1823 }
1824
1825 return beg
1826}
1827
1828func parse_paragraph(ob *bytes.Buffer, rndr *render, data []byte) int {
1829 i, end, level := 0, 0, 0
1830
1831 for i < len(data) {
1832 for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ {
1833 }
1834
1835 if is_empty(data[i:]) > 0 {
1836 break
1837 }
1838 if level = is_headerline(data[i:]); level > 0 {
1839 break
1840 }
1841
1842 if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
1843 if data[i] == '<' && rndr.mk.blockhtml != nil && parse_htmlblock(ob, rndr, data[i:], false) > 0 {
1844 end = i
1845 break
1846 }
1847 }
1848
1849 if is_atxheader(rndr, data[i:]) || is_hrule(data[i:]) {
1850 end = i
1851 break
1852 }
1853
1854 i = end
1855 }
1856
1857 work := data
1858 size := i
1859 for size > 0 && work[size-1] == '\n' {
1860 size--
1861 }
1862
1863 if level == 0 {
1864 tmp := bytes.NewBuffer(nil)
1865 parse_inline(tmp, rndr, work[:size])
1866 if rndr.mk.paragraph != nil {
1867 rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
1868 }
1869 } else {
1870 if size > 0 {
1871 beg := 0
1872 i = size
1873 size--
1874
1875 for size > 0 && work[size] != '\n' {
1876 size--
1877 }
1878
1879 beg = size + 1
1880 for size > 0 && work[size-1] == '\n' {
1881 size--
1882 }
1883
1884 if size > 0 {
1885 tmp := bytes.NewBuffer(nil)
1886 parse_inline(tmp, rndr, work[:size])
1887 if rndr.mk.paragraph != nil {
1888 rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
1889 }
1890
1891 work = work[beg:]
1892 size = i - beg
1893 } else {
1894 size = i
1895 }
1896 }
1897
1898 header_work := bytes.NewBuffer(nil)
1899 parse_inline(header_work, rndr, work[:size])
1900
1901 if rndr.mk.header != nil {
1902 rndr.mk.header(ob, header_work.Bytes(), level, rndr.mk.opaque)
1903 }
1904 }
1905
1906 return end
1907}
1908
1909
1910//
1911//
1912// HTML rendering
1913//
1914//
1915
// Bit flags for the HTML renderer; the render callbacks test them against
// html_renderopts.flags. Only HTML_TOC and HTML_HARD_WRAP are consulted in
// this part of the file.
const (
	HTML_SKIP_HTML = 1 << iota
	HTML_SKIP_STYLE
	HTML_SKIP_IMAGES
	HTML_SKIP_LINKS
	HTML_EXPAND_TABS
	HTML_SAFELINK
	HTML_TOC // rndr_header adds a numbered id="toc_N" attribute to headings
	HTML_HARD_WRAP // rndr_paragraph emits a line-break tag at every newline
	HTML_GITHUB_BLOCKCODE
	HTML_USE_XHTML
)
1928
// html_renderopts holds the HTML renderer's options and state. The rndr_*
// callbacks obtain it by type-asserting their opaque argument.
type html_renderopts struct {
	toc_data struct {
		// number of headings emitted so far; rndr_header uses it for the
		// toc_N id and increments it when HTML_TOC is set
		header_count int
		current_level int
	}
	// bitmask of HTML_* flags
	flags uint32
	// text that completes a void tag: written after "<hr" by rndr_hrule and
	// at hard line breaks by rndr_paragraph (main sets it to " />")
	close_tag string
}
1937
// attr_escape appends src to ob with HTML special characters escaped by
// html.EscapeString.
func attr_escape(ob *bytes.Buffer, src []byte) {
	escaped := html.EscapeString(string(src))
	ob.WriteString(escaped)
}
1941
// unscape_text appends src to ob with backslash escapes removed: each
// backslash is dropped and the byte after it is copied literally. A lone
// backslash at the very end of src is dropped.
func unscape_text(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		bs := bytes.IndexByte(src, '\\')
		if bs < 0 {
			// no more escapes: copy the remainder and finish
			ob.Write(src)
			return
		}

		ob.Write(src[:bs])

		// a trailing backslash has nothing to escape
		if bs+1 >= len(src) {
			return
		}

		ob.WriteByte(src[bs+1])
		src = src[bs+2:]
	}
}
1962
1963func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
1964 options := opaque.(*html_renderopts)
1965
1966 if ob.Len() > 0 {
1967 ob.WriteByte('\n')
1968 }
1969
1970 if options.flags&HTML_TOC != 0 {
1971 ob.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
1972 options.toc_data.header_count++
1973 } else {
1974 ob.WriteString(fmt.Sprintf("<h%d>", level))
1975 }
1976
1977 ob.Write(text)
1978 ob.WriteString(fmt.Sprintf("</h%d>\n", level))
1979}
1980
// rndr_raw_block writes text verbatim, stripped of leading and trailing
// newlines and followed by a single newline. A newline is written first
// when ob already has content. Nothing is written when text consists only
// of newlines.
func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}
	ob.Write(body)
	ob.WriteByte('\n')
}
1999
2000func rndr_hrule(ob *bytes.Buffer, opaque interface{}) {
2001 options := opaque.(*html_renderopts)
2002
2003 if ob.Len() > 0 {
2004 ob.WriteByte('\n')
2005 }
2006 ob.WriteString("<hr")
2007 ob.WriteString(options.close_tag)
2008}
2009
// rndr_blockcode writes text as a <pre><code> block with its content
// HTML-escaped. A newline is written first when ob already has content.
//
// lang is a whitespace-separated list of class names; when it is non-empty
// the names are written, HTML-escaped and separated by single spaces, into
// the class attribute of the <code> element. One leading '.' is removed
// from each name. opaque is not used.
func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}

	if lang == "" {
		ob.WriteString("<pre><code>")
	} else {
		// lang is scanned byte by byte, so only ASCII whitespace may
		// separate names; bytes 0x85 and 0xA0 also occur inside multi-byte
		// UTF-8 characters and must not split a name
		is_space := func(c byte) bool {
			switch c {
			case ' ', '\t', '\n', '\v', '\f', '\r':
				return true
			}
			return false
		}

		ob.WriteString("<pre><code class=\"")

		classes := 0
		for i := 0; i < len(lang); {
			for i < len(lang) && is_space(lang[i]) {
				i++
			}
			if i >= len(lang) {
				break
			}

			org := i
			for i < len(lang) && !is_space(lang[i]) {
				i++
			}

			if lang[org] == '.' {
				org++
			}

			if classes > 0 {
				ob.WriteByte(' ')
			}
			// write this name only: it ends at i, not at the end of lang
			ob.WriteString(html.EscapeString(lang[org:i]))
			classes++
		}

		ob.WriteString("\">")
	}

	if len(text) > 0 {
		ob.WriteString(html.EscapeString(string(text)))
	}

	ob.WriteString("</code></pre>\n")
}
2051
// rndr_table wraps the already rendered header and body rows in
// <table>/<thead>/<tbody> markup. A newline is written first when ob
// already has content. opaque is not used.
func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) {
	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}

	parts := [][]byte{
		[]byte("<table><thead>\n"),
		header,
		[]byte("\n</thead><tbody>\n"),
		body,
		[]byte("\n</tbody></table>"),
	}
	for _, part := range parts {
		ob.Write(part)
	}
}
2062
// rndr_tablerow wraps the already rendered cells of one row in <tr> markup.
// A newline is written first when ob already has content. opaque is not
// used.
func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) {
	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}

	parts := [][]byte{[]byte("<tr>\n"), text, []byte("\n</tr>")}
	for _, part := range parts {
		ob.Write(part)
	}
}
2071
2072func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) {
2073 if ob.Len() > 0 {
2074 ob.WriteByte('\n')
2075 }
2076 switch align {
2077 case MKD_TABLE_ALIGN_L:
2078 ob.WriteString("<td align=\"left\">")
2079 case MKD_TABLE_ALIGN_R:
2080 ob.WriteString("<td align=\"right\">")
2081 case MKD_TABLE_ALIGN_CENTER:
2082 ob.WriteString("<td align=\"center\">")
2083 default:
2084 ob.WriteString("<td>")
2085 }
2086
2087 ob.Write(text)
2088 ob.WriteString("</td>")
2089}
2090
2091func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
2092 if ob.Len() > 0 {
2093 ob.WriteByte('\n')
2094 }
2095 if flags&MKD_LIST_ORDERED != 0 {
2096 ob.WriteString("<ol>\n")
2097 } else {
2098 ob.WriteString("<ul>\n")
2099 }
2100 ob.Write(text)
2101 if flags&MKD_LIST_ORDERED != 0 {
2102 ob.WriteString("</ol>\n")
2103 } else {
2104 ob.WriteString("</ul>\n")
2105 }
2106}
2107
// rndr_listitem writes text as an <li> element followed by a newline.
// Trailing newlines of text are dropped. flags and opaque are not used.
func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	ob.WriteString("<li>")
	ob.Write(bytes.TrimRight(text, "\n"))
	ob.WriteString("</li>\n")
}
2117
2118func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) {
2119 options := opaque.(*html_renderopts)
2120 i := 0
2121
2122 if ob.Len() > 0 {
2123 ob.WriteByte('\n')
2124 }
2125
2126 if len(text) == 0 {
2127 return
2128 }
2129
2130 for i < len(text) && unicode.IsSpace(int(text[i])) {
2131 i++
2132 }
2133
2134 if i == len(text) {
2135 return
2136 }
2137
2138 ob.WriteString("<p>")
2139 if options.flags&HTML_HARD_WRAP != 0 {
2140 for i < len(text) {
2141 org := i
2142 for i < len(text) && text[i] != '\n' {
2143 i++
2144 }
2145
2146 if i > org {
2147 ob.Write(text[org:i])
2148 }
2149
2150 if i >= len(text) {
2151 break
2152 }
2153
2154 ob.WriteString("<br>")
2155 ob.WriteString(options.close_tag)
2156 i++
2157 }
2158 } else {
2159 ob.Write(text[i:])
2160 }
2161 ob.WriteString("</p>\n")
2162}
2163
2164
2165func main() {
2166 ob := bytes.NewBuffer(nil)
2167 input := ""
2168 input += "##Header##\n"
2169 input += "\n"
2170 input += "----------\n"
2171 input += "\n"
2172 input += "Underlined header\n"
2173 input += "-----------------\n"
2174 input += "\n"
2175 input += "<p>Some block html\n"
2176 input += "</p>\n"
2177 input += "\n"
2178 input += "Score | Grade\n"
2179 input += "------|------\n"
2180 input += "94 | A\n"
2181 input += "85 | B\n"
2182 input += "74 | C\n"
2183 input += "65 | D\n"
2184 input += "\n"
2185 input += "``` go\n"
2186 input += "func fib(n int) int {\n"
2187 input += " if n <= 1 {\n"
2188 input += " return n\n"
2189 input += " }\n"
2190 input += " return n * fib(n-1)\n"
2191 input += "}\n"
2192 input += "```\n"
2193 input += "\n"
2194 input += "> A blockquote\n"
2195 input += "> or something like that\n"
2196 input += "> With a table | of two columns\n"
2197 input += "> -------------|---------------\n"
2198 input += "> key | value \n"
2199 input += "\n"
2200 input += "\n"
2201 input += "Some **bold** Some *italic* and [a link][1] \n"
2202 input += "\n"
2203 input += "A little code sample\n"
2204 input += "\n"
2205 input += " </head>\n"
2206 input += " <title>Web Page Title</title>\n"
2207 input += " </head>\n"
2208 input += "\n"
2209 input += "A picture\n"
2210 input += "\n"
2211 input += "![alt text][2]\n"
2212 input += "\n"
2213 input += "A list\n"
2214 input += "\n"
2215 input += "- apples\n"
2216 input += "- oranges\n"
2217 input += "- eggs\n"
2218 input += "\n"
2219 input += "A numbered list\n"
2220 input += "\n"
2221 input += "1. a\n"
2222 input += "2. b\n"
2223 input += "3. c\n"
2224 input += "\n"
2225 input += "A little quote\n"
2226 input += "\n"
2227 input += "> It is now time for all good men to come to the aid of their country. \n"
2228 input += "\n"
2229 input += "A final paragraph.\n"
2230 input += "\n"
2231 input += " [1]: http://www.google.com\n"
2232 input += " [2]: http://www.google.com/intl/en_ALL/images/logo.gif\n"
2233
2234 ib := []byte(input)
2235 rndrer := new(mkd_renderer)
2236 rndrer.blockcode = rndr_blockcode
2237 rndrer.blockhtml = rndr_raw_block
2238 rndrer.header = rndr_header
2239 rndrer.hrule = rndr_hrule
2240 rndrer.list = rndr_list
2241 rndrer.listitem = rndr_listitem
2242 rndrer.paragraph = rndr_paragraph
2243 rndrer.table = rndr_table
2244 rndrer.table_row = rndr_tablerow
2245 rndrer.table_cell = rndr_tablecell
2246 rndrer.opaque = &html_renderopts{close_tag: " />"}
2247 var extensions uint32 = MKDEXT_FENCED_CODE | MKDEXT_TABLES
2248 Markdown(ob, ib, rndrer, extensions)
2249 fmt.Print(ob.String())
2250}
2251
// expand_tabs appends line to ob with every tab replaced by one to four
// spaces, so that the text after the tab starts at the next multiple of
// four columns. Columns are counted in bytes from the start of line.
func expand_tabs(ob *bytes.Buffer, line []byte) {
	col := 0
	for _, c := range line {
		if c != '\t' {
			ob.WriteByte(c)
			col++
			continue
		}

		// a tab always yields at least one space
		ob.WriteByte(' ')
		col++
		for col%4 != 0 {
			ob.WriteByte(' ')
			col++
		}
	}
}
2281
2282func Markdown(ob *bytes.Buffer, ib []byte, rndrer *mkd_renderer, extensions uint32) {
2283 // no point in parsing if we can't render
2284 if rndrer == nil {
2285 return
2286 }
2287
2288 // fill in the character-level parsers
2289 markdown_char_ptrs[MD_CHAR_NONE] = nil
2290 markdown_char_ptrs[MD_CHAR_EMPHASIS] = char_emphasis
2291 markdown_char_ptrs[MD_CHAR_CODESPAN] = char_codespan
2292 markdown_char_ptrs[MD_CHAR_LINEBREAK] = char_linebreak
2293 markdown_char_ptrs[MD_CHAR_LINK] = char_link
2294 markdown_char_ptrs[MD_CHAR_LANGLE] = char_langle_tag
2295 markdown_char_ptrs[MD_CHAR_ESCAPE] = char_escape
2296 markdown_char_ptrs[MD_CHAR_ENTITITY] = char_entity
2297 markdown_char_ptrs[MD_CHAR_AUTOLINK] = char_autolink
2298
2299 // fill in the render structure
2300 rndr := new(render)
2301 rndr.mk = rndrer
2302 rndr.ext_flags = extensions
2303 rndr.max_nesting = 16
2304
2305 if rndr.mk.emphasis != nil || rndr.mk.double_emphasis != nil || rndr.mk.triple_emphasis != nil {
2306 rndr.active_char['*'] = MD_CHAR_EMPHASIS
2307 rndr.active_char['_'] = MD_CHAR_EMPHASIS
2308 if extensions&MKDEXT_STRIKETHROUGH != 0 {
2309 rndr.active_char['~'] = MD_CHAR_EMPHASIS
2310 }
2311 }
2312 if rndr.mk.codespan != nil {
2313 rndr.active_char['`'] = MD_CHAR_CODESPAN
2314 }
2315 if rndr.mk.linebreak != nil {
2316 rndr.active_char['\n'] = MD_CHAR_LINEBREAK
2317 }
2318 if rndr.mk.image != nil || rndr.mk.link != nil {
2319 rndr.active_char['['] = MD_CHAR_LINK
2320 }
2321 rndr.active_char['<'] = MD_CHAR_LANGLE
2322 rndr.active_char['\\'] = MD_CHAR_ESCAPE
2323 rndr.active_char['&'] = MD_CHAR_ENTITITY
2324
2325 if extensions&MKDEXT_AUTOLINK != 0 {
2326 rndr.active_char['h'] = MD_CHAR_AUTOLINK // http, https
2327 rndr.active_char['H'] = MD_CHAR_AUTOLINK
2328
2329 rndr.active_char['f'] = MD_CHAR_AUTOLINK // ftp
2330 rndr.active_char['F'] = MD_CHAR_AUTOLINK
2331
2332 rndr.active_char['m'] = MD_CHAR_AUTOLINK // mailto
2333 rndr.active_char['M'] = MD_CHAR_AUTOLINK
2334 }
2335
2336 // first pass: look for references, copying everything else
2337 text := bytes.NewBuffer(nil)
2338 beg, end := 0, 0
2339 for beg < len(ib) { // iterate over lines
2340 if is_ref(ib, beg, &end, rndr) {
2341 beg = end
2342 } else { // skip to the next line
2343 end = beg
2344 for end < len(ib) && ib[end] != '\n' && ib[end] != '\r' {
2345 end++
2346 }
2347
2348 // add the line body if present
2349 if end > beg {
2350 expand_tabs(text, ib[beg:end])
2351 }
2352
2353 for end < len(ib) && (ib[end] == '\n' || ib[end] == '\r') {
2354 // add one \n per newline
2355 if ib[end] == '\n' || (end+1 < len(ib) && ib[end+1] != '\n') {
2356 text.WriteByte('\n')
2357 }
2358 end++
2359 }
2360
2361 beg = end
2362 }
2363 }
2364
2365 // sort the reference array
2366 if len(rndr.refs) > 1 {
2367 sort.Sort(rndr.refs)
2368 }
2369
2370 // second pass: actual rendering
2371 if rndr.mk.doc_header != nil {
2372 rndr.mk.doc_header(ob, rndr.mk.opaque)
2373 }
2374
2375 if text.Len() > 0 {
2376 // add a final newline if not already present
2377 finalchar := text.Bytes()[text.Len()-1]
2378 if finalchar != '\n' && finalchar != '\r' {
2379 text.WriteByte('\n')
2380 }
2381 parse_block(ob, rndr, text.Bytes())
2382 }
2383
2384 if rndr.mk.doc_footer != nil {
2385 rndr.mk.doc_footer(ob, rndr.mk.opaque)
2386 }
2387
2388 if rndr.nesting != 0 {
2389 panic("Nesting level did not end at zero")
2390 }
2391}