// markdown.go
1package main
2
3import (
4 "bytes"
5 "fmt"
6 "html"
7 "unicode"
8)
9
// Autolink kinds. Nothing in the visible code references these values; the
// names suggest a classification of autolinks (assumption, confirm against
// the inline parser once it exists).
const (
	MKDA_NOT_AUTOLINK = 0
	MKDA_NORMAL       = 1
	MKDA_EMAIL        = 2
)
15
// Parser extension flags, one bit each, OR-ed together into
// render.ext_flags. The visible code tests MKDEXT_TABLES,
// MKDEXT_FENCED_CODE, MKDEXT_LAX_HTML_BLOCKS and MKDEXT_SPACE_HEADERS; the
// remaining flags are declared but not consulted here.
const (
	MKDEXT_NO_INTRA_EMPHASIS = 1 << 0
	MKDEXT_TABLES            = 1 << 1
	MKDEXT_FENCED_CODE       = 1 << 2
	MKDEXT_AUTOLINK          = 1 << 3
	MKDEXT_STRIKETHROUGH     = 1 << 4
	MKDEXT_LAX_HTML_BLOCKS   = 1 << 5
	MKDEXT_SPACE_HEADERS     = 1 << 6
)
25
// List flags, combined with | and tested with & by parse_list,
// parse_listitem and the list renderers.
const (
	MKD_LIST_ORDERED = 1 // list is ordered (rendered as <ol>)
	MKD_LI_BLOCK     = 2 // <li> containing block data
	MKD_LI_END       = 8 // set by parse_listitem when the list is over
)
32
// Table column alignment bits. parse_table_header sets the left bit for a
// leading ':' and the right bit for a trailing ':' in the underline; both
// together mean centred.
const (
	MKD_TABLE_ALIGN_L      = 1
	MKD_TABLE_ALIGN_R      = 2
	MKD_TABLE_ALIGN_CENTER = MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R
)
38
// block_tags is the set of HTML tag names that open a raw HTML block.
// find_block_tag only tests membership; each stored value happens to equal
// the length of its key.
var block_tags = map[string]int{
	// one and two characters
	"p":  1,
	"dl": 2,
	"h1": 2,
	"h2": 2,
	"h3": 2,
	"h4": 2,
	"h5": 2,
	"h6": 2,
	"ol": 2,
	"ul": 2,

	// three characters
	"del": 3,
	"div": 3,
	"ins": 3,
	"pre": 3,

	// four and more characters
	"form":       4,
	"math":       4,
	"table":      5,
	"iframe":     6,
	"script":     6,
	"fieldset":   8,
	"noscript":   8,
	"blockquote": 10,
}
63
// mkd_renderer holds the callbacks used to render parsed data. The parser
// checks each callback for nil before calling it.
type mkd_renderer struct {
	// Block-level callbacks. A nil callback skips that kind of block.
	// Each one appends its output to ob; text is the already-rendered
	// content of the block.
	blockcode  func(ob *bytes.Buffer, text []byte, lang string, opaque interface{})
	blockquote func(ob *bytes.Buffer, text []byte, opaque interface{})
	blockhtml  func(ob *bytes.Buffer, text []byte, opaque interface{})
	header     func(ob *bytes.Buffer, text []byte, level int, opaque interface{})
	hrule      func(ob *bytes.Buffer, opaque interface{})
	list       func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	listitem   func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	paragraph  func(ob *bytes.Buffer, text []byte, opaque interface{})
	table      func(ob *bytes.Buffer, header []byte, body []byte, opaque interface{})
	table_row  func(ob *bytes.Buffer, text []byte, opaque interface{})
	table_cell func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})

	// User data, handed back unchanged as the last argument of every
	// callback.
	opaque interface{}
}
82
83type render struct {
84 mk mkd_renderer
85 ext_flags uint32
86 // ...
87}
88
89func parse_inline(work *bytes.Buffer, rndr *render, data []byte) {
90 // TODO: inline rendering
91 work.Write(data)
92}
93
94// parse block-level data
95func parse_block(ob *bytes.Buffer, rndr *render, data []byte) {
96 // TODO: quit if max_nesting exceeded
97
98 for len(data) > 0 {
99 if is_atxheader(rndr, data) {
100 data = data[parse_atxheader(ob, rndr, data):]
101 continue
102 }
103 if data[0] == '<' && rndr.mk.blockhtml != nil {
104 if i := parse_htmlblock(ob, rndr, data, true); i > 0 {
105 data = data[i:]
106 continue
107 }
108 }
109 if i := is_empty(data); i > 0 {
110 data = data[i:]
111 continue
112 }
113 if is_hrule(data) {
114 if rndr.mk.hrule != nil {
115 rndr.mk.hrule(ob, rndr.mk.opaque)
116 }
117 var i int
118 for i = 0; i < len(data) && data[i] != '\n'; i++ {
119 }
120 data = data[i:]
121 continue
122 }
123 if rndr.ext_flags&MKDEXT_FENCED_CODE != 0 {
124 if i := parse_fencedcode(ob, rndr, data); i > 0 {
125 data = data[i:]
126 continue
127 }
128 }
129 if rndr.ext_flags&MKDEXT_TABLES != 0 {
130 if i := parse_table(ob, rndr, data); i > 0 {
131 data = data[i:]
132 continue
133 }
134 }
135 if prefix_quote(data) > 0 {
136 data = data[parse_blockquote(ob, rndr, data):]
137 continue
138 }
139 if prefix_code(data) > 0 {
140 data = data[parse_blockcode(ob, rndr, data):]
141 continue
142 }
143 if prefix_uli(data) > 0 {
144 data = data[parse_list(ob, rndr, data, 0):]
145 continue
146 }
147 if prefix_oli(data) > 0 {
148 data = data[parse_list(ob, rndr, data, MKD_LIST_ORDERED):]
149 continue
150 }
151
152 data = data[parse_paragraph(ob, rndr, data):]
153 }
154}
155
156func is_atxheader(rndr *render, data []byte) bool {
157 if data[0] != '#' {
158 return false
159 }
160
161 if rndr.ext_flags&MKDEXT_SPACE_HEADERS != 0 {
162 level := 0
163 for level < len(data) && level < 6 && data[level] == '#' {
164 level++
165 }
166 if level < len(data) && data[level] != ' ' && data[level] != '\t' {
167 return false
168 }
169 }
170 return true
171}
172
173func parse_atxheader(ob *bytes.Buffer, rndr *render, data []byte) int {
174 level := 0
175 for level < len(data) && level < 6 && data[level] == '#' {
176 level++
177 }
178 i, end := 0, 0
179 for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ {
180 }
181 for end = i; end < len(data) && data[end] != '\n'; end++ {
182 }
183 skip := end
184 for end > 0 && data[end-1] == '#' {
185 end--
186 }
187 for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') {
188 end--
189 }
190 if end > i {
191 work := bytes.NewBuffer(nil)
192 parse_inline(work, rndr, data[i:end])
193 if rndr.mk.header != nil {
194 rndr.mk.header(ob, work.Bytes(), level, rndr.mk.opaque)
195 }
196 }
197 return skip
198}
199
// is_headerline reports whether the first line of data is a setext header
// underline. It returns 1 for a line of '=' characters, 2 for a line of '-'
// characters, and 0 otherwise. Trailing spaces and tabs are allowed; any
// other character on the line disqualifies it. Empty data yields 0.
func is_headerline(data []byte) int {
	if len(data) == 0 {
		return 0
	}

	var level int
	switch data[0] {
	case '=':
		level = 1
	case '-':
		level = 2
	default:
		return 0
	}

	// run of the marker character, then optional trailing blanks
	marker := data[0]
	i := 1
	for i < len(data) && data[i] == marker {
		i++
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}

	if i < len(data) && data[i] != '\n' {
		return 0
	}
	return level
}
233
234func parse_htmlblock(ob *bytes.Buffer, rndr *render, data []byte, do_render bool) int {
235 var i, j int
236
237 // identification of the opening tag
238 if len(data) < 2 || data[0] != '<' {
239 return 0
240 }
241 curtag, tagfound := find_block_tag(data[1:])
242
243 // handling of special cases
244 if !tagfound {
245
246 // HTML comment, laxist form
247 if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' {
248 i = 5
249
250 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
251 i++
252 }
253 i++
254
255 if i < len(data) {
256 j = is_empty(data[i:])
257 }
258
259 if j > 0 {
260 size := i + j
261 if do_render && rndr.mk.blockhtml != nil {
262 rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
263 }
264 return size
265 }
266 }
267
268 // HR, which is the only self-closing block tag considered
269 if len(data) > 4 && (data[i] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') {
270 i = 3
271 for i < len(data) && data[i] != '>' {
272 i++
273 }
274
275 if i+1 < len(data) {
276 i++
277 j = is_empty(data[i:])
278 if j > 0 {
279 size := i + j
280 if do_render && rndr.mk.blockhtml != nil {
281 rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
282 }
283 return size
284 }
285 }
286 }
287
288 // no special case recognized
289 return 0
290 }
291
292 // looking for an unindented matching closing tag
293 // followed by a blank line
294 i = 1
295 found := false
296
297 // if not found, trying a second pass looking for indented match
298 // but not if tag is "ins" or "del" (following original Markdown.pl)
299 if curtag != "ins" && curtag != "del" {
300 i = 1
301 for i < len(data) {
302 i++
303 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
304 i++
305 }
306
307 if i+2+len(curtag) >= len(data) {
308 break
309 }
310
311 j = htmlblock_end(curtag, rndr, data[i-1:])
312
313 if j > 0 {
314 i += j - 1
315 found = true
316 break
317 }
318 }
319 }
320
321 if !found {
322 return 0
323 }
324
325 // the end of the block has been found
326 if do_render && rndr.mk.blockhtml != nil {
327 rndr.mk.blockhtml(ob, data[:i], rndr.mk.opaque)
328 }
329
330 return i
331}
332
333func find_block_tag(data []byte) (string, bool) {
334 i := 0
335 for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) {
336 i++
337 }
338 if i >= len(data) {
339 return "", false
340 }
341 key := string(data[:i])
342 if _, ok := block_tags[key]; ok {
343 return key, true
344 }
345 return "", false
346}
347
348func htmlblock_end(tag string, rndr *render, data []byte) int {
349 // assuming data[0] == '<' && data[1] == '/' already tested
350
351 // checking tag is a match
352 if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' {
353 return 0
354 }
355
356 // checking white lines
357 i := len(tag) + 3
358 w := 0
359 if i < len(data) {
360 if w = is_empty(data[i:]); w == 0 {
361 return 0 // non-blank after tag
362 }
363 }
364 i += w
365 w = 0
366
367 if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
368 if i < len(data) {
369 w = is_empty(data[i:])
370 }
371 } else {
372 if i < len(data) {
373 if w = is_empty(data[i:]); w == 0 {
374 return 0 // non-blank line after tag line
375 }
376 }
377 }
378
379 return i + w
380}
381
// is_empty reports whether the first line of data holds only spaces and
// tabs. It returns 0 if the line has any other character; otherwise it
// returns the line length including the newline. The newline is counted
// even when data ends without one, so the result can be len(data)+1.
func is_empty(data []byte) int {
	n := 0
	for _, c := range data {
		if c == '\n' {
			break
		}
		if c != ' ' && c != '\t' {
			return 0
		}
		n++
	}
	return n + 1
}
391
// is_hrule reports whether the first line of data is a horizontal rule: up
// to three leading spaces, then at least three of the same character out of
// '*', '-' and '_', with nothing but spaces and tabs between or after them.
func is_hrule(data []byte) bool {
	if len(data) < 3 {
		return false
	}

	// skip up to three leading spaces
	pos := 0
	for pos < 3 && data[pos] == ' ' {
		pos++
	}

	// at least three bytes must remain, starting with a rule character
	if pos+2 >= len(data) {
		return false
	}
	mark := data[pos]
	if mark != '*' && mark != '-' && mark != '_' {
		return false
	}

	// the whole line must be the rule character or whitespace
	count := 0
	for _, c := range data[pos:] {
		if c == '\n' {
			break
		}
		if c == mark {
			count++
			continue
		}
		if c != ' ' && c != '\t' {
			return false
		}
	}
	return count >= 3
}
428
// is_codefence reports whether the first line of data is a code fence: up
// to three leading spaces followed by at least three '~' or three '`'. It
// returns the length of the line including its newline (counted even when
// data ends without one), or 0 if the line is not a fence.
//
// When syntax is non-nil the fence may carry a language name, either as a
// bare word ("``` go") or inside braces ("~~~ { .go }", surrounding
// whitespace trimmed). *syntax is then set to point at that name, which may
// be empty. When syntax is nil only whitespace may follow the fence
// characters.
func is_codefence(data []byte, syntax **string) int {
	// Only ASCII whitespace counts. Passing raw bytes >= 0x80 to
	// unicode.IsSpace would match 0x85 and 0xA0, which are continuation
	// bytes of ordinary UTF-8 letters such as "à".
	isSpace := func(c byte) bool {
		return c < 0x80 && unicode.IsSpace(rune(c))
	}

	i, n := 0, 0

	// skipping initial spaces
	if len(data) < 3 {
		return 0
	}
	for i < 3 && data[i] == ' ' {
		i++
	}

	// looking at the fence char
	if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') {
		return 0
	}
	c := data[i]

	// counting the run of fence chars
	for i < len(data) && data[i] == c {
		n++
		i++
	}
	if n < 3 {
		return 0
	}

	if syntax != nil {
		syn := 0

		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
			i++
		}
		syntax_start := i

		if i < len(data) && data[i] == '{' {
			i++
			syntax_start++

			for i < len(data) && data[i] != '}' && data[i] != '\n' {
				syn++
				i++
			}

			// the brace must be closed on the same line
			if i == len(data) || data[i] != '}' {
				return 0
			}

			// strip all whitespace at the beginning and the end
			// of the {} block
			for syn > 0 && isSpace(data[syntax_start]) {
				syntax_start++
				syn--
			}
			for syn > 0 && isSpace(data[syntax_start+syn-1]) {
				syn--
			}

			i++
		} else {
			for i < len(data) && !isSpace(data[i]) {
				syn++
				i++
			}
		}

		language := string(data[syntax_start : syntax_start+syn])
		*syntax = &language
	}

	// nothing but whitespace may remain on the line
	for i < len(data) && data[i] != '\n' {
		if !isSpace(data[i]) {
			return 0
		}
		i++
	}

	return i + 1
}
517
518func parse_fencedcode(ob *bytes.Buffer, rndr *render, data []byte) int {
519 var lang *string
520 beg := is_codefence(data, &lang)
521 if beg == 0 {
522 return 0
523 }
524
525 work := bytes.NewBuffer(nil)
526
527 for beg < len(data) {
528 fence_end := is_codefence(data[beg:], nil)
529 if fence_end != 0 {
530 beg += fence_end
531 break
532 }
533
534 var end int
535 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
536 }
537
538 if beg < end {
539 // verbatim copy to the working buffer, escaping entities
540 if is_empty(data[beg:]) > 0 {
541 work.WriteByte('\n')
542 } else {
543 work.Write(data[beg:end])
544 }
545 }
546 beg = end
547 }
548
549 if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
550 work.WriteByte('\n')
551 }
552
553 if rndr.mk.blockcode != nil {
554 syntax := ""
555 if lang != nil {
556 syntax = *lang
557 }
558
559 rndr.mk.blockcode(ob, work.Bytes(), syntax, rndr.mk.opaque)
560 }
561
562 return beg
563}
564
565func parse_table(ob *bytes.Buffer, rndr *render, data []byte) int {
566 header_work := bytes.NewBuffer(nil)
567 i, columns, col_data := parse_table_header(header_work, rndr, data)
568 if i > 0 {
569 body_work := bytes.NewBuffer(nil)
570
571 for i < len(data) {
572 pipes, row_start := 0, i
573 for ; i < len(data) && data[i] != '\n'; i++ {
574 if data[i] == '|' {
575 pipes++
576 }
577 }
578
579 if pipes == 0 || i == len(data) {
580 i = row_start
581 break
582 }
583
584 parse_table_row(body_work, rndr, data[row_start:i], columns, col_data)
585 i++
586 }
587
588 if rndr.mk.table != nil {
589 rndr.mk.table(ob, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque)
590 }
591 }
592
593 return i
594}
595
596func parse_table_header(ob *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) {
597 i, pipes := 0, 0
598 column_data = []int{}
599 for i = 0; i < len(data) && data[i] != '\n'; i++ {
600 if data[i] == '|' {
601 pipes++
602 }
603 }
604
605 if i == len(data) || pipes == 0 {
606 return 0, 0, column_data
607 }
608
609 header_end := i
610
611 if data[0] == '|' {
612 pipes--
613 }
614
615 if i > 2 && data[i-1] == '|' {
616 pipes--
617 }
618
619 columns = pipes + 1
620 column_data = make([]int, columns)
621
622 // parse the header underline
623 i++
624 if i < len(data) && data[i] == '|' {
625 i++
626 }
627
628 under_end := i
629 for under_end < len(data) && data[under_end] != '\n' {
630 under_end++
631 }
632
633 col := 0
634 for ; col < columns && i < under_end; col++ {
635 dashes := 0
636
637 for i < under_end && (data[i] == ' ' || data[i] == '\t') {
638 i++
639 }
640
641 if data[i] == ':' {
642 i++
643 column_data[col] |= MKD_TABLE_ALIGN_L
644 dashes++
645 }
646
647 for i < under_end && data[i] == '-' {
648 i++
649 dashes++
650 }
651
652 if i < under_end && data[i] == ':' {
653 i++
654 column_data[col] |= MKD_TABLE_ALIGN_R
655 dashes++
656 }
657
658 for i < under_end && (data[i] == ' ' || data[i] == '\t') {
659 i++
660 }
661
662 if i < under_end && data[i] != '|' {
663 break
664 }
665
666 if dashes < 3 {
667 break
668 }
669
670 i++
671 }
672
673 if col < columns {
674 return 0, 0, column_data
675 }
676
677 parse_table_row(ob, rndr, data[:header_end], columns, column_data)
678 size = under_end + 1
679 return
680}
681
682func parse_table_row(ob *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) {
683 i, col := 0, 0
684 row_work := bytes.NewBuffer(nil)
685
686 if i < len(data) && data[i] == '|' {
687 i++
688 }
689
690 for col = 0; col < columns && i < len(data); col++ {
691 for i < len(data) && unicode.IsSpace(int(data[i])) {
692 i++
693 }
694
695 cell_start := i
696
697 for i < len(data) && data[i] != '|' {
698 i++
699 }
700
701 cell_end := i - 1
702
703 for cell_end > cell_start && unicode.IsSpace(int(data[cell_end])) {
704 cell_end--
705 }
706
707 cell_work := bytes.NewBuffer(nil)
708 parse_inline(cell_work, rndr, data[cell_start:cell_end+1])
709
710 if rndr.mk.table_cell != nil {
711 cdata := 0
712 if col < len(col_data) {
713 cdata = col_data[col]
714 }
715 rndr.mk.table_cell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque)
716 }
717
718 i++
719 }
720
721 for ; col < columns; col++ {
722 empty_cell := []byte{}
723 if rndr.mk.table_cell != nil {
724 cdata := 0
725 if col < len(col_data) {
726 cdata = col_data[col]
727 }
728 rndr.mk.table_cell(row_work, empty_cell, cdata, rndr.mk.opaque)
729 }
730 }
731
732 if rndr.mk.table_row != nil {
733 rndr.mk.table_row(ob, row_work.Bytes(), rndr.mk.opaque)
734 }
735}
736
// prefix_quote returns the length of the blockquote prefix at the start of
// data: up to three spaces, a '>', and one optional space or tab. It
// returns 0 if data does not start with such a prefix.
func prefix_quote(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}
	if n >= len(data) || data[n] != '>' {
		return 0
	}

	n++ // the '>' itself
	if n < len(data) && (data[n] == ' ' || data[n] == '\t') {
		n++
	}
	return n
}
751
752// handles parsing of a blockquote fragment
753func parse_blockquote(ob *bytes.Buffer, rndr *render, data []byte) int {
754 out := bytes.NewBuffer(nil)
755 work := bytes.NewBuffer(nil)
756 beg, end := 0, 0
757 for beg < len(data) {
758 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
759 }
760
761 if pre := prefix_quote(data[beg:]); pre > 0 {
762 beg += pre // skipping prefix
763 } else {
764 // empty line followed by non-quote line
765 if is_empty(data[beg:]) > 0 && (end >= len(data) || (prefix_quote(data[end:]) == 0 && is_empty(data[end:]) == 0)) {
766 break
767 }
768 }
769
770 if beg < end { // copy into the in-place working buffer
771 work.Write(data[beg:end])
772 }
773 beg = end
774 }
775
776 parse_block(out, rndr, work.Bytes())
777 if rndr.mk.blockquote != nil {
778 rndr.mk.blockquote(ob, out.Bytes(), rndr.mk.opaque)
779 }
780 return end
781}
782
// prefix_code returns the length of the indented-code prefix at the start
// of data: 1 for a leading tab, 4 for four leading spaces, 0 otherwise.
func prefix_code(data []byte) int {
	switch {
	case len(data) > 0 && data[0] == '\t':
		return 1
	case len(data) > 3 && string(data[:4]) == "    ":
		return 4
	}
	return 0
}
793
794func parse_blockcode(ob *bytes.Buffer, rndr *render, data []byte) int {
795 work := bytes.NewBuffer(nil)
796
797 beg, end := 0, 0
798 for beg < len(data) {
799 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
800 }
801
802 chunk := data[beg:end]
803 if pre := prefix_code(chunk); pre > 0 {
804 beg += pre
805 } else {
806 if is_empty(chunk) == 0 {
807 // non-empty non-prefixed line breaks the pre
808 break
809 }
810 }
811
812 if beg < end {
813 // verbatim copy to the working buffer, escaping entities
814 if is_empty(chunk) > 0 {
815 work.WriteByte('\n')
816 } else {
817 work.Write(chunk)
818 }
819 }
820 beg = end
821 }
822
823 // trim all the \n off the end of work
824 workbytes := work.Bytes()
825 n := 0
826 for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' {
827 n++
828 }
829 if n > 0 {
830 work = bytes.NewBuffer(workbytes[:len(workbytes)-n])
831 }
832
833 work.WriteByte('\n')
834
835 if rndr.mk.blockcode != nil {
836 rndr.mk.blockcode(ob, work.Bytes(), "", rndr.mk.opaque)
837 }
838
839 return beg
840}
841
// prefix_uli returns the length of the unordered list marker at the start
// of data: up to three spaces, one of '*', '+' or '-', and a space or tab.
// It returns 0 if data does not start with such a marker.
func prefix_uli(data []byte) int {
	n := 0
	for n < 3 && n < len(data) && data[n] == ' ' {
		n++
	}

	// need both the bullet and the blank after it
	if n+1 >= len(data) {
		return 0
	}
	switch data[n] {
	case '*', '+', '-':
	default:
		return 0
	}
	if data[n+1] != ' ' && data[n+1] != '\t' {
		return 0
	}
	return n + 2
}
853
// prefix_oli returns the length of the ordered list marker at the start of
// data: up to three spaces, one or more digits, a '.', and a space or tab.
// It returns 0 if data does not start with such a marker.
func prefix_oli(data []byte) int {
	start := 0
	for start < 3 && start < len(data) && data[start] == ' ' {
		start++
	}

	// the item number
	n := start
	for n < len(data) && data[n] >= '0' && data[n] <= '9' {
		n++
	}
	if n == start {
		return 0
	}

	// need both the dot and the blank after it
	if n+1 >= len(data) || data[n] != '.' {
		return 0
	}
	if data[n+1] != ' ' && data[n+1] != '\t' {
		return 0
	}
	return n + 2
}
871
872// parsing ordered or unordered list block
873func parse_list(ob *bytes.Buffer, rndr *render, data []byte, flags int) int {
874 work := bytes.NewBuffer(nil)
875
876 i, j := 0, 0
877 for i < len(data) {
878 j, flags = parse_listitem(work, rndr, data[i:], flags)
879 i += j
880
881 if j == 0 || flags&MKD_LI_END != 0 {
882 break
883 }
884 }
885
886 if rndr.mk.list != nil {
887 rndr.mk.list(ob, work.Bytes(), flags, rndr.mk.opaque)
888 }
889 return i
890}
891
892// parsing a single list item
893// assuming initial prefix is already removed
894func parse_listitem(ob *bytes.Buffer, rndr *render, data []byte, flags_in int) (size int, flags int) {
895 size, flags = 0, flags_in
896
897 // keeping book of the first indentation prefix
898 beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0
899
900 for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' {
901 orgpre++
902 }
903
904 beg = prefix_uli(data)
905 if beg == 0 {
906 beg = prefix_oli(data)
907 }
908 if beg == 0 {
909 return
910 }
911
912 // skipping to the beginning of the following line
913 end = beg
914 for end < len(data) && data[end-1] != '\n' {
915 end++
916 }
917
918 // getting working buffers
919 work := bytes.NewBuffer(nil)
920 inter := bytes.NewBuffer(nil)
921
922 // putting the first line into the working buffer
923 work.Write(data[beg:end])
924 beg = end
925
926 // process the following lines
927 in_empty, has_inside_empty := false, false
928 for beg < len(data) {
929 end++
930
931 for end < len(data) && data[end-1] != '\n' {
932 end++
933 }
934
935 // process an empty line
936 if is_empty(data[beg:end]) > 0 {
937 in_empty = true
938 beg = end
939 continue
940 }
941
942 // calculating the indentation
943 i = 0
944 for i < 4 && beg+i < end && data[beg+i] == ' ' {
945 i++
946 }
947
948 pre = i
949 if data[beg] == '\t' {
950 i = 1
951 pre = 8
952 }
953
954 // checking for a new item
955 chunk := data[beg+i : end]
956 if (prefix_uli(chunk) > 0 && !is_hrule(chunk)) || prefix_oli(chunk) > 0 {
957 if in_empty {
958 has_inside_empty = true
959 }
960
961 if pre == orgpre { // the following item must have
962 break // the same indentation
963 }
964
965 if sublist == 0 {
966 sublist = work.Len()
967 }
968 } else {
969 // joining only indented stuff after empty lines
970 if in_empty && i < 4 && data[beg] != '\t' {
971 flags |= MKD_LI_END
972 break
973 } else {
974 if in_empty {
975 work.WriteByte('\n')
976 has_inside_empty = true
977 }
978 }
979 }
980
981 in_empty = false
982
983 // adding the line without prefix into the working buffer
984 work.Write(data[beg+i : end])
985 beg = end
986 }
987
988 // render of li contents
989 if has_inside_empty {
990 flags |= MKD_LI_BLOCK
991 }
992
993 workbytes := work.Bytes()
994 if flags&MKD_LI_BLOCK != 0 {
995 // intermediate render of block li
996 if sublist > 0 && sublist < len(workbytes) {
997 parse_block(inter, rndr, workbytes[:sublist])
998 parse_block(inter, rndr, workbytes[sublist:])
999 } else {
1000 parse_block(inter, rndr, workbytes)
1001 }
1002 } else {
1003 // intermediate render of inline li
1004 if sublist > 0 && sublist < len(workbytes) {
1005 parse_inline(inter, rndr, workbytes[:sublist])
1006 parse_inline(inter, rndr, workbytes[sublist:])
1007 } else {
1008 parse_inline(inter, rndr, workbytes)
1009 }
1010 }
1011
1012 // render of li itself
1013 if rndr.mk.listitem != nil {
1014 rndr.mk.listitem(ob, inter.Bytes(), flags, rndr.mk.opaque)
1015 }
1016
1017 size = beg
1018 return
1019}
1020
1021func parse_paragraph(ob *bytes.Buffer, rndr *render, data []byte) int {
1022 i, end, level := 0, 0, 0
1023
1024 for i < len(data) {
1025 for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ {
1026 }
1027
1028 if is_empty(data[i:]) > 0 {
1029 break
1030 }
1031 if level = is_headerline(data[i:]); level > 0 {
1032 break
1033 }
1034
1035 if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
1036 if data[i] == '<' && rndr.mk.blockhtml != nil && parse_htmlblock(ob, rndr, data[i:], false) > 0 {
1037 end = i
1038 break
1039 }
1040 }
1041
1042 if is_atxheader(rndr, data[i:]) || is_hrule(data[i:]) {
1043 end = i
1044 break
1045 }
1046
1047 i = end
1048 }
1049
1050 work := data
1051 size := i
1052 for size > 0 && work[size-1] == '\n' {
1053 size--
1054 }
1055
1056 if level == 0 {
1057 tmp := bytes.NewBuffer(nil)
1058 parse_inline(tmp, rndr, work[:size])
1059 if rndr.mk.paragraph != nil {
1060 rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
1061 }
1062 } else {
1063 if size > 0 {
1064 beg := 0
1065 i = size
1066 size--
1067
1068 for size > 0 && work[size] != '\n' {
1069 size--
1070 }
1071
1072 beg = size + 1
1073 for size > 0 && work[size-1] == '\n' {
1074 size--
1075 }
1076
1077 if size > 0 {
1078 tmp := bytes.NewBuffer(nil)
1079 parse_inline(tmp, rndr, work[:size])
1080 if rndr.mk.paragraph != nil {
1081 rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
1082 }
1083
1084 work = work[beg:]
1085 size = i - beg
1086 } else {
1087 size = i
1088 }
1089 }
1090
1091 header_work := bytes.NewBuffer(nil)
1092 parse_inline(header_work, rndr, work[:size])
1093
1094 if rndr.mk.header != nil {
1095 rndr.mk.header(ob, header_work.Bytes(), level, rndr.mk.opaque)
1096 }
1097 }
1098
1099 return end
1100}
1101
1102
//
// HTML rendering
//

// HTML renderer option flags, one bit each, OR-ed together into
// html_renderopts.flags. The visible renderers test HTML_TOC and
// HTML_HARD_WRAP; the remaining flags are declared but not consulted here.
const (
	HTML_SKIP_HTML        = 1 << 0
	HTML_SKIP_STYLE       = 1 << 1
	HTML_SKIP_IMAGES      = 1 << 2
	HTML_SKIP_LINKS       = 1 << 3
	HTML_EXPAND_TABS      = 1 << 4
	HTML_SAFELINK         = 1 << 5
	HTML_TOC              = 1 << 6
	HTML_HARD_WRAP        = 1 << 7
	HTML_GITHUB_BLOCKCODE = 1 << 8
	HTML_USE_XHTML        = 1 << 9
)
1121
// html_renderopts is the user data the HTML renderers expect to receive as
// their opaque argument (as a *html_renderopts).
type html_renderopts struct {
	// toc_data tracks table-of-contents state. rndr_header uses
	// header_count to number header ids when HTML_TOC is set;
	// current_level is not used by the visible code.
	toc_data struct {
		header_count  int
		current_level int
	}
	flags     uint32 // OR of HTML_* flags
	close_tag string // appended after "<hr" to finish the tag, e.g. " />"
}
1130
// attr_escape appends src to ob with the HTML special characters escaped by
// html.EscapeString.
func attr_escape(ob *bytes.Buffer, src []byte) {
	escaped := html.EscapeString(string(src))
	ob.WriteString(escaped)
}
1134
1135func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
1136 options := opaque.(*html_renderopts)
1137
1138 if ob.Len() > 0 {
1139 ob.WriteByte('\n')
1140 }
1141
1142 if options.flags&HTML_TOC != 0 {
1143 ob.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
1144 options.toc_data.header_count++
1145 } else {
1146 ob.WriteString(fmt.Sprintf("<h%d>", level))
1147 }
1148
1149 ob.Write(text)
1150 ob.WriteString(fmt.Sprintf("</h%d>\n", level))
1151}
1152
// rndr_raw_block writes a raw HTML block verbatim. Leading and trailing
// newlines are removed from text; if nothing remains, ob is left untouched.
// Otherwise the block is written followed by one newline, and preceded by
// one when ob already has content.
func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) {
	block := bytes.Trim(text, "\n")
	if len(block) == 0 {
		return
	}

	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}
	ob.Write(block)
	ob.WriteByte('\n')
}
1171
1172func rndr_hrule(ob *bytes.Buffer, opaque interface{}) {
1173 options := opaque.(*html_renderopts)
1174
1175 if ob.Len() > 0 {
1176 ob.WriteByte('\n')
1177 }
1178 ob.WriteString("<hr")
1179 ob.WriteString(options.close_tag)
1180}
1181
1182func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
1183 if ob.Len() > 0 {
1184 ob.WriteByte('\n')
1185 }
1186
1187 if lang != "" {
1188 ob.WriteString("<pre><code class=\"")
1189
1190 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
1191 for i < len(lang) && unicode.IsSpace(int(lang[i])) {
1192 i++
1193 }
1194
1195 if i < len(lang) {
1196 org := i
1197 for i < len(lang) && !unicode.IsSpace(int(lang[i])) {
1198 i++
1199 }
1200
1201 if lang[org] == '.' {
1202 org++
1203 }
1204
1205 if cls > 0 {
1206 ob.WriteByte(' ')
1207 }
1208 attr_escape(ob, []byte(lang[org:]))
1209 }
1210 }
1211
1212 ob.WriteString("\">")
1213 } else {
1214 ob.WriteString("<pre><code>")
1215 }
1216
1217 if len(text) > 0 {
1218 attr_escape(ob, text)
1219 }
1220
1221 ob.WriteString("</code></pre>\n")
1222}
1223
// rndr_table writes a table from its already-rendered header and body rows,
// preceded by a newline when ob already has content. No newline is written
// after the closing </table>.
func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	pieces := [][]byte{
		[]byte("<table><thead>\n"),
		header,
		[]byte("\n</thead><tbody>\n"),
		body,
		[]byte("\n</tbody></table>"),
	}
	for _, p := range pieces {
		ob.Write(p)
	}
}
1234
// rndr_tablerow writes a table row from its already-rendered cells,
// preceded by a newline when ob already has content. No newline is written
// after the closing </tr>.
func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	pieces := [][]byte{[]byte("<tr>\n"), text, []byte("\n</tr>")}
	for _, p := range pieces {
		ob.Write(p)
	}
}
1243
1244func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) {
1245 if ob.Len() > 0 {
1246 ob.WriteByte('\n')
1247 }
1248 switch align {
1249 case MKD_TABLE_ALIGN_L:
1250 ob.WriteString("<td align=\"left\">")
1251 case MKD_TABLE_ALIGN_R:
1252 ob.WriteString("<td align=\"right\">")
1253 case MKD_TABLE_ALIGN_CENTER:
1254 ob.WriteString("<td align=\"center\">")
1255 default:
1256 ob.WriteString("<td>")
1257 }
1258
1259 ob.Write(text)
1260 ob.WriteString("</td>")
1261}
1262
1263func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
1264 if ob.Len() > 0 {
1265 ob.WriteByte('\n')
1266 }
1267 if flags&MKD_LIST_ORDERED != 0 {
1268 ob.WriteString("<ol>\n")
1269 } else {
1270 ob.WriteString("<ul>\n")
1271 }
1272 ob.Write(text)
1273 if flags&MKD_LIST_ORDERED != 0 {
1274 ob.WriteString("</ol>\n")
1275 } else {
1276 ob.WriteString("</ul>\n")
1277 }
1278}
1279
// rndr_listitem writes text, with its trailing newlines removed, as an <li>
// element followed by a newline. flags is not used.
func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	body := bytes.TrimRight(text, "\n")

	ob.WriteString("<li>")
	ob.Write(body)
	ob.WriteString("</li>\n")
}
1289
1290func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) {
1291 options := opaque.(*html_renderopts)
1292 i := 0
1293
1294 if ob.Len() > 0 {
1295 ob.WriteByte('\n')
1296 }
1297
1298 if len(text) == 0 {
1299 return
1300 }
1301
1302 for i < len(text) && unicode.IsSpace(int(text[i])) {
1303 i++
1304 }
1305
1306 if i == len(text) {
1307 return
1308 }
1309
1310 ob.WriteString("<p>")
1311 if options.flags&HTML_HARD_WRAP != 0 {
1312 for i < len(text) {
1313 org := i
1314 for i < len(text) && text[i] != '\n' {
1315 i++
1316 }
1317
1318 if i > org {
1319 ob.Write(text[org:i])
1320 }
1321
1322 if i >= len(text) {
1323 break
1324 }
1325
1326 ob.WriteString("<br>")
1327 ob.WriteString(options.close_tag)
1328 i++
1329 }
1330 } else {
1331 ob.Write(text[i:])
1332 }
1333 ob.WriteString("</p>\n")
1334}
1335
1336
1337func main() {
1338 ob := bytes.NewBuffer(nil)
1339 input := "##Header##\n"
1340 input += "\n"
1341 input += "----------\n"
1342 input += "\n"
1343 input += "Underlined header\n"
1344 input += "-----------------\n"
1345 input += "\n"
1346 input += "<p>Some block html\n"
1347 input += "</p>\n"
1348 input += "\n"
1349 input += "Score | Grade\n"
1350 input += "------|------\n"
1351 input += "94 | A\n"
1352 input += "85 | B\n"
1353 input += "74 | C\n"
1354 input += "65 | D\n"
1355 input += "\n"
1356 input += "``` go\n"
1357 input += "func fib(n int) int {\n"
1358 input += " if n <= 1 {\n"
1359 input += " return n\n"
1360 input += " }\n"
1361 input += " return n * fib(n-1)\n"
1362 input += "}\n"
1363 input += "```\n"
1364 input += "\n"
1365 input += "> A blockquote\n"
1366 input += "> or something like that\n"
1367 input += "> With a table | of two columns\n"
1368 input += "> -------------|---------------\n"
1369 input += "> key | value \n"
1370 input += "\n"
1371 input += "\n"
1372 input += "Some **bold** Some *italic* and [a link][1] \n"
1373 input += "\n"
1374 input += "A little code sample\n"
1375 input += "\n"
1376 input += " </head>\n"
1377 input += " <title>Web Page Title</title>\n"
1378 input += " </head>\n"
1379 input += "\n"
1380 input += "A picture\n"
1381 input += "\n"
1382 input += "![alt text][2]\n"
1383 input += "\n"
1384 input += "A list\n"
1385 input += "\n"
1386 input += "- apples\n"
1387 input += "- oranges\n"
1388 input += "- eggs\n"
1389 input += "\n"
1390 input += "A numbered list\n"
1391 input += "\n"
1392 input += "1. a\n"
1393 input += "2. b\n"
1394 input += "3. c\n"
1395 input += "\n"
1396 input += "A little quote\n"
1397 input += "\n"
1398 input += "> It is now time for all good men to come to the aid of their country. \n"
1399 input += "\n"
1400 input += "A final paragraph.\n"
1401 input += "\n"
1402 input += " [1]: http://www.google.com\n"
1403 input += " [2]: http://www.google.com/intl/en_ALL/images/logo.gif\n"
1404
1405 ib := []byte(input)
1406 rndrer := new(mkd_renderer)
1407 rndrer.blockcode = rndr_blockcode
1408 rndrer.blockhtml = rndr_raw_block
1409 rndrer.header = rndr_header
1410 rndrer.hrule = rndr_hrule
1411 rndrer.list = rndr_list
1412 rndrer.listitem = rndr_listitem
1413 rndrer.paragraph = rndr_paragraph
1414 rndrer.table = rndr_table
1415 rndrer.table_row = rndr_tablerow
1416 rndrer.table_cell = rndr_tablecell
1417 rndrer.opaque = &html_renderopts{close_tag: " />"}
1418 var extensions uint32 = MKDEXT_FENCED_CODE | MKDEXT_TABLES
1419 Ups_markdown(ob, ib, rndrer, extensions)
1420 fmt.Print(ob.String())
1421}
1422
1423func Ups_markdown(ob *bytes.Buffer, ib []byte, rndrer *mkd_renderer, extensions uint32) {
1424
1425 /* filling the render structure */
1426 if rndrer == nil {
1427 return
1428 }
1429
1430 rndr := &render{*rndrer, extensions}
1431
1432 parse_block(ob, rndr, ib)
1433}