markdown.go (view raw)
1//
2// Black Friday Markdown Processor
3// Ported to Go from http://github.com/tanoku/upskirt
4// by Russ Ross <russ@russross.com>
5//
6
7package main
8
9import (
10 "bytes"
11 "fmt"
12 "io/ioutil"
13 "os"
14 "sort"
15 "unicode"
16)
17
// Autolink kinds. tag_length reports one of these through its autolink
// out-parameter, and the value is handed to the autolink callback as kind.
const (
	MKDA_NOT_AUTOLINK = iota // zero value: the tag is not an autolink
	MKDA_NORMAL              // set by tag_length when the tag starts with a scheme followed by ':'
	MKDA_EMAIL               // set by tag_length when is_mail_autolink accepts the tag
)
23
// Extension flags. Each constant is a distinct bit (1 << iota) and is
// tested with a bitwise AND against render.ext_flags.
const (
	MKDEXT_NO_INTRA_EMPHASIS = 1 << iota // parse_emph1: a closing mark must be followed by space, punctuation or end of data
	MKDEXT_TABLES                        // parse_block: try parse_table
	MKDEXT_FENCED_CODE                   // parse_block: try parse_fencedcode
	MKDEXT_AUTOLINK
	MKDEXT_STRIKETHROUGH
	MKDEXT_LAX_HTML_BLOCKS
	MKDEXT_SPACE_HEADERS // is_atxheader: the run of '#' must be followed by a space or tab
)
33
// List flags, passed as the flags argument of parse_list and of the
// list/listitem callbacks. The zero value is skipped, so
// MKD_LIST_ORDERED is 1 and MKD_LI_BLOCK is 2; MKD_LI_END is set
// explicitly to 8.
const (
	_ = iota
	MKD_LIST_ORDERED
	MKD_LI_BLOCK // <li> containing block data
	MKD_LI_END = 8
)
40
// Table cell alignment bits. L is 1, R is 2 and CENTER is both bits
// set (3).
// NOTE(review): presumably these travel in the flags argument of the
// table_cell callback; the table code is not visible here, confirm.
const (
	MKD_TABLE_ALIGN_L = 1 << iota
	MKD_TABLE_ALIGN_R
	MKD_TABLE_ALIGN_CENTER = (MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R)
)
46
// block_tags is keyed by lower-case HTML tag name; every value is the byte
// length of its key.
// NOTE(review): presumably this is the set of block-level tags consulted by
// find_block_tag (not visible here), and the trailing "// 0", "// 10",
// "// 12" markers look like positions left over from the original C
// table; confirm both.
var block_tags = map[string]int{
	"p":          1, // 0
	"dl":         2,
	"h1":         2,
	"h2":         2,
	"h3":         2,
	"h4":         2,
	"h5":         2,
	"h6":         2,
	"ol":         2,
	"ul":         2,
	"del":        3, // 10
	"div":        3,
	"ins":        3, // 12
	"pre":        3,
	"form":       4,
	"math":       4,
	"table":      5,
	"iframe":     6,
	"script":     6,
	"fieldset":   8,
	"noscript":   8,
	"blockquote": 10,
}
71
// functions for rendering parsed data
//
// mkd_renderer is the table of output callbacks used by the parser. Every
// callback writes into ob and receives the opaque field back as its last
// argument. The span-level callbacks return an int; the parser treats 0 as
// "nothing rendered".
type mkd_renderer struct {
	// block-level callbacks---nil skips the block
	blockcode  func(ob *bytes.Buffer, text []byte, lang string, opaque interface{})
	blockquote func(ob *bytes.Buffer, text []byte, opaque interface{})
	blockhtml  func(ob *bytes.Buffer, text []byte, opaque interface{})
	header     func(ob *bytes.Buffer, text []byte, level int, opaque interface{})
	hrule      func(ob *bytes.Buffer, opaque interface{})
	list       func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	listitem   func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	paragraph  func(ob *bytes.Buffer, text []byte, opaque interface{})
	table      func(ob *bytes.Buffer, header []byte, body []byte, opaque interface{})
	table_row  func(ob *bytes.Buffer, text []byte, opaque interface{})
	table_cell func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})

	// span-level callbacks---nil or return 0 prints the span verbatim
	autolink        func(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int
	codespan        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	double_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	emphasis        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	image           func(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int
	linebreak       func(ob *bytes.Buffer, opaque interface{}) int
	link            func(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int
	raw_html_tag    func(ob *bytes.Buffer, tag []byte, opaque interface{}) int
	triple_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	strikethrough   func(ob *bytes.Buffer, text []byte, opaque interface{}) int

	// low-level callbacks---nil copies input directly into the output
	entity      func(ob *bytes.Buffer, entity []byte, opaque interface{})
	normal_text func(ob *bytes.Buffer, text []byte, opaque interface{})

	// header and footer
	doc_header func(ob *bytes.Buffer, opaque interface{})
	doc_footer func(ob *bytes.Buffer, opaque interface{})

	// user data---passed back to every callback
	opaque interface{}
}
110
// link_ref is one link reference definition as collected by is_ref.
// All three fields are sub-slices of the parsed input, not copies.
type link_ref struct {
	id    []byte // text between the brackets; compared without regard to case
	link  []byte // link target, without surrounding angle brackets
	title []byte // optional title; empty when none was given
}
116
117type link_ref_array []*link_ref
118
119// implement the sorting interface
120func (elt link_ref_array) Len() int {
121 return len(elt)
122}
123
124func (elt link_ref_array) Less(i, j int) bool {
125 return byteslice_less(elt[i].id, elt[j].id)
126}
127
// byteslice_less reports whether a sorts strictly before b when the two
// slices are compared byte by byte without regard to case.
//
// Each byte is lower-cased as if it were a code point of its own, so the
// comparison is case-insensitive for single-byte characters only. When one
// slice is a prefix of the other the shorter one sorts first, and two
// slices that are equal (ignoring case) are not less than each other.
func byteslice_less(a []byte, b []byte) bool {
	// adapted from bytes.Compare in stdlib
	m := len(a)
	if m > len(b) {
		m = len(b)
	}
	for i := 0; i < m; i++ {
		// unicode.ToLower takes a rune; convert explicitly so this compiles
		// on Go 1 and later
		ai, bi := unicode.ToLower(rune(a[i])), unicode.ToLower(rune(b[i]))
		if ai != bi {
			return ai < bi
		}
	}

	// common prefix is identical: only a strictly shorter a is "less"
	return len(a) < len(b)
}
152
153func (elt link_ref_array) Swap(i, j int) {
154 elt[i], elt[j] = elt[j], elt[i]
155}
156
157// returns whether or not a line is a reference
158func is_ref(data []byte, beg int, last *int, rndr *render) bool {
159 // up to 3 optional leading spaces
160 if beg+3 > len(data) {
161 return false
162 }
163 i := 0
164 if data[beg] == ' ' {
165 i++
166 if data[beg+1] == ' ' {
167 i++
168 if data[beg+2] == ' ' {
169 i++
170 if data[beg+3] == ' ' {
171 return false
172 }
173 }
174 }
175 }
176 i += beg
177
178 // id part: anything but a newline between brackets
179 if data[i] != '[' {
180 return false
181 }
182 i++
183 id_offset := i
184 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
185 i++
186 }
187 if i >= len(data) || data[i] != ']' {
188 return false
189 }
190 id_end := i
191
192 // spacer: colon (space | tab)* newline? (space | tab)*
193 i++
194 if i >= len(data) || data[i] != ':' {
195 return false
196 }
197 i++
198 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
199 i++
200 }
201 if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
202 i++
203 if i < len(data) && data[i] == '\r' && data[i-1] == '\n' {
204 i++
205 }
206 }
207 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
208 i++
209 }
210 if i >= len(data) {
211 return false
212 }
213
214 // link: whitespace-free sequence, optionally between angle brackets
215 if data[i] == '<' {
216 i++
217 }
218 link_offset := i
219 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
220 i++
221 }
222 var link_end int
223 if data[i-1] == '>' {
224 link_end = i - 1
225 } else {
226 link_end = i
227 }
228
229 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
230 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
231 i++
232 }
233 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
234 return false
235 }
236
237 // compute end-of-line
238 line_end := 0
239 if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
240 line_end = i
241 }
242 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
243 line_end = i + 1
244 }
245
246 // optional (space|tab)* spacer after a newline
247 if line_end > 0 {
248 i = line_end + 1
249 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
250 i++
251 }
252 }
253
254 // optional title: any non-newline sequence enclosed in '"() alone on its line
255 title_offset, title_end := 0, 0
256 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
257 i++
258 title_offset = i
259
260 // looking for EOL
261 for i < len(data) && data[i] != '\n' && data[i] != '\r' {
262 i++
263 }
264 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
265 title_end = i + 1
266 } else {
267 title_end = i
268 }
269
270 // stepping back
271 i--
272 for i > title_offset && (data[i] == ' ' || data[i] == '\t') {
273 i--
274 }
275 if i > title_offset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
276 line_end = title_end
277 title_end = i
278 }
279 }
280 if line_end == 0 { // garbage after the link
281 return false
282 }
283
284 // a valid ref has been found; fill in return structures
285 if last != nil {
286 *last = line_end
287 }
288 if rndr == nil {
289 return true
290 }
291 item := &link_ref{id: data[id_offset:id_end], link: data[link_offset:link_end], title: data[title_offset:title_end]}
292 rndr.refs = append(rndr.refs, item)
293
294 return true
295}
296
// render is the parser state threaded through every parsing function.
type render struct {
	mk          *mkd_renderer  // output callbacks
	refs        link_ref_array // link references; is_ref appends to it, char_link binary-searches it
	active_char [256]int       // indexed by input byte; non-zero marks an active char and selects its handler in markdown_char_ptrs
	ext_flags   uint32         // tested against the MKDEXT_* bits
	nesting     int            // current depth, incremented by parse_inline and parse_block
	max_nesting int            // parse_inline and parse_block return immediately once nesting reaches this
}
305
// Action codes, numbered 0 through 8.
// NOTE(review): presumably these are the values stored in
// render.active_char and used to index markdown_char_ptrs (which has
// exactly nine entries); the code that fills both tables is not visible
// here, confirm.
const (
	MD_CHAR_NONE = iota
	MD_CHAR_EMPHASIS
	MD_CHAR_CODESPAN
	MD_CHAR_LINEBREAK
	MD_CHAR_LINK
	MD_CHAR_LANGLE
	MD_CHAR_ESCAPE
	MD_CHAR_ENTITITY
	MD_CHAR_AUTOLINK
)
317
// closures to render active chars, indexed by the action code that
// parse_inline reads from render.active_char. For each closure:
//   the return value is the number of chars taken care of (0 means none)
//   data is the complete block being rendered
//   offset is the index in data of the active char that triggered the call
//
// Note: this is filled in by Markdown to prevent an initialization loop
var markdown_char_ptrs [9]func(ob *bytes.Buffer, rndr *render, data []byte, offset int) int
325
326func parse_inline(ob *bytes.Buffer, rndr *render, data []byte) {
327 if rndr.nesting >= rndr.max_nesting {
328 return
329 }
330 rndr.nesting++
331
332 i, end := 0, 0
333 for i < len(data) {
334 // copy inactive chars into the output
335 for end < len(data) && rndr.active_char[data[end]] == 0 {
336 end++
337 }
338
339 if rndr.mk.normal_text != nil {
340 rndr.mk.normal_text(ob, data[i:end], rndr.mk.opaque)
341 } else {
342 ob.Write(data[i:end])
343 }
344
345 if end >= len(data) {
346 break
347 }
348 i = end
349
350 // call the trigger
351 action := rndr.active_char[data[end]]
352 end = markdown_char_ptrs[action](ob, rndr, data, i)
353
354 if end == 0 { // no action from the callback
355 end = i + 1
356 } else {
357 i += end
358 end = i
359 }
360 }
361
362 rndr.nesting--
363}
364
365// single and double emphasis parsing
366func char_emphasis(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
367 data = data[offset:]
368 c := data[0]
369 ret := 0
370
371 if len(data) > 2 && data[1] != c {
372 // whitespace cannot follow an opening emphasis;
373 // strikethrough only takes two characters '~~'
374 if c == '~' || isspace(data[1]) {
375 return 0
376 }
377 if ret = parse_emph1(ob, rndr, data[1:], c); ret == 0 {
378 return 0
379 }
380
381 return ret + 1
382 }
383
384 if len(data) > 3 && data[1] == c && data[2] != c {
385 if isspace(data[2]) {
386 return 0
387 }
388 if ret = parse_emph2(ob, rndr, data[2:], c); ret == 0 {
389 return 0
390 }
391
392 return ret + 2
393 }
394
395 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
396 if c == '~' || isspace(data[3]) {
397 return 0
398 }
399 if ret = parse_emph3(ob, rndr, data, 3, c); ret == 0 {
400 return 0
401 }
402
403 return ret + 3
404 }
405
406 return 0
407}
408
409func char_codespan(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
410 data = data[offset:]
411
412 nb := 0
413
414 // counting the number of backticks in the delimiter
415 for nb < len(data) && data[nb] == '`' {
416 nb++
417 }
418
419 // finding the next delimiter
420 i, end := 0, 0
421 for end = nb; end < len(data) && i < nb; end++ {
422 if data[end] == '`' {
423 i++
424 } else {
425 i = 0
426 }
427 }
428
429 if i < nb && end >= len(data) {
430 return 0 // no matching delimiter
431 }
432
433 // trim outside whitespace
434 f_begin := nb
435 for f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t') {
436 f_begin++
437 }
438
439 f_end := end - nb
440 for f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t') {
441 f_end--
442 }
443
444 // real code span
445 if rndr.mk.codespan == nil {
446 return 0
447 }
448 if f_begin < f_end {
449 if rndr.mk.codespan(ob, data[f_begin:f_end], rndr.mk.opaque) == 0 {
450 end = 0
451 }
452 } else {
453 if rndr.mk.codespan(ob, nil, rndr.mk.opaque) == 0 {
454 end = 0
455 }
456 }
457
458 return end
459
460}
461
462// '\n' preceded by two spaces
463func char_linebreak(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
464 if offset < 2 || data[offset-1] != ' ' || data[offset-2] != ' ' {
465 return 0
466 }
467
468 // remove trailing spaces from ob and render
469 ob_bytes := ob.Bytes()
470 end := len(ob_bytes)
471 for end > 0 && ob_bytes[end-1] == ' ' {
472 end--
473 }
474 ob.Truncate(end)
475
476 if rndr.mk.linebreak == nil {
477 return 0
478 }
479 if rndr.mk.linebreak(ob, rndr.mk.opaque) > 0 {
480 return 1
481 } else {
482 return 0
483 }
484
485 return 0
486}
487
488// '[': parsing a link or an image
489func char_link(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
490 is_img := offset > 0 && data[offset-1] == '!'
491
492 data = data[offset:]
493
494 i := 1
495 var title, link []byte
496 text_has_nl := false
497
498 // checking whether the correct renderer exists
499 if (is_img && rndr.mk.image == nil) || (!is_img && rndr.mk.link == nil) {
500 return 0
501 }
502
503 // looking for the matching closing bracket
504 for level := 1; level > 0 && i < len(data); i++ {
505 switch {
506 case data[i] == '\n':
507 text_has_nl = true
508
509 case data[i-1] == '\\':
510 continue
511
512 case data[i] == '[':
513 level++
514
515 case data[i] == ']':
516 level--
517 if level <= 0 {
518 i-- // compensate for extra i++ in for loop
519 }
520 }
521 }
522
523 if i >= len(data) {
524 return 0
525 }
526
527 txt_e := i
528 i++
529
530 // skip any amount of whitespace or newline
531 // (this is much more lax than original markdown syntax)
532 for i < len(data) && isspace(data[i]) {
533 i++
534 }
535
536 // inline style link
537 switch {
538 case i < len(data) && data[i] == '(':
539 // skipping initial whitespace
540 i++
541
542 for i < len(data) && isspace(data[i]) {
543 i++
544 }
545
546 link_b := i
547
548 // looking for link end: ' " )
549 for i < len(data) {
550 if data[i] == '\\' {
551 i += 2
552 } else {
553 if data[i] == ')' || data[i] == '\'' || data[i] == '"' {
554 break
555 }
556 i++
557 }
558 }
559
560 if i >= len(data) {
561 return 0
562 }
563 link_e := i
564
565 // looking for title end if present
566 title_b, title_e := 0, 0
567 if data[i] == '\'' || data[i] == '"' {
568 i++
569 title_b = i
570
571 for i < len(data) {
572 if data[i] == '\\' {
573 i += 2
574 } else {
575 if data[i] == ')' {
576 break
577 }
578 i++
579 }
580 }
581
582 if i >= len(data) {
583 return 0
584 }
585
586 // skipping whitespaces after title
587 title_e = i - 1
588 for title_e > title_b && isspace(data[title_e]) {
589 title_e--
590 }
591
592 // checking for closing quote presence
593 if data[title_e] != '\'' && data[title_e] != '"' {
594 title_b, title_e = 0, 0
595 link_e = i
596 }
597 }
598
599 // remove whitespace at the end of the link
600 for link_e > link_b && isspace(data[link_e-1]) {
601 link_e--
602 }
603
604 // remove optional angle brackets around the link
605 if data[link_b] == '<' {
606 link_b++
607 }
608 if data[link_e-1] == '>' {
609 link_e--
610 }
611
612 // building escaped link and title
613 if link_e > link_b {
614 link = data[link_b:link_e]
615 }
616
617 if title_e > title_b {
618 title = data[title_b:title_e]
619 }
620
621 i++
622
623 // reference style link
624 case i < len(data) && data[i] == '[':
625 var id []byte
626
627 // looking for the id
628 i++
629 link_b := i
630 for i < len(data) && data[i] != ']' {
631 i++
632 }
633 if i >= len(data) {
634 return 0
635 }
636 link_e := i
637
638 // find the link_ref
639 if link_b == link_e {
640 if text_has_nl {
641 b := bytes.NewBuffer(nil)
642
643 for j := 1; j < txt_e; j++ {
644 switch {
645 case data[j] != '\n':
646 b.WriteByte(data[j])
647 case data[j-1] != ' ':
648 b.WriteByte(' ')
649 }
650 }
651
652 id = b.Bytes()
653 } else {
654 id = data[1:txt_e]
655 }
656 } else {
657 id = data[link_b:link_e]
658 }
659
660 // find the link_ref with matching id
661 index := sortDotSearch(len(rndr.refs), func(i int) bool {
662 return !byteslice_less(rndr.refs[i].id, id)
663 })
664 if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
665 return 0
666 }
667 lr := rndr.refs[index]
668
669 // keep link and title from link_ref
670 link = lr.link
671 title = lr.title
672 i++
673
674 // shortcut reference style link
675 default:
676 var id []byte
677
678 // crafting the id
679 if text_has_nl {
680 b := bytes.NewBuffer(nil)
681
682 for j := 1; j < txt_e; j++ {
683 switch {
684 case data[j] != '\n':
685 b.WriteByte(data[j])
686 case data[j-1] != ' ':
687 b.WriteByte(' ')
688 }
689 }
690
691 id = b.Bytes()
692 } else {
693 id = data[1:txt_e]
694 }
695
696 // find the link_ref with matching id
697 index := sortDotSearch(len(rndr.refs), func(i int) bool {
698 return !byteslice_less(rndr.refs[i].id, id)
699 })
700 if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
701 return 0
702 }
703 lr := rndr.refs[index]
704
705 // keep link and title from link_ref
706 link = lr.link
707 title = lr.title
708
709 // rewinding the whitespace
710 i = txt_e + 1
711 }
712
713 // building content: img alt is escaped, link content is parsed
714 content := bytes.NewBuffer(nil)
715 if txt_e > 1 {
716 if is_img {
717 content.Write(data[1:txt_e])
718 } else {
719 parse_inline(content, rndr, data[1:txt_e])
720 }
721 }
722
723 var u_link []byte
724 if len(link) > 0 {
725 u_link_buf := bytes.NewBuffer(nil)
726 unscape_text(u_link_buf, link)
727 u_link = u_link_buf.Bytes()
728 }
729
730 // calling the relevant rendering function
731 ret := 0
732 if is_img {
733 ob_size := ob.Len()
734 ob_bytes := ob.Bytes()
735 if ob_size > 0 && ob_bytes[ob_size-1] == '!' {
736 ob.Truncate(ob_size - 1)
737 }
738
739 ret = rndr.mk.image(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
740 } else {
741 ret = rndr.mk.link(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
742 }
743
744 if ret > 0 {
745 return i
746 }
747 return 0
748}
749
750// '<' when tags or autolinks are allowed
751func char_langle_tag(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
752 data = data[offset:]
753 altype := MKDA_NOT_AUTOLINK
754 end := tag_length(data, &altype)
755 ret := 0
756
757 if end > 2 {
758 switch {
759 case rndr.mk.autolink != nil && altype != MKDA_NOT_AUTOLINK:
760 u_link := bytes.NewBuffer(nil)
761 unscape_text(u_link, data[1:end+1-2])
762 ret = rndr.mk.autolink(ob, u_link.Bytes(), altype, rndr.mk.opaque)
763 case rndr.mk.raw_html_tag != nil:
764 ret = rndr.mk.raw_html_tag(ob, data[:end], rndr.mk.opaque)
765 }
766 }
767
768 if ret == 0 {
769 return 0
770 }
771 return end
772}
773
774// '\\' backslash escape
775var escape_chars = []byte("\\`*_{}[]()#+-.!:|&<>")
776
777func char_escape(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
778 data = data[offset:]
779
780 if len(data) > 1 {
781 if bytes.IndexByte(escape_chars, data[1]) < 0 {
782 return 0
783 }
784
785 if rndr.mk.normal_text != nil {
786 rndr.mk.normal_text(ob, data[1:2], rndr.mk.opaque)
787 } else {
788 ob.WriteByte(data[1])
789 }
790 }
791
792 return 2
793}
794
795// '&' escaped when it doesn't belong to an entity
796// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
797func char_entity(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
798 data = data[offset:]
799
800 end := 1
801
802 if end < len(data) && data[end] == '#' {
803 end++
804 }
805
806 for end < len(data) && isalnum(data[end]) {
807 end++
808 }
809
810 if end < len(data) && data[end] == ';' {
811 end++ // real entity
812 } else {
813 return 0 // lone '&'
814 }
815
816 if rndr.mk.entity != nil {
817 rndr.mk.entity(ob, data[:end], rndr.mk.opaque)
818 } else {
819 ob.Write(data[:end])
820 }
821
822 return end
823}
824
825func char_autolink(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
826 orig_data := data
827 data = data[offset:]
828
829 if offset > 0 {
830 if !isspace(orig_data[offset-1]) && !ispunct(orig_data[offset-1]) {
831 return 0
832 }
833 }
834
835 if !is_safe_link(data) {
836 return 0
837 }
838
839 link_end := 0
840 for link_end < len(data) && !isspace(data[link_end]) {
841 link_end++
842 }
843
844 // Skip punctuation at the end of the link
845 if (data[link_end-1] == '.' || data[link_end-1] == ',' || data[link_end-1] == ';') && data[link_end-2] != '\\' {
846 link_end--
847 }
848
849 // See if the link finishes with a punctuation sign that can be closed.
850 var copen byte
851 switch data[link_end-1] {
852 case '"':
853 copen = '"'
854 case '\'':
855 copen = '\''
856 case ')':
857 copen = '('
858 case ']':
859 copen = '['
860 case '}':
861 copen = '{'
862 default:
863 copen = 0
864 }
865
866 if copen != 0 {
867 buf_end := offset + link_end - 2
868
869 open_delim := 1
870
871 /* Try to close the final punctuation sign in this same line;
872 * if we managed to close it outside of the URL, that means that it's
873 * not part of the URL. If it closes inside the URL, that means it
874 * is part of the URL.
875 *
876 * Examples:
877 *
878 * foo http://www.pokemon.com/Pikachu_(Electric) bar
879 * => http://www.pokemon.com/Pikachu_(Electric)
880 *
881 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
882 * => http://www.pokemon.com/Pikachu_(Electric)
883 *
884 * foo http://www.pokemon.com/Pikachu_(Electric)) bar
885 * => http://www.pokemon.com/Pikachu_(Electric))
886 *
887 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
888 * => foo http://www.pokemon.com/Pikachu_(Electric)
889 */
890
891 for buf_end >= 0 && orig_data[buf_end] != '\n' && open_delim != 0 {
892 if orig_data[buf_end] == data[link_end-1] {
893 open_delim++
894 }
895
896 if orig_data[buf_end] == copen {
897 open_delim--
898 }
899
900 buf_end--
901 }
902
903 if open_delim == 0 {
904 link_end--
905 }
906 }
907
908 if rndr.mk.autolink != nil {
909 u_link := bytes.NewBuffer(nil)
910 unscape_text(u_link, data[:link_end])
911
912 rndr.mk.autolink(ob, u_link.Bytes(), MKDA_NORMAL, rndr.mk.opaque)
913 }
914
915 return link_end
916}
917
918var valid_uris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
919
920func is_safe_link(link []byte) bool {
921 for _, prefix := range valid_uris {
922 if len(link) > len(prefix) && !byteslice_less(link[:len(prefix)], prefix) && !byteslice_less(prefix, link[:len(prefix)]) && isalnum(link[len(prefix)]) {
923 return true
924 }
925 }
926
927 return false
928}
929
930
// taken from regexp in the stdlib
//
// ispunct reports whether c is one of the 32 ASCII punctuation characters
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
func ispunct(c byte) bool {
	switch {
	case c >= '!' && c <= '/':
		return true
	case c >= ':' && c <= '@':
		return true
	case c >= '[' && c <= '`':
		return true
	case c >= '{' && c <= '~':
		return true
	}
	return false
}
940
// this is sort.Search, reproduced here because an older
// version of the library had a bug
//
// sortDotSearch returns the smallest index in [0, n) for which f is true,
// or n when there is none. f must be false for some (possibly empty) prefix
// of the range and true for the rest.
func sortDotSearch(n int, f func(int) bool) int {
	// Invariant: f is false everywhere below lo and true from hi onwards
	// (treating f(n) as true).
	lo, hi := 0, n
	for lo < hi {
		mid := lo + (hi-lo)/2 // avoid overflow when computing the midpoint
		if f(mid) {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	return lo
}
959
// isspace reports whether c is a space, tab, newline, carriage return,
// form feed or vertical tab.
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
963
// isalnum reports whether c is an ASCII digit or an ASCII letter.
func isalnum(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
967
968// return the length of the given tag, or 0 is it's not valid
969func tag_length(data []byte, autolink *int) int {
970 var i, j int
971
972 // a valid tag can't be shorter than 3 chars
973 if len(data) < 3 {
974 return 0
975 }
976
977 // begins with a '<' optionally followed by '/', followed by letter or number
978 if data[0] != '<' {
979 return 0
980 }
981 if data[1] == '/' {
982 i = 2
983 } else {
984 i = 1
985 }
986
987 if !isalnum(data[i]) {
988 return 0
989 }
990
991 // scheme test
992 *autolink = MKDA_NOT_AUTOLINK
993
994 // try to find the beggining of an URI
995 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
996 i++
997 }
998
999 if i > 1 && data[i] == '@' {
1000 if j = is_mail_autolink(data[i:]); j != 0 {
1001 *autolink = MKDA_EMAIL
1002 return i + j
1003 }
1004 }
1005
1006 if i > 2 && data[i] == ':' {
1007 *autolink = MKDA_NORMAL
1008 i++
1009 }
1010
1011 // complete autolink test: no whitespace or ' or "
1012 switch {
1013 case i >= len(data):
1014 *autolink = MKDA_NOT_AUTOLINK
1015 case *autolink != 0:
1016 j = i
1017
1018 for i < len(data) {
1019 if data[i] == '\\' {
1020 i += 2
1021 } else {
1022 if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
1023 break
1024 } else {
1025 i++
1026 }
1027 }
1028
1029 }
1030
1031 if i >= len(data) {
1032 return 0
1033 }
1034 if i > j && data[i] == '>' {
1035 return i + 1
1036 }
1037
1038 // one of the forbidden chars has been found
1039 *autolink = MKDA_NOT_AUTOLINK
1040 }
1041
1042 // looking for sometinhg looking like a tag end
1043 for i < len(data) && data[i] != '>' {
1044 i++
1045 }
1046 if i >= len(data) {
1047 return 0
1048 }
1049 return i + 1
1050}
1051
1052// look for the address part of a mail autolink and '>'
1053// this is less strict than the original markdown e-mail address matching
1054func is_mail_autolink(data []byte) int {
1055 nb := 0
1056
1057 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1058 for i := 0; i < len(data); i++ {
1059 if isalnum(data[i]) {
1060 continue
1061 }
1062
1063 switch data[i] {
1064 case '@':
1065 nb++
1066
1067 case '-', '.', '_':
1068 break
1069
1070 case '>':
1071 if nb == 1 {
1072 return i + 1
1073 } else {
1074 return 0
1075 }
1076 default:
1077 return 0
1078 }
1079 }
1080
1081 return 0
1082}
1083
// look for the next emph char, skipping other constructs
//
// find_emph_char returns the index in data of the next occurrence of c at
// or after index 1 that is not inside a code span or a link, or 0 when
// there is none. If a code span or link turns out to be unterminated, the
// first c seen inside it (if any) is returned instead.
func find_emph_char(data []byte, c byte) int {
	i := 1

	for i < len(data) {
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		if data[i] == c {
			return i
		}

		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}

		if data[i] == '`' {
			// skip a code span
			tmp_i := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmp_i == 0 && data[i] == c {
					tmp_i = i
				}
				i++
			}
			if i >= len(data) {
				return tmp_i
			}
			i++
			continue
		}

		// data[i] == '[': skip a link
		tmp_i := 0
		i++
		for i < len(data) && data[i] != ']' {
			if tmp_i == 0 && data[i] == c {
				tmp_i = i
			}
			i++
		}
		i++
		for i < len(data) && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n') {
			i++
		}
		if i >= len(data) {
			return tmp_i
		}

		// the link target or reference id ends at the delimiter that closes
		// the one found here, not at another opening delimiter
		var cc byte
		switch data[i] {
		case '[':
			cc = ']'
		case '(':
			cc = ')'
		default: // not a link
			if tmp_i > 0 {
				return tmp_i
			}
			continue
		}

		i++
		for i < len(data) && data[i] != cc {
			if tmp_i == 0 && data[i] == c {
				tmp_i = i
			}
			i++
		}
		if i >= len(data) {
			return tmp_i
		}
		i++
	}
	return 0
}
1161
1162func parse_emph1(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1163 i := 0
1164
1165 if rndr.mk.emphasis == nil {
1166 return 0
1167 }
1168
1169 // skip one symbol if coming from emph3
1170 if len(data) > 1 && data[0] == c && data[1] == c {
1171 i = 1
1172 }
1173
1174 for i < len(data) {
1175 length := find_emph_char(data[i:], c)
1176 if length == 0 {
1177 return 0
1178 }
1179 i += length
1180 if i >= len(data) {
1181 return 0
1182 }
1183
1184 if i+1 < len(data) && data[i+1] == c {
1185 i++
1186 continue
1187 }
1188
1189 if data[i] == c && !isspace(data[i-1]) {
1190
1191 if rndr.ext_flags&MKDEXT_NO_INTRA_EMPHASIS != 0 {
1192 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1193 continue
1194 }
1195 }
1196
1197 work := bytes.NewBuffer(nil)
1198 parse_inline(work, rndr, data[:i])
1199 r := rndr.mk.emphasis(ob, work.Bytes(), rndr.mk.opaque)
1200 if r > 0 {
1201 return i + 1
1202 } else {
1203 return 0
1204 }
1205 }
1206 }
1207
1208 return 0
1209}
1210
1211func parse_emph2(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1212 render_method := rndr.mk.double_emphasis
1213 if c == '~' {
1214 render_method = rndr.mk.strikethrough
1215 }
1216
1217 if render_method == nil {
1218 return 0
1219 }
1220
1221 i := 0
1222
1223 for i < len(data) {
1224 length := find_emph_char(data[i:], c)
1225 if length == 0 {
1226 return 0
1227 }
1228 i += length
1229
1230 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1231 work := bytes.NewBuffer(nil)
1232 parse_inline(work, rndr, data[:i])
1233 r := render_method(ob, work.Bytes(), rndr.mk.opaque)
1234 if r > 0 {
1235 return i + 2
1236 } else {
1237 return 0
1238 }
1239 }
1240 i++
1241 }
1242 return 0
1243}
1244
1245func parse_emph3(ob *bytes.Buffer, rndr *render, data []byte, offset int, c byte) int {
1246 i := 0
1247 orig_data := data
1248 data = data[offset:]
1249
1250 for i < len(data) {
1251 length := find_emph_char(data[i:], c)
1252 if length == 0 {
1253 return 0
1254 }
1255 i += length
1256
1257 // skip whitespace preceded symbols
1258 if data[i] != c || isspace(data[i-1]) {
1259 continue
1260 }
1261
1262 switch {
1263 case (i+2 < len(data) && data[i+1] == c && data[i+2] == c && rndr.mk.triple_emphasis != nil):
1264 // triple symbol found
1265 work := bytes.NewBuffer(nil)
1266
1267 parse_inline(work, rndr, data[:i])
1268 r := rndr.mk.triple_emphasis(ob, work.Bytes(), rndr.mk.opaque)
1269 if r > 0 {
1270 return i + 3
1271 } else {
1272 return 0
1273 }
1274 case (i+1 < len(data) && data[i+1] == c):
1275 // double symbol found, handing over to emph1
1276 length = parse_emph1(ob, rndr, orig_data[offset-2:], c)
1277 if length == 0 {
1278 return 0
1279 } else {
1280 return length - 2
1281 }
1282 default:
1283 // single symbol found, handing over to emph2
1284 length = parse_emph2(ob, rndr, orig_data[offset-1:], c)
1285 if length == 0 {
1286 return 0
1287 } else {
1288 return length - 1
1289 }
1290 }
1291 }
1292 return 0
1293}
1294
// parse block-level data
//
// parse_block repeatedly identifies the construct at the start of data,
// renders it into ob and drops the consumed bytes, until data is empty.
// The checks run in a fixed order and the first one that matches wins; a
// paragraph is the fallback. Nothing is rendered once the nesting limit
// has been reached.
func parse_block(ob *bytes.Buffer, rndr *render, data []byte) {
	if rndr.nesting >= rndr.max_nesting {
		return
	}
	rndr.nesting++

	for len(data) > 0 {
		// "# header"
		if is_atxheader(rndr, data) {
			data = data[parse_atxheader(ob, rndr, data):]
			continue
		}
		// raw HTML block; falls through when it turns out not to be one
		if data[0] == '<' && rndr.mk.blockhtml != nil {
			if i := parse_htmlblock(ob, rndr, data, true); i > 0 {
				data = data[i:]
				continue
			}
		}
		// blank line
		if i := is_empty(data); i > 0 {
			data = data[i:]
			continue
		}
		// horizontal rule: skip to (but not past) the end of the line
		if is_hrule(data) {
			if rndr.mk.hrule != nil {
				rndr.mk.hrule(ob, rndr.mk.opaque)
			}
			var i int
			for i = 0; i < len(data) && data[i] != '\n'; i++ {
			}
			data = data[i:]
			continue
		}
		// extensions, only when enabled
		if rndr.ext_flags&MKDEXT_FENCED_CODE != 0 {
			if i := parse_fencedcode(ob, rndr, data); i > 0 {
				data = data[i:]
				continue
			}
		}
		if rndr.ext_flags&MKDEXT_TABLES != 0 {
			if i := parse_table(ob, rndr, data); i > 0 {
				data = data[i:]
				continue
			}
		}
		// constructs recognised by their line prefix
		if prefix_quote(data) > 0 {
			data = data[parse_blockquote(ob, rndr, data):]
			continue
		}
		if prefix_code(data) > 0 {
			data = data[parse_blockcode(ob, rndr, data):]
			continue
		}
		if prefix_uli(data) > 0 {
			data = data[parse_list(ob, rndr, data, 0):]
			continue
		}
		if prefix_oli(data) > 0 {
			data = data[parse_list(ob, rndr, data, MKD_LIST_ORDERED):]
			continue
		}

		// anything else is a paragraph
		data = data[parse_paragraph(ob, rndr, data):]
	}

	rndr.nesting--
}
1361
1362func is_atxheader(rndr *render, data []byte) bool {
1363 if data[0] != '#' {
1364 return false
1365 }
1366
1367 if rndr.ext_flags&MKDEXT_SPACE_HEADERS != 0 {
1368 level := 0
1369 for level < len(data) && level < 6 && data[level] == '#' {
1370 level++
1371 }
1372 if level < len(data) && data[level] != ' ' && data[level] != '\t' {
1373 return false
1374 }
1375 }
1376 return true
1377}
1378
1379func parse_atxheader(ob *bytes.Buffer, rndr *render, data []byte) int {
1380 level := 0
1381 for level < len(data) && level < 6 && data[level] == '#' {
1382 level++
1383 }
1384 i, end := 0, 0
1385 for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ {
1386 }
1387 for end = i; end < len(data) && data[end] != '\n'; end++ {
1388 }
1389 skip := end
1390 for end > 0 && data[end-1] == '#' {
1391 end--
1392 }
1393 for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') {
1394 end--
1395 }
1396 if end > i {
1397 work := bytes.NewBuffer(nil)
1398 parse_inline(work, rndr, data[i:end])
1399 if rndr.mk.header != nil {
1400 rndr.mk.header(ob, work.Bytes(), level, rndr.mk.opaque)
1401 }
1402 }
1403 return skip
1404}
1405
// is_headerline tests whether the first line of data is a setext header
// underline. It returns 1 for a line made of '=' characters, 2 for a line
// made of '-' characters, and 0 otherwise. The marker run may be followed by
// spaces and tabs only, up to the newline or the end of data.
//
// Empty input yields 0.
func is_headerline(data []byte) int {
	if len(data) == 0 {
		return 0
	}

	var level int
	switch data[0] {
	case '=':
		level = 1
	case '-':
		level = 2
	default:
		return 0
	}

	// skip the marker run, then any trailing blanks
	marker := data[0]
	i := 1
	for i < len(data) && data[i] == marker {
		i++
	}
	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
		i++
	}

	// anything else before the end of the line disqualifies it
	if i < len(data) && data[i] != '\n' {
		return 0
	}
	return level
}
1439
1440func parse_htmlblock(ob *bytes.Buffer, rndr *render, data []byte, do_render bool) int {
1441 var i, j int
1442
1443 // identify the opening tag
1444 if len(data) < 2 || data[0] != '<' {
1445 return 0
1446 }
1447 curtag, tagfound := find_block_tag(data[1:])
1448
1449 // handle special cases
1450 if !tagfound {
1451
1452 // HTML comment, laxist form
1453 if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' {
1454 i = 5
1455
1456 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
1457 i++
1458 }
1459 i++
1460
1461 if i < len(data) {
1462 j = is_empty(data[i:])
1463 }
1464
1465 if j > 0 {
1466 size := i + j
1467 if do_render && rndr.mk.blockhtml != nil {
1468 rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1469 }
1470 return size
1471 }
1472 }
1473
1474 // HR, which is the only self-closing block tag considered
1475 if len(data) > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') {
1476 i = 3
1477 for i < len(data) && data[i] != '>' {
1478 i++
1479 }
1480
1481 if i+1 < len(data) {
1482 i++
1483 j = is_empty(data[i:])
1484 if j > 0 {
1485 size := i + j
1486 if do_render && rndr.mk.blockhtml != nil {
1487 rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
1488 }
1489 return size
1490 }
1491 }
1492 }
1493
1494 // no special case recognized
1495 return 0
1496 }
1497
1498 // look for an unindented matching closing tag
1499 // followed by a blank line
1500 i = 1
1501 found := false
1502
1503 // if not found, try a second pass looking for indented match
1504 // but not if tag is "ins" or "del" (following original Markdown.pl)
1505 if curtag != "ins" && curtag != "del" {
1506 i = 1
1507 for i < len(data) {
1508 i++
1509 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
1510 i++
1511 }
1512
1513 if i+2+len(curtag) >= len(data) {
1514 break
1515 }
1516
1517 j = htmlblock_end(curtag, rndr, data[i-1:])
1518
1519 if j > 0 {
1520 i += j - 1
1521 found = true
1522 break
1523 }
1524 }
1525 }
1526
1527 if !found {
1528 return 0
1529 }
1530
1531 // the end of the block has been found
1532 if do_render && rndr.mk.blockhtml != nil {
1533 rndr.mk.blockhtml(ob, data[:i], rndr.mk.opaque)
1534 }
1535
1536 return i
1537}
1538
1539func find_block_tag(data []byte) (string, bool) {
1540 i := 0
1541 for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) {
1542 i++
1543 }
1544 if i >= len(data) {
1545 return "", false
1546 }
1547 key := string(data[:i])
1548 if _, ok := block_tags[key]; ok {
1549 return key, true
1550 }
1551 return "", false
1552}
1553
1554func htmlblock_end(tag string, rndr *render, data []byte) int {
1555 // assume data[0] == '<' && data[1] == '/' already tested
1556
1557 // check if tag is a match
1558 if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' {
1559 return 0
1560 }
1561
1562 // check white lines
1563 i := len(tag) + 3
1564 w := 0
1565 if i < len(data) {
1566 if w = is_empty(data[i:]); w == 0 {
1567 return 0 // non-blank after tag
1568 }
1569 }
1570 i += w
1571 w = 0
1572
1573 if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
1574 if i < len(data) {
1575 w = is_empty(data[i:])
1576 }
1577 } else {
1578 if i < len(data) {
1579 if w = is_empty(data[i:]); w == 0 {
1580 return 0 // non-blank line after tag line
1581 }
1582 }
1583 }
1584
1585 return i + w
1586}
1587
// is_empty tests whether the first line of data holds only spaces and tabs.
// It returns 0 when the line contains any other byte. Otherwise it returns
// the offset just past the line's '\n'.
//
// When data has no newline the result is len(data)+1, i.e. one past the end
// of data; in particular empty data yields 1.
func is_empty(data []byte) int {
	for pos, c := range data {
		switch c {
		case '\n':
			return pos + 1
		case ' ', '\t':
			// blank, keep scanning
		default:
			return 0
		}
	}
	return len(data) + 1
}
1597
// is_hrule reports whether the first line of data is a horizontal rule: up
// to three leading spaces followed by at least three '*', '-' or '_'
// characters (all the same), optionally interleaved with spaces and tabs.
//
// Input shorter than three bytes is never a rule, and at least two bytes must
// follow the first marker character.
func is_hrule(data []byte) bool {
	if len(data) < 3 {
		return false
	}

	// skip initial spaces
	pos := 0
	for pos < 3 && data[pos] == ' ' {
		pos++
	}
	if pos+2 >= len(data) {
		return false
	}

	// look at the hrule char
	marker := data[pos]
	if marker != '*' && marker != '-' && marker != '_' {
		return false
	}

	// the whole line must be the char or whitespace
	count := 0
	for _, c := range data[pos:] {
		if c == '\n' {
			break
		}
		switch c {
		case marker:
			count++
		case ' ', '\t':
			// separators are allowed
		default:
			return false
		}
	}
	return count >= 3
}
1634
1635func is_codefence(data []byte, syntax **string) int {
1636 i, n := 0, 0
1637
1638 // skip initial spaces
1639 if len(data) < 3 {
1640 return 0
1641 }
1642 if data[0] == ' ' {
1643 i++
1644 if data[1] == ' ' {
1645 i++
1646 if data[2] == ' ' {
1647 i++
1648 }
1649 }
1650 }
1651
1652 // look at the hrule char
1653 if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') {
1654 return 0
1655 }
1656
1657 c := data[i]
1658
1659 // the whole line must be the char or whitespace
1660 for i < len(data) && data[i] == c {
1661 n++
1662 i++
1663 }
1664
1665 if n < 3 {
1666 return 0
1667 }
1668
1669 if syntax != nil {
1670 syn := 0
1671
1672 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
1673 i++
1674 }
1675
1676 syntax_start := i
1677
1678 if i < len(data) && data[i] == '{' {
1679 i++
1680 syntax_start++
1681
1682 for i < len(data) && data[i] != '}' && data[i] != '\n' {
1683 syn++
1684 i++
1685 }
1686
1687 if i == len(data) || data[i] != '}' {
1688 return 0
1689 }
1690
1691 // string all whitespace at the beginning and the end
1692 // of the {} block
1693 for syn > 0 && isspace(data[syntax_start]) {
1694 syntax_start++
1695 syn--
1696 }
1697
1698 for syn > 0 && isspace(data[syntax_start+syn-1]) {
1699 syn--
1700 }
1701
1702 i++
1703 } else {
1704 for i < len(data) && !isspace(data[i]) {
1705 syn++
1706 i++
1707 }
1708 }
1709
1710 language := string(data[syntax_start : syntax_start+syn])
1711 *syntax = &language
1712 }
1713
1714 for i < len(data) && data[i] != '\n' {
1715 if !isspace(data[i]) {
1716 return 0
1717 }
1718 i++
1719 }
1720
1721 return i + 1
1722}
1723
1724func parse_fencedcode(ob *bytes.Buffer, rndr *render, data []byte) int {
1725 var lang *string
1726 beg := is_codefence(data, &lang)
1727 if beg == 0 {
1728 return 0
1729 }
1730
1731 work := bytes.NewBuffer(nil)
1732
1733 for beg < len(data) {
1734 fence_end := is_codefence(data[beg:], nil)
1735 if fence_end != 0 {
1736 beg += fence_end
1737 break
1738 }
1739
1740 var end int
1741 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1742 }
1743
1744 if beg < end {
1745 // verbatim copy to the working buffer, escaping entities
1746 if is_empty(data[beg:]) > 0 {
1747 work.WriteByte('\n')
1748 } else {
1749 work.Write(data[beg:end])
1750 }
1751 }
1752 beg = end
1753 }
1754
1755 if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
1756 work.WriteByte('\n')
1757 }
1758
1759 if rndr.mk.blockcode != nil {
1760 syntax := ""
1761 if lang != nil {
1762 syntax = *lang
1763 }
1764
1765 rndr.mk.blockcode(ob, work.Bytes(), syntax, rndr.mk.opaque)
1766 }
1767
1768 return beg
1769}
1770
1771func parse_table(ob *bytes.Buffer, rndr *render, data []byte) int {
1772 header_work := bytes.NewBuffer(nil)
1773 i, columns, col_data := parse_table_header(header_work, rndr, data)
1774 if i > 0 {
1775 body_work := bytes.NewBuffer(nil)
1776
1777 for i < len(data) {
1778 pipes, row_start := 0, i
1779 for ; i < len(data) && data[i] != '\n'; i++ {
1780 if data[i] == '|' {
1781 pipes++
1782 }
1783 }
1784
1785 if pipes == 0 || i == len(data) {
1786 i = row_start
1787 break
1788 }
1789
1790 parse_table_row(body_work, rndr, data[row_start:i], columns, col_data)
1791 i++
1792 }
1793
1794 if rndr.mk.table != nil {
1795 rndr.mk.table(ob, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque)
1796 }
1797 }
1798
1799 return i
1800}
1801
1802func parse_table_header(ob *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) {
1803 i, pipes := 0, 0
1804 column_data = []int{}
1805 for i = 0; i < len(data) && data[i] != '\n'; i++ {
1806 if data[i] == '|' {
1807 pipes++
1808 }
1809 }
1810
1811 if i == len(data) || pipes == 0 {
1812 return 0, 0, column_data
1813 }
1814
1815 header_end := i
1816
1817 if data[0] == '|' {
1818 pipes--
1819 }
1820
1821 if i > 2 && data[i-1] == '|' {
1822 pipes--
1823 }
1824
1825 columns = pipes + 1
1826 column_data = make([]int, columns)
1827
1828 // parse the header underline
1829 i++
1830 if i < len(data) && data[i] == '|' {
1831 i++
1832 }
1833
1834 under_end := i
1835 for under_end < len(data) && data[under_end] != '\n' {
1836 under_end++
1837 }
1838
1839 col := 0
1840 for ; col < columns && i < under_end; col++ {
1841 dashes := 0
1842
1843 for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1844 i++
1845 }
1846
1847 if data[i] == ':' {
1848 i++
1849 column_data[col] |= MKD_TABLE_ALIGN_L
1850 dashes++
1851 }
1852
1853 for i < under_end && data[i] == '-' {
1854 i++
1855 dashes++
1856 }
1857
1858 if i < under_end && data[i] == ':' {
1859 i++
1860 column_data[col] |= MKD_TABLE_ALIGN_R
1861 dashes++
1862 }
1863
1864 for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1865 i++
1866 }
1867
1868 if i < under_end && data[i] != '|' {
1869 break
1870 }
1871
1872 if dashes < 3 {
1873 break
1874 }
1875
1876 i++
1877 }
1878
1879 if col < columns {
1880 return 0, 0, column_data
1881 }
1882
1883 parse_table_row(ob, rndr, data[:header_end], columns, column_data)
1884 size = under_end + 1
1885 return
1886}
1887
1888func parse_table_row(ob *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) {
1889 i, col := 0, 0
1890 row_work := bytes.NewBuffer(nil)
1891
1892 if i < len(data) && data[i] == '|' {
1893 i++
1894 }
1895
1896 for col = 0; col < columns && i < len(data); col++ {
1897 for i < len(data) && isspace(data[i]) {
1898 i++
1899 }
1900
1901 cell_start := i
1902
1903 for i < len(data) && data[i] != '|' {
1904 i++
1905 }
1906
1907 cell_end := i - 1
1908
1909 for cell_end > cell_start && isspace(data[cell_end]) {
1910 cell_end--
1911 }
1912
1913 cell_work := bytes.NewBuffer(nil)
1914 parse_inline(cell_work, rndr, data[cell_start:cell_end+1])
1915
1916 if rndr.mk.table_cell != nil {
1917 cdata := 0
1918 if col < len(col_data) {
1919 cdata = col_data[col]
1920 }
1921 rndr.mk.table_cell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque)
1922 }
1923
1924 i++
1925 }
1926
1927 for ; col < columns; col++ {
1928 empty_cell := []byte{}
1929 if rndr.mk.table_cell != nil {
1930 cdata := 0
1931 if col < len(col_data) {
1932 cdata = col_data[col]
1933 }
1934 rndr.mk.table_cell(row_work, empty_cell, cdata, rndr.mk.opaque)
1935 }
1936 }
1937
1938 if rndr.mk.table_row != nil {
1939 rndr.mk.table_row(ob, row_work.Bytes(), rndr.mk.opaque)
1940 }
1941}
1942
// prefix_quote returns the length of the blockquote prefix at the start of
// data, or 0 when there is none. The prefix is up to three spaces, a '>',
// and one optional space or tab after it.
func prefix_quote(data []byte) int {
	pos := 0
	for pos < 3 && pos < len(data) && data[pos] == ' ' {
		pos++
	}
	if pos >= len(data) || data[pos] != '>' {
		return 0
	}

	pos++ // the '>' itself
	if pos < len(data) && (data[pos] == ' ' || data[pos] == '\t') {
		pos++
	}
	return pos
}
1957
1958// handles parsing of a blockquote fragment
1959func parse_blockquote(ob *bytes.Buffer, rndr *render, data []byte) int {
1960 out := bytes.NewBuffer(nil)
1961 work := bytes.NewBuffer(nil)
1962 beg, end := 0, 0
1963 for beg < len(data) {
1964 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1965 }
1966
1967 if pre := prefix_quote(data[beg:]); pre > 0 {
1968 beg += pre // skip prefix
1969 } else {
1970 // empty line followed by non-quote line
1971 if is_empty(data[beg:]) > 0 && (end >= len(data) || (prefix_quote(data[end:]) == 0 && is_empty(data[end:]) == 0)) {
1972 break
1973 }
1974 }
1975
1976 if beg < end { // copy into the in-place working buffer
1977 work.Write(data[beg:end])
1978 }
1979 beg = end
1980 }
1981
1982 parse_block(out, rndr, work.Bytes())
1983 if rndr.mk.blockquote != nil {
1984 rndr.mk.blockquote(ob, out.Bytes(), rndr.mk.opaque)
1985 }
1986 return end
1987}
1988
// prefix_code returns the length of the code-block indentation at the start
// of data: 1 for a leading tab, 4 for four leading spaces, 0 otherwise.
func prefix_code(data []byte) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		return 1
	case len(data) >= 4 && string(data[:4]) == "    ":
		return 4
	}
	return 0
}
1999
2000func parse_blockcode(ob *bytes.Buffer, rndr *render, data []byte) int {
2001 work := bytes.NewBuffer(nil)
2002
2003 beg, end := 0, 0
2004 for beg < len(data) {
2005 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
2006 }
2007
2008 if pre := prefix_code(data[beg:end]); pre > 0 {
2009 beg += pre
2010 } else {
2011 if is_empty(data[beg:end]) == 0 {
2012 // non-empty non-prefixed line breaks the pre
2013 break
2014 }
2015 }
2016
2017 if beg < end {
2018 // verbatim copy to the working buffer, escaping entities
2019 if is_empty(data[beg:end]) > 0 {
2020 work.WriteByte('\n')
2021 } else {
2022 work.Write(data[beg:end])
2023 }
2024 }
2025 beg = end
2026 }
2027
2028 // trim all the \n off the end of work
2029 workbytes := work.Bytes()
2030 n := 0
2031 for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' {
2032 n++
2033 }
2034 if n > 0 {
2035 work = bytes.NewBuffer(workbytes[:len(workbytes)-n])
2036 }
2037
2038 work.WriteByte('\n')
2039
2040 if rndr.mk.blockcode != nil {
2041 rndr.mk.blockcode(ob, work.Bytes(), "", rndr.mk.opaque)
2042 }
2043
2044 return beg
2045}
2046
// prefix_uli returns the length of the unordered list item prefix at the
// start of data, or 0 when there is none. The prefix is up to three spaces,
// one of '*', '+' or '-', and a space or tab.
func prefix_uli(data []byte) int {
	pos := 0
	for pos < 3 && pos < len(data) && data[pos] == ' ' {
		pos++
	}

	// both the bullet and its separator must be present
	if pos+1 >= len(data) {
		return 0
	}
	switch data[pos] {
	case '*', '+', '-':
		// a bullet
	default:
		return 0
	}
	if sep := data[pos+1]; sep != ' ' && sep != '\t' {
		return 0
	}
	return pos + 2
}
2058
// prefix_oli returns the length of the ordered list item prefix at the start
// of data, or 0 when there is none. The prefix is up to three spaces, one or
// more ASCII digits, a '.', and a space or tab.
func prefix_oli(data []byte) int {
	pos := 0
	for pos < 3 && pos < len(data) && data[pos] == ' ' {
		pos++
	}

	// the item number: at least one digit
	digits := pos
	for pos < len(data) && data[pos] >= '0' && data[pos] <= '9' {
		pos++
	}
	if pos == digits {
		return 0
	}

	// then a period and a blank
	if pos+1 >= len(data) || data[pos] != '.' {
		return 0
	}
	if sep := data[pos+1]; sep != ' ' && sep != '\t' {
		return 0
	}
	return pos + 2
}
2076
2077// parsing ordered or unordered list block
2078func parse_list(ob *bytes.Buffer, rndr *render, data []byte, flags int) int {
2079 work := bytes.NewBuffer(nil)
2080
2081 i, j := 0, 0
2082 for i < len(data) {
2083 j = parse_listitem(work, rndr, data[i:], &flags)
2084 i += j
2085
2086 if j == 0 || flags&MKD_LI_END != 0 {
2087 break
2088 }
2089 }
2090
2091 if rndr.mk.list != nil {
2092 rndr.mk.list(ob, work.Bytes(), flags, rndr.mk.opaque)
2093 }
2094 return i
2095}
2096
2097// parse a single list item
2098// assumes initial prefix is already removed
2099func parse_listitem(ob *bytes.Buffer, rndr *render, data []byte, flags *int) int {
2100 // keep track of the first indentation prefix
2101 beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0
2102
2103 for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' {
2104 orgpre++
2105 }
2106
2107 beg = prefix_uli(data)
2108 if beg == 0 {
2109 beg = prefix_oli(data)
2110 }
2111 if beg == 0 {
2112 return 0
2113 }
2114
2115 // skip to the beginning of the following line
2116 end = beg
2117 for end < len(data) && data[end-1] != '\n' {
2118 end++
2119 }
2120
2121 // get working buffers
2122 work := bytes.NewBuffer(nil)
2123 inter := bytes.NewBuffer(nil)
2124
2125 // put the first line into the working buffer
2126 work.Write(data[beg:end])
2127 beg = end
2128
2129 // process the following lines
2130 in_empty, has_inside_empty := false, false
2131 for beg < len(data) {
2132 end++
2133
2134 for end < len(data) && data[end-1] != '\n' {
2135 end++
2136 }
2137
2138 // process an empty line
2139 if is_empty(data[beg:end]) > 0 {
2140 in_empty = true
2141 beg = end
2142 continue
2143 }
2144
2145 // calculate the indentation
2146 i = 0
2147 for i < 4 && beg+i < end && data[beg+i] == ' ' {
2148 i++
2149 }
2150
2151 pre = i
2152 if data[beg] == '\t' {
2153 i = 1
2154 pre = 8
2155 }
2156
2157 // check for a new item
2158 chunk := data[beg+i : end]
2159 if (prefix_uli(chunk) > 0 && !is_hrule(chunk)) || prefix_oli(chunk) > 0 {
2160 if in_empty {
2161 has_inside_empty = true
2162 }
2163
2164 if pre == orgpre { // the following item must have the same indentation
2165 break
2166 }
2167
2168 if sublist == 0 {
2169 sublist = work.Len()
2170 }
2171 } else {
2172 // only join indented stuff after empty lines
2173 if in_empty && i < 4 && data[beg] != '\t' {
2174 *flags |= MKD_LI_END
2175 break
2176 } else {
2177 if in_empty {
2178 work.WriteByte('\n')
2179 has_inside_empty = true
2180 }
2181 }
2182 }
2183
2184 in_empty = false
2185
2186 // add the line into the working buffer without prefix
2187 work.Write(data[beg+i : end])
2188 beg = end
2189 }
2190
2191 // render li contents
2192 if has_inside_empty {
2193 *flags |= MKD_LI_BLOCK
2194 }
2195
2196 workbytes := work.Bytes()
2197 if *flags&MKD_LI_BLOCK != 0 {
2198 // intermediate render of block li
2199 if sublist > 0 && sublist < len(workbytes) {
2200 parse_block(inter, rndr, workbytes[:sublist])
2201 parse_block(inter, rndr, workbytes[sublist:])
2202 } else {
2203 parse_block(inter, rndr, workbytes)
2204 }
2205 } else {
2206 // intermediate render of inline li
2207 if sublist > 0 && sublist < len(workbytes) {
2208 parse_inline(inter, rndr, workbytes[:sublist])
2209 parse_block(inter, rndr, workbytes[sublist:])
2210 } else {
2211 parse_inline(inter, rndr, workbytes)
2212 }
2213 }
2214
2215 // render li itself
2216 if rndr.mk.listitem != nil {
2217 rndr.mk.listitem(ob, inter.Bytes(), *flags, rndr.mk.opaque)
2218 }
2219
2220 return beg
2221}
2222
2223func parse_paragraph(ob *bytes.Buffer, rndr *render, data []byte) int {
2224 i, end, level := 0, 0, 0
2225
2226 for i < len(data) {
2227 for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ {
2228 }
2229
2230 if is_empty(data[i:]) > 0 {
2231 break
2232 }
2233 if level = is_headerline(data[i:]); level > 0 {
2234 break
2235 }
2236
2237 if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
2238 if data[i] == '<' && rndr.mk.blockhtml != nil && parse_htmlblock(ob, rndr, data[i:], false) > 0 {
2239 end = i
2240 break
2241 }
2242 }
2243
2244 if is_atxheader(rndr, data[i:]) || is_hrule(data[i:]) {
2245 end = i
2246 break
2247 }
2248
2249 i = end
2250 }
2251
2252 work := data
2253 size := i
2254 for size > 0 && work[size-1] == '\n' {
2255 size--
2256 }
2257
2258 if level == 0 {
2259 tmp := bytes.NewBuffer(nil)
2260 parse_inline(tmp, rndr, work[:size])
2261 if rndr.mk.paragraph != nil {
2262 rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
2263 }
2264 } else {
2265 if size > 0 {
2266 beg := 0
2267 i = size
2268 size--
2269
2270 for size > 0 && work[size] != '\n' {
2271 size--
2272 }
2273
2274 beg = size + 1
2275 for size > 0 && work[size-1] == '\n' {
2276 size--
2277 }
2278
2279 if size > 0 {
2280 tmp := bytes.NewBuffer(nil)
2281 parse_inline(tmp, rndr, work[:size])
2282 if rndr.mk.paragraph != nil {
2283 rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
2284 }
2285
2286 work = work[beg:]
2287 size = i - beg
2288 } else {
2289 size = i
2290 }
2291 }
2292
2293 header_work := bytes.NewBuffer(nil)
2294 parse_inline(header_work, rndr, work[:size])
2295
2296 if rndr.mk.header != nil {
2297 rndr.mk.header(ob, header_work.Bytes(), level, rndr.mk.opaque)
2298 }
2299 }
2300
2301 return end
2302}
2303
2304
2305//
2306//
2307// HTML rendering
2308//
2309//
2310
// Flags for the HTML renderer. They are single bits meant to be combined in
// html_renderopts.flags.
const (
	// HTML_SKIP_HTML makes rndr_raw_html_tag drop every inline HTML tag.
	HTML_SKIP_HTML = 1 << iota
	// HTML_SKIP_STYLE makes rndr_raw_html_tag drop "style" tags.
	HTML_SKIP_STYLE
	// HTML_SKIP_IMAGES makes rndr_raw_html_tag drop "img" tags.
	HTML_SKIP_IMAGES
	// HTML_SKIP_LINKS makes rndr_raw_html_tag drop "a" tags.
	HTML_SKIP_LINKS
	// NOTE(review): use of HTML_EXPAND_TABS is not visible in this part of
	// the file.
	HTML_EXPAND_TABS
	// HTML_SAFELINK makes rndr_link and rndr_autolink refuse links that
	// is_safe_link rejects.
	HTML_SAFELINK
	// HTML_TOC makes rndr_header add a sequential id="toc_N" attribute.
	HTML_TOC
	// HTML_HARD_WRAP makes rndr_paragraph emit a line break at each newline.
	HTML_HARD_WRAP
	// NOTE(review): use of HTML_GITHUB_BLOCKCODE is not visible in this part
	// of the file.
	HTML_GITHUB_BLOCKCODE
	// NOTE(review): presumably selects the XHTML form of close_tag; confirm
	// against the code that builds html_renderopts.
	HTML_USE_XHTML
)
2323
// html_renderopts is the state shared by the HTML render callbacks; they
// receive it as their opaque argument.
type html_renderopts struct {
	// toc_data is bookkeeping for table-of-contents output.
	toc_data struct {
		// header_count numbers the headers: rndr_header uses it for the
		// "toc_N" id and then increments it.
		header_count int
		// NOTE(review): current_level is not used in this part of the file.
		current_level int
	}

	// flags is a bit mask of HTML_* constants.
	flags uint32

	// close_tag is appended after an unterminated void tag such as "<hr" or
	// "<br" to close it.
	close_tag string
}
2332
// attr_escape appends src to ob with the characters that are special in HTML
// text and attribute values replaced by entities:
//
//	<  becomes  &lt;
//	>  becomes  &gt;
//	&  becomes  &amp;
//	"  becomes  &quot;
//
// All other bytes are copied unchanged.
func attr_escape(ob *bytes.Buffer, src []byte) {
	// org is the start of the pending run of bytes that need no escaping
	org := 0
	for i, c := range src {
		var entity string
		switch c {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// directly copy the unescaped characters, then the entity
		if i > org {
			ob.Write(src[org:i])
		}
		ob.WriteString(entity)
		org = i + 1
	}

	if org < len(src) {
		ob.Write(src[org:])
	}
}
2360
// unscape_text appends src to ob with backslash escapes removed: each
// backslash is dropped and the byte after it is copied literally. A
// backslash that is the last byte of src is dropped.
func unscape_text(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		slash := bytes.IndexByte(src, '\\')
		if slash < 0 {
			// no more escapes: copy the rest
			ob.Write(src)
			return
		}
		if slash > 0 {
			ob.Write(src[:slash])
		}

		// nothing follows the backslash
		if slash+1 >= len(src) {
			return
		}

		ob.WriteByte(src[slash+1])
		src = src[slash+2:]
	}
}
2381
2382func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
2383 options := opaque.(*html_renderopts)
2384
2385 if ob.Len() > 0 {
2386 ob.WriteByte('\n')
2387 }
2388
2389 if options.flags&HTML_TOC != 0 {
2390 ob.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
2391 options.toc_data.header_count++
2392 } else {
2393 ob.WriteString(fmt.Sprintf("<h%d>", level))
2394 }
2395
2396 ob.Write(text)
2397 ob.WriteString(fmt.Sprintf("</h%d>\n", level))
2398}
2399
// rndr_raw_block writes a block of raw HTML unchanged, except that leading
// and trailing newlines are stripped and exactly one newline is appended.
// A newline is written first when ob already has content. Text that consists
// of newlines only (or is empty) produces no output at all.
func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}
	ob.Write(body)
	ob.WriteByte('\n')
}
2418
2419func rndr_hrule(ob *bytes.Buffer, opaque interface{}) {
2420 options := opaque.(*html_renderopts)
2421
2422 if ob.Len() > 0 {
2423 ob.WriteByte('\n')
2424 }
2425 ob.WriteString("<hr")
2426 ob.WriteString(options.close_tag)
2427}
2428
2429func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
2430 if ob.Len() > 0 {
2431 ob.WriteByte('\n')
2432 }
2433
2434 if lang != "" {
2435 ob.WriteString("<pre><code class=\"")
2436
2437 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
2438 for i < len(lang) && isspace(lang[i]) {
2439 i++
2440 }
2441
2442 if i < len(lang) {
2443 org := i
2444 for i < len(lang) && !isspace(lang[i]) {
2445 i++
2446 }
2447
2448 if lang[org] == '.' {
2449 org++
2450 }
2451
2452 if cls > 0 {
2453 ob.WriteByte(' ')
2454 }
2455 attr_escape(ob, []byte(lang[org:]))
2456 }
2457 }
2458
2459 ob.WriteString("\">")
2460 } else {
2461 ob.WriteString("<pre><code>")
2462 }
2463
2464 if len(text) > 0 {
2465 attr_escape(ob, text)
2466 }
2467
2468 ob.WriteString("</code></pre>\n")
2469}
2470
// rndr_blockquote writes text, which is already rendered, wrapped in a
// <blockquote> element. The opening tag is followed by a newline; nothing is
// written after the closing tag.
func rndr_blockquote(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		open  = "<blockquote>\n"
		close = "</blockquote>"
	)
	ob.WriteString(open)
	ob.Write(text)
	ob.WriteString(close)
}
2476
// rndr_table writes a <table> element with the already rendered header rows
// inside <thead> and the already rendered body rows inside <tbody>. A newline
// is written first when ob already has content.
func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	sections := []struct {
		open    string
		content []byte
	}{
		{"<table><thead>\n", header},
		{"\n</thead><tbody>\n", body},
	}
	for _, s := range sections {
		ob.WriteString(s.open)
		ob.Write(s.content)
	}
	ob.WriteString("\n</tbody></table>")
}
2487
// rndr_tablerow writes the already rendered cells in text wrapped in a <tr>
// element, with a newline after the opening and before the closing tag. A
// newline is written first when ob already has content.
func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	const open, close = "<tr>\n", "\n</tr>"
	ob.WriteString(open)
	ob.Write(text)
	ob.WriteString(close)
}
2496
2497func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) {
2498 if ob.Len() > 0 {
2499 ob.WriteByte('\n')
2500 }
2501 switch align {
2502 case MKD_TABLE_ALIGN_L:
2503 ob.WriteString("<td align=\"left\">")
2504 case MKD_TABLE_ALIGN_R:
2505 ob.WriteString("<td align=\"right\">")
2506 case MKD_TABLE_ALIGN_CENTER:
2507 ob.WriteString("<td align=\"center\">")
2508 default:
2509 ob.WriteString("<td>")
2510 }
2511
2512 ob.Write(text)
2513 ob.WriteString("</td>")
2514}
2515
2516func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
2517 if ob.Len() > 0 {
2518 ob.WriteByte('\n')
2519 }
2520 if flags&MKD_LIST_ORDERED != 0 {
2521 ob.WriteString("<ol>\n")
2522 } else {
2523 ob.WriteString("<ul>\n")
2524 }
2525 ob.Write(text)
2526 if flags&MKD_LIST_ORDERED != 0 {
2527 ob.WriteString("</ol>\n")
2528 } else {
2529 ob.WriteString("</ul>\n")
2530 }
2531}
2532
// rndr_listitem writes the already rendered item content in text, with its
// trailing newlines removed, wrapped in an <li> element and followed by a
// newline.
func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	ob.WriteString("<li>")
	ob.Write(bytes.TrimRight(text, "\n"))
	ob.WriteString("</li>\n")
}
2542
2543func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) {
2544 options := opaque.(*html_renderopts)
2545 i := 0
2546
2547 if ob.Len() > 0 {
2548 ob.WriteByte('\n')
2549 }
2550
2551 if len(text) == 0 {
2552 return
2553 }
2554
2555 for i < len(text) && isspace(text[i]) {
2556 i++
2557 }
2558
2559 if i == len(text) {
2560 return
2561 }
2562
2563 ob.WriteString("<p>")
2564 if options.flags&HTML_HARD_WRAP != 0 {
2565 for i < len(text) {
2566 org := i
2567 for i < len(text) && text[i] != '\n' {
2568 i++
2569 }
2570
2571 if i > org {
2572 ob.Write(text[org:i])
2573 }
2574
2575 if i >= len(text) {
2576 break
2577 }
2578
2579 ob.WriteString("<br>")
2580 ob.WriteString(options.close_tag)
2581 i++
2582 }
2583 } else {
2584 ob.Write(text[i:])
2585 }
2586 ob.WriteString("</p>\n")
2587}
2588
2589func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
2590 options := opaque.(*html_renderopts)
2591
2592 if len(link) == 0 {
2593 return 0
2594 }
2595 if options.flags&HTML_SAFELINK != 0 && !is_safe_link(link) && kind != MKDA_EMAIL {
2596 return 0
2597 }
2598
2599 ob.WriteString("<a href=\"")
2600 if kind == MKDA_EMAIL {
2601 ob.WriteString("mailto:")
2602 }
2603 ob.Write(link)
2604 ob.WriteString("\">")
2605
2606 /*
2607 * Pretty printing: if we get an email address as
2608 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
2609 * want to print the `mailto:` prefix
2610 */
2611 if bytes.HasPrefix(link, []byte("mailto:")) {
2612 attr_escape(ob, link[7:])
2613 } else {
2614 attr_escape(ob, link)
2615 }
2616
2617 ob.WriteString("</a>")
2618
2619 return 1
2620}
2621
2622func rndr_codespan(ob *bytes.Buffer, text []byte, opaque interface{}) int {
2623 ob.WriteString("<code>")
2624 attr_escape(ob, text)
2625 ob.WriteString("</code>")
2626 return 1
2627}
2628
// rndr_double_emphasis writes the already rendered text wrapped in a
// <strong> element and returns 1. Empty text writes nothing and returns 0.
func rndr_double_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<strong>")
		ob.Write(text)
		ob.WriteString("</strong>")
		return 1
	}
	return 0
}
2638
// rndr_emphasis writes the already rendered text wrapped in an <em> element
// and returns 1. Empty text writes nothing and returns 0.
func rndr_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<em>")
		ob.Write(text)
		ob.WriteString("</em>")
		return 1
	}
	return 0
}
2648
2649func rndr_image(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
2650 options := opaque.(*html_renderopts)
2651 if len(link) == 0 {
2652 return 0
2653 }
2654 ob.WriteString("<img src=\"")
2655 attr_escape(ob, link)
2656 ob.WriteString("\" alt=\"")
2657 if len(alt) > 0 {
2658 attr_escape(ob, alt)
2659 }
2660 if len(title) > 0 {
2661 ob.WriteString("\" title=\"")
2662 attr_escape(ob, title)
2663 }
2664
2665 ob.WriteByte('"')
2666 ob.WriteString(options.close_tag)
2667 return 1
2668}
2669
2670func rndr_linebreak(ob *bytes.Buffer, opaque interface{}) int {
2671 options := opaque.(*html_renderopts)
2672 ob.WriteString("<br")
2673 ob.WriteString(options.close_tag)
2674 return 1
2675}
2676
2677func rndr_link(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
2678 options := opaque.(*html_renderopts)
2679
2680 if options.flags&HTML_SAFELINK != 0 && !is_safe_link(link) {
2681 return 0
2682 }
2683
2684 ob.WriteString("<a href=\"")
2685 if len(link) > 0 {
2686 ob.Write(link)
2687 }
2688 if len(title) > 0 {
2689 ob.WriteString("\" title=\"")
2690 attr_escape(ob, title)
2691 }
2692 ob.WriteString("\">")
2693 if len(content) > 0 {
2694 ob.Write(content)
2695 }
2696 ob.WriteString("</a>")
2697 return 1
2698}
2699
2700func rndr_raw_html_tag(ob *bytes.Buffer, text []byte, opaque interface{}) int {
2701 options := opaque.(*html_renderopts)
2702 if options.flags&HTML_SKIP_HTML != 0 {
2703 return 1
2704 }
2705 if options.flags&HTML_SKIP_STYLE != 0 && is_html_tag(text, "style") {
2706 return 1
2707 }
2708 if options.flags&HTML_SKIP_LINKS != 0 && is_html_tag(text, "a") {
2709 return 1
2710 }
2711 if options.flags&HTML_SKIP_IMAGES != 0 && is_html_tag(text, "img") {
2712 return 1
2713 }
2714 ob.Write(text)
2715 return 1
2716}
2717
// rndr_triple_emphasis writes the already rendered text wrapped in nested
// <strong> and <em> elements and returns 1. Empty text writes nothing and
// returns 0.
func rndr_triple_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<strong><em>")
		ob.Write(text)
		ob.WriteString("</em></strong>")
		return 1
	}
	return 0
}
2727
// rndr_strikethrough writes the already rendered text wrapped in a <del>
// element and returns 1. Empty text writes nothing and returns 0.
func rndr_strikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<del>")
		ob.Write(text)
		ob.WriteString("</del>")
		return 1
	}
	return 0
}
2737
2738func rndr_normal_text(ob *bytes.Buffer, text []byte, opaque interface{}) {
2739 attr_escape(ob, text)
2740}
2741
2742func is_html_tag(tag []byte, tagname string) bool {
2743 i := 0
2744 if i < len(tag) && tag[0] != '<' {
2745 return false
2746 }
2747 i++
2748 for i < len(tag) && isspace(tag[i]) {
2749 i++
2750 }
2751
2752 if i < len(tag) && tag[i] == '/' {
2753 i++
2754 }
2755
2756 for i < len(tag) && isspace(tag[i]) {
2757 i++
2758 }
2759
2760 tag_i := i
2761 for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
2762 if tag_i >= len(tagname) {
2763 break
2764 }
2765
2766 if tag[i] != tagname[tag_i] {
2767 return false
2768 }
2769 }
2770
2771 if i == len(tag) {
2772 return false
2773 }
2774
2775 return isspace(tag[i]) || tag[i] == '>'
2776}
2777
2778
2779//
2780//
2781// Public interface
2782//
2783//
2784
// expand_tabs appends line to ob with every tab replaced by one to four
// spaces, so that the text after it starts at the next multiple of four.
// Positions are counted in bytes from the start of line.
func expand_tabs(ob *bytes.Buffer, line []byte) {
	col := 0 // output position reached so far
	for len(line) > 0 {
		tab := bytes.IndexByte(line, '\t')
		if tab < 0 {
			// no more tabs: copy the rest
			ob.Write(line)
			return
		}
		if tab > 0 {
			ob.Write(line[:tab])
		}
		col += tab

		// pad up to the next tab stop; a tab at a stop is four wide
		pad := 4 - col%4
		for k := 0; k < pad; k++ {
			ob.WriteByte(' ')
		}
		col += pad

		line = line[tab+1:]
	}
}
2814
2815func Markdown(ob *bytes.Buffer, ib []byte, rndrer *mkd_renderer, extensions uint32) {
2816 // no point in parsing if we can't render
2817 if rndrer == nil {
2818 return
2819 }
2820
2821 // fill in the character-level parsers
2822 markdown_char_ptrs[MD_CHAR_NONE] = nil
2823 markdown_char_ptrs[MD_CHAR_EMPHASIS] = char_emphasis
2824 markdown_char_ptrs[MD_CHAR_CODESPAN] = char_codespan
2825 markdown_char_ptrs[MD_CHAR_LINEBREAK] = char_linebreak
2826 markdown_char_ptrs[MD_CHAR_LINK] = char_link
2827 markdown_char_ptrs[MD_CHAR_LANGLE] = char_langle_tag
2828 markdown_char_ptrs[MD_CHAR_ESCAPE] = char_escape
2829 markdown_char_ptrs[MD_CHAR_ENTITITY] = char_entity
2830 markdown_char_ptrs[MD_CHAR_AUTOLINK] = char_autolink
2831
2832 // fill in the render structure
2833 rndr := new(render)
2834 rndr.mk = rndrer
2835 rndr.ext_flags = extensions
2836 rndr.max_nesting = 16
2837
2838 if rndr.mk.emphasis != nil || rndr.mk.double_emphasis != nil || rndr.mk.triple_emphasis != nil {
2839 rndr.active_char['*'] = MD_CHAR_EMPHASIS
2840 rndr.active_char['_'] = MD_CHAR_EMPHASIS
2841 if extensions&MKDEXT_STRIKETHROUGH != 0 {
2842 rndr.active_char['~'] = MD_CHAR_EMPHASIS
2843 }
2844 }
2845 if rndr.mk.codespan != nil {
2846 rndr.active_char['`'] = MD_CHAR_CODESPAN
2847 }
2848 if rndr.mk.linebreak != nil {
2849 rndr.active_char['\n'] = MD_CHAR_LINEBREAK
2850 }
2851 if rndr.mk.image != nil || rndr.mk.link != nil {
2852 rndr.active_char['['] = MD_CHAR_LINK
2853 }
2854 rndr.active_char['<'] = MD_CHAR_LANGLE
2855 rndr.active_char['\\'] = MD_CHAR_ESCAPE
2856 rndr.active_char['&'] = MD_CHAR_ENTITITY
2857
2858 if extensions&MKDEXT_AUTOLINK != 0 {
2859 rndr.active_char['h'] = MD_CHAR_AUTOLINK // http, https
2860 rndr.active_char['H'] = MD_CHAR_AUTOLINK
2861
2862 rndr.active_char['f'] = MD_CHAR_AUTOLINK // ftp
2863 rndr.active_char['F'] = MD_CHAR_AUTOLINK
2864
2865 rndr.active_char['m'] = MD_CHAR_AUTOLINK // mailto
2866 rndr.active_char['M'] = MD_CHAR_AUTOLINK
2867 }
2868
2869 // first pass: look for references, copying everything else
2870 text := bytes.NewBuffer(nil)
2871 beg, end := 0, 0
2872 for beg < len(ib) { // iterate over lines
2873 if is_ref(ib, beg, &end, rndr) {
2874 beg = end
2875 } else { // skip to the next line
2876 end = beg
2877 for end < len(ib) && ib[end] != '\n' && ib[end] != '\r' {
2878 end++
2879 }
2880
2881 // add the line body if present
2882 if end > beg {
2883 expand_tabs(text, ib[beg:end])
2884 }
2885
2886 for end < len(ib) && (ib[end] == '\n' || ib[end] == '\r') {
2887 // add one \n per newline
2888 if ib[end] == '\n' || (end+1 < len(ib) && ib[end+1] != '\n') {
2889 text.WriteByte('\n')
2890 }
2891 end++
2892 }
2893
2894 beg = end
2895 }
2896 }
2897
2898 // sort the reference array
2899 if len(rndr.refs) > 1 {
2900 sort.Sort(rndr.refs)
2901 }
2902
2903 // second pass: actual rendering
2904 if rndr.mk.doc_header != nil {
2905 rndr.mk.doc_header(ob, rndr.mk.opaque)
2906 }
2907
2908 if text.Len() > 0 {
2909 // add a final newline if not already present
2910 finalchar := text.Bytes()[text.Len()-1]
2911 if finalchar != '\n' && finalchar != '\r' {
2912 text.WriteByte('\n')
2913 }
2914 parse_block(ob, rndr, text.Bytes())
2915 }
2916
2917 if rndr.mk.doc_footer != nil {
2918 rndr.mk.doc_footer(ob, rndr.mk.opaque)
2919 }
2920
2921 if rndr.nesting != 0 {
2922 panic("Nesting level did not end at zero")
2923 }
2924}
2925
2926func main() {
2927 // configure the rendering engine
2928 rndrer := new(mkd_renderer)
2929 rndrer.blockcode = rndr_blockcode
2930 rndrer.blockquote = rndr_blockquote
2931 rndrer.blockhtml = rndr_raw_block
2932 rndrer.header = rndr_header
2933 rndrer.hrule = rndr_hrule
2934 rndrer.list = rndr_list
2935 rndrer.listitem = rndr_listitem
2936 rndrer.paragraph = rndr_paragraph
2937 rndrer.table = rndr_table
2938 rndrer.table_row = rndr_tablerow
2939 rndrer.table_cell = rndr_tablecell
2940
2941 rndrer.autolink = rndr_autolink
2942 rndrer.codespan = rndr_codespan
2943 rndrer.double_emphasis = rndr_double_emphasis
2944 rndrer.emphasis = rndr_emphasis
2945 rndrer.image = rndr_image
2946 rndrer.linebreak = rndr_linebreak
2947 rndrer.link = rndr_link
2948 rndrer.raw_html_tag = rndr_raw_html_tag
2949 rndrer.triple_emphasis = rndr_triple_emphasis
2950 rndrer.strikethrough = rndr_strikethrough
2951
2952 rndrer.normal_text = rndr_normal_text
2953
2954 rndrer.opaque = &html_renderopts{close_tag: ">\n"}
2955
2956 var extensions uint32
2957 extensions |= MKDEXT_NO_INTRA_EMPHASIS
2958 extensions |= MKDEXT_TABLES
2959 extensions |= MKDEXT_FENCED_CODE
2960 extensions |= MKDEXT_AUTOLINK
2961 extensions |= MKDEXT_STRIKETHROUGH
2962 extensions |= MKDEXT_LAX_HTML_BLOCKS
2963 extensions |= MKDEXT_SPACE_HEADERS
2964
2965 // read the input
2966 var ib []byte
2967 var err os.Error
2968 switch len(os.Args) {
2969 case 1:
2970 if ib, err = ioutil.ReadAll(os.Stdin); err != nil {
2971 fmt.Fprintln(os.Stderr, "Error reading from Stdin:", err)
2972 os.Exit(-1)
2973 }
2974 case 2, 3:
2975 if ib, err = ioutil.ReadFile(os.Args[1]); err != nil {
2976 fmt.Fprintln(os.Stderr, "Error reading from", os.Args[1], ":", err)
2977 os.Exit(-1)
2978 }
2979 default:
2980 fmt.Fprintln(os.Stderr, "Usage:", os.Args[0], "[inputfile [outputfile]]")
2981 os.Exit(-1)
2982 }
2983
2984 // call the main renderer function
2985 ob := bytes.NewBuffer(nil)
2986 Markdown(ob, ib, rndrer, extensions)
2987
2988 // output the result
2989 if len(os.Args) == 3 {
2990 if err = ioutil.WriteFile(os.Args[2], ob.Bytes(), 0644); err != nil {
2991 fmt.Fprintln(os.Stderr, "Error writing to", os.Args[2], ":", err)
2992 os.Exit(-1)
2993 }
2994 } else {
2995 if _, err = os.Stdout.Write(ob.Bytes()); err != nil {
2996 fmt.Fprintln(os.Stderr, "Error writing to Stdout:", err)
2997 os.Exit(-1)
2998 }
2999 }
3000}