markdown.go (view raw)
1//
2// Black Friday Markdown Processor
3// Ported to Go from http://github.com/tanoku/upskirt
4// by Russ Ross <russ@russross.com>
5//
6
7package main
8
9import (
10 "bytes"
11 "fmt"
12 "io/ioutil"
13 "os"
14 "sort"
15 "unicode"
16)
17
// Autolink classifications. tag_length stores one of these through its
// autolink out-parameter and the value is handed to the autolink callback
// as its kind argument.
const (
	// MKDA_NOT_AUTOLINK marks a tag that is not an autolink.
	MKDA_NOT_AUTOLINK = iota
	// MKDA_NORMAL marks an autolink introduced by a "scheme:" prefix.
	MKDA_NORMAL
	// MKDA_EMAIL marks an e-mail address autolink.
	MKDA_EMAIL
)
23
// Extension flags. Each constant is a distinct bit so that several
// extensions can be OR-ed together and tested with a bitwise AND.
const (
	// MKDEXT_NO_INTRA_EMPHASIS: a closing single emphasis marker only counts
	// when it is followed by whitespace, punctuation or the end of the data.
	MKDEXT_NO_INTRA_EMPHASIS = 1 << iota
	// MKDEXT_TABLES: block parsing tries parse_table.
	MKDEXT_TABLES
	// MKDEXT_FENCED_CODE: block parsing tries parse_fencedcode.
	MKDEXT_FENCED_CODE
	// MKDEXT_AUTOLINK: presumably enables bare-URL autolinks; the code that
	// tests this flag is not in this part of the file.
	MKDEXT_AUTOLINK
	// MKDEXT_STRIKETHROUGH: presumably enables '~~' strikethrough; the code
	// that tests this flag is not in this part of the file.
	MKDEXT_STRIKETHROUGH
	// MKDEXT_LAX_HTML_BLOCKS: presumably relaxes HTML block detection; the
	// code that tests this flag is not in this part of the file.
	MKDEXT_LAX_HTML_BLOCKS
	// MKDEXT_SPACE_HEADERS: an ATX header needs a space or tab after its
	// run of '#' characters.
	MKDEXT_SPACE_HEADERS
)
33
// List flags, passed in the flags argument of the list and list item
// callbacks. The zero value is deliberately left unnamed.
const (
	_ = iota
	// MKD_LIST_ORDERED is passed to parse_list for ordered lists.
	MKD_LIST_ORDERED
	// MKD_LI_BLOCK marks an <li> containing block data.
	MKD_LI_BLOCK
	// MKD_LI_END has the explicit value 8. NOTE(review): presumably an
	// internal end-of-list marker; its use is not visible in this part of
	// the file.
	MKD_LI_END = 8
)
40
// Table cell alignment flags. Left and right are single bits; center is
// the combination of both.
const (
	MKD_TABLE_ALIGN_L = 1 << iota
	MKD_TABLE_ALIGN_R
	MKD_TABLE_ALIGN_CENTER = MKD_TABLE_ALIGN_L | MKD_TABLE_ALIGN_R
)
46
// block_tags is the set of HTML tag names recognised as block-level tags.
// Names are stored in lower case; a lookup of any other name yields false.
var block_tags = map[string]bool{
	// headings
	"h1": true,
	"h2": true,
	"h3": true,
	"h4": true,
	"h5": true,
	"h6": true,

	// lists
	"dl": true,
	"ol": true,
	"ul": true,

	// text containers
	"p":          true,
	"div":        true,
	"pre":        true,
	"blockquote": true,
	"del":        true,
	"ins":        true,

	// forms, tables and embedded content
	"form":     true,
	"fieldset": true,
	"table":    true,
	"math":     true,
	"iframe":   true,
	"script":   true,
	"noscript": true,
}
71
// mkd_renderer is the table of callbacks used to render parsed data.
// Every callback receives the output buffer first and the user-supplied
// opaque value last.
type mkd_renderer struct {
	// block-level callbacks---nil skips the block
	blockcode  func(ob *bytes.Buffer, text []byte, lang string, opaque interface{})
	blockquote func(ob *bytes.Buffer, text []byte, opaque interface{})
	blockhtml  func(ob *bytes.Buffer, text []byte, opaque interface{})
	header     func(ob *bytes.Buffer, text []byte, level int, opaque interface{})
	hrule      func(ob *bytes.Buffer, opaque interface{})
	list       func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	listitem   func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})
	paragraph  func(ob *bytes.Buffer, text []byte, opaque interface{})
	table      func(ob *bytes.Buffer, header, body []byte, opaque interface{})
	table_row  func(ob *bytes.Buffer, text []byte, opaque interface{})
	table_cell func(ob *bytes.Buffer, text []byte, flags int, opaque interface{})

	// span-level callbacks---nil or return 0 prints the span verbatim
	autolink        func(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int
	codespan        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	double_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	emphasis        func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	image           func(ob *bytes.Buffer, link, title, alt []byte, opaque interface{}) int
	linebreak       func(ob *bytes.Buffer, opaque interface{}) int
	link            func(ob *bytes.Buffer, link, title, content []byte, opaque interface{}) int
	raw_html_tag    func(ob *bytes.Buffer, tag []byte, opaque interface{}) int
	triple_emphasis func(ob *bytes.Buffer, text []byte, opaque interface{}) int
	strikethrough   func(ob *bytes.Buffer, text []byte, opaque interface{}) int

	// low-level callbacks---nil copies input directly into the output
	entity      func(ob *bytes.Buffer, entity []byte, opaque interface{})
	normal_text func(ob *bytes.Buffer, text []byte, opaque interface{})

	// header and footer
	doc_header func(ob *bytes.Buffer, opaque interface{})
	doc_footer func(ob *bytes.Buffer, opaque interface{})

	// user data---passed back to every callback
	opaque interface{}
}
110
// link_ref is one link reference definition: the id between the brackets,
// the link target and the optional title.
type link_ref struct {
	id, link, title []byte
}

// link_ref_array is a list of reference definitions; its methods let it be
// ordered by id.
type link_ref_array []*link_ref
118
119// implement the sorting interface
120func (elt link_ref_array) Len() int {
121 return len(elt)
122}
123
124func (elt link_ref_array) Less(i, j int) bool {
125 return byteslice_less(elt[i].id, elt[j].id)
126}
127
// byteslice_less reports whether a sorts strictly before b when the two
// slices are compared byte by byte without regard to case. Each byte is
// lower-cased with unicode.ToLower before it is compared. When one slice
// is a prefix of the other, the shorter one sorts first; equal slices are
// not less than each other.
func byteslice_less(a []byte, b []byte) bool {
	// adapted from bytes.Compare in stdlib
	m := len(a)
	if m > len(b) {
		m = len(b)
	}
	for i, ac := range a[:m] {
		// unicode.ToLower takes a rune, so each byte is converted explicitly
		ai, bi := unicode.ToLower(rune(ac)), unicode.ToLower(rune(b[i]))
		if ai != bi {
			return ai < bi
		}
	}

	// the common prefix is equal: only a strictly shorter a sorts first
	return len(a) < len(b)
}
152
153func (elt link_ref_array) Swap(i, j int) {
154 elt[i], elt[j] = elt[j], elt[i]
155}
156
157// returns whether or not a line is a reference
158func is_ref(data []byte, beg int, last *int, rndr *render) bool {
159 // up to 3 optional leading spaces
160 if beg+3 > len(data) {
161 return false
162 }
163 i := 0
164 if data[beg] == ' ' {
165 i++
166 if data[beg+1] == ' ' {
167 i++
168 if data[beg+2] == ' ' {
169 i++
170 if data[beg+3] == ' ' {
171 return false
172 }
173 }
174 }
175 }
176 i += beg
177
178 // id part: anything but a newline between brackets
179 if data[i] != '[' {
180 return false
181 }
182 i++
183 id_offset := i
184 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
185 i++
186 }
187 if i >= len(data) || data[i] != ']' {
188 return false
189 }
190 id_end := i
191
192 // spacer: colon (space | tab)* newline? (space | tab)*
193 i++
194 if i >= len(data) || data[i] != ':' {
195 return false
196 }
197 i++
198 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
199 i++
200 }
201 if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
202 i++
203 if i < len(data) && data[i] == '\r' && data[i-1] == '\n' {
204 i++
205 }
206 }
207 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
208 i++
209 }
210 if i >= len(data) {
211 return false
212 }
213
214 // link: whitespace-free sequence, optionally between angle brackets
215 if data[i] == '<' {
216 i++
217 }
218 link_offset := i
219 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
220 i++
221 }
222 var link_end int
223 if data[i-1] == '>' {
224 link_end = i - 1
225 } else {
226 link_end = i
227 }
228
229 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
230 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
231 i++
232 }
233 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
234 return false
235 }
236
237 // compute end-of-line
238 line_end := 0
239 if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
240 line_end = i
241 }
242 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
243 line_end = i + 1
244 }
245
246 // optional (space|tab)* spacer after a newline
247 if line_end > 0 {
248 i = line_end + 1
249 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
250 i++
251 }
252 }
253
254 // optional title: any non-newline sequence enclosed in '"() alone on its line
255 title_offset, title_end := 0, 0
256 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
257 i++
258 title_offset = i
259
260 // look for EOL
261 for i < len(data) && data[i] != '\n' && data[i] != '\r' {
262 i++
263 }
264 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
265 title_end = i + 1
266 } else {
267 title_end = i
268 }
269
270 // step back
271 i--
272 for i > title_offset && (data[i] == ' ' || data[i] == '\t') {
273 i--
274 }
275 if i > title_offset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
276 line_end = title_end
277 title_end = i
278 }
279 }
280 if line_end == 0 { // garbage after the link
281 return false
282 }
283
284 // a valid ref has been found; fill in return structures
285 if last != nil {
286 *last = line_end
287 }
288 if rndr == nil {
289 return true
290 }
291 item := &link_ref{id: data[id_offset:id_end], link: data[link_offset:link_end], title: data[title_offset:title_end]}
292 rndr.refs = append(rndr.refs, item)
293
294 return true
295}
296
297type render struct {
298 mk *mkd_renderer
299 refs link_ref_array
300 active_char [256]int
301 ext_flags uint32
302 nesting int
303 max_nesting int
304}
305
// Inline trigger kinds. A value stored in render.active_char selects the
// entry of markdown_char_ptrs with the same index; MD_CHAR_NONE marks a
// byte that triggers nothing.
const (
	MD_CHAR_NONE      = iota // not an active character
	MD_CHAR_EMPHASIS         // emphasis marker
	MD_CHAR_CODESPAN         // code span delimiter
	MD_CHAR_LINEBREAK        // hard line break
	MD_CHAR_LINK             // link or image
	MD_CHAR_LANGLE           // '<': HTML tag or bracketed autolink
	MD_CHAR_ESCAPE           // backslash escape
	MD_CHAR_ENTITITY         // '&' entity
	MD_CHAR_AUTOLINK         // bare URL autolink
)
317
318// closures to render active chars, each:
319// returns the number of chars taken care of
320// data is the complete block being rendered
321// offset is the number of valid chars before the data
322//
323// Note: this is filled in in Markdown to prevent an initilization loop
324var markdown_char_ptrs [9]func(ob *bytes.Buffer, rndr *render, data []byte, offset int) int
325
326func parse_inline(ob *bytes.Buffer, rndr *render, data []byte) {
327 if rndr.nesting >= rndr.max_nesting {
328 return
329 }
330 rndr.nesting++
331
332 i, end := 0, 0
333 for i < len(data) {
334 // copy inactive chars into the output
335 for end < len(data) && rndr.active_char[data[end]] == 0 {
336 end++
337 }
338
339 if rndr.mk.normal_text != nil {
340 rndr.mk.normal_text(ob, data[i:end], rndr.mk.opaque)
341 } else {
342 ob.Write(data[i:end])
343 }
344
345 if end >= len(data) {
346 break
347 }
348 i = end
349
350 // call the trigger
351 action := rndr.active_char[data[end]]
352 end = markdown_char_ptrs[action](ob, rndr, data, i)
353
354 if end == 0 { // no action from the callback
355 end = i + 1
356 } else {
357 i += end
358 end = i
359 }
360 }
361
362 rndr.nesting--
363}
364
365// single and double emphasis parsing
366func char_emphasis(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
367 data = data[offset:]
368 c := data[0]
369 ret := 0
370
371 if len(data) > 2 && data[1] != c {
372 // whitespace cannot follow an opening emphasis;
373 // strikethrough only takes two characters '~~'
374 if c == '~' || isspace(data[1]) {
375 return 0
376 }
377 if ret = parse_emph1(ob, rndr, data[1:], c); ret == 0 {
378 return 0
379 }
380
381 return ret + 1
382 }
383
384 if len(data) > 3 && data[1] == c && data[2] != c {
385 if isspace(data[2]) {
386 return 0
387 }
388 if ret = parse_emph2(ob, rndr, data[2:], c); ret == 0 {
389 return 0
390 }
391
392 return ret + 2
393 }
394
395 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
396 if c == '~' || isspace(data[3]) {
397 return 0
398 }
399 if ret = parse_emph3(ob, rndr, data, 3, c); ret == 0 {
400 return 0
401 }
402
403 return ret + 3
404 }
405
406 return 0
407}
408
409func char_codespan(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
410 data = data[offset:]
411
412 nb := 0
413
414 // count the number of backticks in the delimiter
415 for nb < len(data) && data[nb] == '`' {
416 nb++
417 }
418
419 // find the next delimiter
420 i, end := 0, 0
421 for end = nb; end < len(data) && i < nb; end++ {
422 if data[end] == '`' {
423 i++
424 } else {
425 i = 0
426 }
427 }
428
429 if i < nb && end >= len(data) {
430 return 0 // no matching delimiter
431 }
432
433 // trim outside whitespace
434 f_begin := nb
435 for f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t') {
436 f_begin++
437 }
438
439 f_end := end - nb
440 for f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t') {
441 f_end--
442 }
443
444 // real code span
445 if rndr.mk.codespan == nil {
446 return 0
447 }
448 if f_begin < f_end {
449 if rndr.mk.codespan(ob, data[f_begin:f_end], rndr.mk.opaque) == 0 {
450 end = 0
451 }
452 } else {
453 if rndr.mk.codespan(ob, nil, rndr.mk.opaque) == 0 {
454 end = 0
455 }
456 }
457
458 return end
459
460}
461
462// '\n' preceded by two spaces
463func char_linebreak(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
464 if offset < 2 || data[offset-1] != ' ' || data[offset-2] != ' ' {
465 return 0
466 }
467
468 // remove trailing spaces from ob and render
469 ob_bytes := ob.Bytes()
470 end := len(ob_bytes)
471 for end > 0 && ob_bytes[end-1] == ' ' {
472 end--
473 }
474 ob.Truncate(end)
475
476 if rndr.mk.linebreak == nil {
477 return 0
478 }
479 if rndr.mk.linebreak(ob, rndr.mk.opaque) > 0 {
480 return 1
481 } else {
482 return 0
483 }
484
485 return 0
486}
487
488// '[': parse a link or an image
489func char_link(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
490 is_img := offset > 0 && data[offset-1] == '!'
491
492 data = data[offset:]
493
494 i := 1
495 var title, link []byte
496 text_has_nl := false
497
498 // check whether the correct renderer exists
499 if (is_img && rndr.mk.image == nil) || (!is_img && rndr.mk.link == nil) {
500 return 0
501 }
502
503 // look for the matching closing bracket
504 for level := 1; level > 0 && i < len(data); i++ {
505 switch {
506 case data[i] == '\n':
507 text_has_nl = true
508
509 case data[i-1] == '\\':
510 continue
511
512 case data[i] == '[':
513 level++
514
515 case data[i] == ']':
516 level--
517 if level <= 0 {
518 i-- // compensate for extra i++ in for loop
519 }
520 }
521 }
522
523 if i >= len(data) {
524 return 0
525 }
526
527 txt_e := i
528 i++
529
530 // skip any amount of whitespace or newline
531 // (this is much more lax than original markdown syntax)
532 for i < len(data) && isspace(data[i]) {
533 i++
534 }
535
536 // inline style link
537 switch {
538 case i < len(data) && data[i] == '(':
539 // skip initial whitespace
540 i++
541
542 for i < len(data) && isspace(data[i]) {
543 i++
544 }
545
546 link_b := i
547
548 // look for link end: ' " )
549 for i < len(data) {
550 if data[i] == '\\' {
551 i += 2
552 } else {
553 if data[i] == ')' || data[i] == '\'' || data[i] == '"' {
554 break
555 }
556 i++
557 }
558 }
559
560 if i >= len(data) {
561 return 0
562 }
563 link_e := i
564
565 // look for title end if present
566 title_b, title_e := 0, 0
567 if data[i] == '\'' || data[i] == '"' {
568 i++
569 title_b = i
570
571 for i < len(data) {
572 if data[i] == '\\' {
573 i += 2
574 } else {
575 if data[i] == ')' {
576 break
577 }
578 i++
579 }
580 }
581
582 if i >= len(data) {
583 return 0
584 }
585
586 // skip whitespace after title
587 title_e = i - 1
588 for title_e > title_b && isspace(data[title_e]) {
589 title_e--
590 }
591
592 // check for closing quote presence
593 if data[title_e] != '\'' && data[title_e] != '"' {
594 title_b, title_e = 0, 0
595 link_e = i
596 }
597 }
598
599 // remove whitespace at the end of the link
600 for link_e > link_b && isspace(data[link_e-1]) {
601 link_e--
602 }
603
604 // remove optional angle brackets around the link
605 if data[link_b] == '<' {
606 link_b++
607 }
608 if data[link_e-1] == '>' {
609 link_e--
610 }
611
612 // build escaped link and title
613 if link_e > link_b {
614 link = data[link_b:link_e]
615 }
616
617 if title_e > title_b {
618 title = data[title_b:title_e]
619 }
620
621 i++
622
623 // reference style link
624 case i < len(data) && data[i] == '[':
625 var id []byte
626
627 // look for the id
628 i++
629 link_b := i
630 for i < len(data) && data[i] != ']' {
631 i++
632 }
633 if i >= len(data) {
634 return 0
635 }
636 link_e := i
637
638 // find the link_ref
639 if link_b == link_e {
640 if text_has_nl {
641 b := bytes.NewBuffer(nil)
642
643 for j := 1; j < txt_e; j++ {
644 switch {
645 case data[j] != '\n':
646 b.WriteByte(data[j])
647 case data[j-1] != ' ':
648 b.WriteByte(' ')
649 }
650 }
651
652 id = b.Bytes()
653 } else {
654 id = data[1:txt_e]
655 }
656 } else {
657 id = data[link_b:link_e]
658 }
659
660 // find the link_ref with matching id
661 index := sortDotSearch(len(rndr.refs), func(i int) bool {
662 return !byteslice_less(rndr.refs[i].id, id)
663 })
664 if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
665 return 0
666 }
667 lr := rndr.refs[index]
668
669 // keep link and title from link_ref
670 link = lr.link
671 title = lr.title
672 i++
673
674 // shortcut reference style link
675 default:
676 var id []byte
677
678 // craft the id
679 if text_has_nl {
680 b := bytes.NewBuffer(nil)
681
682 for j := 1; j < txt_e; j++ {
683 switch {
684 case data[j] != '\n':
685 b.WriteByte(data[j])
686 case data[j-1] != ' ':
687 b.WriteByte(' ')
688 }
689 }
690
691 id = b.Bytes()
692 } else {
693 id = data[1:txt_e]
694 }
695
696 // find the link_ref with matching id
697 index := sortDotSearch(len(rndr.refs), func(i int) bool {
698 return !byteslice_less(rndr.refs[i].id, id)
699 })
700 if index >= len(rndr.refs) || !bytes.Equal(rndr.refs[index].id, id) {
701 return 0
702 }
703 lr := rndr.refs[index]
704
705 // keep link and title from link_ref
706 link = lr.link
707 title = lr.title
708
709 // rewind the whitespace
710 i = txt_e + 1
711 }
712
713 // build content: img alt is escaped, link content is parsed
714 content := bytes.NewBuffer(nil)
715 if txt_e > 1 {
716 if is_img {
717 content.Write(data[1:txt_e])
718 } else {
719 parse_inline(content, rndr, data[1:txt_e])
720 }
721 }
722
723 var u_link []byte
724 if len(link) > 0 {
725 u_link_buf := bytes.NewBuffer(nil)
726 unescape_text(u_link_buf, link)
727 u_link = u_link_buf.Bytes()
728 }
729
730 // call the relevant rendering function
731 ret := 0
732 if is_img {
733 ob_size := ob.Len()
734 ob_bytes := ob.Bytes()
735 if ob_size > 0 && ob_bytes[ob_size-1] == '!' {
736 ob.Truncate(ob_size - 1)
737 }
738
739 ret = rndr.mk.image(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
740 } else {
741 ret = rndr.mk.link(ob, u_link, title, content.Bytes(), rndr.mk.opaque)
742 }
743
744 if ret > 0 {
745 return i
746 }
747 return 0
748}
749
750// '<' when tags or autolinks are allowed
751func char_langle_tag(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
752 data = data[offset:]
753 altype := MKDA_NOT_AUTOLINK
754 end := tag_length(data, &altype)
755 ret := 0
756
757 if end > 2 {
758 switch {
759 case rndr.mk.autolink != nil && altype != MKDA_NOT_AUTOLINK:
760 u_link := bytes.NewBuffer(nil)
761 unescape_text(u_link, data[1:end+1-2])
762 ret = rndr.mk.autolink(ob, u_link.Bytes(), altype, rndr.mk.opaque)
763 case rndr.mk.raw_html_tag != nil:
764 ret = rndr.mk.raw_html_tag(ob, data[:end], rndr.mk.opaque)
765 }
766 }
767
768 if ret == 0 {
769 return 0
770 }
771 return end
772}
773
774// '\\' backslash escape
775var escape_chars = []byte("\\`*_{}[]()#+-.!:|&<>")
776
777func char_escape(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
778 data = data[offset:]
779
780 if len(data) > 1 {
781 if bytes.IndexByte(escape_chars, data[1]) < 0 {
782 return 0
783 }
784
785 if rndr.mk.normal_text != nil {
786 rndr.mk.normal_text(ob, data[1:2], rndr.mk.opaque)
787 } else {
788 ob.WriteByte(data[1])
789 }
790 }
791
792 return 2
793}
794
795// '&' escaped when it doesn't belong to an entity
796// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
797func char_entity(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
798 data = data[offset:]
799
800 end := 1
801
802 if end < len(data) && data[end] == '#' {
803 end++
804 }
805
806 for end < len(data) && isalnum(data[end]) {
807 end++
808 }
809
810 if end < len(data) && data[end] == ';' {
811 end++ // real entity
812 } else {
813 return 0 // lone '&'
814 }
815
816 if rndr.mk.entity != nil {
817 rndr.mk.entity(ob, data[:end], rndr.mk.opaque)
818 } else {
819 ob.Write(data[:end])
820 }
821
822 return end
823}
824
825func char_autolink(ob *bytes.Buffer, rndr *render, data []byte, offset int) int {
826 orig_data := data
827 data = data[offset:]
828
829 if offset > 0 {
830 if !isspace(orig_data[offset-1]) && !ispunct(orig_data[offset-1]) {
831 return 0
832 }
833 }
834
835 if !is_safe_link(data) {
836 return 0
837 }
838
839 link_end := 0
840 for link_end < len(data) && !isspace(data[link_end]) {
841 link_end++
842 }
843
844 // Skip punctuation at the end of the link
845 if (data[link_end-1] == '.' || data[link_end-1] == ',' || data[link_end-1] == ';') && data[link_end-2] != '\\' {
846 link_end--
847 }
848
849 // See if the link finishes with a punctuation sign that can be closed.
850 var copen byte
851 switch data[link_end-1] {
852 case '"':
853 copen = '"'
854 case '\'':
855 copen = '\''
856 case ')':
857 copen = '('
858 case ']':
859 copen = '['
860 case '}':
861 copen = '{'
862 default:
863 copen = 0
864 }
865
866 if copen != 0 {
867 buf_end := offset + link_end - 2
868
869 open_delim := 1
870
871 /* Try to close the final punctuation sign in this same line;
872 * if we managed to close it outside of the URL, that means that it's
873 * not part of the URL. If it closes inside the URL, that means it
874 * is part of the URL.
875 *
876 * Examples:
877 *
878 * foo http://www.pokemon.com/Pikachu_(Electric) bar
879 * => http://www.pokemon.com/Pikachu_(Electric)
880 *
881 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
882 * => http://www.pokemon.com/Pikachu_(Electric)
883 *
884 * foo http://www.pokemon.com/Pikachu_(Electric)) bar
885 * => http://www.pokemon.com/Pikachu_(Electric))
886 *
887 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
888 * => foo http://www.pokemon.com/Pikachu_(Electric)
889 */
890
891 for buf_end >= 0 && orig_data[buf_end] != '\n' && open_delim != 0 {
892 if orig_data[buf_end] == data[link_end-1] {
893 open_delim++
894 }
895
896 if orig_data[buf_end] == copen {
897 open_delim--
898 }
899
900 buf_end--
901 }
902
903 if open_delim == 0 {
904 link_end--
905 }
906 }
907
908 if rndr.mk.autolink != nil {
909 u_link := bytes.NewBuffer(nil)
910 unescape_text(u_link, data[:link_end])
911
912 rndr.mk.autolink(ob, u_link.Bytes(), MKDA_NORMAL, rndr.mk.opaque)
913 }
914
915 return link_end
916}
917
// valid_uris lists the URI prefixes that is_safe_link accepts.
var valid_uris = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("mailto://"),
}
919
920func is_safe_link(link []byte) bool {
921 for _, prefix := range valid_uris {
922 if len(link) > len(prefix) && !byteslice_less(link[:len(prefix)], prefix) && !byteslice_less(prefix, link[:len(prefix)]) && isalnum(link[len(prefix)]) {
923 return true
924 }
925 }
926
927 return false
928}
929
930
// ispunct reports whether c is one of the ASCII punctuation characters
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ (the set is taken from regexp in the
// stdlib). Those characters form four contiguous ranges.
func ispunct(c byte) bool {
	switch {
	case c >= '!' && c <= '/':
		return true
	case c >= ':' && c <= '@':
		return true
	case c >= '[' && c <= '`':
		return true
	case c >= '{' && c <= '~':
		return true
	}
	return false
}
940
// sortDotSearch is sort.Search, reproduced here because an older version
// of the library had a bug. It returns the smallest index in [0, n) for
// which f is true, or n when there is none; f must be false for some
// (possibly empty) prefix of the range and true for the rest.
func sortDotSearch(n int, f func(int) bool) int {
	// Invariant, with f(-1) taken as false and f(n) as true:
	// f(lo-1) == false and f(hi) == true.
	lo, hi := 0, n
	for lo < hi {
		mid := lo + (hi-lo)/2 // lo <= mid < hi, without overflow
		if f(mid) {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	// lo == hi, so f(lo-1) == false and f(lo) == true
	return lo
}
959
// isspace reports whether c is an ASCII whitespace character: space, tab,
// newline, carriage return, form feed or vertical tab.
func isspace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r', '\f', '\v':
		return true
	}
	return false
}
963
// isalnum reports whether c is an ASCII digit or an ASCII letter.
func isalnum(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
967
968// return the length of the given tag, or 0 is it's not valid
969func tag_length(data []byte, autolink *int) int {
970 var i, j int
971
972 // a valid tag can't be shorter than 3 chars
973 if len(data) < 3 {
974 return 0
975 }
976
977 // begins with a '<' optionally followed by '/', followed by letter or number
978 if data[0] != '<' {
979 return 0
980 }
981 if data[1] == '/' {
982 i = 2
983 } else {
984 i = 1
985 }
986
987 if !isalnum(data[i]) {
988 return 0
989 }
990
991 // scheme test
992 *autolink = MKDA_NOT_AUTOLINK
993
994 // try to find the beggining of an URI
995 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
996 i++
997 }
998
999 if i > 1 && data[i] == '@' {
1000 if j = is_mail_autolink(data[i:]); j != 0 {
1001 *autolink = MKDA_EMAIL
1002 return i + j
1003 }
1004 }
1005
1006 if i > 2 && data[i] == ':' {
1007 *autolink = MKDA_NORMAL
1008 i++
1009 }
1010
1011 // complete autolink test: no whitespace or ' or "
1012 switch {
1013 case i >= len(data):
1014 *autolink = MKDA_NOT_AUTOLINK
1015 case *autolink != 0:
1016 j = i
1017
1018 for i < len(data) {
1019 if data[i] == '\\' {
1020 i += 2
1021 } else {
1022 if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
1023 break
1024 } else {
1025 i++
1026 }
1027 }
1028
1029 }
1030
1031 if i >= len(data) {
1032 return 0
1033 }
1034 if i > j && data[i] == '>' {
1035 return i + 1
1036 }
1037
1038 // one of the forbidden chars has been found
1039 *autolink = MKDA_NOT_AUTOLINK
1040 }
1041
1042 // look for something looking like a tag end
1043 for i < len(data) && data[i] != '>' {
1044 i++
1045 }
1046 if i >= len(data) {
1047 return 0
1048 }
1049 return i + 1
1050}
1051
1052// look for the address part of a mail autolink and '>'
1053// this is less strict than the original markdown e-mail address matching
1054func is_mail_autolink(data []byte) int {
1055 nb := 0
1056
1057 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1058 for i := 0; i < len(data); i++ {
1059 if isalnum(data[i]) {
1060 continue
1061 }
1062
1063 switch data[i] {
1064 case '@':
1065 nb++
1066
1067 case '-', '.', '_':
1068 break
1069
1070 case '>':
1071 if nb == 1 {
1072 return i + 1
1073 } else {
1074 return 0
1075 }
1076 default:
1077 return 0
1078 }
1079 }
1080
1081 return 0
1082}
1083
// find_emph_char looks for the next emph char c in data, starting at
// index 1 and skipping over code spans and links. It returns the index of
// the char, or 0 when there is none. When a code span or link is left
// unterminated, the first c seen inside it (if any) is returned instead.
func find_emph_char(data []byte, c byte) int {
	n := len(data)
	pos := 1

	// scan moves pos forward to the next stop byte, or to n, and records in
	// *first the index of the first c met on the way unless one is
	// already recorded
	scan := func(stop byte, first *int) {
		for pos < n && data[pos] != stop {
			if *first == 0 && data[pos] == c {
				*first = pos
			}
			pos++
		}
	}

	for pos < n {
		// run up to the next interesting byte
		for pos < n && data[pos] != c && data[pos] != '`' && data[pos] != '[' {
			pos++
		}
		if pos >= n {
			return 0
		}
		if data[pos] == c {
			return pos
		}

		// do not count escaped chars
		if data[pos-1] == '\\' {
			pos++
			continue
		}

		// index of the first c seen inside the construct being skipped
		fallback := 0

		switch data[pos] {
		case '`':
			// skip a code span
			pos++
			scan('`', &fallback)
			if pos >= n {
				return fallback
			}
			pos++

		case '[':
			// skip a link: first the bracketed text...
			pos++
			scan(']', &fallback)
			pos++
			for pos < n && (data[pos] == ' ' || data[pos] == '\t' || data[pos] == '\n') {
				pos++
			}
			if pos >= n {
				return fallback
			}
			if data[pos] != '[' && data[pos] != '(' { // not a link
				if fallback > 0 {
					return fallback
				}
				continue
			}

			// ...then the part after it. The scan stops at the next
			// occurrence of the same byte that opened this part.
			opener := data[pos]
			pos++
			scan(opener, &fallback)
			if pos >= n {
				return fallback
			}
			pos++
		}
	}
	return 0
}
1161
1162func parse_emph1(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1163 i := 0
1164
1165 if rndr.mk.emphasis == nil {
1166 return 0
1167 }
1168
1169 // skip one symbol if coming from emph3
1170 if len(data) > 1 && data[0] == c && data[1] == c {
1171 i = 1
1172 }
1173
1174 for i < len(data) {
1175 length := find_emph_char(data[i:], c)
1176 if length == 0 {
1177 return 0
1178 }
1179 i += length
1180 if i >= len(data) {
1181 return 0
1182 }
1183
1184 if i+1 < len(data) && data[i+1] == c {
1185 i++
1186 continue
1187 }
1188
1189 if data[i] == c && !isspace(data[i-1]) {
1190
1191 if rndr.ext_flags&MKDEXT_NO_INTRA_EMPHASIS != 0 {
1192 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1193 continue
1194 }
1195 }
1196
1197 work := bytes.NewBuffer(nil)
1198 parse_inline(work, rndr, data[:i])
1199 r := rndr.mk.emphasis(ob, work.Bytes(), rndr.mk.opaque)
1200 if r > 0 {
1201 return i + 1
1202 } else {
1203 return 0
1204 }
1205 }
1206 }
1207
1208 return 0
1209}
1210
1211func parse_emph2(ob *bytes.Buffer, rndr *render, data []byte, c byte) int {
1212 render_method := rndr.mk.double_emphasis
1213 if c == '~' {
1214 render_method = rndr.mk.strikethrough
1215 }
1216
1217 if render_method == nil {
1218 return 0
1219 }
1220
1221 i := 0
1222
1223 for i < len(data) {
1224 length := find_emph_char(data[i:], c)
1225 if length == 0 {
1226 return 0
1227 }
1228 i += length
1229
1230 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1231 work := bytes.NewBuffer(nil)
1232 parse_inline(work, rndr, data[:i])
1233 r := render_method(ob, work.Bytes(), rndr.mk.opaque)
1234 if r > 0 {
1235 return i + 2
1236 } else {
1237 return 0
1238 }
1239 }
1240 i++
1241 }
1242 return 0
1243}
1244
1245func parse_emph3(ob *bytes.Buffer, rndr *render, data []byte, offset int, c byte) int {
1246 i := 0
1247 orig_data := data
1248 data = data[offset:]
1249
1250 for i < len(data) {
1251 length := find_emph_char(data[i:], c)
1252 if length == 0 {
1253 return 0
1254 }
1255 i += length
1256
1257 // skip whitespace preceded symbols
1258 if data[i] != c || isspace(data[i-1]) {
1259 continue
1260 }
1261
1262 switch {
1263 case (i+2 < len(data) && data[i+1] == c && data[i+2] == c && rndr.mk.triple_emphasis != nil):
1264 // triple symbol found
1265 work := bytes.NewBuffer(nil)
1266
1267 parse_inline(work, rndr, data[:i])
1268 r := rndr.mk.triple_emphasis(ob, work.Bytes(), rndr.mk.opaque)
1269 if r > 0 {
1270 return i + 3
1271 } else {
1272 return 0
1273 }
1274 case (i+1 < len(data) && data[i+1] == c):
1275 // double symbol found, hand over to emph1
1276 length = parse_emph1(ob, rndr, orig_data[offset-2:], c)
1277 if length == 0 {
1278 return 0
1279 } else {
1280 return length - 2
1281 }
1282 default:
1283 // single symbol found, hand over to emph2
1284 length = parse_emph2(ob, rndr, orig_data[offset-1:], c)
1285 if length == 0 {
1286 return 0
1287 } else {
1288 return length - 1
1289 }
1290 }
1291 }
1292 return 0
1293}
1294
1295// parse block-level data
1296func parse_block(ob *bytes.Buffer, rndr *render, data []byte) {
1297 if rndr.nesting >= rndr.max_nesting {
1298 return
1299 }
1300 rndr.nesting++
1301
1302 for len(data) > 0 {
1303 if is_atxheader(rndr, data) {
1304 data = data[parse_atxheader(ob, rndr, data):]
1305 continue
1306 }
1307 if data[0] == '<' && rndr.mk.blockhtml != nil {
1308 if i := parse_htmlblock(ob, rndr, data, true); i > 0 {
1309 data = data[i:]
1310 continue
1311 }
1312 }
1313 if i := is_empty(data); i > 0 {
1314 data = data[i:]
1315 continue
1316 }
1317 if is_hrule(data) {
1318 if rndr.mk.hrule != nil {
1319 rndr.mk.hrule(ob, rndr.mk.opaque)
1320 }
1321 var i int
1322 for i = 0; i < len(data) && data[i] != '\n'; i++ {
1323 }
1324 data = data[i:]
1325 continue
1326 }
1327 if rndr.ext_flags&MKDEXT_FENCED_CODE != 0 {
1328 if i := parse_fencedcode(ob, rndr, data); i > 0 {
1329 data = data[i:]
1330 continue
1331 }
1332 }
1333 if rndr.ext_flags&MKDEXT_TABLES != 0 {
1334 if i := parse_table(ob, rndr, data); i > 0 {
1335 data = data[i:]
1336 continue
1337 }
1338 }
1339 if prefix_quote(data) > 0 {
1340 data = data[parse_blockquote(ob, rndr, data):]
1341 continue
1342 }
1343 if prefix_code(data) > 0 {
1344 data = data[parse_blockcode(ob, rndr, data):]
1345 continue
1346 }
1347 if prefix_uli(data) > 0 {
1348 data = data[parse_list(ob, rndr, data, 0):]
1349 continue
1350 }
1351 if prefix_oli(data) > 0 {
1352 data = data[parse_list(ob, rndr, data, MKD_LIST_ORDERED):]
1353 continue
1354 }
1355
1356 data = data[parse_paragraph(ob, rndr, data):]
1357 }
1358
1359 rndr.nesting--
1360}
1361
1362func is_atxheader(rndr *render, data []byte) bool {
1363 if data[0] != '#' {
1364 return false
1365 }
1366
1367 if rndr.ext_flags&MKDEXT_SPACE_HEADERS != 0 {
1368 level := 0
1369 for level < len(data) && level < 6 && data[level] == '#' {
1370 level++
1371 }
1372 if level < len(data) && data[level] != ' ' && data[level] != '\t' {
1373 return false
1374 }
1375 }
1376 return true
1377}
1378
1379func parse_atxheader(ob *bytes.Buffer, rndr *render, data []byte) int {
1380 level := 0
1381 for level < len(data) && level < 6 && data[level] == '#' {
1382 level++
1383 }
1384 i, end := 0, 0
1385 for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ {
1386 }
1387 for end = i; end < len(data) && data[end] != '\n'; end++ {
1388 }
1389 skip := end
1390 for end > 0 && data[end-1] == '#' {
1391 end--
1392 }
1393 for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') {
1394 end--
1395 }
1396 if end > i {
1397 work := bytes.NewBuffer(nil)
1398 parse_inline(work, rndr, data[i:end])
1399 if rndr.mk.header != nil {
1400 rndr.mk.header(ob, work.Bytes(), level, rndr.mk.opaque)
1401 }
1402 }
1403 return skip
1404}
1405
// is_headerline tests whether the first line of data is a setext header
// underline. It returns 1 for a line of '=' characters, 2 for a line of '-'
// characters (either may be followed by trailing spaces or tabs), and 0
// otherwise. data must be non-empty.
func is_headerline(data []byte) int {
	var level int
	switch data[0] {
	case '=':
		level = 1
	case '-':
		level = 2
	default:
		return 0
	}

	// consume the run of underline characters, then trailing blanks
	marker := data[0]
	pos := 1
	for pos < len(data) && data[pos] == marker {
		pos++
	}
	for pos < len(data) && (data[pos] == ' ' || data[pos] == '\t') {
		pos++
	}

	// anything else left on the line disqualifies it
	if pos < len(data) && data[pos] != '\n' {
		return 0
	}
	return level
}
1439
// parse_htmlblock tries to recognize a raw HTML block at the start of data.
// It returns the number of bytes the block occupies, or 0 when data does not
// start with a recognized block. When do_render is true and a blockhtml
// callback is set, the block is also passed to that callback; with do_render
// false the function only measures the block.
//
// Three forms are recognized: an HTML comment, an <hr> tag, and an element
// whose name find_block_tag accepts, closed by a matching end tag that
// htmlblock_end accepts.
func parse_htmlblock(ob *bytes.Buffer, rndr *render, data []byte, do_render bool) int {
	var i, j int

	// identify the opening tag
	if len(data) < 2 || data[0] != '<' {
		return 0
	}
	curtag, tagfound := find_block_tag(data[1:])

	// handle special cases
	if !tagfound {

		// HTML comment, laxist form
		if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' {
			i = 5

			// advance until data[i-2:i+1] is the closing "-->"
			for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
				i++
			}
			i++

			// the rest of the line after the comment must be blank
			if i < len(data) {
				j = is_empty(data[i:])
			}

			if j > 0 {
				size := i + j
				if do_render && rndr.mk.blockhtml != nil {
					rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
				}
				return size
			}
		}

		// HR, which is the only self-closing block tag considered
		if len(data) > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') {
			i = 3
			for i < len(data) && data[i] != '>' {
				i++
			}

			// require at least one byte after '>' and a blank rest of line
			if i+1 < len(data) {
				i++
				j = is_empty(data[i:])
				if j > 0 {
					size := i + j
					if do_render && rndr.mk.blockhtml != nil {
						rndr.mk.blockhtml(ob, data[:size], rndr.mk.opaque)
					}
					return size
				}
			}
		}

		// no special case recognized
		return 0
	}

	// NOTE: a first pass looking for an unindented matching closing tag
	// followed by a blank line is not implemented here; only the state
	// it would have produced is initialized.
	i = 1
	found := false

	// single pass: accept the first closing tag that htmlblock_end validates,
	// but not if tag is "ins" or "del" (following original Markdown.pl);
	// for those two tags found stays false and the function returns 0
	if curtag != "ins" && curtag != "del" {
		i = 1
		for i < len(data) {
			// advance until data[i-1:i+1] is "</"
			i++
			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
				i++
			}

			// not enough room left for "</" + tag + ">" plus one more byte
			if i+2+len(curtag) >= len(data) {
				break
			}

			j = htmlblock_end(curtag, rndr, data[i-1:])

			if j > 0 {
				// j is measured from the '<' at i-1
				i += j - 1
				found = true
				break
			}
		}
	}

	if !found {
		return 0
	}

	// the end of the block has been found
	if do_render && rndr.mk.blockhtml != nil {
		rndr.mk.blockhtml(ob, data[:i], rndr.mk.opaque)
	}

	return i
}
1538
1539func find_block_tag(data []byte) (string, bool) {
1540 i := 0
1541 for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) {
1542 i++
1543 }
1544 if i >= len(data) {
1545 return "", false
1546 }
1547 key := string(data[:i])
1548 if block_tags[key] {
1549 return key, true
1550 }
1551 return "", false
1552}
1553
1554func htmlblock_end(tag string, rndr *render, data []byte) int {
1555 // assume data[0] == '<' && data[1] == '/' already tested
1556
1557 // check if tag is a match
1558 if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' {
1559 return 0
1560 }
1561
1562 // check white lines
1563 i := len(tag) + 3
1564 w := 0
1565 if i < len(data) {
1566 if w = is_empty(data[i:]); w == 0 {
1567 return 0 // non-blank after tag
1568 }
1569 }
1570 i += w
1571 w = 0
1572
1573 if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
1574 if i < len(data) {
1575 w = is_empty(data[i:])
1576 }
1577 } else {
1578 if i < len(data) {
1579 if w = is_empty(data[i:]); w == 0 {
1580 return 0 // non-blank line after tag line
1581 }
1582 }
1583 }
1584
1585 return i + w
1586}
1587
// is_empty checks whether the first line of data contains only spaces and
// tabs. It returns 0 if the line has any other character; otherwise it
// returns the index just past the line's newline. When data has no newline
// the result is len(data)+1, so callers must not use it blindly as an offset.
func is_empty(data []byte) int {
	for pos, c := range data {
		switch c {
		case '\n':
			return pos + 1
		case ' ', '\t':
			// still blank, keep scanning
		default:
			return 0
		}
	}
	return len(data) + 1
}
1597
// is_hrule reports whether the first line of data is a horizontal rule:
// up to three leading spaces, then at least three '*', '-' or '_' characters
// (all the same), optionally separated by spaces or tabs.
func is_hrule(data []byte) bool {
	if len(data) < 3 {
		return false
	}

	// skip up to three leading spaces
	pos := 0
	for pos < 3 && data[pos] == ' ' {
		pos++
	}

	// need a rule character with at least two more bytes after it
	if pos+2 >= len(data) {
		return false
	}
	marker := data[pos]
	if marker != '*' && marker != '-' && marker != '_' {
		return false
	}

	// the whole line must be the marker or whitespace
	count := 0
	for ; pos < len(data) && data[pos] != '\n'; pos++ {
		if data[pos] == marker {
			count++
			continue
		}
		if data[pos] != ' ' && data[pos] != '\t' {
			return false
		}
	}

	return count >= 3
}
1634
1635func is_codefence(data []byte, syntax **string) int {
1636 i, n := 0, 0
1637
1638 // skip initial spaces
1639 if len(data) < 3 {
1640 return 0
1641 }
1642 if data[0] == ' ' {
1643 i++
1644 if data[1] == ' ' {
1645 i++
1646 if data[2] == ' ' {
1647 i++
1648 }
1649 }
1650 }
1651
1652 // look at the hrule char
1653 if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') {
1654 return 0
1655 }
1656
1657 c := data[i]
1658
1659 // the whole line must be the char or whitespace
1660 for i < len(data) && data[i] == c {
1661 n++
1662 i++
1663 }
1664
1665 if n < 3 {
1666 return 0
1667 }
1668
1669 if syntax != nil {
1670 syn := 0
1671
1672 for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
1673 i++
1674 }
1675
1676 syntax_start := i
1677
1678 if i < len(data) && data[i] == '{' {
1679 i++
1680 syntax_start++
1681
1682 for i < len(data) && data[i] != '}' && data[i] != '\n' {
1683 syn++
1684 i++
1685 }
1686
1687 if i == len(data) || data[i] != '}' {
1688 return 0
1689 }
1690
1691 // string all whitespace at the beginning and the end
1692 // of the {} block
1693 for syn > 0 && isspace(data[syntax_start]) {
1694 syntax_start++
1695 syn--
1696 }
1697
1698 for syn > 0 && isspace(data[syntax_start+syn-1]) {
1699 syn--
1700 }
1701
1702 i++
1703 } else {
1704 for i < len(data) && !isspace(data[i]) {
1705 syn++
1706 i++
1707 }
1708 }
1709
1710 language := string(data[syntax_start : syntax_start+syn])
1711 *syntax = &language
1712 }
1713
1714 for i < len(data) && data[i] != '\n' {
1715 if !isspace(data[i]) {
1716 return 0
1717 }
1718 i++
1719 }
1720
1721 return i + 1
1722}
1723
1724func parse_fencedcode(ob *bytes.Buffer, rndr *render, data []byte) int {
1725 var lang *string
1726 beg := is_codefence(data, &lang)
1727 if beg == 0 {
1728 return 0
1729 }
1730
1731 work := bytes.NewBuffer(nil)
1732
1733 for beg < len(data) {
1734 fence_end := is_codefence(data[beg:], nil)
1735 if fence_end != 0 {
1736 beg += fence_end
1737 break
1738 }
1739
1740 var end int
1741 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1742 }
1743
1744 if beg < end {
1745 // verbatim copy to the working buffer, escaping entities
1746 if is_empty(data[beg:]) > 0 {
1747 work.WriteByte('\n')
1748 } else {
1749 work.Write(data[beg:end])
1750 }
1751 }
1752 beg = end
1753 }
1754
1755 if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
1756 work.WriteByte('\n')
1757 }
1758
1759 if rndr.mk.blockcode != nil {
1760 syntax := ""
1761 if lang != nil {
1762 syntax = *lang
1763 }
1764
1765 rndr.mk.blockcode(ob, work.Bytes(), syntax, rndr.mk.opaque)
1766 }
1767
1768 return beg
1769}
1770
1771func parse_table(ob *bytes.Buffer, rndr *render, data []byte) int {
1772 header_work := bytes.NewBuffer(nil)
1773 i, columns, col_data := parse_table_header(header_work, rndr, data)
1774 if i > 0 {
1775 body_work := bytes.NewBuffer(nil)
1776
1777 for i < len(data) {
1778 pipes, row_start := 0, i
1779 for ; i < len(data) && data[i] != '\n'; i++ {
1780 if data[i] == '|' {
1781 pipes++
1782 }
1783 }
1784
1785 if pipes == 0 || i == len(data) {
1786 i = row_start
1787 break
1788 }
1789
1790 parse_table_row(body_work, rndr, data[row_start:i], columns, col_data)
1791 i++
1792 }
1793
1794 if rndr.mk.table != nil {
1795 rndr.mk.table(ob, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque)
1796 }
1797 }
1798
1799 return i
1800}
1801
1802func parse_table_header(ob *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) {
1803 i, pipes := 0, 0
1804 column_data = []int{}
1805 for i = 0; i < len(data) && data[i] != '\n'; i++ {
1806 if data[i] == '|' {
1807 pipes++
1808 }
1809 }
1810
1811 if i == len(data) || pipes == 0 {
1812 return 0, 0, column_data
1813 }
1814
1815 header_end := i
1816
1817 if data[0] == '|' {
1818 pipes--
1819 }
1820
1821 if i > 2 && data[i-1] == '|' {
1822 pipes--
1823 }
1824
1825 columns = pipes + 1
1826 column_data = make([]int, columns)
1827
1828 // parse the header underline
1829 i++
1830 if i < len(data) && data[i] == '|' {
1831 i++
1832 }
1833
1834 under_end := i
1835 for under_end < len(data) && data[under_end] != '\n' {
1836 under_end++
1837 }
1838
1839 col := 0
1840 for ; col < columns && i < under_end; col++ {
1841 dashes := 0
1842
1843 for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1844 i++
1845 }
1846
1847 if data[i] == ':' {
1848 i++
1849 column_data[col] |= MKD_TABLE_ALIGN_L
1850 dashes++
1851 }
1852
1853 for i < under_end && data[i] == '-' {
1854 i++
1855 dashes++
1856 }
1857
1858 if i < under_end && data[i] == ':' {
1859 i++
1860 column_data[col] |= MKD_TABLE_ALIGN_R
1861 dashes++
1862 }
1863
1864 for i < under_end && (data[i] == ' ' || data[i] == '\t') {
1865 i++
1866 }
1867
1868 if i < under_end && data[i] != '|' {
1869 break
1870 }
1871
1872 if dashes < 3 {
1873 break
1874 }
1875
1876 i++
1877 }
1878
1879 if col < columns {
1880 return 0, 0, column_data
1881 }
1882
1883 parse_table_row(ob, rndr, data[:header_end], columns, column_data)
1884 size = under_end + 1
1885 return
1886}
1887
1888func parse_table_row(ob *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) {
1889 i, col := 0, 0
1890 row_work := bytes.NewBuffer(nil)
1891
1892 if i < len(data) && data[i] == '|' {
1893 i++
1894 }
1895
1896 for col = 0; col < columns && i < len(data); col++ {
1897 for i < len(data) && isspace(data[i]) {
1898 i++
1899 }
1900
1901 cell_start := i
1902
1903 for i < len(data) && data[i] != '|' {
1904 i++
1905 }
1906
1907 cell_end := i - 1
1908
1909 for cell_end > cell_start && isspace(data[cell_end]) {
1910 cell_end--
1911 }
1912
1913 cell_work := bytes.NewBuffer(nil)
1914 parse_inline(cell_work, rndr, data[cell_start:cell_end+1])
1915
1916 if rndr.mk.table_cell != nil {
1917 cdata := 0
1918 if col < len(col_data) {
1919 cdata = col_data[col]
1920 }
1921 rndr.mk.table_cell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque)
1922 }
1923
1924 i++
1925 }
1926
1927 for ; col < columns; col++ {
1928 empty_cell := []byte{}
1929 if rndr.mk.table_cell != nil {
1930 cdata := 0
1931 if col < len(col_data) {
1932 cdata = col_data[col]
1933 }
1934 rndr.mk.table_cell(row_work, empty_cell, cdata, rndr.mk.opaque)
1935 }
1936 }
1937
1938 if rndr.mk.table_row != nil {
1939 rndr.mk.table_row(ob, row_work.Bytes(), rndr.mk.opaque)
1940 }
1941}
1942
// prefix_quote returns the length of the blockquote prefix at the start of
// data: up to three spaces, a '>', and one optional space or tab after it.
// It returns 0 when data does not start with such a prefix.
func prefix_quote(data []byte) int {
	pos := 0
	for pos < 3 && pos < len(data) && data[pos] == ' ' {
		pos++
	}
	if pos >= len(data) || data[pos] != '>' {
		return 0
	}
	pos++

	// a single blank after '>' belongs to the prefix
	if pos < len(data) && (data[pos] == ' ' || data[pos] == '\t') {
		pos++
	}
	return pos
}
1957
1958// parse a blockquote fragment
1959func parse_blockquote(ob *bytes.Buffer, rndr *render, data []byte) int {
1960 out := bytes.NewBuffer(nil)
1961 work := bytes.NewBuffer(nil)
1962 beg, end := 0, 0
1963 for beg < len(data) {
1964 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
1965 }
1966
1967 if pre := prefix_quote(data[beg:]); pre > 0 {
1968 beg += pre // skip prefix
1969 } else {
1970 // empty line followed by non-quote line
1971 if is_empty(data[beg:]) > 0 && (end >= len(data) || (prefix_quote(data[end:]) == 0 && is_empty(data[end:]) == 0)) {
1972 break
1973 }
1974 }
1975
1976 if beg < end { // copy into the in-place working buffer
1977 work.Write(data[beg:end])
1978 }
1979 beg = end
1980 }
1981
1982 parse_block(out, rndr, work.Bytes())
1983 if rndr.mk.blockquote != nil {
1984 rndr.mk.blockquote(ob, out.Bytes(), rndr.mk.opaque)
1985 }
1986 return end
1987}
1988
// prefix_code returns the length of the indentation that marks a code block
// line: 1 for a leading tab, 4 for four leading spaces, 0 otherwise.
func prefix_code(data []byte) int {
	switch {
	case len(data) > 0 && data[0] == '\t':
		return 1
	case bytes.HasPrefix(data, []byte("    ")):
		return 4
	}
	return 0
}
1999
2000func parse_blockcode(ob *bytes.Buffer, rndr *render, data []byte) int {
2001 work := bytes.NewBuffer(nil)
2002
2003 beg, end := 0, 0
2004 for beg < len(data) {
2005 for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
2006 }
2007
2008 if pre := prefix_code(data[beg:end]); pre > 0 {
2009 beg += pre
2010 } else {
2011 if is_empty(data[beg:end]) == 0 {
2012 // non-empty non-prefixed line breaks the pre
2013 break
2014 }
2015 }
2016
2017 if beg < end {
2018 // verbatim copy to the working buffer, escaping entities
2019 if is_empty(data[beg:end]) > 0 {
2020 work.WriteByte('\n')
2021 } else {
2022 work.Write(data[beg:end])
2023 }
2024 }
2025 beg = end
2026 }
2027
2028 // trim all the \n off the end of work
2029 workbytes := work.Bytes()
2030 n := 0
2031 for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' {
2032 n++
2033 }
2034 if n > 0 {
2035 work = bytes.NewBuffer(workbytes[:len(workbytes)-n])
2036 }
2037
2038 work.WriteByte('\n')
2039
2040 if rndr.mk.blockcode != nil {
2041 rndr.mk.blockcode(ob, work.Bytes(), "", rndr.mk.opaque)
2042 }
2043
2044 return beg
2045}
2046
// prefix_uli returns the length of the unordered list item prefix at the
// start of data: up to three spaces, one of '*', '+' or '-', and a space or
// tab. It returns 0 when data does not start with such a prefix.
func prefix_uli(data []byte) int {
	pos := 0
	for pos < 3 && pos < len(data) && data[pos] == ' ' {
		pos++
	}

	// both the bullet and the blank after it must be present
	if pos+1 >= len(data) {
		return 0
	}
	switch data[pos] {
	case '*', '+', '-':
	default:
		return 0
	}
	if next := data[pos+1]; next != ' ' && next != '\t' {
		return 0
	}
	return pos + 2
}
2058
// prefix_oli returns the length of the ordered list item prefix at the start
// of data: up to three spaces, one or more digits, a '.', and a space or
// tab. It returns 0 when data does not start with such a prefix.
func prefix_oli(data []byte) int {
	pos := 0
	for pos < 3 && pos < len(data) && data[pos] == ' ' {
		pos++
	}

	// consume the item number; at least one digit is required
	digits := pos
	for pos < len(data) && data[pos] >= '0' && data[pos] <= '9' {
		pos++
	}
	if pos == digits {
		return 0
	}

	// the number must be followed by ". " or ".\t"
	if pos+1 >= len(data) || data[pos] != '.' {
		return 0
	}
	if next := data[pos+1]; next != ' ' && next != '\t' {
		return 0
	}
	return pos + 2
}
2076
2077// parse ordered or unordered list block
2078func parse_list(ob *bytes.Buffer, rndr *render, data []byte, flags int) int {
2079 work := bytes.NewBuffer(nil)
2080
2081 i, j := 0, 0
2082 for i < len(data) {
2083 j = parse_listitem(work, rndr, data[i:], &flags)
2084 i += j
2085
2086 if j == 0 || flags&MKD_LI_END != 0 {
2087 break
2088 }
2089 }
2090
2091 if rndr.mk.list != nil {
2092 rndr.mk.list(ob, work.Bytes(), flags, rndr.mk.opaque)
2093 }
2094 return i
2095}
2096
// parse_listitem parses a single list item. data must start at the item's
// own marker (a prefix_uli or prefix_oli prefix); the function returns 0 if
// it does not. Otherwise it gathers the item's lines, renders them, passes
// the result to the listitem callback, and returns the number of bytes
// consumed.
//
// flags is read and updated: MKD_LI_BLOCK is set when the item contains a
// blank line followed by more item content (the item is then rendered with
// parse_block instead of parse_inline), and MKD_LI_END is set when a blank
// line is followed by a line that is neither indented nor a list item,
// which ends the enclosing list.
func parse_listitem(ob *bytes.Buffer, rndr *render, data []byte, flags *int) int {
	// keep track of the first indentation prefix
	beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0

	// orgpre is the number of spaces (at most 3) before this item's marker
	for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' {
		orgpre++
	}

	beg = prefix_uli(data)
	if beg == 0 {
		beg = prefix_oli(data)
	}
	if beg == 0 {
		return 0
	}

	// skip leading whitespace on first line
	for beg < len(data) && data[beg] == ' ' {
		beg++
	}

	// skip to the beginning of the following line
	end = beg
	for end < len(data) && data[end-1] != '\n' {
		end++
	}

	// get working buffers
	work := bytes.NewBuffer(nil)
	inter := bytes.NewBuffer(nil)

	// put the first line into the working buffer
	work.Write(data[beg:end])
	beg = end

	// process the following lines
	// in_empty: the previous line(s) were blank
	// has_inside_empty: a blank line was followed by more item content
	in_empty, has_inside_empty := false, false
	for beg < len(data) {
		end++

		for end < len(data) && data[end-1] != '\n' {
			end++
		}

		// process an empty line
		if is_empty(data[beg:end]) > 0 {
			in_empty = true
			beg = end
			continue
		}

		// calculate the indentation
		i = 0
		for i < 4 && beg+i < end && data[beg+i] == ' ' {
			i++
		}

		// i is the number of prefix bytes to strip, pre the indentation
		// width; a leading tab strips one byte and counts as width 8
		pre = i
		if data[beg] == '\t' {
			i = 1
			pre = 8
		}

		// check for a new item
		chunk := data[beg+i : end]
		if (prefix_uli(chunk) > 0 && !is_hrule(chunk)) || prefix_oli(chunk) > 0 {
			if in_empty {
				has_inside_empty = true
			}

			if pre == orgpre { // the following item must have the same indentation
				break
			}

			// differently indented item: remember where the nested list
			// starts inside the working buffer
			if sublist == 0 {
				sublist = work.Len()
			}
		} else {
			// only join indented stuff after empty lines
			if in_empty && i < 4 && data[beg] != '\t' {
				*flags |= MKD_LI_END
				break
			} else {
				if in_empty {
					work.WriteByte('\n')
					has_inside_empty = true
				}
			}
		}

		in_empty = false

		// add the line into the working buffer without prefix
		work.Write(data[beg+i : end])
		beg = end
	}

	// render li contents
	if has_inside_empty {
		*flags |= MKD_LI_BLOCK
	}

	workbytes := work.Bytes()
	if *flags&MKD_LI_BLOCK != 0 {
		// intermediate render of block li
		if sublist > 0 && sublist < len(workbytes) {
			parse_block(inter, rndr, workbytes[:sublist])
			parse_block(inter, rndr, workbytes[sublist:])
		} else {
			parse_block(inter, rndr, workbytes)
		}
	} else {
		// intermediate render of inline li
		if sublist > 0 && sublist < len(workbytes) {
			parse_inline(inter, rndr, workbytes[:sublist])
			parse_block(inter, rndr, workbytes[sublist:])
		} else {
			parse_inline(inter, rndr, workbytes)
		}
	}

	// render li itself
	if rndr.mk.listitem != nil {
		rndr.mk.listitem(ob, inter.Bytes(), *flags, rndr.mk.opaque)
	}

	return beg
}
2227
// parse_paragraph handles a paragraph starting at data and returns the
// number of bytes consumed.
//
// Lines are collected until a blank line, a setext header underline, an ATX
// header, a horizontal rule, or (with MKDEXT_LAX_HTML_BLOCKS) an HTML block.
// If the paragraph ends at a setext underline, the last collected line is
// rendered through the header callback and any earlier lines through the
// paragraph callback; otherwise everything collected is rendered as one
// paragraph.
func parse_paragraph(ob *bytes.Buffer, rndr *render, data []byte) int {
	// i is the start of the current line, end the start of the next one,
	// level the setext header level (0 when the paragraph is not a header)
	i, end, level := 0, 0, 0

	for i < len(data) {
		for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ {
		}

		// a blank line or header underline ends the paragraph and is consumed
		if is_empty(data[i:]) > 0 {
			break
		}
		if level = is_headerline(data[i:]); level > 0 {
			break
		}

		// the constructs below end the paragraph but are left for the
		// caller to parse (end is moved back to the start of the line)
		if rndr.ext_flags&MKDEXT_LAX_HTML_BLOCKS != 0 {
			if data[i] == '<' && rndr.mk.blockhtml != nil && parse_htmlblock(ob, rndr, data[i:], false) > 0 {
				end = i
				break
			}
		}

		if is_atxheader(rndr, data[i:]) || is_hrule(data[i:]) {
			end = i
			break
		}

		i = end
	}

	// work[:size] is the collected text with trailing newlines removed
	work := data
	size := i
	for size > 0 && work[size-1] == '\n' {
		size--
	}

	if level == 0 {
		tmp := bytes.NewBuffer(nil)
		parse_inline(tmp, rndr, work[:size])
		if rndr.mk.paragraph != nil {
			rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
		}
	} else {
		if size > 0 {
			beg := 0
			i = size
			size--

			// walk back to the newline before the last line: that last
			// line is the header title
			for size > 0 && work[size] != '\n' {
				size--
			}

			beg = size + 1
			for size > 0 && work[size-1] == '\n' {
				size--
			}

			if size > 0 {
				// text before the title line becomes its own paragraph
				tmp := bytes.NewBuffer(nil)
				parse_inline(tmp, rndr, work[:size])
				if rndr.mk.paragraph != nil {
					rndr.mk.paragraph(ob, tmp.Bytes(), rndr.mk.opaque)
				}

				work = work[beg:]
				size = i - beg
			} else {
				// the title is the only line
				size = i
			}
		}

		header_work := bytes.NewBuffer(nil)
		parse_inline(header_work, rndr, work[:size])

		if rndr.mk.header != nil {
			rndr.mk.header(ob, header_work.Bytes(), level, rndr.mk.opaque)
		}
	}

	return end
}
2308
2309
2310//
2311//
2312// HTML rendering
2313//
2314//
2315
// Flags for the HTML renderer, combined as a bitmask in html_renderopts.flags.
const (
	HTML_SKIP_HTML        = 1 << iota // rndr_raw_html_tag drops every raw inline HTML tag
	HTML_SKIP_STYLE                   // rndr_raw_html_tag drops raw <style> tags
	HTML_SKIP_IMAGES                  // rndr_raw_html_tag drops raw <img> tags
	HTML_SKIP_LINKS                   // rndr_raw_html_tag drops raw <a> tags
	HTML_EXPAND_TABS                  // NOTE(review): not read by the renderers in this part of the file
	HTML_SAFELINK                     // rndr_link and rndr_autolink refuse links that fail is_safe_link
	HTML_TOC                          // rndr_header adds an id="toc_N" attribute to each header
	HTML_HARD_WRAP                    // rndr_paragraph emits a <br> at every newline
	HTML_GITHUB_BLOCKCODE             // NOTE(review): not read by the renderers in this part of the file
	HTML_USE_XHTML                    // presumably selects the XHTML close_tag; verify where options are built
)
2328
// html_renderopts holds the state of the HTML renderer. The rndr_* callbacks
// receive a pointer to it as their opaque argument.
type html_renderopts struct {
	toc_data struct {
		header_count  int // number of headers rendered so far; rndr_header uses it as the toc_N id
		current_level int // NOTE(review): not used by the callbacks in this part of the file
	}
	flags     uint32 // bitmask of HTML_* flags
	close_tag string // appended after "<hr", "<br" and the <img attributes to finish the tag
}
2337
// attr_escape writes src to ob with the characters that are special in HTML
// text and attribute values replaced by entities: '<' becomes "&lt;",
// '>' becomes "&gt;", '&' becomes "&amp;" and '"' becomes "&quot;".
// All other bytes are copied unchanged.
func attr_escape(ob *bytes.Buffer, src []byte) {
	// start marks the beginning of the pending run of unescaped bytes
	start := 0
	for i, c := range src {
		var entity string
		switch c {
		case '<':
			entity = "&lt;"
		case '>':
			entity = "&gt;"
		case '&':
			entity = "&amp;"
		case '"':
			entity = "&quot;"
		default:
			continue
		}

		// flush the literal run, then the entity
		ob.Write(src[start:i])
		ob.WriteString(entity)
		start = i + 1
	}
	ob.Write(src[start:])
}
2365
// unescape_text copies src to ob with backslash escapes removed: each
// backslash is dropped and the byte after it is copied literally. A lone
// backslash at the very end of src is dropped.
func unescape_text(ob *bytes.Buffer, src []byte) {
	rest := src
	for len(rest) > 0 {
		slash := bytes.IndexByte(rest, '\\')
		if slash < 0 {
			// no more escapes: copy the tail and stop
			ob.Write(rest)
			return
		}

		ob.Write(rest[:slash])

		// a trailing backslash has nothing to escape
		if slash+1 >= len(rest) {
			return
		}

		ob.WriteByte(rest[slash+1])
		rest = rest[slash+2:]
	}
}
2386
2387func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) {
2388 options := opaque.(*html_renderopts)
2389
2390 if ob.Len() > 0 {
2391 ob.WriteByte('\n')
2392 }
2393
2394 if options.flags&HTML_TOC != 0 {
2395 ob.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.toc_data.header_count))
2396 options.toc_data.header_count++
2397 } else {
2398 ob.WriteString(fmt.Sprintf("<h%d>", level))
2399 }
2400
2401 ob.Write(text)
2402 ob.WriteString(fmt.Sprintf("</h%d>\n", level))
2403}
2404
// rndr_raw_block writes a raw HTML block to ob with its leading and trailing
// newlines removed and a single newline appended. A newline is written first
// if ob already has content. Nothing is written when text consists only of
// newlines.
func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) {
	body := bytes.Trim(text, "\n")
	if len(body) == 0 {
		return
	}

	if ob.Len() > 0 {
		ob.WriteByte('\n')
	}
	ob.Write(body)
	ob.WriteByte('\n')
}
2423
2424func rndr_hrule(ob *bytes.Buffer, opaque interface{}) {
2425 options := opaque.(*html_renderopts)
2426
2427 if ob.Len() > 0 {
2428 ob.WriteByte('\n')
2429 }
2430 ob.WriteString("<hr")
2431 ob.WriteString(options.close_tag)
2432}
2433
2434func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
2435 if ob.Len() > 0 {
2436 ob.WriteByte('\n')
2437 }
2438
2439 if lang != "" {
2440 ob.WriteString("<pre><code class=\"")
2441
2442 for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
2443 for i < len(lang) && isspace(lang[i]) {
2444 i++
2445 }
2446
2447 if i < len(lang) {
2448 org := i
2449 for i < len(lang) && !isspace(lang[i]) {
2450 i++
2451 }
2452
2453 if lang[org] == '.' {
2454 org++
2455 }
2456
2457 if cls > 0 {
2458 ob.WriteByte(' ')
2459 }
2460 attr_escape(ob, []byte(lang[org:]))
2461 }
2462 }
2463
2464 ob.WriteString("\">")
2465 } else {
2466 ob.WriteString("<pre><code>")
2467 }
2468
2469 if len(text) > 0 {
2470 attr_escape(ob, text)
2471 }
2472
2473 ob.WriteString("</code></pre>\n")
2474}
2475
// rndr_blockquote wraps the already rendered text in a <blockquote> element.
// No newline is written before the opening tag or after the closing tag.
func rndr_blockquote(ob *bytes.Buffer, text []byte, opaque interface{}) {
	const (
		openTag  = "<blockquote>\n"
		closeTag = "</blockquote>"
	)
	ob.WriteString(openTag)
	ob.Write(text)
	ob.WriteString(closeTag)
}
2481
// rndr_table writes a table from its already rendered header and body rows.
// A newline is written first if ob already has content.
func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	// head section
	ob.WriteString("<table><thead>\n")
	ob.Write(header)

	// body section
	ob.WriteString("\n</thead><tbody>\n")
	ob.Write(body)

	ob.WriteString("\n</tbody></table>")
}
2492
// rndr_tablerow wraps the already rendered cells of one row in a <tr>
// element. A newline is written first if ob already has content.
func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) {
	if ob.Len() != 0 {
		ob.WriteByte('\n')
	}

	const (
		openTag  = "<tr>\n"
		closeTag = "\n</tr>"
	)
	ob.WriteString(openTag)
	ob.Write(text)
	ob.WriteString(closeTag)
}
2501
2502func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) {
2503 if ob.Len() > 0 {
2504 ob.WriteByte('\n')
2505 }
2506 switch align {
2507 case MKD_TABLE_ALIGN_L:
2508 ob.WriteString("<td align=\"left\">")
2509 case MKD_TABLE_ALIGN_R:
2510 ob.WriteString("<td align=\"right\">")
2511 case MKD_TABLE_ALIGN_CENTER:
2512 ob.WriteString("<td align=\"center\">")
2513 default:
2514 ob.WriteString("<td>")
2515 }
2516
2517 ob.Write(text)
2518 ob.WriteString("</td>")
2519}
2520
2521func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
2522 if ob.Len() > 0 {
2523 ob.WriteByte('\n')
2524 }
2525 if flags&MKD_LIST_ORDERED != 0 {
2526 ob.WriteString("<ol>\n")
2527 } else {
2528 ob.WriteString("<ul>\n")
2529 }
2530 ob.Write(text)
2531 if flags&MKD_LIST_ORDERED != 0 {
2532 ob.WriteString("</ol>\n")
2533 } else {
2534 ob.WriteString("</ul>\n")
2535 }
2536}
2537
// rndr_listitem wraps the already rendered item text in an <li> element,
// dropping any newlines at the end of text so the closing tag follows the
// content directly.
func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) {
	body := bytes.TrimRight(text, "\n")

	ob.WriteString("<li>")
	ob.Write(body)
	ob.WriteString("</li>\n")
}
2547
2548func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) {
2549 options := opaque.(*html_renderopts)
2550 i := 0
2551
2552 if ob.Len() > 0 {
2553 ob.WriteByte('\n')
2554 }
2555
2556 if len(text) == 0 {
2557 return
2558 }
2559
2560 for i < len(text) && isspace(text[i]) {
2561 i++
2562 }
2563
2564 if i == len(text) {
2565 return
2566 }
2567
2568 ob.WriteString("<p>")
2569 if options.flags&HTML_HARD_WRAP != 0 {
2570 for i < len(text) {
2571 org := i
2572 for i < len(text) && text[i] != '\n' {
2573 i++
2574 }
2575
2576 if i > org {
2577 ob.Write(text[org:i])
2578 }
2579
2580 if i >= len(text) {
2581 break
2582 }
2583
2584 ob.WriteString("<br>")
2585 ob.WriteString(options.close_tag)
2586 i++
2587 }
2588 } else {
2589 ob.Write(text[i:])
2590 }
2591 ob.WriteString("</p>\n")
2592}
2593
2594func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int {
2595 options := opaque.(*html_renderopts)
2596
2597 if len(link) == 0 {
2598 return 0
2599 }
2600 if options.flags&HTML_SAFELINK != 0 && !is_safe_link(link) && kind != MKDA_EMAIL {
2601 return 0
2602 }
2603
2604 ob.WriteString("<a href=\"")
2605 if kind == MKDA_EMAIL {
2606 ob.WriteString("mailto:")
2607 }
2608 ob.Write(link)
2609 ob.WriteString("\">")
2610
2611 /*
2612 * Pretty print: if we get an email address as
2613 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
2614 * want to print the `mailto:` prefix
2615 */
2616 if bytes.HasPrefix(link, []byte("mailto:")) {
2617 attr_escape(ob, link[7:])
2618 } else {
2619 attr_escape(ob, link)
2620 }
2621
2622 ob.WriteString("</a>")
2623
2624 return 1
2625}
2626
2627func rndr_codespan(ob *bytes.Buffer, text []byte, opaque interface{}) int {
2628 ob.WriteString("<code>")
2629 attr_escape(ob, text)
2630 ob.WriteString("</code>")
2631 return 1
2632}
2633
// rndr_double_emphasis wraps the already rendered text in a <strong>
// element and returns 1. For empty text it writes nothing and returns 0.
func rndr_double_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<strong>")
		ob.Write(text)
		ob.WriteString("</strong>")
		return 1
	}
	return 0
}
2643
// rndr_emphasis wraps the already rendered text in an <em> element and
// returns 1. For empty text it writes nothing and returns 0.
func rndr_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<em>")
		ob.Write(text)
		ob.WriteString("</em>")
		return 1
	}
	return 0
}
2653
2654func rndr_image(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int {
2655 options := opaque.(*html_renderopts)
2656 if len(link) == 0 {
2657 return 0
2658 }
2659 ob.WriteString("<img src=\"")
2660 attr_escape(ob, link)
2661 ob.WriteString("\" alt=\"")
2662 if len(alt) > 0 {
2663 attr_escape(ob, alt)
2664 }
2665 if len(title) > 0 {
2666 ob.WriteString("\" title=\"")
2667 attr_escape(ob, title)
2668 }
2669
2670 ob.WriteByte('"')
2671 ob.WriteString(options.close_tag)
2672 return 1
2673}
2674
2675func rndr_linebreak(ob *bytes.Buffer, opaque interface{}) int {
2676 options := opaque.(*html_renderopts)
2677 ob.WriteString("<br")
2678 ob.WriteString(options.close_tag)
2679 return 1
2680}
2681
2682func rndr_link(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaque interface{}) int {
2683 options := opaque.(*html_renderopts)
2684
2685 if options.flags&HTML_SAFELINK != 0 && !is_safe_link(link) {
2686 return 0
2687 }
2688
2689 ob.WriteString("<a href=\"")
2690 if len(link) > 0 {
2691 ob.Write(link)
2692 }
2693 if len(title) > 0 {
2694 ob.WriteString("\" title=\"")
2695 attr_escape(ob, title)
2696 }
2697 ob.WriteString("\">")
2698 if len(content) > 0 {
2699 ob.Write(content)
2700 }
2701 ob.WriteString("</a>")
2702 return 1
2703}
2704
2705func rndr_raw_html_tag(ob *bytes.Buffer, text []byte, opaque interface{}) int {
2706 options := opaque.(*html_renderopts)
2707 if options.flags&HTML_SKIP_HTML != 0 {
2708 return 1
2709 }
2710 if options.flags&HTML_SKIP_STYLE != 0 && is_html_tag(text, "style") {
2711 return 1
2712 }
2713 if options.flags&HTML_SKIP_LINKS != 0 && is_html_tag(text, "a") {
2714 return 1
2715 }
2716 if options.flags&HTML_SKIP_IMAGES != 0 && is_html_tag(text, "img") {
2717 return 1
2718 }
2719 ob.Write(text)
2720 return 1
2721}
2722
// rndr_triple_emphasis wraps the already rendered text in nested <strong>
// and <em> elements and returns 1. For empty text it writes nothing and
// returns 0.
func rndr_triple_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<strong><em>")
		ob.Write(text)
		ob.WriteString("</em></strong>")
		return 1
	}
	return 0
}
2732
// rndr_strikethrough wraps the already rendered text in a <del> element and
// returns 1. For empty text it writes nothing and returns 0.
func rndr_strikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int {
	if len(text) > 0 {
		ob.WriteString("<del>")
		ob.Write(text)
		ob.WriteString("</del>")
		return 1
	}
	return 0
}
2742
2743func rndr_normal_text(ob *bytes.Buffer, text []byte, opaque interface{}) {
2744 attr_escape(ob, text)
2745}
2746
2747func is_html_tag(tag []byte, tagname string) bool {
2748 i := 0
2749 if i < len(tag) && tag[0] != '<' {
2750 return false
2751 }
2752 i++
2753 for i < len(tag) && isspace(tag[i]) {
2754 i++
2755 }
2756
2757 if i < len(tag) && tag[i] == '/' {
2758 i++
2759 }
2760
2761 for i < len(tag) && isspace(tag[i]) {
2762 i++
2763 }
2764
2765 tag_i := i
2766 for ; i < len(tag); i, tag_i = i+1, tag_i+1 {
2767 if tag_i >= len(tagname) {
2768 break
2769 }
2770
2771 if tag[i] != tagname[tag_i] {
2772 return false
2773 }
2774 }
2775
2776 if i == len(tag) {
2777 return false
2778 }
2779
2780 return isspace(tag[i]) || tag[i] == '>'
2781}
2782
2783
2784//
2785//
2786// Public interface
2787//
2788//
2789
// expand_tabs copies line to ob, replacing each tab with one to four spaces
// so that the text after it starts at the next multiple of four columns.
// Columns are counted in bytes from the start of line.
func expand_tabs(ob *bytes.Buffer, line []byte) {
	col := 0
	for _, c := range line {
		if c != '\t' {
			ob.WriteByte(c)
			col++
			continue
		}

		// pad with at least one space up to the next tab stop
		for {
			ob.WriteByte(' ')
			col++
			if col%4 == 0 {
				break
			}
		}
	}
}
2819
2820func Markdown(ob *bytes.Buffer, ib []byte, rndrer *mkd_renderer, extensions uint32) {
2821 // no point in parsing if we can't render
2822 if rndrer == nil {
2823 return
2824 }
2825
2826 // fill in the character-level parsers
2827 markdown_char_ptrs[MD_CHAR_NONE] = nil
2828 markdown_char_ptrs[MD_CHAR_EMPHASIS] = char_emphasis
2829 markdown_char_ptrs[MD_CHAR_CODESPAN] = char_codespan
2830 markdown_char_ptrs[MD_CHAR_LINEBREAK] = char_linebreak
2831 markdown_char_ptrs[MD_CHAR_LINK] = char_link
2832 markdown_char_ptrs[MD_CHAR_LANGLE] = char_langle_tag
2833 markdown_char_ptrs[MD_CHAR_ESCAPE] = char_escape
2834 markdown_char_ptrs[MD_CHAR_ENTITITY] = char_entity
2835 markdown_char_ptrs[MD_CHAR_AUTOLINK] = char_autolink
2836
2837 // fill in the render structure
2838 rndr := new(render)
2839 rndr.mk = rndrer
2840 rndr.ext_flags = extensions
2841 rndr.max_nesting = 16
2842
2843 if rndr.mk.emphasis != nil || rndr.mk.double_emphasis != nil || rndr.mk.triple_emphasis != nil {
2844 rndr.active_char['*'] = MD_CHAR_EMPHASIS
2845 rndr.active_char['_'] = MD_CHAR_EMPHASIS
2846 if extensions&MKDEXT_STRIKETHROUGH != 0 {
2847 rndr.active_char['~'] = MD_CHAR_EMPHASIS
2848 }
2849 }
2850 if rndr.mk.codespan != nil {
2851 rndr.active_char['`'] = MD_CHAR_CODESPAN
2852 }
2853 if rndr.mk.linebreak != nil {
2854 rndr.active_char['\n'] = MD_CHAR_LINEBREAK
2855 }
2856 if rndr.mk.image != nil || rndr.mk.link != nil {
2857 rndr.active_char['['] = MD_CHAR_LINK
2858 }
2859 rndr.active_char['<'] = MD_CHAR_LANGLE
2860 rndr.active_char['\\'] = MD_CHAR_ESCAPE
2861 rndr.active_char['&'] = MD_CHAR_ENTITITY
2862
2863 if extensions&MKDEXT_AUTOLINK != 0 {
2864 rndr.active_char['h'] = MD_CHAR_AUTOLINK // http, https
2865 rndr.active_char['H'] = MD_CHAR_AUTOLINK
2866
2867 rndr.active_char['f'] = MD_CHAR_AUTOLINK // ftp
2868 rndr.active_char['F'] = MD_CHAR_AUTOLINK
2869
2870 rndr.active_char['m'] = MD_CHAR_AUTOLINK // mailto
2871 rndr.active_char['M'] = MD_CHAR_AUTOLINK
2872 }
2873
2874 // first pass: look for references, copy everything else
2875 text := bytes.NewBuffer(nil)
2876 beg, end := 0, 0
2877 for beg < len(ib) { // iterate over lines
2878 if is_ref(ib, beg, &end, rndr) {
2879 beg = end
2880 } else { // skip to the next line
2881 end = beg
2882 for end < len(ib) && ib[end] != '\n' && ib[end] != '\r' {
2883 end++
2884 }
2885
2886 // add the line body if present
2887 if end > beg {
2888 expand_tabs(text, ib[beg:end])
2889 }
2890
2891 for end < len(ib) && (ib[end] == '\n' || ib[end] == '\r') {
2892 // add one \n per newline
2893 if ib[end] == '\n' || (end+1 < len(ib) && ib[end+1] != '\n') {
2894 text.WriteByte('\n')
2895 }
2896 end++
2897 }
2898
2899 beg = end
2900 }
2901 }
2902
2903 // sort the reference array
2904 if len(rndr.refs) > 1 {
2905 sort.Sort(rndr.refs)
2906 }
2907
2908 // second pass: actual rendering
2909 if rndr.mk.doc_header != nil {
2910 rndr.mk.doc_header(ob, rndr.mk.opaque)
2911 }
2912
2913 if text.Len() > 0 {
2914 // add a final newline if not already present
2915 finalchar := text.Bytes()[text.Len()-1]
2916 if finalchar != '\n' && finalchar != '\r' {
2917 text.WriteByte('\n')
2918 }
2919 parse_block(ob, rndr, text.Bytes())
2920 }
2921
2922 if rndr.mk.doc_footer != nil {
2923 rndr.mk.doc_footer(ob, rndr.mk.opaque)
2924 }
2925
2926 if rndr.nesting != 0 {
2927 panic("Nesting level did not end at zero")
2928 }
2929}
2930
2931func Config_html() *mkd_renderer {
2932 // configure the rendering engine
2933 rndrer := new(mkd_renderer)
2934 rndrer.blockcode = rndr_blockcode
2935 rndrer.blockquote = rndr_blockquote
2936 rndrer.blockhtml = rndr_raw_block
2937 rndrer.header = rndr_header
2938 rndrer.hrule = rndr_hrule
2939 rndrer.list = rndr_list
2940 rndrer.listitem = rndr_listitem
2941 rndrer.paragraph = rndr_paragraph
2942 rndrer.table = rndr_table
2943 rndrer.table_row = rndr_tablerow
2944 rndrer.table_cell = rndr_tablecell
2945
2946 rndrer.autolink = rndr_autolink
2947 rndrer.codespan = rndr_codespan
2948 rndrer.double_emphasis = rndr_double_emphasis
2949 rndrer.emphasis = rndr_emphasis
2950 rndrer.image = rndr_image
2951 rndrer.linebreak = rndr_linebreak
2952 rndrer.link = rndr_link
2953 rndrer.raw_html_tag = rndr_raw_html_tag
2954 rndrer.triple_emphasis = rndr_triple_emphasis
2955 rndrer.strikethrough = rndr_strikethrough
2956
2957 rndrer.normal_text = rndr_normal_text
2958
2959 rndrer.opaque = &html_renderopts{close_tag: " />\n"}
2960 return rndrer
2961}
2962
2963func main() {
2964 // read the input
2965 var ib []byte
2966 var err os.Error
2967 switch len(os.Args) {
2968 case 1:
2969 if ib, err = ioutil.ReadAll(os.Stdin); err != nil {
2970 fmt.Fprintln(os.Stderr, "Error reading from Stdin:", err)
2971 os.Exit(-1)
2972 }
2973 case 2, 3:
2974 if ib, err = ioutil.ReadFile(os.Args[1]); err != nil {
2975 fmt.Fprintln(os.Stderr, "Error reading from", os.Args[1], ":", err)
2976 os.Exit(-1)
2977 }
2978 default:
2979 fmt.Fprintln(os.Stderr, "Usage:", os.Args[0], "[inputfile [outputfile]]")
2980 os.Exit(-1)
2981 }
2982
2983 // call the main renderer function
2984 ob := bytes.NewBuffer(nil)
2985 var extensions uint32
2986 extensions |= MKDEXT_NO_INTRA_EMPHASIS
2987 extensions |= MKDEXT_TABLES
2988 extensions |= MKDEXT_FENCED_CODE
2989 extensions |= MKDEXT_AUTOLINK
2990 extensions |= MKDEXT_STRIKETHROUGH
2991 extensions |= MKDEXT_LAX_HTML_BLOCKS
2992 extensions |= MKDEXT_SPACE_HEADERS
2993 extensions = 0
2994
2995 Markdown(ob, ib, Config_html(), extensions)
2996
2997 // output the result
2998 if len(os.Args) == 3 {
2999 if err = ioutil.WriteFile(os.Args[2], ob.Bytes(), 0644); err != nil {
3000 fmt.Fprintln(os.Stderr, "Error writing to", os.Args[2], ":", err)
3001 os.Exit(-1)
3002 }
3003 } else {
3004 if _, err = os.Stdout.Write(ob.Bytes()); err != nil {
3005 fmt.Fprintln(os.Stderr, "Error writing to Stdout:", err)
3006 os.Exit(-1)
3007 }
3008 }
3009}