inline.go
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
// Functions to parse inline elements.
//

package blackfriday

import (
	"bytes"
	"regexp"
	"strconv"
)

var (
	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
)
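
// In this file, anchorRe is used only by autoLink below, to detect when the
// cursor already sits inside an anchor that was written out earlier. As a
// rough illustration (example string not from the original source), it matches
//
//	<a href="http://example.com/">http://example.com/</a>
//
// but not a bare URL without the surrounding <a> tag.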

// Functions to parse text within a block.
// Each function returns the number of chars it consumed.
// data is the complete block being rendered;
// offset is the number of valid chars before the current cursor.
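//
// For example (illustrative, based on the contract above): when the parser
// reaches a '*' at offset 17 of a block, it calls emphasis(p, data, 17); the
// handler returns how many bytes starting at that offset it consumed, or 0 to
// signal that the character should be emitted as plain text.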

func (p *parser) inline(out *bytes.Buffer, data []byte) {
	// this is called recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	i, end := 0, 0
	for i < len(data) {
		// Copy inactive chars into the output, but first check for one quirk:
		// 'h', 'm' and 'f' all might trigger a check for autolink processing
		// and end this run of inactive characters. However, there's one nasty
		// case where breaking this run would be bad: in smartypants fraction
		// detection, we expect things like "1/2th" to be in a single run. So
		// we check here if an 'h' is followed by 't' (from 'http') and if it's
		// not, we short circuit the 'h' into the run of inactive characters.
		//
		// Also, in a similar fashion maybeLineBreak breaks this run of chars,
		// but smartDash processor relies on seeing context around the dashes.
		// Fix this somehow.
		for end < len(data) {
			if data[end] == ' ' {
				consumed, br := maybeLineBreak(p, data, end)
				if consumed > 0 {
					p.r.NormalText(data[i:end])
					if br {
						p.r.LineBreak()
					}
					i = end
					i += consumed
					end = i
				} else {
					end++
				}
				continue
			}
			if p.inlineCallback[data[end]] != nil {
				if end+1 < len(data) && data[end] == 'h' && data[end+1] != 't' {
					end++
				} else {
					break
				}
			} else {
				end++
			}
		}

		p.r.NormalText(data[i:end])

		if end >= len(data) {
			break
		}
		i = end

		// call the trigger
		handler := p.inlineCallback[data[end]]
		if consumed := handler(p, data, i); consumed == 0 {
			// no action from the callback; buffer the byte for later
			end = i + 1
		} else {
			// skip past whatever the callback used
			i += consumed
			end = i
		}
	}

	p.nesting--
}
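
// Rough walk-through of the loop above (illustrative, not from the original
// source): for the block "foo *bar*", the scan stops at '*' because an inline
// callback is registered for it, NormalText("foo ") is emitted, and the '*'
// handler (emphasis) consumes "*bar*". If a handler returns 0, the byte is
// treated as inactive text and scanning resumes one byte later.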

// single and double emphasis parsing
func emphasis(p *parser, data []byte, offset int) int {
	data = data[offset:]
	c := data[0]
	ret := 0

	if len(data) > 2 && data[1] != c {
		// whitespace cannot follow an opening emphasis;
		// strikethrough only takes two characters '~~'
		if c == '~' || isspace(data[1]) {
			return 0
		}
		if ret = helperEmphasis(p, data[1:], c); ret == 0 {
			return 0
		}

		return ret + 1
	}

	if len(data) > 3 && data[1] == c && data[2] != c {
		if isspace(data[2]) {
			return 0
		}
		if ret = helperDoubleEmphasis(p, data[2:], c); ret == 0 {
			return 0
		}

		return ret + 2
	}

	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
		if c == '~' || isspace(data[3]) {
			return 0
		}
		if ret = helperTripleEmphasis(p, data, 3, c); ret == 0 {
			return 0
		}

		return ret + 3
	}

	return 0
}
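
// Dispatch summary (illustrative, not from the original source): a single
// delimiter such as *x* or _x_ goes to helperEmphasis, a double one such as
// **x** or ~~x~~ goes to helperDoubleEmphasis (where '~' renders as
// strikethrough), and ***x*** goes to helperTripleEmphasis. Since '~' is
// rejected for the single and triple forms above, only '~~' is honored.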

func codeSpan(p *parser, data []byte, offset int) int {
	data = data[offset:]

	nb := 0

	// count the number of backticks in the delimiter
	for nb < len(data) && data[nb] == '`' {
		nb++
	}

	// find the next delimiter
	i, end := 0, 0
	for end = nb; end < len(data) && i < nb; end++ {
		if data[end] == '`' {
			i++
		} else {
			i = 0
		}
	}

	// no matching delimiter?
	if i < nb && end >= len(data) {
		return 0
	}

	// trim outside whitespace
	fBegin := nb
	for fBegin < end && data[fBegin] == ' ' {
		fBegin++
	}

	fEnd := end - nb
	for fEnd > fBegin && data[fEnd-1] == ' ' {
		fEnd--
	}

	// render the code span
	if fBegin != fEnd {
		p.r.CodeSpan(data[fBegin:fEnd])
	}

	return end
}
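
// Example of codeSpan's behavior (illustrative, not from the original source):
// for "``foo `bar` baz``" the delimiter is two backticks, the inner single
// backtick is kept, and leading/trailing spaces inside the delimiters are
// trimmed before p.r.CodeSpan is called; an unterminated span like "`foo"
// returns 0 and falls through as plain text.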

// newline preceded by two spaces becomes <br>
func maybeLineBreak(p *parser, data []byte, offset int) (int, bool) {
	origOffset := offset
	for offset < len(data) && data[offset] == ' ' {
		offset++
	}
	if offset < len(data) && data[offset] == '\n' {
		if offset-origOffset >= 2 {
			return offset - origOffset + 1, true
		}
		return offset - origOffset, false
	}
	return 0, false
}
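
// Illustrative example (not from the original source): with data "a  \nb" and
// offset at the first space, maybeLineBreak skips both spaces, sees the '\n',
// and returns (3, true), so the caller emits a hard LineBreak; with only one
// space before the newline it returns (1, false) and the newline is left to
// normal processing.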

// newline without two spaces works when HardLineBreak is enabled
func lineBreak(p *parser, data []byte, offset int) int {
	if p.flags&HardLineBreak != 0 {
		p.r.LineBreak()
		return 1
	}
	return 0
}

type linkType int

const (
	linkNormal linkType = iota
	linkImg
	linkDeferredFootnote
	linkInlineFootnote
)

func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
	if t == linkDeferredFootnote {
		return false
	}
	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
}
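
// The '[' handler below recognizes, roughly, these forms (illustrative list,
// not from the original source):
//
//	[text](/url "title")   inline link
//	[text][id]             reference-style link
//	![alt](/img.png)       image, entered via maybeImage
//	[^id]                  deferred footnote, with the Footnotes extension
//	^[inline note]         inline footnote, entered via maybeInlineFootnote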

func maybeImage(p *parser, data []byte, offset int) int {
	if offset < len(data)-1 && data[offset+1] == '[' {
		return link(p, data, offset)
	}
	return 0
}

func maybeInlineFootnote(p *parser, data []byte, offset int) int {
	if offset < len(data)-1 && data[offset+1] == '[' {
		return link(p, data, offset)
	}
	return 0
}

// '[': parse a link or an image or a footnote
func link(p *parser, data []byte, offset int) int {
	// no links allowed inside regular links, footnotes, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.flags&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset += 1
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.flags&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset += 1
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	data = data[offset:]

	var (
		i                       = 1
		noteId                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0
	}

	txtE := i
	i++

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			if textHasNl {
				var b bytes.Buffer

				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		if t == linkInlineFootnote {
			// create a new reference
			noteId = len(p.notes) + 1

			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
			}

			ref := &reference{
				noteId:   noteId,
				hasBlock: false,
				link:     fragment,
				title:    id,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0
			}

			if t == linkDeferredFootnote {
				lr.noteId = len(p.notes) + 1
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteId = lr.noteId
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// build content: img alt is escaped, link content is parsed
	var content bytes.Buffer
	if txtE > 1 {
		if t == linkImg {
			content.Write(data[1:txtE])
		} else {
			// links cannot contain other links, so turn off link parsing temporarily
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(&content, data[1:txtE])
			p.insideLink = insideLink
		}
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) {
			return 0
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		if len(altContent) > 0 {
			p.r.Link(uLink, title, altContent)
		} else {
			p.r.Link(uLink, title, content.Bytes())
		}

	case linkImg:
		p.r.Image(uLink, title, content.Bytes())
		i += 1

	case linkInlineFootnote:
		p.r.FootnoteRef(link, noteId)
		i += 1

	case linkDeferredFootnote:
		p.r.FootnoteRef(link, noteId)

	default:
		return 0
	}

	return i
}
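
// Return-value note (illustrative, not from the original source): on success,
// link returns roughly the number of bytes from the triggering character
// through the end of the construct, e.g. all of `[docs](/docs "Docs")`, so the
// caller in inline() can skip past it; any malformed form returns 0 and the
// text falls through as plain characters.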

func (p *parser) inlineHtmlComment(data []byte) int {
	if len(data) < 5 {
		return 0
	}
	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
		return 0
	}
	i := 5
	// scan for an end-of-comment marker, across lines if necessary
	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
		i++
	}
	// no end-of-comment marker
	if i >= len(data) {
		return 0
	}
	return i + 1
}
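
// Illustrative example (not from the original source): for data beginning with
// "<!-- note -->", inlineHtmlComment returns the length of the whole comment
// (index of the closing '>' plus one); without a "-->" terminator it returns 0
// and leftAngle falls back to whatever tagLength reported.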

// '<' when tags or autolinks are allowed
func leftAngle(p *parser, data []byte, offset int) int {
	data = data[offset:]
	altype := LinkTypeNotAutolink
	end := tagLength(data, &altype)
	if size := p.inlineHtmlComment(data); size > 0 {
		end = size
	}
	if end > 2 {
		if altype != LinkTypeNotAutolink {
			var uLink bytes.Buffer
			unescapeText(&uLink, data[1:end+1-2])
			if uLink.Len() > 0 {
				p.r.AutoLink(uLink.Bytes(), altype)
			}
		} else {
			p.r.RawHtmlTag(data[:end])
		}
	}

	return end
}

// '\\' backslash escape
var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")

func escape(p *parser, data []byte, offset int) int {
	data = data[offset:]

	if len(data) > 1 {
		if p.flags&BackslashLineBreak != 0 && data[1] == '\n' {
			p.r.LineBreak()
			return 2
		}
		if bytes.IndexByte(escapeChars, data[1]) < 0 {
			return 0
		}

		p.r.NormalText(data[1:2])
	}

	return 2
}
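
// Illustrative examples (not from the original source): "\*" emits a literal
// '*' via NormalText and consumes two bytes; "\q" returns 0 because 'q' is not
// in escapeChars, so the backslash stays as ordinary text; with the
// BackslashLineBreak flag set, a backslash right before a newline becomes a
// hard LineBreak.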

func unescapeText(ob *bytes.Buffer, src []byte) {
	i := 0
	for i < len(src) {
		org := i
		for i < len(src) && src[i] != '\\' {
			i++
		}

		if i > org {
			ob.Write(src[org:i])
		}

		if i+1 >= len(src) {
			break
		}

		ob.WriteByte(src[i+1])
		i += 2
	}
}

// '&' escaped when it doesn't belong to an entity
// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
func entity(p *parser, data []byte, offset int) int {
	data = data[offset:]

	end := 1

	if end < len(data) && data[end] == '#' {
		end++
	}

	for end < len(data) && isalnum(data[end]) {
		end++
	}

	if end < len(data) && data[end] == ';' {
		end++ // real entity
	} else {
		return 0 // lone '&'
	}

	p.r.Entity(data[:end])

	return end
}
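
// Illustrative examples (not from the original source): "&amp;" and "&#169;"
// both match the loose &#?[A-Za-z0-9]+; shape and are passed to p.r.Entity
// unchanged, while a bare "&" (or "&nbsp" without the trailing ';') returns 0
// and is later emitted as normal text.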

func linkEndsWithEntity(data []byte, linkEnd int) bool {
	entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
}

func maybeAutoLink(p *parser, data []byte, offset int) int {
	// quick check to rule out most false hits
	if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below
		return 0
	}
	prefixes := []string{
		"http://",
		"https://",
		"ftp://",
		"file://",
		"mailto:",
	}
	for _, prefix := range prefixes {
		endOfHead := offset + 8 // 8 is the len() of the longest prefix
		if endOfHead > len(data) {
			endOfHead = len(data)
		}
		head := bytes.ToLower(data[offset:endOfHead])
		if bytes.HasPrefix(head, []byte(prefix)) {
			return autoLink(p, data, offset)
		}
	}
	return 0
}
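
// Illustrative note (not from the original source): the prefix check is
// case-insensitive because the head of the text is lowered with bytes.ToLower,
// so "HTTP://example.com" still reaches autoLink; text starting with none of
// the listed prefixes is rejected here without the more expensive scanning
// that autoLink performs.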

func autoLink(p *parser, data []byte, offset int) int {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit the remainder of the existing anchor as raw HTML (XXX: write in parser?)
		p.r.RawHtmlTag(anchorStr[offsetFromAnchor:])
		return len(anchorStr) - offsetFromAnchor
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0
	}

	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0
	}

	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		p.r.AutoLink(uLink.Bytes(), LinkTypeNormal)
	}

	return linkEnd
}
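
// Trailing-punctuation example (illustrative, not from the original source):
// in "see http://example.com/docs." the final '.' is dropped from the link,
// and in "(http://example.com/a_(b))" the delimiter balancing above drops the
// outer closing parenthesis while keeping the inner "(b)" pair, mirroring the
// Pikachu examples in the comment block.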

func isEndOfLink(char byte) bool {
	return isspace(char) || char == '<'
}

var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}

func isSafeLink(link []byte) bool {
	for _, path := range validPaths {
		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
			if len(link) == len(path) {
				return true
			} else if isalnum(link[len(path)]) {
				return true
			}
		}
	}

	for _, prefix := range validUris {
		// TODO: handle unicode here
		// case-insensitive prefix test
		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
			return true
		}
	}

	return false
}
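
// Illustrative examples (not from the original source): "./docs/intro" and
// "https://example.com" are considered safe, while "javascript:alert(1)"
// matches neither validPaths nor validUris and is rejected. Note that the
// validUris entry is "mailto://", so a plain "mailto:user@example.com" does
// not pass this check even though maybeAutoLink accepts the "mailto:" prefix.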

// return the length of the given tag, or 0 if it's not valid
func tagLength(data []byte, autolink *LinkType) int {
	var i, j int

	// a valid tag can't be shorter than 3 chars
	if len(data) < 3 {
		return 0
	}

	// begins with a '<' optionally followed by '/', followed by letter or number
	if data[0] != '<' {
		return 0
	}
	if data[1] == '/' {
		i = 2
	} else {
		i = 1
	}

	if !isalnum(data[i]) {
		return 0
	}

	// scheme test
	*autolink = LinkTypeNotAutolink

	// try to find the beginning of a URI
	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
		i++
	}

	if i > 1 && i < len(data) && data[i] == '@' {
		if j = isMailtoAutoLink(data[i:]); j != 0 {
			*autolink = LinkTypeEmail
			return i + j
		}
	}

	if i > 2 && i < len(data) && data[i] == ':' {
		*autolink = LinkTypeNormal
		i++
	}

	// complete autolink test: no whitespace or ' or "
	switch {
	case i >= len(data):
		*autolink = LinkTypeNotAutolink
	case *autolink != 0:
		j = i

		for i < len(data) {
			if data[i] == '\\' {
				i += 2
			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
				break
			} else {
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		if i > j && data[i] == '>' {
			return i + 1
		}

		// one of the forbidden chars has been found
		*autolink = LinkTypeNotAutolink
	}

	// look for something looking like a tag end
	for i < len(data) && data[i] != '>' {
		i++
	}
	if i >= len(data) {
		return 0
	}
	return i + 1
}
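
// Illustrative examples (not from the original source): "<em>" returns 4 with
// *autolink left as LinkTypeNotAutolink, "<http://foo>" returns its full
// length with LinkTypeNormal, and "<user@example.com>" is handed to
// isMailtoAutoLink and reported as LinkTypeEmail; anything without a closing
// '>' returns 0.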

// look for the address part of a mail autolink and '>'
// this is less strict than the original markdown e-mail address matching
func isMailtoAutoLink(data []byte) int {
	nb := 0

	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
	for i := 0; i < len(data); i++ {
		if isalnum(data[i]) {
			continue
		}

		switch data[i] {
		case '@':
			nb++

		case '-', '.', '_':
			break

		case '>':
			if nb == 1 {
				return i + 1
			} else {
				return 0
			}
		default:
			return 0
		}
	}

	return 0
}

// look for the next emph char, skipping other constructs
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				} else {
					continue
				}
			}
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}
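
// Illustrative example (not from the original source): searching for '*' in
// "a `*` b*" returns the index of the final '*', because the one inside the
// backtick code span is skipped; only when no later delimiter exists does the
// position remembered inside a skipped construct (tmpI) get returned.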

func helperEmphasis(p *parser, data []byte, c byte) int {
	i := 0

	// skip one symbol if coming from emph3
	if len(data) > 1 && data[0] == c && data[1] == c {
		i = 1
	}

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0
		}
		i += length
		if i >= len(data) {
			return 0
		}

		if i+1 < len(data) && data[i+1] == c {
			i++
			continue
		}

		if data[i] == c && !isspace(data[i-1]) {

			if p.flags&NoIntraEmphasis != 0 {
				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
					continue
				}
			}

			var work bytes.Buffer
			p.inline(&work, data[:i])
			p.r.Emphasis(work.Bytes())
			return i + 1
		}
	}

	return 0
}

func helperDoubleEmphasis(p *parser, data []byte, c byte) int {
	i := 0

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0
		}
		i += length

		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
			var work bytes.Buffer
			p.inline(&work, data[:i])

			if work.Len() > 0 {
				// pick the right renderer
				if c == '~' {
					p.r.StrikeThrough(work.Bytes())
				} else {
					p.r.DoubleEmphasis(work.Bytes())
				}
			}
			return i + 2
		}
		i++
	}
	return 0
}

func helperTripleEmphasis(p *parser, data []byte, offset int, c byte) int {
	i := 0
	origData := data
	data = data[offset:]

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0
		}
		i += length

		// skip symbols preceded by whitespace
		if data[i] != c || isspace(data[i-1]) {
			continue
		}

		switch {
		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
			// triple symbol found
			var work bytes.Buffer

			p.inline(&work, data[:i])
			if work.Len() > 0 {
				p.r.TripleEmphasis(work.Bytes())
			}
			return i + 3
		case (i+1 < len(data) && data[i+1] == c):
			// double symbol found, hand over to emph1
			length = helperEmphasis(p, origData[offset-2:], c)
			if length == 0 {
				return 0
			} else {
				return length - 2
			}
		default:
			// single symbol found, hand over to emph2
			length = helperDoubleEmphasis(p, origData[offset-1:], c)
			if length == 0 {
				return 0
			} else {
				return length - 1
			}
		}
	}
	return 0
}