inline.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse inline elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "regexp"
19 "strconv"
20)
21
var (
	// urlRe is the regexp source for a URL as it may appear inside an anchor:
	// an http, https or ftp scheme (or a single leading slash) followed by
	// one or more URL characters.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, anchored at the start of its input, a complete
	// <a href="URL" [title="..."]>URL</a> element.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// https://www.w3.org/TR/html5/syntax.html#character-references
	// highest unicode code point in 17 planes (2^20): 1,114,112d =
	// 7 dec digits or 6 hex digits
	// named entity references can be 2-31 characters with stuff like &lt;
	// at one end and &CounterClockwiseContourIntegral; at the other. There
	// are also sometimes numbers at the end, although this isn't inherent
	// in the specification; there are never numbers anywhere else in
	// current character references, though; see &frac34; and &blk12;, etc.
	// https://www.w3.org/TR/html5/syntax.html#named-character-references
	//
	// entity := "&" (named group | number ref) ";"
	// named group := [a-zA-Z]{2,31}[0-9]{0,2}
	// number ref := "#" (dec ref | hex ref)
	// dec ref := [0-9]{1,7}
	// hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)
43
// Functions to parse text within a block.
// Each handler returns the number of bytes it consumed (0 when it took no
// action) together with the node it produced, if any.
// data is the complete block being rendered;
// offset is the index in data of the byte that triggered the handler.
48
49func (p *Markdown) inline(currBlock *Node, data []byte) {
50 // handlers might call us recursively: enforce a maximum depth
51 if p.nesting >= p.maxNesting || len(data) == 0 {
52 return
53 }
54 p.nesting++
55 beg, end := 0, 0
56 for end < len(data) {
57 handler := p.inlineCallback[data[end]]
58 if handler != nil {
59 if consumed, node := handler(p, data, end); consumed == 0 {
60 // No action from the callback.
61 end++
62 } else {
63 // Copy inactive chars into the output.
64 currBlock.AppendChild(text(data[beg:end]))
65 if node != nil {
66 currBlock.AppendChild(node)
67 }
68 // Skip past whatever the callback used.
69 beg = end + consumed
70 end = beg
71 }
72 } else {
73 end++
74 }
75 }
76 if beg < len(data) {
77 if data[end-1] == '\n' {
78 end--
79 }
80 currBlock.AppendChild(text(data[beg:end]))
81 }
82 p.nesting--
83}
84
85// single and double emphasis parsing
86func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
87 data = data[offset:]
88 c := data[0]
89
90 if len(data) > 2 && data[1] != c {
91 // whitespace cannot follow an opening emphasis;
92 // strikethrough only takes two characters '~~'
93 if c == '~' || isspace(data[1]) {
94 return 0, nil
95 }
96 ret, node := helperEmphasis(p, data[1:], c)
97 if ret == 0 {
98 return 0, nil
99 }
100
101 return ret + 1, node
102 }
103
104 if len(data) > 3 && data[1] == c && data[2] != c {
105 if isspace(data[2]) {
106 return 0, nil
107 }
108 ret, node := helperDoubleEmphasis(p, data[2:], c)
109 if ret == 0 {
110 return 0, nil
111 }
112
113 return ret + 2, node
114 }
115
116 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
117 if c == '~' || isspace(data[3]) {
118 return 0, nil
119 }
120 ret, node := helperTripleEmphasis(p, data, 3, c)
121 if ret == 0 {
122 return 0, nil
123 }
124
125 return ret + 3, node
126 }
127
128 return 0, nil
129}
130
131func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
132 data = data[offset:]
133
134 nb := 0
135
136 // count the number of backticks in the delimiter
137 for nb < len(data) && data[nb] == '`' {
138 nb++
139 }
140
141 // find the next delimiter
142 i, end := 0, 0
143 for end = nb; end < len(data) && i < nb; end++ {
144 if data[end] == '`' {
145 i++
146 } else {
147 i = 0
148 }
149 }
150
151 // no matching delimiter?
152 if i < nb && end >= len(data) {
153 return 0, nil
154 }
155
156 // trim outside whitespace
157 fBegin := nb
158 for fBegin < end && data[fBegin] == ' ' {
159 fBegin++
160 }
161
162 fEnd := end - nb
163 for fEnd > fBegin && data[fEnd-1] == ' ' {
164 fEnd--
165 }
166
167 // render the code span
168 if fBegin != fEnd {
169 code := NewNode(Code)
170 code.Literal = data[fBegin:fEnd]
171 return end, code
172 }
173
174 return end, nil
175}
176
177// newline preceded by two spaces becomes <br>
178func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
179 origOffset := offset
180 for offset < len(data) && data[offset] == ' ' {
181 offset++
182 }
183
184 if offset < len(data) && data[offset] == '\n' {
185 if offset-origOffset >= 2 {
186 return offset - origOffset + 1, NewNode(Hardbreak)
187 }
188 return offset - origOffset, nil
189 }
190 return 0, nil
191}
192
193// newline without two spaces works when HardLineBreak is enabled
194func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
195 if p.extensions&HardLineBreak != 0 {
196 return 1, NewNode(Hardbreak)
197 }
198 return 0, nil
199}
200
// linkType identifies which bracketed construct link() is parsing.
type linkType int

const (
	// linkNormal is a regular link: [text]
	linkNormal linkType = iota
	// linkImg is an image: ![alt]
	linkImg
	// linkDeferredFootnote is a footnote reference: [^refId]
	linkDeferredFootnote
	// linkInlineFootnote is an inline footnote: ^[text]
	linkInlineFootnote
)
209
210func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
211 if t == linkDeferredFootnote {
212 return false
213 }
214 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
215}
216
217func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
218 if offset < len(data)-1 && data[offset+1] == '[' {
219 return link(p, data, offset)
220 }
221 return 0, nil
222}
223
224func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
225 if offset < len(data)-1 && data[offset+1] == '[' {
226 return link(p, data, offset)
227 }
228 return 0, nil
229}
230
// '[': parse a link or an image or a footnote
//
// link recognises inline links "[text](url "title")", reference links
// "[text][id]", shortcut references "[text]", images "![alt](...)", inline
// footnotes "^[text]" and deferred footnotes "[^id]". It returns the number
// of bytes consumed, counted from the offset it was called with, and the
// resulting Link or Image node. A return of (0, nil) means the input is not
// one of these constructs and nothing was consumed.
func link(p *Markdown, data []byte, offset int) (int, *Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	// classify the construct from its first one or two bytes; for images and
	// inline footnotes offset is advanced past the leading '!' or '^'
	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// from here on every index is relative to the (possibly advanced) offset
	data = data[offset:]

	var (
		// i is the scan cursor; it starts just past data[0]
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// a deferred footnote has one more byte ('^') to skip before the text
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		// a byte preceded by a backslash never opens or closes a bracket
		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	// ran off the end without finding the closing bracket
	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the closing ']' of the text part
	txtE := i
	i++
	var footnoteNode *Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		// linkB is the index of the first byte of the destination
		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			// a backslash escapes the following byte; skip both
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		// linkE is one past the last byte of the destination (so far)
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: there is no title, and everything up
				// to the ')' belongs to the destination
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the link text itself is the reference id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, turning each newline into a single space
				// unless the byte before it already is a space
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step past the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// copy the text, turning each newline into a single space
			// unless the byte before it already is a space
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = NewNode(Item)
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			// the fragment is the slugified id, capped at 16 bytes, or
			// "footnote-<n>" when the id is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			if t == linkDeferredFootnote {
				// number the footnote in order of first use and record it
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			// resolve backslash escapes in the destination
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	var linkNode *Node
	switch t {
	case linkNormal:
		linkNode = NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(linkNode, data[1:txtE])
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode = NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		linkNode.AppendChild(text(data[1:txtE]))
		// account for the leading '!' that offset was advanced past
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode = NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		linkNode.Footnote = footnoteNode
		if t == linkInlineFootnote {
			// account for the leading '^' that offset was advanced past
			i++
		}

	default:
		return 0, nil
	}

	return i, linkNode
}
589
590func (p *Markdown) inlineHTMLComment(data []byte) int {
591 if len(data) < 5 {
592 return 0
593 }
594 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
595 return 0
596 }
597 i := 5
598 // scan for an end-of-comment marker, across lines if necessary
599 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
600 i++
601 }
602 // no end-of-comment marker
603 if i >= len(data) {
604 return 0
605 }
606 return i + 1
607}
608
// stripMailto returns link without a leading "mailto://" or "mailto:"
// prefix. The match is case-sensitive; a link without either prefix is
// returned unchanged.
func stripMailto(link []byte) []byte {
	switch {
	case bytes.HasPrefix(link, []byte("mailto://")):
		return link[len("mailto://"):]
	case bytes.HasPrefix(link, []byte("mailto:")):
		return link[len("mailto:"):]
	}
	return link
}
618
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	// notAutolink means the angle-bracketed text is not an autolink.
	notAutolink autolinkType = iota
	// normalAutolink is a "<scheme:...>" autolink.
	normalAutolink
	// emailAutolink is a "<user@host>" autolink.
	emailAutolink
)
628
629// '<' when tags or autolinks are allowed
630func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
631 data = data[offset:]
632 altype, end := tagLength(data)
633 if size := p.inlineHTMLComment(data); size > 0 {
634 end = size
635 }
636 if end > 2 {
637 if altype != notAutolink {
638 var uLink bytes.Buffer
639 unescapeText(&uLink, data[1:end+1-2])
640 if uLink.Len() > 0 {
641 link := uLink.Bytes()
642 node := NewNode(Link)
643 node.Destination = link
644 if altype == emailAutolink {
645 node.Destination = append([]byte("mailto:"), link...)
646 }
647 node.AppendChild(text(stripMailto(link)))
648 return end, node
649 }
650 } else {
651 htmlTag := NewNode(HTMLSpan)
652 htmlTag.Literal = data[:end]
653 return end, htmlTag
654 }
655 }
656
657 return end, nil
658}
659
660// '\\' backslash escape
661var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
662
663func escape(p *Markdown, data []byte, offset int) (int, *Node) {
664 data = data[offset:]
665
666 if len(data) > 1 {
667 if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
668 return 2, NewNode(Hardbreak)
669 }
670 if bytes.IndexByte(escapeChars, data[1]) < 0 {
671 return 0, nil
672 }
673
674 return 2, text(data[1:2])
675 }
676
677 return 2, nil
678}
679
// unescapeText copies src to ob with backslash escapes resolved: each
// backslash is dropped and the byte that follows it is copied verbatim.
// A backslash that is the very last byte of src is dropped.
func unescapeText(ob *bytes.Buffer, src []byte) {
	for pos := 0; pos < len(src); {
		rel := bytes.IndexByte(src[pos:], '\\')
		if rel < 0 {
			// no more escapes: flush the rest and stop
			ob.Write(src[pos:])
			return
		}

		slash := pos + rel
		if rel > 0 {
			ob.Write(src[pos:slash])
		}
		if slash+1 >= len(src) {
			// trailing backslash with nothing to escape
			return
		}

		ob.WriteByte(src[slash+1])
		pos = slash + 2
	}
}
700
701// '&' escaped when it doesn't belong to an entity
702// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
703func entity(p *Markdown, data []byte, offset int) (int, *Node) {
704 data = data[offset:]
705
706 end := 1
707
708 if end < len(data) && data[end] == '#' {
709 end++
710 }
711
712 for end < len(data) && isalnum(data[end]) {
713 end++
714 }
715
716 if end < len(data) && data[end] == ';' {
717 end++ // real entity
718 } else {
719 return 0, nil // lone '&'
720 }
721
722 ent := data[:end]
723 // undo & escaping or it will be converted to &amp; by another
724 // escaper in the renderer
725 if bytes.Equal(ent, []byte("&")) {
726 ent = []byte{'&'}
727 }
728
729 return end, text(ent)
730}
731
732func linkEndsWithEntity(data []byte, linkEnd int) bool {
733 entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
734 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
735}
736
// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
//
// prefix is expected to be lower-case ASCII. A byte of s matches the
// corresponding prefix byte when it is identical to it, or when it is an
// ASCII upper-case letter whose lower-case form equals it. Case folding is
// applied to letters only, so control bytes such as 0x1A can no longer be
// mistaken for ':' (0x1A + 0x20 == ':').
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
	if len(s) < len(prefix) {
		return false
	}
	const delta = 'a' - 'A'
	for i, want := range prefix {
		got := s[i]
		if got == want {
			continue
		}
		if 'A' <= got && got <= 'Z' && got+delta == want {
			continue
		}
		return false
	}
	return true
}
754
// protocolPrefixes lists the lower-case URI prefixes that maybeAutoLink
// tests for (case-insensitively) at the cursor before attempting an
// autolink.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is the least number of bytes that must remain at the
// cursor for any of protocolPrefixes to fit.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
764
765func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
766 // quick check to rule out most false hits
767 if p.insideLink || len(data) < offset+shortestPrefix {
768 return 0, nil
769 }
770 for _, prefix := range protocolPrefixes {
771 endOfHead := offset + 8 // 8 is the len() of the longest prefix
772 if endOfHead > len(data) {
773 endOfHead = len(data)
774 }
775 if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
776 return autoLink(p, data, offset)
777 }
778 }
779 return 0, nil
780}
781
// autoLink tries to turn the bare URL around data[offset] into a Link node.
// If the URL sits inside an existing <a href="...">...</a> element, the rest
// of that element is emitted verbatim as an HTMLSpan instead. It returns the
// number of bytes consumed and the node, or (0, nil) when no link is
// recognised.
func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	// (walk back to the nearest '<', or to the start of data)
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit what remains of the anchor element, from the cursor onwards
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	// data now starts rewind bytes before offset; origData keeps the full
	// input for the backward delimiter scan further down
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	// linkEnd is an index into the re-sliced data: the link runs up to the
	// first byte that isEndOfLink rejects
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	// copen is the opening counterpart of the final byte, or 0 if none.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 * foo http://www.pokemon.com/Pikachu_(Electric) bar
		 * => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backwards over the current line, counting closers up and
		// openers down, until the delimiters balance
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced: drop the trailing delimiter from the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		// the unescaped URL serves as both destination and link text
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		node.AppendChild(text(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
900
901func isEndOfLink(char byte) bool {
902 return isspace(char) || char == '<'
903}
904
// validUris lists the lower-case URI prefixes that isSafeLink accepts.
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}

// validPaths lists the path prefixes that isSafeLink accepts.
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
907
908func isSafeLink(link []byte) bool {
909 for _, path := range validPaths {
910 if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
911 if len(link) == len(path) {
912 return true
913 } else if isalnum(link[len(path)]) {
914 return true
915 }
916 }
917 }
918
919 for _, prefix := range validUris {
920 // TODO: handle unicode here
921 // case-insensitive prefix test
922 if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
923 return true
924 }
925 }
926
927 return false
928}
929
930// return the length of the given tag, or 0 is it's not valid
931func tagLength(data []byte) (autolink autolinkType, end int) {
932 var i, j int
933
934 // a valid tag can't be shorter than 3 chars
935 if len(data) < 3 {
936 return notAutolink, 0
937 }
938
939 // begins with a '<' optionally followed by '/', followed by letter or number
940 if data[0] != '<' {
941 return notAutolink, 0
942 }
943 if data[1] == '/' {
944 i = 2
945 } else {
946 i = 1
947 }
948
949 if !isalnum(data[i]) {
950 return notAutolink, 0
951 }
952
953 // scheme test
954 autolink = notAutolink
955
956 // try to find the beginning of an URI
957 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
958 i++
959 }
960
961 if i > 1 && i < len(data) && data[i] == '@' {
962 if j = isMailtoAutoLink(data[i:]); j != 0 {
963 return emailAutolink, i + j
964 }
965 }
966
967 if i > 2 && i < len(data) && data[i] == ':' {
968 autolink = normalAutolink
969 i++
970 }
971
972 // complete autolink test: no whitespace or ' or "
973 switch {
974 case i >= len(data):
975 autolink = notAutolink
976 case autolink != notAutolink:
977 j = i
978
979 for i < len(data) {
980 if data[i] == '\\' {
981 i += 2
982 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
983 break
984 } else {
985 i++
986 }
987
988 }
989
990 if i >= len(data) {
991 return autolink, 0
992 }
993 if i > j && data[i] == '>' {
994 return autolink, i + 1
995 }
996
997 // one of the forbidden chars has been found
998 autolink = notAutolink
999 }
1000 i += bytes.IndexByte(data[i:], '>')
1001 if i < 0 {
1002 return autolink, 0
1003 }
1004 return autolink, i + 1
1005}
1006
1007// look for the address part of a mail autolink and '>'
1008// this is less strict than the original markdown e-mail address matching
1009func isMailtoAutoLink(data []byte) int {
1010 nb := 0
1011
1012 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1013 for i := 0; i < len(data); i++ {
1014 if isalnum(data[i]) {
1015 continue
1016 }
1017
1018 switch data[i] {
1019 case '@':
1020 nb++
1021
1022 case '-', '.', '_':
1023 break
1024
1025 case '>':
1026 if nb == 1 {
1027 return i + 1
1028 }
1029 return 0
1030 default:
1031 return 0
1032 }
1033 }
1034
1035 return 0
1036}
1037
// look for the next emph char, skipping other constructs
//
// helperFindEmphChar returns the index in data of the next emphasis marker
// c that is not escaped with a backslash, stepping over code spans and
// link-like "[...]" constructs on the way. It returns 0 when nothing is
// found; callers treat 0 as "no match".
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		// jump to the next byte of interest: the marker, a backtick or '['
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			// tmpI remembers the first marker seen inside the span; it is
			// the answer if the span turns out to be unterminated
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			// tmpI remembers the first marker seen inside the brackets; it
			// is the answer if this turns out not to be a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			// spaces and newlines may separate "]" from what follows
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				}
				continue
			}
			// NOTE(review): cc holds the opening byte ('[' or '('), so the
			// scan below runs to the next occurrence of that same opening
			// byte, not to the matching ']' or ')' - confirm this is
			// intended.
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}
1111
1112func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
1113 i := 0
1114
1115 // skip one symbol if coming from emph3
1116 if len(data) > 1 && data[0] == c && data[1] == c {
1117 i = 1
1118 }
1119
1120 for i < len(data) {
1121 length := helperFindEmphChar(data[i:], c)
1122 if length == 0 {
1123 return 0, nil
1124 }
1125 i += length
1126 if i >= len(data) {
1127 return 0, nil
1128 }
1129
1130 if i+1 < len(data) && data[i+1] == c {
1131 i++
1132 continue
1133 }
1134
1135 if data[i] == c && !isspace(data[i-1]) {
1136
1137 if p.extensions&NoIntraEmphasis != 0 {
1138 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1139 continue
1140 }
1141 }
1142
1143 emph := NewNode(Emph)
1144 p.inline(emph, data[:i])
1145 return i + 1, emph
1146 }
1147 }
1148
1149 return 0, nil
1150}
1151
1152func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
1153 i := 0
1154
1155 for i < len(data) {
1156 length := helperFindEmphChar(data[i:], c)
1157 if length == 0 {
1158 return 0, nil
1159 }
1160 i += length
1161
1162 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1163 nodeType := Strong
1164 if c == '~' {
1165 nodeType = Del
1166 }
1167 node := NewNode(nodeType)
1168 p.inline(node, data[:i])
1169 return i + 2, node
1170 }
1171 i++
1172 }
1173 return 0, nil
1174}
1175
1176func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
1177 i := 0
1178 origData := data
1179 data = data[offset:]
1180
1181 for i < len(data) {
1182 length := helperFindEmphChar(data[i:], c)
1183 if length == 0 {
1184 return 0, nil
1185 }
1186 i += length
1187
1188 // skip whitespace preceded symbols
1189 if data[i] != c || isspace(data[i-1]) {
1190 continue
1191 }
1192
1193 switch {
1194 case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1195 // triple symbol found
1196 strong := NewNode(Strong)
1197 em := NewNode(Emph)
1198 strong.AppendChild(em)
1199 p.inline(em, data[:i])
1200 return i + 3, strong
1201 case (i+1 < len(data) && data[i+1] == c):
1202 // double symbol found, hand over to emph1
1203 length, node := helperEmphasis(p, origData[offset-2:], c)
1204 if length == 0 {
1205 return 0, nil
1206 }
1207 return length - 2, node
1208 default:
1209 // single symbol found, hand over to emph2
1210 length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
1211 if length == 0 {
1212 return 0, nil
1213 }
1214 return length - 1, node
1215 }
1216 }
1217 return 0, nil
1218}
1219
1220func text(s []byte) *Node {
1221 node := NewNode(Text)
1222 node.Literal = s
1223 return node
1224}
1225
// normalizeURI is a placeholder for URI normalisation; it currently returns
// s unchanged.
func normalizeURI(s []byte) []byte {
	// TODO: implement
	return s
}