inline.go
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
// Functions to parse inline elements.
//

package blackfriday

import (
	"bytes"
	"regexp"
	"strconv"
	"strings"
)

var (
	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// https://www.w3.org/TR/html5/syntax.html#character-references
	// highest unicode code point in 17 planes (17 * 2^16): 1,114,112d =
	// 7 dec digits or 6 hex digits
	// named entity references can be 2-31 characters with stuff like &lt;
	// at one end and &CounterClockwiseContourIntegral; at the other. There
	// are also sometimes numbers at the end, although this isn't inherent
	// in the specification; there are never numbers anywhere else in
	// current character references, though; see &frac34; and &blk12;, etc.
	// https://www.w3.org/TR/html5/syntax.html#named-character-references
	//
	// entity := "&" (named group | number ref) ";"
	// named group := [a-zA-Z]{2,31}[0-9]{0,2}
	// number ref := "#" (dec ref | hex ref)
	// dec ref := [0-9]{1,7}
	// hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)

// Functions to parse text within a block
// Each function returns the number of chars taken care of
// data is the complete block being rendered
// offset is the number of valid chars before the current cursor

func (p *Markdown) inline(currBlock *Node, data []byte) {
	// handlers might call us recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting || len(data) == 0 {
		return
	}
	p.nesting++
	beg, end := 0, 0
	for end < len(data) {
		handler := p.inlineCallback[data[end]]
		if handler != nil {
			if consumed, node := handler(p, data, end); consumed == 0 {
				// No action from the callback.
				end++
			} else {
				// Copy inactive chars into the output.
				currBlock.AppendChild(text(data[beg:end]))
				if node != nil {
					currBlock.AppendChild(node)
				}
				// Skip past whatever the callback used.
				beg = end + consumed
				end = beg
			}
		} else {
			end++
		}
	}
	if beg < len(data) {
		if data[end-1] == '\n' {
			end--
		}
		currBlock.AppendChild(text(data[beg:end]))
	}
	p.nesting--
}

// single and double emphasis parsing
func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
	data = data[offset:]
	c := data[0]

	if len(data) > 2 && data[1] != c {
		// whitespace cannot follow an opening emphasis;
		// strikethrough only takes two characters '~~'
		if c == '~' || isspace(data[1]) {
			return 0, nil
		}
		ret, node := helperEmphasis(p, data[1:], c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 1, node
	}

	if len(data) > 3 && data[1] == c && data[2] != c {
		if isspace(data[2]) {
			return 0, nil
		}
		ret, node := helperDoubleEmphasis(p, data[2:], c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 2, node
	}

	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
		if c == '~' || isspace(data[3]) {
			return 0, nil
		}
		ret, node := helperTripleEmphasis(p, data, 3, c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 3, node
	}

	return 0, nil
}

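// '`': parse a span of code delimited by one or more backticks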
func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
	data = data[offset:]

	nb := 0

	// count the number of backticks in the delimiter
	for nb < len(data) && data[nb] == '`' {
		nb++
	}

	// find the next delimiter
	i, end := 0, 0
	for end = nb; end < len(data) && i < nb; end++ {
		if data[end] == '`' {
			i++
		} else {
			i = 0
		}
	}

	// no matching delimiter?
	if i < nb && end >= len(data) {
		return 0, nil
	}

	// trim outside whitespace
	fBegin := nb
	for fBegin < end && data[fBegin] == ' ' {
		fBegin++
	}

	fEnd := end - nb
	for fEnd > fBegin && data[fEnd-1] == ' ' {
		fEnd--
	}

	// render the code span
	if fBegin != fEnd {
		code := NewNode(Code)
		code.Literal = data[fBegin:fEnd]
		return end, code
	}

	return end, nil
}

// newline preceded by two spaces becomes <br>
func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
	origOffset := offset
	for offset < len(data) && data[offset] == ' ' {
		offset++
	}

	if offset < len(data) && data[offset] == '\n' {
		if offset-origOffset >= 2 {
			return offset - origOffset + 1, NewNode(Hardbreak)
		}
		return offset - origOffset, nil
	}
	return 0, nil
}

// newline without two spaces works when HardLineBreak is enabled
func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
	if p.extensions&HardLineBreak != 0 {
		return 1, NewNode(Hardbreak)
	}
	return 0, nil
}

type linkType int

const (
	linkNormal linkType = iota
	linkImg
	linkDeferredFootnote
	linkInlineFootnote
)

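// isReferenceStyleLink reports whether the text at pos looks like the start of
// a reference-style link id ("[...]"); footnote references are excluded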
func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
	if t == linkDeferredFootnote {
		return false
	}
	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
}

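// '!': parse an image when followed by '['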
func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
	if offset < len(data)-1 && data[offset+1] == '[' {
		return link(p, data, offset)
	}
	return 0, nil
}

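// '^': parse an inline footnote when followed by '['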
func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
	if offset < len(data)-1 && data[offset+1] == '[' {
		return link(p, data, offset)
	}
	return 0, nil
}

// '[': parse a link or an image or a footnote
func link(p *Markdown, data []byte, offset int) (int, *Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	data = data[offset:]

	var (
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		widthHeight             []byte
		textHasNl               = false
	)

	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case isBackslashEscaped(data, i):
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0, nil
	}

	txtE := i
	i++
	var footnoteNode *Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		i++

		var whE, whB int
		if i < len(data) && data[i] == '{' {
			i++
			whB = i

		findwidthheight:
			for i < len(data) {
				switch {
				case data[i] == '}':
					break findwidthheight
				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			whE = i

			if whE > whB {
				widthHeight = data[whB:whE]
			}
			i++
		}
	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			if textHasNl {
				var b bytes.Buffer

				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = NewNode(Item)
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			if t == linkDeferredFootnote {
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	var linkNode *Node
	switch t {
	case linkNormal:
		linkNode = NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(linkNode, data[1:txtE])
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode = NewNode(Image)
		if len(widthHeight) > 0 {
			wh := strings.Split(string(widthHeight), "x")
			// guard against a malformed "{...}" block with no "x" separator,
			// which would otherwise panic on wh[1]
			if len(wh) == 2 {
				w, _ := strconv.Atoi(wh[0])
				h, _ := strconv.Atoi(wh[1])
				linkNode.Width, linkNode.Height = w, h
			}
		}
		linkNode.Destination = uLink
		linkNode.Title = title
		linkNode.AppendChild(text(data[1:txtE]))
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode = NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		linkNode.Footnote = footnoteNode
		if t == linkInlineFootnote {
			i++
		}

	default:
		return 0, nil
	}

	return i, linkNode
}

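// inlineHTMLComment returns the length of an HTML comment ("<!-- ... -->") at
// the start of data, or 0 if data does not begin with a complete comment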
func (p *Markdown) inlineHTMLComment(data []byte) int {
	if len(data) < 5 {
		return 0
	}
	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
		return 0
	}
	i := 5
	// scan for an end-of-comment marker, across lines if necessary
	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
		i++
	}
	// no end-of-comment marker
	if i >= len(data) {
		return 0
	}
	return i + 1
}

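// stripMailto removes a leading "mailto:" or "mailto://" scheme from a link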
func stripMailto(link []byte) []byte {
	if bytes.HasPrefix(link, []byte("mailto://")) {
		return link[9:]
	} else if bytes.HasPrefix(link, []byte("mailto:")) {
		return link[7:]
	} else {
		return link
	}
}

// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	notAutolink autolinkType = iota
	normalAutolink
	emailAutolink
)

// '<' when tags or autolinks are allowed
func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
	data = data[offset:]
	altype, end := tagLength(data)
	if size := p.inlineHTMLComment(data); size > 0 {
		end = size
	}
	if end > 2 {
		if altype != notAutolink {
			var uLink bytes.Buffer
			unescapeText(&uLink, data[1:end+1-2])
			if uLink.Len() > 0 {
				link := uLink.Bytes()
				node := NewNode(Link)
				node.Destination = link
				if altype == emailAutolink {
					node.Destination = append([]byte("mailto:"), link...)
				}
				node.AppendChild(text(stripMailto(link)))
				return end, node
			}
		} else {
			htmlTag := NewNode(HTMLSpan)
			htmlTag.Literal = data[:end]
			return end, htmlTag
		}
	}

	return end, nil
}

// '\\' backslash escape
var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")

func escape(p *Markdown, data []byte, offset int) (int, *Node) {
	data = data[offset:]

	if len(data) > 1 {
		if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
			return 2, NewNode(Hardbreak)
		}
		if bytes.IndexByte(escapeChars, data[1]) < 0 {
			return 0, nil
		}

		return 2, text(data[1:2])
	}

	return 2, nil
}

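// unescapeText copies src into ob, dropping the backslash in front of each
// backslash-escaped character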
func unescapeText(ob *bytes.Buffer, src []byte) {
	i := 0
	for i < len(src) {
		org := i
		for i < len(src) && src[i] != '\\' {
			i++
		}

		if i > org {
			ob.Write(src[org:i])
		}

		if i+1 >= len(src) {
			break
		}

		ob.WriteByte(src[i+1])
		i += 2
	}
}

// '&' escaped when it doesn't belong to an entity
// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
func entity(p *Markdown, data []byte, offset int) (int, *Node) {
	data = data[offset:]

	end := 1

	if end < len(data) && data[end] == '#' {
		end++
	}

	for end < len(data) && isalnum(data[end]) {
		end++
	}

	if end < len(data) && data[end] == ';' {
		end++ // real entity
	} else {
		return 0, nil // lone '&'
	}

	ent := data[:end]
	// undo &amp; escaping or it will be converted to &amp;amp; by another
	// escaper in the renderer
	if bytes.Equal(ent, []byte("&amp;")) {
		ent = []byte{'&'}
	}

	return end, text(ent)
}

func linkEndsWithEntity(data []byte, linkEnd int) bool {
	entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
}

// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
	if len(s) < len(prefix) {
		return false
	}
	delta := byte('a' - 'A')
	for i, b := range prefix {
		if b != s[i] && b != s[i]+delta {
			return false
		}
	}
	return true
}

var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

const shortestPrefix = 6 // len("ftp://"), the shortest of the above

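// maybeAutoLink is a cheap pre-check: autoLink is only called when the text at
// offset starts with one of the recognized protocol prefixes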
func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
	// quick check to rule out most false hits
	if p.insideLink || len(data) < offset+shortestPrefix {
		return 0, nil
	}
	for _, prefix := range protocolPrefixes {
		endOfHead := offset + 8 // 8 is the len() of the longest prefix
		if endOfHead > len(data) {
			endOfHead = len(data)
		}
		if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
			return autoLink(p, data, offset)
		}
	}
	return 0, nil
}

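// autoLink turns a bare URL in the text into a Link node, trimming trailing
// punctuation and unbalanced closing delimiters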
func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		node.AppendChild(text(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}

func isEndOfLink(char byte) bool {
	return isspace(char) || char == '<'
}

var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}

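// isSafeLink reports whether link starts with a known-safe URI scheme or a
// relative/absolute path prefix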
func isSafeLink(link []byte) bool {
	for _, path := range validPaths {
		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
			if len(link) == len(path) {
				return true
			} else if isalnum(link[len(path)]) {
				return true
			}
		}
	}

	for _, prefix := range validUris {
		// TODO: handle unicode here
		// case-insensitive prefix test
		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
			return true
		}
	}

	return false
}

// return the length of the given tag, or 0 if it's not valid
func tagLength(data []byte) (autolink autolinkType, end int) {
	var i, j int

	// a valid tag can't be shorter than 3 chars
	if len(data) < 3 {
		return notAutolink, 0
	}

	// begins with a '<' optionally followed by '/', followed by letter or number
	if data[0] != '<' {
		return notAutolink, 0
	}
	if data[1] == '/' {
		i = 2
	} else {
		i = 1
	}

	if !isalnum(data[i]) {
		return notAutolink, 0
	}

	// scheme test
	autolink = notAutolink

	// try to find the beginning of a URI
	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
		i++
	}

	if i > 1 && i < len(data) && data[i] == '@' {
		if j = isMailtoAutoLink(data[i:]); j != 0 {
			return emailAutolink, i + j
		}
	}

	if i > 2 && i < len(data) && data[i] == ':' {
		autolink = normalAutolink
		i++
	}

	// complete autolink test: no whitespace or ' or "
	switch {
	case i >= len(data):
		autolink = notAutolink
	case autolink != notAutolink:
		j = i

		for i < len(data) {
			if data[i] == '\\' {
				i += 2
			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
				break
			} else {
				i++
			}

		}

		if i >= len(data) {
			return autolink, 0
		}
		if i > j && data[i] == '>' {
			return autolink, i + 1
		}

		// one of the forbidden chars has been found
		autolink = notAutolink
	}
	i += bytes.IndexByte(data[i:], '>')
	if i < 0 {
		return autolink, 0
	}
	return autolink, i + 1
}

// look for the address part of a mail autolink and '>'
// this is less strict than the original markdown e-mail address matching
func isMailtoAutoLink(data []byte) int {
	nb := 0

	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
	for i := 0; i < len(data); i++ {
		if isalnum(data[i]) {
			continue
		}

		switch data[i] {
		case '@':
			nb++

		case '-', '.', '_':
			break

		case '>':
			if nb == 1 {
				return i + 1
			}
			return 0
		default:
			return 0
		}
	}

	return 0
}

// look for the next emph char, skipping other constructs
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				}
				continue
			}
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}

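// helperEmphasis parses single-delimiter emphasis (e.g. *foo* or _foo_) once
// the opening delimiter c has been consumed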
func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
	i := 0

	// skip one symbol if coming from emph3
	if len(data) > 1 && data[0] == c && data[1] == c {
		i = 1
	}

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length
		if i >= len(data) {
			return 0, nil
		}

		if i+1 < len(data) && data[i+1] == c {
			i++
			continue
		}

		if data[i] == c && !isspace(data[i-1]) {

			if p.extensions&NoIntraEmphasis != 0 {
				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
					continue
				}
			}

			emph := NewNode(Emph)
			p.inline(emph, data[:i])
			return i + 1, emph
		}
	}

	return 0, nil
}

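// helperDoubleEmphasis parses double-delimiter emphasis (**strong**, __strong__
// or ~~strikethrough~~) once the two opening delimiters have been consumed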
func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
	i := 0

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length

		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
			nodeType := Strong
			if c == '~' {
				nodeType = Del
			}
			node := NewNode(nodeType)
			p.inline(node, data[:i])
			return i + 2, node
		}
		i++
	}
	return 0, nil
}

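// helperTripleEmphasis parses triple-delimiter emphasis (e.g. ***both***);
// data still contains the opening delimiters and offset points just past them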
func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
	i := 0
	origData := data
	data = data[offset:]

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length

		// skip symbols preceded by whitespace
		if data[i] != c || isspace(data[i-1]) {
			continue
		}

		switch {
		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
			// triple symbol found
			strong := NewNode(Strong)
			em := NewNode(Emph)
			strong.AppendChild(em)
			p.inline(em, data[:i])
			return i + 3, strong
		case (i+1 < len(data) && data[i+1] == c):
			// double symbol found, hand over to emph1
			length, node := helperEmphasis(p, origData[offset-2:], c)
			if length == 0 {
				return 0, nil
			}
			return length - 2, node
		default:
			// single symbol found, hand over to emph2
			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
			if length == 0 {
				return 0, nil
			}
			return length - 1, node
		}
	}
	return 0, nil
}

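// text creates a Text node holding the given literal bytes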
func text(s []byte) *Node {
	node := NewNode(Text)
	node.Literal = s
	return node
}

func normalizeURI(s []byte) []byte {
	return s // TODO: implement
}