inline.go (view raw)
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
// Functions to parse inline elements.
//
13
14package blackfriday
15
16import (
17 "bytes"
18 "regexp"
19 "strconv"
20)
21
var (
	// urlRe is the source of a pattern matching an absolute http(s)/ftp URL
	// or a path starting with '/'. It is only used to build anchorRe.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// anchorRe matches, at the very start of the input, a complete anchor
	// element of the form <a href="URL" title="...">URL</a> where the title
	// attribute is optional.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// htmlEntityRe matches one HTML character reference:
	//
	//	entity      := "&" (named group | number ref) ";"
	//	named group := [a-zA-Z]{2,31}[0-9]{0,2}
	//	number ref  := "#" (dec ref | hex ref)
	//	dec ref     := [0-9]{1,7}
	//	hex ref     := ("x" | "X") [0-9a-fA-F]{1,6}
	//
	// Named references range from two letters (&lt;) up to 31
	// (&CounterClockwiseContourIntegral;) and may end in one or two digits
	// (&frac34;, &blk12;).
	htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
)
29
// Functions to parse text within a block.
// Each function returns the number of bytes it has taken care of.
// data is the complete block being rendered.
// offset is the position of the current cursor in data, i.e. the number of
// bytes before it that have already been handled.
34
// inline parses the inline content of data and appends the resulting nodes to
// p.currBlock. Runs of bytes that have no registered handler are emitted as
// Text nodes; a byte that has an entry in p.inlineCallback is passed to that
// handler together with the full data slice and the byte's position.
//
// Handlers may call inline again, so p.nesting is compared against
// p.maxNesting on entry; when the limit is reached the call returns without
// emitting anything for data.
func (p *parser) inline(data []byte) {
	// this is called recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	// data[i:end] is the pending run of inactive bytes that has not been
	// emitted yet.
	i, end := 0, 0
	for i < len(data) {
		// Stop at EOL
		if data[i] == '\n' && i+1 == len(data) {
			break
		}
		// Copy inactive chars into the output, but first check for one quirk:
		// 'h', 'm' and 'f' all might trigger a check for autolink processing
		// and end this run of inactive characters. However, there's one nasty
		// case where breaking this run would be bad: in smartypants fraction
		// detection, we expect things like "1/2th" to be in a single run. So
		// we check here if an 'h' is followed by 't' (from 'http') and if it's
		// not, we short circuit the 'h' into the run of inactive characters.
		// (Only 'h' gets this treatment below.)
		//
		// Also, in a similar fashion maybeLineBreak breaks this run of chars,
		// but smartDash processor relies on seeing context around the dashes.
		// Fix this somehow.
		for end < len(data) {
			if data[end] == ' ' {
				consumed, br := maybeLineBreak(p, data, end)
				if consumed > 0 {
					// flush the run collected so far, then skip the
					// spaces (and the newline, for a hard break)
					p.currBlock.AppendChild(text(data[i:end]))
					if br {
						p.currBlock.AppendChild(NewNode(Hardbreak))
					}
					i = end
					i += consumed
					end = i
				} else {
					end++
				}
				continue
			}
			if p.inlineCallback[data[end]] != nil {
				if end+1 < len(data) && data[end] == 'h' && data[end+1] != 't' {
					end++
				} else {
					// an active byte: stop the run here
					break
				}
			} else {
				end++
			}
		}

		p.currBlock.AppendChild(text(data[i:end]))

		if end >= len(data) {
			break
		}
		i = end

		// call the trigger
		handler := p.inlineCallback[data[end]]
		if consumed := handler(p, data, i); consumed == 0 {
			// no action from the callback; buffer the byte for later
			end = i + 1
		} else {
			// skip past whatever the callback used
			i += consumed
			end = i
		}
	}

	p.nesting--
}
107
108// single and double emphasis parsing
109func emphasis(p *parser, data []byte, offset int) int {
110 data = data[offset:]
111 c := data[0]
112 ret := 0
113
114 if len(data) > 2 && data[1] != c {
115 // whitespace cannot follow an opening emphasis;
116 // strikethrough only takes two characters '~~'
117 if c == '~' || isspace(data[1]) {
118 return 0
119 }
120 if ret = helperEmphasis(p, data[1:], c); ret == 0 {
121 return 0
122 }
123
124 return ret + 1
125 }
126
127 if len(data) > 3 && data[1] == c && data[2] != c {
128 if isspace(data[2]) {
129 return 0
130 }
131 if ret = helperDoubleEmphasis(p, data[2:], c); ret == 0 {
132 return 0
133 }
134
135 return ret + 2
136 }
137
138 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
139 if c == '~' || isspace(data[3]) {
140 return 0
141 }
142 if ret = helperTripleEmphasis(p, data, 3, c); ret == 0 {
143 return 0
144 }
145
146 return ret + 3
147 }
148
149 return 0
150}
151
152func codeSpan(p *parser, data []byte, offset int) int {
153 data = data[offset:]
154
155 nb := 0
156
157 // count the number of backticks in the delimiter
158 for nb < len(data) && data[nb] == '`' {
159 nb++
160 }
161
162 // find the next delimiter
163 i, end := 0, 0
164 for end = nb; end < len(data) && i < nb; end++ {
165 if data[end] == '`' {
166 i++
167 } else {
168 i = 0
169 }
170 }
171
172 // no matching delimiter?
173 if i < nb && end >= len(data) {
174 return 0
175 }
176
177 // trim outside whitespace
178 fBegin := nb
179 for fBegin < end && data[fBegin] == ' ' {
180 fBegin++
181 }
182
183 fEnd := end - nb
184 for fEnd > fBegin && data[fEnd-1] == ' ' {
185 fEnd--
186 }
187
188 // render the code span
189 if fBegin != fEnd {
190 code := NewNode(Code)
191 code.Literal = data[fBegin:fEnd]
192 p.currBlock.AppendChild(code)
193 }
194
195 return end
196
197}
198
199// newline preceded by two spaces becomes <br>
200func maybeLineBreak(p *parser, data []byte, offset int) (int, bool) {
201 origOffset := offset
202 for offset < len(data) && data[offset] == ' ' {
203 offset++
204 }
205 if offset < len(data) && data[offset] == '\n' {
206 if offset-origOffset >= 2 {
207 return offset - origOffset + 1, true
208 }
209 return offset - origOffset, false
210 }
211 return 0, false
212}
213
214// newline without two spaces works when HardLineBreak is enabled
215func lineBreak(p *parser, data []byte, offset int) int {
216 if p.flags&HardLineBreak != 0 {
217 p.currBlock.AppendChild(NewNode(Hardbreak))
218 return 1
219 }
220 return 0
221}
222
// linkType identifies which bracket construct the link function is parsing.
type linkType int

const (
	linkNormal           linkType = iota // [text] == regular link
	linkImg                              // ![alt] == image
	linkDeferredFootnote                 // [^refId] == deferred footnote
	linkInlineFootnote                   // ^[text] == inline footnote
)
231
232func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
233 if t == linkDeferredFootnote {
234 return false
235 }
236 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
237}
238
239func maybeImage(p *parser, data []byte, offset int) int {
240 if offset < len(data)-1 && data[offset+1] == '[' {
241 return link(p, data, offset)
242 }
243 return 0
244}
245
246func maybeInlineFootnote(p *parser, data []byte, offset int) int {
247 if offset < len(data)-1 && data[offset+1] == '[' {
248 return link(p, data, offset)
249 }
250 return 0
251}
252
// link parses a link, an image or a footnote starting at data[offset].
//
// The construct is classified first (see linkType), then the bracketed text
// is located, and finally one of three tails is parsed: an inline
// "(destination "title")", a reference "[id]", or nothing at all (shortcut
// reference or footnote). On success the matching node is appended to
// p.currBlock and the number of bytes consumed, counted from the original
// offset, is returned. 0 is returned when the input does not form a valid
// construct or the referenced id is unknown.
func link(p *parser, data []byte, offset int) int {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.flags&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.flags&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// from here on all indices are relative to the opening '['
	data = data[offset:]

	var (
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// step over the '^' of "[^refId]"
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			// escaped byte: does not affect nesting
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0
	}

	// txtE is the index of the closing ']'
	txtE := i
	i++

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: treat everything up to ')' as the link
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step over the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the link text itself is the id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, turning each newline into a single space
				// unless a space already precedes it
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// same newline folding as for reference style links above
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			var fragment []byte
			if len(id) > 0 {
				// the fragment is the slugified id, capped at 16 bytes
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0
			}

			if t == linkDeferredFootnote {
				lr.noteID = len(p.notes) + 1
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		linkNode := NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		p.currBlock.AppendChild(linkNode)
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			tmpNode := p.currBlock
			p.currBlock = linkNode
			p.inline(data[1:txtE])
			p.currBlock = tmpNode
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode := NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		p.currBlock.AppendChild(linkNode)
		linkNode.AppendChild(text(data[1:txtE]))
		// account for the leading '!' that was skipped via offset++
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode := NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		p.currBlock.AppendChild(linkNode)
		if t == linkInlineFootnote {
			// account for the leading '^' that was skipped via offset++
			i++
		}

	default:
		return 0
	}

	return i
}
611
612func (p *parser) inlineHTMLComment(data []byte) int {
613 if len(data) < 5 {
614 return 0
615 }
616 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
617 return 0
618 }
619 i := 5
620 // scan for an end-of-comment marker, across lines if necessary
621 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
622 i++
623 }
624 // no end-of-comment marker
625 if i >= len(data) {
626 return 0
627 }
628 return i + 1
629}
630
// stripMailto returns link without a leading "mailto://" or "mailto:" scheme.
// The match is case-sensitive; any other input is returned unchanged. The
// result shares memory with link.
func stripMailto(link []byte) []byte {
	// the longer form must be tried first, since "mailto:" is a prefix of it
	for _, scheme := range []string{"mailto://", "mailto:"} {
		if bytes.HasPrefix(link, []byte(scheme)) {
			return link[len(scheme):]
		}
	}
	return link
}
640
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	notAutolink    autolinkType = iota // not an autolink (e.g. a plain HTML tag)
	normalAutolink                     // <scheme:...> autolink
	emailAutolink                      // <user@host> autolink
)
650
651// '<' when tags or autolinks are allowed
652func leftAngle(p *parser, data []byte, offset int) int {
653 data = data[offset:]
654 altype, end := tagLength(data)
655 if size := p.inlineHTMLComment(data); size > 0 {
656 end = size
657 }
658 if end > 2 {
659 if altype != notAutolink {
660 var uLink bytes.Buffer
661 unescapeText(&uLink, data[1:end+1-2])
662 if uLink.Len() > 0 {
663 link := uLink.Bytes()
664 node := NewNode(Link)
665 node.Destination = link
666 if altype == emailAutolink {
667 node.Destination = append([]byte("mailto:"), link...)
668 }
669 p.currBlock.AppendChild(node)
670 node.AppendChild(text(stripMailto(link)))
671 }
672 } else {
673 htmlTag := NewNode(HTMLSpan)
674 htmlTag.Literal = data[:end]
675 p.currBlock.AppendChild(htmlTag)
676 }
677 }
678
679 return end
680}
681
682// '\\' backslash escape
683var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
684
685func escape(p *parser, data []byte, offset int) int {
686 data = data[offset:]
687
688 if len(data) > 1 {
689 if p.flags&BackslashLineBreak != 0 && data[1] == '\n' {
690 p.currBlock.AppendChild(NewNode(Hardbreak))
691 return 2
692 }
693 if bytes.IndexByte(escapeChars, data[1]) < 0 {
694 return 0
695 }
696
697 p.currBlock.AppendChild(text(data[1:2]))
698 }
699
700 return 2
701}
702
703func unescapeText(ob *bytes.Buffer, src []byte) {
704 i := 0
705 for i < len(src) {
706 org := i
707 for i < len(src) && src[i] != '\\' {
708 i++
709 }
710
711 if i > org {
712 ob.Write(src[org:i])
713 }
714
715 if i+1 >= len(src) {
716 break
717 }
718
719 ob.WriteByte(src[i+1])
720 i += 2
721 }
722}
723
724// '&' escaped when it doesn't belong to an entity
725// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
726func entity(p *parser, data []byte, offset int) int {
727 data = data[offset:]
728
729 end := 1
730
731 if end < len(data) && data[end] == '#' {
732 end++
733 }
734
735 for end < len(data) && isalnum(data[end]) {
736 end++
737 }
738
739 if end < len(data) && data[end] == ';' {
740 end++ // real entity
741 } else {
742 return 0 // lone '&'
743 }
744
745 ent := data[:end]
746 // undo & escaping or it will be converted to &amp; by another
747 // escaper in the renderer
748 if bytes.Equal(ent, []byte("&")) {
749 ent = []byte{'&'}
750 }
751 p.currBlock.AppendChild(text(ent))
752
753 return end
754}
755
756func linkEndsWithEntity(data []byte, linkEnd int) bool {
757 entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
758 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
759}
760
761func maybeAutoLink(p *parser, data []byte, offset int) int {
762 // quick check to rule out most false hits
763 if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below
764 return 0
765 }
766 prefixes := []string{
767 "http://",
768 "https://",
769 "ftp://",
770 "file://",
771 "mailto:",
772 }
773 for _, prefix := range prefixes {
774 endOfHead := offset + 8 // 8 is the len() of the longest prefix
775 if endOfHead > len(data) {
776 endOfHead = len(data)
777 }
778 head := bytes.ToLower(data[offset:endOfHead])
779 if bytes.HasPrefix(head, []byte(prefix)) {
780 return autoLink(p, data, offset)
781 }
782 }
783 return 0
784}
785
// autoLink turns a bare URL starting at data[offset] into a Link node that is
// appended to p.currBlock, and returns the number of bytes consumed. It
// returns 0 when the text at offset should not be linked.
//
// If the URL turns out to be part of a literal <a href="...">...</a> element
// (as matched by anchorRe), the remainder of that element is emitted as an
// HTMLSpan instead so the URL inside it is not linked a second time.
func autoLink(p *parser, data []byte, offset int) int {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	// walk back to the nearest '<' (or to the start of data)
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit only the part of the anchor from offset onwards
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		p.currBlock.AppendChild(anchorClose)
		return len(anchorStr) - offsetFromAnchor
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0
	}

	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0
	}

	// the link runs up to the first byte for which isEndOfLink is true
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// scan backwards through the current line, balancing delimiters
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		if openDelim == 0 {
			// the balancing scan closed the delimiter: drop it from the link
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		p.currBlock.AppendChild(node)
		node.AppendChild(text(uLink.Bytes()))
	}

	return linkEnd
}
905
906func isEndOfLink(char byte) bool {
907 return isspace(char) || char == '<'
908}
909
910var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
911var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
912
913func isSafeLink(link []byte) bool {
914 for _, path := range validPaths {
915 if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
916 if len(link) == len(path) {
917 return true
918 } else if isalnum(link[len(path)]) {
919 return true
920 }
921 }
922 }
923
924 for _, prefix := range validUris {
925 // TODO: handle unicode here
926 // case-insensitive prefix test
927 if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
928 return true
929 }
930 }
931
932 return false
933}
934
935// return the length of the given tag, or 0 is it's not valid
936func tagLength(data []byte) (autolink autolinkType, end int) {
937 var i, j int
938
939 // a valid tag can't be shorter than 3 chars
940 if len(data) < 3 {
941 return notAutolink, 0
942 }
943
944 // begins with a '<' optionally followed by '/', followed by letter or number
945 if data[0] != '<' {
946 return notAutolink, 0
947 }
948 if data[1] == '/' {
949 i = 2
950 } else {
951 i = 1
952 }
953
954 if !isalnum(data[i]) {
955 return notAutolink, 0
956 }
957
958 // scheme test
959 autolink = notAutolink
960
961 // try to find the beginning of an URI
962 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
963 i++
964 }
965
966 if i > 1 && i < len(data) && data[i] == '@' {
967 if j = isMailtoAutoLink(data[i:]); j != 0 {
968 return emailAutolink, i + j
969 }
970 }
971
972 if i > 2 && i < len(data) && data[i] == ':' {
973 autolink = normalAutolink
974 i++
975 }
976
977 // complete autolink test: no whitespace or ' or "
978 switch {
979 case i >= len(data):
980 autolink = notAutolink
981 case autolink != notAutolink:
982 j = i
983
984 for i < len(data) {
985 if data[i] == '\\' {
986 i += 2
987 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
988 break
989 } else {
990 i++
991 }
992
993 }
994
995 if i >= len(data) {
996 return autolink, 0
997 }
998 if i > j && data[i] == '>' {
999 return autolink, i + 1
1000 }
1001
1002 // one of the forbidden chars has been found
1003 autolink = notAutolink
1004 }
1005 i += bytes.IndexByte(data[i:], '>')
1006 if i < 0 {
1007 return autolink, 0
1008 }
1009 return autolink, i + 1
1010}
1011
1012// look for the address part of a mail autolink and '>'
1013// this is less strict than the original markdown e-mail address matching
1014func isMailtoAutoLink(data []byte) int {
1015 nb := 0
1016
1017 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1018 for i := 0; i < len(data); i++ {
1019 if isalnum(data[i]) {
1020 continue
1021 }
1022
1023 switch data[i] {
1024 case '@':
1025 nb++
1026
1027 case '-', '.', '_':
1028 break
1029
1030 case '>':
1031 if nb == 1 {
1032 return i + 1
1033 }
1034 return 0
1035 default:
1036 return 0
1037 }
1038 }
1039
1040 return 0
1041}
1042
1043// look for the next emph char, skipping other constructs
1044func helperFindEmphChar(data []byte, c byte) int {
1045 i := 0
1046
1047 for i < len(data) {
1048 for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
1049 i++
1050 }
1051 if i >= len(data) {
1052 return 0
1053 }
1054 // do not count escaped chars
1055 if i != 0 && data[i-1] == '\\' {
1056 i++
1057 continue
1058 }
1059 if data[i] == c {
1060 return i
1061 }
1062
1063 if data[i] == '`' {
1064 // skip a code span
1065 tmpI := 0
1066 i++
1067 for i < len(data) && data[i] != '`' {
1068 if tmpI == 0 && data[i] == c {
1069 tmpI = i
1070 }
1071 i++
1072 }
1073 if i >= len(data) {
1074 return tmpI
1075 }
1076 i++
1077 } else if data[i] == '[' {
1078 // skip a link
1079 tmpI := 0
1080 i++
1081 for i < len(data) && data[i] != ']' {
1082 if tmpI == 0 && data[i] == c {
1083 tmpI = i
1084 }
1085 i++
1086 }
1087 i++
1088 for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
1089 i++
1090 }
1091 if i >= len(data) {
1092 return tmpI
1093 }
1094 if data[i] != '[' && data[i] != '(' { // not a link
1095 if tmpI > 0 {
1096 return tmpI
1097 }
1098 continue
1099 }
1100 cc := data[i]
1101 i++
1102 for i < len(data) && data[i] != cc {
1103 if tmpI == 0 && data[i] == c {
1104 return i
1105 }
1106 i++
1107 }
1108 if i >= len(data) {
1109 return tmpI
1110 }
1111 i++
1112 }
1113 }
1114 return 0
1115}
1116
1117func helperEmphasis(p *parser, data []byte, c byte) int {
1118 i := 0
1119
1120 // skip one symbol if coming from emph3
1121 if len(data) > 1 && data[0] == c && data[1] == c {
1122 i = 1
1123 }
1124
1125 for i < len(data) {
1126 length := helperFindEmphChar(data[i:], c)
1127 if length == 0 {
1128 return 0
1129 }
1130 i += length
1131 if i >= len(data) {
1132 return 0
1133 }
1134
1135 if i+1 < len(data) && data[i+1] == c {
1136 i++
1137 continue
1138 }
1139
1140 if data[i] == c && !isspace(data[i-1]) {
1141
1142 if p.flags&NoIntraEmphasis != 0 {
1143 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1144 continue
1145 }
1146 }
1147
1148 emph := NewNode(Emph)
1149 p.currBlock.AppendChild(emph)
1150 tmp := p.currBlock
1151 p.currBlock = emph
1152 p.inline(data[:i])
1153 p.currBlock = tmp
1154 return i + 1
1155 }
1156 }
1157
1158 return 0
1159}
1160
1161func helperDoubleEmphasis(p *parser, data []byte, c byte) int {
1162 i := 0
1163
1164 for i < len(data) {
1165 length := helperFindEmphChar(data[i:], c)
1166 if length == 0 {
1167 return 0
1168 }
1169 i += length
1170
1171 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1172 nodeType := Strong
1173 if c == '~' {
1174 nodeType = Del
1175 }
1176 node := NewNode(nodeType)
1177 p.currBlock.AppendChild(node)
1178 tmp := p.currBlock
1179 p.currBlock = node
1180 p.inline(data[:i])
1181 p.currBlock = tmp
1182 return i + 2
1183 }
1184 i++
1185 }
1186 return 0
1187}
1188
1189func helperTripleEmphasis(p *parser, data []byte, offset int, c byte) int {
1190 i := 0
1191 origData := data
1192 data = data[offset:]
1193
1194 for i < len(data) {
1195 length := helperFindEmphChar(data[i:], c)
1196 if length == 0 {
1197 return 0
1198 }
1199 i += length
1200
1201 // skip whitespace preceded symbols
1202 if data[i] != c || isspace(data[i-1]) {
1203 continue
1204 }
1205
1206 switch {
1207 case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1208 // triple symbol found
1209 strong := NewNode(Strong)
1210 em := NewNode(Emph)
1211 strong.AppendChild(em)
1212 p.currBlock.AppendChild(strong)
1213 tmp := p.currBlock
1214 p.currBlock = em
1215 p.inline(data[:i])
1216 p.currBlock = tmp
1217 return i + 3
1218 case (i+1 < len(data) && data[i+1] == c):
1219 // double symbol found, hand over to emph1
1220 length = helperEmphasis(p, origData[offset-2:], c)
1221 if length == 0 {
1222 return 0
1223 }
1224 return length - 2
1225 default:
1226 // single symbol found, hand over to emph2
1227 length = helperDoubleEmphasis(p, origData[offset-1:], c)
1228 if length == 0 {
1229 return 0
1230 }
1231 return length - 1
1232 }
1233 }
1234 return 0
1235}
1236
1237func text(s []byte) *Node {
1238 node := NewNode(Text)
1239 node.Literal = s
1240 return node
1241}
1242
// normalizeURI is a placeholder for URI normalization; for now it returns its
// argument unchanged.
func normalizeURI(s []byte) []byte {
	// TODO: implement
	return s
}