inline.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse inline elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "regexp"
19 "strconv"
20)
21
var (
	// urlRe is the regular-expression source for a URL that either starts
	// with an http, https or ftp scheme or is rooted at "/".
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, anchored at the start of its input, a complete
	// <a href="URL" [title="..."]>URL</a> element. autoLink uses it to detect
	// a URL that already sits inside an HTML anchor.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
)
26
// Functions to parse text within a block.
// Each function returns the number of chars it consumed.
// data is the complete block being rendered;
// offset is the number of valid chars before the current cursor.
31
// inline parses the inline-level markup in data and appends the resulting
// nodes to p.currBlock. Bytes that have no entry in p.inlineCallback are
// gathered into text nodes; a byte that does have a callback is offered to it,
// and the callback reports how many bytes it consumed (0 means it declined).
// Recursion depth is capped by p.maxNesting: once that depth is reached the
// call returns without emitting anything.
func (p *parser) inline(data []byte) {
	// this is called recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	// data[i:end] is the pending run of inactive bytes not yet emitted
	i, end := 0, 0
	for i < len(data) {
		// Stop at a newline that is the very last byte of data
		if data[i] == '\n' && i+1 == len(data) {
			break
		}
		// Copy inactive chars into the output, but first check for one quirk:
		// 'h', 'm' and 'f' all might trigger a check for autolink processing
		// and end this run of inactive characters. However, there's one nasty
		// case where breaking this run would be bad: in smartypants fraction
		// detection, we expect things like "1/2th" to be in a single run. So
		// we check here if an 'h' is followed by 't' (from 'http') and if it's
		// not, we short circuit the 'h' into the run of inactive characters.
		//
		// Also, in a similar fashion maybeLineBreak breaks this run of chars,
		// but smartDash processor relies on seeing context around the dashes.
		// Fix this somehow.
		for end < len(data) {
			if data[end] == ' ' {
				consumed, br := maybeLineBreak(p, data, end)
				if consumed > 0 {
					// flush the run, emit the break if any, and resume
					// after the spaces (and newline) that were consumed
					p.currBlock.appendChild(text(data[i:end]))
					if br {
						p.currBlock.appendChild(NewNode(Hardbreak))
					}
					i = end
					i += consumed
					end = i
				} else {
					end++
				}
				continue
			}
			if p.inlineCallback[data[end]] != nil {
				if end+1 < len(data) && data[end] == 'h' && data[end+1] != 't' {
					end++
				} else {
					break
				}
			} else {
				end++
			}
		}

		// emit whatever inactive bytes were gathered (possibly none)
		p.currBlock.appendChild(text(data[i:end]))

		if end >= len(data) {
			break
		}
		i = end

		// call the trigger
		handler := p.inlineCallback[data[end]]
		if consumed := handler(p, data, i); consumed == 0 {
			// no action from the callback; buffer the byte for later
			end = i + 1
		} else {
			// skip past whatever the callback used
			i += consumed
			end = i
		}
	}

	p.nesting--
}
104
105// single and double emphasis parsing
106func emphasis(p *parser, data []byte, offset int) int {
107 data = data[offset:]
108 c := data[0]
109 ret := 0
110
111 if len(data) > 2 && data[1] != c {
112 // whitespace cannot follow an opening emphasis;
113 // strikethrough only takes two characters '~~'
114 if c == '~' || isspace(data[1]) {
115 return 0
116 }
117 if ret = helperEmphasis(p, data[1:], c); ret == 0 {
118 return 0
119 }
120
121 return ret + 1
122 }
123
124 if len(data) > 3 && data[1] == c && data[2] != c {
125 if isspace(data[2]) {
126 return 0
127 }
128 if ret = helperDoubleEmphasis(p, data[2:], c); ret == 0 {
129 return 0
130 }
131
132 return ret + 2
133 }
134
135 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
136 if c == '~' || isspace(data[3]) {
137 return 0
138 }
139 if ret = helperTripleEmphasis(p, data, 3, c); ret == 0 {
140 return 0
141 }
142
143 return ret + 3
144 }
145
146 return 0
147}
148
149func codeSpan(p *parser, data []byte, offset int) int {
150 data = data[offset:]
151
152 nb := 0
153
154 // count the number of backticks in the delimiter
155 for nb < len(data) && data[nb] == '`' {
156 nb++
157 }
158
159 // find the next delimiter
160 i, end := 0, 0
161 for end = nb; end < len(data) && i < nb; end++ {
162 if data[end] == '`' {
163 i++
164 } else {
165 i = 0
166 }
167 }
168
169 // no matching delimiter?
170 if i < nb && end >= len(data) {
171 return 0
172 }
173
174 // trim outside whitespace
175 fBegin := nb
176 for fBegin < end && data[fBegin] == ' ' {
177 fBegin++
178 }
179
180 fEnd := end - nb
181 for fEnd > fBegin && data[fEnd-1] == ' ' {
182 fEnd--
183 }
184
185 // render the code span
186 if fBegin != fEnd {
187 code := NewNode(Code)
188 code.Literal = data[fBegin:fEnd]
189 p.currBlock.appendChild(code)
190 }
191
192 return end
193
194}
195
196// newline preceded by two spaces becomes <br>
197func maybeLineBreak(p *parser, data []byte, offset int) (int, bool) {
198 origOffset := offset
199 for offset < len(data) && data[offset] == ' ' {
200 offset++
201 }
202 if offset < len(data) && data[offset] == '\n' {
203 if offset-origOffset >= 2 {
204 return offset - origOffset + 1, true
205 }
206 return offset - origOffset, false
207 }
208 return 0, false
209}
210
211// newline without two spaces works when HardLineBreak is enabled
212func lineBreak(p *parser, data []byte, offset int) int {
213 if p.flags&HardLineBreak != 0 {
214 p.currBlock.appendChild(NewNode(Hardbreak))
215 return 1
216 }
217 return 0
218}
219
// linkType identifies which bracketed construct link is parsing.
type linkType int

const (
	linkNormal           linkType = iota // [text] regular link
	linkImg                              // ![alt] image
	linkDeferredFootnote                 // [^refId] footnote defined elsewhere
	linkInlineFootnote                   // ^[text] footnote defined in place
)
228
229func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
230 if t == linkDeferredFootnote {
231 return false
232 }
233 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
234}
235
236func maybeImage(p *parser, data []byte, offset int) int {
237 if offset < len(data)-1 && data[offset+1] == '[' {
238 return link(p, data, offset)
239 }
240 return 0
241}
242
243func maybeInlineFootnote(p *parser, data []byte, offset int) int {
244 if offset < len(data)-1 && data[offset+1] == '[' {
245 return link(p, data, offset)
246 }
247 return 0
248}
249
// '[': parse a link or an image or a footnote
//
// link is entered at data[offset], which is '[' for links and deferred
// footnotes, '!' for images and '^' for inline footnotes. It recognises inline
// links "[text](url "title")", reference links "[text][id]", shortcut
// references "[text]", images and both footnote forms, and appends the
// resulting node to p.currBlock. It returns the number of bytes consumed
// counted from the original offset, or 0 when the input is not a valid link.
func link(p *parser, data []byte, offset int) int {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.flags&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset += 1
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.flags&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset += 1
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// from here on data[0] is the opening '['
	data = data[offset:]

	var (
		i                       = 1
		noteId                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// step over the '^' of a deferred footnote
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0
	}

	// txtE is the index of the ']' that closes the link text
	txtE := i
	i++

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence; without one the quote was
			// part of the link itself, so there is no title
			if data[titleE] != '\'' && data[titleE] != '"' {
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step over the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the link text itself is the id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, turning each newline into a single space
				// unless a space already precedes it
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// copy the text, turning each newline into a single space
			// unless a space already precedes it
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		if t == linkInlineFootnote {
			// create a new reference
			noteId = len(p.notes) + 1

			// the fragment is the slugified id cut to at most 16 bytes, or
			// "footnote-<n>" when the id is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
			}

			ref := &reference{
				noteId:   noteId,
				hasBlock: false,
				link:     fragment,
				title:    id,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0
			}

			if t == linkDeferredFootnote {
				lr.noteId = len(p.notes) + 1
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteId = lr.noteId
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		linkNode := NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		p.currBlock.appendChild(linkNode)
		if len(altContent) > 0 {
			linkNode.appendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			tmpNode := p.currBlock
			p.currBlock = linkNode
			p.inline(data[1:txtE])
			p.currBlock = tmpNode
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode := NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		p.currBlock.appendChild(linkNode)
		linkNode.appendChild(text(data[1:txtE]))
		// account for the leading '!' that was sliced off
		i += 1

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode := NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteId
		p.currBlock.appendChild(linkNode)
		if t == linkInlineFootnote {
			// account for the leading '^' that was sliced off
			i++
		}

	default:
		return 0
	}

	return i
}
608
609func (p *parser) inlineHtmlComment(data []byte) int {
610 if len(data) < 5 {
611 return 0
612 }
613 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
614 return 0
615 }
616 i := 5
617 // scan for an end-of-comment marker, across lines if necessary
618 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
619 i++
620 }
621 // no end-of-comment marker
622 if i >= len(data) {
623 return 0
624 }
625 return i + 1
626}
627
// stripMailto returns link without a leading "mailto://" or "mailto:" scheme.
// The longer form is tried first; a link with neither prefix is returned
// unchanged. The match is case-sensitive.
func stripMailto(link []byte) []byte {
	for _, scheme := range []string{"mailto://", "mailto:"} {
		if bytes.HasPrefix(link, []byte(scheme)) {
			return link[len(scheme):]
		}
	}
	return link
}
637
638// '<' when tags or autolinks are allowed
639func leftAngle(p *parser, data []byte, offset int) int {
640 data = data[offset:]
641 altype := LinkTypeNotAutolink
642 end := tagLength(data, &altype)
643 if size := p.inlineHtmlComment(data); size > 0 {
644 end = size
645 }
646 if end > 2 {
647 if altype != LinkTypeNotAutolink {
648 var uLink bytes.Buffer
649 unescapeText(&uLink, data[1:end+1-2])
650 if uLink.Len() > 0 {
651 link := uLink.Bytes()
652 node := NewNode(Link)
653 node.Destination = link
654 if altype == LinkTypeEmail {
655 node.Destination = append([]byte("mailto:"), link...)
656 }
657 p.currBlock.appendChild(node)
658 node.appendChild(text(stripMailto(link)))
659 }
660 } else {
661 htmlTag := NewNode(HTMLSpan)
662 htmlTag.Literal = data[:end]
663 p.currBlock.appendChild(htmlTag)
664 }
665 }
666
667 return end
668}
669
// '\\' backslash escape
//
// escapeChars lists the bytes that may follow a backslash to form an escape;
// escape looks the byte after the backslash up in this set.
var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
672
673func escape(p *parser, data []byte, offset int) int {
674 data = data[offset:]
675
676 if len(data) > 1 {
677 if p.flags&BackslashLineBreak != 0 && data[1] == '\n' {
678 p.currBlock.appendChild(NewNode(Hardbreak))
679 return 2
680 }
681 if bytes.IndexByte(escapeChars, data[1]) < 0 {
682 return 0
683 }
684
685 p.currBlock.appendChild(text(data[1:2]))
686 }
687
688 return 2
689}
690
691func unescapeText(ob *bytes.Buffer, src []byte) {
692 i := 0
693 for i < len(src) {
694 org := i
695 for i < len(src) && src[i] != '\\' {
696 i++
697 }
698
699 if i > org {
700 ob.Write(src[org:i])
701 }
702
703 if i+1 >= len(src) {
704 break
705 }
706
707 ob.WriteByte(src[i+1])
708 i += 2
709 }
710}
711
712// '&' escaped when it doesn't belong to an entity
713// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
714func entity(p *parser, data []byte, offset int) int {
715 data = data[offset:]
716
717 end := 1
718
719 if end < len(data) && data[end] == '#' {
720 end++
721 }
722
723 for end < len(data) && isalnum(data[end]) {
724 end++
725 }
726
727 if end < len(data) && data[end] == ';' {
728 end++ // real entity
729 } else {
730 return 0 // lone '&'
731 }
732
733 ent := data[:end]
734 // undo & escaping or it will be converted to &amp; by another
735 // escaper in the renderer
736 if bytes.Equal(ent, []byte("&")) {
737 ent = []byte{'&'}
738 }
739 p.currBlock.appendChild(text(ent))
740
741 return end
742}
743
744func linkEndsWithEntity(data []byte, linkEnd int) bool {
745 entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
746 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
747}
748
749func maybeAutoLink(p *parser, data []byte, offset int) int {
750 // quick check to rule out most false hits
751 if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below
752 return 0
753 }
754 prefixes := []string{
755 "http://",
756 "https://",
757 "ftp://",
758 "file://",
759 "mailto:",
760 }
761 for _, prefix := range prefixes {
762 endOfHead := offset + 8 // 8 is the len() of the longest prefix
763 if endOfHead > len(data) {
764 endOfHead = len(data)
765 }
766 head := bytes.ToLower(data[offset:endOfHead])
767 if bytes.HasPrefix(head, []byte(prefix)) {
768 return autoLink(p, data, offset)
769 }
770 }
771 return 0
772}
773
// autoLink turns the bare URL at data[offset] into a Link node appended to
// p.currBlock. If the URL turns out to sit inside an HTML anchor matched by
// anchorRe, the rest of that anchor is emitted verbatim as an HTMLSpan
// instead. The return value is the number of bytes handled (measured from
// offset-rewind for a plain link), or 0 when the text is not an acceptable
// link.
func autoLink(p *parser, data []byte, offset int) int {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit the part of the anchor from offset onward untouched
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		p.currBlock.appendChild(anchorClose)
		return len(anchorStr) - offsetFromAnchor
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0
	}

	// keep the full buffer for the backward delimiter scan below
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0
	}

	// the link runs up to the first whitespace or '<'
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 * foo http://www.pokemon.com/Pikachu_(Electric) bar
		 * => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backward through the current line, balancing delimiters
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced: the trailing delimiter is dropped from the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		p.currBlock.appendChild(node)
		node.appendChild(text(uLink.Bytes()))
	}

	return linkEnd
}
893
894func isEndOfLink(char byte) bool {
895 return isspace(char) || char == '<'
896}
897
// validUris lists the lower-case scheme prefixes that isSafeLink accepts
// (compared case-insensitively).
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}

// validPaths lists the path prefixes that isSafeLink accepts (compared
// exactly).
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
900
901func isSafeLink(link []byte) bool {
902 for _, path := range validPaths {
903 if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
904 if len(link) == len(path) {
905 return true
906 } else if isalnum(link[len(path)]) {
907 return true
908 }
909 }
910 }
911
912 for _, prefix := range validUris {
913 // TODO: handle unicode here
914 // case-insensitive prefix test
915 if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
916 return true
917 }
918 }
919
920 return false
921}
922
// return the length of the given tag, or 0 if it's not valid
//
// data must start at the '<'. The returned length includes the closing '>'.
// *autolink is set to LinkTypeEmail or LinkTypeNormal when the construct is
// an e-mail or URI autolink, and to LinkTypeNotAutolink otherwise.
func tagLength(data []byte, autolink *LinkType) int {
	var i, j int

	// a valid tag can't be shorter than 3 chars
	if len(data) < 3 {
		return 0
	}

	// begins with a '<' optionally followed by '/', followed by letter or number
	if data[0] != '<' {
		return 0
	}
	if data[1] == '/' {
		i = 2
	} else {
		i = 1
	}

	if !isalnum(data[i]) {
		return 0
	}

	// scheme test
	*autolink = LinkTypeNotAutolink

	// try to find the beginning of an URI
	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
		i++
	}

	// "<name@..." may be an e-mail autolink
	if i > 1 && i < len(data) && data[i] == '@' {
		if j = isMailtoAutoLink(data[i:]); j != 0 {
			*autolink = LinkTypeEmail
			return i + j
		}
	}

	// "<scheme:..." may be a URI autolink
	if i > 2 && i < len(data) && data[i] == ':' {
		*autolink = LinkTypeNormal
		i++
	}

	// complete autolink test: no whitespace or ' or "
	switch {
	case i >= len(data):
		*autolink = LinkTypeNotAutolink
	case *autolink != 0:
		// j marks where the part after the scheme starts
		j = i

		for i < len(data) {
			if data[i] == '\\' {
				i += 2
			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
				break
			} else {
				i++
			}

		}

		if i >= len(data) {
			return 0
		}
		// a non-empty remainder closed by '>' completes the autolink
		if i > j && data[i] == '>' {
			return i + 1
		}

		// one of the forbidden chars has been found
		*autolink = LinkTypeNotAutolink
	}

	// look for something looking like a tag end
	for i < len(data) && data[i] != '>' {
		i++
	}
	if i >= len(data) {
		return 0
	}
	return i + 1
}
1004
1005// look for the address part of a mail autolink and '>'
1006// this is less strict than the original markdown e-mail address matching
1007func isMailtoAutoLink(data []byte) int {
1008 nb := 0
1009
1010 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1011 for i := 0; i < len(data); i++ {
1012 if isalnum(data[i]) {
1013 continue
1014 }
1015
1016 switch data[i] {
1017 case '@':
1018 nb++
1019
1020 case '-', '.', '_':
1021 break
1022
1023 case '>':
1024 if nb == 1 {
1025 return i + 1
1026 } else {
1027 return 0
1028 }
1029 default:
1030 return 0
1031 }
1032 }
1033
1034 return 0
1035}
1036
// look for the next emph char, skipping other constructs
//
// helperFindEmphChar returns the index in data of the next unescaped c,
// stepping over code spans and link-like bracket constructs. When such a
// construct is left unterminated it falls back to the first c seen inside it.
// A result of 0 means nothing usable was found.
func helperFindEmphChar(data []byte, c byte) int {
	size := len(data)
	pos := 0

	for pos < size {
		// advance to the next byte that could matter
		for pos < size && data[pos] != c && data[pos] != '`' && data[pos] != '[' {
			pos++
		}
		if pos >= size {
			return 0
		}
		// do not count escaped chars
		if pos != 0 && data[pos-1] == '\\' {
			pos++
			continue
		}

		switch data[pos] {
		case c:
			return pos

		case '`':
			// skip a code span, remembering the first c inside it
			fallback := 0
			for pos++; pos < size && data[pos] != '`'; pos++ {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
			}
			if pos >= size {
				return fallback
			}
			pos++

		case '[':
			// skip a link, remembering the first c inside the brackets
			fallback := 0
			for pos++; pos < size && data[pos] != ']'; pos++ {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
			}
			pos++
			for pos < size && (data[pos] == ' ' || data[pos] == '\n') {
				pos++
			}
			if pos >= size {
				return fallback
			}

			opener := data[pos]
			if opener != '[' && opener != '(' { // not a link
				if fallback > 0 {
					return fallback
				}
				continue
			}

			// scan on until the opener byte shows up again
			for pos++; pos < size && data[pos] != opener; pos++ {
				if fallback == 0 && data[pos] == c {
					return pos
				}
			}
			if pos >= size {
				return fallback
			}
			pos++
		}
	}
	return 0
}
1111
// helperEmphasis looks for the delimiter c that closes a single emphasis
// whose content starts at data[0]. On success it appends an Emph node to
// p.currBlock, parses data[:i] into it, and returns the index just past the
// closing delimiter; it returns 0 when no closing delimiter is found.
func helperEmphasis(p *parser, data []byte, c byte) int {
	i := 0

	// skip one symbol if coming from emph3
	if len(data) > 1 && data[0] == c && data[1] == c {
		i = 1
	}

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0
		}
		i += length
		if i >= len(data) {
			return 0
		}

		// a doubled delimiter is not a single-emphasis closer; step over it
		if i+1 < len(data) && data[i+1] == c {
			i++
			continue
		}

		// the closer may not be preceded by whitespace
		if data[i] == c && !isspace(data[i-1]) {

			// with NoIntraEmphasis the closer must be followed by the end of
			// data, whitespace or punctuation
			if p.flags&NoIntraEmphasis != 0 {
				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
					continue
				}
			}

			emph := NewNode(Emph)
			p.currBlock.appendChild(emph)
			// parse the content with the new node as the current block
			tmp := p.currBlock
			p.currBlock = emph
			p.inline(data[:i])
			p.currBlock = tmp
			return i + 1
		}
	}

	return 0
}
1155
1156func helperDoubleEmphasis(p *parser, data []byte, c byte) int {
1157 i := 0
1158
1159 for i < len(data) {
1160 length := helperFindEmphChar(data[i:], c)
1161 if length == 0 {
1162 return 0
1163 }
1164 i += length
1165
1166 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1167 nodeType := Strong
1168 if c == '~' {
1169 nodeType = Del
1170 }
1171 node := NewNode(nodeType)
1172 p.currBlock.appendChild(node)
1173 tmp := p.currBlock
1174 p.currBlock = node
1175 p.inline(data[:i])
1176 p.currBlock = tmp
1177 return i + 2
1178 }
1179 i++
1180 }
1181 return 0
1182}
1183
// helperTripleEmphasis handles content opened by three delimiters; data is
// the input including the opening delimiters and offset is where the content
// starts. When the first usable closer is itself tripled, it appends a Strong
// node wrapping an Emph node and parses the content into the Emph node. When
// the closer is doubled or single, it re-parses through helperEmphasis or
// helperDoubleEmphasis respectively, backing up over the opening delimiters.
// It returns the number of content bytes consumed, or 0 on failure.
func helperTripleEmphasis(p *parser, data []byte, offset int, c byte) int {
	i := 0
	origData := data
	data = data[offset:]

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0
		}
		i += length

		// skip whitespace preceded symbols
		if data[i] != c || isspace(data[i-1]) {
			continue
		}

		switch {
		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
			// triple symbol found
			strong := NewNode(Strong)
			em := NewNode(Emph)
			strong.appendChild(em)
			p.currBlock.appendChild(strong)
			// parse the content with the inner node as the current block
			tmp := p.currBlock
			p.currBlock = em
			p.inline(data[:i])
			p.currBlock = tmp
			return i + 3
		case (i+1 < len(data) && data[i+1] == c):
			// double symbol found, hand over to emph1
			length = helperEmphasis(p, origData[offset-2:], c)
			if length == 0 {
				return 0
			} else {
				return length - 2
			}
		default:
			// single symbol found, hand over to emph2
			length = helperDoubleEmphasis(p, origData[offset-1:], c)
			if length == 0 {
				return 0
			} else {
				return length - 1
			}
		}
	}
	return 0
}
1233
1234func text(s []byte) *Node {
1235 node := NewNode(Text)
1236 node.Literal = s
1237 return node
1238}
1239
// normalizeURI is a placeholder for URI normalisation; it currently returns
// s unchanged.
func normalizeURI(s []byte) []byte {
	return s // TODO: implement
}