v2: Only split when inline callbacks consume some bytes (#301) * Only split when inline callbacks consume some bytes The former hacks around maybeLineBreak and Smartypants are no longer needed. The algorithm has been streamlined: shorter, simpler, faster. The 'currBlock' field of the parser is gone. * Remove spurious logs
@@ -32,7 +32,7 @@ // Each function returns the number of chars taken care of
// data is the complete block being rendered // offset is the number of valid chars before the current cursor -func (p *parser) inline(data []byte) { +func (p *parser) inline(currBlock *Node, data []byte) { // this is called recursively: enforce a maximum depth if p.nesting >= p.maxNesting { return@@ -45,59 +45,35 @@ // Stop at EOL
if data[i] == '\n' && i+1 == len(data) { break } - // Copy inactive chars into the output, but first check for one quirk: - // 'h', 'm' and 'f' all might trigger a check for autolink processing - // and end this run of inactive characters. However, there's one nasty - // case where breaking this run would be bad: in smartypants fraction - // detection, we expect things like "1/2th" to be in a single run. So - // we check here if an 'h' is followed by 't' (from 'http') and if it's - // not, we short circuit the 'h' into the run of inactive characters. - // - // Also, in a similar fashion maybeLineBreak breaks this run of chars, - // but smartDash processor relies on seeing context around the dashes. - // Fix this somehow. - for end < len(data) { - if data[end] == ' ' { - consumed, br := maybeLineBreak(p, data, end) - if consumed > 0 { - p.currBlock.AppendChild(text(data[i:end])) - if br { - p.currBlock.AppendChild(NewNode(Hardbreak)) - } - i = end - i += consumed - end = i - } else { - end++ - } - continue - } + + for ; end < len(data); end++ { if p.inlineCallback[data[end]] != nil { - if end+1 < len(data) && data[end] == 'h' && data[end+1] != 't' { - end++ - } else { - break - } - } else { - end++ + break } } - p.currBlock.AppendChild(text(data[i:end])) - if end >= len(data) { + if data[end-1] == '\n' { + currBlock.AppendChild(text(data[i : end-1])) + } else { + currBlock.AppendChild(text(data[i:end])) + } break } - i = end // call the trigger handler := p.inlineCallback[data[end]] - if consumed := handler(p, data, i); consumed == 0 { - // no action from the callback; buffer the byte for later - end = i + 1 + if consumed, node := handler(p, data, end); consumed == 0 { + // No action from the callback. + end++ } else { - // skip past whatever the callback used - i += consumed + // Copy inactive chars into the output. + currBlock.AppendChild(text(data[i:end])) + if node != nil { + currBlock.AppendChild(node) + } + // Skip past whatever the callback used. + i = end + consumed end = i } }@@ -106,50 +82,52 @@ p.nesting--
} // single and double emphasis parsing -func emphasis(p *parser, data []byte, offset int) int { +func emphasis(p *parser, data []byte, offset int) (int, *Node) { data = data[offset:] c := data[0] - ret := 0 if len(data) > 2 && data[1] != c { // whitespace cannot follow an opening emphasis; // strikethrough only takes two characters '~~' if c == '~' || isspace(data[1]) { - return 0 + return 0, nil } - if ret = helperEmphasis(p, data[1:], c); ret == 0 { - return 0 + ret, node := helperEmphasis(p, data[1:], c) + if ret == 0 { + return 0, nil } - return ret + 1 + return ret + 1, node } if len(data) > 3 && data[1] == c && data[2] != c { if isspace(data[2]) { - return 0 + return 0, nil } - if ret = helperDoubleEmphasis(p, data[2:], c); ret == 0 { - return 0 + ret, node := helperDoubleEmphasis(p, data[2:], c) + if ret == 0 { + return 0, nil } - return ret + 2 + return ret + 2, node } if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c { if c == '~' || isspace(data[3]) { - return 0 + return 0, nil } - if ret = helperTripleEmphasis(p, data, 3, c); ret == 0 { - return 0 + ret, node := helperTripleEmphasis(p, data, 3, c) + if ret == 0 { + return 0, nil } - return ret + 3 + return ret + 3, node } - return 0 + return 0, nil } -func codeSpan(p *parser, data []byte, offset int) int { +func codeSpan(p *parser, data []byte, offset int) (int, *Node) { data = data[offset:] nb := 0@@ -171,7 +149,7 @@ }
// no matching delimiter? if i < nb && end >= len(data) { - return 0 + return 0, nil } // trim outside whitespace@@ -189,35 +167,34 @@ // render the code span
if fBegin != fEnd { code := NewNode(Code) code.Literal = data[fBegin:fEnd] - p.currBlock.AppendChild(code) + return end, code } - return end - + return end, nil } // newline preceded by two spaces becomes <br> -func maybeLineBreak(p *parser, data []byte, offset int) (int, bool) { +func maybeLineBreak(p *parser, data []byte, offset int) (int, *Node) { origOffset := offset for offset < len(data) && data[offset] == ' ' { offset++ } + if offset < len(data) && data[offset] == '\n' { if offset-origOffset >= 2 { - return offset - origOffset + 1, true + return offset - origOffset + 1, NewNode(Hardbreak) } - return offset - origOffset, false + return offset - origOffset, nil } - return 0, false + return 0, nil } // newline without two spaces works when HardLineBreak is enabled -func lineBreak(p *parser, data []byte, offset int) int { +func lineBreak(p *parser, data []byte, offset int) (int, *Node) { if p.flags&HardLineBreak != 0 { - p.currBlock.AppendChild(NewNode(Hardbreak)) - return 1 + return 1, NewNode(Hardbreak) } - return 0 + return 0, nil } type linkType int@@ -236,25 +213,25 @@ }
return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^' } -func maybeImage(p *parser, data []byte, offset int) int { +func maybeImage(p *parser, data []byte, offset int) (int, *Node) { if offset < len(data)-1 && data[offset+1] == '[' { return link(p, data, offset) } - return 0 + return 0, nil } -func maybeInlineFootnote(p *parser, data []byte, offset int) int { +func maybeInlineFootnote(p *parser, data []byte, offset int) (int, *Node) { if offset < len(data)-1 && data[offset+1] == '[' { return link(p, data, offset) } - return 0 + return 0, nil } // '[': parse a link or an image or a footnote -func link(p *parser, data []byte, offset int) int { +func link(p *parser, data []byte, offset int) (int, *Node) { // no links allowed inside regular links, footnote, and deferred footnotes if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') { - return 0 + return 0, nil } var t linkType@@ -315,7 +292,7 @@ }
} if i >= len(data) { - return 0 + return 0, nil } txtE := i@@ -355,7 +332,7 @@ }
} if i >= len(data) { - return 0 + return 0, nil } linkE := i@@ -380,7 +357,7 @@ }
} if i >= len(data) { - return 0 + return 0, nil } // skip whitespace after title@@ -432,7 +409,7 @@ for i < len(data) && data[i] != ']' {
i++ } if i >= len(data) { - return 0 + return 0, nil } linkE := i@@ -462,7 +439,7 @@
// find the reference with matching id lr, ok := p.getRef(string(id)) if !ok { - return 0 + return 0, nil } // keep link and title from reference@@ -530,7 +507,7 @@ } else {
// find the reference with matching id lr, ok := p.getRef(string(id)) if !ok { - return 0 + return 0, nil } if t == linkDeferredFootnote {@@ -559,17 +536,17 @@ }
// links need something to click on and somewhere to go if len(uLink) == 0 || (t == linkNormal && txtE <= 1) { - return 0 + return 0, nil } } // call the relevant rendering function + var linkNode *Node switch t { case linkNormal: - linkNode := NewNode(Link) + linkNode = NewNode(Link) linkNode.Destination = normalizeURI(uLink) linkNode.Title = title - p.currBlock.AppendChild(linkNode) if len(altContent) > 0 { linkNode.AppendChild(text(altContent)) } else {@@ -577,36 +554,31 @@ // links cannot contain other links, so turn off link parsing
// temporarily and recurse insideLink := p.insideLink p.insideLink = true - tmpNode := p.currBlock - p.currBlock = linkNode - p.inline(data[1:txtE]) - p.currBlock = tmpNode + p.inline(linkNode, data[1:txtE]) p.insideLink = insideLink } case linkImg: - linkNode := NewNode(Image) + linkNode = NewNode(Image) linkNode.Destination = uLink linkNode.Title = title - p.currBlock.AppendChild(linkNode) linkNode.AppendChild(text(data[1:txtE])) i++ case linkInlineFootnote, linkDeferredFootnote: - linkNode := NewNode(Link) + linkNode = NewNode(Link) linkNode.Destination = link linkNode.Title = title linkNode.NoteID = noteID - p.currBlock.AppendChild(linkNode) if t == linkInlineFootnote { i++ } default: - return 0 + return 0, nil } - return i + return i, linkNode } func (p *parser) inlineHTMLComment(data []byte) int {@@ -649,7 +621,7 @@ emailAutolink
) // '<' when tags or autolinks are allowed -func leftAngle(p *parser, data []byte, offset int) int { +func leftAngle(p *parser, data []byte, offset int) (int, *Node) { data = data[offset:] altype, end := tagLength(data) if size := p.inlineHTMLComment(data); size > 0 {@@ -666,38 +638,37 @@ node.Destination = link
if altype == emailAutolink { node.Destination = append([]byte("mailto:"), link...) } - p.currBlock.AppendChild(node) node.AppendChild(text(stripMailto(link))) + return end, node } } else { htmlTag := NewNode(HTMLSpan) htmlTag.Literal = data[:end] - p.currBlock.AppendChild(htmlTag) + return end, htmlTag } } - return end + return end, nil } // '\\' backslash escape var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~") -func escape(p *parser, data []byte, offset int) int { +func escape(p *parser, data []byte, offset int) (int, *Node) { data = data[offset:] if len(data) > 1 { if p.flags&BackslashLineBreak != 0 && data[1] == '\n' { - p.currBlock.AppendChild(NewNode(Hardbreak)) - return 2 + return 2, NewNode(Hardbreak) } if bytes.IndexByte(escapeChars, data[1]) < 0 { - return 0 + return 0, nil } - p.currBlock.AppendChild(text(data[1:2])) + return 2, text(data[1:2]) } - return 2 + return 2, nil } func unescapeText(ob *bytes.Buffer, src []byte) {@@ -723,7 +694,7 @@ }
// '&' escaped when it doesn't belong to an entity // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; -func entity(p *parser, data []byte, offset int) int { +func entity(p *parser, data []byte, offset int) (int, *Node) { data = data[offset:] end := 1@@ -739,7 +710,7 @@
if end < len(data) && data[end] == ';' { end++ // real entity } else { - return 0 // lone '&' + return 0, nil // lone '&' } ent := data[:end]@@ -748,9 +719,8 @@ // escaper in the renderer
if bytes.Equal(ent, []byte("&")) { ent = []byte{'&'} } - p.currBlock.AppendChild(text(ent)) - return end + return end, text(ent) } func linkEndsWithEntity(data []byte, linkEnd int) bool {@@ -758,10 +728,10 @@ entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd } -func maybeAutoLink(p *parser, data []byte, offset int) int { +func maybeAutoLink(p *parser, data []byte, offset int) (int, *Node) { // quick check to rule out most false hits if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below - return 0 + return 0, nil } prefixes := []string{ "http://",@@ -780,10 +750,10 @@ if bytes.HasPrefix(head, []byte(prefix)) {
return autoLink(p, data, offset) } } - return 0 + return 0, nil } -func autoLink(p *parser, data []byte, offset int) int { +func autoLink(p *parser, data []byte, offset int) (int, *Node) { // Now a more expensive check to see if we're not inside an anchor element anchorStart := offset offsetFromAnchor := 0@@ -796,8 +766,7 @@ anchorStr := anchorRe.Find(data[anchorStart:])
if anchorStr != nil { anchorClose := NewNode(HTMLSpan) anchorClose.Literal = anchorStr[offsetFromAnchor:] - p.currBlock.AppendChild(anchorClose) - return len(anchorStr) - offsetFromAnchor + return len(anchorStr) - offsetFromAnchor, anchorClose } // scan backward for a word boundary@@ -806,14 +775,14 @@ for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
rewind++ } if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters - return 0 + return 0, nil } origData := data data = data[offset-rewind:] if !isSafeLink(data) { - return 0 + return 0, nil } linkEnd := 0@@ -896,11 +865,11 @@
if uLink.Len() > 0 { node := NewNode(Link) node.Destination = uLink.Bytes() - p.currBlock.AppendChild(node) node.AppendChild(text(uLink.Bytes())) + return linkEnd, node } - return linkEnd + return linkEnd, nil } func isEndOfLink(char byte) bool {@@ -1114,7 +1083,7 @@ }
return 0 } -func helperEmphasis(p *parser, data []byte, c byte) int { +func helperEmphasis(p *parser, data []byte, c byte) (int, *Node) { i := 0 // skip one symbol if coming from emph3@@ -1125,11 +1094,11 @@
for i < len(data) { length := helperFindEmphChar(data[i:], c) if length == 0 { - return 0 + return 0, nil } i += length if i >= len(data) { - return 0 + return 0, nil } if i+1 < len(data) && data[i+1] == c {@@ -1146,25 +1115,21 @@ }
} emph := NewNode(Emph) - p.currBlock.AppendChild(emph) - tmp := p.currBlock - p.currBlock = emph - p.inline(data[:i]) - p.currBlock = tmp - return i + 1 + p.inline(emph, data[:i]) + return i + 1, emph } } - return 0 + return 0, nil } -func helperDoubleEmphasis(p *parser, data []byte, c byte) int { +func helperDoubleEmphasis(p *parser, data []byte, c byte) (int, *Node) { i := 0 for i < len(data) { length := helperFindEmphChar(data[i:], c) if length == 0 { - return 0 + return 0, nil } i += length@@ -1174,19 +1139,15 @@ if c == '~' {
nodeType = Del } node := NewNode(nodeType) - p.currBlock.AppendChild(node) - tmp := p.currBlock - p.currBlock = node - p.inline(data[:i]) - p.currBlock = tmp - return i + 2 + p.inline(node, data[:i]) + return i + 2, node } i++ } - return 0 + return 0, nil } -func helperTripleEmphasis(p *parser, data []byte, offset int, c byte) int { +func helperTripleEmphasis(p *parser, data []byte, offset int, c byte) (int, *Node) { i := 0 origData := data data = data[offset:]@@ -1194,7 +1155,7 @@
for i < len(data) { length := helperFindEmphChar(data[i:], c) if length == 0 { - return 0 + return 0, nil } i += length@@ -1209,29 +1170,25 @@ // triple symbol found
strong := NewNode(Strong) em := NewNode(Emph) strong.AppendChild(em) - p.currBlock.AppendChild(strong) - tmp := p.currBlock - p.currBlock = em - p.inline(data[:i]) - p.currBlock = tmp - return i + 3 + p.inline(em, data[:i]) + return i + 3, strong case (i+1 < len(data) && data[i+1] == c): // double symbol found, hand over to emph1 - length = helperEmphasis(p, origData[offset-2:], c) + length, node := helperEmphasis(p, origData[offset-2:], c) if length == 0 { - return 0 + return 0, nil } - return length - 2 + return length - 2, node default: // single symbol found, hand over to emph2 - length = helperDoubleEmphasis(p, origData[offset-1:], c) + length, node := helperDoubleEmphasis(p, origData[offset-1:], c) if length == 0 { - return 0 + return 0, nil } - return length - 1 + return length - 1, node } } - return 0 + return 0, nil } func text(s []byte) *Node {
@@ -168,7 +168,7 @@ }
// Callback functions for inline parsing. One such function is defined // for each character that triggers a response when parsing inline data. -type inlineParser func(p *parser, data []byte, offset int) int +type inlineParser func(p *parser, data []byte, offset int) (int, *Node) // Parser holds runtime state used by the parser. // This is constructed by the Markdown function.@@ -191,7 +191,6 @@ tip *Node // = doc
oldTip *Node lastMatchedContainer *Node // = doc allClosed bool - currBlock *Node // a block node currently being parsed by inline parser } func (p *parser) getRef(refid string) (ref *reference, found bool) {@@ -367,6 +366,7 @@ p.lastMatchedContainer = docNode
p.allClosed = true // register inline parsers + p.inlineCallback[' '] = maybeLineBreak p.inlineCallback['*'] = emphasis p.inlineCallback['_'] = emphasis if extensions&Strikethrough != 0 {@@ -403,8 +403,7 @@ }
// Walk the tree again and process inline markdown in each block p.doc.Walk(func(node *Node, entering bool) WalkStatus { if node.Type == Paragraph || node.Type == Header || node.Type == TableCell { - p.currBlock = node - p.inline(node.content) + p.inline(node, node.content) node.content = nil } return GoToNext@@ -436,8 +435,7 @@ if ref.hasBlock {
flags |= ListItemContainsBlock p.block(ref.title) } else { - p.currBlock = block - p.inline(ref.title) + p.inline(block, ref.title) } flags &^= ListItemBeginningOfList | ListItemContainsBlock }@@ -447,8 +445,7 @@ p.tip = above
finalizeHTMLBlock(p.addBlock(HTMLBlock, []byte("</div>"))) block.Walk(func(node *Node, entering bool) WalkStatus { if node.Type == Paragraph || node.Type == Header { - p.currBlock = node - p.inline(node.content) + p.inline(node, node.content) node.content = nil } return GoToNext@@ -528,7 +525,7 @@ if ref.hasBlock {
flags |= ListItemContainsBlock p.block(ref.title) } else { - p.inline(ref.title) + p.inline(nil, ref.title) } flags &^= ListItemBeginningOfList | ListItemContainsBlock }