Merge pull request #27 from moshee/master Footnotes (addresses #14)
@@ -458,8 +458,8 @@ return 0
} var i int - for i = 0; data[i] != '\n'; i++ { - if data[i] != ' ' { + for i = 0; i < len(data) && data[i] != '\n'; i++ { + if data[i] != ' ' && data[i] != '\t' { return 0 } }@@ -1101,8 +1101,9 @@
line = i } + rawBytes := raw.Bytes() + // render the contents of the list item - rawBytes := raw.Bytes() var cooked bytes.Buffer if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 { // intermediate render of block li
@@ -322,6 +322,24 @@ out.Write(text)
out.WriteString("</td>") } +func (options *Html) Footnotes(out *bytes.Buffer, text func() bool) { + out.WriteString("<div class=\"footnotes\">\n") + options.HRule(out) + options.List(out, text, LIST_TYPE_ORDERED) + out.WriteString("</div>\n") +} + +func (options *Html) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) { + if flags&LIST_ITEM_CONTAINS_BLOCK != 0 || flags&LIST_ITEM_BEGINNING_OF_LIST != 0 { + doubleSpace(out) + } + out.WriteString(`<li id="fn:`) + out.Write(slugify(name)) + out.WriteString(`">`) + out.Write(text) + out.WriteString("</li>\n") +} + func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) { marker := out.Len() doubleSpace(out)@@ -499,6 +517,17 @@ func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
out.WriteString("<del>") out.Write(text) out.WriteString("</del>") +} + +func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) { + slug := slugify(ref) + out.WriteString(`<sup class="footnote-ref" id="fnref:`) + out.Write(slug) + out.WriteString(`"><a rel="footnote" href="#fn:`) + out.Write(slug) + out.WriteString(`">`) + out.WriteString(strconv.Itoa(id)) + out.WriteString(`</a></sup>`) } func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
@@ -15,6 +15,7 @@ package blackfriday
import ( "bytes" + "strconv" ) // Functions to parse text within a block@@ -168,20 +169,49 @@ p.r.LineBreak(out)
return 1 } -// '[': parse a link or an image +type linkType int + +const ( + linkNormal linkType = iota + linkImg + linkDeferredFootnote + linkInlineFootnote +) + +// '[': parse a link or an image or a footnote func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { // no links allowed inside other links if p.insideLink { return 0 } - isImg := offset > 0 && data[offset-1] == '!' + // [text] == regular link + // ![alt] == image + // ^[text] == inline footnote + // [^refId] == deferred footnote + var t linkType + if offset > 0 && data[offset-1] == '!' { + t = linkImg + } else if p.flags&EXTENSION_FOOTNOTES != 0 { + if offset > 0 && data[offset-1] == '^' { + t = linkInlineFootnote + } else if len(data)-1 > offset && data[offset+1] == '^' { + t = linkDeferredFootnote + } + } data = data[offset:] - i := 1 - var title, link []byte - textHasNl := false + var ( + i = 1 + noteId int + title, link []byte + textHasNl = false + ) + + if t == linkDeferredFootnote { + i++ + } // look for the matching closing bracket for level := 1; level > 0 && i < len(data); i++ {@@ -351,6 +381,7 @@ key := string(bytes.ToLower(id))
lr, ok := p.refs[key] if !ok { return 0 + } // keep link and title from reference@@ -358,7 +389,7 @@ link = lr.link
title = lr.title i++ - // shortcut reference style link + // shortcut reference style link or reference or inline footnote default: var id []byte@@ -377,19 +408,59 @@ }
id = b.Bytes() } else { - id = data[1:txtE] + if t == linkDeferredFootnote { + id = data[2:txtE] // get rid of the ^ + } else { + id = data[1:txtE] + } } - // find the reference with matching id key := string(bytes.ToLower(id)) - lr, ok := p.refs[key] - if !ok { - return 0 - } + if t == linkInlineFootnote { + // create a new reference + noteId = len(p.notes) + 1 - // keep link and title from reference - link = lr.link - title = lr.title + var fragment []byte + if len(id) > 0 { + if len(id) < 16 { + fragment = make([]byte, len(id)) + } else { + fragment = make([]byte, 16) + } + copy(fragment, slugify(id)) + } else { + fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...) + } + + ref := &reference{ + noteId: noteId, + hasBlock: false, + link: fragment, + title: id, + } + + p.notes = append(p.notes, ref) + + link = ref.link + title = ref.title + } else { + // find the reference with matching id + lr, ok := p.refs[key] + if !ok { + return 0 + } + + if t == linkDeferredFootnote { + lr.noteId = len(p.notes) + 1 + p.notes = append(p.notes, lr) + } + + // keep link and title from reference + link = lr.link + // if inline footnote, title == footnote contents + title = lr.title + noteId = lr.noteId + } // rewind the whitespace i = txtE + 1@@ -398,7 +469,7 @@
// build content: img alt is escaped, link content is parsed var content bytes.Buffer if txtE > 1 { - if isImg { + if t == linkImg { content.Write(data[1:txtE]) } else { // links cannot contain other links, so turn off link parsing temporarily@@ -410,19 +481,25 @@ }
} var uLink []byte - if len(link) > 0 { - var uLinkBuf bytes.Buffer - unescapeText(&uLinkBuf, link) - uLink = uLinkBuf.Bytes() - } + if t == linkNormal || t == linkImg { + if len(link) > 0 { + var uLinkBuf bytes.Buffer + unescapeText(&uLinkBuf, link) + uLink = uLinkBuf.Bytes() + } - // links need something to click on and somewhere to go - if len(uLink) == 0 || (!isImg && content.Len() == 0) { - return 0 + // links need something to click on and somewhere to go + if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) { + return 0 + } } // call the relevant rendering function - if isImg { + switch t { + case linkNormal: + p.r.Link(out, uLink, title, content.Bytes()) + + case linkImg: outSize := out.Len() outBytes := out.Bytes() if outSize > 0 && outBytes[outSize-1] == '!' {@@ -430,8 +507,21 @@ out.Truncate(outSize - 1)
} p.r.Image(out, uLink, title, content.Bytes()) - } else { - p.r.Link(out, uLink, title, content.Bytes()) + + case linkInlineFootnote: + outSize := out.Len() + outBytes := out.Bytes() + if outSize > 0 && outBytes[outSize-1] == '^' { + out.Truncate(outSize - 1) + } + + p.r.FootnoteRef(out, link, noteId) + + case linkDeferredFootnote: + p.r.FootnoteRef(out, link, noteId) + + default: + return 0 } return i
@@ -35,11 +35,13 @@
func doTestsInlineParam(t *testing.T, tests []string, extensions, htmlFlags int) { // catch and report panics var candidate string - defer func() { - if err := recover(); err != nil { - t.Errorf("\npanic while processing [%#v]\n", candidate) - } - }() + /* + defer func() { + if err := recover(); err != nil { + t.Errorf("\npanic while processing [%#v] (%v)\n", candidate, err) + } + }() + */ for i := 0; i+1 < len(tests); i += 2 { input := tests[i]@@ -501,3 +503,142 @@ "http://new.com?q=>&etc</a></p>\n",
} doTestsInline(t, tests) } + +func TestFootnotes(t *testing.T) { + tests := []string{ + "testing footnotes.[^a]\n\n[^a]: This is the note\n", + `<p>testing footnotes.<sup class="footnote-ref" id="fnref:a"><a rel="footnote" href="#fn:a">1</a></sup></p> +<div class="footnotes"> + +<hr /> + +<ol> +<li id="fn:a">This is the note +</li> +</ol> +</div> +`, + + `testing long[^b] notes. + +[^b]: Paragraph 1 + + Paragraph 2 + + ` + "```\n\tsome code\n\t```" + ` + + Paragraph 3 + +No longer in the footnote +`, + `<p>testing long<sup class="footnote-ref" id="fnref:b"><a rel="footnote" href="#fn:b">1</a></sup> notes.</p> + +<p>No longer in the footnote</p> +<div class="footnotes"> + +<hr /> + +<ol> +<li id="fn:b"><p>Paragraph 1</p> + +<p>Paragraph 2</p> + +<p><code> +some code +</code></p> + +<p>Paragraph 3</p> +</li> +</ol> +</div> +`, + + `testing[^c] multiple[^d] notes. + +[^c]: this is [note] c + + +omg + +[^d]: this is note d + +what happens here + +[note]: /link/c + +`, + `<p>testing<sup class="footnote-ref" id="fnref:c"><a rel="footnote" href="#fn:c">1</a></sup> multiple<sup class="footnote-ref" id="fnref:d"><a rel="footnote" href="#fn:d">2</a></sup> notes.</p> + +<p>omg</p> + +<p>what happens here</p> +<div class="footnotes"> + +<hr /> + +<ol> +<li id="fn:c">this is <a href="/link/c">note</a> c +</li> +<li id="fn:d">this is note d +</li> +</ol> +</div> +`, + + "testing inline^[this is the note] notes.\n", + `<p>testing inline<sup class="footnote-ref" id="fnref:this-is-the-note"><a rel="footnote" href="#fn:this-is-the-note">1</a></sup> notes.</p> +<div class="footnotes"> + +<hr /> + +<ol> +<li id="fn:this-is-the-note">this is the note</li> +</ol> +</div> +`, + + "testing multiple[^1] types^[inline note] of notes[^2]\n\n[^2]: the second deferred note\n[^1]: the first deferred note\n\n\twhich happens to be a block\n", + `<p>testing multiple<sup class="footnote-ref" id="fnref:1"><a rel="footnote" href="#fn:1">1</a></sup> types<sup class="footnote-ref" 
id="fnref:inline-note"><a rel="footnote" href="#fn:inline-note">2</a></sup> of notes<sup class="footnote-ref" id="fnref:2"><a rel="footnote" href="#fn:2">3</a></sup></p> +<div class="footnotes"> + +<hr /> + +<ol> +<li id="fn:1"><p>the first deferred note</p> + +<p>which happens to be a block</p> +</li> +<li id="fn:inline-note">inline note</li> +<li id="fn:2">the second deferred note +</li> +</ol> +</div> +`, + + `This is a footnote[^1]^[and this is an inline footnote] + +[^1]: the footnote text. + + may be multiple paragraphs. +`, + `<p>This is a footnote<sup class="footnote-ref" id="fnref:1"><a rel="footnote" href="#fn:1">1</a></sup><sup class="footnote-ref" id="fnref:and-this-is-an-i"><a rel="footnote" href="#fn:and-this-is-an-i">2</a></sup></p> +<div class="footnotes"> + +<hr /> + +<ol> +<li id="fn:1"><p>the footnote text.</p> + +<p>may be multiple paragraphs.</p> +</li> +<li id="fn:and-this-is-an-i">and this is an inline footnote</li> +</ol> +</div> +`, + + "empty footnote[^]\n\n[^]: fn text", + "<p>empty footnote<sup class=\"footnote-ref\" id=\"fnref:\"><a rel=\"footnote\" href=\"#fn:\">1</a></sup></p>\n<div class=\"footnotes\">\n\n<hr />\n\n<ol>\n<li id=\"fn:\">fn text\n</li>\n</ol>\n</div>\n", + } + + doTestsInlineParam(t, tests, EXTENSION_FOOTNOTES, 0) +}
@@ -158,6 +158,15 @@ }
out.Write(text) } +// TODO: this +func (options *Latex) Footnotes(out *bytes.Buffer, text func() bool) { + +} + +func (options *Latex) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) { + +} + func (options *Latex) AutoLink(out *bytes.Buffer, link []byte, kind int) { out.WriteString("\\href{") if kind == LINK_TYPE_EMAIL {@@ -227,6 +236,11 @@ func (options *Latex) StrikeThrough(out *bytes.Buffer, text []byte) {
out.WriteString("\\sout{") out.Write(text) out.WriteString("}") +} + +// TODO: this +func (options *Latex) FootnoteRef(out *bytes.Buffer, ref []byte, id int) { + } func needsBackslash(c byte) bool {
@@ -37,6 +37,7 @@ EXTENSION_LAX_HTML_BLOCKS // loosen up HTML block parsing rules
EXTENSION_SPACE_HEADERS // be strict about prefix header rules EXTENSION_HARD_LINE_BREAK // translate newlines into line breaks EXTENSION_TAB_SIZE_EIGHT // expand tabs to eight spaces instead of four + EXTENSION_FOOTNOTES // Pandoc-style footnotes ) // These are the possible flag values for the link renderer.@@ -139,6 +140,8 @@ Paragraph(out *bytes.Buffer, text func() bool)
Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) TableRow(out *bytes.Buffer, text []byte) TableCell(out *bytes.Buffer, text []byte, flags int) + Footnotes(out *bytes.Buffer, text func() bool) + FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) // Span-level callbacks AutoLink(out *bytes.Buffer, link []byte, kind int)@@ -151,6 +154,7 @@ Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
RawHtmlTag(out *bytes.Buffer, tag []byte) TripleEmphasis(out *bytes.Buffer, text []byte) StrikeThrough(out *bytes.Buffer, text []byte) + FootnoteRef(out *bytes.Buffer, ref []byte, id int) // Low-level callbacks Entity(out *bytes.Buffer, entity []byte)@@ -175,6 +179,11 @@ flags int
nesting int maxNesting int insideLink bool + + // Footnotes need to be ordered as well as available to quickly check for + // presence. If a ref is also a footnote, it's stored both in refs and here + // in notes. Slice is nil if footnotes not enabled. + notes []*reference } //@@ -273,6 +282,10 @@ if extensions&EXTENSION_AUTOLINK != 0 {
p.inlineCallback[':'] = autoLink } + if extensions&EXTENSION_FOOTNOTES != 0 { + p.notes = make([]*reference, 0) + } + first := firstPass(p, input) second := secondPass(p, first)@@ -292,7 +305,7 @@ tabSize = TAB_SIZE_EIGHT
} beg, end := 0, 0 for beg < len(input) { // iterate over lines - if end = isReference(p, input[beg:]); end > 0 { + if end = isReference(p, input[beg:], tabSize); end > 0 { beg += end } else { // skip to the next line end = beg@@ -331,6 +344,26 @@ var output bytes.Buffer
p.r.DocumentHeader(&output) p.block(&output, input) + + if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { + p.r.Footnotes(&output, func() bool { + flags := LIST_ITEM_BEGINNING_OF_LIST + for _, ref := range p.notes { + var buf bytes.Buffer + if ref.hasBlock { + flags |= LIST_ITEM_CONTAINS_BLOCK + p.block(&buf, ref.title) + } else { + p.inline(&buf, ref.title) + } + p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags) + flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK + } + + return true + }) + } + p.r.DocumentFooter(&output) if p.nesting != 0 {@@ -354,11 +387,27 @@ // Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in: // // This library is hosted on [Github][2], a git hosting site. +// +// Actual footnotes as specified in Pandoc and supported by some other Markdown +// libraries such as php-markdown are also taken care of. They look like this: +// +// This sentence needs a bit of further explanation.[^note] +// +// [^note]: This is the explanation. +// +// Footnotes are rendered at the end of the document in an ordered list. +// Inline footnotes such as: +// +// Inline footnotes^[The note text goes here.] also exist. +// +// are supported as well. // References are parsed and stored in this struct. type reference struct { - link []byte - title []byte + link []byte + title []byte + noteId int // 0 if not a footnote ref + hasBlock bool } // Check whether or not data starts with a reference link.@@ -366,7 +415,7 @@ // If so, it is parsed and stored in the list of references
// (in the render struct). // Returns the number of bytes to skip to move past it, // or zero if the first line is not a reference. -func isReference(p *parser, data []byte) int { +func isReference(p *parser, data []byte, tabSize int) int { // up to 3 optional leading spaces if len(data) < 4 { return 0@@ -376,11 +425,21 @@ for i < 3 && data[i] == ' ' {
i++ } + noteId := 0 + // id part: anything but a newline between brackets if data[i] != '[' { return 0 } i++ + if p.flags&EXTENSION_FOOTNOTES != 0 { + if data[i] == '^' { + // we can set it to anything here because the proper noteIds will + // be assigned later during the second pass. It just has to be != 0 + noteId = 1 + i++ + } + } idOffset := i for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { i++@@ -412,15 +471,59 @@ if i >= len(data) {
return 0 } + var ( + linkOffset, linkEnd int + titleOffset, titleEnd int + lineEnd int + raw []byte + hasBlock bool + ) + + if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 { + linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) + lineEnd = linkEnd + } else { + linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) + } + if lineEnd == 0 { + return 0 + } + + // a valid ref has been found + + ref := &reference{ + noteId: noteId, + hasBlock: hasBlock, + } + + if noteId > 0 { + // reusing the link field for the id since footnotes don't have links + ref.link = data[idOffset:idEnd] + // if footnote, it's not really a title, it's the contained text + ref.title = raw + } else { + ref.link = data[linkOffset:linkEnd] + ref.title = data[titleOffset:titleEnd] + } + + // id matches are case-insensitive + id := string(bytes.ToLower(data[idOffset:idEnd])) + + p.refs[id] = ref + + return lineEnd +} + +func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { // link: whitespace-free sequence, optionally between angle brackets if data[i] == '<' { i++ } - linkOffset := i + linkOffset = i for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { i++ } - linkEnd := i + linkEnd = i if data[linkOffset] == '<' && data[linkEnd-1] == '>' { linkOffset++ linkEnd--@@ -431,11 +534,10 @@ for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
i++ } if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { - return 0 + return } // compute end-of-line - lineEnd := 0 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { lineEnd = i }@@ -452,7 +554,6 @@ }
} // optional title: any non-newline sequence enclosed in '"() alone on its line - titleOffset, titleEnd := 0, 0 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { i++ titleOffset = i@@ -477,20 +578,88 @@ lineEnd = titleEnd
titleEnd = i } } - if lineEnd == 0 { // garbage after the link - return 0 + + return +} + +// The first bit of this logic is the same as (*parser).listItem, but the rest +// is much simpler. This function simply finds the entire block and shifts it +// over by one tab if it is indeed a block (just returns the line if it's not). +// blockEnd is the end of the section in the input buffer, and contents is the +// extracted text that was shifted over one tab. It will need to be rendered at +// the end of the document. +func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { + if i == 0 || len(data) == 0 { + return + } + + // skip leading whitespace on first line + for i < len(data) && data[i] == ' ' { + i++ + } + + blockStart = i + + // find the end of the line + blockEnd = i + for i < len(data) && data[i-1] != '\n' { + i++ } - // a valid ref has been found + // get working buffer + var raw bytes.Buffer + + // put the first line into the working buffer + raw.Write(data[blockEnd:i]) + blockEnd = i + + // process the following lines + containsBlankLine := false + +gatherLines: + for blockEnd < len(data) { + i++ + + // find the end of this line + for i < len(data) && data[i-1] != '\n' { + i++ + } - // id matches are case-insensitive - id := string(bytes.ToLower(data[idOffset:idEnd])) - p.refs[id] = &reference{ - link: data[linkOffset:linkEnd], - title: data[titleOffset:titleEnd], + // if it is an empty line, guess that it is part of this item + // and move on to the next line + if p.isEmpty(data[blockEnd:i]) > 0 { + containsBlankLine = true + blockEnd = i + continue + } + + n := 0 + if n = isIndented(data[blockEnd:i], indentSize); n == 0 { + // this is the end of the block. + // we don't want to include this last line in the index. 
+ break gatherLines + } + + // if there were blank lines before this one, insert a new one now + if containsBlankLine { + raw.WriteByte('\n') + containsBlankLine = false + } + + // get rid of that first tab, write to buffer + raw.Write(data[blockEnd+n : i]) + hasBlock = true + + blockEnd = i + } + + if data[blockEnd-1] != '\n' { + raw.WriteByte('\n') } - return lineEnd + contents = raw.Bytes() + + return } //@@ -578,3 +747,57 @@
i++ } } + +// Find if a line counts as indented or not. +// Returns number of characters the indent is (0 = not indented). +func isIndented(data []byte, indentSize int) int { + if len(data) == 0 { + return 0 + } + if data[0] == '\t' { + return 1 + } + if len(data) < indentSize { + return 0 + } + for i := 0; i < indentSize; i++ { + if data[i] != ' ' { + return 0 + } + } + return indentSize +} + +// Create a url-safe slug for fragments +func slugify(in []byte) []byte { + if len(in) == 0 { + return in + } + out := make([]byte, 0, len(in)) + sym := false + + for _, ch := range in { + if isalnum(ch) { + sym = false + out = append(out, ch) + } else if sym { + continue + } else { + out = append(out, '-') + sym = true + } + } + var a, b int + var ch byte + for a, ch = range out { + if ch != '-' { + break + } + } + for b = len(out) - 1; b > 0; b-- { + if out[b] != '-' { + break + } + } + return out[a : b+1] +}