all repos — grayfriday @ be082a1ef25d4211496762bbbdbab6df7e68c1f3

blackfriday fork with a few changes

First attempt at supporting Pandoc-style footnotes. The existing tests have not broken but the new functionality does not work yet.
moshee moshee@displaynone.us
Tue, 25 Jun 2013 01:18:47 +0000
commit

be082a1ef25d4211496762bbbdbab6df7e68c1f3

parent

2336fd31093e8d1d9cab8642e0c08a82520a2abc

6 files changed, 368 insertions(+), 32 deletions(-)

jump to
M block.go

@@ -1101,8 +1101,9 @@

line = i } + rawBytes := raw.Bytes() + // render the contents of the list item - rawBytes := raw.Bytes() var cooked bytes.Buffer if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 { // intermediate render of block li
M html.go

@@ -322,6 +322,20 @@ out.Write(text)

out.WriteString("</td>") } +func (options *Html) Footnotes(out *bytes.Buffer, p *parser) { + out.WriteString("<div class=\"footnotes\">\n") + options.HRule(out) + options.List(out, func() bool { + for _, ref := range p.notes { + out.WriteString("<li>\n") + out.Write(ref.title) + out.WriteString("</li>\n") + } + return true + }, LIST_TYPE_ORDERED) + out.WriteString("</div>\n") +} + func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) { marker := out.Len() doubleSpace(out)

@@ -499,6 +513,17 @@ func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {

out.WriteString("<del>") out.Write(text) out.WriteString("</del>") +} + +func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) { + slug := slugify(ref) + out.WriteString(`<sup class="footnote-ref" id="fnref:`) + out.Write(slug) + out.WriteString(`"><a rel="footnote" href="#fn:`) + out.Write(slug) + out.WriteString(`">`) + out.WriteString(strconv.Itoa(id)) + out.WriteString(`</a></sup>`) } func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
M inline.go

@@ -168,20 +168,48 @@ p.r.LineBreak(out)

return 1 } -// '[': parse a link or an image +type linkType int + +const ( + linkNormal linkType = iota + linkImg + linkDeferredFootnote + +// linkInlineFootnote +) + +// '[': parse a link or an image or a footnote func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { // no links allowed inside other links if p.insideLink { return 0 } - isImg := offset > 0 && data[offset-1] == '!' + // [text] == regular link + // ![alt] == image + // ^[text] == inline footnote + // [^refId] == deferred footnote + var t linkType + if offset > 0 && data[offset-1] == '!' { + t = linkImg + } else if p.flags&EXTENSION_FOOTNOTES != 0 { + if len(data) > offset && data[offset+1] == '^' { + t = linkDeferredFootnote + } + } data = data[offset:] - i := 1 - var title, link []byte - textHasNl := false + var ( + i = 1 + noteId int + title, link []byte + textHasNl = false + ) + + if t == linkDeferredFootnote { + i++ + } // look for the matching closing bracket for level := 1; level > 0 && i < len(data); i++ {

@@ -351,6 +379,7 @@ key := string(bytes.ToLower(id))

lr, ok := p.refs[key] if !ok { return 0 + } // keep link and title from reference

@@ -358,7 +387,7 @@ link = lr.link

title = lr.title i++ - // shortcut reference style link + // shortcut reference style link or footnote default: var id []byte

@@ -377,7 +406,11 @@ }

id = b.Bytes() } else { - id = data[1:txtE] + if t == linkDeferredFootnote { + id = data[2:txtE] + } else { + id = data[1:txtE] + } } // find the reference with matching id

@@ -389,7 +422,9 @@ }

// keep link and title from reference link = lr.link + // if inline footnote, title == footnote contents title = lr.title + noteId = lr.noteId // rewind the whitespace i = txtE + 1

@@ -398,7 +433,7 @@

// build content: img alt is escaped, link content is parsed var content bytes.Buffer if txtE > 1 { - if isImg { + if t == linkImg { content.Write(data[1:txtE]) } else { // links cannot contain other links, so turn off link parsing temporarily

@@ -417,12 +452,16 @@ uLink = uLinkBuf.Bytes()

} // links need something to click on and somewhere to go - if len(uLink) == 0 || (!isImg && content.Len() == 0) { + if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) { return 0 } // call the relevant rendering function - if isImg { + switch t { + case linkNormal: + p.r.Link(out, uLink, title, content.Bytes()) + + case linkImg: outSize := out.Len() outBytes := out.Bytes() if outSize > 0 && outBytes[outSize-1] == '!' {

@@ -430,8 +469,12 @@ out.Truncate(outSize - 1)

} p.r.Image(out, uLink, title, content.Bytes()) - } else { - p.r.Link(out, uLink, title, content.Bytes()) + + case linkDeferredFootnote: + p.r.FootnoteRef(out, link, noteId) + + default: + return 0 } return i
M inline_test.go

@@ -37,7 +37,8 @@ // catch and report panics

var candidate string defer func() { if err := recover(); err != nil { - t.Errorf("\npanic while processing [%#v]\n", candidate) + panic(err) + t.Errorf("\npanic while processing [%#v] (%v)\n", candidate, err) } }()

@@ -501,3 +502,45 @@ "http://new.com?q=&gt;&amp;etc</a></p>\n",

} doTestsInline(t, tests) } + +func TestFootnotes(t *testing.T) { + tests := []string{ + "testing footnotes.[^a]\n\n[^a]: This is the note\n", + "", + + `testing long[^b] notes. + +[^b]: Paragraph 1 + + Paragraph 2 + + ` + "```\n\tsome code\n\t```" + ` + + Paragraph 3 + +No longer in the footnote +`, + "", + + `testing[^c] multiple[^d] notes. + +[^c]: this is note c + + +omg + +[^d]: this is note d + +what happens here +`, + "", + } + + for _, test := range tests { + if len(test) > 0 { + t.Errorf("Output:\n%s\n", runMarkdownInline(test, EXTENSION_FOOTNOTES, 0)) + } + } + + //doTestsInlineParam(t, tests, EXTENSION_FOOTNOTES, 0) +}
M latex.go

@@ -158,6 +158,11 @@ }

out.Write(text) } +// TODO: this +func (options *Latex) Footnotes(out *bytes.Buffer, p *parser) { + +} + func (options *Latex) AutoLink(out *bytes.Buffer, link []byte, kind int) { out.WriteString("\\href{") if kind == LINK_TYPE_EMAIL {

@@ -227,6 +232,11 @@ func (options *Latex) StrikeThrough(out *bytes.Buffer, text []byte) {

out.WriteString("\\sout{") out.Write(text) out.WriteString("}") +} + +// TODO: this +func (options *Latex) FootnoteRef(out *bytes.Buffer, ref []byte, id int) { + } func needsBackslash(c byte) bool {
M markdown.go

@@ -37,6 +37,7 @@ EXTENSION_LAX_HTML_BLOCKS // loosen up HTML block parsing rules

EXTENSION_SPACE_HEADERS // be strict about prefix header rules EXTENSION_HARD_LINE_BREAK // translate newlines into line breaks EXTENSION_TAB_SIZE_EIGHT // expand tabs to eight spaces instead of four + EXTENSION_FOOTNOTES // Pandoc-style footnotes ) // These are the possible flag values for the link renderer.

@@ -139,6 +140,7 @@ Paragraph(out *bytes.Buffer, text func() bool)

Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) TableRow(out *bytes.Buffer, text []byte) TableCell(out *bytes.Buffer, text []byte, flags int) + Footnotes(out *bytes.Buffer, p *parser) // Span-level callbacks AutoLink(out *bytes.Buffer, link []byte, kind int)

@@ -151,6 +153,7 @@ Link(out *bytes.Buffer, link []byte, title []byte, content []byte)

RawHtmlTag(out *bytes.Buffer, tag []byte) TripleEmphasis(out *bytes.Buffer, text []byte) StrikeThrough(out *bytes.Buffer, text []byte) + FootnoteRef(out *bytes.Buffer, ref []byte, id int) // Low-level callbacks Entity(out *bytes.Buffer, entity []byte)

@@ -175,6 +178,11 @@ flags int

nesting int maxNesting int insideLink bool + + // Footnotes need to be ordered as well as available to quickly check for + // presence. If a ref is also a footnote, it's stored both in refs and here + // in notes. Slice is nil if footnotes not enabled. + notes []*reference } //

@@ -273,6 +281,10 @@ if extensions&EXTENSION_AUTOLINK != 0 {

p.inlineCallback[':'] = autoLink } + if extensions&EXTENSION_FOOTNOTES != 0 { + p.notes = make([]*reference, 0) + } + first := firstPass(p, input) second := secondPass(p, first)

@@ -292,7 +304,7 @@ tabSize = TAB_SIZE_EIGHT

} beg, end := 0, 0 for beg < len(input) { // iterate over lines - if end = isReference(p, input[beg:]); end > 0 { + if end = isReference(p, input[beg:], tabSize); end > 0 { beg += end } else { // skip to the next line end = beg

@@ -331,6 +343,13 @@ var output bytes.Buffer

p.r.DocumentHeader(&output) p.block(&output, input) + + // NOTE: this is a big hack because we need the parser again for the + // footnotes, so this can't really go in the public interface + if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { + p.r.Footnotes(&output, p) + } + p.r.DocumentFooter(&output) if p.nesting != 0 {

@@ -354,11 +373,26 @@ // Anywhere in the document, the reference can be linked by referring to its

// label, i.e., 1 and 2 in this example, as in: // // This library is hosted on [Github][2], a git hosting site. +// +// Actual footnotes as specified in Pandoc and supported by some other Markdown +// libraries such as php-markdown are also taken care of. They look like this: +// +// This sentence needs a bit of further explanation.[^note] +// +// [^note]: This is the explanation. +// +// Footnotes should be placed at the end of the document in an ordered list. +// Inline footnotes such as: +// +// Inline footnotes^[Not supported.] also exist. +// +// are not yet supported. // References are parsed and stored in this struct. type reference struct { - link []byte - title []byte + link []byte + title []byte + noteId int // 0 if not a footnote ref } // Check whether or not data starts with a reference link.

@@ -366,7 +400,8 @@ // If so, it is parsed and stored in the list of references

// (in the render struct). // Returns the number of bytes to skip to move past it, // or zero if the first line is not a reference. -func isReference(p *parser, data []byte) int { +func isReference(p *parser, data []byte, tabSize int) int { + println("[", string(data), "]") // up to 3 optional leading spaces if len(data) < 4 { return 0

@@ -376,11 +411,19 @@ for i < 3 && data[i] == ' ' {

i++ } + noteId := 0 + // id part: anything but a newline between brackets if data[i] != '[' { return 0 } i++ + if p.flags&EXTENSION_FOOTNOTES != 0 { + if data[i] == '^' { + noteId = len(p.notes) + 1 + i++ + } + } idOffset := i for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { i++

@@ -391,6 +434,7 @@ }

idEnd := i // spacer: colon (space | tab)* newline? (space | tab)* + // /:[ \t]*\n?[ \t]*/ i++ if i >= len(data) || data[i] != ':' { return 0

@@ -412,15 +456,56 @@ if i >= len(data) {

return 0 } + var ( + linkOffset, linkEnd int + titleOffset, titleEnd int + lineEnd int + raw []byte + ) + + if p.flags&EXTENSION_FOOTNOTES != 0 && noteId > 0 { + linkOffset, linkEnd, raw = scanFootnote(p, data, i, tabSize) + lineEnd = linkEnd + linkOffset + } else { + linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) + } + if lineEnd == 0 { + return 0 + } + + // a valid ref has been found + + ref := &reference{ + noteId: noteId, + } + + if noteId > 0 { + // reusing the link field for the id since footnotes don't have titles + ref.link = data[idOffset:idEnd] + // if footnote, it's not really a title, it's the contained text + ref.title = raw + p.notes = append(p.notes, ref) + } else { + ref.link = data[linkOffset:linkEnd] + ref.title = data[titleOffset:titleEnd] + } + + // id matches are case-insensitive + id := string(bytes.ToLower(data[idOffset:idEnd])) + p.refs[id] = ref + return lineEnd +} + +func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { // link: whitespace-free sequence, optionally between angle brackets if data[i] == '<' { i++ } - linkOffset := i + linkOffset = i for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { i++ } - linkEnd := i + linkEnd = i if data[linkOffset] == '<' && data[linkEnd-1] == '>' { linkOffset++ linkEnd--

@@ -431,11 +516,10 @@ for i < len(data) && (data[i] == ' ' || data[i] == '\t') {

i++ } if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { - return 0 + return } // compute end-of-line - lineEnd := 0 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { lineEnd = i }

@@ -452,7 +536,6 @@ }

} // optional title: any non-newline sequence enclosed in '"() alone on its line - titleOffset, titleEnd := 0, 0 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { i++ titleOffset = i

@@ -477,20 +560,97 @@ lineEnd = titleEnd

titleEnd = i } } - if lineEnd == 0 { // garbage after the link - return 0 + + return +} + +// The first bit of this logic is the same as (*parser).listItem, but the rest +// is much simpler. This function simply finds the entire block and shifts it +// over by one tab if it is indeed a block (just returns the line if it's not). +// blockEnd is the end of the section in the input buffer, and contents is the +// extracted text that was shifted over one tab. It will need to be rendered at +// the end of the document. +func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte) { + if i == 0 { + return } - // a valid ref has been found + // skip leading whitespace on first line + for data[i] == ' ' { + i++ + } - // id matches are case-insensitive - id := string(bytes.ToLower(data[idOffset:idEnd])) - p.refs[id] = &reference{ - link: data[linkOffset:linkEnd], - title: data[titleOffset:titleEnd], + blockStart = i + + // find the end of the line + blockEnd = i + for data[i-1] != '\n' { + if i >= len(data) { + return + } + i++ + } + + // get working buffer + var raw bytes.Buffer + + // put the first line into the working buffer + raw.Write(data[blockEnd:i]) + blockEnd = i + + // process the following lines + containsBlankLine := false + hasBlock := false + +gatherLines: + for blockEnd < len(data) { + i++ + + // find the end of this line + for data[i-1] != '\n' { + i++ + } + + // if it is an empty line, guess that it is part of this item + // and move on to the next line + if p.isEmpty(data[blockEnd:i]) > 0 { + containsBlankLine = true + blockEnd = i + continue + } + + n := 0 + if n = isIndented(data[blockEnd:i], indentSize); n == 0 { + // this is the end of the block. + // we don't want to include this last line in the index. 
+ break gatherLines + } + + // if there were blank lines before this one, insert a new one now + if containsBlankLine { + hasBlock = true + raw.WriteByte('\n') + containsBlankLine = false + } + + // get rid of that first tab, write to buffer + raw.Write(data[blockEnd+n : i]) + + blockEnd = i + } + + rawBytes := raw.Bytes() + println("raw: {" + string(raw.Bytes()) + "}") + buf := new(bytes.Buffer) + + if hasBlock { + p.block(buf, rawBytes) + } else { + p.inline(buf, rawBytes) } + contents = buf.Bytes() - return lineEnd + return } //

@@ -578,3 +738,57 @@

i++ } } + +// Find if a line counts as indented or not. +// Returns number of characters the indent is (0 = not indented). +func isIndented(data []byte, indentSize int) int { + if len(data) == 0 { + return 0 + } + if data[0] == '\t' { + return 1 + } + if len(data) < indentSize { + return 0 + } + for i := 0; i < indentSize; i++ { + if data[i] != ' ' { + return 0 + } + } + return indentSize +} + +// Create a url-safe slug for fragments +func slugify(in []byte) []byte { + if len(in) == 0 { + return in + } + out := make([]byte, 0, len(in)) + sym := false + + for _, ch := range in { + if isalnum(ch) { + sym = false + out = append(out, ch) + } else if sym { + continue + } else { + out = append(out, '-') + sym = true + } + } + var a, b int + var ch byte + for a, ch = range out { + if ch != '-' { + break + } + } + for b = len(out) - 1; b > 0; b-- { + if out[b] != '-' { + break + } + } + return out[a : b+1] +}