all repos — grayfriday @ 40f28ee0224972fc3ac84999e877778a81f1e87d

blackfriday fork with a few changes

Prevent generated header collisions, less naively.

> This is a rework of an earlier version of this code.

The automatic header ID generation code submitted in #125 has a subtle
bug where it will use the same ID for multiple headers with identical
text. In the case below, all the headers are rendered a `<h1
id="header">Header</h1>`.

  ```markdown
  # Header
  # Header
  # Header
  # Header
  ```

This change is a simple but robust approach that uses an incrementing
counter and pre-checking to prevent header collision. (The above would
be rendered as `header`, `header-1`, `header-2`, and `header-3`.) In
more complex cases, it will append a new counter suffix (`-1`), like so:

  ```markdown
  # Header
  # Header 1
  # Header
  # Header
  ```

This will generate `header`, `header-1`, `header-1-1`, and `header-1-2`.

This code has two additional changes over the prior version:

1.  Rather than reimplementing @shurcooL’s anchor sanitization code, I
    have imported it as from
    `github.com/shurcooL/go/github_flavored_markdown/sanitized_anchor_name`.

2.  The markdown block parser is now only interested in *generating* a
    sanitized anchor name, not with ensuring its uniqueness. That code
    has been moved to the HTML renderer. This means that if the HTML
    renderer is modified to identify all unique headers prior to
    rendering, the hackish nature of the collision detection can be
    eliminated.
Austin Ziegler austin@zieglers.ca
Sat, 01 Nov 2014 18:35:35 -0400
commit

40f28ee0224972fc3ac84999e877778a81f1e87d

parent

7c8f3c1dcc7e0f87c62af34ee193fc3251f6d8a4

3 files changed, 55 insertions(+), 20 deletions(-)

jump to
M block.goblock.go

@@ -15,7 +15,7 @@ package blackfriday

import ( "bytes" - "unicode" + "github.com/shurcooL/go/github_flavored_markdown/sanitized_anchor_name" ) // Parse block-level data.

@@ -227,7 +227,7 @@ end--

} if end > i { if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { - id = createSanitizedAnchorName(string(data[i:end])) + id = sanitized_anchor_name.Create(string(data[i:end])) } work := func() bool { p.inline(out, data[i:end])

@@ -1276,7 +1276,7 @@ }(out, p, data[prev:eol])

id := "" if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { - id = createSanitizedAnchorName(string(data[prev:eol])) + id = sanitized_anchor_name.Create(string(data[prev:eol])) } p.r.Header(out, work, level, id)

@@ -1325,16 +1325,3 @@

p.renderParagraph(out, data[:i]) return i } - -func createSanitizedAnchorName(text string) string { - var anchorName []rune - for _, r := range []rune(text) { - switch { - case r == ' ': - anchorName = append(anchorName, '-') - case unicode.IsLetter(r) || unicode.IsNumber(r): - anchorName = append(anchorName, unicode.ToLower(r)) - } - } - return string(anchorName) -}
M block_test.goblock_test.go

@@ -275,8 +275,25 @@

"* List\n * Nested list\n # Nested header\n", "<ul>\n<li><p>List</p>\n\n<ul>\n<li><p>Nested list</p>\n\n" + "<h1 id=\"nested-header\">Nested header</h1></li>\n</ul></li>\n</ul>\n", + + "# Header\n\n# Header\n", + "<h1 id=\"header\">Header</h1>\n\n<h1 id=\"header-1\">Header</h1>\n", + + "# Header 1\n\n# Header 1", + "<h1 id=\"header-1\">Header 1</h1>\n\n<h1 id=\"header-1-1\">Header 1</h1>\n", + + "# Header\n\n# Header 1\n\n# Header\n\n# Header", + "<h1 id=\"header\">Header</h1>\n\n<h1 id=\"header-1\">Header 1</h1>\n\n<h1 id=\"header-1-1\">Header</h1>\n\n<h1 id=\"header-1-2\">Header</h1>\n", } doTestsBlock(t, tests, EXTENSION_AUTO_HEADER_IDS) +} + +func TestPrefixMultipleHeaderExtensions(t *testing.T) { + var tests = []string{ + "# Header\n\n# Header {#header}\n\n# Header 1", + "<h1 id=\"header\">Header</h1>\n\n<h1 id=\"header-1\">Header</h1>\n\n<h1 id=\"header-1-1\">Header 1</h1>\n", + } + doTestsBlock(t, tests, EXTENSION_AUTO_HEADER_IDS|EXTENSION_HEADER_IDS) } func TestUnderlineHeaders(t *testing.T) {

@@ -369,6 +386,12 @@ "<h1 id=\"trailing-spaces\">Trailing spaces</h1>\n",

"Double underline\n=====\n=====\n", "<h1 id=\"double-underline\">Double underline</h1>\n\n<p>=====</p>\n", + + "Header\n======\n\nHeader\n======\n", + "<h1 id=\"header\">Header</h1>\n\n<h1 id=\"header-1\">Header</h1>\n", + + "Header 1\n========\n\nHeader 1\n========\n", + "<h1 id=\"header-1\">Header 1</h1>\n\n<h1 id=\"header-1-1\">Header 1</h1>\n", } doTestsBlock(t, tests, EXTENSION_AUTO_HEADER_IDS) }
M html.gohtml.go

@@ -81,6 +81,9 @@ headerCount int

currentLevel int toc *bytes.Buffer + // Track header IDs to prevent ID collision in a single generation. + headerIDs map[string]int + smartypants *smartypantsRenderer }

@@ -122,6 +125,8 @@

headerCount: 0, currentLevel: 0, toc: new(bytes.Buffer), + + headerIDs: make(map[string]int), smartypants: smartypants(flags), }

@@ -190,11 +195,12 @@ func (options *Html) Header(out *bytes.Buffer, text func() bool, level int, id string) {

marker := out.Len() doubleSpace(out) + if id == "" && options.flags&HTML_TOC != 0 { + id = fmt.Sprintf("toc_%d", options.headerCount) + } + if id != "" { - out.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, id)) - } else if options.flags&HTML_TOC != 0 { - // headerCount is incremented in htmlTocHeader - out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount)) + out.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, options.ensureUniqueHeaderID(id))) } else { out.WriteString(fmt.Sprintf("<h%d>", level)) }

@@ -853,3 +859,22 @@ yes = true

} return } + +func (options *Html) ensureUniqueHeaderID(id string) string { + for count, found := options.headerIDs[id]; found; count, found = options.headerIDs[id] { + tmp := fmt.Sprintf("%s-%d", id, count+1) + + if _, tmpFound := options.headerIDs[tmp]; !tmpFound { + options.headerIDs[id] = count + 1 + id = tmp + } else { + id = id + "-1" + } + } + + if _, found := options.headerIDs[id]; !found { + options.headerIDs[id] = 0 + } + + return id +}