all repos — grayfriday @ 4170cc12d47178d054742ca80a4c719041f0d978

blackfriday fork with a few changes

smartypants.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// SmartyPants rendering
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"io"
 21)
 22
// SPRenderer is a struct containing state of a Smartypants renderer.
//
// The two quote flags are passed by pointer to smartQuoteHelper, which
// updates them each time it classifies a quote as opening or closing.
type SPRenderer struct {
	// inSingleQuote is the open/close state used for single quotes.
	inSingleQuote bool
	// inDoubleQuote is the open/close state used for double quotes
	// (including the '', `` and &quot; spellings).
	inDoubleQuote bool
	// callbacks maps each possible input byte to its handler; Process
	// copies bytes whose entry is nil through unchanged.
	callbacks     [256]smartCallback
}
 29
 30func wordBoundary(c byte) bool {
 31	return c == 0 || isspace(c) || ispunct(c)
 32}
 33
 34func tolower(c byte) byte {
 35	if c >= 'A' && c <= 'Z' {
 36		return c - 'A' + 'a'
 37	}
 38	return c
 39}
 40
 41func isdigit(c byte) bool {
 42	return c >= '0' && c <= '9'
 43}
 44
// smartQuoteHelper decides whether a quote is an opening or a closing
// quote, records the decision in *isOpen and writes the matching HTML
// entity to out.
//
// previousChar and nextChar are the bytes on either side of the quote;
// callers pass 0 when there is no byte on that side. The entity written
// is "&" + ("l" when opening, "r" when closing) + quote + "quo;", so a
// quote of 'd' yields &ldquo; / &rdquo;, 's' yields &lsquo; / &rsquo;
// and 'a' yields &laquo; / &raquo;. When addNBSP is set, "&nbsp;" is
// written after an opening quote and before a closing quote.
//
// The function always returns true.
func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool {
	// edge of the buffer is likely to be a tag that we don't get to see,
	// so we treat it like text sometimes

	// enumerate all sixteen possibilities for (previousChar, nextChar)
	// each can be one of {0, space, punct, other}
	//
	// The cases are ordered: a case whose check is commented out
	// (isnormal) relies on the earlier cases having already matched the
	// 0, space and punct alternatives.
	switch {
	case previousChar == 0 && nextChar == 0:
		// context is not any help here, so toggle
		*isOpen = !*isOpen
	case isspace(previousChar) && nextChar == 0:
		// [ "] might be [ "<code>foo...]
		*isOpen = true
	case ispunct(previousChar) && nextChar == 0:
		// [!"] hmm... could be [Run!"] or [("<code>...]
		*isOpen = false
	case /* isnormal(previousChar) && */ nextChar == 0:
		// [a"] is probably a close
		*isOpen = false
	case previousChar == 0 && isspace(nextChar):
		// [" ] might be [...foo</code>" ]
		*isOpen = false
	case isspace(previousChar) && isspace(nextChar):
		// [ " ] context is not any help here, so toggle
		*isOpen = !*isOpen
	case ispunct(previousChar) && isspace(nextChar):
		// [!" ] is probably a close
		*isOpen = false
	case /* isnormal(previousChar) && */ isspace(nextChar):
		// [a" ] this is one of the easy cases
		*isOpen = false
	case previousChar == 0 && ispunct(nextChar):
		// ["!] hmm... could be ["$1.95] or [</code>"!...]
		*isOpen = false
	case isspace(previousChar) && ispunct(nextChar):
		// [ "!] looks more like [ "$1.95]
		*isOpen = true
	case ispunct(previousChar) && ispunct(nextChar):
		// [!"!] context is not any help here, so toggle
		*isOpen = !*isOpen
	case /* isnormal(previousChar) && */ ispunct(nextChar):
		// [a"!] is probably a close
		*isOpen = false
	case previousChar == 0 /* && isnormal(nextChar) */ :
		// ["a] is probably an open
		*isOpen = true
	case isspace(previousChar) /* && isnormal(nextChar) */ :
		// [ "a] this is one of the easy cases
		*isOpen = true
	case ispunct(previousChar) /* && isnormal(nextChar) */ :
		// [!"a] is probably an open
		*isOpen = true
	default:
		// [a'b] maybe a contraction?
		*isOpen = false
	}

	// Note that with the limited lookahead, this non-breaking
	// space will also be appended to single double quotes.
	if addNBSP && !*isOpen {
		// closing quote: the space goes before the entity
		out.WriteString("&nbsp;")
	}

	// Assemble the entity piecewise: & + l|r + quote + quo;
	out.WriteByte('&')
	if *isOpen {
		out.WriteByte('l')
	} else {
		out.WriteByte('r')
	}
	out.WriteByte(quote)
	out.WriteString("quo;")

	if addNBSP && *isOpen {
		// opening quote: the space goes after the entity
		out.WriteString("&nbsp;")
	}

	return true
}
123
124func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
125	if len(text) >= 2 {
126		t1 := tolower(text[1])
127
128		if t1 == '\'' {
129			nextChar := byte(0)
130			if len(text) >= 3 {
131				nextChar = text[2]
132			}
133			if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
134				return 1
135			}
136		}
137
138		if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
139			out.WriteString("&rsquo;")
140			return 0
141		}
142
143		if len(text) >= 3 {
144			t2 := tolower(text[2])
145
146			if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
147				(len(text) < 4 || wordBoundary(text[3])) {
148				out.WriteString("&rsquo;")
149				return 0
150			}
151		}
152	}
153
154	nextChar := byte(0)
155	if len(text) > 1 {
156		nextChar = text[1]
157	}
158	if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote, false) {
159		return 0
160	}
161
162	out.WriteByte(text[0])
163	return 0
164}
165
166func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
167	if len(text) >= 3 {
168		t1 := tolower(text[1])
169		t2 := tolower(text[2])
170
171		if t1 == 'c' && t2 == ')' {
172			out.WriteString("&copy;")
173			return 2
174		}
175
176		if t1 == 'r' && t2 == ')' {
177			out.WriteString("&reg;")
178			return 2
179		}
180
181		if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
182			out.WriteString("&trade;")
183			return 3
184		}
185	}
186
187	out.WriteByte(text[0])
188	return 0
189}
190
191func (r *SPRenderer) smartEmDash(out *bytes.Buffer, previousChar byte, text []byte) int {
192	if len(text) >= 3 {
193		if text[1] == '-' && text[2] == '-' && isspace(text[3]) {
194			out.WriteString("&mdash;")
195			return 3
196		}
197	}
198
199	out.WriteByte(text[0])
200	return 0
201}
202
203func (r *SPRenderer) smartEnDash(out *bytes.Buffer, previousChar byte, text []byte) int {
204	if len(text) >= 2 {
205		if isdigit(previousChar) && isdigit(text[1]) {
206			out.WriteString("&ndash;")
207			return 0
208		}
209	}
210	out.WriteByte(text[0])
211	return 0
212}
213
214func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
215	if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
216		out.WriteString("&mdash;")
217		return 2
218	}
219	if len(text) >= 2 && text[1] == '-' {
220		out.WriteString("&ndash;")
221		return 1
222	}
223
224	out.WriteByte(text[0])
225	return 0
226}
227
228func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte, addNBSP bool) int {
229	if bytes.HasPrefix(text, []byte("&quot;")) {
230		nextChar := byte(0)
231		if len(text) >= 7 {
232			nextChar = text[6]
233		}
234		if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, addNBSP) {
235			return 5
236		}
237	}
238
239	if bytes.HasPrefix(text, []byte("&#0;")) {
240		return 3
241	}
242
243	out.WriteByte('&')
244	return 0
245}
246
247func (r *SPRenderer) smartAmp(angledQuotes, addNBSP bool) func(*bytes.Buffer, byte, []byte) int {
248	var quote byte = 'd'
249	if angledQuotes {
250		quote = 'a'
251	}
252
253	return func(out *bytes.Buffer, previousChar byte, text []byte) int {
254		return r.smartAmpVariant(out, previousChar, text, quote, addNBSP)
255	}
256}
257
258func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
259	if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
260		out.WriteString("&hellip;")
261		return 2
262	}
263
264	if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
265		out.WriteString("&hellip;")
266		return 4
267	}
268
269	out.WriteByte(text[0])
270	return 0
271}
272
273func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
274	if len(text) >= 2 && text[1] == '`' {
275		nextChar := byte(0)
276		if len(text) >= 3 {
277			nextChar = text[2]
278		}
279		if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
280			return 1
281		}
282	}
283
284	out.WriteByte(text[0])
285	return 0
286}
287
288func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
289	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
290		// is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
291		// note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
292		//       and avoid changing dates like 1/23/2005 into fractions.
293		numEnd := 0
294		for len(text) > numEnd && isdigit(text[numEnd]) {
295			numEnd++
296		}
297		if numEnd == 0 {
298			out.WriteByte(text[0])
299			return 0
300		}
301		denStart := numEnd + 1
302		if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
303			denStart = numEnd + 3
304		} else if len(text) < numEnd+2 || text[numEnd] != '/' {
305			out.WriteByte(text[0])
306			return 0
307		}
308		denEnd := denStart
309		for len(text) > denEnd && isdigit(text[denEnd]) {
310			denEnd++
311		}
312		if denEnd == denStart {
313			out.WriteByte(text[0])
314			return 0
315		}
316		if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
317			out.WriteString("<sup>")
318			out.Write(text[:numEnd])
319			out.WriteString("</sup>&frasl;<sub>")
320			out.Write(text[denStart:denEnd])
321			out.WriteString("</sub>")
322			return denEnd - 1
323		}
324	}
325
326	out.WriteByte(text[0])
327	return 0
328}
329
330func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
331	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
332		if text[0] == '1' && text[1] == '/' && text[2] == '2' {
333			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
334				out.WriteString("&frac12;")
335				return 2
336			}
337		}
338
339		if text[0] == '1' && text[1] == '/' && text[2] == '4' {
340			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
341				out.WriteString("&frac14;")
342				return 2
343			}
344		}
345
346		if text[0] == '3' && text[1] == '/' && text[2] == '4' {
347			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
348				out.WriteString("&frac34;")
349				return 2
350			}
351		}
352	}
353
354	out.WriteByte(text[0])
355	return 0
356}
357
358func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
359	nextChar := byte(0)
360	if len(text) > 1 {
361		nextChar = text[1]
362	}
363	if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, false) {
364		out.WriteString("&quot;")
365	}
366
367	return 0
368}
369
370func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
371	return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
372}
373
374func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
375	return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
376}
377
378func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
379	i := 0
380
381	for i < len(text) && text[i] != '>' {
382		i++
383	}
384
385	out.Write(text[:i+1])
386	return i
387}
388
// smartCallback is the signature shared by all per-byte handlers stored
// in SPRenderer.callbacks. Process calls a handler with text starting
// at the byte that triggered it and previousChar set to the byte before
// it (0 at the start of the input). The handler writes its replacement
// to out and returns how many bytes after text[0] it consumed.
type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int
390
391// NewSmartypantsRenderer constructs a Smartypants renderer object.
392func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
393	var (
394		r SPRenderer
395
396		smartAmpAngled      = r.smartAmp(true, false)
397		smartAmpAngledNBSP  = r.smartAmp(true, true)
398		smartAmpRegular     = r.smartAmp(false, false)
399		smartAmpRegularNBSP = r.smartAmp(false, true)
400
401		addNBSP = flags&SmartypantsQuotesNBSP != 0
402	)
403
404	if flags&SmartypantsAngledQuotes == 0 {
405		r.callbacks['"'] = r.smartDoubleQuote
406		if !addNBSP {
407			r.callbacks['&'] = smartAmpRegular
408		} else {
409			r.callbacks['&'] = smartAmpRegularNBSP
410		}
411	} else {
412		r.callbacks['"'] = r.smartAngledDoubleQuote
413		if !addNBSP {
414			r.callbacks['&'] = smartAmpAngled
415		} else {
416			r.callbacks['&'] = smartAmpAngledNBSP
417		}
418	}
419	r.callbacks['\''] = r.smartSingleQuote
420	r.callbacks['('] = r.smartParens
421	if flags&SmartypantsDashes != 0 {
422		if flags&SmartypantsLatexDashes == 0 {
423			r.callbacks[' '] = r.smartEmDash
424			r.callbacks['-'] = r.smartEnDash
425		} else {
426			r.callbacks['-'] = r.smartDashLatex
427		}
428	}
429	r.callbacks['.'] = r.smartPeriod
430	if flags&SmartypantsFractions == 0 {
431		r.callbacks['1'] = r.smartNumber
432		r.callbacks['3'] = r.smartNumber
433	} else {
434		for ch := '1'; ch <= '9'; ch++ {
435			r.callbacks[ch] = r.smartNumberGeneric
436		}
437	}
438	r.callbacks['<'] = r.smartLeftAngle
439	r.callbacks['`'] = r.smartBacktick
440	return &r
441}
442
443// Process is the entry point of the Smartypants renderer.
444func (r *SPRenderer) Process(w io.Writer, text []byte) {
445	mark := 0
446	for i := 0; i < len(text); i++ {
447		if action := r.callbacks[text[i]]; action != nil {
448			if i > mark {
449				w.Write(text[mark:i])
450			}
451			previousChar := byte(0)
452			if i > 0 {
453				previousChar = text[i-1]
454			}
455			var tmp bytes.Buffer
456			i += action(&tmp, previousChar, text[i:])
457			w.Write(tmp.Bytes())
458			mark = i + 1
459		}
460	}
461	if mark < len(text) {
462		w.Write(text[mark:])
463	}
464}