all repos — grayfriday @ f35fae8188b283b28b2d3e4c3159e13c0b36440f

blackfriday fork with a few changes

smartypants.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// SmartyPants rendering
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20)
 21
 22type SPRenderer struct {
 23	inSingleQuote bool
 24	inDoubleQuote bool
 25	callbacks     [256]smartCallback
 26}
 27
 28func wordBoundary(c byte) bool {
 29	return c == 0 || isspace(c) || ispunct(c)
 30}
 31
 32func tolower(c byte) byte {
 33	if c >= 'A' && c <= 'Z' {
 34		return c - 'A' + 'a'
 35	}
 36	return c
 37}
 38
 39func isdigit(c byte) bool {
 40	return c >= '0' && c <= '9'
 41}
 42
 43func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool {
 44	// edge of the buffer is likely to be a tag that we don't get to see,
 45	// so we treat it like text sometimes
 46
 47	// enumerate all sixteen possibilities for (previousChar, nextChar)
 48	// each can be one of {0, space, punct, other}
 49	switch {
 50	case previousChar == 0 && nextChar == 0:
 51		// context is not any help here, so toggle
 52		*isOpen = !*isOpen
 53	case isspace(previousChar) && nextChar == 0:
 54		// [ "] might be [ "<code>foo...]
 55		*isOpen = true
 56	case ispunct(previousChar) && nextChar == 0:
 57		// [!"] hmm... could be [Run!"] or [("<code>...]
 58		*isOpen = false
 59	case /* isnormal(previousChar) && */ nextChar == 0:
 60		// [a"] is probably a close
 61		*isOpen = false
 62	case previousChar == 0 && isspace(nextChar):
 63		// [" ] might be [...foo</code>" ]
 64		*isOpen = false
 65	case isspace(previousChar) && isspace(nextChar):
 66		// [ " ] context is not any help here, so toggle
 67		*isOpen = !*isOpen
 68	case ispunct(previousChar) && isspace(nextChar):
 69		// [!" ] is probably a close
 70		*isOpen = false
 71	case /* isnormal(previousChar) && */ isspace(nextChar):
 72		// [a" ] this is one of the easy cases
 73		*isOpen = false
 74	case previousChar == 0 && ispunct(nextChar):
 75		// ["!] hmm... could be ["$1.95] or [</code>"!...]
 76		*isOpen = false
 77	case isspace(previousChar) && ispunct(nextChar):
 78		// [ "!] looks more like [ "$1.95]
 79		*isOpen = true
 80	case ispunct(previousChar) && ispunct(nextChar):
 81		// [!"!] context is not any help here, so toggle
 82		*isOpen = !*isOpen
 83	case /* isnormal(previousChar) && */ ispunct(nextChar):
 84		// [a"!] is probably a close
 85		*isOpen = false
 86	case previousChar == 0 /* && isnormal(nextChar) */ :
 87		// ["a] is probably an open
 88		*isOpen = true
 89	case isspace(previousChar) /* && isnormal(nextChar) */ :
 90		// [ "a] this is one of the easy cases
 91		*isOpen = true
 92	case ispunct(previousChar) /* && isnormal(nextChar) */ :
 93		// [!"a] is probably an open
 94		*isOpen = true
 95	default:
 96		// [a'b] maybe a contraction?
 97		*isOpen = false
 98	}
 99
100	out.WriteByte('&')
101	if *isOpen {
102		out.WriteByte('l')
103	} else {
104		out.WriteByte('r')
105	}
106	out.WriteByte(quote)
107	out.WriteString("quo;")
108	return true
109}
110
111func (smrt *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
112	if len(text) >= 2 {
113		t1 := tolower(text[1])
114
115		if t1 == '\'' {
116			nextChar := byte(0)
117			if len(text) >= 3 {
118				nextChar = text[2]
119			}
120			if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote) {
121				return 1
122			}
123		}
124
125		if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
126			out.WriteString("&rsquo;")
127			return 0
128		}
129
130		if len(text) >= 3 {
131			t2 := tolower(text[2])
132
133			if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
134				(len(text) < 4 || wordBoundary(text[3])) {
135				out.WriteString("&rsquo;")
136				return 0
137			}
138		}
139	}
140
141	nextChar := byte(0)
142	if len(text) > 1 {
143		nextChar = text[1]
144	}
145	if smartQuoteHelper(out, previousChar, nextChar, 's', &smrt.inSingleQuote) {
146		return 0
147	}
148
149	out.WriteByte(text[0])
150	return 0
151}
152
153func (smrt *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
154	if len(text) >= 3 {
155		t1 := tolower(text[1])
156		t2 := tolower(text[2])
157
158		if t1 == 'c' && t2 == ')' {
159			out.WriteString("&copy;")
160			return 2
161		}
162
163		if t1 == 'r' && t2 == ')' {
164			out.WriteString("&reg;")
165			return 2
166		}
167
168		if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
169			out.WriteString("&trade;")
170			return 3
171		}
172	}
173
174	out.WriteByte(text[0])
175	return 0
176}
177
178func (smrt *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
179	if len(text) >= 2 {
180		if text[1] == '-' {
181			out.WriteString("&mdash;")
182			return 1
183		}
184
185		if wordBoundary(previousChar) && wordBoundary(text[1]) {
186			out.WriteString("&ndash;")
187			return 0
188		}
189	}
190
191	out.WriteByte(text[0])
192	return 0
193}
194
195func (smrt *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
196	if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
197		out.WriteString("&mdash;")
198		return 2
199	}
200	if len(text) >= 2 && text[1] == '-' {
201		out.WriteString("&ndash;")
202		return 1
203	}
204
205	out.WriteByte(text[0])
206	return 0
207}
208
209func (smrt *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
210	if bytes.HasPrefix(text, []byte("&quot;")) {
211		nextChar := byte(0)
212		if len(text) >= 7 {
213			nextChar = text[6]
214		}
215		if smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote) {
216			return 5
217		}
218	}
219
220	if bytes.HasPrefix(text, []byte("&#0;")) {
221		return 3
222	}
223
224	out.WriteByte('&')
225	return 0
226}
227
228func (smrt *SPRenderer) smartAmp(out *bytes.Buffer, previousChar byte, text []byte) int {
229	return smrt.smartAmpVariant(out, previousChar, text, 'd')
230}
231
232func (smrt *SPRenderer) smartAmpAngledQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
233	return smrt.smartAmpVariant(out, previousChar, text, 'a')
234}
235
236func (smrt *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
237	if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
238		out.WriteString("&hellip;")
239		return 2
240	}
241
242	if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
243		out.WriteString("&hellip;")
244		return 4
245	}
246
247	out.WriteByte(text[0])
248	return 0
249}
250
251func (smrt *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
252	if len(text) >= 2 && text[1] == '`' {
253		nextChar := byte(0)
254		if len(text) >= 3 {
255			nextChar = text[2]
256		}
257		if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote) {
258			return 1
259		}
260	}
261
262	out.WriteByte(text[0])
263	return 0
264}
265
266func (smrt *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
267	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
268		// is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
269		// note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
270		//       and avoid changing dates like 1/23/2005 into fractions.
271		numEnd := 0
272		for len(text) > numEnd && isdigit(text[numEnd]) {
273			numEnd++
274		}
275		if numEnd == 0 {
276			out.WriteByte(text[0])
277			return 0
278		}
279		denStart := numEnd + 1
280		if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
281			denStart = numEnd + 3
282		} else if len(text) < numEnd+2 || text[numEnd] != '/' {
283			out.WriteByte(text[0])
284			return 0
285		}
286		denEnd := denStart
287		for len(text) > denEnd && isdigit(text[denEnd]) {
288			denEnd++
289		}
290		if denEnd == denStart {
291			out.WriteByte(text[0])
292			return 0
293		}
294		if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
295			out.WriteString("<sup>")
296			out.Write(text[:numEnd])
297			out.WriteString("</sup>&frasl;<sub>")
298			out.Write(text[denStart:denEnd])
299			out.WriteString("</sub>")
300			return denEnd - 1
301		}
302	}
303
304	out.WriteByte(text[0])
305	return 0
306}
307
308func (smrt *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
309	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
310		if text[0] == '1' && text[1] == '/' && text[2] == '2' {
311			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
312				out.WriteString("&frac12;")
313				return 2
314			}
315		}
316
317		if text[0] == '1' && text[1] == '/' && text[2] == '4' {
318			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
319				out.WriteString("&frac14;")
320				return 2
321			}
322		}
323
324		if text[0] == '3' && text[1] == '/' && text[2] == '4' {
325			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
326				out.WriteString("&frac34;")
327				return 2
328			}
329		}
330	}
331
332	out.WriteByte(text[0])
333	return 0
334}
335
336func (smrt *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
337	nextChar := byte(0)
338	if len(text) > 1 {
339		nextChar = text[1]
340	}
341	if !smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote) {
342		out.WriteString("&quot;")
343	}
344
345	return 0
346}
347
348func (smrt *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
349	return smrt.smartDoubleQuoteVariant(out, previousChar, text, 'd')
350}
351
352func (smrt *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
353	return smrt.smartDoubleQuoteVariant(out, previousChar, text, 'a')
354}
355
356func (smrt *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
357	i := 0
358
359	for i < len(text) && text[i] != '>' {
360		i++
361	}
362
363	out.Write(text[:i+1])
364	return i
365}
366
// smartCallback is the signature of a per-character handler. It is invoked
// with text[0] being the byte that triggered it and previousChar being the
// byte just before it (0 at the start of the input). The handler writes its
// replacement to out and returns how many input bytes after text[0] it
// consumed.
type smartCallback func(
	out *bytes.Buffer,
	previousChar byte,
	text []byte,
) int
368
369func NewSmartypantsRenderer(flags Extensions) *SPRenderer {
370	var r SPRenderer
371	if flags&SmartypantsAngledQuotes == 0 {
372		r.callbacks['"'] = r.smartDoubleQuote
373		r.callbacks['&'] = r.smartAmp
374	} else {
375		r.callbacks['"'] = r.smartAngledDoubleQuote
376		r.callbacks['&'] = r.smartAmpAngledQuote
377	}
378	r.callbacks['\''] = r.smartSingleQuote
379	r.callbacks['('] = r.smartParens
380	if flags&SmartypantsDashes != 0 {
381		if flags&SmartypantsLatexDashes == 0 {
382			r.callbacks['-'] = r.smartDash
383		} else {
384			r.callbacks['-'] = r.smartDashLatex
385		}
386	}
387	r.callbacks['.'] = r.smartPeriod
388	if flags&SmartypantsFractions == 0 {
389		r.callbacks['1'] = r.smartNumber
390		r.callbacks['3'] = r.smartNumber
391	} else {
392		for ch := '1'; ch <= '9'; ch++ {
393			r.callbacks[ch] = r.smartNumberGeneric
394		}
395	}
396	r.callbacks['<'] = r.smartLeftAngle
397	r.callbacks['`'] = r.smartBacktick
398	return &r
399}
400
401func (sr *SPRenderer) Process(text []byte) []byte {
402	var buff bytes.Buffer
403	// first do normal entity escaping
404	text = attrEscape2(text)
405	mark := 0
406	for i := 0; i < len(text); i++ {
407		if action := sr.callbacks[text[i]]; action != nil {
408			if i > mark {
409				buff.Write(text[mark:i])
410			}
411			previousChar := byte(0)
412			if i > 0 {
413				previousChar = text[i-1]
414			}
415			var tmp bytes.Buffer
416			i += action(&tmp, previousChar, text[i:])
417			buff.Write(tmp.Bytes())
418			mark = i + 1
419		}
420	}
421	if mark < len(text) {
422		buff.Write(text[mark:])
423	}
424	return buff.Bytes()
425}