all repos — grayfriday @ b91b5719eb612459149aae6f340a304e5956530b

blackfriday fork with a few changes

smartypants.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// SmartyPants rendering
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20)
 21
 22// SPRenderer is a struct containing state of a Smartypants renderer.
 23type SPRenderer struct {
 24	inSingleQuote bool
 25	inDoubleQuote bool
 26	callbacks     [256]smartCallback
 27}
 28
 29func wordBoundary(c byte) bool {
 30	return c == 0 || isspace(c) || ispunct(c)
 31}
 32
 33func tolower(c byte) byte {
 34	if c >= 'A' && c <= 'Z' {
 35		return c - 'A' + 'a'
 36	}
 37	return c
 38}
 39
 40func isdigit(c byte) bool {
 41	return c >= '0' && c <= '9'
 42}
 43
 44func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool {
 45	// edge of the buffer is likely to be a tag that we don't get to see,
 46	// so we treat it like text sometimes
 47
 48	// enumerate all sixteen possibilities for (previousChar, nextChar)
 49	// each can be one of {0, space, punct, other}
 50	switch {
 51	case previousChar == 0 && nextChar == 0:
 52		// context is not any help here, so toggle
 53		*isOpen = !*isOpen
 54	case isspace(previousChar) && nextChar == 0:
 55		// [ "] might be [ "<code>foo...]
 56		*isOpen = true
 57	case ispunct(previousChar) && nextChar == 0:
 58		// [!"] hmm... could be [Run!"] or [("<code>...]
 59		*isOpen = false
 60	case /* isnormal(previousChar) && */ nextChar == 0:
 61		// [a"] is probably a close
 62		*isOpen = false
 63	case previousChar == 0 && isspace(nextChar):
 64		// [" ] might be [...foo</code>" ]
 65		*isOpen = false
 66	case isspace(previousChar) && isspace(nextChar):
 67		// [ " ] context is not any help here, so toggle
 68		*isOpen = !*isOpen
 69	case ispunct(previousChar) && isspace(nextChar):
 70		// [!" ] is probably a close
 71		*isOpen = false
 72	case /* isnormal(previousChar) && */ isspace(nextChar):
 73		// [a" ] this is one of the easy cases
 74		*isOpen = false
 75	case previousChar == 0 && ispunct(nextChar):
 76		// ["!] hmm... could be ["$1.95] or [</code>"!...]
 77		*isOpen = false
 78	case isspace(previousChar) && ispunct(nextChar):
 79		// [ "!] looks more like [ "$1.95]
 80		*isOpen = true
 81	case ispunct(previousChar) && ispunct(nextChar):
 82		// [!"!] context is not any help here, so toggle
 83		*isOpen = !*isOpen
 84	case /* isnormal(previousChar) && */ ispunct(nextChar):
 85		// [a"!] is probably a close
 86		*isOpen = false
 87	case previousChar == 0 /* && isnormal(nextChar) */ :
 88		// ["a] is probably an open
 89		*isOpen = true
 90	case isspace(previousChar) /* && isnormal(nextChar) */ :
 91		// [ "a] this is one of the easy cases
 92		*isOpen = true
 93	case ispunct(previousChar) /* && isnormal(nextChar) */ :
 94		// [!"a] is probably an open
 95		*isOpen = true
 96	default:
 97		// [a'b] maybe a contraction?
 98		*isOpen = false
 99	}
100
101	out.WriteByte('&')
102	if *isOpen {
103		out.WriteByte('l')
104	} else {
105		out.WriteByte('r')
106	}
107	out.WriteByte(quote)
108	out.WriteString("quo;")
109	return true
110}
111
112func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
113	if len(text) >= 2 {
114		t1 := tolower(text[1])
115
116		if t1 == '\'' {
117			nextChar := byte(0)
118			if len(text) >= 3 {
119				nextChar = text[2]
120			}
121			if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote) {
122				return 1
123			}
124		}
125
126		if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
127			out.WriteString("&rsquo;")
128			return 0
129		}
130
131		if len(text) >= 3 {
132			t2 := tolower(text[2])
133
134			if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
135				(len(text) < 4 || wordBoundary(text[3])) {
136				out.WriteString("&rsquo;")
137				return 0
138			}
139		}
140	}
141
142	nextChar := byte(0)
143	if len(text) > 1 {
144		nextChar = text[1]
145	}
146	if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote) {
147		return 0
148	}
149
150	out.WriteByte(text[0])
151	return 0
152}
153
154func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
155	if len(text) >= 3 {
156		t1 := tolower(text[1])
157		t2 := tolower(text[2])
158
159		if t1 == 'c' && t2 == ')' {
160			out.WriteString("&copy;")
161			return 2
162		}
163
164		if t1 == 'r' && t2 == ')' {
165			out.WriteString("&reg;")
166			return 2
167		}
168
169		if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
170			out.WriteString("&trade;")
171			return 3
172		}
173	}
174
175	out.WriteByte(text[0])
176	return 0
177}
178
179func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
180	if len(text) >= 2 {
181		if text[1] == '-' {
182			out.WriteString("&mdash;")
183			return 1
184		}
185
186		if wordBoundary(previousChar) && wordBoundary(text[1]) {
187			out.WriteString("&ndash;")
188			return 0
189		}
190	}
191
192	out.WriteByte(text[0])
193	return 0
194}
195
196func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
197	if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
198		out.WriteString("&mdash;")
199		return 2
200	}
201	if len(text) >= 2 && text[1] == '-' {
202		out.WriteString("&ndash;")
203		return 1
204	}
205
206	out.WriteByte(text[0])
207	return 0
208}
209
210func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
211	if bytes.HasPrefix(text, []byte("&quot;")) {
212		nextChar := byte(0)
213		if len(text) >= 7 {
214			nextChar = text[6]
215		}
216		if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote) {
217			return 5
218		}
219	}
220
221	if bytes.HasPrefix(text, []byte("&#0;")) {
222		return 3
223	}
224
225	out.WriteByte('&')
226	return 0
227}
228
229func (r *SPRenderer) smartAmp(out *bytes.Buffer, previousChar byte, text []byte) int {
230	return r.smartAmpVariant(out, previousChar, text, 'd')
231}
232
233func (r *SPRenderer) smartAmpAngledQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
234	return r.smartAmpVariant(out, previousChar, text, 'a')
235}
236
237func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
238	if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
239		out.WriteString("&hellip;")
240		return 2
241	}
242
243	if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
244		out.WriteString("&hellip;")
245		return 4
246	}
247
248	out.WriteByte(text[0])
249	return 0
250}
251
252func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
253	if len(text) >= 2 && text[1] == '`' {
254		nextChar := byte(0)
255		if len(text) >= 3 {
256			nextChar = text[2]
257		}
258		if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote) {
259			return 1
260		}
261	}
262
263	out.WriteByte(text[0])
264	return 0
265}
266
267func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
268	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
269		// is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
270		// note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
271		//       and avoid changing dates like 1/23/2005 into fractions.
272		numEnd := 0
273		for len(text) > numEnd && isdigit(text[numEnd]) {
274			numEnd++
275		}
276		if numEnd == 0 {
277			out.WriteByte(text[0])
278			return 0
279		}
280		denStart := numEnd + 1
281		if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
282			denStart = numEnd + 3
283		} else if len(text) < numEnd+2 || text[numEnd] != '/' {
284			out.WriteByte(text[0])
285			return 0
286		}
287		denEnd := denStart
288		for len(text) > denEnd && isdigit(text[denEnd]) {
289			denEnd++
290		}
291		if denEnd == denStart {
292			out.WriteByte(text[0])
293			return 0
294		}
295		if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
296			out.WriteString("<sup>")
297			out.Write(text[:numEnd])
298			out.WriteString("</sup>&frasl;<sub>")
299			out.Write(text[denStart:denEnd])
300			out.WriteString("</sub>")
301			return denEnd - 1
302		}
303	}
304
305	out.WriteByte(text[0])
306	return 0
307}
308
309func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
310	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
311		if text[0] == '1' && text[1] == '/' && text[2] == '2' {
312			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
313				out.WriteString("&frac12;")
314				return 2
315			}
316		}
317
318		if text[0] == '1' && text[1] == '/' && text[2] == '4' {
319			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
320				out.WriteString("&frac14;")
321				return 2
322			}
323		}
324
325		if text[0] == '3' && text[1] == '/' && text[2] == '4' {
326			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
327				out.WriteString("&frac34;")
328				return 2
329			}
330		}
331	}
332
333	out.WriteByte(text[0])
334	return 0
335}
336
337func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
338	nextChar := byte(0)
339	if len(text) > 1 {
340		nextChar = text[1]
341	}
342	if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote) {
343		out.WriteString("&quot;")
344	}
345
346	return 0
347}
348
349func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
350	return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
351}
352
353func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
354	return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
355}
356
357func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
358	i := 0
359
360	for i < len(text) && text[i] != '>' {
361		i++
362	}
363
364	out.Write(text[:i+1])
365	return i
366}
367
// smartCallback is the signature of a per-byte handler. Process invokes it
// with an output buffer, the byte preceding the trigger (0 at the start of
// the input) and the input starting at the trigger byte. The result is the
// number of input bytes consumed in addition to the trigger byte itself.
type smartCallback func(dst *bytes.Buffer, prev byte, rest []byte) int
369
370// NewSmartypantsRenderer constructs a Smartypants renderer object.
371func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
372	var r SPRenderer
373	if flags&SmartypantsAngledQuotes == 0 {
374		r.callbacks['"'] = r.smartDoubleQuote
375		r.callbacks['&'] = r.smartAmp
376	} else {
377		r.callbacks['"'] = r.smartAngledDoubleQuote
378		r.callbacks['&'] = r.smartAmpAngledQuote
379	}
380	r.callbacks['\''] = r.smartSingleQuote
381	r.callbacks['('] = r.smartParens
382	if flags&SmartypantsDashes != 0 {
383		if flags&SmartypantsLatexDashes == 0 {
384			r.callbacks['-'] = r.smartDash
385		} else {
386			r.callbacks['-'] = r.smartDashLatex
387		}
388	}
389	r.callbacks['.'] = r.smartPeriod
390	if flags&SmartypantsFractions == 0 {
391		r.callbacks['1'] = r.smartNumber
392		r.callbacks['3'] = r.smartNumber
393	} else {
394		for ch := '1'; ch <= '9'; ch++ {
395			r.callbacks[ch] = r.smartNumberGeneric
396		}
397	}
398	r.callbacks['<'] = r.smartLeftAngle
399	r.callbacks['`'] = r.smartBacktick
400	return &r
401}
402
403// Process is the entry point of the Smartypants renderer.
404func (r *SPRenderer) Process(text []byte) []byte {
405	var buff bytes.Buffer
406	mark := 0
407	for i := 0; i < len(text); i++ {
408		if action := r.callbacks[text[i]]; action != nil {
409			if i > mark {
410				buff.Write(text[mark:i])
411			}
412			previousChar := byte(0)
413			if i > 0 {
414				previousChar = text[i-1]
415			}
416			var tmp bytes.Buffer
417			i += action(&tmp, previousChar, text[i:])
418			buff.Write(tmp.Bytes())
419			mark = i + 1
420		}
421	}
422	if mark < len(text) {
423		buff.Write(text[mark:])
424	}
425	return buff.Bytes()
426}