all repos — grayfriday @ 257ccba98f905c39b0163a224c8f73eddd09fd93

blackfriday fork with a few changes

smartypants.go (view raw)

  1//
  2// Blackfriday Markdown Processor
  3// Available at http://github.com/russross/blackfriday
  4//
  5// Copyright © 2011 Russ Ross <russ@russross.com>.
  6// Distributed under the Simplified BSD License.
  7// See README.md for details.
  8//
  9
 10//
 11//
 12// SmartyPants rendering
 13//
 14//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20	"io"
 21)
 22
// SPRenderer is a struct containing state of a Smartypants renderer.
type SPRenderer struct {
	// inSingleQuote is the open/close flag passed to smartQuoteHelper for
	// single quotes; the helper sets it to true when it emits an opening
	// quote and to false when it emits a closing one.
	inSingleQuote bool
	// inDoubleQuote is the same flag for double quotes (including the ``,
	// '' and &quot; spellings, which all share it).
	inDoubleQuote bool
	// callbacks is indexed by input byte value. A nil entry means Process
	// copies that byte through without calling a handler.
	callbacks     [256]smartCallback
}
 29
 30func wordBoundary(c byte) bool {
 31	return c == 0 || isspace(c) || ispunct(c)
 32}
 33
 34func tolower(c byte) byte {
 35	if c >= 'A' && c <= 'Z' {
 36		return c - 'A' + 'a'
 37	}
 38	return c
 39}
 40
 41func isdigit(c byte) bool {
 42	return c >= '0' && c <= '9'
 43}
 44
 45func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool {
 46	// edge of the buffer is likely to be a tag that we don't get to see,
 47	// so we treat it like text sometimes
 48
 49	// enumerate all sixteen possibilities for (previousChar, nextChar)
 50	// each can be one of {0, space, punct, other}
 51	switch {
 52	case previousChar == 0 && nextChar == 0:
 53		// context is not any help here, so toggle
 54		*isOpen = !*isOpen
 55	case isspace(previousChar) && nextChar == 0:
 56		// [ "] might be [ "<code>foo...]
 57		*isOpen = true
 58	case ispunct(previousChar) && nextChar == 0:
 59		// [!"] hmm... could be [Run!"] or [("<code>...]
 60		*isOpen = false
 61	case /* isnormal(previousChar) && */ nextChar == 0:
 62		// [a"] is probably a close
 63		*isOpen = false
 64	case previousChar == 0 && isspace(nextChar):
 65		// [" ] might be [...foo</code>" ]
 66		*isOpen = false
 67	case isspace(previousChar) && isspace(nextChar):
 68		// [ " ] context is not any help here, so toggle
 69		*isOpen = !*isOpen
 70	case ispunct(previousChar) && isspace(nextChar):
 71		// [!" ] is probably a close
 72		*isOpen = false
 73	case /* isnormal(previousChar) && */ isspace(nextChar):
 74		// [a" ] this is one of the easy cases
 75		*isOpen = false
 76	case previousChar == 0 && ispunct(nextChar):
 77		// ["!] hmm... could be ["$1.95] or [</code>"!...]
 78		*isOpen = false
 79	case isspace(previousChar) && ispunct(nextChar):
 80		// [ "!] looks more like [ "$1.95]
 81		*isOpen = true
 82	case ispunct(previousChar) && ispunct(nextChar):
 83		// [!"!] context is not any help here, so toggle
 84		*isOpen = !*isOpen
 85	case /* isnormal(previousChar) && */ ispunct(nextChar):
 86		// [a"!] is probably a close
 87		*isOpen = false
 88	case previousChar == 0 /* && isnormal(nextChar) */ :
 89		// ["a] is probably an open
 90		*isOpen = true
 91	case isspace(previousChar) /* && isnormal(nextChar) */ :
 92		// [ "a] this is one of the easy cases
 93		*isOpen = true
 94	case ispunct(previousChar) /* && isnormal(nextChar) */ :
 95		// [!"a] is probably an open
 96		*isOpen = true
 97	default:
 98		// [a'b] maybe a contraction?
 99		*isOpen = false
100	}
101
102	out.WriteByte('&')
103	if *isOpen {
104		out.WriteByte('l')
105	} else {
106		out.WriteByte('r')
107	}
108	out.WriteByte(quote)
109	out.WriteString("quo;")
110	return true
111}
112
113func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
114	if len(text) >= 2 {
115		t1 := tolower(text[1])
116
117		if t1 == '\'' {
118			nextChar := byte(0)
119			if len(text) >= 3 {
120				nextChar = text[2]
121			}
122			if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote) {
123				return 1
124			}
125		}
126
127		if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
128			out.WriteString("&rsquo;")
129			return 0
130		}
131
132		if len(text) >= 3 {
133			t2 := tolower(text[2])
134
135			if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
136				(len(text) < 4 || wordBoundary(text[3])) {
137				out.WriteString("&rsquo;")
138				return 0
139			}
140		}
141	}
142
143	nextChar := byte(0)
144	if len(text) > 1 {
145		nextChar = text[1]
146	}
147	if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote) {
148		return 0
149	}
150
151	out.WriteByte(text[0])
152	return 0
153}
154
155func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
156	if len(text) >= 3 {
157		t1 := tolower(text[1])
158		t2 := tolower(text[2])
159
160		if t1 == 'c' && t2 == ')' {
161			out.WriteString("&copy;")
162			return 2
163		}
164
165		if t1 == 'r' && t2 == ')' {
166			out.WriteString("&reg;")
167			return 2
168		}
169
170		if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
171			out.WriteString("&trade;")
172			return 3
173		}
174	}
175
176	out.WriteByte(text[0])
177	return 0
178}
179
180func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
181	if len(text) >= 2 {
182		if text[1] == '-' {
183			out.WriteString("&mdash;")
184			return 1
185		}
186
187		if wordBoundary(previousChar) && wordBoundary(text[1]) {
188			out.WriteString("&ndash;")
189			return 0
190		}
191	}
192
193	out.WriteByte(text[0])
194	return 0
195}
196
197func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
198	if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
199		out.WriteString("&mdash;")
200		return 2
201	}
202	if len(text) >= 2 && text[1] == '-' {
203		out.WriteString("&ndash;")
204		return 1
205	}
206
207	out.WriteByte(text[0])
208	return 0
209}
210
211func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
212	if bytes.HasPrefix(text, []byte("&quot;")) {
213		nextChar := byte(0)
214		if len(text) >= 7 {
215			nextChar = text[6]
216		}
217		if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote) {
218			return 5
219		}
220	}
221
222	if bytes.HasPrefix(text, []byte("&#0;")) {
223		return 3
224	}
225
226	out.WriteByte('&')
227	return 0
228}
229
230func (r *SPRenderer) smartAmp(out *bytes.Buffer, previousChar byte, text []byte) int {
231	return r.smartAmpVariant(out, previousChar, text, 'd')
232}
233
234func (r *SPRenderer) smartAmpAngledQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
235	return r.smartAmpVariant(out, previousChar, text, 'a')
236}
237
238func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
239	if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
240		out.WriteString("&hellip;")
241		return 2
242	}
243
244	if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
245		out.WriteString("&hellip;")
246		return 4
247	}
248
249	out.WriteByte(text[0])
250	return 0
251}
252
253func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
254	if len(text) >= 2 && text[1] == '`' {
255		nextChar := byte(0)
256		if len(text) >= 3 {
257			nextChar = text[2]
258		}
259		if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote) {
260			return 1
261		}
262	}
263
264	out.WriteByte(text[0])
265	return 0
266}
267
268func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
269	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
270		// is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
271		// note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
272		//       and avoid changing dates like 1/23/2005 into fractions.
273		numEnd := 0
274		for len(text) > numEnd && isdigit(text[numEnd]) {
275			numEnd++
276		}
277		if numEnd == 0 {
278			out.WriteByte(text[0])
279			return 0
280		}
281		denStart := numEnd + 1
282		if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
283			denStart = numEnd + 3
284		} else if len(text) < numEnd+2 || text[numEnd] != '/' {
285			out.WriteByte(text[0])
286			return 0
287		}
288		denEnd := denStart
289		for len(text) > denEnd && isdigit(text[denEnd]) {
290			denEnd++
291		}
292		if denEnd == denStart {
293			out.WriteByte(text[0])
294			return 0
295		}
296		if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
297			out.WriteString("<sup>")
298			out.Write(text[:numEnd])
299			out.WriteString("</sup>&frasl;<sub>")
300			out.Write(text[denStart:denEnd])
301			out.WriteString("</sub>")
302			return denEnd - 1
303		}
304	}
305
306	out.WriteByte(text[0])
307	return 0
308}
309
310func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
311	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
312		if text[0] == '1' && text[1] == '/' && text[2] == '2' {
313			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
314				out.WriteString("&frac12;")
315				return 2
316			}
317		}
318
319		if text[0] == '1' && text[1] == '/' && text[2] == '4' {
320			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
321				out.WriteString("&frac14;")
322				return 2
323			}
324		}
325
326		if text[0] == '3' && text[1] == '/' && text[2] == '4' {
327			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
328				out.WriteString("&frac34;")
329				return 2
330			}
331		}
332	}
333
334	out.WriteByte(text[0])
335	return 0
336}
337
338func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
339	nextChar := byte(0)
340	if len(text) > 1 {
341		nextChar = text[1]
342	}
343	if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote) {
344		out.WriteString("&quot;")
345	}
346
347	return 0
348}
349
350func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
351	return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
352}
353
354func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
355	return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
356}
357
358func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
359	i := 0
360
361	for i < len(text) && text[i] != '>' {
362		i++
363	}
364
365	out.Write(text[:i+1])
366	return i
367}
368
// smartCallback is the signature of the per-byte handlers stored in
// SPRenderer.callbacks. Process calls a handler with text sliced so that
// text[0] is the triggering byte, and with previousChar set to the byte just
// before it (0 at the start of the input). The handler writes its replacement
// to out and returns how many bytes beyond text[0] it consumed.
type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int
370
371// NewSmartypantsRenderer constructs a Smartypants renderer object.
372func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
373	var r SPRenderer
374	if flags&SmartypantsAngledQuotes == 0 {
375		r.callbacks['"'] = r.smartDoubleQuote
376		r.callbacks['&'] = r.smartAmp
377	} else {
378		r.callbacks['"'] = r.smartAngledDoubleQuote
379		r.callbacks['&'] = r.smartAmpAngledQuote
380	}
381	r.callbacks['\''] = r.smartSingleQuote
382	r.callbacks['('] = r.smartParens
383	if flags&SmartypantsDashes != 0 {
384		if flags&SmartypantsLatexDashes == 0 {
385			r.callbacks['-'] = r.smartDash
386		} else {
387			r.callbacks['-'] = r.smartDashLatex
388		}
389	}
390	r.callbacks['.'] = r.smartPeriod
391	if flags&SmartypantsFractions == 0 {
392		r.callbacks['1'] = r.smartNumber
393		r.callbacks['3'] = r.smartNumber
394	} else {
395		for ch := '1'; ch <= '9'; ch++ {
396			r.callbacks[ch] = r.smartNumberGeneric
397		}
398	}
399	r.callbacks['<'] = r.smartLeftAngle
400	r.callbacks['`'] = r.smartBacktick
401	return &r
402}
403
404// Process is the entry point of the Smartypants renderer.
405func (r *SPRenderer) Process(w io.Writer, text []byte) {
406	mark := 0
407	for i := 0; i < len(text); i++ {
408		if action := r.callbacks[text[i]]; action != nil {
409			if i > mark {
410				w.Write(text[mark:i])
411			}
412			previousChar := byte(0)
413			if i > 0 {
414				previousChar = text[i-1]
415			}
416			var tmp bytes.Buffer
417			i += action(&tmp, previousChar, text[i:])
418			w.Write(tmp.Bytes())
419			mark = i + 1
420		}
421	}
422	if mark < len(text) {
423		w.Write(text[mark:])
424	}
425}