smartypants.go (view raw)
1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11//
12// SmartyPants rendering
13//
14//
15
16package blackfriday
17
18import (
19 "bytes"
20)
21
22// SPRenderer is a struct containing state of a Smartypants renderer.
23type SPRenderer struct {
24 inSingleQuote bool
25 inDoubleQuote bool
26 callbacks [256]smartCallback
27}
28
29func wordBoundary(c byte) bool {
30 return c == 0 || isspace(c) || ispunct(c)
31}
32
// tolower maps the ASCII letters 'A'..'Z' to 'a'..'z' and returns every other
// byte unchanged.
func tolower(c byte) byte {
	if 'A' <= c && c <= 'Z' {
		return c + ('a' - 'A')
	}
	return c
}
39
// isdigit reports whether c is one of the ASCII digits '0'..'9'.
func isdigit(c byte) bool {
	// byte subtraction wraps around, so anything below '0' becomes a large
	// value and fails the comparison as well
	return c-'0' <= 9
}
43
44func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool {
45 // edge of the buffer is likely to be a tag that we don't get to see,
46 // so we treat it like text sometimes
47
48 // enumerate all sixteen possibilities for (previousChar, nextChar)
49 // each can be one of {0, space, punct, other}
50 switch {
51 case previousChar == 0 && nextChar == 0:
52 // context is not any help here, so toggle
53 *isOpen = !*isOpen
54 case isspace(previousChar) && nextChar == 0:
55 // [ "] might be [ "<code>foo...]
56 *isOpen = true
57 case ispunct(previousChar) && nextChar == 0:
58 // [!"] hmm... could be [Run!"] or [("<code>...]
59 *isOpen = false
60 case /* isnormal(previousChar) && */ nextChar == 0:
61 // [a"] is probably a close
62 *isOpen = false
63 case previousChar == 0 && isspace(nextChar):
64 // [" ] might be [...foo</code>" ]
65 *isOpen = false
66 case isspace(previousChar) && isspace(nextChar):
67 // [ " ] context is not any help here, so toggle
68 *isOpen = !*isOpen
69 case ispunct(previousChar) && isspace(nextChar):
70 // [!" ] is probably a close
71 *isOpen = false
72 case /* isnormal(previousChar) && */ isspace(nextChar):
73 // [a" ] this is one of the easy cases
74 *isOpen = false
75 case previousChar == 0 && ispunct(nextChar):
76 // ["!] hmm... could be ["$1.95] or [</code>"!...]
77 *isOpen = false
78 case isspace(previousChar) && ispunct(nextChar):
79 // [ "!] looks more like [ "$1.95]
80 *isOpen = true
81 case ispunct(previousChar) && ispunct(nextChar):
82 // [!"!] context is not any help here, so toggle
83 *isOpen = !*isOpen
84 case /* isnormal(previousChar) && */ ispunct(nextChar):
85 // [a"!] is probably a close
86 *isOpen = false
87 case previousChar == 0 /* && isnormal(nextChar) */ :
88 // ["a] is probably an open
89 *isOpen = true
90 case isspace(previousChar) /* && isnormal(nextChar) */ :
91 // [ "a] this is one of the easy cases
92 *isOpen = true
93 case ispunct(previousChar) /* && isnormal(nextChar) */ :
94 // [!"a] is probably an open
95 *isOpen = true
96 default:
97 // [a'b] maybe a contraction?
98 *isOpen = false
99 }
100
101 out.WriteByte('&')
102 if *isOpen {
103 out.WriteByte('l')
104 } else {
105 out.WriteByte('r')
106 }
107 out.WriteByte(quote)
108 out.WriteString("quo;")
109 return true
110}
111
112func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
113 if len(text) >= 2 {
114 t1 := tolower(text[1])
115
116 if t1 == '\'' {
117 nextChar := byte(0)
118 if len(text) >= 3 {
119 nextChar = text[2]
120 }
121 if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote) {
122 return 1
123 }
124 }
125
126 if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
127 out.WriteString("’")
128 return 0
129 }
130
131 if len(text) >= 3 {
132 t2 := tolower(text[2])
133
134 if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
135 (len(text) < 4 || wordBoundary(text[3])) {
136 out.WriteString("’")
137 return 0
138 }
139 }
140 }
141
142 nextChar := byte(0)
143 if len(text) > 1 {
144 nextChar = text[1]
145 }
146 if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote) {
147 return 0
148 }
149
150 out.WriteByte(text[0])
151 return 0
152}
153
154func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
155 if len(text) >= 3 {
156 t1 := tolower(text[1])
157 t2 := tolower(text[2])
158
159 if t1 == 'c' && t2 == ')' {
160 out.WriteString("©")
161 return 2
162 }
163
164 if t1 == 'r' && t2 == ')' {
165 out.WriteString("®")
166 return 2
167 }
168
169 if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
170 out.WriteString("™")
171 return 3
172 }
173 }
174
175 out.WriteByte(text[0])
176 return 0
177}
178
179func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
180 if len(text) >= 2 {
181 if text[1] == '-' {
182 out.WriteString("—")
183 return 1
184 }
185
186 if wordBoundary(previousChar) && wordBoundary(text[1]) {
187 out.WriteString("–")
188 return 0
189 }
190 }
191
192 out.WriteByte(text[0])
193 return 0
194}
195
196func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
197 if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
198 out.WriteString("—")
199 return 2
200 }
201 if len(text) >= 2 && text[1] == '-' {
202 out.WriteString("–")
203 return 1
204 }
205
206 out.WriteByte(text[0])
207 return 0
208}
209
210func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
211 if bytes.HasPrefix(text, []byte(""")) {
212 nextChar := byte(0)
213 if len(text) >= 7 {
214 nextChar = text[6]
215 }
216 if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote) {
217 return 5
218 }
219 }
220
221 if bytes.HasPrefix(text, []byte("�")) {
222 return 3
223 }
224
225 out.WriteByte('&')
226 return 0
227}
228
229func (r *SPRenderer) smartAmp(out *bytes.Buffer, previousChar byte, text []byte) int {
230 return r.smartAmpVariant(out, previousChar, text, 'd')
231}
232
233func (r *SPRenderer) smartAmpAngledQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
234 return r.smartAmpVariant(out, previousChar, text, 'a')
235}
236
237func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
238 if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
239 out.WriteString("…")
240 return 2
241 }
242
243 if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
244 out.WriteString("…")
245 return 4
246 }
247
248 out.WriteByte(text[0])
249 return 0
250}
251
252func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
253 if len(text) >= 2 && text[1] == '`' {
254 nextChar := byte(0)
255 if len(text) >= 3 {
256 nextChar = text[2]
257 }
258 if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote) {
259 return 1
260 }
261 }
262
263 out.WriteByte(text[0])
264 return 0
265}
266
267func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
268 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
269 // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
270 // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
271 // and avoid changing dates like 1/23/2005 into fractions.
272 numEnd := 0
273 for len(text) > numEnd && isdigit(text[numEnd]) {
274 numEnd++
275 }
276 if numEnd == 0 {
277 out.WriteByte(text[0])
278 return 0
279 }
280 denStart := numEnd + 1
281 if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
282 denStart = numEnd + 3
283 } else if len(text) < numEnd+2 || text[numEnd] != '/' {
284 out.WriteByte(text[0])
285 return 0
286 }
287 denEnd := denStart
288 for len(text) > denEnd && isdigit(text[denEnd]) {
289 denEnd++
290 }
291 if denEnd == denStart {
292 out.WriteByte(text[0])
293 return 0
294 }
295 if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
296 out.WriteString("<sup>")
297 out.Write(text[:numEnd])
298 out.WriteString("</sup>⁄<sub>")
299 out.Write(text[denStart:denEnd])
300 out.WriteString("</sub>")
301 return denEnd - 1
302 }
303 }
304
305 out.WriteByte(text[0])
306 return 0
307}
308
309func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
310 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
311 if text[0] == '1' && text[1] == '/' && text[2] == '2' {
312 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
313 out.WriteString("½")
314 return 2
315 }
316 }
317
318 if text[0] == '1' && text[1] == '/' && text[2] == '4' {
319 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
320 out.WriteString("¼")
321 return 2
322 }
323 }
324
325 if text[0] == '3' && text[1] == '/' && text[2] == '4' {
326 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
327 out.WriteString("¾")
328 return 2
329 }
330 }
331 }
332
333 out.WriteByte(text[0])
334 return 0
335}
336
337func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
338 nextChar := byte(0)
339 if len(text) > 1 {
340 nextChar = text[1]
341 }
342 if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote) {
343 out.WriteString(""")
344 }
345
346 return 0
347}
348
349func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
350 return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
351}
352
353func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
354 return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
355}
356
357func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
358 i := 0
359
360 for i < len(text) && text[i] != '>' {
361 i++
362 }
363
364 out.Write(text[:i+1])
365 return i
366}
367
// smartCallback is the signature of the per-byte handlers stored in
// SPRenderer.callbacks. Process invokes a handler with the output buffer, the
// byte preceding the trigger byte (0 at the start of the text) and the text
// beginning at the trigger byte. The returned count is added to Process's
// index, i.e. it is the number of bytes consumed beyond the trigger itself.
type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int
369
370// NewSmartypantsRenderer constructs a Smartypants renderer object.
371func NewSmartypantsRenderer(flags Extensions) *SPRenderer {
372 var r SPRenderer
373 if flags&SmartypantsAngledQuotes == 0 {
374 r.callbacks['"'] = r.smartDoubleQuote
375 r.callbacks['&'] = r.smartAmp
376 } else {
377 r.callbacks['"'] = r.smartAngledDoubleQuote
378 r.callbacks['&'] = r.smartAmpAngledQuote
379 }
380 r.callbacks['\''] = r.smartSingleQuote
381 r.callbacks['('] = r.smartParens
382 if flags&SmartypantsDashes != 0 {
383 if flags&SmartypantsLatexDashes == 0 {
384 r.callbacks['-'] = r.smartDash
385 } else {
386 r.callbacks['-'] = r.smartDashLatex
387 }
388 }
389 r.callbacks['.'] = r.smartPeriod
390 if flags&SmartypantsFractions == 0 {
391 r.callbacks['1'] = r.smartNumber
392 r.callbacks['3'] = r.smartNumber
393 } else {
394 for ch := '1'; ch <= '9'; ch++ {
395 r.callbacks[ch] = r.smartNumberGeneric
396 }
397 }
398 r.callbacks['<'] = r.smartLeftAngle
399 r.callbacks['`'] = r.smartBacktick
400 return &r
401}
402
403// Process is the entry point of the Smartypants renderer.
404func (r *SPRenderer) Process(text []byte) []byte {
405 var buff bytes.Buffer
406 // first do normal entity escaping
407 text = esc(text)
408 mark := 0
409 for i := 0; i < len(text); i++ {
410 if action := r.callbacks[text[i]]; action != nil {
411 if i > mark {
412 buff.Write(text[mark:i])
413 }
414 previousChar := byte(0)
415 if i > 0 {
416 previousChar = text[i-1]
417 }
418 var tmp bytes.Buffer
419 i += action(&tmp, previousChar, text[i:])
420 buff.Write(tmp.Bytes())
421 mark = i + 1
422 }
423 }
424 if mark < len(text) {
425 buff.Write(text[mark:])
426 }
427 return buff.Bytes()
428}