smartypants.go (view raw)
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// SmartyPants rendering
//
//

16package blackfriday
17
18import (
19 "bytes"
20)
21
22type SPRenderer struct {
23 inSingleQuote bool
24 inDoubleQuote bool
25 callbacks [256]smartCallback
26}
27
28func wordBoundary(c byte) bool {
29 return c == 0 || isspace(c) || ispunct(c)
30}
31
// tolower maps the ASCII letters 'A'..'Z' to 'a'..'z' and returns every other
// byte unchanged.
func tolower(c byte) byte {
	if 'A' <= c && c <= 'Z' {
		return c + ('a' - 'A')
	}
	return c
}
38
// isdigit reports whether c is an ASCII decimal digit.
func isdigit(c byte) bool {
	// byte arithmetic wraps, so anything below '0' becomes a large value
	return c-'0' <= 9
}
42
43func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool {
44 // edge of the buffer is likely to be a tag that we don't get to see,
45 // so we treat it like text sometimes
46
47 // enumerate all sixteen possibilities for (previousChar, nextChar)
48 // each can be one of {0, space, punct, other}
49 switch {
50 case previousChar == 0 && nextChar == 0:
51 // context is not any help here, so toggle
52 *isOpen = !*isOpen
53 case isspace(previousChar) && nextChar == 0:
54 // [ "] might be [ "<code>foo...]
55 *isOpen = true
56 case ispunct(previousChar) && nextChar == 0:
57 // [!"] hmm... could be [Run!"] or [("<code>...]
58 *isOpen = false
59 case /* isnormal(previousChar) && */ nextChar == 0:
60 // [a"] is probably a close
61 *isOpen = false
62 case previousChar == 0 && isspace(nextChar):
63 // [" ] might be [...foo</code>" ]
64 *isOpen = false
65 case isspace(previousChar) && isspace(nextChar):
66 // [ " ] context is not any help here, so toggle
67 *isOpen = !*isOpen
68 case ispunct(previousChar) && isspace(nextChar):
69 // [!" ] is probably a close
70 *isOpen = false
71 case /* isnormal(previousChar) && */ isspace(nextChar):
72 // [a" ] this is one of the easy cases
73 *isOpen = false
74 case previousChar == 0 && ispunct(nextChar):
75 // ["!] hmm... could be ["$1.95] or [</code>"!...]
76 *isOpen = false
77 case isspace(previousChar) && ispunct(nextChar):
78 // [ "!] looks more like [ "$1.95]
79 *isOpen = true
80 case ispunct(previousChar) && ispunct(nextChar):
81 // [!"!] context is not any help here, so toggle
82 *isOpen = !*isOpen
83 case /* isnormal(previousChar) && */ ispunct(nextChar):
84 // [a"!] is probably a close
85 *isOpen = false
86 case previousChar == 0 /* && isnormal(nextChar) */ :
87 // ["a] is probably an open
88 *isOpen = true
89 case isspace(previousChar) /* && isnormal(nextChar) */ :
90 // [ "a] this is one of the easy cases
91 *isOpen = true
92 case ispunct(previousChar) /* && isnormal(nextChar) */ :
93 // [!"a] is probably an open
94 *isOpen = true
95 default:
96 // [a'b] maybe a contraction?
97 *isOpen = false
98 }
99
100 out.WriteByte('&')
101 if *isOpen {
102 out.WriteByte('l')
103 } else {
104 out.WriteByte('r')
105 }
106 out.WriteByte(quote)
107 out.WriteString("quo;")
108 return true
109}
110
111func (smrt *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
112 if len(text) >= 2 {
113 t1 := tolower(text[1])
114
115 if t1 == '\'' {
116 nextChar := byte(0)
117 if len(text) >= 3 {
118 nextChar = text[2]
119 }
120 if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote) {
121 return 1
122 }
123 }
124
125 if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
126 out.WriteString("’")
127 return 0
128 }
129
130 if len(text) >= 3 {
131 t2 := tolower(text[2])
132
133 if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
134 (len(text) < 4 || wordBoundary(text[3])) {
135 out.WriteString("’")
136 return 0
137 }
138 }
139 }
140
141 nextChar := byte(0)
142 if len(text) > 1 {
143 nextChar = text[1]
144 }
145 if smartQuoteHelper(out, previousChar, nextChar, 's', &smrt.inSingleQuote) {
146 return 0
147 }
148
149 out.WriteByte(text[0])
150 return 0
151}
152
153func (smrt *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
154 if len(text) >= 3 {
155 t1 := tolower(text[1])
156 t2 := tolower(text[2])
157
158 if t1 == 'c' && t2 == ')' {
159 out.WriteString("©")
160 return 2
161 }
162
163 if t1 == 'r' && t2 == ')' {
164 out.WriteString("®")
165 return 2
166 }
167
168 if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
169 out.WriteString("™")
170 return 3
171 }
172 }
173
174 out.WriteByte(text[0])
175 return 0
176}
177
178func (smrt *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
179 if len(text) >= 2 {
180 if text[1] == '-' {
181 out.WriteString("—")
182 return 1
183 }
184
185 if wordBoundary(previousChar) && wordBoundary(text[1]) {
186 out.WriteString("–")
187 return 0
188 }
189 }
190
191 out.WriteByte(text[0])
192 return 0
193}
194
195func (smrt *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
196 if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
197 out.WriteString("—")
198 return 2
199 }
200 if len(text) >= 2 && text[1] == '-' {
201 out.WriteString("–")
202 return 1
203 }
204
205 out.WriteByte(text[0])
206 return 0
207}
208
209func (smrt *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
210 if bytes.HasPrefix(text, []byte(""")) {
211 nextChar := byte(0)
212 if len(text) >= 7 {
213 nextChar = text[6]
214 }
215 if smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote) {
216 return 5
217 }
218 }
219
220 if bytes.HasPrefix(text, []byte("�")) {
221 return 3
222 }
223
224 out.WriteByte('&')
225 return 0
226}
227
228func (smrt *SPRenderer) smartAmp(out *bytes.Buffer, previousChar byte, text []byte) int {
229 return smrt.smartAmpVariant(out, previousChar, text, 'd')
230}
231
232func (smrt *SPRenderer) smartAmpAngledQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
233 return smrt.smartAmpVariant(out, previousChar, text, 'a')
234}
235
236func (smrt *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
237 if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
238 out.WriteString("…")
239 return 2
240 }
241
242 if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
243 out.WriteString("…")
244 return 4
245 }
246
247 out.WriteByte(text[0])
248 return 0
249}
250
251func (smrt *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
252 if len(text) >= 2 && text[1] == '`' {
253 nextChar := byte(0)
254 if len(text) >= 3 {
255 nextChar = text[2]
256 }
257 if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote) {
258 return 1
259 }
260 }
261
262 out.WriteByte(text[0])
263 return 0
264}
265
266func (smrt *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
267 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
268 // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
269 // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
270 // and avoid changing dates like 1/23/2005 into fractions.
271 numEnd := 0
272 for len(text) > numEnd && isdigit(text[numEnd]) {
273 numEnd++
274 }
275 if numEnd == 0 {
276 out.WriteByte(text[0])
277 return 0
278 }
279 denStart := numEnd + 1
280 if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
281 denStart = numEnd + 3
282 } else if len(text) < numEnd+2 || text[numEnd] != '/' {
283 out.WriteByte(text[0])
284 return 0
285 }
286 denEnd := denStart
287 for len(text) > denEnd && isdigit(text[denEnd]) {
288 denEnd++
289 }
290 if denEnd == denStart {
291 out.WriteByte(text[0])
292 return 0
293 }
294 if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
295 out.WriteString("<sup>")
296 out.Write(text[:numEnd])
297 out.WriteString("</sup>⁄<sub>")
298 out.Write(text[denStart:denEnd])
299 out.WriteString("</sub>")
300 return denEnd - 1
301 }
302 }
303
304 out.WriteByte(text[0])
305 return 0
306}
307
308func (smrt *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
309 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
310 if text[0] == '1' && text[1] == '/' && text[2] == '2' {
311 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
312 out.WriteString("½")
313 return 2
314 }
315 }
316
317 if text[0] == '1' && text[1] == '/' && text[2] == '4' {
318 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
319 out.WriteString("¼")
320 return 2
321 }
322 }
323
324 if text[0] == '3' && text[1] == '/' && text[2] == '4' {
325 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
326 out.WriteString("¾")
327 return 2
328 }
329 }
330 }
331
332 out.WriteByte(text[0])
333 return 0
334}
335
336func (smrt *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
337 nextChar := byte(0)
338 if len(text) > 1 {
339 nextChar = text[1]
340 }
341 if !smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote) {
342 out.WriteString(""")
343 }
344
345 return 0
346}
347
348func (smrt *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
349 return smrt.smartDoubleQuoteVariant(out, previousChar, text, 'd')
350}
351
352func (smrt *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
353 return smrt.smartDoubleQuoteVariant(out, previousChar, text, 'a')
354}
355
356func (smrt *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
357 i := 0
358
359 for i < len(text) && text[i] != '>' {
360 i++
361 }
362
363 out.Write(text[:i+1])
364 return i
365}
366
// smartCallback is the signature shared by all per-byte handlers.
//
// text starts at the byte that triggered the handler, and previousChar is the
// byte before it (0 at the very start of the input). The handler writes its
// replacement to out and returns how many bytes beyond text[0] it consumed;
// Process adds that number to its position before moving on.
type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int
368
369func NewSmartypantsRenderer(flags Extensions) *SPRenderer {
370 var r SPRenderer
371 if flags&SmartypantsAngledQuotes == 0 {
372 r.callbacks['"'] = r.smartDoubleQuote
373 r.callbacks['&'] = r.smartAmp
374 } else {
375 r.callbacks['"'] = r.smartAngledDoubleQuote
376 r.callbacks['&'] = r.smartAmpAngledQuote
377 }
378 r.callbacks['\''] = r.smartSingleQuote
379 r.callbacks['('] = r.smartParens
380 if flags&SmartypantsDashes != 0 {
381 if flags&SmartypantsLatexDashes == 0 {
382 r.callbacks['-'] = r.smartDash
383 } else {
384 r.callbacks['-'] = r.smartDashLatex
385 }
386 }
387 r.callbacks['.'] = r.smartPeriod
388 if flags&SmartypantsFractions == 0 {
389 r.callbacks['1'] = r.smartNumber
390 r.callbacks['3'] = r.smartNumber
391 } else {
392 for ch := '1'; ch <= '9'; ch++ {
393 r.callbacks[ch] = r.smartNumberGeneric
394 }
395 }
396 r.callbacks['<'] = r.smartLeftAngle
397 r.callbacks['`'] = r.smartBacktick
398 return &r
399}
400
401func (sr *SPRenderer) Process(text []byte) []byte {
402 var buff bytes.Buffer
403 // first do normal entity escaping
404 text = esc(text)
405 mark := 0
406 for i := 0; i < len(text); i++ {
407 if action := sr.callbacks[text[i]]; action != nil {
408 if i > mark {
409 buff.Write(text[mark:i])
410 }
411 previousChar := byte(0)
412 if i > 0 {
413 previousChar = text[i-1]
414 }
415 var tmp bytes.Buffer
416 i += action(&tmp, previousChar, text[i:])
417 buff.Write(tmp.Bytes())
418 mark = i + 1
419 }
420 }
421 if mark < len(text) {
422 buff.Write(text[mark:])
423 }
424 return buff.Bytes()
425}