//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// SmartyPants rendering
//
//

package blackfriday

import (
	"bytes"
	"io"
)

23// SPRenderer is a struct containing state of a Smartypants renderer.
24type SPRenderer struct {
25 inSingleQuote bool
26 inDoubleQuote bool
27 callbacks [256]smartCallback
28}
29
30func wordBoundary(c byte) bool {
31 return c == 0 || isspace(c) || ispunct(c)
32}
33
// tolower converts an ASCII upper-case letter to lower case. Every other
// byte, including non-ASCII bytes, is returned unchanged.
func tolower(c byte) byte {
	if 'A' <= c && c <= 'Z' {
		return c + ('a' - 'A')
	}
	return c
}

// isdigit reports whether c is an ASCII decimal digit ('0' through '9').
func isdigit(c byte) bool {
	return '0' <= c && c <= '9'
}

45func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool {
46 // edge of the buffer is likely to be a tag that we don't get to see,
47 // so we treat it like text sometimes
48
49 // enumerate all sixteen possibilities for (previousChar, nextChar)
50 // each can be one of {0, space, punct, other}
51 switch {
52 case previousChar == 0 && nextChar == 0:
53 // context is not any help here, so toggle
54 *isOpen = !*isOpen
55 case isspace(previousChar) && nextChar == 0:
56 // [ "] might be [ "<code>foo...]
57 *isOpen = true
58 case ispunct(previousChar) && nextChar == 0:
59 // [!"] hmm... could be [Run!"] or [("<code>...]
60 *isOpen = false
61 case /* isnormal(previousChar) && */ nextChar == 0:
62 // [a"] is probably a close
63 *isOpen = false
64 case previousChar == 0 && isspace(nextChar):
65 // [" ] might be [...foo</code>" ]
66 *isOpen = false
67 case isspace(previousChar) && isspace(nextChar):
68 // [ " ] context is not any help here, so toggle
69 *isOpen = !*isOpen
70 case ispunct(previousChar) && isspace(nextChar):
71 // [!" ] is probably a close
72 *isOpen = false
73 case /* isnormal(previousChar) && */ isspace(nextChar):
74 // [a" ] this is one of the easy cases
75 *isOpen = false
76 case previousChar == 0 && ispunct(nextChar):
77 // ["!] hmm... could be ["$1.95] or [</code>"!...]
78 *isOpen = false
79 case isspace(previousChar) && ispunct(nextChar):
80 // [ "!] looks more like [ "$1.95]
81 *isOpen = true
82 case ispunct(previousChar) && ispunct(nextChar):
83 // [!"!] context is not any help here, so toggle
84 *isOpen = !*isOpen
85 case /* isnormal(previousChar) && */ ispunct(nextChar):
86 // [a"!] is probably a close
87 *isOpen = false
88 case previousChar == 0 /* && isnormal(nextChar) */ :
89 // ["a] is probably an open
90 *isOpen = true
91 case isspace(previousChar) /* && isnormal(nextChar) */ :
92 // [ "a] this is one of the easy cases
93 *isOpen = true
94 case ispunct(previousChar) /* && isnormal(nextChar) */ :
95 // [!"a] is probably an open
96 *isOpen = true
97 default:
98 // [a'b] maybe a contraction?
99 *isOpen = false
100 }
101
102 // Note that with the limited lookahead, this non-breaking
103 // space will also be appended to single double quotes.
104 if addNBSP && !*isOpen {
105 out.WriteString(" ")
106 }
107
108 out.WriteByte('&')
109 if *isOpen {
110 out.WriteByte('l')
111 } else {
112 out.WriteByte('r')
113 }
114 out.WriteByte(quote)
115 out.WriteString("quo;")
116
117 if addNBSP && *isOpen {
118 out.WriteString(" ")
119 }
120
121 return true
122}
123
124func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
125 if len(text) >= 2 {
126 t1 := tolower(text[1])
127
128 if t1 == '\'' {
129 nextChar := byte(0)
130 if len(text) >= 3 {
131 nextChar = text[2]
132 }
133 if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
134 return 1
135 }
136 }
137
138 if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
139 out.WriteString("’")
140 return 0
141 }
142
143 if len(text) >= 3 {
144 t2 := tolower(text[2])
145
146 if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
147 (len(text) < 4 || wordBoundary(text[3])) {
148 out.WriteString("’")
149 return 0
150 }
151 }
152 }
153
154 nextChar := byte(0)
155 if len(text) > 1 {
156 nextChar = text[1]
157 }
158 if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote, false) {
159 return 0
160 }
161
162 out.WriteByte(text[0])
163 return 0
164}
165
166func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
167 if len(text) >= 3 {
168 t1 := tolower(text[1])
169 t2 := tolower(text[2])
170
171 if t1 == 'c' && t2 == ')' {
172 out.WriteString("©")
173 return 2
174 }
175
176 if t1 == 'r' && t2 == ')' {
177 out.WriteString("®")
178 return 2
179 }
180
181 if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
182 out.WriteString("™")
183 return 3
184 }
185 }
186
187 out.WriteByte(text[0])
188 return 0
189}
190
191func (r *SPRenderer) smartEmDash(out *bytes.Buffer, previousChar byte, text []byte) int {
192 if len(text) >= 3 {
193 if text[1] == '-' && text[2] == '-' && isspace(text[3]) {
194 out.WriteString("—")
195 return 3
196 }
197 }
198
199 out.WriteByte(text[0])
200 return 0
201}
202
203func (r *SPRenderer) smartEnDash(out *bytes.Buffer, previousChar byte, text []byte) int {
204 if len(text) >= 2 {
205 if isdigit(previousChar) && isdigit(text[1]) {
206 out.WriteString("–")
207 return 0
208 }
209 }
210 out.WriteByte(text[0])
211 return 0
212}
213
214func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
215 if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
216 out.WriteString("—")
217 return 2
218 }
219 if len(text) >= 2 && text[1] == '-' {
220 out.WriteString("–")
221 return 1
222 }
223
224 out.WriteByte(text[0])
225 return 0
226}
227
228func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte, addNBSP bool) int {
229 if bytes.HasPrefix(text, []byte(""")) {
230 nextChar := byte(0)
231 if len(text) >= 7 {
232 nextChar = text[6]
233 }
234 if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, addNBSP) {
235 return 5
236 }
237 }
238
239 if bytes.HasPrefix(text, []byte("�")) {
240 return 3
241 }
242
243 out.WriteByte('&')
244 return 0
245}
246
247func (r *SPRenderer) smartAmp(angledQuotes, addNBSP bool) func(*bytes.Buffer, byte, []byte) int {
248 var quote byte = 'd'
249 if angledQuotes {
250 quote = 'a'
251 }
252
253 return func(out *bytes.Buffer, previousChar byte, text []byte) int {
254 return r.smartAmpVariant(out, previousChar, text, quote, addNBSP)
255 }
256}
257
258func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
259 if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
260 out.WriteString("…")
261 return 2
262 }
263
264 if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
265 out.WriteString("…")
266 return 4
267 }
268
269 out.WriteByte(text[0])
270 return 0
271}
272
273func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
274 if len(text) >= 2 && text[1] == '`' {
275 nextChar := byte(0)
276 if len(text) >= 3 {
277 nextChar = text[2]
278 }
279 if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
280 return 1
281 }
282 }
283
284 out.WriteByte(text[0])
285 return 0
286}
287
288func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
289 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
290 // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
291 // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
292 // and avoid changing dates like 1/23/2005 into fractions.
293 numEnd := 0
294 for len(text) > numEnd && isdigit(text[numEnd]) {
295 numEnd++
296 }
297 if numEnd == 0 {
298 out.WriteByte(text[0])
299 return 0
300 }
301 denStart := numEnd + 1
302 if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
303 denStart = numEnd + 3
304 } else if len(text) < numEnd+2 || text[numEnd] != '/' {
305 out.WriteByte(text[0])
306 return 0
307 }
308 denEnd := denStart
309 for len(text) > denEnd && isdigit(text[denEnd]) {
310 denEnd++
311 }
312 if denEnd == denStart {
313 out.WriteByte(text[0])
314 return 0
315 }
316 if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
317 out.WriteString("<sup>")
318 out.Write(text[:numEnd])
319 out.WriteString("</sup>⁄<sub>")
320 out.Write(text[denStart:denEnd])
321 out.WriteString("</sub>")
322 return denEnd - 1
323 }
324 }
325
326 out.WriteByte(text[0])
327 return 0
328}
329
330func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
331 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
332 if text[0] == '1' && text[1] == '/' && text[2] == '2' {
333 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
334 out.WriteString("½")
335 return 2
336 }
337 }
338
339 if text[0] == '1' && text[1] == '/' && text[2] == '4' {
340 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
341 out.WriteString("¼")
342 return 2
343 }
344 }
345
346 if text[0] == '3' && text[1] == '/' && text[2] == '4' {
347 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
348 out.WriteString("¾")
349 return 2
350 }
351 }
352 }
353
354 out.WriteByte(text[0])
355 return 0
356}
357
358func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
359 nextChar := byte(0)
360 if len(text) > 1 {
361 nextChar = text[1]
362 }
363 if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, false) {
364 out.WriteString(""")
365 }
366
367 return 0
368}
369
370func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
371 return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
372}
373
374func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
375 return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
376}
377
378func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
379 i := 0
380
381 for i < len(text) && text[i] != '>' {
382 i++
383 }
384
385 out.Write(text[:i+1])
386 return i
387}
388
// smartCallback is the signature shared by the per-byte handlers stored in
// SPRenderer.callbacks. text starts at the byte that triggered the handler,
// and previousChar is the byte before it (0 at the start of the input). The
// handler writes its replacement to out and returns how many input bytes it
// consumed in addition to text[0].
type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int

391// NewSmartypantsRenderer constructs a Smartypants renderer object.
392func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
393 var (
394 r SPRenderer
395
396 smartAmpAngled = r.smartAmp(true, false)
397 smartAmpAngledNBSP = r.smartAmp(true, true)
398 smartAmpRegular = r.smartAmp(false, false)
399 smartAmpRegularNBSP = r.smartAmp(false, true)
400
401 addNBSP = flags&SmartypantsQuotesNBSP != 0
402 )
403
404 if flags&SmartypantsAngledQuotes == 0 {
405 r.callbacks['"'] = r.smartDoubleQuote
406 if !addNBSP {
407 r.callbacks['&'] = smartAmpRegular
408 } else {
409 r.callbacks['&'] = smartAmpRegularNBSP
410 }
411 } else {
412 r.callbacks['"'] = r.smartAngledDoubleQuote
413 if !addNBSP {
414 r.callbacks['&'] = smartAmpAngled
415 } else {
416 r.callbacks['&'] = smartAmpAngledNBSP
417 }
418 }
419 r.callbacks['\''] = r.smartSingleQuote
420 r.callbacks['('] = r.smartParens
421 if flags&SmartypantsDashes != 0 {
422 if flags&SmartypantsLatexDashes == 0 {
423 r.callbacks[' '] = r.smartEmDash
424 r.callbacks['-'] = r.smartEnDash
425 } else {
426 r.callbacks['-'] = r.smartDashLatex
427 }
428 }
429 r.callbacks['.'] = r.smartPeriod
430 if flags&SmartypantsFractions == 0 {
431 r.callbacks['1'] = r.smartNumber
432 r.callbacks['3'] = r.smartNumber
433 } else {
434 for ch := '1'; ch <= '9'; ch++ {
435 r.callbacks[ch] = r.smartNumberGeneric
436 }
437 }
438 r.callbacks['<'] = r.smartLeftAngle
439 r.callbacks['`'] = r.smartBacktick
440 return &r
441}
442
443// Process is the entry point of the Smartypants renderer.
444func (r *SPRenderer) Process(w io.Writer, text []byte) {
445 mark := 0
446 for i := 0; i < len(text); i++ {
447 if action := r.callbacks[text[i]]; action != nil {
448 if i > mark {
449 w.Write(text[mark:i])
450 }
451 previousChar := byte(0)
452 if i > 0 {
453 previousChar = text[i-1]
454 }
455 var tmp bytes.Buffer
456 i += action(&tmp, previousChar, text[i:])
457 w.Write(tmp.Bytes())
458 mark = i + 1
459 }
460 }
461 if mark < len(text) {
462 w.Write(text[mark:])
463 }
464}