smartypants.go (view raw)
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// SmartyPants rendering
//
//
15
16package blackfriday
17
18import (
19 "bytes"
20 "io"
21)
22
23// SPRenderer is a struct containing state of a Smartypants renderer.
24type SPRenderer struct {
25 inSingleQuote bool
26 inDoubleQuote bool
27 callbacks [256]smartCallback
28}
29
30func wordBoundary(c byte) bool {
31 return c == 0 || isspace(c) || ispunct(c)
32}
33
// tolower maps an ASCII uppercase letter to its lowercase counterpart and
// returns every other byte unchanged.
func tolower(c byte) byte {
	if 'A' <= c && c <= 'Z' {
		return c + ('a' - 'A')
	}
	return c
}
40
// isdigit reports whether c is an ASCII decimal digit.
func isdigit(c byte) bool {
	// byte arithmetic wraps around, so anything below '0' becomes a large
	// value and a single comparison covers both ends of the range
	return c-'0' <= 9
}
44
45func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool {
46 // edge of the buffer is likely to be a tag that we don't get to see,
47 // so we treat it like text sometimes
48
49 // enumerate all sixteen possibilities for (previousChar, nextChar)
50 // each can be one of {0, space, punct, other}
51 switch {
52 case previousChar == 0 && nextChar == 0:
53 // context is not any help here, so toggle
54 *isOpen = !*isOpen
55 case isspace(previousChar) && nextChar == 0:
56 // [ "] might be [ "<code>foo...]
57 *isOpen = true
58 case ispunct(previousChar) && nextChar == 0:
59 // [!"] hmm... could be [Run!"] or [("<code>...]
60 *isOpen = false
61 case /* isnormal(previousChar) && */ nextChar == 0:
62 // [a"] is probably a close
63 *isOpen = false
64 case previousChar == 0 && isspace(nextChar):
65 // [" ] might be [...foo</code>" ]
66 *isOpen = false
67 case isspace(previousChar) && isspace(nextChar):
68 // [ " ] context is not any help here, so toggle
69 *isOpen = !*isOpen
70 case ispunct(previousChar) && isspace(nextChar):
71 // [!" ] is probably a close
72 *isOpen = false
73 case /* isnormal(previousChar) && */ isspace(nextChar):
74 // [a" ] this is one of the easy cases
75 *isOpen = false
76 case previousChar == 0 && ispunct(nextChar):
77 // ["!] hmm... could be ["$1.95] or [</code>"!...]
78 *isOpen = false
79 case isspace(previousChar) && ispunct(nextChar):
80 // [ "!] looks more like [ "$1.95]
81 *isOpen = true
82 case ispunct(previousChar) && ispunct(nextChar):
83 // [!"!] context is not any help here, so toggle
84 *isOpen = !*isOpen
85 case /* isnormal(previousChar) && */ ispunct(nextChar):
86 // [a"!] is probably a close
87 *isOpen = false
88 case previousChar == 0 /* && isnormal(nextChar) */ :
89 // ["a] is probably an open
90 *isOpen = true
91 case isspace(previousChar) /* && isnormal(nextChar) */ :
92 // [ "a] this is one of the easy cases
93 *isOpen = true
94 case ispunct(previousChar) /* && isnormal(nextChar) */ :
95 // [!"a] is probably an open
96 *isOpen = true
97 default:
98 // [a'b] maybe a contraction?
99 *isOpen = false
100 }
101
102 out.WriteByte('&')
103 if *isOpen {
104 out.WriteByte('l')
105 } else {
106 out.WriteByte('r')
107 }
108 out.WriteByte(quote)
109 out.WriteString("quo;")
110 return true
111}
112
113func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
114 if len(text) >= 2 {
115 t1 := tolower(text[1])
116
117 if t1 == '\'' {
118 nextChar := byte(0)
119 if len(text) >= 3 {
120 nextChar = text[2]
121 }
122 if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote) {
123 return 1
124 }
125 }
126
127 if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
128 out.WriteString("’")
129 return 0
130 }
131
132 if len(text) >= 3 {
133 t2 := tolower(text[2])
134
135 if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
136 (len(text) < 4 || wordBoundary(text[3])) {
137 out.WriteString("’")
138 return 0
139 }
140 }
141 }
142
143 nextChar := byte(0)
144 if len(text) > 1 {
145 nextChar = text[1]
146 }
147 if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote) {
148 return 0
149 }
150
151 out.WriteByte(text[0])
152 return 0
153}
154
155func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
156 if len(text) >= 3 {
157 t1 := tolower(text[1])
158 t2 := tolower(text[2])
159
160 if t1 == 'c' && t2 == ')' {
161 out.WriteString("©")
162 return 2
163 }
164
165 if t1 == 'r' && t2 == ')' {
166 out.WriteString("®")
167 return 2
168 }
169
170 if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
171 out.WriteString("™")
172 return 3
173 }
174 }
175
176 out.WriteByte(text[0])
177 return 0
178}
179
180func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
181 if len(text) >= 2 {
182 if text[1] == '-' {
183 out.WriteString("—")
184 return 1
185 }
186
187 if wordBoundary(previousChar) && wordBoundary(text[1]) {
188 out.WriteString("–")
189 return 0
190 }
191 }
192
193 out.WriteByte(text[0])
194 return 0
195}
196
197func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
198 if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
199 out.WriteString("—")
200 return 2
201 }
202 if len(text) >= 2 && text[1] == '-' {
203 out.WriteString("–")
204 return 1
205 }
206
207 out.WriteByte(text[0])
208 return 0
209}
210
211func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
212 if bytes.HasPrefix(text, []byte(""")) {
213 nextChar := byte(0)
214 if len(text) >= 7 {
215 nextChar = text[6]
216 }
217 if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote) {
218 return 5
219 }
220 }
221
222 if bytes.HasPrefix(text, []byte("�")) {
223 return 3
224 }
225
226 out.WriteByte('&')
227 return 0
228}
229
230func (r *SPRenderer) smartAmp(out *bytes.Buffer, previousChar byte, text []byte) int {
231 return r.smartAmpVariant(out, previousChar, text, 'd')
232}
233
234func (r *SPRenderer) smartAmpAngledQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
235 return r.smartAmpVariant(out, previousChar, text, 'a')
236}
237
238func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
239 if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
240 out.WriteString("…")
241 return 2
242 }
243
244 if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
245 out.WriteString("…")
246 return 4
247 }
248
249 out.WriteByte(text[0])
250 return 0
251}
252
253func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
254 if len(text) >= 2 && text[1] == '`' {
255 nextChar := byte(0)
256 if len(text) >= 3 {
257 nextChar = text[2]
258 }
259 if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote) {
260 return 1
261 }
262 }
263
264 out.WriteByte(text[0])
265 return 0
266}
267
268func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
269 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
270 // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
271 // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
272 // and avoid changing dates like 1/23/2005 into fractions.
273 numEnd := 0
274 for len(text) > numEnd && isdigit(text[numEnd]) {
275 numEnd++
276 }
277 if numEnd == 0 {
278 out.WriteByte(text[0])
279 return 0
280 }
281 denStart := numEnd + 1
282 if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
283 denStart = numEnd + 3
284 } else if len(text) < numEnd+2 || text[numEnd] != '/' {
285 out.WriteByte(text[0])
286 return 0
287 }
288 denEnd := denStart
289 for len(text) > denEnd && isdigit(text[denEnd]) {
290 denEnd++
291 }
292 if denEnd == denStart {
293 out.WriteByte(text[0])
294 return 0
295 }
296 if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
297 out.WriteString("<sup>")
298 out.Write(text[:numEnd])
299 out.WriteString("</sup>⁄<sub>")
300 out.Write(text[denStart:denEnd])
301 out.WriteString("</sub>")
302 return denEnd - 1
303 }
304 }
305
306 out.WriteByte(text[0])
307 return 0
308}
309
310func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
311 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
312 if text[0] == '1' && text[1] == '/' && text[2] == '2' {
313 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
314 out.WriteString("½")
315 return 2
316 }
317 }
318
319 if text[0] == '1' && text[1] == '/' && text[2] == '4' {
320 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
321 out.WriteString("¼")
322 return 2
323 }
324 }
325
326 if text[0] == '3' && text[1] == '/' && text[2] == '4' {
327 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
328 out.WriteString("¾")
329 return 2
330 }
331 }
332 }
333
334 out.WriteByte(text[0])
335 return 0
336}
337
338func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
339 nextChar := byte(0)
340 if len(text) > 1 {
341 nextChar = text[1]
342 }
343 if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote) {
344 out.WriteString(""")
345 }
346
347 return 0
348}
349
350func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
351 return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
352}
353
354func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
355 return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
356}
357
358func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
359 i := 0
360
361 for i < len(text) && text[i] != '>' {
362 i++
363 }
364
365 out.Write(text[:i+1])
366 return i
367}
368
// smartCallback is the signature of a per-byte handler. The handler is
// called with text[0] being the byte it is registered for and previousChar
// being the input byte before it (0 at the start of the input). It writes
// its replacement to out and returns how many input bytes after text[0] it
// consumed.
type smartCallback func(
	out *bytes.Buffer,
	previousChar byte,
	text []byte,
) int
370
371// NewSmartypantsRenderer constructs a Smartypants renderer object.
372func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
373 var r SPRenderer
374 if flags&SmartypantsAngledQuotes == 0 {
375 r.callbacks['"'] = r.smartDoubleQuote
376 r.callbacks['&'] = r.smartAmp
377 } else {
378 r.callbacks['"'] = r.smartAngledDoubleQuote
379 r.callbacks['&'] = r.smartAmpAngledQuote
380 }
381 r.callbacks['\''] = r.smartSingleQuote
382 r.callbacks['('] = r.smartParens
383 if flags&SmartypantsDashes != 0 {
384 if flags&SmartypantsLatexDashes == 0 {
385 r.callbacks['-'] = r.smartDash
386 } else {
387 r.callbacks['-'] = r.smartDashLatex
388 }
389 }
390 r.callbacks['.'] = r.smartPeriod
391 if flags&SmartypantsFractions == 0 {
392 r.callbacks['1'] = r.smartNumber
393 r.callbacks['3'] = r.smartNumber
394 } else {
395 for ch := '1'; ch <= '9'; ch++ {
396 r.callbacks[ch] = r.smartNumberGeneric
397 }
398 }
399 r.callbacks['<'] = r.smartLeftAngle
400 r.callbacks['`'] = r.smartBacktick
401 return &r
402}
403
404// Process is the entry point of the Smartypants renderer.
405func (r *SPRenderer) Process(w io.Writer, text []byte) {
406 mark := 0
407 for i := 0; i < len(text); i++ {
408 if action := r.callbacks[text[i]]; action != nil {
409 if i > mark {
410 w.Write(text[mark:i])
411 }
412 previousChar := byte(0)
413 if i > 0 {
414 previousChar = text[i-1]
415 }
416 var tmp bytes.Buffer
417 i += action(&tmp, previousChar, text[i:])
418 w.Write(tmp.Bytes())
419 mark = i + 1
420 }
421 }
422 if mark < len(text) {
423 w.Write(text[mark:])
424 }
425}