markitzero.go (view raw)
1//
2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
3//
4// Permission to use, copy, modify, and distribute this software for any
5// purpose with or without fee is hereby granted, provided that the above
6// copyright notice and this permission notice appear in all copies.
7//
8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16package main
17
18import (
19 "bytes"
20 "fmt"
21 "regexp"
22 "strings"
23
24 "humungus.tedunangst.com/r/webs/synlight"
25)
26
27var re_bolder = regexp.MustCompile(`(^|\W)\*\*((?s:.*?))\*\*($|\W)`)
28var re_italicer = regexp.MustCompile(`(^|\W)\*((?s:.*?))\*($|\W)`)
29var re_bigcoder = regexp.MustCompile("```(.*)\n?((?s:.*?))\n?```\n?")
30var re_coder = regexp.MustCompile("`([^`]*)`")
31var re_quoter = regexp.MustCompile(`(?m:^> (.*)(\n- ?(.*))?\n?)`)
32var re_reciter = regexp.MustCompile(`(<cite><a href=".*?">)https://twitter.com/([^/]+)/.*?(</a></cite>)`)
33var re_link = regexp.MustCompile(`.?.?https?://[^\s"]+[\w/)!]`)
34var re_zerolink = regexp.MustCompile(`\[([^]]*)\]\(([^)]*\)?)\)`)
35var re_imgfix = regexp.MustCompile(`<img ([^>]*)>`)
36var re_lister = regexp.MustCompile(`((^|\n)(\+|-).*)+\n?`)
37
38var lighter = synlight.New(synlight.Options{Format: synlight.HTML})
39
40// fewer side effects than html.EscapeString
41func fasterescaper(s []byte) []byte {
42 buf := make([]byte, 0, len(s))
43 for _, c := range []byte(s) {
44 switch c {
45 case '&':
46 buf = append(buf, []byte("&")...)
47 case '<':
48 buf = append(buf, []byte("<")...)
49 case '>':
50 buf = append(buf, []byte(">")...)
51 default:
52 buf = append(buf, c)
53 }
54 }
55 return buf
56}
57
58func replaceifmatch(re *regexp.Regexp, input []byte, repl []byte) []byte {
59 if !re.Match(input) {
60 return input
61 }
62 return re.ReplaceAll(input, repl)
63}
64
65func replaceifmatchfn(re *regexp.Regexp, input []byte, repl func([]byte) []byte) []byte {
66 if !re.Match(input) {
67 return input
68 }
69 return re.ReplaceAllFunc(input, repl)
70}
71
72func replacenocopy(input []byte, pat []byte, repl []byte) []byte {
73 if !bytes.Contains(input, pat) {
74 return input
75 }
76 return bytes.Replace(input, pat, repl, -1)
77}
78
79func markitzero(ss string) string {
80 s := []byte(ss)
81 // prepare the string
82 s = bytes.TrimSpace(s)
83 s = replacenocopy(s, []byte("\r"), []byte(""))
84
85 hascode := bytes.Contains(s, []byte("`"))
86
87 // save away the code blocks so we don't mess them up further
88 var bigcodes, lilcodes, images [][]byte
89 if hascode {
90 s = replaceifmatchfn(re_bigcoder, s, func(code []byte) []byte {
91 bigcodes = append(bigcodes, code)
92 return []byte("``````")
93 })
94 s = replaceifmatchfn(re_coder, s, func(code []byte) []byte {
95 lilcodes = append(lilcodes, code)
96 return []byte("`x`")
97 })
98 }
99 s = replaceifmatchfn(re_imgfix, s, func(img []byte) []byte {
100 images = append(images, img)
101 return []byte("<img x>")
102 })
103
104 s = fasterescaper(s)
105
106 // mark it zero
107 if bytes.Contains(s, []byte("http")) {
108 s = replaceifmatchfn(re_link, s, linkreplacer)
109 }
110 s = replaceifmatch(re_zerolink, s, []byte(`<a href="$2">$1</a>`))
111 if bytes.Contains(s, []byte("**")) {
112 s = replaceifmatch(re_bolder, s, []byte("$1<b>$2</b>$3"))
113 }
114 if bytes.Contains(s, []byte("*")) {
115 s = replaceifmatch(re_italicer, s, []byte("$1<i>$2</i>$3"))
116 }
117 if bytes.Contains(s, []byte("> ")) {
118 s = replaceifmatch(re_quoter, s, []byte("<blockquote>$1<br><cite>$3</cite></blockquote><p>"))
119 s = replaceifmatch(re_reciter, s, []byte("$1$2$3"))
120 }
121 s = replacenocopy(s, []byte("\n---\n"), []byte("<hr><p>"))
122
123 if bytes.Contains(s, []byte("\n+")) || bytes.Contains(s, []byte("\n-")) {
124 s = replaceifmatchfn(re_lister, s, func(m []byte) []byte {
125 m = bytes.Trim(m, "\n")
126 items := bytes.Split(m, []byte("\n"))
127 r := []byte("<ul>")
128 for _, item := range items {
129 r = append(r, []byte("<li>")...)
130 r = append(r, bytes.Trim(item[1:], " ")...)
131 }
132 r = append(r, []byte("</ul><p>")...)
133 return r
134 })
135 }
136
137 // restore images
138 s = replacenocopy(s, []byte("<img x>"), []byte("<img x>"))
139 s = replaceifmatchfn(re_imgfix, s, func([]byte) []byte {
140 img := images[0]
141 images = images[1:]
142 return img
143 })
144
145 // now restore the code blocks
146 if hascode {
147 s = replaceifmatchfn(re_coder, s, func([]byte) []byte {
148 code := lilcodes[0]
149 lilcodes = lilcodes[1:]
150 return fasterescaper(code)
151 })
152 s = replaceifmatchfn(re_bigcoder, s, func([]byte) []byte {
153 code := bigcodes[0]
154 bigcodes = bigcodes[1:]
155 m := re_bigcoder.FindSubmatch(code)
156 var buf bytes.Buffer
157 buf.WriteString("<pre><code>")
158 lighter.Highlight(m[2], string(m[1]), &buf)
159 buf.WriteString("</code></pre><p>")
160 return buf.Bytes()
161 })
162 s = replaceifmatch(re_coder, s, []byte("<code>$1</code>"))
163 }
164
165 // some final fixups
166 s = replacenocopy(s, []byte("\n"), []byte("<br>"))
167 s = replacenocopy(s, []byte("<br><blockquote>"), []byte("<blockquote>"))
168 s = replacenocopy(s, []byte("<br><cite></cite>"), []byte(""))
169 s = replacenocopy(s, []byte("<br><pre>"), []byte("<pre>"))
170 s = replacenocopy(s, []byte("<br><ul>"), []byte("<ul>"))
171 s = replacenocopy(s, []byte("<p><br>"), []byte("<p>"))
172 return string(s)
173}
174
// linkreplacer is the re_link callback: it turns a bare URL into an anchor
// whose link text is the URL itself.
//
// burl is the URL, possibly with a few characters in front of it. If those
// characters are "](" the URL belongs to a [text](url) link and burl is
// returned untouched. Otherwise whatever precedes "http" is copied through in
// front of the anchor. A closing ')' is kept outside the anchor when the URL
// has no '(' of its own, and so is a '.' left at the end after that.
func linkreplacer(burl []byte) []byte {
	text := string(burl)
	if strings.HasPrefix(text, "](") {
		return burl
	}

	// Split into the leading context and the URL proper.
	start := strings.Index(text, "http")
	lead, target := text[:start], text[start:]

	// Punctuation that trails the URL but is not part of it.
	tail := ""
	if strings.HasSuffix(target, ")") && !strings.Contains(target, "(") {
		target = strings.TrimSuffix(target, ")")
		tail = ")"
	}
	if strings.HasSuffix(target, ".") {
		target = strings.TrimSuffix(target, ".")
		tail = "." + tail
	}

	anchor := fmt.Sprintf(`<a href="%s">%s</a>`, target, target)
	return []byte(lead + anchor + tail)
}