markitzero.go (view raw)
1//
2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
3//
4// Permission to use, copy, modify, and distribute this software for any
5// purpose with or without fee is hereby granted, provided that the above
6// copyright notice and this permission notice appear in all copies.
7//
8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16package main
17
18import (
19 "fmt"
20 "regexp"
21 "strings"
22
23 "golang.org/x/net/html"
24 "humungus.tedunangst.com/r/webs/synlight"
25)
26
27var re_bolder = regexp.MustCompile(`(^|\W)\*\*((?s:.*?))\*\*($|\W)`)
28var re_italicer = regexp.MustCompile(`(^|\W)\*((?s:.*?))\*($|\W)`)
29var re_bigcoder = regexp.MustCompile("```(.*)\n?((?s:.*?))\n?```\n?")
30var re_coder = regexp.MustCompile("`([^`]*)`")
31var re_quoter = regexp.MustCompile(`(?m:^> (.*)(\n- ?(.*))?\n?)`)
32var re_reciter = regexp.MustCompile(`(<cite><a href=".*?">)https://twitter.com/([^/]+)/.*?(</a></cite>)`)
33var re_link = regexp.MustCompile(`.?.?https?://[^\s"]+[\w/)!]`)
34var re_zerolink = regexp.MustCompile(`\[([^]]*)\]\(([^)]*\)?)\)`)
35var re_imgfix = regexp.MustCompile(`<img ([^>]*)>`)
36var re_lister = regexp.MustCompile(`((^|\n)(\+|-).*)+\n?`)
37var re_tabler = regexp.MustCompile(`((^|\n)\|.*)+\n?`)
38var re_header = regexp.MustCompile(`(^|\n)(#+) (.*)\n?`)
39
40var lighter = synlight.New(synlight.Options{Format: synlight.HTML})
41
42var allowInlineHtml = false
43
44func markitzero(s string) string {
45 // prepare the string
46 s = strings.TrimSpace(s)
47 s = strings.Replace(s, "\r", "", -1)
48
49 codeword := "`elided big code`"
50
51 // save away the code blocks so we don't mess them up further
52 var bigcodes, lilcodes, images []string
53 s = re_bigcoder.ReplaceAllStringFunc(s, func(code string) string {
54 bigcodes = append(bigcodes, code)
55 return codeword
56 })
57 s = re_coder.ReplaceAllStringFunc(s, func(code string) string {
58 lilcodes = append(lilcodes, code)
59 return "`x`"
60 })
61 s = re_imgfix.ReplaceAllStringFunc(s, func(img string) string {
62 images = append(images, img)
63 return "<img x>"
64 })
65
66 // fewer side effects than html.EscapeString
67 buf := make([]byte, 0, len(s))
68 for _, c := range []byte(s) {
69 switch c {
70 case '&':
71 buf = append(buf, []byte("&")...)
72 case '<':
73 buf = append(buf, []byte("<")...)
74 case '>':
75 buf = append(buf, []byte(">")...)
76 default:
77 buf = append(buf, c)
78 }
79 }
80 s = string(buf)
81
82 // mark it zero
83 if strings.Contains(s, "http") {
84 s = re_link.ReplaceAllStringFunc(s, linkreplacer)
85 }
86 s = re_zerolink.ReplaceAllString(s, `<a href="$2">$1</a>`)
87 if strings.Contains(s, "*") {
88 s = re_bolder.ReplaceAllString(s, "$1<b>$2</b>$3")
89 s = re_italicer.ReplaceAllString(s, "$1<i>$2</i>$3")
90 }
91 s = re_quoter.ReplaceAllString(s, "<blockquote>$1<br><cite>$3</cite></blockquote><p>")
92 s = re_reciter.ReplaceAllString(s, "$1$2$3")
93 s = strings.Replace(s, "\n---\n", "<hr><p>", -1)
94
95 s = re_lister.ReplaceAllStringFunc(s, func(m string) string {
96 m = strings.Trim(m, "\n")
97 items := strings.Split(m, "\n")
98 r := "<ul>"
99 for _, item := range items {
100 r += "<li>" + strings.Trim(item[1:], " ")
101 }
102 r += "</ul><p>"
103 return r
104 })
105 s = re_tabler.ReplaceAllStringFunc(s, func(m string) string {
106 m = strings.Trim(m, "\n")
107 rows := strings.Split(m, "\n")
108 var r strings.Builder
109 r.WriteString("<table>")
110 alignments := make(map[int]string)
111 for _, row := range rows {
112 hastr := false
113 cells := strings.Split(row, "|")
114 for i, cell := range cells {
115 cell = strings.TrimSpace(cell)
116 if cell == "" && (i == 0 || i == len(cells)-1) {
117 continue
118 }
119 switch cell {
120 case ":---":
121 alignments[i] = ` style="text-align: left"`
122 continue
123 case ":---:":
124 alignments[i] = ` style="text-align: center"`
125 continue
126 case "---:":
127 alignments[i] = ` style="text-align: right"`
128 continue
129 }
130 if !hastr {
131 r.WriteString("<tr>")
132 hastr = true
133 }
134 fmt.Fprintf(&r, "<td%s>", alignments[i])
135 r.WriteString(cell)
136 }
137 }
138 r.WriteString("</table><p>")
139 return r.String()
140 })
141 s = re_header.ReplaceAllStringFunc(s, func(s string) string {
142 s = strings.TrimSpace(s)
143 m := re_header.FindStringSubmatch(s)
144 num := len(m[2])
145 return fmt.Sprintf("<h%d>%s</h%d><p>", num, m[3], num)
146 })
147
148 // restore images
149 s = strings.Replace(s, "<img x>", "<img x>", -1)
150 s = re_imgfix.ReplaceAllStringFunc(s, func(string) string {
151 img := images[0]
152 images = images[1:]
153 return img
154 })
155
156 s = strings.Replace(s, "\n\n", "<p>", -1)
157 s = strings.Replace(s, "\n", "<br>", -1)
158
159 // now restore the code blocks
160 s = re_coder.ReplaceAllStringFunc(s, func(string) string {
161 code := lilcodes[0]
162 lilcodes = lilcodes[1:]
163 if code == codeword && len(bigcodes) > 0 {
164 code := bigcodes[0]
165 bigcodes = bigcodes[1:]
166 m := re_bigcoder.FindStringSubmatch(code)
167 if allowInlineHtml && m[1] == "inlinehtml" {
168 return m[2]
169 }
170 return "<pre><code>" + lighter.HighlightString(m[2], m[1]) + "</code></pre><p>"
171 }
172 code = html.EscapeString(code)
173 return code
174 })
175
176 s = re_coder.ReplaceAllString(s, "<code>$1</code>")
177
178 // some final fixups
179 s = strings.Replace(s, "<br><blockquote>", "<blockquote>", -1)
180 s = strings.Replace(s, "<br><cite></cite>", "", -1)
181 s = strings.Replace(s, "<br><pre>", "<pre>", -1)
182 s = strings.Replace(s, "<br><ul>", "<ul>", -1)
183 s = strings.Replace(s, "<br><table>", "<table>", -1)
184 s = strings.Replace(s, "<p><br>", "<p>", -1)
185 return s
186}
187
// linkreplacer turns a single re_link match into an HTML anchor.
//
// re_link captures up to two characters in front of the URL. If they are
// "](" the URL is the target of a [text](url) link and the match is returned
// untouched; otherwise whatever precedes "http" is emitted unchanged ahead of
// the anchor.
//
// A trailing ")" is treated as surrounding punctuation rather than part of
// the URL when the URL contains no "(". A "." left at the end after that is
// moved outside the anchor as well.
//
// Input that contains no "http" at all, including the empty string, is
// returned as is.
func linkreplacer(url string) string {
	if strings.HasPrefix(url, "](") {
		return url
	}
	start := strings.Index(url, "http")
	if start < 0 {
		return url
	}
	prefix := url[:start]
	url = url[start:]

	// Punctuation peeled off the end of the URL, re-attached after </a>.
	suffix := ""
	if strings.HasSuffix(url, ")") && !strings.Contains(url, "(") {
		url = strings.TrimSuffix(url, ")")
		suffix = ")"
	}
	if strings.HasSuffix(url, ".") {
		url = strings.TrimSuffix(url, ".")
		suffix = "." + suffix
	}
	return prefix + fmt.Sprintf(`<a href="%s">%s</a>`, url, url) + suffix
}
215}