all repos — honk @ 19e0ea9e8ed7d3d8e9a7c06aa16d8b760d043dc7

my fork of honk

markitzero.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"regexp"
 22	"strings"
 23
 24	"humungus.tedunangst.com/r/webs/synlight"
 25)
 26
 27var re_bolder = regexp.MustCompile(`(^|\W)\*\*((?s:.*?))\*\*($|\W)`)
 28var re_italicer = regexp.MustCompile(`(^|\W)\*((?s:.*?))\*($|\W)`)
 29var re_bigcoder = regexp.MustCompile("```(.*)\n?((?s:.*?))\n?```\n?")
 30var re_coder = regexp.MustCompile("`([^`]*)`")
 31var re_quoter = regexp.MustCompile(`(?m:^&gt; (.*)(\n- ?(.*))?\n?)`)
 32var re_reciter = regexp.MustCompile(`(<cite><a href=".*?">)https://twitter.com/([^/]+)/.*?(</a></cite>)`)
 33var re_link = regexp.MustCompile(`.?.?https?://[^\s"]+[\w/)!]`)
 34var re_zerolink = regexp.MustCompile(`\[([^]]*)\]\(([^)]*\)?)\)`)
 35var re_imgfix = regexp.MustCompile(`<img ([^>]*)>`)
 36var re_lister = regexp.MustCompile(`((^|\n)(\+|-).*)+\n?`)
 37
 38var lighter = synlight.New(synlight.Options{Format: synlight.HTML})
 39
 40// fewer side effects than html.EscapeString
 41func fasterescaper(s []byte) []byte {
 42	buf := make([]byte, 0, len(s))
 43	for _, c := range []byte(s) {
 44		switch c {
 45		case '&':
 46			buf = append(buf, []byte("&amp;")...)
 47		case '<':
 48			buf = append(buf, []byte("&lt;")...)
 49		case '>':
 50			buf = append(buf, []byte("&gt;")...)
 51		default:
 52			buf = append(buf, c)
 53		}
 54	}
 55	return buf
 56}
 57
 58func replaceifmatch(re *regexp.Regexp, input []byte, repl []byte) []byte {
 59	if !re.Match(input) {
 60		return input
 61	}
 62	return re.ReplaceAll(input, repl)
 63}
 64
 65func replaceifmatchfn(re *regexp.Regexp, input []byte, repl func([]byte) []byte) []byte {
 66	if !re.Match(input) {
 67		return input
 68	}
 69	return re.ReplaceAllFunc(input, repl)
 70}
 71
 72func replacenocopy(input []byte, pat []byte, repl []byte) []byte {
 73	if !bytes.Contains(input, pat) {
 74		return input
 75	}
 76	return bytes.Replace(input, pat, repl, -1)
 77}
 78
 79func markitzero(ss string) string {
 80	s := []byte(ss)
 81	// prepare the string
 82	s = bytes.TrimSpace(s)
 83	s = replacenocopy(s, []byte("\r"), []byte(""))
 84
 85	hascode := bytes.Contains(s, []byte("`"))
 86
 87	// save away the code blocks so we don't mess them up further
 88	var bigcodes, lilcodes, images [][]byte
 89	if hascode {
 90		s = replaceifmatchfn(re_bigcoder, s, func(code []byte) []byte {
 91			bigcodes = append(bigcodes, code)
 92			return []byte("``````")
 93		})
 94		s = replaceifmatchfn(re_coder, s, func(code []byte) []byte {
 95			lilcodes = append(lilcodes, code)
 96			return []byte("`x`")
 97		})
 98	}
 99	s = replaceifmatchfn(re_imgfix, s, func(img []byte) []byte {
100		images = append(images, img)
101		return []byte("<img x>")
102	})
103
104	s = fasterescaper(s)
105
106	// mark it zero
107	if bytes.Contains(s, []byte("http")) {
108		s = replaceifmatchfn(re_link, s, linkreplacer)
109	}
110	s = replaceifmatch(re_zerolink, s, []byte(`<a href="$2">$1</a>`))
111	if bytes.Contains(s, []byte("**")) {
112		s = replaceifmatch(re_bolder, s, []byte("$1<b>$2</b>$3"))
113	}
114	if bytes.Contains(s, []byte("*")) {
115		s = replaceifmatch(re_italicer, s, []byte("$1<i>$2</i>$3"))
116	}
117	if bytes.Contains(s, []byte("&gt; ")) {
118		s = replaceifmatch(re_quoter, s, []byte("<blockquote>$1<br><cite>$3</cite></blockquote><p>"))
119		s = replaceifmatch(re_reciter, s, []byte("$1$2$3"))
120	}
121	s = replacenocopy(s, []byte("\n---\n"), []byte("<hr><p>"))
122
123	if bytes.Contains(s, []byte("\n+")) || bytes.Contains(s, []byte("\n-")) {
124		s = replaceifmatchfn(re_lister, s, func(m []byte) []byte {
125			m = bytes.Trim(m, "\n")
126			items := bytes.Split(m, []byte("\n"))
127			r := []byte("<ul>")
128			for _, item := range items {
129				r = append(r, []byte("<li>")...)
130				r = append(r, bytes.Trim(item[1:], " ")...)
131			}
132			r = append(r, []byte("</ul><p>")...)
133			return r
134		})
135	}
136
137	// restore images
138	s = replacenocopy(s, []byte("&lt;img x&gt;"), []byte("<img x>"))
139	s = replaceifmatchfn(re_imgfix, s, func([]byte) []byte {
140		img := images[0]
141		images = images[1:]
142		return img
143	})
144
145	// now restore the code blocks
146	if hascode {
147		s = replaceifmatchfn(re_coder, s, func([]byte) []byte {
148			code := lilcodes[0]
149			lilcodes = lilcodes[1:]
150			return fasterescaper(code)
151		})
152		s = replaceifmatchfn(re_bigcoder, s, func([]byte) []byte {
153			code := bigcodes[0]
154			bigcodes = bigcodes[1:]
155			m := re_bigcoder.FindSubmatch(code)
156			var buf bytes.Buffer
157			buf.WriteString("<pre><code>")
158			lighter.Highlight(m[2], string(m[1]), &buf)
159			buf.WriteString("</code></pre><p>")
160			return buf.Bytes()
161		})
162		s = replaceifmatch(re_coder, s, []byte("<code>$1</code>"))
163	}
164
165	// some final fixups
166	s = replacenocopy(s, []byte("\n"), []byte("<br>"))
167	s = replacenocopy(s, []byte("<br><blockquote>"), []byte("<blockquote>"))
168	s = replacenocopy(s, []byte("<br><cite></cite>"), []byte(""))
169	s = replacenocopy(s, []byte("<br><pre>"), []byte("<pre>"))
170	s = replacenocopy(s, []byte("<br><ul>"), []byte("<ul>"))
171	s = replacenocopy(s, []byte("<p><br>"), []byte("<p>"))
172	return string(s)
173}
174
// linkreplacer turns one re_link match into an HTML anchor.
//
// A match that begins with "](" is the target half of a [text](url) link and
// is returned untouched. Otherwise, whatever precedes the first "http" is
// kept in front of the anchor. A trailing ")" is moved outside the anchor
// when the URL contains no "(", and a trailing "." left after that is moved
// outside as well.
func linkreplacer(burl []byte) []byte {
	text := string(burl)
	if text[:2] == "](" {
		return burl
	}

	// advance past the characters captured ahead of the URL
	skip := 0
	for !strings.HasPrefix(text[skip:], "http") {
		skip++
	}
	lead, link := text[:skip], text[skip:]

	// punctuation that belongs to the sentence rather than the URL
	tail := ""
	if strings.HasSuffix(link, ")") && !strings.Contains(link, "(") {
		link = link[:len(link)-1]
		tail = ")"
	}
	if strings.HasSuffix(link, ".") {
		link = link[:len(link)-1]
		tail = "." + tail
	}

	anchor := fmt.Sprintf(`<a href="%s">%s</a>`, link, link)
	return []byte(lead + anchor + tail)
}