use htfilter from webs
Ted Unangst tedu@tedunangst.com
Fri, 17 May 2019 19:37:43 -0400
M
fun.go
→
fun.go
@@ -26,9 +26,12 @@ "net/http"
"regexp" "strings" "sync" + + "humungus.tedunangst.com/r/webs/htfilter" ) func reverbolate(honks []*Honk) { + filt := htfilter.New() for _, h := range honks { h.What += "ed" if h.Honker == "" {@@ -56,7 +59,7 @@ precis := h.Precis
if precis != "" { precis = "<p>summary: " + precis + "<p>" } - h.HTML = cleanstring(precis + h.Noise) + h.HTML, _ = filt.String(precis + h.Noise) emuxifier := func(e string) string { for _, d := range h.Donks { if d.Name == e {
M
go.mod
→
go.mod
@@ -6,5 +6,5 @@ github.com/mattn/go-runewidth v0.0.4
golang.org/x/crypto v0.0.0-20190424203555-c05e17bb3b2d golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 humungus.tedunangst.com/r/go-sqlite3 v1.1.2 - humungus.tedunangst.com/r/webs v0.4.3 + humungus.tedunangst.com/r/webs v0.4.4 )
M
go.sum
→
go.sum
@@ -14,3 +14,5 @@ humungus.tedunangst.com/r/go-sqlite3 v1.1.2 h1:bRAXNRZ4VNFRFhhG4tdudK4Lv4ktHQAHEppKlDANUFg=
humungus.tedunangst.com/r/go-sqlite3 v1.1.2/go.mod h1:FtEEmQM7U2Ey1TuEEOyY1BmphTZnmiEjPsNLEAkpf/M= humungus.tedunangst.com/r/webs v0.4.3 h1:L0id2lZDK+lmuWswd+iOV4T0LXvZe92SqD50AuZDnDM= humungus.tedunangst.com/r/webs v0.4.3/go.mod h1:6yLLDXBaE4pKURa/3/bxoQPod37uAqc/Kq8J0IopWW0= +humungus.tedunangst.com/r/webs v0.4.4 h1:uK1YW+eGQ0JADiSs7Ipt0ljFfQw7e73924wMm4V3gss= +humungus.tedunangst.com/r/webs v0.4.4/go.mod h1:79Ww3HmgE1m+HXU0r0b9hkOD3JuDzXoGiEauHuKcwBI=
M
honk.go
→
honk.go
@@ -34,6 +34,7 @@ "strings"
"time" "github.com/gorilla/mux" + "humungus.tedunangst.com/r/webs/htfilter" "humungus.tedunangst.com/r/webs/image" "humungus.tedunangst.com/r/webs/login" "humungus.tedunangst.com/r/webs/rss"@@ -498,10 +499,11 @@ if u == nil {
w.Header().Set("Cache-Control", "max-age=60") } if user != nil { + filt := htfilter.New() templinfo["Name"] = user.Name whatabout := user.About whatabout = obfusbreak(user.About) - templinfo["WhatAbout"] = cleanstring(whatabout) + templinfo["WhatAbout"], _ = filt.String(whatabout) } templinfo["Honks"] = honks templinfo["ServerMessage"] = infomsg
D
html.go
@@ -1,196 +0,0 @@
-// -// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com> -// -// Permission to use, copy, modify, and distribute this software for any -// purpose with or without fee is hereby granted, provided that the above -// copyright notice and this permission notice appear in all copies. -// -// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -package main - -import ( - "fmt" - "html/template" - "io" - "log" - "net/url" - "regexp" - "sort" - "strings" - - "golang.org/x/net/html" -) - -var permittedtags = []string{ - "div", "h1", "h2", "h3", "h4", "h5", "h6", - "table", "thead", "tbody", "th", "tr", "td", "colgroup", "col", - "p", "br", "pre", "code", "blockquote", "q", - "samp", "mark", "ins", "dfn", "cite", "abbr", "address", - "strong", "em", "b", "i", "s", "u", "sub", "sup", "del", "tt", "small", - "ol", "ul", "li", "dl", "dt", "dd", -} -var permittedattr = []string{"colspan", "rowspan"} -var bannedtags = []string{"script", "style"} - -func init() { - sort.Strings(permittedtags) - sort.Strings(permittedattr) - sort.Strings(bannedtags) -} - -func contains(array []string, tag string) bool { - idx := sort.SearchStrings(array, tag) - return idx < len(array) && array[idx] == tag -} - -func getattr(node *html.Node, attr string) string { - for _, a := range node.Attr { - if a.Key == attr { - return a.Val - } - } - return "" -} - -func hasclass(node *html.Node, class string) bool { - return strings.Contains(" "+getattr(node, "class")+" ", " "+class+" ") -} - -func writetag(w io.Writer, node *html.Node) { - io.WriteString(w, "<") - io.WriteString(w, node.Data) - for _, attr := range node.Attr { - if contains(permittedattr, attr.Key) { - fmt.Fprintf(w, ` %s="%s"`, attr.Key, html.EscapeString(attr.Val)) - } - } - io.WriteString(w, ">") -} - -func render(w io.Writer, node *html.Node) { - if node.Type == html.ElementNode { - tag := node.Data - switch { - case tag == "a": - href := getattr(node, "href") - hrefurl, err := url.Parse(href) - if err != nil { - href = "#BROKEN-" + href - } else { - href = hrefurl.String() - } - fmt.Fprintf(w, `<a href="%s" rel=noreferrer>`, html.EscapeString(href)) - case tag == "img": - div := replaceimg(node) - if div != "skip" { - io.WriteString(w, div) - } - case tag == "span": - case tag == "iframe": - src := html.EscapeString(getattr(node, "src")) - fmt.Fprintf(w, `<iframe src="<a href="%s">%s</a>">`, src, src) - case contains(permittedtags, tag): - writetag(w, node) - case contains(bannedtags, tag): - return - } - } else if node.Type == html.TextNode { - io.WriteString(w, html.EscapeString(node.Data)) - } - - for c := node.FirstChild; c != nil; c = c.NextSibling { - render(w, c) - } - - if node.Type == html.ElementNode { - tag := node.Data - if tag == "a" || (contains(permittedtags, tag) && tag != "br") { - fmt.Fprintf(w, "</%s>", tag) - } - if tag == "p" || tag == "div" { - io.WriteString(w, "\n") - } - } -} - -func replaceimg(node *html.Node) string { - src := getattr(node, "src") - alt := getattr(node, "alt") - //title := getattr(node, "title") - if hasclass(node, "Emoji") && alt != "" { - return html.EscapeString(alt) - } - return html.EscapeString(fmt.Sprintf(`<img src="%s">`, src)) -} - -func cleannode(node *html.Node) template.HTML { - var buf strings.Builder - render(&buf, node) - return template.HTML(buf.String()) -} - -func cleanstring(shtml string) template.HTML { - reader := strings.NewReader(shtml) - body, err := html.Parse(reader) - if err != nil { - log.Printf("error parsing html: %s", err) - return "" - } - return cleannode(body) -} - -func textonly(w io.Writer, node *html.Node) { - switch node.Type { - case html.ElementNode: - tag := node.Data - switch { - case tag == "a": - href := getattr(node, "href") - fmt.Fprintf(w, `<a href="%s">`, href) - case tag == "img": - io.WriteString(w, "<img>") - case contains(bannedtags, tag): - return - } - case html.TextNode: - io.WriteString(w, node.Data) - } - for c := node.FirstChild; c != nil; c = c.NextSibling { - textonly(w, c) - } - if node.Type == html.ElementNode { - tag := node.Data - if tag == "a" { - fmt.Fprintf(w, "</%s>", tag) - } - if tag == "p" || tag == "div" { - io.WriteString(w, "\n") - } - } -} - -var re_whitespaceeater = regexp.MustCompile("[ \t\r]*\n[ \t\r]*") -var re_blanklineeater = regexp.MustCompile("\n\n+") -var re_tabeater = regexp.MustCompile("[ \t]+") - -func htmltotext(shtml template.HTML) string { - reader := strings.NewReader(string(shtml)) - body, _ := html.Parse(reader) - var buf strings.Builder - textonly(&buf, body) - rv := buf.String() - rv = re_whitespaceeater.ReplaceAllLiteralString(rv, "\n") - rv = re_blanklineeater.ReplaceAllLiteralString(rv, "\n\n") - rv = re_tabeater.ReplaceAllLiteralString(rv, " ") - for len(rv) > 0 && rv[0] == '\n' { - rv = rv[1:] - } - return rv -}