icy does git — honk: 09cf39892dbea47518d42ae7400f0923c4b3eb03

use htfilter from webs

Ted Unangst tedu@tedunangst.com

Fri, 17 May 2019 19:37:43 -0400

commit

09cf39892dbea47518d42ae7400f0923c4b3eb03

parent

a4d92f2e3fc9ae6692393f6bd22a0b6e609bf30f

5 files changed, 10 insertions(+), 199 deletions(-)

jump to

fun.go

go.mod

go.sum

honk.go

M fun.go → fun.go

@@ -26,9 +26,12 @@ "net/http"
 	"regexp"
 	"strings"
 	"sync"
+
+	"humungus.tedunangst.com/r/webs/htfilter"
 )
 
 func reverbolate(honks []*Honk) {
+	filt := htfilter.New()
 	for _, h := range honks {
 		h.What += "ed"
 		if h.Honker == "" {
@@ -56,7 +59,7 @@ precis := h.Precis
 		if precis != "" {
 			precis = "<p>summary: " + precis + "<p>"
 		}
-		h.HTML = cleanstring(precis + h.Noise)
+		h.HTML, _ = filt.String(precis + h.Noise)
 		emuxifier := func(e string) string {
 			for _, d := range h.Donks {
 				if d.Name == e {

M go.mod → go.mod

@@ -6,5 +6,5 @@ github.com/mattn/go-runewidth v0.0.4
 	golang.org/x/crypto v0.0.0-20190424203555-c05e17bb3b2d
 	golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3
 	humungus.tedunangst.com/r/go-sqlite3 v1.1.2
-	humungus.tedunangst.com/r/webs v0.4.3
+	humungus.tedunangst.com/r/webs v0.4.4
 )

M go.sum → go.sum

@@ -14,3 +14,5 @@ humungus.tedunangst.com/r/go-sqlite3 v1.1.2 h1:bRAXNRZ4VNFRFhhG4tdudK4Lv4ktHQAHEppKlDANUFg=
 humungus.tedunangst.com/r/go-sqlite3 v1.1.2/go.mod h1:FtEEmQM7U2Ey1TuEEOyY1BmphTZnmiEjPsNLEAkpf/M=
 humungus.tedunangst.com/r/webs v0.4.3 h1:L0id2lZDK+lmuWswd+iOV4T0LXvZe92SqD50AuZDnDM=
 humungus.tedunangst.com/r/webs v0.4.3/go.mod h1:6yLLDXBaE4pKURa/3/bxoQPod37uAqc/Kq8J0IopWW0=
+humungus.tedunangst.com/r/webs v0.4.4 h1:uK1YW+eGQ0JADiSs7Ipt0ljFfQw7e73924wMm4V3gss=
+humungus.tedunangst.com/r/webs v0.4.4/go.mod h1:79Ww3HmgE1m+HXU0r0b9hkOD3JuDzXoGiEauHuKcwBI=

M honk.go → honk.go

@@ -34,6 +34,7 @@ "strings"
 	"time"
 
 	"github.com/gorilla/mux"
+	"humungus.tedunangst.com/r/webs/htfilter"
 	"humungus.tedunangst.com/r/webs/image"
 	"humungus.tedunangst.com/r/webs/login"
 	"humungus.tedunangst.com/r/webs/rss"
@@ -498,10 +499,11 @@ if u == nil {
 		w.Header().Set("Cache-Control", "max-age=60")
 	}
 	if user != nil {
+		filt := htfilter.New()
 		templinfo["Name"] = user.Name
 		whatabout := user.About
 		whatabout = obfusbreak(user.About)
-		templinfo["WhatAbout"] = cleanstring(whatabout)
+		templinfo["WhatAbout"], _ = filt.String(whatabout)
 	}
 	templinfo["Honks"] = honks
 	templinfo["ServerMessage"] = infomsg

D html.go

@@ -1,196 +0,0 @@
-//
-// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
-//
-// Permission to use, copy, modify, and distribute this software for any
-// purpose with or without fee is hereby granted, provided that the above
-// copyright notice and this permission notice appear in all copies.
-//
-// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-package main
-
-import (
-	"fmt"
-	"html/template"
-	"io"
-	"log"
-	"net/url"
-	"regexp"
-	"sort"
-	"strings"
-
-	"golang.org/x/net/html"
-)
-
-var permittedtags = []string{
-	"div", "h1", "h2", "h3", "h4", "h5", "h6",
-	"table", "thead", "tbody", "th", "tr", "td", "colgroup", "col",
-	"p", "br", "pre", "code", "blockquote", "q",
-	"samp", "mark", "ins", "dfn", "cite", "abbr", "address",
-	"strong", "em", "b", "i", "s", "u", "sub", "sup", "del", "tt", "small",
-	"ol", "ul", "li", "dl", "dt", "dd",
-}
-var permittedattr = []string{"colspan", "rowspan"}
-var bannedtags = []string{"script", "style"}
-
-func init() {
-	sort.Strings(permittedtags)
-	sort.Strings(permittedattr)
-	sort.Strings(bannedtags)
-}
-
-func contains(array []string, tag string) bool {
-	idx := sort.SearchStrings(array, tag)
-	return idx < len(array) && array[idx] == tag
-}
-
-func getattr(node *html.Node, attr string) string {
-	for _, a := range node.Attr {
-		if a.Key == attr {
-			return a.Val
-		}
-	}
-	return ""
-}
-
-func hasclass(node *html.Node, class string) bool {
-	return strings.Contains(" "+getattr(node, "class")+" ", " "+class+" ")
-}
-
-func writetag(w io.Writer, node *html.Node) {
-	io.WriteString(w, "<")
-	io.WriteString(w, node.Data)
-	for _, attr := range node.Attr {
-		if contains(permittedattr, attr.Key) {
-			fmt.Fprintf(w, ` %s="%s"`, attr.Key, html.EscapeString(attr.Val))
-		}
-	}
-	io.WriteString(w, ">")
-}
-
-func render(w io.Writer, node *html.Node) {
-	if node.Type == html.ElementNode {
-		tag := node.Data
-		switch {
-		case tag == "a":
-			href := getattr(node, "href")
-			hrefurl, err := url.Parse(href)
-			if err != nil {
-				href = "#BROKEN-" + href
-			} else {
-				href = hrefurl.String()
-			}
-			fmt.Fprintf(w, `<a href="%s" rel=noreferrer>`, html.EscapeString(href))
-		case tag == "img":
-			div := replaceimg(node)
-			if div != "skip" {
-				io.WriteString(w, div)
-			}
-		case tag == "span":
-		case tag == "iframe":
-			src := html.EscapeString(getattr(node, "src"))
-			fmt.Fprintf(w, `&lt;iframe src="<a href="%s">%s</a>"&gt;`, src, src)
-		case contains(permittedtags, tag):
-			writetag(w, node)
-		case contains(bannedtags, tag):
-			return
-		}
-	} else if node.Type == html.TextNode {
-		io.WriteString(w, html.EscapeString(node.Data))
-	}
-
-	for c := node.FirstChild; c != nil; c = c.NextSibling {
-		render(w, c)
-	}
-
-	if node.Type == html.ElementNode {
-		tag := node.Data
-		if tag == "a" || (contains(permittedtags, tag) && tag != "br") {
-			fmt.Fprintf(w, "</%s>", tag)
-		}
-		if tag == "p" || tag == "div" {
-			io.WriteString(w, "\n")
-		}
-	}
-}
-
-func replaceimg(node *html.Node) string {
-	src := getattr(node, "src")
-	alt := getattr(node, "alt")
-	//title := getattr(node, "title")
-	if hasclass(node, "Emoji") && alt != "" {
-		return html.EscapeString(alt)
-	}
-	return html.EscapeString(fmt.Sprintf(`<img src="%s">`, src))
-}
-
-func cleannode(node *html.Node) template.HTML {
-	var buf strings.Builder
-	render(&buf, node)
-	return template.HTML(buf.String())
-}
-
-func cleanstring(shtml string) template.HTML {
-	reader := strings.NewReader(shtml)
-	body, err := html.Parse(reader)
-	if err != nil {
-		log.Printf("error parsing html: %s", err)
-		return ""
-	}
-	return cleannode(body)
-}
-
-func textonly(w io.Writer, node *html.Node) {
-	switch node.Type {
-	case html.ElementNode:
-		tag := node.Data
-		switch {
-		case tag == "a":
-			href := getattr(node, "href")
-			fmt.Fprintf(w, `<a href="%s">`, href)
-		case tag == "img":
-			io.WriteString(w, "<img>")
-		case contains(bannedtags, tag):
-			return
-		}
-	case html.TextNode:
-		io.WriteString(w, node.Data)
-	}
-	for c := node.FirstChild; c != nil; c = c.NextSibling {
-		textonly(w, c)
-	}
-	if node.Type == html.ElementNode {
-		tag := node.Data
-		if tag == "a" {
-			fmt.Fprintf(w, "</%s>", tag)
-		}
-		if tag == "p" || tag == "div" {
-			io.WriteString(w, "\n")
-		}
-	}
-}
-
-var re_whitespaceeater = regexp.MustCompile("[ \t\r]*\n[ \t\r]*")
-var re_blanklineeater = regexp.MustCompile("\n\n+")
-var re_tabeater = regexp.MustCompile("[ \t]+")
-
-func htmltotext(shtml template.HTML) string {
-	reader := strings.NewReader(string(shtml))
-	body, _ := html.Parse(reader)
-	var buf strings.Builder
-	textonly(&buf, body)
-	rv := buf.String()
-	rv = re_whitespaceeater.ReplaceAllLiteralString(rv, "\n")
-	rv = re_blanklineeater.ReplaceAllLiteralString(rv, "\n\n")
-	rv = re_tabeater.ReplaceAllLiteralString(rv, " ")
-	for len(rv) > 0 && rv[0] == '\n' {
-		rv = rv[1:]
-	}
-	return rv
-}

all repos — honk @ 09cf39892dbea47518d42ae7400f0923c4b3eb03

my fork of honk