experimental text filtering to stop the zalgo
Ted Unangst tedu@tedunangst.com
Wed, 24 Apr 2019 01:16:34 -0400
4 files changed,
93 insertions(+),
0 deletions(-)
M
fun.go
→
fun.go
@@ -54,6 +54,7 @@ h.URL = h.XID
} } zap := make(map[*Donk]bool) + h.Noise = unpucker(h.Noise) h.HTML = cleanstring(h.Noise) emuxifier := func(e string) string { for _, d := range h.Donks {@@ -151,6 +152,7 @@ }
var re_bolder = regexp.MustCompile(`(^|\W)\*\*([\w\s,.!?']+)\*\*($|\W)`) var re_italicer = regexp.MustCompile(`(^|\W)\*([\w\s,.!?']+)\*($|\W)`) + func markitzero(s string) string { s = re_bolder.ReplaceAllString(s, "$1<b>$2</b>$3") s = re_italicer.ReplaceAllString(s, "$1<i>$2</i>$3")
M
go.mod
→
go.mod
@@ -2,6 +2,7 @@ module humungus.tedunangst.com/r/honk
require ( github.com/gorilla/mux v1.7.1 + github.com/mattn/go-runewidth v0.0.4 golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 humungus.tedunangst.com/r/go-sqlite3 v1.1.2
M
go.sum
→
go.sum
@@ -1,5 +1,7 @@
github.com/gorilla/mux v1.7.1 h1:Dw4jY2nghMMRsh1ol8dv1axHkDwMQK2DHerMNJsIpJU= github.com/gorilla/mux v1.7.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y= +github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5 h1:bselrhR0Or1vomJZC8ZIjWtbDmn9OYFLX5Ik9alpJpE= golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE=
A
skulduggery.go
@@ -0,0 +1,88 @@
+// +// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com> +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +package main + +import ( + "regexp" + + "github.com/mattn/go-runewidth" +) + +var bigboldshitz = "๐๐๐๐๐๐ ๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐" +var lilboldshitz = "๐๐๐๐๐๐๐ ๐ก๐ข๐ฃ๐ค๐ฅ๐ฆ๐ง๐จ๐ฉ๐ช๐ซ๐ฌ๐ญ๐ฎ๐ฏ๐ฐ๐ฑ๐ฒ๐ณ" +var biggothshitz = "๐ฌ๐ญ๐ฎ๐ฏ๐ฐ๐ฑ๐ฒ๐ณ๐ด๐ต๐ถ๐ท๐ธ๐น๐บ๐ป๐ผ๐ฝ๐พ๐ฟ๐๐๐๐๐๐ " +var lilgothshitz = "๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐" +var bigitalshitz = "๐จ๐ฉ๐ช๐ซ๐ฌ๐ญ๐ฎ๐ฏ๐ฐ๐ฑ๐ฒ๐ณ๐ด๐ต๐ถ๐ท๐ธ๐น๐บ๐ป๐ผ๐ฝ๐พ๐ฟ๐๐" +var lilitalshitz = "๐๐๐๐ ๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐" +var bigbangshitz = "๐ธ๐นโ๐ป๐ผ๐ฝ๐พโ๐๐๐๐๐โ๐โโโ๐๐๐๐๐๐๐โค" +var lilbangshitz = "๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐ ๐ก๐ข๐ฃ๐ค๐ฅ๐ฆ๐ง๐จ๐ฉ๐ช๐ซ" + +var re_alltheshitz = regexp.MustCompile(`[` + + bigboldshitz + lilboldshitz + + biggothshitz + lilgothshitz + + bigitalshitz + lilitalshitz + + bigbangshitz + lilbangshitz + + `]{2,}`) + +// this may not be especially fast +func unpucker(s string) string { + fixer := func(r string) string { + x := make([]byte, len(r)) + xi := 0 + loop1: + for _, c := range r { + xi++ + for _, set := range []string{bigboldshitz, biggothshitz, bigitalshitz, bigbangshitz} { + i := 0 + for _, rr := range set { + if rr == c { + x[xi] = byte('A' + i) + continue loop1 + } + i++ + } + } + for _, set := range []string{lilboldshitz, lilgothshitz, lilitalshitz, lilbangshitz} { + i := 0 + for _, rr := range set { + if rr == c { + x[xi] = byte('a' + i) + continue loop1 + } + i++ + } + } + x[xi] = '.' + } + return string(x) + } + s = re_alltheshitz.ReplaceAllStringFunc(s, fixer) + x := make([]byte, 0, len(s)) + zw := false + for _, c := range s { + if runewidth.RuneWidth(c) == 0 { + if zw { + continue + } + zw = true + } else { + zw = false + } + q := string(c) + x = append(x, []byte(q)...) + } + return string(x) +}