all repos — honk @ 94c2a1e8319731d30a742a20af27f3d31743deee

my fork of honk

hoot.go (view raw)

 1package main
 2
 3import (
 4	"fmt"
 5	"io"
 6	"log"
 7	"net/http"
 8	"os"
 9	"regexp"
10	"strings"
11
12	"github.com/andybalholm/cascadia"
13	"golang.org/x/net/html"
14	"humungus.tedunangst.com/r/webs/htfilter"
15)
16
17var tweetsel = cascadia.MustCompile("p.tweet-text")
18var linksel = cascadia.MustCompile(".time a.tweet-timestamp")
19var authorregex = regexp.MustCompile("twitter.com/([^/]+)")
20
21func hootfetcher(hoot string) string {
22	url := hoot[5:]
23	if url[0] == ' ' {
24		url = url[1:]
25	}
26	url = strings.Replace(url, "mobile.twitter.com", "twitter.com", -1)
27	log.Printf("hooterizing %s", url)
28	req, err := http.NewRequest("GET", url, nil)
29	if err != nil {
30		log.Printf("error: %s", err)
31		return hoot
32	}
33	req.Header.Set("User-Agent", "OpenBSD ftp")
34	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
35	req.Header.Set("Accept-Language", "en-US,en;q=0.9")
36	resp, err := http.DefaultClient.Do(req)
37	if err != nil {
38		log.Printf("error: %s", err)
39		return hoot
40	}
41	defer resp.Body.Close()
42	if resp.StatusCode != 200 {
43		log.Printf("error getting %s: %d", url, resp.StatusCode)
44		return hoot
45	}
46	ld, _ := os.Create("lasthoot.html")
47	r := io.TeeReader(resp.Body, ld)
48	return hootfixer(r, url)
49}
50
51func hootfixer(r io.Reader, url string) string {
52	root, _ := html.Parse(r)
53	divs := tweetsel.MatchAll(root)
54
55	wanted := ""
56	var buf strings.Builder
57
58	fmt.Fprintf(&buf, "hoot: %s\n", url)
59	for _, div := range divs {
60		twp := div.Parent.Parent.Parent
61		alink := linksel.MatchFirst(twp)
62		if alink == nil {
63			log.Printf("missing link")
64			continue
65		}
66		link := "https://twitter.com" + htfilter.GetAttr(alink, "href")
67		authormatch := authorregex.FindStringSubmatch(link)
68		if len(authormatch) < 2 {
69			log.Printf("no author?")
70			continue
71		}
72		author := authormatch[1]
73		if wanted == "" {
74			wanted = author
75		}
76		if author != wanted {
77			continue
78		}
79		text := htfilter.TextOnly(div)
80		text = strings.Replace(text, "\n", " ", -1)
81		text = strings.Replace(text, "pic.twitter.com", "https://pic.twitter.com", -1)
82
83		fmt.Fprintf(&buf, "> @%s: %s\n", author, text)
84	}
85	return buf.String()
86}
87
var (
	// re_hoots matches a "hoot:" marker, an optional single space, and an
	// https URL running up to the next whitespace character.
	re_hoots = regexp.MustCompile(`hoot: ?https://\S+`)
)
89
90func hooterize(noise string) string {
91	return re_hoots.ReplaceAllStringFunc(noise, hootfetcher)
92}