Twitter is simply incapable of leaving well enough alone.
Ted Unangst tedu@tedunangst.com
Sat, 02 Jul 2022 17:57:29 -0400
2 files changed, 34 insertions(+), 2 deletions(-)
M
docs/changelog.txt
→
docs/changelog.txt
@@ -1,5 +1,9 @@
 changelog
 
+=== next
+
++ Try to fix hoot again because Twitter did a Twitter.
+
 === 0.9.8 Tentative Tentacle
 
 + Switch database to WAL mode.
M
hoot.go
→
hoot.go
@@ -28,8 +28,8 @@ "golang.org/x/net/html"
"humungus.tedunangst.com/r/webs/htfilter" ) -var tweetsel = cascadia.MustCompile("p.tweet-text") -var linksel = cascadia.MustCompile("a.tweet-timestamp") +var tweetsel = cascadia.MustCompile("div[itemProp=articleBody]") +var linksel = cascadia.MustCompile("a time") var replyingto = cascadia.MustCompile(".ReplyingToContextBelowAuthor") var imgsel = cascadia.MustCompile("div.js-adaptive-photo img") var authorregex = regexp.MustCompile("twitter.com/([^/]+)")@@ -65,6 +65,34 @@ fmt.Fprintf(&buf, "%s\n", url)
divs := tweetsel.MatchAll(root) for i, div := range divs { + { + twp := div.Parent.Parent.Parent + link := url + alink := linksel.MatchFirst(twp) + if alink == nil { + if i != 0 { + dlog.Printf("missing link") + continue + } + } else { + alink = alink.Parent + link = "https://twitter.com" + htfilter.GetAttr(alink, "href") + } + authormatch := authorregex.FindStringSubmatch(link) + if len(authormatch) < 2 { + dlog.Printf("no author?: %s", link) + continue + } + author := authormatch[1] + if author != wanted { + continue + } + text := htf.NodeText(div) + text = strings.Replace(text, "\n", " ", -1) + fmt.Fprintf(&buf, "> @%s: %s\n", author, text) + continue + } + twp := div.Parent.Parent.Parent link := url alink := linksel.MatchFirst(twp)