all repos — navani @ 577dcf5c5509a950dfd2c4abd1c6bf2b39dd4dda

forlater's primary mail processing service

reader/fetch.go (view raw)

 1package reader
 2
 3import (
 4	"bytes"
 5	"crypto/sha1"
 6	"encoding/hex"
 7	"fmt"
 8	"io"
 9	"net/http"
10	"net/url"
11	"strings"
12
13	"git.icyphox.sh/forlater/navani/cache"
14	readability "github.com/go-shiori/go-readability"
15)
16
17type Article struct {
18	readability.Article
19	URL *url.URL
20}
21
// checksum returns the SHA-1 digest of s as a lowercase hexadecimal string.
func checksum(s []byte) string {
	digest := sha1.Sum(s)
	return hex.EncodeToString(digest[:])
}
28
29// Fetches the web page and stores the hash of the URL against
30// the response body in cache. Returns an io.Reader.
31func Fetch(url string) (io.Reader, error) {
32	client := &http.Client{}
33	sum := checksum([]byte(url))
34	c, err := cache.NewConn()
35	if err != nil {
36		return nil, fmt.Errorf("cache error: %w\n", err)
37	}
38
39	body, err := c.Get(sum)
40	// Not in cache.
41	if err != nil {
42		req, err := http.NewRequest("GET", url, nil)
43		if err != nil {
44			return nil, fmt.Errorf("http error: %w\n", err)
45		}
46
47		req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36")
48		resp, err := client.Do(req)
49		if err != nil {
50			return nil, fmt.Errorf("http client error: %w\n", err)
51		}
52
53		buf := bytes.Buffer{}
54		// Read into r and write into buf.
55		// Cache and return!
56		r := io.TeeReader(resp.Body, &buf)
57		b, err := io.ReadAll(r)
58		if err != nil {
59			return nil, fmt.Errorf("io error: %w\n", err)
60		}
61		_, err = c.Set(sum, b)
62		if err != nil {
63			return nil, fmt.Errorf("cache error: %w\n", err)
64		}
65		return &buf, nil
66	}
67
68	return strings.NewReader(body), nil
69}
70
71// Makes a given html body readable. Returns an error if it
72// can't.
73func Readable(r io.Reader, u *url.URL) (Article, error) {
74	article, err := readability.FromReader(r, u)
75	if err != nil {
76		return Article{readability.Article{}, u}, fmt.Errorf("failed to parse %s: %w\n", u, err)
77	}
78
79	return Article{article, u}, nil
80}