all repos — navani @ 8e576664699c9c4a292bd9ffda650d7b0e8e086b

forlater's primary mail processing service

reader/fetch.go (view raw)

 1package reader
 2
 3import (
 4	"bytes"
 5	"crypto/sha1"
 6	"encoding/hex"
 7	"fmt"
 8	"io"
 9	"net/http"
10	"net/url"
11	"strings"
12
13	"git.icyphox.sh/forlater/navani/cache"
14	readability "github.com/go-shiori/go-readability"
15)
16
// Article pairs a parsed readability.Article with the URL of the page
// it was extracted from.
type Article struct {
	// Embedded so the parsed article's fields are promoted onto Article.
	readability.Article
	// URL is the page address that was passed to Readable.
	URL *url.URL
}
21
// checksum returns the SHA-1 digest of s encoded as a lowercase
// hexadecimal string.
func checksum(s []byte) string {
	digest := sha1.Sum(s)
	return hex.EncodeToString(digest[:])
}
28
29// Fetches the web page and stores the hash of the URL against
30// the response body in cache. Returns an io.Reader.
31func Fetch(url string) (io.Reader, error) {
32	sum := checksum([]byte(url))
33	c, err := cache.NewConn()
34
35	body, err := c.Get(sum)
36	// Not in cache.
37	if err != nil {
38		resp, err := http.Get(url)
39		if err != nil {
40			return nil, err
41		}
42		buf := bytes.Buffer{}
43		// Read into r and write into buf.
44		// Cache and return!
45		r := io.TeeReader(resp.Body, &buf)
46		b, err := io.ReadAll(r)
47		if err != nil {
48			c.Set(b)
49		}
50		return &buf, nil
51	}
52
53	return strings.NewReader(body), nil
54}
55
56// Makes a given html body readable. Returns an error if it
57// can't.
58func Readable(r io.Reader, u *url.URL) (Article, error) {
59	article, err := readability.FromReader(r, u)
60	if err != nil {
61		return Article{}, fmt.Errorf("failed to parse %s: %v\n", u, err)
62	}
63
64	return Article{article, u}, nil
65}