all repos — navani @ 87e4e39f51426e49685961140dfe3499c83378d9

forlater's primary mail processing service

reader/fetch.go (view raw)

 1package reader
 2
 3import (
 4	"bytes"
 5	"crypto/sha1"
 6	"encoding/hex"
 7	"fmt"
 8	"io"
 9	"net/http"
10	"net/url"
11	"strings"
12
13	"git.icyphox.sh/forlater/navani/cache"
14	readability "github.com/go-shiori/go-readability"
15)
16
17type Article struct {
18	readability.Article
19	URL *url.URL
20}
21
// checksum returns the SHA-1 digest of s encoded as a lowercase
// hexadecimal string.
func checksum(s []byte) string {
	digest := sha1.Sum(s)
	return hex.EncodeToString(digest[:])
}
28
29// Fetches the web page and stores the hash of the URL against
30// the response body in cache. Returns an io.Reader.
31func Fetch(url string) (io.Reader, error) {
32	sum := checksum([]byte(url))
33	c, err := cache.NewConn()
34	if err != nil {
35		return nil, fmt.Errorf("cache error: %w", err)
36	}
37
38	body, err := c.Get(sum)
39	// Not in cache.
40	if err != nil {
41		resp, err := http.Get(url)
42		if err != nil {
43			return nil, fmt.Errorf("http error: %w", err)
44		}
45		buf := bytes.Buffer{}
46		// Read into r and write into buf.
47		// Cache and return!
48		r := io.TeeReader(resp.Body, &buf)
49		b, err := io.ReadAll(r)
50		if err != nil {
51			return nil, fmt.Errorf("io error: %w", err)
52		}
53		_, err = c.Set(sum, b)
54		if err != nil {
55			return nil, fmt.Errorf("cache error: %w", err)
56		}
57		return &buf, nil
58	}
59
60	return strings.NewReader(body), nil
61}
62
63// Makes a given html body readable. Returns an error if it
64// can't.
65func Readable(r io.Reader, u *url.URL) (Article, error) {
66	article, err := readability.FromReader(r, u)
67	if err != nil {
68		return Article{readability.Article{}, u}, fmt.Errorf("failed to parse %s: %w\n", u, err)
69	}
70
71	return Article{article, u}, nil
72}