reader/fetch.go (view raw)
1package reader
2
3import (
4 "bytes"
5 "crypto/sha1"
6 "encoding/hex"
7 "fmt"
8 "io"
9 "net/http"
10 "net/url"
11 "strings"
12
13 "git.icyphox.sh/forlater/navani/cache"
14 readability "github.com/go-shiori/go-readability"
15)
16
17type Article struct {
18 readability.Article
19 URL *url.URL
20}
21
// checksum returns the SHA-1 digest of s encoded as a lowercase hexadecimal
// string.
func checksum(s []byte) string {
	// sha1.Sum hashes the whole slice in one call and yields a fixed-size
	// array; slice it to hand the bytes to the hex encoder.
	digest := sha1.Sum(s)
	return hex.EncodeToString(digest[:])
}
28
29// Fetches the web page and stores the hash of the URL against
30// the response body in cache. Returns an io.Reader.
31func Fetch(url string) (io.Reader, error) {
32 sum := checksum([]byte(url))
33 c, err := cache.NewConn()
34 if err != nil {
35 return nil, fmt.Errorf("cache error: %w", err)
36 }
37
38 body, err := c.Get(sum)
39 // Not in cache.
40 if err != nil {
41 resp, err := http.Get(url)
42 if err != nil {
43 return nil, fmt.Errorf("http error: %w", err)
44 }
45 buf := bytes.Buffer{}
46 // Read into r and write into buf.
47 // Cache and return!
48 r := io.TeeReader(resp.Body, &buf)
49 b, err := io.ReadAll(r)
50 if err != nil {
51 return nil, fmt.Errorf("io error: %w", err)
52 }
53 _, err = c.Set(sum, b)
54 if err != nil {
55 return nil, fmt.Errorf("cache error: %w", err)
56 }
57 return &buf, nil
58 }
59
60 return strings.NewReader(body), nil
61}
62
63// Makes a given html body readable. Returns an error if it
64// can't.
65func Readable(r io.Reader, u *url.URL) (Article, error) {
66 article, err := readability.FromReader(r, u)
67 if err != nil {
68 return Article{readability.Article{}, u}, fmt.Errorf("failed to parse %s: %w\n", u, err)
69 }
70
71 return Article{article, u}, nil
72}