reader/fetch.go (view raw)
1package reader
2
3import (
4 "bytes"
5 "crypto/sha1"
6 "encoding/hex"
7 "fmt"
8 "io"
9 "net/http"
10 "net/url"
11 "strings"
12
13 "git.icyphox.sh/forlater/navani/cache"
14 readability "github.com/go-shiori/go-readability"
15)
16
17type Article struct {
18 readability.Article
19 URL *url.URL
20}
21
// checksum returns the lowercase hex encoding of the SHA-1 digest of s.
func checksum(s []byte) string {
	digest := sha1.Sum(s)
	return hex.EncodeToString(digest[:])
}
28
29// Fetches the web page and stores the hash of the URL against
30// the response body in cache. Returns an io.Reader.
31func Fetch(url string) (io.Reader, error) {
32 client := &http.Client{}
33 sum := checksum([]byte(url))
34 c, err := cache.NewConn()
35 if err != nil {
36 return nil, fmt.Errorf("cache error: %w\n", err)
37 }
38
39 body, err := c.Get(sum)
40 // Not in cache.
41 if err != nil {
42 req, err := http.NewRequest("GET", url, nil)
43 if err != nil {
44 return nil, fmt.Errorf("http error: %w\n", err)
45 }
46
47 req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36")
48 resp, err := client.Do(req)
49 if err != nil {
50 return nil, fmt.Errorf("http client error: %w\n", err)
51 }
52
53 buf := bytes.Buffer{}
54 // Read into r and write into buf.
55 // Cache and return!
56 r := io.TeeReader(resp.Body, &buf)
57 b, err := io.ReadAll(r)
58 if err != nil {
59 return nil, fmt.Errorf("io error: %w\n", err)
60 }
61 _, err = c.Set(sum, b)
62 if err != nil {
63 return nil, fmt.Errorf("cache error: %w\n", err)
64 }
65 return &buf, nil
66 }
67
68 return strings.NewReader(body), nil
69}
70
71// Makes a given html body readable. Returns an error if it
72// can't.
73func Readable(r io.Reader, u *url.URL) (Article, error) {
74 article, err := readability.FromReader(r, u)
75 if err != nil {
76 return Article{readability.Article{}, u}, fmt.Errorf("failed to parse %s: %w\n", u, err)
77 }
78
79 return Article{article, u}, nil
80}