reader/fetch.go (view raw)
1package reader
2
3import (
4 "bytes"
5 "crypto/sha1"
6 "encoding/hex"
7 "fmt"
8 "io"
9 "net/http"
10 "net/url"
11 "strings"
12
13 "git.icyphox.sh/forlater/navani/cache"
14 readability "github.com/go-shiori/go-readability"
15)
16
17type Article struct {
18 readability.Article
19 URL *url.URL
20}
21
// checksum returns the SHA-1 digest of s as a lowercase hexadecimal string
// (40 characters). Fetch uses it to turn a URL into a cache key.
func checksum(s []byte) string {
	digest := sha1.Sum(s)
	return hex.EncodeToString(digest[:])
}
28
29// Fetches the web page and stores the hash of the URL against
30// the response body in cache. Returns an io.Reader.
31func Fetch(url string) (io.Reader, error) {
32 sum := checksum([]byte(url))
33 c, err := cache.NewConn()
34
35 body, err := c.Get(sum)
36 // Not in cache.
37 if err != nil {
38 resp, err := http.Get(url)
39 if err != nil {
40 return nil, err
41 }
42 buf := bytes.Buffer{}
43 // Read into r and write into buf.
44 // Cache and return!
45 r := io.TeeReader(resp.Body, &buf)
46 b, err := io.ReadAll(r)
47 if err != nil {
48 c.Set(b)
49 }
50 return &buf, nil
51 }
52
53 return strings.NewReader(body), nil
54}
55
56// Makes a given html body readable. Returns an error if it
57// can't.
58func Readable(r io.Reader, u *url.URL) (Article, error) {
59 if !readability.Check(r) {
60 return Article{readability.Article{}, u}, fmt.Errorf("failed to parse %s", u)
61 }
62 article, err := readability.FromReader(r, u)
63 if err != nil {
64 return Article{readability.Article{}, u}, fmt.Errorf("failed to parse %s: %v\n", u, err)
65 }
66
67 return Article{article, u}, nil
68}