all repos — navani @ 153b51ab5981138b1834e08eb273d6fec4cb6ef3

forlater's primary mail processing service

reader/fetch.go (view raw)

 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
package reader

import (
	"bytes"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"git.icyphox.sh/forlater/navani/cache"
	readability "github.com/go-shiori/go-readability"
)

// Article bundles a readability.Article with a URL. Readable fills URL
// with the *url.URL it was called with.
type Article struct {
	// Embedded, so the fields of readability.Article are promoted.
	readability.Article
	// URL is the page URL supplied to Readable.
	URL *url.URL
}

// checksum returns the SHA-1 digest of s as a lowercase hex string.
func checksum(s []byte) string {
	digest := sha1.Sum(s)
	return hex.EncodeToString(digest[:])
}

// Fetch returns the body of the web page at url as an io.Reader.
//
// The cache is consulted first, keyed by the SHA-1 hex digest of url. If
// that lookup returns an error the page is downloaded with http.Get, the
// complete body is stored in the cache under the same key, and a reader
// over the downloaded bytes is returned.
//
// Errors from opening the cache connection, performing the request,
// reading the response body, or writing the cache entry are wrapped and
// returned with a nil reader.
func Fetch(url string) (io.Reader, error) {
	sum := checksum([]byte(url))
	c, err := cache.NewConn()
	if err != nil {
		return nil, fmt.Errorf("cache error: %w", err)
	}

	// Any error from Get is treated as "not in cache".
	if body, err := c.Get(sum); err == nil {
		return strings.NewReader(body), nil
	}

	resp, err := http.Get(url)
	if err != nil {
		return nil, fmt.Errorf("http error: %w", err)
	}
	// The body must always be closed, otherwise the underlying connection
	// is leaked on every cache miss.
	defer resp.Body.Close()

	b, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("io error: %w", err)
	}

	if _, err := c.Set(sum, b); err != nil {
		return nil, fmt.Errorf("cache error: %w", err)
	}

	// b already holds the whole body, so it is served directly instead of
	// keeping a second copy in a tee buffer.
	return bytes.NewReader(b), nil
}

// Readable parses the HTML read from r with readability.FromReader,
// passing u as the page URL, and wraps the result in an Article.
//
// On failure the returned Article carries only u and the error wraps the
// one reported by readability.FromReader.
func Readable(r io.Reader, u *url.URL) (Article, error) {
	article, err := readability.FromReader(r, u)
	if err != nil {
		// No trailing newline: error strings get embedded in other
		// messages and log lines.
		return Article{URL: u}, fmt.Errorf("failed to parse %s: %w", u, err)
	}

	return Article{Article: article, URL: u}, nil
}