rel2abs.go (view raw)
1package rel2abs
2
3import (
4 "bytes"
5 "fmt"
6 "net/url"
7
8 "golang.org/x/net/html"
9 "golang.org/x/net/html/atom"
10)
11
12func rel2abs(n *html.Node, nurl *url.URL) error {
13 if n.Type == html.ElementNode && n.DataAtom == atom.A {
14 for i := range n.Attr {
15 if n.Attr[i].Key == "href" {
16 rel, err := url.Parse(n.Attr[i].Val)
17 if err != nil {
18 return fmt.Errorf("relative url: %w\n", err)
19 }
20
21 n.Attr[i].Val = nurl.ResolveReference(rel).String()
22 }
23 }
24 }
25 for c := n.FirstChild; c != nil; c = c.NextSibling {
26 rel2abs(c, nurl)
27 }
28 return nil
29}
30
31// Converts all relative URLs in htmlContent to absolute URLs,
32// resolved against a base URL.
33func Rel2Abs(htmlContent []byte, base string) ([]byte, error) {
34 doc, err := html.Parse(bytes.NewReader(htmlContent))
35 if err != nil {
36 return nil, fmt.Errorf("html parse: %w\n", err)
37 }
38
39 nurl, err := url.Parse(base)
40 if err != nil {
41 return nil, fmt.Errorf("url parse: %w\n", err)
42 }
43 rel2abs(doc, nurl)
44 buf := bytes.Buffer{}
45 html.Render(&buf, doc)
46 return buf.Bytes(), nil
47}