rel2abs.go (view raw)
1package rel2abs
2
3import (
4 "bytes"
5 "fmt"
6 "net/url"
7
8 "golang.org/x/net/html"
9)
10
11func rel2abs(n *html.Node, nurl *url.URL) error {
12 if n.Type == html.ElementNode && n.Data == "a" {
13 for _, a := range n.Attr {
14 if a.Key == "href" {
15 rel, err := url.Parse(a.Val)
16 fmt.Println("rel:", rel)
17 if err != nil {
18 return fmt.Errorf("relative url: %w\n", err)
19 }
20
21 a.Val = nurl.ResolveReference(rel).String()
22 fmt.Println("resolved:", a.Val)
23 }
24 }
25 for c := n.FirstChild; c != nil; c = c.NextSibling {
26 rel2abs(c, nurl)
27 }
28 }
29 return nil
30}
31
32// Converts all relative URLs in htmlContent to absolute URLs,
33// resolved against a base URL.
34func Rel2Abs(htmlContent []byte, base string) ([]byte, error) {
35 doc, err := html.Parse(bytes.NewReader(htmlContent))
36 if err != nil {
37 return nil, fmt.Errorf("html parse: %w\n", err)
38 }
39
40 nurl, err := url.Parse(base)
41 if err != nil {
42 return nil, fmt.Errorf("url parse: %w\n", err)
43 }
44 rel2abs(doc, nurl)
45 buf := bytes.Buffer{}
46 html.Render(&buf, doc)
47 return buf.Bytes(), nil
48}