all repos — rel2abs @ 2fcd126dea001db07fde887da91745586af517e9

Go library to convert all relative URLs in an HTML document to absolute ones

rel2abs.go (view raw)

 1package rel2abs
 2
 3import (
 4	"bytes"
 5	"fmt"
 6	"net/url"
 7
 8	"golang.org/x/net/html"
 9	"golang.org/x/net/html/atom"
10)
11
12func rel2abs(n *html.Node, nurl *url.URL) error {
13	if n.Type == html.ElementNode && n.DataAtom == atom.A {
14		for i := range n.Attr {
15			if n.Attr[i].Key == "href" {
16				rel, err := url.Parse(n.Attr[i].Val)
17				if err != nil {
18					return fmt.Errorf("relative url: %w\n", err)
19				}
20
21				n.Attr[i].Val = nurl.ResolveReference(rel).String()
22			}
23		}
24	}
25	for c := n.FirstChild; c != nil; c = c.NextSibling {
26		rel2abs(c, nurl)
27	}
28	return nil
29}
30
31// Converts all relative URLs in htmlContent to absolute URLs,
32// resolved against a base URL.
33func Rel2Abs(htmlContent []byte, base string) ([]byte, error) {
34	doc, err := html.Parse(bytes.NewReader(htmlContent))
35	if err != nil {
36		return nil, fmt.Errorf("html parse: %w\n", err)
37	}
38
39	nurl, err := url.Parse(base)
40	if err != nil {
41		return nil, fmt.Errorf("url parse: %w\n", err)
42	}
43	rel2abs(doc, nurl)
44	buf := bytes.Buffer{}
45	html.Render(&buf, doc)
46	return buf.Bytes(), nil
47}