all repos — rel2abs @ 6680474200d2c244c4fae71ab11101deb905e53a

Go library to convert all relative URLs in an HTML document to absolute ones

Init rel2abs
Anirudh Oppiliappan x@icyphox.sh
Sun, 26 Sep 2021 15:05:38 +0530
commit

6680474200d2c244c4fae71ab11101deb905e53a

3 files changed, 62 insertions(+), 0 deletions(-)

jump to
A go.mod

@@ -0,0 +1,5 @@

+module git.icyphox.sh/rel2abs + +go 1.16 + +require golang.org/x/net v0.0.0-20210924151903-3ad01bbaa167
A go.sum

@@ -0,0 +1,7 @@

+golang.org/x/net v0.0.0-20210924151903-3ad01bbaa167 h1:eDd+TJqbgfXruGQ5sJRU7tEtp/58OAx4+Ayjxg4SM+4= +golang.org/x/net v0.0.0-20210924151903-3ad01bbaa167/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
A rel2abs.go

@@ -0,0 +1,50 @@

+package rel2abs + +import ( + "bytes" + "fmt" + "net/url" + + "golang.org/x/net/html" +) + +func rel2abs(n *html.Node, nurl *url.URL) error { + if n.Type == html.ElementNode && n.Data == "a" { + for _, a := range n.Attr { + if a.Key == "href" { + rel, err := url.Parse(a.Val) + if err != nil { + return fmt.Errorf("relative url: %w\n", err) + } + + a.Val = nurl.ResolveReference(rel).String() + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + rel2abs(c, nurl) + } + } + return nil +} + +// Converts all relative URLs in htmlContent to absolute URLs, +// resolved against a base URL. +// Example, with base as http://example.com/foo: +// <a href="#fn-1"> +// becomes +// <a href="http://example.com/foo#fn-1"> +func Rel2Abs(htmlContent []byte, base string) ([]byte, error) { + doc, err := html.Parse(bytes.NewReader(htmlContent)) + if err != nil { + return nil, fmt.Errorf("html parse: %w\n", err) + } + + nurl, err := url.Parse(base) + if err != nil { + return nil, fmt.Errorf("url parse: %w\n", err) + } + rel2abs(doc, nurl) + buf := bytes.Buffer{} + html.Render(&buf, doc) + return buf.Bytes(), nil +}