File: verifier.go

package info (click to toggle)
golang-github-davidmytton-url-verifier 1.0.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 168 kB
  • sloc: makefile: 2
file content (144 lines) | stat: -rw-r--r-- 4,799 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Package urlverifier is a Go library for URL validation and verification: does
// this URL actually work?
// SPDX-License-Identifier: MIT
package urlverifier

import (
	"errors"
	"fmt"
	"net"
	"net/url"

	"github.com/asaskevich/govalidator"
)

// Verifier is a URL Verifier. Create one using NewVerifier().
//
// Its two options are unexported and are toggled through
// EnableHTTPCheck/DisableHTTPCheck and
// AllowHTTPCheckInternal/DisallowHTTPCheckInternal. Both are read by Verify.
type Verifier struct {
	httpCheckEnabled       bool // Whether to check if the URL is reachable via HTTP (default: false)
	allowHttpCheckInternal bool // Whether to allow HTTP checks to hosts that resolve to internal IPs (default: false)
}

// Result is the result of a URL verification, as returned by Verifier.Verify.
// The struct tags give the key names used when a Result is encoded as JSON.
type Result struct {
	URL           string   `json:"url"`            // The URL that was checked, exactly as it was passed to Verify
	URLComponents *url.URL `json:"url_components"` // The parsed URL components; nil unless IsURL is true and parsing succeeded
	IsURL         bool     `json:"is_url"`         // Whether the URL is valid according to govalidator.IsURL
	IsRFC3986URL  bool     `json:"is_rfc3986_url"` // Whether the URL is a valid URL according to RFC 3986. This is the same as IsRFC3986URI but with a check for a scheme.
	IsRFC3986URI  bool     `json:"is_rfc3986_uri"` // Whether the URL is a valid URI according to RFC 3986
	HTTP          *HTTP    `json:"http"`           // The result of the HTTP check; nil when the check is disabled or was not attempted
}

// NewVerifier returns a freshly allocated Verifier with every option switched
// off: no HTTP reachability check, and no HTTP checks against hosts that
// resolve to internal IPs.
func NewVerifier() *Verifier {
	verifier := new(Verifier)
	// Spell the defaults out so they do not silently depend on zero values.
	verifier.httpCheckEnabled = false
	verifier.allowHttpCheckInternal = false
	return verifier
}

// Verify verifies a URL and reports the findings in a Result.
//
// The steps run in this order:
//
//  1. rawURL is checked with govalidator.IsURL. If that passes, it is parsed
//     with url.Parse and stored in Result.URLComponents; a parse error is
//     returned immediately.
//  2. rawURL is checked against RFC 3986 with IsRequestURL (scheme required)
//     and IsRequestURI (scheme optional).
//  3. If the HTTP check is enabled, the URL must have been parsed and use the
//     http or https scheme. Unless internal checks are allowed, the host is
//     resolved and the check is refused when any resolved IP is private,
//     loopback, link-local, interface-local multicast or unspecified. Finally
//     CheckHTTP is called and its result stored in Result.HTTP.
//
// The returned *Result is never nil. When err is non-nil the Result holds
// whatever was determined before the failure.
func (v *Verifier) Verify(rawURL string) (*Result, error) {
	// The boolean fields and pointers start at their zero values (false/nil).
	ret := Result{URL: rawURL}

	// Check if the URL is valid and, if so, parse it.
	ret.IsURL = govalidator.IsURL(ret.URL)
	if ret.IsURL {
		p, err := url.Parse(ret.URL)
		if err != nil {
			return &ret, err
		}
		ret.URLComponents = p
	}

	// Check if the URL is a valid URI according to RFC 3986, plus a check for a
	// scheme.
	ret.IsRFC3986URL = v.IsRequestURL(ret.URL)

	// Check if the URL is a valid URI according to RFC 3986.
	ret.IsRFC3986URI = v.IsRequestURI(ret.URL)

	// Everything below is the optional HTTP reachability check.
	if !v.httpCheckEnabled {
		return &ret, nil
	}

	// URLComponents is nil when the URL did not pass the validity check above,
	// in which case there is no scheme to inspect.
	u := ret.URLComponents
	if u == nil || (u.Scheme != "http" && u.Scheme != "https") {
		return &ret, errors.New("unable to check if the URL is reachable via HTTP: the URL does not have a HTTP or HTTPS scheme")
	}

	if !v.allowHttpCheckInternal {
		// Resolve the host and refuse the check if any address is internal.
		// NOTE(review): this lookup is separate from whatever resolution
		// CheckHTTP performs; whether CheckHTTP re-validates the address it
		// connects to is not visible from here.
		host := u.Hostname()
		ips, err := net.LookupIP(host)
		if err != nil {
			return &ret, err
		}

		for _, ip := range ips {
			if ip.IsPrivate() || ip.IsLoopback() ||
				ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() ||
				ip.IsInterfaceLocalMulticast() || ip.IsUnspecified() {
				return &ret, fmt.Errorf("unable to check if the URL is reachable via HTTP: the URL %s resolves to an internal IP %s", host, ip)
			}
		}
	}

	// Store whatever CheckHTTP produced, even on failure, so the caller can
	// inspect it alongside the error.
	httpResult, err := v.CheckHTTP(ret.URL)
	ret.HTTP = httpResult
	if err != nil {
		return &ret, err
	}

	return &ret, nil
}

// IsRequestURL reports whether rawURL, treated as a URI received in an HTTP
// request, parses successfully and carries a scheme. It is stricter than
// IsRequestURI, which does not require a scheme. Adapted from govalidator:
// https://github.com/asaskevich/govalidator/blob/f21760c49a8d602d863493de796926d2a5c1138d/validator.go#L130
func (v *Verifier) IsRequestURL(rawURL string) bool {
	parsed, parseErr := url.ParseRequestURI(rawURL)
	if parseErr != nil {
		// rawURL could not be parsed at all.
		return false
	}
	// A parseable input only counts as a URL when a scheme is present.
	return parsed.Scheme != ""
}

// IsRequestURI reports whether rawURL, treated as a URI received in an HTTP
// request, is accepted by url.ParseRequestURI, i.e. is an absolute URI or an
// absolute path. No scheme is required. Adapted from govalidator:
// https://github.com/asaskevich/govalidator/blob/f21760c49a8d602d863493de796926d2a5c1138d/validator.go#L144
func (v *Verifier) IsRequestURI(rawURL string) bool {
	if _, parseErr := url.ParseRequestURI(rawURL); parseErr != nil {
		return false
	}
	return true
}

// DisableHTTPCheck disables checking if the URL is reachable via HTTP. With
// the check disabled, Verify performs only the validity and RFC 3986 checks.
// This is the state of a Verifier returned by NewVerifier.
func (v *Verifier) DisableHTTPCheck() {
	v.httpCheckEnabled = false
}

// EnableHTTPCheck enables checking if the URL is reachable via HTTP. With the
// check enabled, Verify returns an error for URLs that were not parsed or do
// not use the http or https scheme and, unless AllowHTTPCheckInternal has been
// called, for hosts that resolve to an internal IP.
func (v *Verifier) EnableHTTPCheck() {
	v.httpCheckEnabled = true
}

// AllowHTTPCheckInternal allows checking internal URLs: Verify skips the DNS
// lookup that otherwise rejects hosts resolving to private, loopback,
// link-local, interface-local multicast or unspecified IPs. It only has an
// effect while the HTTP check is enabled.
func (v *Verifier) AllowHTTPCheckInternal() {
	v.allowHttpCheckInternal = true
}

// DisallowHTTPCheckInternal disallows checking internal URLs: with the HTTP
// check enabled, Verify resolves the host first and returns an error if any
// resolved IP is internal. This is the state of a Verifier returned by
// NewVerifier.
func (v *Verifier) DisallowHTTPCheckInternal() {
	v.allowHttpCheckInternal = false
}