1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
package rulesfn
import (
"fmt"
"net"
"net/url"
"strings"
smithyhttp "github.com/aws/smithy-go/transport/http"
)
// IsValidHostLabel reports whether input is a valid [RFC 1123] host label, as
// judged by smithyhttp.ValidHostLabel.
//
// When allowSubDomains is false the whole input is validated as one label.
// When it is true the input is split on "." and every resulting segment must
// be a valid label on its own; the first invalid segment makes the result
// false.
//
// [RFC 1123]: https://www.ietf.org/rfc/rfc1123.txt
func IsValidHostLabel(input string, allowSubDomains bool) bool {
	if !allowSubDomains {
		return smithyhttp.ValidHostLabel(input)
	}

	for _, segment := range strings.Split(input, ".") {
		if !smithyhttp.ValidHostLabel(segment) {
			return false
		}
	}
	return true
}
// ParseURL parses input and returns the resulting [URL], or nil when the input
// is rejected. The input is rejected when:
//   - url.Parse returns an error,
//   - the URL carries a non-empty query string, or
//   - the scheme is anything other than "http" or "https".
//
// The returned NormalizedPath is the parsed path with a leading and a trailing
// "/" added when missing; Path is the parsed path unchanged.
//
// If the input contains an IPv6 address with a zone index, the returned
// Authority contains the percent-escaped ("%25") zone index separator.
func ParseURL(input string) *URL {
	parsed, err := url.Parse(input)
	if err != nil || parsed.RawQuery != "" {
		return nil
	}

	switch parsed.Scheme {
	case "http", "https":
		// supported
	default:
		return nil
	}

	// The Go URL parser unescapes the `%25` of an IPv6 zone index into `%`.
	// Re-escape it so the authority is valid when it is later used in string
	// builders.
	authority := strings.ReplaceAll(parsed.Host, "%", "%25")

	result := &URL{
		Scheme:         parsed.Scheme,
		Authority:      authority,
		Path:           parsed.Path,
		NormalizedPath: parsed.Path,
		IsIp:           net.ParseIP(hostnameWithoutZone(parsed)) != nil,
	}

	// Guarantee the normalized form both starts and ends with a slash.
	if !strings.HasPrefix(result.NormalizedPath, "/") {
		result.NormalizedPath = "/" + result.NormalizedPath
	}
	if !strings.HasSuffix(result.NormalizedPath, "/") {
		result.NormalizedPath += "/"
	}
	return result
}
// URL provides the structure describing the parts of a parsed URL returned by
// [ParseURL].
type URL struct {
// Scheme is the URL scheme; ParseURL only returns "http" or "https".
Scheme string // https://www.rfc-editor.org/rfc/rfc3986#section-3.1
// Authority is the parsed host (including any port) with every "%"
// re-escaped as "%25".
Authority string // https://www.rfc-editor.org/rfc/rfc3986#section-3.2
// Path is the path exactly as produced by the URL parser.
Path string // https://www.rfc-editor.org/rfc/rfc3986#section-3.3
// NormalizedPath is Path with a leading and a trailing "/" added when
// missing.
NormalizedPath string // https://www.rfc-editor.org/rfc/rfc3986#section-6.2.3
// IsIp is true when the hostname, with any IPv6 zone removed, parses as an
// IP address via net.ParseIP.
IsIp bool
}
// URIEncode returns a percent-encoded [RFC3986 section 2.1] version of the
// input string.
//
// The input is processed byte by byte. Unreserved characters (ASCII letters,
// digits, '-', '_', '.', '~') are copied through unchanged; every other byte
// is written as "%" followed by exactly two uppercase hexadecimal digits, so
// bytes below 0x10 are zero padded (for example "\n" becomes "%0A").
//
// [RFC3986 section 2.1]: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
func URIEncode(input string) string {
	var output strings.Builder
	// Every input byte produces at least one output byte.
	output.Grow(len(input))

	for _, c := range []byte(input) {
		if validPercentEncodedChar(c) {
			output.WriteByte(c)
			continue
		}
		// %02X forces two hex digits; a bare %X would emit a single digit for
		// bytes below 0x10, which is not a valid percent-encoded triplet.
		fmt.Fprintf(&output, "%%%02X", c)
	}
	return output.String()
}

// validPercentEncodedChar reports whether c is an RFC 3986 unreserved
// character, i.e. one that URIEncode may emit without percent-encoding.
func validPercentEncodedChar(c byte) bool {
	return (c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9') ||
		c == '-' || c == '_' || c == '.' || c == '~'
}
// hostnameWithoutZone returns u.Hostname() with any IPv6 zone ID removed, so
// that net.ParseIP can still recognize IPv6 addresses that carry a zone ID.
// Everything from the last "%" onward is dropped; a hostname without "%" is
// returned unchanged.
//
// FUTURE(10/2023): netip.ParseAddr handles this natively but we can't take
// that package as a dependency yet due to our min go version (1.15, netip
// starts in 1.18). When we align with go runtime deprecation policy in
// 10/2023, we can remove this.
func hostnameWithoutZone(u *url.URL) string {
	host := u.Hostname()

	// Roughly what the unexported splitHostZone in package net does, except
	// the zone itself is discarded because it is not needed here.
	zoneAt := strings.LastIndex(host, "%")
	if zoneAt < 0 {
		return host
	}
	return host[:zoneAt]
}
|