File: uri.go

package info (click to toggle)
golang-github-aws-smithy-go 1.19.0-1~bpo12%2B1
links: PTS, VCS
area: main
in suites: bookworm-backports
size: 2,680 kB
sloc: java: 15,917; xml: 166; sh: 131; makefile: 66
file content (130 lines) | stat: -rw-r--r-- 3,775 bytes
parent folder | download | duplicates (4)
package rulesfn

import (
	"fmt"
	"net"
	"net/url"
	"strings"

	smithyhttp "github.com/aws/smithy-go/transport/http"
)

// IsValidHostLabel returns if the input is a single valid [RFC 1123] host
// label. If allowSubDomains is true, will allow validation to include nested
// host labels. Returns false if the input is not a valid host label. If errors
// occur they will be added to the provided [ErrorCollector].
//
// [RFC 1123]: https://www.ietf.org/rfc/rfc1123.txt
func IsValidHostLabel(input string, allowSubDomains bool) bool {
	var labels []string
	if allowSubDomains {
		labels = strings.Split(input, ".")
	} else {
		labels = []string{input}
	}

	for _, label := range labels {
		if !smithyhttp.ValidHostLabel(label) {
			return false
		}
	}

	return true
}

// ParseURL returns a [URL] if the provided string could be parsed. Returns nil
// if the string could not be parsed. Any parsing error will be added to the
// [ErrorCollector].
//
// If the input URL string contains an IP6 address with a zone index. The
// returned [builtin.URL.Authority] value will contain the percent escaped (%)
// zone index separator.
func ParseURL(input string) *URL {
	u, err := url.Parse(input)
	if err != nil {
		return nil
	}

	if u.RawQuery != "" {
		return nil
	}

	if u.Scheme != "http" && u.Scheme != "https" {
		return nil
	}

	normalizedPath := u.Path
	if !strings.HasPrefix(normalizedPath, "/") {
		normalizedPath = "/" + normalizedPath
	}
	if !strings.HasSuffix(normalizedPath, "/") {
		normalizedPath = normalizedPath + "/"
	}

	// IP6 hosts may have zone indexes that need to be escaped to be valid in a
	// URI. The Go URL parser will unescape the `%25` into `%`. This needs to
	// be reverted since the returned URL will be used in string builders.
	authority := strings.ReplaceAll(u.Host, "%", "%25")

	return &URL{
		Scheme:         u.Scheme,
		Authority:      authority,
		Path:           u.Path,
		NormalizedPath: normalizedPath,
		IsIp:           net.ParseIP(hostnameWithoutZone(u)) != nil,
	}
}

// URL provides the structure describing the parts of a parsed URL returned by
// [ParseURL].
type URL struct {
	Scheme         string // https://www.rfc-editor.org/rfc/rfc3986#section-3.1
	Authority      string // https://www.rfc-editor.org/rfc/rfc3986#section-3.2
	Path           string // https://www.rfc-editor.org/rfc/rfc3986#section-3.3
	NormalizedPath string // https://www.rfc-editor.org/rfc/rfc3986#section-6.2.3
	IsIp           bool
}

// URIEncode returns an percent-encoded [RFC3986 section 2.1] version of the
// input string.
//
// [RFC3986 section 2.1]: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
func URIEncode(input string) string {
	var output strings.Builder
	for _, c := range []byte(input) {
		if validPercentEncodedChar(c) {
			output.WriteByte(c)
			continue
		}

		fmt.Fprintf(&output, "%%%X", c)
	}

	return output.String()
}

func validPercentEncodedChar(c byte) bool {
	return (c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9') ||
		c == '-' || c == '_' || c == '.' || c == '~'
}

// hostname implements u.Hostname() but strips the ipv6 zone ID (if present)
// such that net.ParseIP can still recognize IPv6 addresses with zone IDs.
//
// FUTURE(10/2023): netip.ParseAddr handles this natively but we can't take
// that package as a dependency yet due to our min go version (1.15, netip
// starts in 1.18).  When we align with go runtime deprecation policy in
// 10/2023, we can remove this.
func hostnameWithoutZone(u *url.URL) string {
	full := u.Hostname()

	// this more or less mimics the internals of net/ (see unexported
	// splitHostZone in that source) but throws the zone away because we don't
	// need it
	if i := strings.LastIndex(full, "%"); i > -1 {
		return full[:i]
	}
	return full
}