File: ValidatedURL.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (162 lines) | stat: -rw-r--r-- 8,057 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
/*
 This source file is part of the Swift.org open source project

 Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
 Licensed under Apache License v2.0 with Runtime Library Exception

 See https://swift.org/LICENSE.txt for license information
 See https://swift.org/CONTRIBUTORS.txt for Swift project authors
*/

import Foundation

/// An RFC 3986 compliant URL.
///
/// Use this wrapper type to make sure your stored URLs comply
/// with RFC 3986, which `URLComponents` implements, rather than the less-
/// strict implementation of `URL`.
///
/// For example, due to older RFC compliance, `URL` fails to parse relative topic
/// references with a fragment like this:
///  - `URL(string: "doc:tutorial#test")?.fragment` -> `nil`
///  - `URLComponents(string: "doc:tutorial#test")?.fragment` -> `"test"`
/// ## See Also
///  - [RFC 3986](http://www.ietf.org/rfc/rfc3986.txt)
public struct ValidatedURL: Hashable, Equatable {
    /// The raw components that make up the validated URL.
    public private(set) var components: URLComponents

    /// Creates a new RFC 3986 valid URL by using the given string URL.
    ///
    /// Will return `nil` when the given `string` is not a valid URL.
    /// - Parameter string: Source URL address as string
    ///
    /// > Note:
    /// > Attempting to parse a symbol path as a URL may result in unexpected URL components depending on the source language.
    /// > For example; an Objective-C instance method named `someMethodWithFirstValue:secondValue:` would be parsed as a
    /// > URL with the "someMethodWithFirstValue" scheme which is a valid link but which won't resolve to the intended symbol.
    /// >
    /// > When working with symbol destinations use ``init(symbolPath:)`` instead.
    /// >
    /// > When working with authored documentation links use ``init(parsingAuthoredLink:)`` instead.
    init?(parsingExact string: String) {
        guard let components = URLComponents(string: string) else {
            return nil
        }
        self.components = components
    }

    /// Creates a new RFC 3986 valid URL by using the given string URL and percent escaping its components if necessary.
    ///
    /// Will return `nil` when the given `string` is not a valid URL.
    /// - Parameter string: Source URL address as string.
    ///
    /// The string is first parsed as-is. Only if that fails, the string is split into an optional documentation scheme, an optional
    /// bundle identifier (host), a path, and an optional fragment, and each of those substrings is percent encoded using the set
    /// of characters that is allowed for that specific component.
    ///
    /// Use this to parse author provided documentation links that may contain links to on-page subsections. Escaping the fragment allows authors
    /// to write links to subsections using characters that wouldn't otherwise be allowed in a fragment of a URL.
    init?(parsingAuthoredLink string: String) {
        // Try to parse the string without escaping anything
        if let parsed = ValidatedURL(parsingExact: string) {
            self.components = parsed.components
            return
        }

        // If the `URLComponents(string:)` parsing in `init(parsingExact:)` failed try a fallback that attempts to individually
        // percent encode each component.
        //
        // This fallback parsing tries to determine the substrings of the authored link that correspond to the scheme, bundle
        // identifier, path, and fragment of a documentation link or symbol link. It is not meant to work with general links.
        //
        // By identifying the subranges they can each be individually percent encoded with the characters that are allowed for
        // that component. This allows authored links to contain characters that wouldn't otherwise be valid in a general URL.
        //
        // Assigning the percent encoded values to `URLComponents/percentEncodedHost`, `URLComponents/percentEncodedPath`, and
        // `URLComponents/percentEncodedFragment` allows for the creation of a `URLComponents` value with special characters.
        var components = URLComponents()
        var remainder = string[...]

        // See if the link is a documentation link and try to split out the scheme and bundle identifier. If the link isn't a
        // documentation link it's assumed that it's a symbol link that starts with the path component.
        // Other general URLs should have been successfully parsed with `URLComponents(string:)` in `init(parsingExact:)` above.
        let schemePrefix = "\(ResolvedTopicReference.urlScheme):"
        if remainder.hasPrefix(schemePrefix) {
            // The authored link is a doc link
            components.scheme = ResolvedTopicReference.urlScheme
            remainder = remainder.dropFirst(schemePrefix.count)

            if remainder.hasPrefix("//") {
                // The authored link includes a bundle ID.
                //
                // Skip past the "//" authority marker before slicing out the host. Otherwise the two slashes would become part
                // of the host substring and be percent encoded as "%2F%2F" since "/" isn't an allowed host character.
                remainder = remainder.dropFirst(2)

                guard let startOfPath = remainder.firstIndex(of: "/") else {
                    // The link started with "doc://" but didn't contain another "/" to start of the path.
                    return nil
                }
                components.percentEncodedHost = String(remainder[..<startOfPath]).addingPercentEncoding(withAllowedCharacters: .urlHostAllowed)
                // Keep the leading "/" as part of the path.
                remainder = remainder[startOfPath...]
            }
        }

        // This either is the start of a symbol link or the remainder of a doc link after the scheme and bundle ID was parsed.
        // This means that the remainder of the string is a path with an optional fragment. No other URL components are supported
        // by documentation links and symbol links.
        if let fragmentSeparatorIndex = remainder.firstIndex(of: "#") {
            // Encode the path substring and fragment substring separately
            guard let path = String(remainder[..<fragmentSeparatorIndex]).addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
                return nil
            }
            components.percentEncodedPath = path
            // Drop the "#" separator itself; only the text after it is the fragment.
            components.percentEncodedFragment = String(remainder[fragmentSeparatorIndex...].dropFirst()).addingPercentEncoding(withAllowedCharacters: .urlFragmentAllowed)
        } else {
            // Since the link didn't include a fragment, the rest of the string is the path.
            guard let path = String(remainder).addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
                return nil
            }
            components.percentEncodedPath = path
        }

        self.components = components
    }

    /// Creates a new RFC 3986 valid URL from the given URL.
    ///
    /// Will return `nil` when the given URL doesn't comply with RFC 3986.
    /// - Parameter url: Source URL
    init?(_ url: URL) {
        guard let components = URLComponents(url: url, resolvingAgainstBaseURL: false) else {
            return nil
        }
        self.components = components
    }

    /// Creates a new RFC 3986 valid URL by using the given symbol path.
    ///
    /// - Parameter symbolPath: A symbol path as a string, with path components separated by "/".
    init(symbolPath: String) {
        // Symbol links are assumed to be written as the path only, without a scheme or host component.
        var components = URLComponents()
        components.path = symbolPath
        self.components = components
    }

    /// Creates a new RFC 3986 valid URL.
    ///
    /// - Parameter components: The URL components to wrap. They are stored as-is, without additional validation.
    init(components: URLComponents) {
        self.components = components
    }

    /// Returns the unmodified value in case the URL matches the required scheme or nil otherwise.
    /// - Parameter scheme: A URL scheme to match.
    /// - Returns: A valid URL if the scheme matches, `nil` otherwise.
    func requiring(scheme: String) -> ValidatedURL? {
        guard scheme == components.scheme else { return nil }
        return self
    }

    /// The URL as an absolute string.
    ///
    /// - Important: This force unwraps `URLComponents/string` and traps if the stored components can't be formed into a URL string.
    var absoluteString: String {
        return components.string!
    }

    /// The URL as an RFC 3986 compliant `URL` value.
    ///
    /// - Important: This force unwraps `URLComponents/url` and traps if the stored components can't be formed into a URL.
    var url: URL {
        return components.url!
    }
}