File: Core.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (243 lines) | stat: -rw-r--r-- 8,330 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

@_implementationOnly import _RegexParser

/// A type that represents a regular expression.
///
/// You can use types that conform to `RegexComponent` as parameters to string
/// searching operations and inside `RegexBuilder` closures.
///
/// A conforming type supplies a single requirement, ``regex``, which returns
/// the `Regex` value that the component stands for. `RegexOutput` is declared
/// as the protocol's primary associated type, so it can be constrained using
/// angle-bracket syntax (for example, `some RegexComponent<Substring>`).
@available(SwiftStdlib 5.7, *)
public protocol RegexComponent<RegexOutput> {
  /// The output type for this regular expression.
  ///
  /// A `Regex` instance's output type depends on whether the `Regex` has
  /// captures and how it is created.
  ///
  /// - A `Regex` created from a string using the ``init(_:)`` initializer
  ///   has an output type of ``AnyRegexOutput``, whether it has captures or
  ///   not.
  /// - A `Regex` without captures created from a regex literal, the
  ///   ``init(_:as:)`` initializer, or a `RegexBuilder` closure has a
  ///   `Substring` output type, where the substring is the portion of the
  ///   string that was matched.
  /// - A `Regex` with captures created from a regex literal or the
  ///   ``init(_:as:)`` initializer has a tuple of substrings as its output
  ///   type. The first component of the tuple is the full portion of the string
  ///   that was matched, with the remaining components holding the captures.
  associatedtype RegexOutput
  
  /// The regular expression represented by this component.
  ///
  /// The returned regex is parameterized on this component's `RegexOutput`.
  var regex: Regex<RegexOutput> { get }
}

/// A regular expression.
///
/// Regular expressions are a concise way of describing a pattern, which can
/// help you match or extract portions of a string. You can create a `Regex`
/// instance using regular expression syntax, either in a regex literal or a
/// string.
///
///     // 'keyAndValue' is created using a regex literal
///     let keyAndValue = /(.+?): (.+)/
///     // 'simpleDigits' is created from a pattern in a string
///     let simpleDigits = try Regex("[0-9]+")
///
/// You can use a `Regex` to search for a pattern in a string or substring.
/// Call `contains(_:)` to check for the presence of a pattern, or
/// `firstMatch(of:)` or `matches(of:)` to find matches.
///
///     let setting = "color: 161 103 230"
///     if setting.contains(simpleDigits) {
///         print("'\(setting)' contains some digits.")
///     }
///     // Prints "'color: 161 103 230' contains some digits."
///
/// When you find a match, the resulting ``Match`` type includes an
/// ``Match/output`` property that contains the matched substring along with
/// any captures:
///
///     if let match = setting.firstMatch(of: keyAndValue) {
///         print("Key: \(match.1)")
///         print("Value: \(match.2)")
///     }
///     // Key: color
///     // Value: 161 103 230
///
/// When you import the `RegexBuilder` module, you can also create `Regex`
/// instances using a clear and flexible declarative syntax. Using this
/// style, you can combine, capture, and transform regexes, `RegexBuilder`
/// types, and custom parsers.
@available(SwiftStdlib 5.7, *)
public struct Regex<Output>: RegexComponent {
  /// The program object that holds this regex's tree and its lazily
  /// lowered form.
  let program: Program

  /// Forwards the `hasCapture` flag reported by the program's tree.
  var hasCapture: Bool {
    let tree = program.tree
    return tree.hasCapture
  }

  /// Creates a regex backed by a program built from a complete AST.
  init(ast: AST) {
    program = Program(ast: ast)
  }

  /// Creates a regex from a bare AST node.
  ///
  /// The node is first wrapped in an `AST` that carries no global options
  /// and a freshly created `Diagnostics` value.
  init(ast: AST.Node) {
    let wrapped = AST(ast, globalOptions: nil, diags: Diagnostics())
    program = Program(ast: wrapped)
  }

  // Compiler interface. Do not change independently.
  @usableFromInline
  init(_regexString pattern: String) {
    // `try!`: a pattern that fails to parse here traps rather than throws.
    let parsed: AST = try! parse(pattern, .traditional)
    self.init(ast: parsed)
  }

  // Compiler interface. Do not change independently.
  @usableFromInline
  init(_regexString pattern: String, version: Int) {
    // The version argument is passed by the compiler using the value defined
    // in libswiftParseRegexLiteral.
    assert(version == currentRegexLiteralFormatVersion)
    // `try!`: a pattern that fails to parse here traps rather than throws.
    let parsed: AST = try! parseWithDelimiters(pattern)
    self.init(ast: parsed)
  }

  /// A `Regex` is its own regex component, so this simply returns `self`.
  public var regex: Regex<Output> {
    return self
  }
}

@available(SwiftStdlib 5.7, *)
extension Regex {
  /// Creates a regex from a single quoted-literal node holding `_string`.
  ///
  /// This spelling is deprecated; the availability attribute names
  /// `init(verbatim:)` as its replacement.
  @available(*, deprecated, renamed: "init(verbatim:)")
  public init(quoting _string: String) {
    let literal: DSLTree.Node = .quotedLiteral(_string)
    self.init(node: literal)
  }
}


@available(SwiftStdlib 5.7, *)
extension Regex {
  /// A program representation that caches any lowered representation for
  /// execution.
  ///
  /// The class holds the regex's `DSLTree` plus a single storage slot for the
  /// compiled `MEProgram`. The slot starts out `nil` and is filled in the
  /// first time `loweredProgram` is read.
  internal final class Program {

    // This stored property should be stored at offset zero.  We perform atomic
    // operations on it.
    //
    // Keep it as the first stored property: `_loweredProgramStoragePtr`
    // reinterprets the start of the stored-property area as this slot.
    //
    /// Do not access this property directly - all accesses must go through `_loweredProgramStoragePtr`.
    fileprivate var _loweredProgramStorage: AnyObject? = nil

    /// The underlying IR.
    ///
    /// FIXME: If Regex is the unit of composition, then it should be a Node instead,
    /// and we should have a separate type that handles both global options and,
    /// likely, compilation/caching.
    let tree: DSLTree

    /// OptionSet of compiler options for testing purposes
    fileprivate var compileOptions: _CompileOptions = .default

    /// Wraps an `MEProgram` in a class instance so that it can be held in the
    /// `AnyObject?` storage slot.
    private final class ProgramBox {
      let value: MEProgram
      init(_ value: MEProgram) { self.value = value }
    }

    /// A pointer to the start of this object's stored properties, typed as a
    /// pointer to `AnyObject?`.
    ///
    /// This is the handle passed to the atomic load and initialize calls in
    /// `loweredProgram`.
    fileprivate var _loweredProgramStoragePtr: UnsafeMutablePointer<AnyObject?> {
      _getUnsafePointerToStoredProperties(self)
        .assumingMemoryBound(to: Optional<AnyObject>.self)
    }

    /// The program for execution with the matching engine.
    ///
    /// Returns the cached program when one has been stored. Otherwise it
    /// compiles `tree` with `compileOptions`, attempts to publish the result
    /// into the storage slot, and returns whichever program ended up stored.
    /// Compilation uses `try!`, so a compiler error traps.
    var loweredProgram: MEProgram {
      /// Atomically loads the compiled program if it has already been stored.
      func loadProgram() -> MEProgram? {
        guard let loweredObject = _stdlib_atomicLoadARCRef(object: _loweredProgramStoragePtr)
          else { return nil }
        // The only non-nil value written to the slot in this file is a
        // `ProgramBox`, which is what makes the unchecked downcast valid.
        return unsafeDowncast(loweredObject, to: ProgramBox.self).value
      }
      
      // Use the previously compiled program, if available.
      if let program = loadProgram() {
        return program
      }
      
      // Compile the DSLTree into a lowered program and store it atomically.
      let compiledProgram = try! Compiler(tree: tree, compileOptions: compileOptions).emit()
      // `storedNewProgram` is true when the box created here is the one that
      // was stored; false means a program was already in place.
      let storedNewProgram = _stdlib_atomicInitializeARCRef(
        object: _loweredProgramStoragePtr,
        desired: ProgramBox(compiledProgram))
      
      // Return the winner of the storage race. We're guaranteed at this point
      // to have compiled program stored in `_loweredProgramStorage`.
      return storedNewProgram
        ? compiledProgram
        : loadProgram()!
    }

    /// Creates a program whose tree is the `dslTree` of the given AST.
    init(ast: AST) {
      self.tree = ast.dslTree
    }

    /// Creates a program that uses the given tree directly.
    init(tree: DSLTree) {
      self.tree = tree
    }
  }
  
  /// The set of matching options that applies to the start of this regex.
  ///
  /// Note that the initial options may not apply to the entire regex. For
  /// example, in this regex, only case insensitivity (`i`) and Unicode scalar
  /// semantics (set by API) apply to the entire regex, while ASCII character
  /// classes (`P`) is part of `initialOptions` but not global:
  ///
  ///     let regex = /(?i)(?P:\d+\s*)abc/.semanticLevel(.unicodeScalar)
  ///
  /// The value is read from `program.loweredProgram`, so accessing it
  /// compiles the program if no lowered program has been stored yet.
  var initialOptions: MatchingOptions {
    program.loweredProgram.initialOptions
  }
}

@available(SwiftStdlib 5.7, *)
extension Regex {
  /// The root node of the DSL tree held by this regex's program.
  var root: DSLTree.Node {
    let tree = program.tree
    return tree.root
  }

  /// Creates a regex whose program is built from a tree rooted at `node`.
  init(node: DSLTree.Node) {
    let tree = DSLTree(node)
    program = Program(tree: tree)
  }
}

@available(SwiftStdlib 5.7, *)
@_spi(RegexBenchmark)
extension Regex {
  /// The actions that `_forceAction(_:)` can be asked to perform.
  public enum _RegexInternalAction {
    /// Run the compiler over the program's tree with its current options.
    case recompile
    /// Insert the given options into the program's compile options.
    case addOptions(_CompileOptions)
  }
  
  /// Internal API for RegexBenchmark
  /// Forces the regex to perform the given action, returning if it was successful
  ///
  /// - `.addOptions` inserts the options into the program's compile options,
  ///   resets the stored lowered program to `nil`, and returns `true`.
  /// - `.recompile` runs the compiler and discards its result, returning
  ///   `true` on success and `false` if compilation throws. It does not
  ///   touch the stored lowered program.
  public mutating func _forceAction(_ action: _RegexInternalAction) -> Bool {
    switch action {
    case .addOptions(let extraOptions):
      program.compileOptions.insert(extraOptions)
      // NOTE(review): this is a plain store to `_loweredProgramStorage`, not
      // a store through `_loweredProgramStoragePtr` as the property's own
      // comment asks for. Confirm that benchmark callers never race with
      // matching.
      program._loweredProgramStorage = nil
      return true

    case .recompile:
      // Only this path can throw, so the do/catch is scoped to it.
      do {
        _ = try Compiler(
          tree: program.tree,
          compileOptions: program.compileOptions).emit()
        return true
      } catch {
        return false
      }
    }
  }
}