File: StringCharacterView.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (397 lines) | stat: -rw-r--r-- 15,369 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
//===--- StringCharacterView.swift - String's Collection of Characters ----===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
//  String is a collection of characters.
//
//===----------------------------------------------------------------------===//

import SwiftShims

extension String: BidirectionalCollection {
  /// The type that `String` uses for its `SubSequence` collection requirement:
  /// `Substring`.
  public typealias SubSequence = Substring
  /// The type of the elements of a string when it is treated as a collection:
  /// `Character`.
  public typealias Element = Character

  /// The position of the string's first character, provided the string is
  /// nonempty.
  ///
  /// For an empty string, `startIndex` and `endIndex` compare equal.
  @inlinable @inline(__always)
  public var startIndex: Index {
    return _guts.startIndex
  }

  /// The string's "past the end" position: the position one greater than the
  /// last valid subscript argument.
  ///
  /// For an empty string, `endIndex` and `startIndex` compare equal.
  @inlinable @inline(__always)
  public var endIndex: Index {
    return _guts.endIndex
  }

  /// The number of characters in the string.
  ///
  /// When all you need to know is whether the string is empty, use the
  /// `isEmpty` property rather than comparing `count` to zero.
  ///
  /// - Complexity: O(n), where n is the length of the string.
  @inline(__always)
  public var count: Int {
    // Counting characters is the same as measuring the distance between the
    // two ends of the string.
    let first = startIndex
    let pastEnd = endIndex
    return distance(from: first, to: pastEnd)
  }

  /// Return true if and only if `i` is a valid index in this string, that is
  /// to say, it exactly addresses one of the `Character`s in it.
  ///
  /// The checks run in this order, stopping at the first failure:
  /// the index has an encoding matching the string's storage, its encoded
  /// offset does not exceed the storage's `count`, and it falls on a grapheme
  /// cluster boundary.
  internal func _isValidIndex(_ i: Index) -> Bool {
    guard _guts.hasMatchingEncoding(i) else { return false }
    guard i._encodedOffset <= _guts.count else { return false }
    return _guts.isOnGraphemeClusterBoundary(i)
  }

  /// Returns the position immediately after the given index.
  ///
  /// The index is validated first; the unchecked variant then performs the
  /// actual step.
  ///
  /// - Parameter i: A valid index of the collection. `i` must be less than
  ///   `endIndex`.
  /// - Returns: The index value immediately after `i`.
  public func index(after i: Index) -> Index {
    return _uncheckedIndex(after: _guts.validateCharacterIndex(i))
  }

  /// A version of `index(after:)` that assumes that the given index:
  ///
  /// - has the right encoding,
  /// - is within bounds (strictly less than `endIndex`), and
  /// - is character aligned.
  ///
  /// These assumptions are only asserted through internal invariants; callers
  /// are responsible for validating `i` beforehand.
  internal func _uncheckedIndex(after i: Index) -> Index {
    _internalInvariant(_guts.hasMatchingEncoding(i))
    _internalInvariant(i < endIndex)
    _internalInvariant(i._isCharacterAligned)

    // TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.

    // Step over the character that starts at `i`.
    let successorOffset = i._encodedOffset &+ _characterStride(startingAt: i)

    // Measure the character that starts at the new position as well, so that
    // its stride can be stored in the returned index.
    let successorStride = _characterStride(
      startingAt: Index(_encodedOffset: successorOffset)._characterAligned)

    let successor = Index(
      encodedOffset: successorOffset, characterStride: successorStride)
    return _guts.markEncoding(successor._characterAligned)
  }

  /// Returns the position immediately before the given index.
  ///
  /// - Parameter i: A valid index of the collection. `i` must be greater than
  ///   `startIndex`.
  /// - Returns: The index value immediately before `i`.
  public func index(before i: Index) -> Index {
    // FIXME: This method used to not properly validate indices before 5.7;
    // temporarily allow older binaries to keep invoking undefined behavior as
    // before.
    let position = _guts.validateInclusiveCharacterIndex_5_7(i)

    // Note: Aligning an index may move it closer towards the `startIndex`, so
    // the bounds check against `startIndex` has to happen on the validated
    // (rounded) position rather than on the caller's original index.
    _precondition(
      ifLinkedOnOrAfter: .v5_7_0,
      position > startIndex, "String index is out of bounds")

    return _uncheckedIndex(before: position)
  }

  /// A version of `index(before:)` that assumes that the given index:
  ///
  /// - has the right encoding,
  /// - is within bounds (greater than `startIndex`, at most `endIndex`), and
  /// - is character aligned.
  ///
  /// These assumptions are only asserted through internal invariants; callers
  /// are responsible for validating `i` beforehand.
  internal func _uncheckedIndex(before i: Index) -> Index {
    _internalInvariant(_guts.hasMatchingEncoding(i))
    _internalInvariant(i > startIndex && i <= endIndex)
    _internalInvariant(i._isCharacterAligned)

    // TODO: known-ASCII fast path, single-scalar-grapheme fast path, etc.

    // The length of the character that ends at `i` is both the amount to step
    // back by and the stride of the character the result points at.
    let length = _characterStride(endingAt: i)
    let predecessor = Index(
      encodedOffset: i._encodedOffset &- length, characterStride: length)
    return _guts.markEncoding(predecessor._characterAligned)
  }

  /// Returns an index that is the specified distance from the given index.
  ///
  /// The following example obtains an index advanced four positions from a
  /// string's starting index and then prints the character at that position.
  ///
  ///     let s = "Swift"
  ///     let i = s.index(s.startIndex, offsetBy: 4)
  ///     print(s[i])
  ///     // Prints "t"
  ///
  /// The value passed as `distance` must not offset `i` beyond the bounds of
  /// the collection.
  ///
  /// - Parameters:
  ///   - i: A valid index of the collection.
  ///   - distance: The distance to offset `i`.
  /// - Returns: An index offset by `distance` from the index `i`. If
  ///   `distance` is positive, this is the same value as the result of
  ///   `distance` calls to `index(after:)`. If `distance` is negative, this
  ///   is the same value as the result of `abs(distance)` calls to
  ///   `index(before:)`.
  /// - Complexity: O(*n*), where *n* is the absolute value of `distance`.
  public func index(_ i: Index, offsetBy distance: Int) -> Index {
    // Note: prior to Swift 5.7, this method used to be inlinable, forwarding to
    // `_index(_:offsetBy:)`.

    // TODO: known-ASCII and single-scalar-grapheme fast path, etc.

    // FIXME: This method used to not properly validate indices before 5.7;
    // temporarily allow older binaries to keep invoking undefined behavior as
    // before.
    var cursor = _guts.validateInclusiveCharacterIndex_5_7(i)

    // Number of single-character steps still to take; the sign gives the
    // direction. At most one of the two loops below executes.
    var stepsLeft = distance

    // Forward movement: one character per iteration.
    while stepsLeft > 0 {
      _precondition(cursor < endIndex, "String index is out of bounds")
      cursor = _uncheckedIndex(after: cursor)
      stepsLeft &-= 1
    }

    // Backward movement: one character per iteration.
    while stepsLeft < 0 {
      _precondition(cursor > startIndex, "String index is out of bounds")
      cursor = _uncheckedIndex(before: cursor)
      stepsLeft &+= 1
    }

    return cursor
  }

  /// Returns an index that is the specified distance from the given index,
  /// unless that distance is beyond a given limiting index.
  ///
  /// The following example obtains an index advanced four positions from a
  /// string's starting index and then prints the character at that position.
  /// The operation doesn't require going beyond the limiting `s.endIndex`
  /// value, so it succeeds.
  ///
  ///     let s = "Swift"
  ///     if let i = s.index(s.startIndex, offsetBy: 4, limitedBy: s.endIndex) {
  ///         print(s[i])
  ///     }
  ///     // Prints "t"
  ///
  /// The next example attempts to retrieve an index six positions from
  /// `s.startIndex` but fails, because that distance is beyond the index
  /// passed as `limit`.
  ///
  ///     let j = s.index(s.startIndex, offsetBy: 6, limitedBy: s.endIndex)
  ///     print(j)
  ///     // Prints "nil"
  ///
  /// The value passed as `distance` must not offset `i` beyond the bounds of
  /// the collection, unless the index passed as `limit` prevents offsetting
  /// beyond those bounds.
  ///
  /// - Parameters:
  ///   - i: A valid index of the collection.
  ///   - distance: The distance to offset `i`.
  ///   - limit: A valid index of the collection to use as a limit. If
  ///     `distance > 0`, a limit that is less than `i` has no effect.
  ///     Likewise, if `distance < 0`, a limit that is greater than `i` has no
  ///     effect.
  /// - Returns: An index offset by `distance` from the index `i`, unless that
  ///   index would be beyond `limit` in the direction of movement. In that
  ///   case, the method returns `nil`.
  ///
  /// - Complexity: O(*n*), where *n* is the absolute value of `distance`.
  public func index(
    _ i: Index, offsetBy distance: Int, limitedBy limit: Index
  ) -> Index? {
    // Note: Prior to Swift 5.7, this function used to be inlinable, forwarding
    // to `BidirectionalCollection._index(_:offsetBy:limitedBy:)`.
    // Unfortunately, that approach isn't compatible with SE-0180, as it doesn't
    // support cases where `i` or `limit` aren't character aligned.

    // TODO: known-ASCII and single-scalar-grapheme fast path, etc.

    // Per SE-0180, `i` and `limit` are allowed to fall in between grapheme
    // breaks, in which case this function must still terminate without trapping
    // and return a result that makes sense.

    // Note: the limit is intentionally not scalar (or character-) aligned to
    // ensure our behavior exactly matches the documentation above. We do need
    // to ensure it has a matching encoding, though. The same goes for the
    // unrounded starting position, which is used to determine whether the
    // limit applies at all.
    let bound = _guts.ensureMatchingEncoding(limit)
    let origin = _guts.ensureMatchingEncoding(i)

    // FIXME: This method used to not properly validate indices before 5.7;
    // temporarily allow older binaries to keep invoking undefined behavior as
    // before.
    var cursor = _guts.validateInclusiveCharacterIndex_5_7(i)
    var stepsLeft = distance

    if distance >= 0 {
      // A limit that lies before the starting position has no effect on
      // forward movement.
      let limitIsActive = !(bound < origin)
      while stepsLeft > 0 {
        if limitIsActive && !(cursor < bound) { return nil }
        _precondition(cursor < endIndex, "String index is out of bounds")
        cursor = _uncheckedIndex(after: cursor)
        stepsLeft &-= 1
      }
      // The final position may sit exactly on the limit, but not beyond it.
      if limitIsActive && !(cursor <= bound) { return nil }
      return cursor
    }

    // A limit that lies after the starting position has no effect on backward
    // movement.
    let limitIsActive = !(bound > origin)
    while stepsLeft < 0 {
      if limitIsActive && !(cursor > bound) { return nil }
      _precondition(cursor > startIndex, "String index is out of bounds")
      cursor = _uncheckedIndex(before: cursor)
      stepsLeft &+= 1
    }
    // The final position may sit exactly on the limit, but not beyond it.
    if limitIsActive && !(cursor >= bound) { return nil }
    return cursor
  }
  
  /// Returns the distance between two indices.
  ///
  /// The result is positive when `end` comes after `start`, negative when it
  /// comes before, and zero when the two are equal.
  ///
  /// - Parameters:
  ///   - start: A valid index of the collection.
  ///   - end: Another valid index of the collection. If `end` is equal to
  ///     `start`, the result is zero.
  /// - Returns: The distance between `start` and `end`.
  ///
  /// - Complexity: O(*n*), where *n* is the resulting distance.
  public func distance(from start: Index, to end: Index) -> Int {
    // Note: Prior to Swift 5.7, this function used to be inlinable, forwarding
    // to `BidirectionalCollection._distance(from:to:)`.

    // FIXME: This method used to not properly validate indices before 5.7;
    // temporarily allow older binaries to keep invoking undefined behavior as
    // before.
    let origin = _guts.validateInclusiveCharacterIndex_5_7(start)
    let target = _guts.validateInclusiveCharacterIndex_5_7(end)

    // Per SE-0180, `start` and `end` are allowed to fall in between Character
    // boundaries, in which case this function must still terminate without
    // trapping and return a result that makes sense.
    let targetOffset = target._encodedOffset
    var offset = origin._encodedOffset
    var steps = 0

    if origin < target {
      // Note: the loop condition uses `<` rather than an equality test, so
      // the loop also ends if a stride steps past the target offset.
      while offset < targetOffset {
        steps &+= 1
        // For the purposes of this loop, this should be equivalent to
        // `_uncheckedIndex(after:)`. We don't need to spend time setting up
        // actual indices when we only care about counting strides.
        offset &+= _guts._opaqueCharacterStride(startingAt: offset)
      }
    } else if origin > target {
      // Note: the loop condition uses `>` rather than an equality test, so
      // the loop also ends if a stride steps past the target offset.
      while offset > targetOffset {
        steps &-= 1
        offset &-= _guts._opaqueCharacterStride(endingAt: offset)
      }
    }
    return steps
  }

  /// Accesses the character at the given position.
  ///
  /// Indices are interchangeable between a string and its substrings.
  /// For example, this code finds the first letter after the first space:
  ///
  ///     let str = "Greetings, friend! How are you?"
  ///     let firstSpace = str.firstIndex(of: " ") ?? str.endIndex
  ///     let substr = str[firstSpace...]
  ///     if let nextCapital = substr.firstIndex(where: { $0 >= "A" && $0 <= "Z" }) {
  ///         print("Capital after a space: \(str[nextCapital])")
  ///     }
  ///     // Prints "Capital after a space: H"
  ///
  /// - Parameter i: A valid index of the string. `i` must be less than the
  ///   string's end index.
  public subscript(i: Index) -> Character {
    // Prior to Swift 5.7, this function used to be inlinable.

    // Note: SE-0180 requires us not to round `i` down to the nearest whole
    // `Character` boundary, which is why the index is only validated as a
    // scalar index here.
    let position = _guts.validateScalarIndex(i)

    // The character spans from the validated offset up to that offset plus
    // the stride measured from it.
    let lowerOffset = position._encodedOffset
    let upperOffset = lowerOffset &+ _characterStride(startingAt: position)
    return _guts.errorCorrectedCharacter(
      startingAt: lowerOffset, endingAt: upperOffset)
  }

  /// Return the length of the `Character` starting at the given index, measured
  /// in encoded code units, and without looking back at any scalar that
  /// precedes `i`.
  ///
  /// Note: if `i` isn't `Character`-aligned, then this operation must still
  /// finish successfully and return the length of the grapheme cluster starting
  /// at `i` _as if the string started on that scalar_. (This can be different
  /// from the length of the whole character when the preceding scalars are
  /// present!)
  ///
  /// This method is called from inlinable `subscript` implementations in
  /// current and previous versions of the stdlib, which require this contract
  /// not to be violated.
  ///
  /// Returns 0 when `i` equals `endIndex` and carries no stored stride.
  @usableFromInline
  @inline(__always)
  internal func _characterStride(startingAt i: Index) -> Int {
    // Prior to Swift 5.7, this function used to be inlinable.
    _internalInvariant_5_1(i._isScalarAligned)

    // Fast path: the index already carries a measured stride.
    if let cached = i.characterStride { return cached }

    // Nothing starts at the end of the string.
    guard i != endIndex else { return 0 }

    // Slow path: measure the character resiliently.
    return _guts._opaqueCharacterStride(startingAt: i._encodedOffset)
  }

  /// Return the length of the `Character` that ends at the given index.
  ///
  /// Returns 0 when `i` equals `startIndex`; otherwise the measurement is
  /// delegated to the string's storage, using the encoded offset of `i`.
  @usableFromInline
  @inline(__always)
  internal func _characterStride(endingAt i: Index) -> Int {
    // Prior to Swift 5.7, this function used to be inlinable.
    _internalInvariant_5_1(i._isScalarAligned)

    // Nothing ends at the start of the string.
    guard i != startIndex else { return 0 }

    return _guts._opaqueCharacterStride(endingAt: i._encodedOffset)
  }
}

extension String {
  /// An iterator that produces the `Character`s of a string one at a time.
  ///
  /// The iterator keeps its own copy of the string's storage together with a
  /// current offset and an end offset captured at initialization.
  @frozen
  public struct Iterator: IteratorProtocol, Sendable {
    // Note: this type is `@frozen`; the stored properties below must keep
    // their order.

    /// The string storage being iterated.
    @usableFromInline
    internal var _guts: _StringGuts

    /// The offset at which the next character starts.
    @usableFromInline
    internal var _position: Int = 0

    /// The offset at which iteration stops; initialized from the storage's
    /// `count`.
    @usableFromInline
    internal var _end: Int

    /// Creates an iterator over the characters stored in `guts`, starting at
    /// offset zero.
    @inlinable
    internal init(_ guts: _StringGuts) {
      self._guts = guts
      self._end = guts.count
    }

    /// Advances to the next character and returns it, or returns `nil` once
    /// the end offset has been reached.
    public mutating func next() -> Character? {
      // Prior to Swift 5.7, this function used to be inlinable.
      guard _fastPath(_position < _end) else { return nil }

      let lowerOffset = _position
      let upperOffset =
        lowerOffset &+ _guts._opaqueCharacterStride(startingAt: lowerOffset)

      // Move past the character once it has been extracted.
      defer { _position = upperOffset }
      return _guts.errorCorrectedCharacter(
        startingAt: lowerOffset, endingAt: upperOffset)
    }
  }

  /// Returns an iterator over the characters of this string.
  @inlinable
  public __consuming func makeIterator() -> Iterator {
    let iterator = Iterator(_guts)
    return iterator
  }
}