File: BigSubstring.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (372 lines) | stat: -rw-r--r-- 12,526 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Collections open source project
//
// Copyright (c) 2023 - 2024 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

#if swift(>=5.8)

/// A slice of a `BigString`, described by a base string and a range of indices into it.
@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
public struct BigSubstring: Sendable {
  /// The string that this substring refers into.
  var _base: BigString
  /// The region of `_base` that this substring exposes.
  var _bounds: Range<Index>

  /// Creates a substring of `base` spanning exactly `bounds`, without adjusting them.
  ///
  /// When assertions are enabled, this verifies that rounding either bound down in `base`
  /// leaves it unchanged.
  public init(_unchecked base: BigString, in bounds: Range<Index>) {
    assert(base.index(roundingDown: bounds.lowerBound) == bounds.lowerBound)
    assert(base.index(roundingDown: bounds.upperBound) == bounds.upperBound)
    _base = base
    _bounds = bounds
  }

  /// Creates a substring of `base` from `bounds`, rounding both bounds down in `base`.
  public init(_ base: BigString, in bounds: Range<Index>) {
    // Slicing in the middle of a character could shift character boundaries in the tree,
    // which would force a metadata resync. That is too costly to do while slicing, so both
    // bounds are instead rounded down to the nearest character.
    let lower = base.index(roundingDown: bounds.lowerBound)
    let upper = base.index(roundingDown: bounds.upperBound)
    _base = base
    _bounds = Range(uncheckedBounds: (lower, upper))
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring {
  /// The full string value stored by this substring.
  public var base: BigString {
    return _base
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring {
  /// Forwards `body` to the base string's `_foreachChunk(from:to:_:)`, limited to the
  /// bounds of this substring.
  func _foreachChunk(
    _ body: (Substring) -> Void
  ) {
    let lower = _bounds.lowerBound
    let upper = _bounds.upperBound
    _base._foreachChunk(from: lower, to: upper, body)
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: CustomStringConvertible {
  /// A `String` built from the base string restricted to this substring's bounds.
  public var description: String {
    let text = String(_from: _base, in: _bounds)
    return text
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: CustomDebugStringConvertible {
  /// The debug description of this substring's `description` string.
  public var debugDescription: String {
    let plain: String = self.description
    return plain.debugDescription
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: ExpressibleByStringLiteral {
  /// Creates a substring from a string literal by forwarding `value` to an unlabeled
  /// `init(_:)` of this type.
  public init(stringLiteral value: String) {
    // NOTE(review): presumably this resolves to the `init(_:)` taking a sequence of
    // characters; the exact overload depends on declarations outside this initializer.
    self.init(value)
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: LosslessStringConvertible {
  // This extension is intentionally empty: the `init?(_: String)` requirement is
  // implemented by RangeReplaceableCollection.init(_:).
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: Equatable {
  /// Returns whether two substrings have the same `count` and pairwise-equal characters.
  ///
  /// Substrings that are identical (see `isIdentical(to:)`) compare equal without being
  /// iterated.
  public static func ==(left: Self, right: Self) -> Bool {
    // FIXME: Implement properly normalized comparisons & hashing.
    // This is somewhat tricky, because we shouldn't simply normalize individual pieces of
    // the string that were split on arbitrary Character boundaries -- Unicode does not
    // promise that norm(a + b) == norm(a) + norm(b) in this case.
    // Doing this properly will probably require new stdlib entry points. :-/
    guard !left.isIdentical(to: right) else { return true }

    if left.count != right.count { return false }

    // FIXME: Even if we keep comparing character by character, shared subtrees should be
    // skipped.
    var lhs = left.makeIterator()
    var rhs = right.makeIterator()
    while true {
      let a = lhs.next()
      let b = rhs.next()
      if a != b { return false }
      // Both iterators produced `nil` at the same step: every character matched.
      if a == nil { return true }
    }
  }

  /// Returns whether `self` and `other` have identical base strings (according to the base
  /// string's own `isIdentical(to:)`) and equal bounds.
  public func isIdentical(to other: Self) -> Bool {
    _base.isIdentical(to: other._base) && _bounds == other._bounds
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: Hashable {
  /// Hashes this substring one character at a time.
  ///
  /// Each character is converted to a `String`, and every value that string passes to the
  /// `_withNFCCodeUnits` callback is combined into `hasher`. A single `0xFF` byte is
  /// combined after the last character.
  public func hash(into hasher: inout Hasher) {
    for character in self {
      String(character)._withNFCCodeUnits { unit in hasher.combine(unit) }
    }
    hasher.combine(0xFF as UInt8)
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: Comparable {
  /// Returns whether `left` orders before `right`, comparing character by character.
  ///
  /// The first pair of unequal characters decides the result. If one substring runs out
  /// of characters first, the shorter one orders before the longer one.
  public static func < (left: Self, right: Self) -> Bool {
    // FIXME: Implement properly normalized comparisons & hashing.
    // This is somewhat tricky, because we shouldn't simply normalize individual pieces of
    // the string that were split on arbitrary Character boundaries -- Unicode does not
    // promise that norm(a + b) == norm(a) + norm(b) in this case.
    // Doing this properly will probably require new stdlib entry points. :-/
    guard !left.isIdentical(to: right) else { return false }
    // FIXME: Even if we keep comparing character by character, shared subtrees should be
    // skipped.
    var lhs = left.makeIterator()
    var rhs = right.makeIterator()
    while true {
      let x = lhs.next()
      let y = rhs.next()
      // `left` is exhausted: it orders first only if `right` still has characters.
      guard let a = x else { return y != nil }
      // `right` is exhausted while `left` is not.
      guard let b = y else { return false }
      if a != b { return a < b }
    }
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: Sequence {
  public typealias Element = Character

  /// An iterator over the characters of a `BigSubstring`.
  ///
  /// It wraps an iterator over the base string and stops producing characters once that
  /// iterator is no longer below the substring's end index.
  public struct Iterator: IteratorProtocol {
    /// The end index of the substring being iterated.
    let _end: BigString.Index
    /// The iterator over the base string.
    var _it: BigString.Iterator

    init(_substring: BigSubstring) {
      _end = _substring.endIndex
      _it = BigString.Iterator(_substring._base, from: _substring.startIndex)
    }

    /// Returns the next character, or `nil` once the base iterator is not below the end
    /// of the substring.
    public mutating func next() -> Character? {
      if _it.isBelow(_end) {
        return _it.next()
      }
      return nil
    }
  }

  /// Returns an iterator over the characters of this substring.
  public func makeIterator() -> Iterator {
    return Iterator(_substring: self)
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: BidirectionalCollection {
  public typealias Index = BigString.Index
  public typealias SubSequence = Self

  /// The lower bound of this substring within its base string.
  @inline(__always)
  public var startIndex: Index { return _bounds.lowerBound }

  /// The upper bound of this substring within its base string.
  @inline(__always)
  public var endIndex: Index { return _bounds.upperBound }

  /// The distance from `startIndex` to `endIndex`.
  public var count: Int {
    return distance(from: startIndex, to: endIndex)
  }

  /// Returns the index after `i` in the base string. Traps unless `i < endIndex`.
  @inline(__always)
  public func index(after i: Index) -> Index {
    precondition(endIndex > i, "Can't advance above end index")
    return _base.index(after: i)
  }

  /// Returns the index before `i` in the base string. Traps unless `i > startIndex`.
  @inline(__always)
  public func index(before i: Index) -> Index {
    precondition(startIndex < i, "Can't advance below start index")
    return _base.index(before: i)
  }

  /// Offsets `i` by `distance` in the base string.
  ///
  /// Traps if either `i` or the resulting index lies outside `startIndex...endIndex`.
  @inline(__always)
  public func index(_ i: Index, offsetBy distance: Int) -> Index {
    precondition(startIndex <= i && i <= endIndex, "Index out of bounds")
    let result = _base.index(i, offsetBy: distance)
    precondition(startIndex <= result && result <= endIndex, "Index out of bounds")
    return result
  }

  /// Offsets `i` by `distance` in the base string, returning `nil` if the base string's
  /// limited offsetting returns `nil`.
  ///
  /// Traps if either `i` or a non-nil resulting index lies outside `startIndex...endIndex`.
  public func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? {
    precondition(startIndex <= i && i <= endIndex, "Index out of bounds")
    let candidate = _base.index(i, offsetBy: distance, limitedBy: limit)
    guard let result = candidate else { return nil }
    precondition(startIndex <= result && result <= endIndex, "Index out of bounds")
    return result
  }

  /// Returns the base string's distance between `start` and `end`.
  ///
  /// Traps if either index lies outside `startIndex...endIndex`.
  public func distance(from start: Index, to end: Index) -> Int {
    precondition(startIndex <= start && start <= endIndex, "Index out of bounds")
    precondition(startIndex <= end && end <= endIndex, "Index out of bounds")
    return _base.distance(from: start, to: end)
  }

  /// Accesses the character at `position` in the base string.
  ///
  /// Traps unless `startIndex <= position < endIndex`.
  public subscript(position: Index) -> Character {
    precondition(startIndex <= position && position < endIndex, "Index out of bounds")
    return _base[position]
  }

  /// Returns a substring of the base string for `bounds`.
  ///
  /// Traps unless `bounds` lies within `startIndex..<endIndex`.
  public subscript(bounds: Range<Index>) -> Self {
    precondition(
      startIndex <= bounds.lowerBound && endIndex >= bounds.upperBound,
      "Range out of bounds")
    return BigSubstring(_base, in: bounds)
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring {
  /// Returns the result of rounding `i` down in the base string.
  ///
  /// Traps if `i` lies outside `startIndex...endIndex`.
  public func index(roundingDown i: Index) -> Index {
    precondition(startIndex <= i && i <= endIndex, "Index out of bounds")
    let rounded = _base.index(roundingDown: i)
    return rounded
  }

  /// Returns the result of rounding `i` up in the base string.
  ///
  /// Traps if `i` lies outside `startIndex...endIndex`.
  public func index(roundingUp i: Index) -> Index {
    precondition(startIndex <= i && i <= endIndex, "Index out of bounds")
    let rounded = _base.index(roundingUp: i)
    return rounded
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring {
  /// Invokes `body` to mutate the base string within `range`, then recomputes the bounds of
  /// this view so that they approximate their logical position in the resulting string.
  ///
  /// `range` is validated to lie within the substring's bounds as they were before the
  /// mutation.
  ///
  /// - Returns: Whatever `body` returns.
  internal mutating func _mutateBasePreservingBounds<R>(
    in range: Range<Index>,
    with body: (inout BigString) -> R
  ) -> R {
    precondition(
      _bounds.lowerBound <= range.lowerBound && _bounds.upperBound >= range.upperBound,
      "Range out of bounds")

    // Capture the pre-mutation UTF-8 offsets and size so the bounds can be rebuilt afterwards.
    let oldStart = startIndex.utf8Offset
    let oldEnd = endIndex.utf8Offset
    let utf8CountBefore = _base._utf8Count

    let result = body(&_base)

    // Substring mutations may change grapheme boundaries across the bounds of the original
    // substring value, and the substring's bounds must remain well-aligned. There are
    // several ways to achieve that, none obviously better than the rest. To keep the
    // behavior easier to explain, this emulates substring initialization: the start and end
    // indices are rounded down to the nearest Character boundary after each mutation.
    let growth = _base._utf8Count - utf8CountBefore
    let newStart = _base.index(roundingDown: Index(_utf8Offset: oldStart))
    let newEnd = _base.index(roundingDown: Index(_utf8Offset: oldEnd + growth))
    _bounds = newStart ..< newEnd
    return result
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigSubstring: RangeReplaceableCollection {
  /// Creates a substring spanning the whole of a newly created, default-initialized
  /// `BigString`.
  public init() {
    let empty = BigString()
    self.init(
      _unchecked: empty,
      in: Range(uncheckedBounds: (empty.startIndex, empty.endIndex)))
  }

  /// Does nothing; present to satisfy `RangeReplaceableCollection`.
  public mutating func reserveCapacity(_ n: Int) {
    // Intentionally a no-op.
  }

  /// Replaces the characters in `subrange` with `newElements`, then realigns the bounds of
  /// this substring. Traps if `subrange` is not within the current bounds.
  public mutating func replaceSubrange( // Note: Sequence, not Collection
    _ subrange: Range<Index>,
    with newElements: __owned some Sequence<Character>
  ) {
    _mutateBasePreservingBounds(in: subrange, with: { string in
      string.replaceSubrange(subrange, with: newElements)
    })
  }

  /// Creates a substring spanning a new `BigString` built from `elements`.
  public init(_ elements: some Sequence<Character>) {
    let whole = BigString(elements)
    self.init(whole, in: whole.startIndex ..< whole.endIndex)
  }

  /// Creates a substring from a `BigString` that repeats `repeatedValue` `count` times.
  public init(repeating repeatedValue: Character, count: Int) {
    let repeated = BigString(repeating: repeatedValue, count: count)
    self.init(repeated)
  }

  /// Creates a substring from a `BigString` that repeats `repeatedValue` `count` times.
  public init(repeating repeatedValue: some StringProtocol, count: Int) {
    let repeated = BigString(repeating: repeatedValue, count: count)
    self.init(repeated)
  }

  /// Creates a substring from a `BigString` that repeats `repeatedValue` `count` times.
  public init(repeating repeatedValue: BigString, count: Int) {
    let repeated = BigString(repeating: repeatedValue, count: count)
    self.init(repeated)
  }

  /// Creates a substring from a `BigString` that repeats `repeatedValue` `count` times.
  public init(repeating repeatedValue: BigSubstring, count: Int) {
    let repeated = BigString(repeating: repeatedValue, count: count)
    self.init(repeated)
  }

  /// Inserts `newElement` at the current end of this substring.
  public mutating func append(_ newElement: Character) {
    // Read the end index up front; the closure runs while `self` is being mutated.
    let end = endIndex
    _mutateBasePreservingBounds(in: end ..< end, with: { string in
      string.insert(newElement, at: end)
    })
  }

  /// Inserts `newElements` at the current end of this substring.
  public mutating func append(contentsOf newElements: __owned some Sequence<Character>) {
    // Read the end index up front; the closure runs while `self` is being mutated.
    let end = endIndex
    _mutateBasePreservingBounds(in: end ..< end, with: { string in
      string.insert(contentsOf: newElements, at: end)
    })
  }

  /// Inserts `newElement` at `i`. Traps if `i` is not within the current bounds.
  public mutating func insert(_ newElement: Character, at i: Index) {
    _mutateBasePreservingBounds(in: i ..< i, with: { string in
      string.insert(newElement, at: i)
    })
  }

  /// Inserts `newElements` at `i`. Traps if `i` is not within the current bounds.
  public mutating func insert(
    contentsOf newElements: __owned some Sequence<Character>, // Note: Sequence, not Collection
    at i: Index
  ) {
    _mutateBasePreservingBounds(in: i ..< i, with: { string in
      string.insert(contentsOf: newElements, at: i)
    })
  }

  /// Removes and returns the character at `i`.
  @discardableResult
  public mutating func remove(at i: Index) -> Character {
    // `index(after:)` traps unless `i < endIndex`.
    let next = index(after: i)
    return _mutateBasePreservingBounds(in: i ..< next, with: { string in
      string.remove(at: i)
    })
  }

  /// Removes the characters in `bounds`. Traps if `bounds` is not within the current bounds.
  public mutating func removeSubrange(_ bounds: Range<Index>) {
    _mutateBasePreservingBounds(in: bounds, with: { string in
      string.removeSubrange(bounds)
    })
  }

  /// Removes every character covered by this substring from the base string.
  ///
  /// `keepCapacity` is not used.
  public mutating func removeAll(keepingCapacity keepCapacity: Bool = false) {
    // Copy the bounds up front; the closure runs while `self` is being mutated.
    let everything = _bounds
    _mutateBasePreservingBounds(in: everything, with: { string in
      string.removeSubrange(everything)
    })
    assert(_bounds.isEmpty)
  }
}

#endif