File: BigString%2BIngester.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (167 lines) | stat: -rw-r--r-- 5,036 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Collections open source project
//
// Copyright (c) 2023 - 2024 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

#if swift(>=5.8)

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigString {
  /// Creates an ingester for `input` whose starting grapheme breaking state
  /// is the state this string reports up to `index`.
  ///
  /// - Parameters:
  ///   - input: The text that is about to be inserted.
  ///   - index: The position in this string at which `input` will be inserted.
  ///   - allowForwardPeek: When `true`, the first Unicode scalar of `input`
  ///     is passed along as the `nextScalarHint` when computing the break
  ///     state; when `false`, no hint is supplied.
  /// - Returns: An ingester positioned at the start of `input`.
  func _ingester(
    forInserting input: __owned Substring,
    at index: Index,
    allowForwardPeek: Bool
  ) -> _Ingester {
    let hint: Unicode.Scalar?
    if allowForwardPeek {
      hint = input.unicodeScalars.first
    } else {
      hint = nil
    }
    return _Ingester(
      input,
      startState: _breakState(upTo: index, nextScalarHint: hint))
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigString {
  /// Carves a piece of text into consecutive slices of bounded UTF-8 size,
  /// carrying grapheme breaking state from one slice to the next.
  internal struct _Ingester {
    typealias _Chunk = BigString._Chunk
    typealias Counts = BigString._Chunk.Counts

    /// The text being ingested.
    var input: Substring

    /// The position in `input` at which the next slice begins.
    var start: String.Index

    /// The grapheme breaking state as of `start`.
    var state: _CharacterRecognizer

    /// Creates an ingester over `input` that starts with a freshly
    /// initialized character recognizer.
    init(_ input: Substring) {
      self.init(input, startState: _CharacterRecognizer())
    }

    /// Creates an ingester over `input` that resumes grapheme breaking from
    /// `startState`.
    init(_ input: Substring, startState: __owned _CharacterRecognizer) {
      self.state = startState
      self.start = input.startIndex
      self.input = input
    }

    /// Creates an ingester over the entire contents of `input`.
    init(_ input: String) {
      self.init(Substring(input))
    }

    /// Creates an ingester over the contents of an arbitrary string value.
    init<S: StringProtocol>(_ input: S) {
      let contents = Substring(input)
      self.init(contents)
    }

    /// Whether every byte of `input` has already been handed out.
    var isAtEnd: Bool {
      return input.endIndex == start
    }

    /// The number of UTF-8 code units between `start` and the end of `input`.
    var remainingUTF8: Int {
      let utf8 = input.utf8
      return utf8.distance(from: start, to: utf8.endIndex)
    }

    /// Consumes and returns the next slice of `input`, holding at most
    /// `maxUTF8Count` UTF-8 code units.
    ///
    /// The returned tuple carries the slice itself, the number of grapheme
    /// breaks found inside it, the UTF-8 distance from the start of the slice
    /// to the first break (`prefix`), and the UTF-8 distance from the last
    /// break to the end of the slice (`suffix`). If no break is found at all,
    /// `characters` is zero and both `prefix` and `suffix` equal the UTF-8
    /// length of the whole slice.
    ///
    /// - Returns: `nil` if the input is exhausted, or if the slice that fits
    ///   within `maxUTF8Count` would be empty; in both cases the ingester is
    ///   left unchanged.
    mutating func nextSlice(
      maxUTF8Count: Int = _Chunk.maxUTF8Count
    ) -> _Chunk.Slice? {
      let limit = input.endIndex
      let candidate = input.base._nextSlice(
        after: start, limit: limit, maxUTF8Count: maxUTF8Count)
      guard let bounds = candidate else {
        assert(start == limit)
        return nil
      }
      // An empty range means there is not enough room for any content.
      guard !bounds.isEmpty else { return nil }
      assert(bounds.lowerBound == start && bounds.upperBound <= limit)
      start = bounds.upperBound

      let piece = input[bounds]
      var rest = piece
      guard let opening = state.firstBreak(in: rest) else {
        // Anomalous case -- the whole slice continues a single character.
        let utf8Count = piece.utf8.count
        return (
          string: piece,
          characters: 0,
          prefix: utf8Count,
          suffix: utf8Count)
      }

      // Feed the remainder through the recognizer, remembering where the
      // first and the last breaks fall.
      let firstBreak = opening.lowerBound
      var lastBreak = firstBreak
      var characters = 1
      rest = rest[opening.upperBound...]
      while let next = state.firstBreak(in: rest) {
        characters += 1
        lastBreak = next.lowerBound
        rest = rest[next.upperBound...]
      }

      let utf8 = input.utf8
      return (
        string: piece,
        characters: characters,
        prefix: utf8.distance(from: bounds.lowerBound, to: firstBreak),
        suffix: utf8.distance(from: lastBreak, to: bounds.upperBound))
    }

    /// Like `nextSlice(maxUTF8Count:)`, but wraps the result in a `_Chunk`.
    mutating func nextChunk(maxUTF8Count: Int = _Chunk.maxUTF8Count) -> _Chunk? {
      if let slice = nextSlice(maxUTF8Count: maxUTF8Count) {
        return _Chunk(slice)
      }
      return nil
    }

    /// Returns the UTF-8 size to aim for in the next chunk when `remaining`
    /// code units are still to be stored.
    ///
    /// Everything is taken if it fits in a single chunk. Otherwise a full
    /// chunk is taken, unless that would leave fewer than
    /// `_Chunk.minUTF8Count` code units behind; in that case exactly
    /// `_Chunk.minUTF8Count` code units are held back.
    static func desiredNextChunkSize(remaining: Int) -> Int {
      let maxSize = _Chunk.maxUTF8Count
      let minSize = _Chunk.minUTF8Count
      guard remaining > maxSize else { return remaining }
      return remaining < maxSize + minSize ? remaining - minSize : maxSize
    }

    /// Consumes and returns the next slice, sized by
    /// `desiredNextChunkSize(remaining:)`.
    ///
    /// - Parameter suffix: An extra UTF-8 count that is added to the
    ///   remaining input size when choosing the slice size.
    mutating func nextWellSizedSlice(suffix: Int = 0) -> _Chunk.Slice? {
      let pending = remainingUTF8 + suffix
      return nextSlice(maxUTF8Count: Self.desiredNextChunkSize(remaining: pending))
    }

    /// Like `nextWellSizedSlice(suffix:)`, but wraps the result in a `_Chunk`.
    mutating func nextWellSizedChunk(suffix: Int = 0) -> _Chunk? {
      let pending = remainingUTF8 + suffix
      return nextChunk(maxUTF8Count: Self.desiredNextChunkSize(remaining: pending))
    }
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension String {
  /// Computes the bounds of the next slice starting at `i`, spanning at most
  /// `maxUTF8Count` UTF-8 code units and never extending past `limit`.
  ///
  /// The upper bound is first placed `maxUTF8Count` code units after `i`
  /// (clamped to `limit`) and then passed through
  /// `unicodeScalars._index(roundingDown:)`. The resulting range can be
  /// empty; callers are expected to handle that.
  ///
  /// - Parameters:
  ///   - i: The start of the slice. Asserted to be known scalar aligned.
  ///   - limit: The position the slice must not extend beyond.
  ///   - maxUTF8Count: The maximum size of the slice; asserted non-negative.
  /// - Returns: `nil` if `i` is not before `limit`; otherwise the range of
  ///   the slice.
  func _nextSlice(
    after i: Index,
    limit: Index,
    maxUTF8Count: Int
  ) -> Range<Index>? {
    assert(maxUTF8Count >= 0)
    assert(i._isKnownScalarAligned)
    if i >= limit { return nil }
    let bytes = self.utf8
    let cutoff = bytes.index(i, offsetBy: maxUTF8Count, limitedBy: limit) ?? limit
    let upper = self.unicodeScalars._index(roundingDown: cutoff)
    return Range(uncheckedBounds: (lower: i, upper: upper))
  }
}

@available(macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4, *)
extension BigString._Chunk {
  /// Creates a chunk holding the contents of `string`.
  ///
  /// An empty string produces the chunk created by `init()`. Otherwise the
  /// string is run through a `BigString._Ingester` and its first chunk is
  /// used.
  ///
  /// - Parameter string: The contents of the chunk. Debug builds assert that
  ///   its UTF-8 count does not exceed `maxUTF8Count` and that the ingester
  ///   consumed it entirely.
  init(_ string: String) {
    if string.isEmpty {
      self.init()
      return
    }
    assert(string.utf8.count <= Self.maxUTF8Count)
    var source = BigString._Ingester(string)
    // The input is non-empty here, so the ingester is expected to yield a chunk.
    self = source.nextChunk()!
    assert(source.isAtEnd)
  }
}

#endif