File: UTFEncoding.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (88 lines) | stat: -rw-r--r-- 3,342 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
//===--- UTFEncoding.swift - Common guts of the big 3 UnicodeEncodings ----===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
//  These components would be internal if it were possible to use internal
//  protocols to supply public conformance requirements.
//
//===----------------------------------------------------------------------===//


public protocol _UTFParser {
  associatedtype Encoding: _UnicodeEncoding

  func _parseMultipleCodeUnits() -> (isValid: Bool, bitCount: UInt8)
  func _bufferedScalar(bitCount: UInt8) -> Encoding.EncodedScalar
  
  var _buffer: _UIntBuffer<Encoding.CodeUnit> { get set }
}

extension _UTFParser
where Encoding.EncodedScalar: RangeReplaceableCollection {

  @inlinable
  @inline(__always)
  public mutating func parseScalar<I: IteratorProtocol>(
    from input: inout I
  ) -> Unicode.ParseResult<Encoding.EncodedScalar>
    where I.Element == Encoding.CodeUnit {

    // Bufferless single-scalar fastpath.
    if _fastPath(_buffer.isEmpty) {
      guard let codeUnit = input.next() else { return .emptyInput }
      // ASCII, return immediately.
      if Encoding._isScalar(codeUnit) {
        return .valid(Encoding.EncodedScalar(CollectionOfOne(codeUnit)))
      }
      // Non-ASCII, proceed to buffering mode.
      _buffer.append(codeUnit)
    } else if Encoding._isScalar(
      Encoding.CodeUnit(truncatingIfNeeded: _buffer._storage)
    ) {
      // ASCII in _buffer.  We don't refill the buffer so we can return
      // to bufferless mode once we've exhausted it.
      let codeUnit = Encoding.CodeUnit(truncatingIfNeeded: _buffer._storage)
      _buffer.remove(at: _buffer.startIndex)
      return .valid(Encoding.EncodedScalar(CollectionOfOne(codeUnit)))
    }
    // Buffering mode.
    // Fill buffer back to 4 bytes (or as many as are left in the iterator).
    repeat {
      if let codeUnit = input.next() {
        _buffer.append(codeUnit)
      } else {
        if _buffer.isEmpty { return .emptyInput }
        break // We still have some bytes left in our buffer.
      }
    } while _buffer.count < _buffer.capacity

    // Find one unicode scalar.
    let (isValid, scalarBitCount) = _parseMultipleCodeUnits()
    _internalInvariant(scalarBitCount % numericCast(Encoding.CodeUnit.bitWidth) == 0)
    _internalInvariant(1...4 ~= scalarBitCount / 8)
    _internalInvariant(scalarBitCount <= _buffer._bitCount)
    
    // Consume the decoded bytes (or maximal subpart of ill-formed sequence).
    let encodedScalar = _bufferedScalar(bitCount: scalarBitCount)
    
    _buffer._storage = UInt32(
      // widen to 64 bits so that we can empty the buffer in the 4-byte case
      truncatingIfNeeded: UInt64(_buffer._storage) &>> scalarBitCount)
      
    _buffer._bitCount = _buffer._bitCount &- scalarBitCount

    if _fastPath(isValid) {
      return .valid(encodedScalar)
    }
    return .error(
      length: Int(scalarBitCount / numericCast(Encoding.CodeUnit.bitWidth)))
  }
}