File: BuiltInUnicodeScalarSet.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (96 lines) | stat: -rw-r--r-- 3,452 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

internal import _FoundationCShims

// Native implementation of CFCharacterSet.
// Represents the sets of Unicode scalars whose bitmap data we own.
// whitespace, whitespaceAndNewline, and newline are not included since they're not stored as bitmaps.
// This only contains the subset of predefined CFCharacterSets that are currently in use.
internal struct BuiltInUnicodeScalarSet {
    /// The predefined scalar sets this type can stand for.
    enum SetType {
        case lowercaseLetter
        case uppercaseLetter
        case canonicalDecomposable

        // The cases below are internal
        case hfsPlusDecomposable
        case caseIgnorable
        case graphemeExtend
    }

    /// The predefined set this value represents.
    var charset: SetType

    /// Creates a set representing the given predefined `type`.
    init(type: SetType) {
        self.charset = type
    }

    /// Index of this set's entry in `__CFUniCharBitmapDataArray`.
    ///
    /// Equivalent to `__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID())`.
    private var _bitmapTableIndex: Int {
        switch charset {
        case .lowercaseLetter: return 2
        case .uppercaseLetter: return 3
        case .canonicalDecomposable: return 5
        case .hfsPlusDecomposable: return 12
        case .caseIgnorable: return 20
        case .graphemeExtend: return 21
        }
    }

    /// Returns whether `scalar` belongs to this set.
    ///
    /// The plane number is taken from bits 16-23 of the scalar value; the bitmap
    /// for that plane is then tested with `_isMemberOfBitmap`.
    /// Counterpart of `CFUniCharIsMemberOf`.
    func contains(_ scalar: Unicode.Scalar) -> Bool {
        let plane = Int((scalar.value >> 16) & 0xFF)
        return _isMemberOfBitmap(scalar, _bitmapPtrForPlane(plane))
    }

    /// Returns the bitmap pointer stored for `plane` in this set's table entry.
    ///
    /// The result is `nil` when the table index is not below
    /// `__CFUniCharNumberOfBitmaps`, or when `plane` is not below the entry's
    /// `_numPlanes`.
    /// Counterpart of `CFUniCharGetBitmapPtrForPlane`.
    func _bitmapPtrForPlane(_ plane: Int) -> UnsafePointer<UInt8>? {
        let tableIndex = _bitmapTableIndex
        guard tableIndex < __CFUniCharNumberOfBitmaps else {
            return nil
        }

        // View the bitmap data array as a buffer of `__CFUniCharBitmapData`
        // entries and copy out the entry for this set.
        let entry = withUnsafePointer(to: __CFUniCharBitmapDataArray) { arrayPtr in
            arrayPtr.withMemoryRebound(to: __CFUniCharBitmapData.self, capacity: Int(__CFUniCharNumberOfBitmaps)) { entries in
                entries[tableIndex]
            }
        }

        guard plane < entry._numPlanes else {
            return nil
        }
        return entry._planes[plane]
    }

    /// Right shift that turns the truncated 16-bit scalar value into a byte offset within a bitmap.
    let bitShiftForByte: UInt16 = 3
    /// Mask that extracts the bit position within that byte from the truncated 16-bit scalar value.
    let bitShiftForMask: UInt16 = 7

    /// Tests the bit for `scalar` in `bitmap`; a `nil` bitmap yields `false`.
    ///
    /// Only the low 16 bits of the scalar value are used to locate the bit.
    /// Counterpart of `CFUniCharIsMemberOfBitmap`.
    func _isMemberOfBitmap(_ scalar: Unicode.Scalar, _ bitmap: UnsafePointer<UInt8>?) -> Bool {
        guard let bitmap else {
            return false
        }

        // Intentionally truncated to the low 16 bits.
        let low16 = UInt16(truncatingIfNeeded: scalar.value)
        let byte = bitmap[Int(low16 >> bitShiftForByte)]
        let bitFlag = UInt32(1) << (low16 & bitShiftForMask)
        return (Int(byte) & Int(bitFlag)) != 0
    }

    // Shared instances, one per predefined set.
    static let uppercaseLetters = BuiltInUnicodeScalarSet(type: .uppercaseLetter)
    static let lowercaseLetters = BuiltInUnicodeScalarSet(type: .lowercaseLetter)
    static let caseIgnorables = BuiltInUnicodeScalarSet(type: .caseIgnorable)
    static let hfsPlusDecomposables = BuiltInUnicodeScalarSet(type: .hfsPlusDecomposable)
    static let graphemeExtends = BuiltInUnicodeScalarSet(type: .graphemeExtend)
    static let canonicalDecomposables = BuiltInUnicodeScalarSet(type: .canonicalDecomposable)
}