1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
import SwiftShims
extension Unicode {
  /// The grapheme-break category of a single Unicode scalar.
  ///
  /// Use `init(from:)` to classify a scalar.
  internal enum _GraphemeBreakProperty {
    case any
    case control
    case extend
    case extendedPictographic
    case l
    case lv
    case lvt
    case prepend
    case regionalIndicator
    case spacingMark
    case t
    case v
    case zwj

    /// Classifies `scalar` into its grapheme-break category.
    ///
    /// A handful of contiguous ranges are answered directly by the outer
    /// `switch`. Every other scalar is resolved by calling
    /// `_swift_stdlib_getGraphemeBreakProperty`; results that call returns
    /// outside `0 ... 5` are treated as `.any`.
    ///
    /// - Parameter scalar: The scalar to classify.
    @inline(__always)
    init(from scalar: Unicode.Scalar) {
      let codePoint = scalar.value

      switch codePoint {
      // Some fast paths for ASCII characters: C0 controls, then everything
      // from space through `~`. (0x7F is deliberately left to the lookup.)
      case 0x0 ... 0x1F: self = .control
      case 0x20 ... 0x7E: self = .any

      // ZERO WIDTH JOINER.
      case 0x200D: self = .zwj

      // Hangul jamo: leading consonants, vowels and trailing consonants.
      case 0x1100 ... 0x115F, 0xA960 ... 0xA97C: self = .l
      case 0x1160 ... 0x11A7, 0xD7B0 ... 0xD7C6: self = .v
      case 0x11A8 ... 0x11FF, 0xD7CB ... 0xD7FB: self = .t

      // Precomposed Hangul syllables. Since 0xAC00 % 28 == 16, the test below
      // selects the first scalar of every run of 28 in this range; those are
      // the LV syllables and the remaining 27 of each run are LVT.
      case 0xAC00 ... 0xD7A3:
        self = codePoint % 28 == 16 ? .lv : .lvt

      case 0x1F1E6 ... 0x1F1FF: self = .regionalIndicator
      case 0x1FC00 ... 0x1FFFD: self = .extendedPictographic
      case 0xE01F0 ... 0xE0FFF: self = .control

      default:
        // Otherwise, default to binary searching the data array.
        switch _swift_stdlib_getGraphemeBreakProperty(codePoint) {
        case 0: self = .control
        case 1: self = .extend
        case 2: self = .prepend
        case 3: self = .spacingMark
        // Extended pictographic uses 2 values for its representation.
        case 4, 5: self = .extendedPictographic
        // Anything else carries no special grapheme-break behavior.
        default: self = .any
        }
      }
    }
  }
}
extension Unicode {
  /// The word-break category of a single Unicode scalar.
  ///
  /// Use `init(from:)` to classify a scalar.
  internal enum _WordBreakProperty {
    case aLetter
    case any
    case doubleQuote
    case extend
    case extendedPictographic
    case extendNumLet
    case format
    case hebrewLetter
    case katakana
    case midLetter
    case midNum
    case midNumLet
    case newlineCRLF
    case numeric
    case regionalIndicator
    case singleQuote
    case wSegSpace
    case zwj

    /// Classifies `scalar` into its word-break category.
    ///
    /// Line terminators, the two ASCII quote characters, the zero width joiner
    /// and the regional indicators are matched directly. Every other scalar is
    /// resolved by calling `_swift_stdlib_getWordBreakProperty`; results that
    /// call returns outside `0 ... 11` are treated as `.any`.
    ///
    /// - Parameter scalar: The scalar to classify.
    init(from scalar: Unicode.Scalar) {
      let codePoint = scalar.value

      switch codePoint {
      // U+000A through U+000D, U+0085, and U+2028 through U+2029 all share a
      // single category here.
      case 0xA ... 0xD, 0x85, 0x2028 ... 0x2029: self = .newlineCRLF

      // `"` and `'`.
      case 0x22: self = .doubleQuote
      case 0x27: self = .singleQuote

      // ZERO WIDTH JOINER.
      case 0x200D: self = .zwj

      case 0x1F1E6 ... 0x1F1FF: self = .regionalIndicator

      default:
        // Not one of the directly matched scalars: ask the shim and translate
        // the integer it returns into the matching case.
        switch _swift_stdlib_getWordBreakProperty(codePoint) {
        case 0: self = .extend
        case 1: self = .format
        case 2: self = .katakana
        case 3: self = .hebrewLetter
        case 4: self = .aLetter
        case 5: self = .midNumLet
        case 6: self = .midLetter
        case 7: self = .midNum
        case 8: self = .numeric
        case 9: self = .extendNumLet
        case 10: self = .wSegSpace
        case 11: self = .extendedPictographic
        // Anything else carries no special word-break behavior.
        default: self = .any
        }
      }
    }
  }
}
|