File: CharacterProperties.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (346 lines) | stat: -rw-r--r-- 12,351 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

extension Character {
  /// The first Unicode scalar of this character.
  ///
  /// The first element of `unicodeScalars` is force-unwrapped, so this relies
  /// on the character containing at least one scalar.
  @inlinable
  internal var _firstScalar: Unicode.Scalar {
    let scalars = self.unicodeScalars
    return scalars.first!
  }
  /// A Boolean value indicating whether this character is made up of exactly
  /// one Unicode scalar.
  @inlinable
  internal var _isSingleScalar: Bool {
    let scalars = self.unicodeScalars
    // One step past the start reaches the end only when there is a single
    // scalar in the view.
    let afterFirst = scalars.index(after: scalars.startIndex)
    return afterFirst == scalars.endIndex
  }

  /// A Boolean value indicating whether this character is an ASCII character.
  ///
  /// This property is `true` exactly when `asciiValue` is not `nil`.
  @inlinable
  public var isASCII: Bool {
    if asciiValue == nil { return false }
    return true
  }

  /// The ASCII code of this character, or `nil` if it is not an ASCII
  /// character.
  ///
  ///     let chars: [Character] = ["a", " ", "™"]
  ///     for ch in chars {
  ///         print(ch, "-->", ch.asciiValue)
  ///     }
  ///     // Prints:
  ///     // a --> Optional(97)
  ///     //   --> Optional(32)
  ///     // ™ --> nil
  ///
  /// The CR-LF character ("\r\n") is treated as a line feed ("\n"), so its
  /// `asciiValue` is 10.
  ///
  ///     let cr = "\r" as Character
  ///     // cr.asciiValue == 13
  ///     let lf = "\n" as Character
  ///     // lf.asciiValue == 10
  ///     let crlf = "\r\n" as Character
  ///     // crlf.asciiValue == 10
  @inlinable
  public var asciiValue: UInt8? {
    if _slowPath(self == "\r\n") {
      // CR-LF is two scalars in one character; report it as LINE FEED (LF).
      return 0x0A
    }
    // Anything with more than one scalar, or whose only scalar is U+0080 or
    // above, has no ASCII value.
    let outsideASCII = !_isSingleScalar || _firstScalar.value >= 0x80
    if _slowPath(outsideASCII) {
      return nil
    }
    return UInt8(_firstScalar.value)
  }

  /// A Boolean value indicating whether this character represents whitespace,
  /// newlines included.
  ///
  /// The result is taken from the whitespace property of the character's
  /// first Unicode scalar. For example, each of these characters is
  /// whitespace:
  ///
  /// - "\t" (U+0009 CHARACTER TABULATION)
  /// - " " (U+0020 SPACE)
  /// - U+2029 PARAGRAPH SEPARATOR
  /// - U+3000 IDEOGRAPHIC SPACE
  public var isWhitespace: Bool {
    let leadingScalar = _firstScalar
    return leadingScalar.properties.isWhitespace
  }

  /// A Boolean value indicating whether this character represents a newline.
  ///
  /// Only the character's first Unicode scalar is examined. The following
  /// characters are all newlines:
  ///
  /// - "\n" (U+000A): LINE FEED (LF)
  /// - U+000B: LINE TABULATION (VT)
  /// - U+000C: FORM FEED (FF)
  /// - "\r" (U+000D): CARRIAGE RETURN (CR)
  /// - "\r\n" (U+000D U+000A): CR-LF
  /// - U+0085: NEXT LINE (NEL)
  /// - U+2028: LINE SEPARATOR
  /// - U+2029: PARAGRAPH SEPARATOR
  @inlinable
  public var isNewline: Bool {
    let codePoint = _firstScalar.value
    // U+000A through U+000D: LF, VT, FF, CR. CR-LF is covered as well
    // because its first scalar is CR.
    if codePoint >= 0x000A && codePoint <= 0x000D {
      return true
    }
    // NEXT LINE (NEL), LINE SEPARATOR, PARAGRAPH SEPARATOR.
    return codePoint == 0x0085 || codePoint == 0x2028 || codePoint == 0x2029
  }

  /// A Boolean value indicating whether this character represents a number.
  ///
  /// The property is `true` when the character's first Unicode scalar has a
  /// numeric type. For example, each of these characters represents a number:
  ///
  /// - "7" (U+0037 DIGIT SEVEN)
  /// - "⅚" (U+215A VULGAR FRACTION FIVE SIXTHS)
  /// - "㊈" (U+3288 CIRCLED IDEOGRAPH NINE)
  /// - "𝟠" (U+1D7E0 MATHEMATICAL DOUBLE-STRUCK DIGIT EIGHT)
  /// - "๒" (U+0E52 THAI DIGIT TWO)
  public var isNumber: Bool {
    let leadingScalar = _firstScalar
    return leadingScalar.properties.numericType != nil
  }

  /// A Boolean value indicating whether this character represents a whole
  /// number.
  ///
  /// This property is `true` exactly when `wholeNumberValue` is not `nil`.
  /// For example, each of these characters represents a whole number:
  ///
  /// - "1" (U+0031 DIGIT ONE) => 1
  /// - "५" (U+096B DEVANAGARI DIGIT FIVE) => 5
  /// - "๙" (U+0E59 THAI DIGIT NINE) => 9
  /// - "万" (U+4E07 CJK UNIFIED IDEOGRAPH-4E07) => 10_000
  @inlinable
  public var isWholeNumber: Bool {
    let value = wholeNumberValue
    return value != nil
  }

  /// The whole-number value this character represents, if any.
  ///
  /// The value is `nil` when the character consists of more than one Unicode
  /// scalar, when its scalar has no numeric value, or when that numeric value
  /// cannot be represented exactly as an `Int` (for example, because it is
  /// too large).
  ///
  ///     let chars: [Character] = ["4", "④", "万", "a"]
  ///     for ch in chars {
  ///         print(ch, "-->", ch.wholeNumberValue)
  ///     }
  ///     // Prints:
  ///     // 4 --> Optional(4)
  ///     // ④ --> Optional(4)
  ///     // 万 --> Optional(10000)
  ///     // a --> nil
  public var wholeNumberValue: Int? {
    if !_isSingleScalar { return nil }
    // `Int(exactly:)` yields nil for values an `Int` cannot hold exactly.
    return _firstScalar.properties.numericValue.flatMap { numeric in
      Int(exactly: numeric)
    }
  }

  /// A Boolean value indicating whether this character represents a
  /// hexadecimal digit.
  ///
  /// Hexadecimal digits are 0-9, the Latin letters a-f and A-F, and the
  /// fullwidth compatibility forms of those characters. This property is
  /// `true` exactly when `hexDigitValue` is not `nil`; use `hexDigitValue` to
  /// get the digit's value.
  @inlinable
  public var isHexDigit: Bool {
    switch hexDigitValue {
    case .some: return true
    case .none: return false
    }
  }

  /// The value of this character as a hexadecimal digit, if it is one.
  ///
  /// Hexadecimal digits are 0-9, the Latin letters a-f and A-F, and the
  /// fullwidth compatibility forms of those characters. For any other
  /// character, including any character made of more than one Unicode scalar,
  /// the value of this property is `nil`.
  ///
  ///     let chars: [Character] = ["1", "a", "F", "g"]
  ///     for ch in chars {
  ///         print(ch, "-->", ch.hexDigitValue)
  ///     }
  ///     // Prints:
  ///     // 1 --> Optional(1)
  ///     // a --> Optional(10)
  ///     // F --> Optional(15)
  ///     // g --> nil
  public var hexDigitValue: Int? {
    guard _isSingleScalar else { return nil }
    let codePoint = _firstScalar.value

    // For the range that contains `codePoint`: the first code point of that
    // range, and the digit value that first code point stands for.
    let match: (start: UInt32, digit: UInt32)
    switch codePoint {
    // DIGIT ZERO..DIGIT NINE
    case 0x0030...0x0039: match = (0x0030, 0)
    // LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F
    case 0x0041...0x0046: match = (0x0041, 10)
    // LATIN SMALL LETTER A..LATIN SMALL LETTER F
    case 0x0061...0x0066: match = (0x0061, 10)
    // FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
    case 0xFF10...0xFF19: match = (0xFF10, 0)
    // FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F
    case 0xFF21...0xFF26: match = (0xFF21, 10)
    // FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F
    case 0xFF41...0xFF46: match = (0xFF41, 10)
    default: return nil
    }

    // Offset within the matched range plus the range's base digit. The
    // operands are bounded by the cases above, so the wrapping operators
    // never actually wrap.
    return Int((codePoint &- match.start) &+ match.digit)
  }

  /// A Boolean value indicating whether this character is a letter.
  ///
  /// The property is `true` when the character's first Unicode scalar is
  /// alphabetic. For example, each of these characters is a letter:
  ///
  /// - "A" (U+0041 LATIN CAPITAL LETTER A)
  /// - "é" (U+0065 LATIN SMALL LETTER E, U+0301 COMBINING ACUTE ACCENT)
  /// - "ϴ" (U+03F4 GREEK CAPITAL THETA SYMBOL)
  /// - "ڈ" (U+0688 ARABIC LETTER DDAL)
  /// - "日" (U+65E5 CJK UNIFIED IDEOGRAPH-65E5)
  /// - "ᚨ" (U+16A8 RUNIC LETTER ANSUZ A)
  public var isLetter: Bool {
    let leadingScalar = _firstScalar
    return leadingScalar.properties.isAlphabetic
  }

  /// Returns this character converted to uppercase.
  ///
  /// The result is a string rather than a character, because converting the
  /// case of one character can produce several characters.
  ///
  ///     let chars: [Character] = ["e", "é", "и", "π", "ß", "1"]
  ///     for ch in chars {
  ///         print(ch, "-->", ch.uppercased())
  ///     }
  ///     // Prints:
  ///     // e --> E
  ///     // é --> É
  ///     // и --> И
  ///     // π --> Π
  ///     // ß --> SS
  ///     // 1 --> 1
  public func uppercased() -> String {
    // Case conversion is delegated to the one-character string.
    let text = String(self)
    return text.uppercased()
  }

  /// Returns this character converted to lowercase.
  ///
  /// The result is a string rather than a character, because converting the
  /// case of one character can produce several characters.
  ///
  ///     let chars: [Character] = ["E", "É", "И", "Π", "1"]
  ///     for ch in chars {
  ///         print(ch, "-->", ch.lowercased())
  ///     }
  ///     // Prints:
  ///     // E --> e
  ///     // É --> é
  ///     // И --> и
  ///     // Π --> π
  ///     // 1 --> 1
  public func lowercased() -> String {
    // Case conversion is delegated to the one-character string.
    let text = String(self)
    return text.lowercased()
  }

  /// A Boolean value indicating whether converting this character to
  /// uppercase yields a string equal to the character itself.
  @usableFromInline
  internal var _isUppercased: Bool {
    let unconverted = String(self)
    return unconverted == self.uppercased()
  }
  /// A Boolean value indicating whether converting this character to
  /// lowercase yields a string equal to the character itself.
  @usableFromInline
  internal var _isLowercased: Bool {
    let unconverted = String(self)
    return unconverted == self.lowercased()
  }

  /// A Boolean value indicating whether this character is considered uppercase.
  ///
  /// An uppercase character is left unchanged by conversion to uppercase but
  /// is changed by conversion to lowercase. Each of these characters is
  /// uppercase:
  ///
  /// - "É" (U+0045 LATIN CAPITAL LETTER E, U+0301 COMBINING ACUTE ACCENT)
  /// - "И" (U+0418 CYRILLIC CAPITAL LETTER I)
  /// - "Π" (U+03A0 GREEK CAPITAL LETTER PI)
  @inlinable
  public var isUppercase: Bool {
    // Fast path: a lone scalar that is itself marked uppercase.
    let scalarIsUppercase =
      _isSingleScalar && _firstScalar.properties.isUppercase
    if _fastPath(scalarIsUppercase) {
      return true
    }
    // Slow path: the character must survive uppercasing unchanged and must
    // be cased at all.
    guard _isUppercased else { return false }
    return isCased
  }

  /// A Boolean value indicating whether this character is considered lowercase.
  ///
  /// A lowercase character is left unchanged by conversion to lowercase but
  /// is changed by conversion to uppercase. Each of these characters is
  /// lowercase:
  ///
  /// - "é" (U+0065 LATIN SMALL LETTER E, U+0301 COMBINING ACUTE ACCENT)
  /// - "и" (U+0438 CYRILLIC SMALL LETTER I)
  /// - "π" (U+03C0 GREEK SMALL LETTER PI)
  @inlinable
  public var isLowercase: Bool {
    // Fast path: a lone scalar that is itself marked lowercase.
    let scalarIsLowercase =
      _isSingleScalar && _firstScalar.properties.isLowercase
    if _fastPath(scalarIsLowercase) {
      return true
    }
    // Slow path: the character must survive lowercasing unchanged and must
    // be cased at all.
    guard _isLowercased else { return false }
    return isCased
  }

  /// A Boolean value indicating whether any form of case conversion changes
  /// this character.
  @inlinable
  public var isCased: Bool {
    // Fast path: a lone scalar that is itself marked as cased.
    let scalarIsCased = _isSingleScalar && _firstScalar.properties.isCased
    if _fastPath(scalarIsCased) {
      return true
    }
    // Slow path: cased unless both uppercasing and lowercasing leave the
    // character as it is.
    return !(_isUppercased && _isLowercased)
  }

  /// A Boolean value indicating whether this character represents a symbol.
  ///
  /// The check is made on the general category of the character's first
  /// Unicode scalar. The symbol categories are "Math_Symbol",
  /// "Currency_Symbol", "Modifier_Symbol", and "Other_Symbol", as defined in
  /// the
  /// [Unicode Standard](https://unicode.org/reports/tr44/#General_Category_Values).
  ///
  /// For example, each of these characters represents a symbol:
  ///
  /// - "®" (U+00AE REGISTERED SIGN)
  /// - "⌹" (U+2339 APL FUNCTIONAL SYMBOL QUAD DIVIDE)
  /// - "⡆" (U+2846 BRAILLE PATTERN DOTS-237)
  public var isSymbol: Bool {
    let category = _firstScalar.properties.generalCategory
    return category._isSymbol
  }

  /// A Boolean value indicating whether this character represents a symbol
  /// that naturally appears in mathematical contexts.
  ///
  /// The check is made on the character's first Unicode scalar. For example,
  /// each of these characters represents a math symbol:
  ///
  /// - "+" (U+002B PLUS SIGN)
  /// - "∫" (U+222B INTEGRAL)
  /// - "ϰ" (U+03F0 GREEK KAPPA SYMBOL)
  ///
  /// Characters for which `isMathSymbol` is `true` are not a strict subset of
  /// those for which `isSymbol` is `true`: some characters are used both as
  /// letters and in mathematical formulas. For example, "ϰ" (U+03F0 GREEK
  /// KAPPA SYMBOL) is considered both a mathematical symbol and a letter.
  ///
  /// This property corresponds to the "Math" property in the
  /// [Unicode Standard](https://www.unicode.org/versions/latest/).
  public var isMathSymbol: Bool {
    let leadingScalar = _firstScalar
    return leadingScalar.properties.isMath
  }

  /// A Boolean value indicating whether this character represents a currency
  /// symbol.
  ///
  /// The property is `true` when the general category of the character's
  /// first Unicode scalar is `.currencySymbol`. For example, each of these
  /// characters represents a currency symbol:
  ///
  /// - "$" (U+0024 DOLLAR SIGN)
  /// - "¥" (U+00A5 YEN SIGN)
  /// - "€" (U+20AC EURO SIGN)
  public var isCurrencySymbol: Bool {
    let category = _firstScalar.properties.generalCategory
    return category == .currencySymbol
  }

  /// A Boolean value indicating whether this character represents punctuation.
  ///
  /// The check is made on the general category of the character's first
  /// Unicode scalar. For example, each of these characters represents
  /// punctuation:
  ///
  /// - "!" (U+0021 EXCLAMATION MARK)
  /// - "؟" (U+061F ARABIC QUESTION MARK)
  /// - "…" (U+2026 HORIZONTAL ELLIPSIS)
  /// - "—" (U+2014 EM DASH)
  /// - "“" (U+201C LEFT DOUBLE QUOTATION MARK)
  public var isPunctuation: Bool {
    let category = _firstScalar.properties.generalCategory
    return category._isPunctuation
  }
}