File: UnicodeBreakProperty.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (157 lines) | stat: -rw-r--r-- 3,700 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import SwiftShims

extension Unicode {
  /// The grapheme-break category assigned to a single Unicode scalar.
  ///
  /// A value is produced by `init(from:)`, which first consults a set of
  /// hard-coded scalar ranges and only then asks
  /// `_swift_stdlib_getGraphemeBreakProperty` for the answer.
  internal enum _GraphemeBreakProperty {
    case any, control, extend, extendedPictographic
    case l, lv, lvt
    case prepend, regionalIndicator, spacingMark
    case t, v
    case zwj

    /// Classifies `scalar`.
    ///
    /// - Parameter scalar: The scalar whose category is wanted.
    ///
    /// The ranges matched directly in the outer `switch` do not overlap, so
    /// each scalar is classified by at most one of them. Any scalar that is
    /// not matched there is classified from the integer returned by
    /// `_swift_stdlib_getGraphemeBreakProperty`; integers without an explicit
    /// mapping yield `.any`.
    @inline(__always)
    init(from scalar: Unicode.Scalar) {
      let codePoint = scalar.value

      switch codePoint {
      // ASCII fast paths: these are answered before anything else is tried.
      case 0x0 ... 0x1F:
        self = .control
      case 0x20 ... 0x7E:
        self = .any

      case 0x1100 ... 0x115F, 0xA960 ... 0xA97C:
        self = .l
      case 0x1160 ... 0x11A7, 0xD7B0 ... 0xD7C6:
        self = .v
      case 0x11A8 ... 0x11FF, 0xD7CB ... 0xD7FB:
        self = .t
      case 0xAC00 ... 0xD7A3:
        // 0xAC00 % 28 == 16, so this remainder test is true exactly for the
        // scalars whose offset from the start of the range is a multiple
        // of 28.
        let startsGroupOf28 = codePoint % 28 == 16
        self = startsGroupOf28 ? .lv : .lvt

      case 0x200D:
        self = .zwj
      case 0x1F1E6 ... 0x1F1FF:
        self = .regionalIndicator
      case 0x1FC00 ... 0x1FFFD:
        self = .extendedPictographic
      case 0xE01F0 ... 0xE0FFF:
        self = .control

      default:
        // No fast path applied; fall back to the lookup routine, which binary
        // searches the data array.
        let tableValue = _swift_stdlib_getGraphemeBreakProperty(codePoint)

        switch tableValue {
        case 0:
          self = .control
        case 1:
          self = .extend
        case 2:
          self = .prepend
        case 3:
          self = .spacingMark
        case 4, 5:
          // Two distinct values both stand for extended pictographic.
          self = .extendedPictographic
        default:
          self = .any
        }
      }
    }
  }
}

extension Unicode {
  /// The word-break category assigned to a single Unicode scalar.
  ///
  /// A value is produced by `init(from:)`, which handles a few scalars
  /// directly and otherwise defers to `_swift_stdlib_getWordBreakProperty`.
  internal enum _WordBreakProperty {
    case aLetter, any
    case doubleQuote
    case extend, extendedPictographic, extendNumLet
    case format
    case hebrewLetter
    case katakana
    case midLetter, midNum, midNumLet
    case newlineCRLF, numeric
    case regionalIndicator
    case singleQuote
    case wSegSpace
    case zwj

    /// Classifies `scalar`.
    ///
    /// - Parameter scalar: The scalar whose category is wanted.
    ///
    /// The patterns matched directly in the outer `switch` do not overlap.
    /// Every other scalar is classified from the integer returned by
    /// `_swift_stdlib_getWordBreakProperty`: 0 through 11 each map to one
    /// case, and any other integer yields `.any`.
    init(from scalar: Unicode.Scalar) {
      let codePoint = scalar.value

      switch codePoint {
      case 0x22:
        self = .doubleQuote
      case 0x27:
        self = .singleQuote
      case 0xA ... 0xD, 0x85, 0x2028 ... 0x2029:
        self = .newlineCRLF
      case 0x200D:
        self = .zwj
      case 0x1F1E6 ... 0x1F1FF:
        self = .regionalIndicator

      default:
        // Not one of the directly handled scalars; ask the lookup routine.
        let tableValue = _swift_stdlib_getWordBreakProperty(codePoint)

        switch tableValue {
        case 0:
          self = .extend
        case 1:
          self = .format
        case 2:
          self = .katakana
        case 3:
          self = .hebrewLetter
        case 4:
          self = .aLetter
        case 5:
          self = .midNumLet
        case 6:
          self = .midLetter
        case 7:
          self = .midNum
        case 8:
          self = .numeric
        case 9:
          self = .extendNumLet
        case 10:
          self = .wSegSpace
        case 11:
          self = .extendedPictographic
        default:
          self = .any
        }
      }
    }
  }
}