File: uchar.js

package info (click to toggle)
aseba-plugin-blockly 20180211%2Bgit-2
  • links: PTS
  • area: non-free
  • in suites: buster
  • size: 64,472 kB
  • sloc: xml: 7,976; python: 2,314; sh: 261; lisp: 24; makefile: 10
file content (292 lines) | stat: -rw-r--r-- 10,316 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
// Copyright 2009 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview Collection of utility functions for Unicode characters.
 *
 */

goog.provide('goog.i18n.uChar');


// Constants for handling Unicode supplementary characters (surrogate pairs).


/**
 * The smallest supplementary code point (U+10000). Code points from this value
 * up to CODE_POINT_MAX_VALUE_ are the ones this file encodes as a UTF-16
 * surrogate pair.
 * @type {number}
 * @private
 */
goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_ = 0x10000;


/**
 * The highest Unicode code point value (U+10FFFF) according to the Unicode
 * Standard. Used as the inclusive upper bound when validating code points.
 * @type {number}
 * @private
 */
goog.i18n.uChar.CODE_POINT_MAX_VALUE_ = 0x10FFFF;


/**
 * Lead (first) surrogate minimum value, inclusive.
 * @type {number}
 * @private
 */
goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_ = 0xD800;


/**
 * Lead (first) surrogate maximum value, inclusive.
 * @type {number}
 * @private
 */
goog.i18n.uChar.LEAD_SURROGATE_MAX_VALUE_ = 0xDBFF;


/**
 * Trail (second) surrogate minimum value, inclusive.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00;


/**
 * Trail (second) surrogate maximum value, inclusive.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF;


/**
 * The number of least significant bits of a supplementary code point that in
 * UTF-16 become the least significant bits of the trail surrogate. The rest of
 * the in-use bits of the supplementary code point become the least significant
 * bits of the lead surrogate. Used both as a shift distance and to build the
 * trail-bit mask when converting between code points and surrogate pairs.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_ = 10;


/**
 * Formats the first character of a string in U+ notation, e.g. 'U+0041' for
 * 'A'. The hexadecimal digits are upper-cased and left-padded with zeros to at
 * least four digits; supplementary characters yield five or six digits.
 * @param {string} ch The given character (a surrogate pair in the case of a
 *     supplementary character).
 * @return {string} The U+ notation of the given character.
 */
goog.i18n.uChar.toHexString = function(ch) {
  var hexDigits = goog.i18n.uChar.toCharCode(ch).toString(16).toUpperCase();
  return 'U+' + goog.i18n.uChar.padString_(hexDigits, 4, '0');
};


/**
 * Left-pads a string by repeatedly prepending the padding character until the
 * string reaches the requested size. Strings that are already long enough are
 * returned unchanged.
 * @param {string} str The given string to be padded.
 * @param {number} length The target size of the string.
 * @param {string} ch The character to be padded with.
 * @return {string} The padded string.
 * @private
 */
goog.i18n.uChar.padString_ = function(str, length, ch) {
  var padded = str;
  for (; padded.length < length; padded = ch + padded) {
    // Each pass prepends one more copy of the padding character.
  }
  return padded;
};


/**
 * Gets the Unicode value of the character that starts the given string.
 * @param {string} ch The given character. For a supplementary character this
 *     is a surrogate pair, which is decoded into a single code point. Anything
 *     after the first character is ignored.
 * @return {number} The Unicode value of the character.
 */
goog.i18n.uChar.toCharCode = function(ch) {
  // Looking at index 0 means there is never a preceding lead surrogate, so the
  // result is never the negated form getCodePointAround can produce.
  var firstCodePoint = goog.i18n.uChar.getCodePointAround(ch, 0);
  return firstCodePoint;
};


/**
 * Builds the string for a Unicode code point. Code points outside the range
 * 0..CODE_POINT_MAX_VALUE_ (as well as null/undefined) yield null; a
 * supplementary code point yields a two-unit surrogate pair; anything else
 * yields a single UTF-16 code unit.
 * @param {number} code The Unicode value of the character.
 * @return {?string} The character corresponding to the given Unicode value, or
 *     null if the value is not a valid code point.
 */
goog.i18n.uChar.fromCharCode = function(code) {
  var isValidCodePoint = goog.isDefAndNotNull(code) && code >= 0 &&
      code <= goog.i18n.uChar.CODE_POINT_MAX_VALUE_;
  if (!isValidCodePoint) {
    return null;
  }

  // Code points below the supplementary range fit in a single code unit.
  if (!goog.i18n.uChar.isSupplementaryCodePoint(code)) {
    return String.fromCharCode(code);
  }

  // Remove the supplementary offset first. What remains is a 20-bit value:
  // its upper bits select the lead surrogate and its
  // TRAIL_SURROGATE_BIT_COUNT_ (i.e. 10) lowest bits select the trail
  // surrogate.
  var offset = code - goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_;

  // 1 << 10 = 0x0400, and 0x0400 - 1 = 0x03FF.
  var trailMask = (1 << goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_) - 1;

  var leadUnit = goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_ +
      (offset >> goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_);
  var trailUnit =
      goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ + (offset & trailMask);

  // Emit both surrogates as one 2-character long string.
  return String.fromCharCode(leadUnit, trailUnit);
};


/**
 * Returns the Unicode code point found at, or straddling, the given index.
 *
 * Three outcomes are possible:
 *
 * 1. The char value at the index is a leading surrogate, the following index
 *    is still inside {@code string}, and the char value there is a trailing
 *    surrogate: the supplementary code point encoded by the pair is returned.
 *
 * 2. The char value at the index is a trailing surrogate, the preceding index
 *    is not before the start of {@code string}, and the char value there is a
 *    leading surrogate: the negated supplementary code point encoded by the
 *    pair is returned.
 *
 * 3. Otherwise the char value at the index is returned as is. In particular,
 *    an unpaired leading or trailing surrogate is returned unchanged.
 *
 * The negation in case 2 lets the caller tell apart "index is on the leading
 * surrogate" from "index is on the trailing surrogate", and thus deduce where
 * the next character starts and where the preceding character ends.
 *
 * @param {string} string The string.
 * @param {number} index The index from which the code point is to be retrieved.
 * @return {number} The code point at the given index. If the given index is
 * that of the start (i.e. lead surrogate) of a surrogate pair, returns the code
 * point encoded by the pair. If the given index is that of the end (i.e. trail
 * surrogate) of a surrogate pair, returns the negated code point encoded by
 * the pair.
 */
goog.i18n.uChar.getCodePointAround = function(string, index) {
  var unit = string.charCodeAt(index);

  if (goog.i18n.uChar.isLeadSurrogateCodePoint(unit)) {
    if (index + 1 < string.length) {
      var following = string.charCodeAt(index + 1);
      if (goog.i18n.uChar.isTrailSurrogateCodePoint(following)) {
        // Index sits on the first half of a surrogate pair.
        return /** @type {number} */ (
            goog.i18n.uChar.buildSupplementaryCodePoint(unit, following));
      }
    }
    // Unpaired leading surrogate.
    return unit;
  }

  if (goog.i18n.uChar.isTrailSurrogateCodePoint(unit) && index > 0) {
    var preceding = string.charCodeAt(index - 1);
    if (goog.i18n.uChar.isLeadSurrogateCodePoint(preceding)) {
      // Index sits on the second half of a surrogate pair; negate to signal it.
      return /** @type {number} */ (
          -goog.i18n.uChar.buildSupplementaryCodePoint(preceding, unit));
    }
  }

  // Ordinary code unit, or an unpaired trailing surrogate.
  return unit;
};


/**
 * Reports how many UTF-16 code units (i.e. string positions) are needed to
 * represent the specified Unicode code point.
 * @param {number} codePoint
 * @return {number} 2 if codePoint is a supplementary character, 1 otherwise.
 */
goog.i18n.uChar.charCount = function(codePoint) {
  if (goog.i18n.uChar.isSupplementaryCodePoint(codePoint)) {
    return 2;
  }
  return 1;
};


/**
 * Tests whether the specified Unicode code point lies in the supplementary
 * range, i.e. between SUPPLEMENTARY_CODE_POINT_MIN_VALUE_ and
 * CODE_POINT_MAX_VALUE_, both inclusive.
 * @param {number} codePoint
 * @return {boolean} Whether the given code point is a supplementary character.
 */
goog.i18n.uChar.isSupplementaryCodePoint = function(codePoint) {
  var atOrAboveMin =
      codePoint >= goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_;
  var atOrBelowMax = codePoint <= goog.i18n.uChar.CODE_POINT_MAX_VALUE_;
  return atOrAboveMin && atOrBelowMax;
};


/**
 * Tests whether the given code point falls in the leading-surrogate range
 * (LEAD_SURROGATE_MIN_VALUE_ to LEAD_SURROGATE_MAX_VALUE_, both inclusive).
 * @param {number} codePoint
 * @return {boolean} Whether the given code point is a leading surrogate
 * character.
 */
goog.i18n.uChar.isLeadSurrogateCodePoint = function(codePoint) {
  var atOrAboveMin = codePoint >= goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_;
  var atOrBelowMax = codePoint <= goog.i18n.uChar.LEAD_SURROGATE_MAX_VALUE_;
  return atOrAboveMin && atOrBelowMax;
};


/**
 * Tests whether the given code point falls in the trailing-surrogate range
 * (TRAIL_SURROGATE_MIN_VALUE_ to TRAIL_SURROGATE_MAX_VALUE_, both inclusive).
 * @param {number} codePoint
 * @return {boolean} Whether the given code point is a trailing surrogate
 * character.
 */
goog.i18n.uChar.isTrailSurrogateCodePoint = function(codePoint) {
  var atOrAboveMin = codePoint >= goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_;
  var atOrBelowMax = codePoint <= goog.i18n.uChar.TRAIL_SURROGATE_MAX_VALUE_;
  return atOrAboveMin && atOrBelowMax;
};


/**
 * Decodes a UTF-16 surrogate pair into the supplementary Unicode code point it
 * represents. If lead isn't a leading surrogate code point or trail isn't a
 * trailing surrogate code point, null is returned.
 * @param {number} lead The leading surrogate code point.
 * @param {number} trail The trailing surrogate code point.
 * @return {?number} The supplementary Unicode code point obtained by decoding
 * the given UTF-16 surrogate pair.
 */
goog.i18n.uChar.buildSupplementaryCodePoint = function(lead, trail) {
  if (!goog.i18n.uChar.isLeadSurrogateCodePoint(lead) ||
      !goog.i18n.uChar.isTrailSurrogateCodePoint(trail)) {
    return null;
  }

  // The lead surrogate's offset from the start of its range supplies the high
  // bits of the result, so it is moved above the trail surrogate's bits.
  var highPart = (lead - goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_)
      << goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_;

  // The trail surrogate's offset supplies the low bits; the supplementary base
  // (0x10000) is folded in here.
  var lowPart = trail - goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ +
      goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_;

  return highPart + lowPart;
};