1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
|
// Copyright 2009 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @fileoverview The decompressor for Base88 compressed character lists.
*
* The compression is by base 88 encoding the delta between two adjacent
* characters in the list. The deltas can be positive or negative. Also, there
* would be character ranges. These three types of values
* are given enum values 0, 1 and 2 respectively. Initial 3 bits are used for
* encoding the type and total length of the encoded value. Length enums 0, 1
* and 2 represent lengths 1, 2 and 4. So (value * 8 + type * 3 + length enum)
* is encoded in base 88 by following characters for numbers from 0 to 87:
* 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ (continued in next line)
* abcdefghijklmnopqrstuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~
*
* Value uses 0 based counting. That is value for the range [a, b] is 0 and
* that of [a, c] is 1. Similarly, the delta of "ab" is 0.
*
* The following Python script can be used to compress character lists taken
* from standard input: http://go/charlistcompressor.py
*
*/
goog.provide('goog.i18n.CharListDecompressor');
goog.require('goog.array');
goog.require('goog.i18n.uChar');
/**
 * Decompressor for character lists stored in the base88 delta encoding.
 * Constructing an instance hands the encoding alphabet to buildCharMap_, which
 * records the position of every alphabet character for later digit lookups.
 * @constructor
 * @final
 */
goog.i18n.CharListDecompressor = function() {
  // A character's digit value is its index in this string, so the order of
  // the characters is significant.
  var alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr' +
      'stuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~';
  this.buildCharMap_(alphabet);
};
/**
 * Lookup table from each character of the encoding alphabet to the integer
 * digit it represents, which is the character's index in the alphabet string
 * given to buildCharMap_. It is null on the prototype; buildCharMap_ assigns
 * the populated table on the instance the first time it runs.
 * @type {Object}
 * @private
 */
goog.i18n.CharListDecompressor.prototype.charMap_ = null;
/**
 * Populates the lookup table that converts each character of the base88
 * alphabet into the number it stands for, namely its index within the given
 * string. Does nothing when the table already exists.
 * @param {string} str The string of characters used in base88 scheme.
 * @private
 */
goog.i18n.CharListDecompressor.prototype.buildCharMap_ = function(str) {
  if (this.charMap_) {
    // Already built; keep the existing table.
    return;
  }
  // Install the table first, then fill it through a local alias.
  var digitValues = this.charMap_ = {};
  for (var index = 0; index < str.length; index++) {
    digitValues[str.charAt(index)] = index;
  }
};
/**
 * Decodes the base88 number held in a run of consecutive characters of a
 * string. The first character of the run is the least significant digit: the
 * character at offset k contributes its digit value multiplied by 88^k.
 * A character that is missing from the lookup table makes the result NaN.
 * @param {string} str String containing sequence of characters encoding a
 *     number in base 88 scheme.
 * @param {number} start Starting position of substring encoding the number.
 * @param {number} leng Length of the substring encoding the number.
 * @return {number} The encoded number.
 * @private
 */
goog.i18n.CharListDecompressor.prototype.getCodeAt_ = function(
    str, start, leng) {
  var total = 0;
  var offset = 0;
  while (offset < leng) {
    var digit = this.charMap_[str.charAt(start + offset)];
    var weight = Math.pow(88, offset);
    total += digit * weight;
    offset++;
  }
  return total;
};
/**
 * Appends the character(s) described by one decoded entry to the list and
 * reports the code point that was appended last.
 *
 * The value is zero based, so it always stands for (value + 1):
 *   type 0 - move forward by (value + 1) code points and append that one.
 *   type 1 - move backward by (value + 1) code points and append that one.
 *   type 2 - append the (value + 1) code points that follow lastcode, one by
 *            one.
 * For any other type nothing is appended and lastcode is returned unchanged.
 *
 * Each code point is converted with goog.i18n.uChar.fromCharCode and the
 * result is added to the list through goog.array.extend.
 * @param {Array<string>} list The list of characters to which the specified
 *     characters are appended.
 * @param {number} lastcode The last codepoint that was added to the list.
 * @param {number} value The value component representing the delta or the
 *     range.
 * @param {number} type The type component telling whether the value is a
 *     positive delta, a negative delta or a range.
 * @return {number} Last codepoint that is added to the list.
 * @private
 */
goog.i18n.CharListDecompressor.prototype.addChars_ = function(
    list, lastcode, value, type) {
  // Shared by all three entry types: convert one code point and append it.
  var appendCodePoint = function(codePoint) {
    goog.array.extend(list, goog.i18n.uChar.fromCharCode(codePoint));
  };

  if (type == 0) {
    lastcode += value + 1;
    appendCodePoint(lastcode);
    return lastcode;
  }

  if (type == 1) {
    lastcode -= value + 1;
    appendCodePoint(lastcode);
    return lastcode;
  }

  if (type == 2) {
    for (var step = 0; step <= value; step++) {
      lastcode++;
      appendCodePoint(lastcode);
    }
  }

  return lastcode;
};
/**
 * Expands a base88 compressed string into the list of characters it encodes.
 *
 * The string is a sequence of entries. The first character of an entry
 * carries, in its value modulo 8, both the entry type and the entry length
 * (1, 2 or 4 characters). The whole entry is then decoded as one number whose
 * quotient by 8 is the value passed on to addChars_.
 *
 * If the first character of an entry is not in the lookup table, the position
 * becomes NaN and decoding stops, returning what was decoded up to there.
 * @param {string} str The string encoding character list.
 * @return {!Array<string>} The list of characters specified by the given
 *     string in base 88 scheme.
 */
goog.i18n.CharListDecompressor.prototype.toCharList = function(str) {
  var metaRange = 8;
  var chars = [];
  var previousCode = 0;
  var pos = 0;

  while (pos < str.length) {
    // 88 is a multiple of 8, so the entry's value modulo 8 can be read from
    // its first (least significant) digit alone.
    var meta = this.charMap_[str.charAt(pos)] % metaRange;
    var type = Math.floor(meta / 3);

    // Length enums 0, 1, 2 stand for entry lengths 1, 2 and 4.
    var rawLength = (meta % 3) + 1;
    var leng = rawLength == 3 ? 4 : rawLength;

    var value = Math.floor(this.getCodeAt_(str, pos, leng) / metaRange);
    previousCode = this.addChars_(chars, previousCode, value, type);
    pos += leng;
  }

  return chars;
};
|