1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
|
//------------------------------------------------------------------------------
// <copyright file="NumberFormatter.cs" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
// <owner current="true" primary="true">Microsoft</owner>
//------------------------------------------------------------------------------
using System.Diagnostics;
using System.Text;
namespace System.Xml.Xsl.Runtime {
using Res = System.Xml.Utils.Res;
internal static class CharUtil {
// Checks whether a given character is alphanumeric. Alphanumeric means any character that has
// a Unicode category of Nd (8), Nl (9), No (10), Lu (0), Ll (1), Lt (2), Lm (3) or Lo (4)
// <spec>http://www.w3.org/TR/xslt.html#convert</spec>
public static bool IsAlphaNumeric(char ch) {
int category = (int)char.GetUnicodeCategory(ch);
return category <= 4 || (category <= 10 && category >= 8);
}
// Checks whether a given character has decimal digit value of 1. The decimal digits are characters
// having the Unicode category of Nd (8). NOTE: We do not support Tamil and Ethiopic numbering systems
// having no zeros.
public static bool IsDecimalDigitOne(char ch) {
int category = (int)char.GetUnicodeCategory(--ch);
return category == 8 && char.GetNumericValue(ch) == 0;
}
}
internal enum NumberingSequence {
Nil = -1,
FirstDecimal,
Arabic = FirstDecimal, // 0x0031 -- 1, 2, 3, 4, ...
DArabic, // 0xff11 -- Combines DbChar w/ Arabic
Hindi3, // 0x0967 -- Hindi numbers
Thai2, // 0x0e51 -- Thai numbers
FEDecimal, // 0x4e00 -- FE numbering style (decimal numbers)
KorDbNum1, // 0xc77c -- Korea (decimal)
LastNum = KorDbNum1,
// Alphabetic numbering sequences (do not change order unless you also change _rgnfcToLab's order)
FirstAlpha,
UCLetter = FirstAlpha, // 0x0041 -- A, B, C, D, ...
LCLetter, // 0x0061 -- a, b, c, d, ...
UCRus, // 0x0410 -- Upper case Russian alphabet
LCRus, // 0x0430 -- Lower case Russian alphabet
Thai1, // 0x0e01 -- Thai letters
Hindi1, // 0x0915 -- Hindi vowels
Hindi2, // 0x0905 -- Hindi consonants
Aiueo, // 0xff71 -- Japan numbering style (SbChar)
DAiueo, // 0x30a2 -- Japan - Combines DbChar w/ Aiueo
Iroha, // 0xff72 -- Japan numbering style (SbChar)
DIroha, // 0x30a4 -- Japan - Combines DbChar w/ Iroha// New defines for 97...
DChosung, // 0x3131 -- Korea Chosung (DbChar)
Ganada, // 0xac00 -- Korea
ArabicScript, // 0x0623 -- BIDI AraAlpha for Arabic/Persian/Urdu
LastAlpha = ArabicScript,
// Special numbering sequences (includes peculiar alphabetic and numeric sequences)
FirstSpecial,
UCRoman = FirstSpecial, // 0x0049 -- I, II, III, IV, ...
LCRoman, // 0x0069 -- i, ii, iii, iv, ...
Hebrew, // 0x05d0 -- BIDI Heb1 for Hebrew
DbNum3, // 0x58f1 -- FE numbering style (similar to China2, some different characters)
ChnCmplx, // 0x58f9 -- China (complex, traditional chinese, spell out numbers)
KorDbNum3, // 0xd558 -- Korea (1-99)
Zodiac1, // 0x7532 -- CJK-heavenly-stem (10 numbers)
Zodiac2, // 0x5b50 -- CJK-earthly-branch (12 numbers)
Zodiac3, // 0x7532 -- (Zodiac1 + Zodiac2 Combination)
LastSpecial = Zodiac3,
}
internal class NumberFormatterBase {
protected const int MaxAlphabeticValue = int.MaxValue; // Maximum value that can be represented
private const int MaxAlphabeticLength = 7; // Number of letters needed to represent the maximum value
public static void ConvertToAlphabetic(StringBuilder sb, double val, char firstChar, int totalChars) {
Debug.Assert(1 <= val && val <= MaxAlphabeticValue);
Debug.Assert(Math.Pow(totalChars, MaxAlphabeticLength) >= MaxAlphabeticValue);
char[] letters = new char[MaxAlphabeticLength];
int idx = MaxAlphabeticLength;
int number = (int)val;
while (number > totalChars) {
int quot = --number / totalChars;
letters[--idx] = (char)(firstChar + (number - quot * totalChars));
number = quot;
}
letters[--idx] = (char)(firstChar + --number);
sb.Append(letters, idx, MaxAlphabeticLength - idx);
}
protected const int MaxRomanValue = 32767;
private const string RomanDigitsUC = "IIVIXXLXCCDCM";
private const string RomanDigitsLC = "iivixxlxccdcm";
// RomanDigit = { I IV V IX X XL L XC C CD D CM M }
private static readonly int[] RomanDigitValue = { 1, 4, 5, 9, 10, 40, 50, 90, 100, 400, 500, 900, 1000 };
public static void ConvertToRoman(StringBuilder sb, double val, bool upperCase) {
Debug.Assert(1 <= val && val <= MaxRomanValue);
int number = (int)val;
string digits = upperCase ? RomanDigitsUC : RomanDigitsLC;
for (int idx = RomanDigitValue.Length; idx-- != 0; ) {
while (number >= RomanDigitValue[idx]) {
number -= RomanDigitValue[idx];
sb.Append(digits, idx, 1 + (idx & 1));
}
}
}
// Most of tables here were taken from MSXML sources and compared with the last
// CSS3 proposal available at http://www.w3.org/TR/2002/WD-css3-lists-20021107/
// MSXML-, CSS3+
// CSS3 inserts two new characters U+3090, U+3091 before U+3092
private const string hiraganaAiueo =
"\u3042\u3044\u3046\u3048\u304a\u304b\u304d\u304f\u3051\u3053" +
"\u3055\u3057\u3059\u305b\u305d\u305f\u3061\u3064\u3066\u3068" +
"\u306a\u306b\u306c\u306d\u306e\u306f\u3072\u3075\u3078\u307b" +
"\u307e\u307f\u3080\u3081\u3082\u3084\u3086\u3088\u3089\u308a" +
"\u308b\u308c\u308d\u308f\u3092\u3093";
// MSXML-, CSS3+
private const string hiraganaIroha =
"\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c" +
"\u308b\u3092\u308f\u304b\u3088\u305f\u308c\u305d\u3064\u306d" +
"\u306a\u3089\u3080\u3046\u3090\u306e\u304a\u304f\u3084\u307e" +
"\u3051\u3075\u3053\u3048\u3066\u3042\u3055\u304d\u3086\u3081" +
"\u307f\u3057\u3091\u3072\u3082\u305b\u3059";
// MSXML+, CSS3+
// CSS3 inserts two new characters U+30F0, U+30F1 before U+30F2
private const string katakanaAiueo =
"\u30a2\u30a4\u30a6\u30a8\u30aa\u30ab\u30ad\u30af\u30b1\u30b3" +
"\u30b5\u30b7\u30b9\u30bb\u30bd\u30bf\u30c1\u30c4\u30c6\u30c8" +
"\u30ca\u30cb\u30cc\u30cd\u30ce\u30cf\u30d2\u30d5\u30d8\u30db" +
"\u30de\u30df\u30e0\u30e1\u30e2\u30e4\u30e6\u30e8\u30e9\u30ea" +
"\u30eb\u30ec\u30ed\u30ef\u30f2\u30f3";
// MSXML+, CSS3+
// CSS3 removes last U+30F3 character
private const string katakanaIroha =
"\u30a4\u30ed\u30cf\u30cb\u30db\u30d8\u30c8\u30c1\u30ea\u30cc" +
"\u30eb\u30f2\u30ef\u30ab\u30e8\u30bf\u30ec\u30bd\u30c4\u30cd" +
"\u30ca\u30e9\u30e0\u30a6\u30f0\u30ce\u30aa\u30af\u30e4\u30de" +
"\u30b1\u30d5\u30b3\u30a8\u30c6\u30a2\u30b5\u30ad\u30e6\u30e1" +
"\u30df\u30b7\u30f1\u30d2\u30e2\u30bb\u30b9\u30f3";
// MSXML+, CSS3-
private const string katakanaAiueoHw =
"\uff71\uff72\uff73\uff74\uff75\uff76\uff77\uff78\uff79\uff7a" +
"\uff7b\uff7c\uff7d\uff7e\uff7f\uff80\uff81\uff82\uff83\uff84" +
"\uff85\uff86\uff87\uff88\uff89\uff8a\uff8b\uff8c\uff8d\uff8e" +
"\uff8f\uff90\uff91\uff92\uff93\uff94\uff95\uff96\uff97\uff98" +
"\uff99\uff9a\uff9b\uff9c\uff66\uff9d";
// MSXML+, CSS3-
private const string katakanaIrohaHw =
"\uff72\uff9b\uff8a\uff86\uff8e\uff8d\uff84\uff81\uff98\uff87" +
"\uff99\uff66\uff9c\uff76\uff96\uff80\uff9a\uff7f\uff82\uff88" +
"\uff85\uff97\uff91\uff73\u30f0\uff89\uff75\uff78\uff94\uff8f" +
"\uff79\uff8c\uff7a\uff74\uff83\uff71\uff7b\uff77\uff95\uff92" +
"\uff90\uff7c\u30f1\uff8b\uff93\uff7e\uff7d\uff9d";
// MSXML+, CSS3-
// Unicode 4.0.0 spec: When used to represent numbers in decimal notation, zero
// is represented by U+3007. Otherwise, zero is represented by U+96F6.
private const string cjkIdeographic =
"\u3007\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d";
}
}
|