File: NumberFormatter.cs

package info (click to toggle)
mono 6.14.1%2Bds2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,282,732 kB
  • sloc: cs: 11,182,461; xml: 2,850,281; ansic: 699,123; cpp: 122,919; perl: 58,604; javascript: 30,841; asm: 21,845; makefile: 19,602; sh: 10,973; python: 4,772; pascal: 925; sql: 859; sed: 16; php: 1
file content (177 lines) | stat: -rw-r--r-- 9,352 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
//------------------------------------------------------------------------------
// <copyright file="NumberFormatter.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
// <owner current="true" primary="true">Microsoft</owner>
//------------------------------------------------------------------------------

using System.Diagnostics;
using System.Text;

namespace System.Xml.Xsl.Runtime {
    using Res = System.Xml.Utils.Res;

    // Character classification helpers used when interpreting number format tokens.
    internal static class CharUtil {
        // Returns true when the character is alphanumeric, i.e. its Unicode general category is
        // one of the letter categories Lu, Ll, Lt, Lm, Lo or one of the number categories Nd, Nl, No.
        // <spec>http://www.w3.org/TR/xslt.html#convert</spec>
        public static bool IsAlphaNumeric(char ch) {
            switch (char.GetUnicodeCategory(ch)) {
                case System.Globalization.UnicodeCategory.UppercaseLetter:      // Lu (0)
                case System.Globalization.UnicodeCategory.LowercaseLetter:      // Ll (1)
                case System.Globalization.UnicodeCategory.TitlecaseLetter:      // Lt (2)
                case System.Globalization.UnicodeCategory.ModifierLetter:       // Lm (3)
                case System.Globalization.UnicodeCategory.OtherLetter:          // Lo (4)
                case System.Globalization.UnicodeCategory.DecimalDigitNumber:   // Nd (8)
                case System.Globalization.UnicodeCategory.LetterNumber:         // Nl (9)
                case System.Globalization.UnicodeCategory.OtherNumber:          // No (10)
                    return true;
                default:
                    return false;
            }
        }

        // Returns true when the character is the digit "one" of a decimal digit set.  The test is
        // indirect: the code point immediately before ch must be a decimal digit (category Nd)
        // whose numeric value is zero.  NOTE: Numbering systems that have no zero digit (the
        // original comment names Tamil and Ethiopic) are therefore not recognized.
        public static bool IsDecimalDigitOne(char ch) {
            char previous = (char)(ch - 1);
            if (char.GetUnicodeCategory(previous) != System.Globalization.UnicodeCategory.DecimalDigitNumber) {
                return false;
            }
            return char.GetNumericValue(previous) == 0;
        }
    }

    // Identifies a numbering sequence.  Members are grouped into three contiguous ranges, each
    // bracketed by First*/Last* aliases: decimal digit sets, alphabetic sequences and "special"
    // sequences.  Values are assigned implicitly from declaration order (Nil is -1, so FirstDecimal
    // is 0), which means inserting or reordering members changes every value that follows.
    //
    // The hexadecimal value in each trailing comment is a Unicode code point (e.g. 0x0031 is '1',
    // 0x0041 is 'A'); presumably it is the format-token character that selects the sequence --
    // confirm against the code that maps tokens to this enum.
    internal enum NumberingSequence {
        Nil = -1,
        FirstDecimal,
        Arabic = FirstDecimal,      // 0x0031 -- 1, 2, 3, 4, ...
        DArabic,                    // 0xff11 -- Combines DbChar w/ Arabic
        Hindi3,                     // 0x0967 -- Hindi numbers
        Thai2,                      // 0x0e51 -- Thai numbers
        FEDecimal,                  // 0x4e00 -- FE numbering style (decimal numbers)
        KorDbNum1,                  // 0xc77c -- Korea (decimal)
        LastNum = KorDbNum1,

        // Alphabetic numbering sequences (do not change order unless you also change _rgnfcToLab's order)
        FirstAlpha,
        UCLetter = FirstAlpha,      // 0x0041 -- A, B, C, D, ...
        LCLetter,                   // 0x0061 -- a, b, c, d, ...
        UCRus,                      // 0x0410 -- Upper case Russian alphabet
        LCRus,                      // 0x0430 -- Lower case Russian alphabet
        Thai1,                      // 0x0e01 -- Thai letters
        Hindi1,                     // 0x0915 -- Hindi consonants (U+0915 is DEVANAGARI LETTER KA)
        Hindi2,                     // 0x0905 -- Hindi vowels (U+0905 is DEVANAGARI LETTER A)
        Aiueo,                      // 0xff71 -- Japan numbering style (SbChar)
        DAiueo,                     // 0x30a2 -- Japan - Combines DbChar w/ Aiueo
        Iroha,                      // 0xff72 -- Japan numbering style (SbChar)
        DIroha,                     // 0x30a4 -- Japan - Combines DbChar w/ Iroha
        // New defines for 97...
        DChosung,                   // 0x3131 -- Korea Chosung (DbChar)
        Ganada,                     // 0xac00 -- Korea
        ArabicScript,               // 0x0623 -- BIDI AraAlpha for Arabic/Persian/Urdu
        LastAlpha = ArabicScript,

        // Special numbering sequences (includes peculiar alphabetic and numeric sequences)
        FirstSpecial,
        UCRoman = FirstSpecial,     // 0x0049 -- I, II, III, IV, ...
        LCRoman,                    // 0x0069 -- i, ii, iii, iv, ...
        Hebrew,                     // 0x05d0 -- BIDI Heb1 for Hebrew
        DbNum3,                     // 0x58f1 -- FE numbering style (similar to China2, some different characters)
        ChnCmplx,                   // 0x58f9 -- China (complex, traditional chinese, spell out numbers)
        KorDbNum3,                  // 0xd558 -- Korea (1-99)
        Zodiac1,                    // 0x7532 -- CJK-heavenly-stem (10 numbers)
        Zodiac2,                    // 0x5b50 -- CJK-earthly-branch (12 numbers)
        Zodiac3,                    // 0x7532 -- (Zodiac1 + Zodiac2 Combination)
        LastSpecial = Zodiac3,
    }

    // Shared helpers and character tables for formatting numbers as alphabetic or Roman sequences.
    internal class NumberFormatterBase {
        protected const int   MaxAlphabeticValue  = int.MaxValue;     // Largest value ConvertToAlphabetic accepts
        private   const int   MaxAlphabeticLength = 7;                // Size of the letter buffer; enough letters for the largest value

        // Appends to sb the alphabetic representation of val, using an alphabet of totalChars
        // consecutive characters that starts at firstChar.  There is no zero digit: with
        // firstChar 'A' and totalChars 26 the sequence runs A..Z, AA..AZ, BA.. and so on.
        //
        // Preconditions (checked only by Debug.Assert):
        //   1 <= val <= MaxAlphabeticValue, and
        //   totalChars is large enough that MaxAlphabeticLength letters can express MaxAlphabeticValue.
        // The fractional part of val is discarded by the cast to int.
        public static void ConvertToAlphabetic(StringBuilder sb, double val, char firstChar, int totalChars) {
            Debug.Assert(1 <= val && val <= MaxAlphabeticValue);
            Debug.Assert(Math.Pow(totalChars, MaxAlphabeticLength) >= MaxAlphabeticValue);

            // The letters are produced least significant first, so the buffer is filled from its end.
            char[] buffer = new char[MaxAlphabeticLength];
            int start = MaxAlphabeticLength;
            int remaining = (int)val;

            while (remaining > totalChars) {
                // Shift to a zero-based value so the remainder maps directly onto the alphabet.
                remaining--;
                int offset = remaining % totalChars;
                remaining /= totalChars;

                start--;
                buffer[start] = (char)(firstChar + offset);
            }

            // Most significant letter: remaining is now within 1..totalChars.
            remaining--;
            start--;
            buffer[start] = (char)(firstChar + remaining);

            sb.Append(buffer, start, MaxAlphabeticLength - start);
        }

        protected const int     MaxRomanValue = 32767;
        // Packed numeral strings: the numeral for RomanDigitValue[i] starts at index i and is one
        // character long for even i and two characters long for odd i (the subtractive pairs).
        private   const string  RomanDigitsUC = "IIVIXXLXCCDCM";
        private   const string  RomanDigitsLC = "iivixxlxccdcm";

        //                            RomanDigit       = { I  IV   V  IX   X  XL   L  XC    C   CD    D   CM     M }
        private static readonly int[] RomanDigitValue  = { 1,  4,  5,  9, 10, 40, 50, 90, 100, 400, 500, 900, 1000 };

        // Appends to sb the Roman numeral for val, in upper or lower case as requested.
        // Precondition (checked only by Debug.Assert): 1 <= val <= MaxRomanValue.
        // The fractional part of val is discarded by the cast to int.
        public static void ConvertToRoman(StringBuilder sb, double val, bool upperCase) {
            Debug.Assert(1 <= val && val <= MaxRomanValue);

            int remaining = (int)val;

            string numerals;
            if (upperCase) {
                numerals = RomanDigitsUC;
            } else {
                numerals = RomanDigitsLC;
            }

            // Walk from the largest numeral down, emitting each one as often as it still fits.
            for (int i = RomanDigitValue.Length - 1; i >= 0; i--) {
                int weight = RomanDigitValue[i];
                int length = (i % 2 == 0) ? 1 : 2;

                while (remaining >= weight) {
                    remaining -= weight;
                    sb.Append(numerals, i, length);
                }
            }
        }

        // Most of the tables below were taken from MSXML sources and compared with the last
        // CSS3 proposal available at http://www.w3.org/TR/2002/WD-css3-lists-20021107/
        // Each table carries the original "MSXML+/-, CSS3+/-" tag; presumably it records whether
        // the sequence is present in MSXML and in that CSS3 draft.

        // Hiragana in a-i-u-e-o order.
        // MSXML-, CSS3+
        // CSS3 inserts two new characters U+3090, U+3091 before U+3092
        private const string hiraganaAiueo =
            "\u3042\u3044\u3046\u3048\u304a\u304b\u304d\u304f\u3051\u3053" +
            "\u3055\u3057\u3059\u305b\u305d\u305f\u3061\u3064\u3066\u3068" +
            "\u306a\u306b\u306c\u306d\u306e\u306f\u3072\u3075\u3078\u307b" +
            "\u307e\u307f\u3080\u3081\u3082\u3084\u3086\u3088\u3089\u308a" +
            "\u308b\u308c\u308d\u308f\u3092\u3093";

        // Hiragana in i-ro-ha order.
        // MSXML-, CSS3+
        private const string hiraganaIroha =
            "\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c" +
            "\u308b\u3092\u308f\u304b\u3088\u305f\u308c\u305d\u3064\u306d" +
            "\u306a\u3089\u3080\u3046\u3090\u306e\u304a\u304f\u3084\u307e" +
            "\u3051\u3075\u3053\u3048\u3066\u3042\u3055\u304d\u3086\u3081" +
            "\u307f\u3057\u3091\u3072\u3082\u305b\u3059";

        // Full-width katakana in a-i-u-e-o order.
        // MSXML+, CSS3+
        // CSS3 inserts two new characters U+30F0, U+30F1 before U+30F2
        private const string katakanaAiueo =
            "\u30a2\u30a4\u30a6\u30a8\u30aa\u30ab\u30ad\u30af\u30b1\u30b3" +
            "\u30b5\u30b7\u30b9\u30bb\u30bd\u30bf\u30c1\u30c4\u30c6\u30c8" +
            "\u30ca\u30cb\u30cc\u30cd\u30ce\u30cf\u30d2\u30d5\u30d8\u30db" +
            "\u30de\u30df\u30e0\u30e1\u30e2\u30e4\u30e6\u30e8\u30e9\u30ea" +
            "\u30eb\u30ec\u30ed\u30ef\u30f2\u30f3";

        // Full-width katakana in i-ro-ha order.
        // MSXML+, CSS3+
        // CSS3 removes last U+30F3 character
        private const string katakanaIroha =
            "\u30a4\u30ed\u30cf\u30cb\u30db\u30d8\u30c8\u30c1\u30ea\u30cc" +
            "\u30eb\u30f2\u30ef\u30ab\u30e8\u30bf\u30ec\u30bd\u30c4\u30cd" +
            "\u30ca\u30e9\u30e0\u30a6\u30f0\u30ce\u30aa\u30af\u30e4\u30de" +
            "\u30b1\u30d5\u30b3\u30a8\u30c6\u30a2\u30b5\u30ad\u30e6\u30e1" +
            "\u30df\u30b7\u30f1\u30d2\u30e2\u30bb\u30b9\u30f3";

        // Half-width katakana in a-i-u-e-o order.
        // MSXML+, CSS3-
        private const string katakanaAiueoHw =
            "\uff71\uff72\uff73\uff74\uff75\uff76\uff77\uff78\uff79\uff7a" +
            "\uff7b\uff7c\uff7d\uff7e\uff7f\uff80\uff81\uff82\uff83\uff84" +
            "\uff85\uff86\uff87\uff88\uff89\uff8a\uff8b\uff8c\uff8d\uff8e" +
            "\uff8f\uff90\uff91\uff92\uff93\uff94\uff95\uff96\uff97\uff98" +
            "\uff99\uff9a\uff9b\uff9c\uff66\uff9d";

        // Half-width katakana in i-ro-ha order.  Two entries (U+30F0 and U+30F1) are given as
        // U+30xx code points rather than U+FFxx ones.
        // MSXML+, CSS3-
        private const string katakanaIrohaHw =
            "\uff72\uff9b\uff8a\uff86\uff8e\uff8d\uff84\uff81\uff98\uff87" +
            "\uff99\uff66\uff9c\uff76\uff96\uff80\uff9a\uff7f\uff82\uff88" +
            "\uff85\uff97\uff91\uff73\u30f0\uff89\uff75\uff78\uff94\uff8f" +
            "\uff79\uff8c\uff7a\uff74\uff83\uff71\uff7b\uff77\uff95\uff92" +
            "\uff90\uff7c\u30f1\uff8b\uff93\uff7e\uff7d\uff9d";

        // CJK ideographic digits zero through nine.
        // MSXML+, CSS3-
        // Unicode 4.0.0 spec: When used to represent numbers in decimal notation, zero
        // is represented by U+3007. Otherwise, zero is represented by U+96F6.
        private const string cjkIdeographic =
            "\u3007\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d";
    }
}