File: CodePageUtils.cs

package info (click to toggle)
mono 6.8.0.105%2Bdfsg-3.3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,284,512 kB
  • sloc: cs: 11,172,132; xml: 2,850,069; ansic: 671,653; cpp: 122,091; perl: 59,366; javascript: 30,841; asm: 22,168; makefile: 20,093; sh: 15,020; python: 4,827; pascal: 925; sql: 859; sed: 16; php: 1
file content (97 lines) | stat: -rw-r--r-- 4,209 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
//------------------------------------------------------------------------------
// <copyright file="CodePageUtils.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>                                                                
//------------------------------------------------------------------------------

namespace System.Web.Util {
    using System.Collections;
    
    //
    // Utility class to help with determining if a given code page
    // is ASCII compatible (preserves 0-127 Unicode characters as is)
    //

    internal static class CodePageUtils {

        /*      The following array of ASCII compatible code pages
                is generated by running this code on a machine that
                has _many_ codepages installed:

                using System;
                using System.Collections;
                using System.Text;
                public class qq {
                    public static void Main(string[] args) {
                        ArrayList list = new ArrayList();
                        byte[] bb = new byte[128]; for (int i = 0; i < 128; i++) bb[i] = (byte)i;
                        String asciiString = Encoding.ASCII.GetString(bb);
                        for (int i = 1; i < 100000; i++) {
                            try {
                                Encoding e = Encoding.GetEncoding(i);
                                byte[] xx = e.GetBytes(asciiString);
                                if (xx.Length == 128) {
                                    bool good = true;
                                    for (int j = 0; j < 128; j++) { if (bb[j] != xx[j]) { good = false; break; } }
                                    if (good) list.Add(i);
                                }
                            }
                            catch {}
                        }
                        int n = list.Count;
                        Console.Write("private const int[] _asciiCompatCodePages = new int[" + n + "] {\r\n    ");
                        for (int i = 0; i < n; i++) {
                            Console.Write("{0,5}", list[i]);
                            if (i < n-1) Console.Write(", ");
                            if (((i+1) % 10) == 0) Console.Write("\r\n    ");
                        }
                        Console.Write("\r\n};\r\n");
                    }
                }

                Note: the generator prints a "private const int[]" declaration,
                which is not valid C# (an array cannot be a non-null const), so
                the emitted text has to be adjusted by hand to the declaration
                used below.
        */

        //
        // Code pages whose encoding of the 128 ASCII characters is the
        // identity mapping (see the generator above).
        //
        // The entries MUST stay sorted in ascending order: the lookup in
        // IsAsciiCompatibleCodePage is a binary search over this table.
        // The table size is taken from the initializer, so adding or removing
        // an entry needs no other change in this class.
        //
        private static readonly int[] _asciiCompatCodePages = new int[] {
              437,   708,   720,   737,   775,   850,   852,   855,   857,   858,
              860,   861,   862,   863,   864,   865,   866,   869,   874,   932,
              936,   949,   950,  1250,  1251,  1252,  1253,  1254,  1255,  1256,
             1257,  1258,  1361, 10000, 10001, 10002, 10003, 10004, 10005, 10006,
            10007, 10008, 10010, 10017, 10029, 10079, 10081, 10082, 20000, 20001,
            20002, 20003, 20004, 20005, 20127, 20866, 20932, 20936, 20949, 21866,
            28591, 28592, 28593, 28594, 28595, 28596, 28597, 28598, 28599, 28605,
            38598, 50220, 50221, 50222, 50225, 50227, 51932, 51949, 65001
        };

        //
        // Returns true when the given code page number appears in the table
        // of ASCII compatible code pages, false otherwise. Any value that is
        // not in the table (including zero and negative numbers) yields false.
        //
        internal /*public*/ static bool IsAsciiCompatibleCodePage(int codepage) {
            //alternatives to binary search considered
            //Hashtable: static initialization increases startup time, perf relative
            //Byte array would consume ~8K, but lookups constant
            //with 79 entries, binary search limited to 7 indexes into array
            int lo = 0;
            int hi = _asciiCompatCodePages.Length - 1;
            while (lo <= hi) {
                int mid = lo + ((hi - lo) >> 1);
                int candidate = _asciiCompatCodePages[mid];

                // Compare directly instead of testing the sign of
                // (candidate - codepage): the subtraction overflows for very
                // negative inputs such as int.MinValue, which only works by
                // accident in an unchecked build and throws in a checked one.
                if (candidate == codepage) {
                    return true; //mid is the index of the item
                }
                if (candidate < codepage) {
                    lo = mid + 1;
                }
                else {
                    hi = mid - 1;
                }
            }
            return false;
            //lo is the index of the item immediately after
            //~lo returned in some implementations for false indicator with additional info
        }

        // Code page number of UTF-8. The identifier's spelling ("UT8") is
        // kept as-is so existing callers continue to compile.
        internal const int CodePageUT8 = 65001;
    }



}