File: Encoding.java

package info (click to toggle)
libpgjava 8.4-701-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 3,532 kB
  • ctags: 4,162
  • sloc: java: 33,948; xml: 3,158; makefile: 14; sh: 10
file content (293 lines) | stat: -rw-r--r-- 10,189 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
/*-------------------------------------------------------------------------
*
* Copyright (c) 2003-2008, PostgreSQL Global Development Group
*
* IDENTIFICATION
*   $PostgreSQL: pgjdbc/org/postgresql/core/Encoding.java,v 1.23 2008/01/08 06:56:27 jurka Exp $
*
*-------------------------------------------------------------------------
*/
package org.postgresql.core;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.io.IOException;
import java.util.HashMap;

/**
 * Representation of a particular character encoding.
 *
 * An instance wraps the name of a JVM character encoding. The name may be
 * <code>null</code>, in which case every conversion uses the JVM's default
 * encoding (see {@link #defaultEncoding()}).
 */
public class Encoding
{
    // Shared instance representing "use the JVM default encoding"
    // (encoding name is null).
    private static final Encoding DEFAULT_ENCODING = new Encoding(null);

    /*
     * Preferred JVM encodings for backend encodings.
     * Maps a backend encoding name (String) to a String[] of candidate
     * JVM encoding names, listed in order of preference.
     */
    private static final HashMap encodings = new HashMap();

    static {
        //Note: this list should match the set of supported server
        // encodings found in backend/util/mb/encnames.c
        encodings.put("SQL_ASCII", new String[] { "ASCII", "us-ascii" });
        encodings.put("UNICODE", new String[] { "UTF-8", "UTF8" });
        encodings.put("UTF8", new String[] { "UTF-8", "UTF8" }); // 8.1's canonical name for UNICODE changed.
        encodings.put("LATIN1", new String[] { "ISO8859_1" });
        encodings.put("LATIN2", new String[] { "ISO8859_2" });
        encodings.put("LATIN3", new String[] { "ISO8859_3" });
        encodings.put("LATIN4", new String[] { "ISO8859_4" });
        encodings.put("ISO_8859_5", new String[] { "ISO8859_5" });
        encodings.put("ISO_8859_6", new String[] { "ISO8859_6" });
        encodings.put("ISO_8859_7", new String[] { "ISO8859_7" });
        encodings.put("ISO_8859_8", new String[] { "ISO8859_8" });
        encodings.put("LATIN5", new String[] { "ISO8859_9" });
        encodings.put("LATIN7", new String[] { "ISO8859_13" });
        encodings.put("LATIN9", new String[] { "ISO8859_15_FDIS" });
        encodings.put("EUC_JP", new String[] { "EUC_JP" });
        encodings.put("EUC_CN", new String[] { "EUC_CN" });
        encodings.put("EUC_KR", new String[] { "EUC_KR" });
        encodings.put("JOHAB", new String[] { "Johab" });
        encodings.put("EUC_TW", new String[] { "EUC_TW" });
        encodings.put("SJIS", new String[] { "MS932", "SJIS" });
        encodings.put("BIG5", new String[] { "Big5", "MS950", "Cp950" });
        encodings.put("GBK", new String[] { "GBK", "MS936" });
        encodings.put("UHC", new String[] { "MS949", "Cp949", "Cp949C" });
        encodings.put("TCVN", new String[] { "Cp1258" });
        encodings.put("WIN1256", new String[] { "Cp1256" });
        encodings.put("WIN1250", new String[] { "Cp1250" });
        encodings.put("WIN874", new String[] { "MS874", "Cp874" });
        encodings.put("WIN", new String[] { "Cp1251" });
        encodings.put("ALT", new String[] { "Cp866" });
        // We prefer KOI8-U, since it is a superset of KOI8-R.
        encodings.put("KOI8", new String[] { "KOI8_U", "KOI8_R" });
        // If the database isn't encoding-aware then we can't have
        // any preferred encodings.
        encodings.put("UNKNOWN", new String[0]);
        // The following encodings do not have a java equivalent
        encodings.put("MULE_INTERNAL", new String[0]);
        encodings.put("LATIN6", new String[0]);
        encodings.put("LATIN8", new String[0]);
        encodings.put("LATIN10", new String[0]);
    }

    // JVM encoding name, or null to use the JVM's default encoding.
    private final String encoding;
    // Result of testAsciiNumbers(), computed once at construction time.
    private final boolean fastASCIINumbers;

    /**
     * Create an Encoding for the given JVM encoding name.
     *
     * @param encoding the JVM encoding name, or <code>null</code> to use
     *   the JVM's default encoding
     */
    protected Encoding(String encoding)
    {
        this.encoding = encoding;
        // Must run after this.encoding is assigned: the check encodes a
        // sample string using this instance.
        fastASCIINumbers = testAsciiNumbers();
    }

    /**
     * Returns true if this encoding has characters
     * '-' and '0'..'9' in exactly the same position as ASCII.
     *
     * @return true if the bytes can be scanned directly for ascii numbers.
     */
    public boolean hasAsciiNumbers() {
        return fastASCIINumbers;
    }

    /**
     * Construct an Encoding for a given JVM encoding.
     *
     * @param jvmEncoding the name of the JVM encoding; may be
     *   <code>null</code>, which is treated as unavailable
     * @return an Encoding instance for the specified encoding,
     *   or an Encoding instance for the default JVM encoding if the
     *   specified encoding is unavailable.
     */
    public static Encoding getJVMEncoding(String jvmEncoding) {
        if (!isAvailable(jvmEncoding))
            return defaultEncoding();

        // jvmEncoding is non-null here: isAvailable rejects null.
        if (jvmEncoding.equals("UTF-8") || jvmEncoding.equals("UTF8"))
            return new UTF8Encoding(jvmEncoding);

        return new Encoding(jvmEncoding);
    }

    /**
     * Construct an Encoding for a given database encoding.
     *
     * @param databaseEncoding the name of the database encoding; may be
     *   <code>null</code>, which is treated as unavailable
     * @return an Encoding instance for the specified encoding,
     *   or an Encoding instance for the default JVM encoding if the
     *   specified encoding is unavailable.
     */
    public static Encoding getDatabaseEncoding(String databaseEncoding)
    {
        // If the backend encoding is known and there is a suitable
        // encoding in the JVM we use that. Otherwise we fall back
        // to the default encoding of the JVM.

        String[] candidates = (String[]) encodings.get(databaseEncoding);
        if (candidates != null)
        {
            for (int i = 0; i < candidates.length; i++)
            {
                if (isAvailable(candidates[i]))
                {
                    return new Encoding(candidates[i]);
                }
            }
        }

        // Try the encoding name directly -- maybe the charset has been
        // provided by the user.
        if (isAvailable(databaseEncoding))
            return new Encoding(databaseEncoding);

        // Fall back to default JVM encoding.
        return defaultEncoding();
    }

    /**
     * Get the name of the (JVM) encoding used.
     *
     * @return the JVM encoding name used by this instance, or
     *   <code>null</code> if this instance uses the default JVM encoding.
     */
    public String name()
    {
        return encoding;
    }

    /**
     * Encode a string to an array of bytes.
     *
     * @param s the string to encode
     * @return a bytearray containing the encoded string, or
     *   <code>null</code> if <code>s</code> is <code>null</code>
     * @throws IOException if something goes wrong
     */
    public byte[] encode(String s) throws IOException
    {
        if (s == null)
            return null;

        if (encoding == null)
            return s.getBytes();

        return s.getBytes(encoding);
    }

    /**
     * Decode an array of bytes into a string.
     *
     * @param encodedString a bytearray containing the encoded string
     * @param offset the offset in <code>encodedString</code> of the first byte of the encoded representation
     * @param length the length, in bytes, of the encoded representation
     * @return the decoded string
     * @throws IOException if something goes wrong
     */
    public String decode(byte[] encodedString, int offset, int length) throws IOException
    {
        if (encoding == null)
            return new String(encodedString, offset, length);

        return new String(encodedString, offset, length, encoding);
    }

    /**
     * Decode an array of bytes into a string.
     *
     * @param encodedString a bytearray containing the encoded string;
     *   must not be <code>null</code>
     * @return the decoded string
     * @throws IOException if something goes wrong
     */
    public String decode(byte[] encodedString) throws IOException
    {
        return decode(encodedString, 0, encodedString.length);
    }

    /**
     * Get a Reader that decodes the given InputStream using this encoding.
     *
     * @param in the underlying stream to decode from
     * @return a non-null Reader implementation.
     * @throws IOException if something goes wrong
     */
    public Reader getDecodingReader(InputStream in) throws IOException
    {
        if (encoding == null)
            return new InputStreamReader(in);

        return new InputStreamReader(in, encoding);
    }

    /**
     * Get a Writer that encodes to the given OutputStream using this encoding.
     *
     * @param out the underlying stream to encode to
     * @return a non-null Writer implementation.
     * @throws IOException if something goes wrong
     */
    public Writer getEncodingWriter(OutputStream out) throws IOException
    {
        if (encoding == null)
            return new OutputStreamWriter(out);

        return new OutputStreamWriter(out, encoding);
    }

    /**
     * Get an Encoding using the default encoding for the JVM.
     * @return an Encoding instance
     */
    public static Encoding defaultEncoding()
    {
        return DEFAULT_ENCODING;
    }

    /**
     * Test if an encoding is available in the JVM.
     *
     * @param encodingName the JVM encoding name to test; may be
     *   <code>null</code>
     * @return true iff the encoding is supported; always false for
     *   <code>null</code>
     */
    private static boolean isAvailable(String encodingName)
    {
        // String.getBytes(String) throws NullPointerException for a null
        // name; a missing name is simply "not available" so that callers
        // fall back to the default encoding as documented.
        if (encodingName == null)
            return false;

        try
        {
            "DUMMY".getBytes(encodingName);
            return true;
        }
        catch (java.io.UnsupportedEncodingException e)
        {
            return false;
        }
    }

    public String toString() {
        return (encoding == null ? "<default JVM encoding>" : encoding);
    }

    /**
     * Checks whether this encoding is compatible with ASCII for the number
     * characters '-' and '0'..'9'. Where compatible means that they are encoded
     * with exactly the same values.
     *
     * @return If faster ASCII number parsing can be used with this encoding.
     */
    private boolean testAsciiNumbers() {
        // TODO: test all postgres supported encoding to see if there are
        // any which do _not_ have ascii numbers in same location
        // at least all the encoding listed in the encodings hashmap have
        // working ascii numbers
        try {
            String test = "-0123456789";
            byte[] bytes = encode(test);
            // Round-trip through US-ASCII: the strings only match when the
            // encoded bytes are exactly the ASCII codes of the characters.
            String res = new String(bytes, "US-ASCII");
            return test.equals(res);
        } catch (IOException e) {
            // Also covers UnsupportedEncodingException, which is a
            // subclass of IOException.
            return false;
        }
    }
}