/*-------------------------------------------------------------------------
*
* Copyright (c) 2003-2008, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgjdbc/org/postgresql/core/Encoding.java,v 1.23 2008/01/08 06:56:27 jurka Exp $
*
*-------------------------------------------------------------------------
*/
package org.postgresql.core;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.io.IOException;
import java.util.HashMap;
/**
 * Representation of a particular character encoding.
 *
 * <p>An instance wraps the name of a JVM encoding, or <code>null</code> to
 * mean the JVM's default encoding, and converts between Strings and bytes
 * using it. Instances are obtained through {@link #getJVMEncoding},
 * {@link #getDatabaseEncoding} or {@link #defaultEncoding}.
 */
public class Encoding
{
    private static final Encoding DEFAULT_ENCODING = new Encoding(null);

    /*
     * Preferred JVM encodings for backend encodings.
     * Key: backend encoding name (String).
     * Value: String[] of candidate JVM encoding names, most preferred first.
     * An empty array means there is no preferred JVM encoding.
     */
    private static final HashMap encodings = new HashMap();

    static {
        //Note: this list should match the set of supported server
        // encodings found in backend/util/mb/encnames.c
        encodings.put("SQL_ASCII", new String[] { "ASCII", "us-ascii" });
        encodings.put("UNICODE", new String[] { "UTF-8", "UTF8" });
        encodings.put("UTF8", new String[] { "UTF-8", "UTF8" }); // 8.1's canonical name for UNICODE changed.
        encodings.put("LATIN1", new String[] { "ISO8859_1" });
        encodings.put("LATIN2", new String[] { "ISO8859_2" });
        encodings.put("LATIN3", new String[] { "ISO8859_3" });
        encodings.put("LATIN4", new String[] { "ISO8859_4" });
        encodings.put("ISO_8859_5", new String[] { "ISO8859_5" });
        encodings.put("ISO_8859_6", new String[] { "ISO8859_6" });
        encodings.put("ISO_8859_7", new String[] { "ISO8859_7" });
        encodings.put("ISO_8859_8", new String[] { "ISO8859_8" });
        encodings.put("LATIN5", new String[] { "ISO8859_9" });
        encodings.put("LATIN7", new String[] { "ISO8859_13" });
        encodings.put("LATIN9", new String[] { "ISO8859_15_FDIS" });
        encodings.put("EUC_JP", new String[] { "EUC_JP" });
        encodings.put("EUC_CN", new String[] { "EUC_CN" });
        encodings.put("EUC_KR", new String[] { "EUC_KR" });
        encodings.put("JOHAB", new String[] { "Johab" });
        encodings.put("EUC_TW", new String[] { "EUC_TW" });
        encodings.put("SJIS", new String[] { "MS932", "SJIS" });
        encodings.put("BIG5", new String[] { "Big5", "MS950", "Cp950" });
        encodings.put("GBK", new String[] { "GBK", "MS936" });
        encodings.put("UHC", new String[] { "MS949", "Cp949", "Cp949C" });
        encodings.put("TCVN", new String[] { "Cp1258" });
        encodings.put("WIN1256", new String[] { "Cp1256" });
        encodings.put("WIN1250", new String[] { "Cp1250" });
        encodings.put("WIN874", new String[] { "MS874", "Cp874" });
        encodings.put("WIN", new String[] { "Cp1251" });
        encodings.put("ALT", new String[] { "Cp866" });
        // We prefer KOI8-U, since it is a superset of KOI8-R.
        encodings.put("KOI8", new String[] { "KOI8_U", "KOI8_R" });
        // If the database isn't encoding-aware then we can't have
        // any preferred encodings.
        encodings.put("UNKNOWN", new String[0]);
        // The following encodings do not have a java equivalent
        encodings.put("MULE_INTERNAL", new String[0]);
        encodings.put("LATIN6", new String[0]);
        encodings.put("LATIN8", new String[0]);
        encodings.put("LATIN10", new String[0]);
    }

    /** JVM encoding name, or null to use the JVM's default encoding. */
    private final String encoding;

    /** Cached result of {@link #testAsciiNumbers()}, computed once at construction. */
    private final boolean fastASCIINumbers;

    /**
     * Creates an Encoding for the given JVM encoding name.
     *
     * @param encoding the JVM encoding name, or <code>null</code> to use
     * the JVM's default encoding
     */
    protected Encoding(String encoding)
    {
        this.encoding = encoding;
        // Must run after this.encoding is assigned: the probe calls encode().
        fastASCIINumbers = testAsciiNumbers();
    }

    /**
     * Returns true if this encoding has characters
     * '-' and '0'..'9' in exactly the same position as ASCII.
     *
     * @return true if the bytes can be scanned directly for ascii numbers.
     */
    public boolean hasAsciiNumbers() {
        return fastASCIINumbers;
    }

    /**
     * Construct an Encoding for a given JVM encoding.
     *
     * @param jvmEncoding the name of the JVM encoding; may be
     * <code>null</code>, which is treated as unavailable
     * @return an Encoding instance for the specified encoding,
     * or an Encoding instance for the default JVM encoding if the
     * specified encoding is unavailable.
     */
    public static Encoding getJVMEncoding(String jvmEncoding) {
        // isAvailable() rejects null, so a null name falls back to the
        // default instead of failing with a NullPointerException.
        if (!isAvailable(jvmEncoding))
        {
            return defaultEncoding();
        }

        // UTF-8 is handled by the dedicated UTF8Encoding implementation.
        if (jvmEncoding.equals("UTF-8") || jvmEncoding.equals("UTF8"))
        {
            return new UTF8Encoding(jvmEncoding);
        }
        return new Encoding(jvmEncoding);
    }

    /**
     * Construct an Encoding for a given database encoding.
     *
     * @param databaseEncoding the name of the database encoding; may be
     * <code>null</code>, which is treated as unknown
     * @return an Encoding instance for the specified encoding,
     * or an Encoding instance for the default JVM encoding if the
     * specified encoding is unavailable.
     */
    public static Encoding getDatabaseEncoding(String databaseEncoding)
    {
        // If the backend encoding is known and there is a suitable
        // encoding in the JVM we use that. Otherwise we fall back
        // to the default encoding of the JVM.
        String[] candidates = (String[]) encodings.get(databaseEncoding);
        if (candidates != null)
        {
            // Candidates are listed in order of preference; take the
            // first one this JVM supports.
            for (int i = 0; i < candidates.length; i++)
            {
                if (isAvailable(candidates[i]))
                {
                    return new Encoding(candidates[i]);
                }
            }
        }

        // Try the encoding name directly -- maybe the charset has been
        // provided by the user.
        if (isAvailable(databaseEncoding))
        {
            return new Encoding(databaseEncoding);
        }

        // Fall back to default JVM encoding.
        return defaultEncoding();
    }

    /**
     * Get the name of the (JVM) encoding used.
     *
     * @return the JVM encoding name used by this instance, or
     * <code>null</code> for the default JVM encoding.
     */
    public String name()
    {
        return encoding;
    }

    /**
     * Encode a string to an array of bytes.
     *
     * @param s the string to encode
     * @return a bytearray containing the encoded string, or
     * <code>null</code> if <code>s</code> is <code>null</code>
     * @throws IOException if something goes wrong
     */
    public byte[] encode(String s) throws IOException
    {
        if (s == null)
        {
            return null;
        }

        if (encoding == null)
        {
            // No explicit name: use the JVM's default encoding.
            return s.getBytes();
        }

        return s.getBytes(encoding);
    }

    /**
     * Decode an array of bytes into a string.
     *
     * @param encodedString a bytearray containing the encoded string to decode
     * @param offset the offset in <code>encodedString</code> of the first byte of the encoded representation
     * @param length the length, in bytes, of the encoded representation
     * @return the decoded string
     * @throws IOException if something goes wrong
     */
    public String decode(byte[] encodedString, int offset, int length) throws IOException
    {
        if (encoding == null)
        {
            // No explicit name: use the JVM's default encoding.
            return new String(encodedString, offset, length);
        }

        return new String(encodedString, offset, length, encoding);
    }

    /**
     * Decode an entire array of bytes into a string.
     *
     * @param encodedString a bytearray containing the encoded string to decode
     * @return the decoded string
     * @throws IOException if something goes wrong
     */
    public String decode(byte[] encodedString) throws IOException
    {
        return decode(encodedString, 0, encodedString.length);
    }

    /**
     * Get a Reader that decodes the given InputStream using this encoding.
     *
     * @param in the underlying stream to decode from
     * @return a non-null Reader implementation.
     * @throws IOException if something goes wrong
     */
    public Reader getDecodingReader(InputStream in) throws IOException
    {
        if (encoding == null)
        {
            return new InputStreamReader(in);
        }

        return new InputStreamReader(in, encoding);
    }

    /**
     * Get a Writer that encodes to the given OutputStream using this encoding.
     *
     * @param out the underlying stream to encode to
     * @return a non-null Writer implementation.
     * @throws IOException if something goes wrong
     */
    public Writer getEncodingWriter(OutputStream out) throws IOException
    {
        if (encoding == null)
        {
            return new OutputStreamWriter(out);
        }

        return new OutputStreamWriter(out, encoding);
    }

    /**
     * Get an Encoding using the default encoding for the JVM.
     *
     * @return the shared Encoding instance for the default JVM encoding
     */
    public static Encoding defaultEncoding()
    {
        return DEFAULT_ENCODING;
    }

    /**
     * Test if an encoding is available in the JVM.
     *
     * @param encodingName the JVM encoding name to test; may be
     * <code>null</code>
     * @return true iff the encoding is supported; always false for
     * <code>null</code>
     */
    private static boolean isAvailable(String encodingName)
    {
        // String.getBytes(String) throws NullPointerException for a null
        // name. A missing name is simply "not available", so that callers
        // fall back to the default encoding as documented.
        if (encodingName == null)
        {
            return false;
        }

        try
        {
            "DUMMY".getBytes(encodingName);
            return true;
        }
        catch (java.io.UnsupportedEncodingException e)
        {
            return false;
        }
    }

    /**
     * Returns the JVM encoding name, or a placeholder for the default.
     *
     * @return the encoding name, or "&lt;default JVM encoding&gt;" when
     * no explicit name is set
     */
    public String toString() {
        return (encoding == null ? "<default JVM encoding>" : encoding);
    }

    /**
     * Checks whether this encoding is compatible with ASCII for the number
     * characters '-' and '0'..'9'. Where compatible means that they are encoded
     * with exactly the same values.
     *
     * @return If faster ASCII number parsing can be used with this encoding.
     */
    private boolean testAsciiNumbers() {
        // TODO: test all postgres supported encoding to see if there are
        // any which do _not_ have ascii numbers in same location
        // at least all the encoding listed in the encodings hashmap have
        // working ascii numbers
        try {
            String test = "-0123456789";
            // Encode with this encoding, decode as US-ASCII; a lossless
            // round trip means the byte values match ASCII.
            byte[] bytes = encode(test);
            String res = new String(bytes, "US-ASCII");
            return test.equals(res);
        } catch (IOException e) {
            // Also covers java.io.UnsupportedEncodingException, which is
            // a subclass of IOException.
            return false;
        }
    }
}
|