File: DatabaseEncodingTest.java

package info (click to toggle)
libpgjava 8.4-701-1
links: PTS, VCS
area: main
in suites: squeeze
size: 3,532 kB
ctags: 4,162
sloc: java: 33,948; xml: 3,158; makefile: 14; sh: 10
file content (322 lines) | stat: -rw-r--r-- 12,613 bytes
/*-------------------------------------------------------------------------
*
* Copyright (c) 2004-2008, PostgreSQL Global Development Group
*
* IDENTIFICATION
*   $PostgreSQL: pgjdbc/org/postgresql/test/jdbc2/DatabaseEncodingTest.java,v 1.8 2008/01/08 06:56:30 jurka Exp $
*
*-------------------------------------------------------------------------
*/
package org.postgresql.test.jdbc2;

import org.postgresql.test.TestUtil;
import junit.framework.TestCase;
import java.sql.*;
import org.postgresql.core.Encoding;
import java.io.IOException;
import java.util.Arrays;

/*
 * Test case for various encoding problems.
 *
 * Ensure that we can do a round-trip of all server-supported unicode
 * values without trashing them, and that bad encodings are
 * detected.
 */
public class DatabaseEncodingTest extends TestCase
{
    private Connection con;

    public DatabaseEncodingTest(String name)
    {
        super(name);
    }

    private static final int STEP = 100;

    // Set up the fixture for this testcase: a connection to a database with
    // a table for this test.
    protected void setUp() throws Exception
    {
        con = TestUtil.openDB();
        TestUtil.createTable(con,
                             "testdbencoding",
                             "unicode_ordinal integer primary key not null, unicode_string varchar(" + STEP + ")");
        // disabling auto commit makes the test run faster
        // by not committing each insert individually.
        con.setAutoCommit(false);
    }

    // Tear down the fixture for this test case.
    protected void tearDown() throws Exception
    {
        con.setAutoCommit(true);
        TestUtil.dropTable(con, "testdbencoding");
        TestUtil.closeDB(con);
    }

    private static String dumpString(String s) {
        StringBuffer sb = new StringBuffer(s.length() * 6);
        for (int i = 0; i < s.length(); ++i)
        {
            sb.append("\\u");
            char c = s.charAt(i);
            sb.append(Integer.toHexString((c >> 12)&15));
            sb.append(Integer.toHexString((c >> 8)&15));
            sb.append(Integer.toHexString((c >> 4)&15));
            sb.append(Integer.toHexString(c&15));
        }
        return sb.toString();
    }

    public void testEncoding() throws Exception {
        // Check that we have a UNICODE server encoding, or we must skip this test.
        Statement stmt = con.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT getdatabaseencoding()");
        assertTrue(rs.next());

        String dbEncoding = rs.getString(1);
        if (!dbEncoding.equals("UNICODE") && !dbEncoding.equals("UTF8"))
        {
            System.err.println("DatabaseEncodingTest: Skipping UNICODE database tests as test database encoding is " + dbEncoding);
            rs.close();
            return ; // not a UNICODE database.
        }

        rs.close();

        boolean testHighUnicode = TestUtil.haveMinimumServerVersion(con, "8.1");

        // Create data.
        // NB: we avoid d800-dfff as those are reserved for surrogates in UTF-16
        PreparedStatement insert = con.prepareStatement("INSERT INTO testdbencoding(unicode_ordinal, unicode_string) VALUES (?,?)");
        for (int i = 1; i < 0xd800; i += STEP)
        {
            int count = (i + STEP) > 0xd800 ? 0xd800 - i : STEP;
            char[] testChars = new char[count];
            for (int j = 0; j < count; ++j)
                testChars[j] = (char)(i + j);

            String testString = new String(testChars);

            insert.setInt(1, i);
            insert.setString(2, testString);
            assertEquals(1, insert.executeUpdate());
        }

        for (int i = 0xe000; i < 0x10000; i += STEP)
        {
            int count = (i + STEP) > 0x10000 ? 0x10000 - i : STEP;
            char[] testChars = new char[count];
            for (int j = 0; j < count; ++j)
                testChars[j] = (char)(i + j);

            String testString = new String(testChars);

            insert.setInt(1, i);
            insert.setString(2, testString);
            assertEquals(1, insert.executeUpdate());
        }

        if (testHighUnicode) {
            for (int i = 0x10000; i < 0x110000; i += STEP)
            {
                int count = (i + STEP) > 0x110000 ? 0x110000 - i : STEP;
                char[] testChars = new char[count*2];
                for (int j = 0; j < count; ++j) {
                    testChars[j*2]   = (char)(0xd800 + ((i + j - 0x10000) >> 10));
                    testChars[j*2+1] = (char)(0xdc00 + ((i + j - 0x10000) & 0x3ff));
                }
                
                String testString = new String(testChars);
                
                insert.setInt(1, i);
                insert.setString(2, testString);
                
                //System.err.println("Inserting: " + dumpString(testString));

                assertEquals(1, insert.executeUpdate());
            }
        }            

        con.commit();

        // Check data.
        stmt.setFetchSize(1);
        rs = stmt.executeQuery("SELECT unicode_ordinal, unicode_string FROM testdbencoding ORDER BY unicode_ordinal");
        for (int i = 1; i < 0xd800; i += STEP)
        {
            assertTrue(rs.next());
            assertEquals(i, rs.getInt(1));

            int count = (i + STEP) > 0xd800 ? 0xd800 - i : STEP;
            char[] testChars = new char[count];
            for (int j = 0; j < count; ++j)
                testChars[j] = (char)(i + j);

            String testString = new String(testChars);

            assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(rs.getString(2)));
        }

        for (int i = 0xe000; i < 0x10000; i += STEP)
        {
            assertTrue(rs.next());
            assertEquals(i, rs.getInt(1));

            int count = (i + STEP) > 0x10000 ? 0x10000 - i : STEP;
            char[] testChars = new char[count];
            for (int j = 0; j < count; ++j)
                testChars[j] = (char)(i + j);

            String testString = new String(testChars);

            assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(rs.getString(2)));
        }

        if (testHighUnicode) {
            for (int i = 0x10000; i < 0x110000; i += STEP)
            {
                assertTrue(rs.next());
                assertEquals(i, rs.getInt(1));

                int count = (i + STEP) > 0x110000 ? 0x110000 - i : STEP;
                char[] testChars = new char[count*2];
                for (int j = 0; j < count; ++j) {
                    testChars[j*2]   = (char)(0xd800 + ((i + j - 0x10000) >> 10));
                    testChars[j*2+1] = (char)(0xdc00 + ((i + j - 0x10000) & 0x3ff));
                }
                
                String testString = new String(testChars);
                
                assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(rs.getString(2)));
            }
        }
    }

    public void testUTF8Decode() throws Exception {
        // Tests for our custom UTF-8 decoder.

        Encoding utf8Encoding = Encoding.getJVMEncoding("UTF-8");
        
        for (int ch = 0; ch < 0x110000; ++ch) {
            if (ch >= 0xd800 && ch < 0xe000)
                continue; // Surrogate range.
            
            String testString;
            if (ch >= 0x10000) {
                testString = new String(new char[] {
                    (char) (0xd800 + ((ch-0x10000) >> 10)),
                    (char) (0xdc00 + ((ch-0x10000) & 0x3ff)) });
            } else {
                testString = new String(new char[] { (char)ch });
            }
            
            byte[] jvmEncoding = testString.getBytes("UTF-8");
            String jvmDecoding = new String(jvmEncoding, 0, jvmEncoding.length, "UTF-8");
            String ourDecoding = utf8Encoding.decode(jvmEncoding, 0, jvmEncoding.length);
            
            assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(jvmDecoding));
            assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(ourDecoding));
        }
    }
 
    public void testBadUTF8Decode() throws Exception {
        Encoding utf8Encoding = Encoding.getJVMEncoding("UTF-8");

        byte[][] badSequences = new byte[][] {
            // One-byte illegal sequences
            { (byte)0x80 }, // First byte may not be 10xxxxxx
            
            // Two-byte illegal sequences
            { (byte)0xc0, (byte)0x00 },  // Second byte must be 10xxxxxx
            { (byte)0xc0, (byte)0x80 },  // Can't represent a value < 0x80
            
            // Three-byte illegal sequences
            { (byte)0xe0, (byte)0x00 },  // Second byte must be 10xxxxxx
            { (byte)0xe0, (byte)0x80, (byte)0x00 },  // Third byte must be 10xxxxxx
            { (byte)0xe0, (byte)0x80, (byte)0x80 },  // Can't represent a value < 0x800
            { (byte)0xed, (byte)0xa0, (byte)0x80 },  // Not allowed to encode the range d800..dfff
            
            // Four-byte illegal sequences
            { (byte)0xf0, (byte)0x00 },  // Second byte must be 10xxxxxx
            { (byte)0xf0, (byte)0x80, (byte)0x00 },  // Third byte must be 10xxxxxx
            { (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x00 },  // Fourth byte must be 10xxxxxx
            { (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x80 },  // Can't represent a value < 0x10000
            
            // Five-byte illegal sequences
            { (byte)0xf8 }, // Can't have a five-byte sequence.

            // Six-byte illegal sequences
            { (byte)0xfc }, // Can't have a six-byte sequence.

            // Seven-byte illegal sequences
            { (byte)0xfe }, // Can't have a seven-byte sequence.
            
            // Eigth-byte illegal sequences
            { (byte)0xff }, // Can't have an eight-byte sequence.
        }; 

        byte[] paddedSequence = new byte[32];
        for (int i = 0; i < badSequences.length; ++i) {
            byte[] sequence = badSequences[i];
            
            try {
                String str = utf8Encoding.decode(sequence, 0, sequence.length);
                fail("Expected an IOException on sequence " + i + ", but decoded to <" + str + ">");
            } catch (IOException ioe) {
                // Expected exception.
            }
            
            // Try it with padding.
            Arrays.fill(paddedSequence, (byte)0);
            System.arraycopy(sequence, 0, paddedSequence, 0, sequence.length);
            
            try {
                String str = utf8Encoding.decode(paddedSequence, 0, paddedSequence.length);
                fail("Expected an IOException on sequence " + i + ", but decoded to <" + str + ">");
            } catch (IOException ioe) {
                // Expected exception.
            }
        }
    }

    public void testTruncatedUTF8Decode() throws Exception {
        Encoding utf8Encoding = Encoding.getJVMEncoding("UTF-8");

        byte[][] shortSequences = new byte[][] {
            { (byte)0xc0 },              // Second byte must be present
            
            { (byte)0xe0 },              // Second byte must be present
            { (byte)0xe0, (byte)0x80 },  // Third byte must be present
            
            { (byte)0xf0 },              // Second byte must be present
            { (byte)0xf0, (byte)0x80 },  // Third byte must be present
            { (byte)0xf0, (byte)0x80, (byte)0x80 },  // Fourth byte must be present
        };
        
        byte[] paddedSequence = new byte[32];
        for (int i = 0; i < shortSequences.length; ++i) {
            byte[] sequence = shortSequences[i];
            
            try {
                String str = utf8Encoding.decode(sequence, 0, sequence.length);
                fail("Expected an IOException on sequence " + i + ", but decoded to <" + str + ">");
            } catch (IOException ioe) {
                // Expected exception.
            }
            
            
            // Try it with padding and a truncated length.
            Arrays.fill(paddedSequence, (byte)0);
            System.arraycopy(sequence, 0, paddedSequence, 0, sequence.length);
            
            try {
                String str = utf8Encoding.decode(paddedSequence, 0, sequence.length);
                fail("Expected an IOException on sequence " + i + ", but decoded to <" + str + ">");
            } catch (IOException ioe) {
                // Expected exception.
            }
        }
    }
}