# 2002 May 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library. The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
# Take the directory part of the script path in $argv0 and source
# tester.tcl from that directory.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Skip this test if the build does not support multiple encodings.
# When the utf16 capability is absent the file finishes immediately and
# none of the commands below are run.
#
ifcapable {!utf16} {
finish_test
return
}
# Compare two byte strings as lists of integers.
#
# $testname is the name handed to do_test.
# $got is the byte string produced by the code under test.
# $expect is the byte string it should equal.
#
# Both strings are decoded with "binary scan ... c*" (one signed 8-bit
# integer per byte) and the resulting lists are compared by do_test.
proc do_bincmp_test {testname got expect} {
  binary scan $got c* actualBytes
  binary scan $expect c* wantedBytes
  # The test script simply yields the decoded "got" list.
  do_test $testname [list set dummy $actualBytes] $wantedBytes
}
# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
#
# Returns the byte-swapped string. The input is expected to contain an
# even number of bytes (whole 16-bit code units). An empty input returns
# an empty string.
proc swap_byte_order {utf16} {
  # Start from an empty list so that an empty input produces an empty
  # result instead of failing on a variable that was never created.
  set swapped {}
  binary scan $utf16 c* bytes
  foreach {first second} $bytes {
    lappend swapped $second $first
  }
  return [binary format c* $swapped]
}
#
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts.
#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
# SQLite routines produce the same results.
#
# 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
# SQLite routines produce the same results.
#
# 3. Use the SQLite routines to convert the native machine order UTF-16
# representation back to the original UTF-8. Check that the result
# matches the original representation.
#
# 4. Add a byte-order mark to each of the UTF-16 representations and
# check that the SQLite routines can convert them back to UTF-8. For
# byte-order mark info, refer to section 3.10 of the unicode standard.
#
# 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
# that SQLite can convert them both to native byte order UTF-16
# strings, sans BOM.
#
# Coverage:
#
# sqlite_utf8to16be (step 2)
# sqlite_utf8to16le (step 1)
# sqlite_utf16to8 (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {
  # The reference strings come from Tcl's "unicode" encoding and are
  # byte-swapped below depending on the byte order Tcl reports.
  set hostIsLE [expr {$::tcl_platform(byteOrder) eq "littleEndian"}]

  # Step 1: UTF-8 to UTF-16le, SQLite result against the Tcl reference.
  # The reference gets a two-byte zero terminator appended.
  set leFromSqlite [test_translate $str UTF8 UTF16LE]
  set leRef [encoding convertto unicode $str]
  append leRef "\x00\x00"
  if {!$hostIsLE} {
    set leRef [swap_byte_order $leRef]
  }
  do_bincmp_test $testname.1 $leFromSqlite $leRef

  # Step 2: UTF-8 to UTF-16be, same scheme with the swap condition inverted.
  set beFromSqlite [test_translate $str UTF8 UTF16BE]
  set beRef [encoding convertto unicode $str]
  append beRef "\x00\x00"
  if {$hostIsLE} {
    set beRef [swap_byte_order $beRef]
  }
  do_bincmp_test $testname.2 $beFromSqlite $beRef

  # Step 3: native-order UTF-16 back to UTF-8.
  if {$hostIsLE} {
    set nativeRef $leRef
  } else {
    set nativeRef $beRef
  }
  set utf8FromSqlite [test_translate $nativeRef UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8FromSqlite [binarize $str]

  # Step 4: prefix each reference with its byte-order mark and convert
  # back to UTF-8. The little-endian call passes an extra argument (1)
  # to test_translate, exactly as this test always has.
  append leWithBom "\xFF\xFE" $leRef
  do_bincmp_test $testname.4.le \
      [test_translate $leWithBom UTF16 UTF8 1] [binarize $str]

  append beWithBom "\xFE\xFF" $beRef
  do_bincmp_test $testname.4.be \
      [test_translate $beWithBom UTF16 UTF8] [binarize $str]

  # Step 5: BOM-prefixed UTF-16 to UTF-16 in each direction. Each row is
  # test-name suffix, input, source encoding, target encoding, expected.
  foreach {suffix input fromEnc toEnc wanted} [list \
      5.le.le $leWithBom UTF16LE UTF16LE $leRef \
      5.be.be $beWithBom UTF16   UTF16BE $beRef \
      5.be.le $beWithBom UTF16   UTF16LE $leRef \
      5.le.be $leWithBom UTF16   UTF16BE $beRef \
  ] {
    set utf16FromSqlite [test_translate $input $fromEnc $toEnc]
    do_bincmp_test $testname.$suffix $utf16FromSqlite $wanted
  }
}
# Run translate_selftest once before any conversion case.
translate_selftest

# Conversion cases as pairs of test-name prefix and input string, run in
# the order listed.
foreach {encCaseName encCaseText} [list \
    enc-1  "hello world" \
    enc-2  "sqlite" \
    enc-3  "" \
    enc-X  "\u0100" \
    enc-4  "\u1234" \
    enc-5  "\u4321abc" \
    enc-6  "\u4321\u1234" \
    enc-7  [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100] \
    enc-8  [string repeat "\u007E\u007F\u0080\u0081" 100] \
    enc-9  [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100] \
    enc-10 [string repeat "\uE000" 100] \
] {
  test_conversion $encCaseName $encCaseText
}
# Do not leave the loop variables behind in the global namespace.
unset encCaseName encCaseText
# Comparison callback: orders $zLeft and $zRight with "string compare"
# and returns its result (-1, 0 or 1). The $enc argument is accepted but
# not consulted.
proc test_collate {enc zLeft zRight} {
  set ordering [string compare $zLeft $zRight]
  return $ordering
}
# Make the test_collate collation named in the CREATE TABLE below available
# on the $::DB handle.
# NOTE(review): the meaning of the three flag arguments (0 0 1) is defined by
# add_test_collate, which is not visible in this file - presumably they
# select the text encodings the collation is registered for; confirm.
add_test_collate $::DB 0 0 1
# enc-11.1: create a table whose column "a" uses the test_collate collation,
# insert two rows whose "a" values are blobs cast to TEXT, then build an
# index on (a, b). X'C388' is the UTF-8 encoding of U+00C8; the second value
# is 0xC0 followed by a long run of 0x80 bytes and ending in 0x83 0x88.
do_test enc-11.1 {
execsql {
CREATE TABLE ab(a COLLATE test_collate, b);
INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
CREATE INDEX ab_i ON ab(a, b);
}
} {}
# enc-11.2: look up U+00C8 through a Tcl variable. The expected count of 2
# means both rows inserted above must compare equal to it.
do_test enc-11.2 {
set cp200 "\u00C8"
execsql {
SELECT count(*) FROM ab WHERE a = $::cp200;
}
} {2}
#-------------------------------------------------------------------------
# enc-12.*: PRAGMA encoding in combination with ATTACH, the backup method
# and TEMP tables.
#
# NOTE(review): these tests assume the "db" handle created by reset_db is
# backed by the file test.db - confirm against tester.tcl.
reset_db
forcedelete test.db2
forcedelete test.db3
# enc-12.0: set the main database to utf-8 and create t1, attach test.db3
# as aux and create t3 there; the encoding is expected to read back as UTF-8.
do_execsql_test enc-12.0 {
PRAGMA encoding = 'utf-8';
CREATE TABLE t1(a, b, c);
INSERT INTO t1 VALUES('a', 'b', 'c');
ATTACH 'test.db3' AS aux;
CREATE TABLE aux.t3(x, y, z);
INSERT INTO t3 VALUES('xxx', 'yyy', 'zzz');
PRAGMA encoding;
} {UTF-8}
# enc-12.1: open test.db2 on a second handle, set it to UTF-16le and create
# t2; the encoding is expected to read back as UTF-16le.
do_test enc-12.1 {
sqlite3 db2 test.db2
db2 eval {
PRAGMA encoding = 'UTF-16le';
CREATE TABLE t2(d, e, f);
INSERT INTO t2 VALUES('d', 'e', 'f');
PRAGMA encoding;
}
} {UTF-16le}
# enc-12.2: run db2's backup method with test.db as the target file, then
# close db2.
do_test enc-12.2 {
db2 backup test.db
db2 close
} {}
# enc-12.3: on the original handle, selecting from t2 is expected to fail
# with the text-encoding mismatch error.
do_catchsql_test enc-12.3 {
SELECT * FROM t2;
} {1 {attached databases must use the same text encoding as main database}}
# Reopen the handle directly on test.db3.
db close
sqlite3 db test.db3
# enc-12.4: t3 is expected to return the same row before and after the
# PRAGMA encoding statement.
do_execsql_test enc-12.4 {
SELECT * FROM t3;
PRAGMA encoding = 'UTF-16le';
SELECT * FROM t3;
} {xxx yyy zzz xxx yyy zzz}
db close
sqlite3 db test.db3
# NOTE(review): "breakpoint" is defined outside this file and looks like a
# leftover debugging hook - confirm whether it can be removed.
breakpoint
# enc-12.5: on a fresh connection to test.db3, requesting UTF-16le is
# expected to leave the reported encoding at UTF-8.
do_execsql_test enc-12.5 {
PRAGMA encoding = 'UTF-16le';
PRAGMA encoding;
} {UTF-8}
reset_db
# enc-12.6: set UTF-8 and create a TEMP table t1 holding one row.
do_execsql_test enc-12.6 {
PRAGMA encoding = 'UTF-8';
CREATE TEMP TABLE t1(a, b, c);
INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
}
# enc-12.7: back up test.db2 onto test.db again; the TEMP table is expected
# to remain readable afterwards.
do_test enc-12.7 {
sqlite3 db2 test.db2
db2 backup test.db
db2 close
db eval {
SELECT * FROM t1;
}
} {xxx yyy zzz}
# enc-12.8: the first SELECT is expected to raise the text-encoding
# mismatch error.
do_catchsql_test enc-12.8 {
SELECT * FROM t2;
SELECT * FROM t1;
} {1 {attached databases must use the same text encoding as main database}}
# Reopen test.db on a new connection.
db close
sqlite3 db test.db
# enc-12.9: recreate the TEMP table t1 on the new connection.
do_execsql_test enc-12.9 {
CREATE TEMP TABLE t1(a, b, c);
INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
}
# enc-12.10: both t2 and the TEMP table t1 are expected to be readable.
do_execsql_test enc-12.10 {
SELECT * FROM t2;
SELECT * FROM t1;
} {d e f xxx yyy zzz}
finish_test
|