1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 tokenizers
#
# Pull in fts5_common.tcl from the directory that contains this script.
source [file join [file dirname [info script]] fts5_common.tcl]
# If SQLITE_ENABLE_FTS5 is not defined, omit this file: when the "fts5"
# capability is absent, finish the test run and stop sourcing this script
# before any of the tests below are reached.
ifcapable !fts5 {
finish_test
return
}
# Return the path of $file inside the fts3 "unicode" directory, built from
# the directory of the currently running script:
#
#   ../<script-dir>/../../fts3/unicode/<file>
#
# The components are combined with [file join], so an absolute <script-dir>
# replaces the leading "..".
proc fts3_unicode_path {file} {
  set scriptDir [file dirname [info script]]
  return [file join .. $scriptDir .. .. fts3 unicode $file]
}
# parseunicode.tcl presumably supplies the tl_*, an_* and rd_* loader
# procedures called below (they are not defined in this file).
source [fts3_unicode_path parseunicode.tcl]

set testprefix fts5unicode3

# Paths of the two Unicode data files consumed by the loaders.
set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]

tl_load_casefolding_txt $CF

# Every value returned by an_load_unicodedata_text becomes a key of the
# global aNotAlnum array; tcl_isalnum only tests for key existence.
foreach x [an_load_unicodedata_text $UD] {
  set aNotAlnum($x) 1
}

# Each element returned by rd_load_unicodedata_text is unpacked as a
# {code ascii f} record and stored in aDiacritic under the key "code,f".
foreach {y} [rd_load_unicodedata_text $UD] {
  foreach {code ascii f} $y {}

  # The stored value is the first byte of $ascii read as a signed 8-bit
  # integer, or 0 when $ascii is empty.
  if {$ascii ne ""} {
    binary scan $ascii c int
  } else {
    set int 0
  }

  set aDiacritic($code,$f) $int

  # Records with f==0 are additionally stored under the "code,1" key.
  if {$f==0} {
    set aDiacritic($code,1) $int
  }
}
# Tcl reference implementation that the tests compare against the SQL
# function fts5_fold().
#
# Arguments:
#   i                 Integer codepoint to fold.
#   bRemoveDiacritic  Optional, default 0.
#                       0         - only apply the tl_lookup_table mapping.
#                       2         - additionally map the result through
#                                   aDiacritic(<codepoint>,1), if present.
#                       other !=0 - additionally map the result through
#                                   aDiacritic(<codepoint>,0), if present.
#
# Returns the resulting codepoint as an integer.
#
# Reads the global arrays tl_lookup_table and aDiacritic; neither is
# modified.
proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic

  # Expressions are braced so that operands are substituted exactly once
  # (an unbraced [expr] re-parses the substituted text as an expression)
  # and so that the expression can be byte-compiled; this proc is invoked
  # once per row by the SQL queries in this file.
  set f [expr {$bRemoveDiacritic==2}]

  # Step 1: mapping via tl_lookup_table, when an entry exists.
  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }

  # Step 2: optional mapping via aDiacritic, applied to the result of
  # step 1. $f selects which of the two aDiacritic entries is consulted.
  if {$bRemoveDiacritic && [info exists aDiacritic($i,$f)]} {
    set i $aDiacritic($i,$f)
  }

  # Hand the value back through expr so the result is numeric.
  expr {$i}
}
# Register the tcl_fold proc on the [db] handle as an SQL function of the
# same name, so that SQL statements can call tcl_fold(...).
db func tcl_fold tcl_fold
# Tcl reference implementation that the tests compare against the SQL
# function fts5_isalnum().
#
# Returns 0 if the global array aNotAlnum has an entry for $i, and 1
# otherwise.
proc tcl_isalnum {i} {
  global aNotAlnum
  if {[info exists aNotAlnum($i)]} {
    return 0
  }
  return 1
}
# Register the tcl_isalnum proc on the [db] handle as an SQL function of
# the same name, so that SQL statements can call tcl_isalnum(...).
db func tcl_isalnum tcl_isalnum
# Tests 1.0.*: argument-count checking. fts5_isalnum() called with three
# arguments, and fts5_fold() called with zero or with three arguments, must
# each fail with a "wrong number of arguments" error.
do_catchsql_test 1.0.1 {
SELECT fts5_isalnum(1, 2, 3);
} {1 {wrong number of arguments to function fts5_isalnum}}
do_catchsql_test 1.0.2 {
SELECT fts5_fold();
} {1 {wrong number of arguments to function fts5_fold}}
do_catchsql_test 1.0.3 {
SELECT fts5_fold(1,2,3);
} {1 {wrong number of arguments to function fts5_fold}}
# Test 1.1: the recursive CTE yields every integer from -1 to 100000
# inclusive. The query counts the values for which fts5_fold(i) differs from
# the Tcl reference tcl_fold(i) and reports the smallest such value. The
# expected result - a count of 0 and a NULL minimum - means no value differs.
do_execsql_test 1.1 {
WITH ii(i) AS (
SELECT -1
UNION ALL
SELECT i+1 FROM ii WHERE i<100000
)
SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}
# Tests 1.2.*: compare the two-argument form of fts5_fold() against the Tcl
# reference tcl_fold() for every integer from -1 to 100000 inclusive. Test
# 1.2.1 passes 1 as the second argument and test 1.2.2 passes 2. Each query
# counts the differing values and reports the smallest one; the expected
# result - a count of 0 and a NULL minimum - means no value differs.
do_execsql_test 1.2.1 {
WITH ii(i) AS (
SELECT -1
UNION ALL
SELECT i+1 FROM ii WHERE i<100000
)
SELECT count(*), min(i) FROM ii
WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}
do_execsql_test 1.2.2 {
WITH ii(i) AS (
SELECT -1
UNION ALL
SELECT i+1 FROM ii WHERE i<100000
)
SELECT count(*), min(i) FROM ii
WHERE fts5_fold(i,2)!=CAST(tcl_fold(i,2) AS int);
} {0 {}}
# Test 1.3: for every integer from -1 to 100000 inclusive, fts5_isalnum(i)
# must equal the Tcl reference tcl_isalnum(i). The query counts the differing
# values and reports the smallest one; the expected result - a count of 0 and
# a NULL minimum - means no value differs.
do_execsql_test 1.3 {
WITH ii(i) AS (
SELECT -1
UNION ALL
SELECT i+1 FROM ii WHERE i<100000
)
SELECT count(*), min(i) FROM ii
WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
} {0 {}}
# Test 1.4: create an fts5 table whose unicode61 tokenizer receives the 200
# codepoints 700..899 as its "separators" option. The CREATE VIRTUAL TABLE
# statement must succeed and return no rows.
do_test 1.4 {
  set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=}
  append str {"unicode61 separators '}

  # Append one character for each codepoint in the range [700, 900).
  set i 700
  while {$i<900} {
    append str [format %c $i]
    incr i
  }

  append str {'");}
  execsql $str
} {}
# Test 1.5: create an fts5 table whose unicode61 tokenizer receives the 200
# codepoints 700..899 as its "tokenchars" option. The CREATE VIRTUAL TABLE
# statement must succeed and return no rows.
do_test 1.5 {
  set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=}
  append str {"unicode61 tokenchars '}

  # Append one character for each codepoint in the range [700, 900).
  set i 700
  while {$i<900} {
    append str [format %c $i]
    incr i
  }

  append str {'");}
  execsql $str
} {}

finish_test
|