1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
|
# See the file LICENSE for redistribution information.
#
# Copyright (c) 1996-2002
# Sleepycat Software. All rights reserved.
#
# $Id: join.tcl,v 1.1.1.1 2003/11/20 22:13:57 toshok Exp $
#
# TEST jointest
# TEST Test duplicate assisted joins. Executes 1, 2, 3 and 4-way joins
# TEST with differing index orders and selectivity.
# TEST
# TEST We'll test 2-way, 3-way, and 4-way joins and figure that if those
# TEST work, everything else does as well. We'll create test databases
# TEST called join1.db, join2.db, join3.db, and join4.db. The number on
# TEST the database describes the duplication -- duplicates are of the
# TEST form 0, N, 2N, 3N, ... where N is the number of the database.
# TEST Primary.db is the primary database, and null.db is the database
# TEST that has no matching duplicates.
# TEST
# TEST We should test this on all btrees, all hash, and a combination thereof
proc jointest { {psize 8192} {with_dup_dups 0} {flags 0} } {
global testdir
global rand_init
source ./include.tcl
env_cleanup $testdir
berkdb srand $rand_init
# Use one environment for all database opens so we don't
# need oodles of regions.
set env [berkdb_env -create -home $testdir]
error_check_good env_open [is_valid_env $env] TRUE
# With the new offpage duplicate code, we don't support
# duplicate duplicates in sorted dup sets. Thus, if with_dup_dups
# is greater than one, run only with "-dup".
if { $with_dup_dups > 1 } {
set doptarray {"-dup"}
} else {
set doptarray {"-dup -dupsort" "-dup" RANDOMMIX RANDOMMIX }
}
# NB: these flags are internal only, ok
foreach m "DB_BTREE DB_HASH DB_BOTH" {
# run with two different random mixes.
foreach dopt $doptarray {
set opt [list "-env" $env $dopt]
puts "Join test: ($m $dopt) psize $psize,\
$with_dup_dups dup\
dups, flags $flags."
build_all $m $psize $opt oa $with_dup_dups
# null.db is db_built fifth but is referenced by
# zero; set up the option array appropriately.
set oa(0) $oa(5)
# Build the primary
puts "\tBuilding the primary database $m"
set oflags "-create -truncate -mode 0644 -env $env\
[conv $m [berkdb random_int 1 2]]"
set db [eval {berkdb_open} $oflags primary.db]
error_check_good dbopen [is_valid_db $db] TRUE
for { set i 0 } { $i < 1000 } { incr i } {
set key [format "%04d" $i]
set ret [$db put $key stub]
error_check_good "primary put" $ret 0
}
error_check_good "primary close" [$db close] 0
set did [open $dict]
gets $did str
do_join primary.db "1 0" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "2 0" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "3 0" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "4 0" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "1" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "2" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "3" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "4" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "1 2" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "1 2 3" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "1 2 3 4" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "2 1" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "3 2 1" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "4 3 2 1" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "1 3" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "3 1" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "1 4" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "4 1" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "2 3" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "3 2" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "2 4" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "4 2" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "3 4" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "4 3" $str oa $flags $with_dup_dups
gets $did str
do_join primary.db "2 3 4" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "3 4 1" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "4 2 1" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "0 2 1" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "3 2 0" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "4 3 2 1" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "4 3 0 1" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "3 3 3" $str oa $flags\
$with_dup_dups
gets $did str
do_join primary.db "2 2 3 3" $str oa $flags\
$with_dup_dups
gets $did str2
gets $did str
do_join primary.db "1 2" $str oa $flags\
$with_dup_dups "3" $str2
# You really don't want to run this section
# with $with_dup_dups > 2.
if { $with_dup_dups <= 2 } {
gets $did str2
gets $did str
do_join primary.db "1 2 3" $str\
oa $flags $with_dup_dups "3 3 1" $str2
gets $did str2
gets $did str
do_join primary.db "4 0 2" $str\
oa $flags $with_dup_dups "4 3 3" $str2
gets $did str2
gets $did str
do_join primary.db "3 2 1" $str\
oa $flags $with_dup_dups "0 2" $str2
gets $did str2
gets $did str
do_join primary.db "2 2 3 3" $str\
oa $flags $with_dup_dups "1 4 4" $str2
gets $did str2
gets $did str
do_join primary.db "2 2 3 3" $str\
oa $flags $with_dup_dups "0 0 4 4" $str2
gets $did str2
gets $did str
do_join primary.db "2 2 3 3" $str2\
oa $flags $with_dup_dups "2 4 4" $str
gets $did str2
gets $did str
do_join primary.db "2 2 3 3" $str2\
oa $flags $with_dup_dups "0 0 4 4" $str
}
close $did
}
}
error_check_good env_close [$env close] 0
}
proc build_all { method psize opt oaname with_dup_dups {nentries 100} } {
global testdir
db_build join1.db $nentries 50 1 [conv $method 1]\
$psize $opt $oaname $with_dup_dups
db_build join2.db $nentries 25 2 [conv $method 2]\
$psize $opt $oaname $with_dup_dups
db_build join3.db $nentries 16 3 [conv $method 3]\
$psize $opt $oaname $with_dup_dups
db_build join4.db $nentries 12 4 [conv $method 4]\
$psize $opt $oaname $with_dup_dups
db_build null.db $nentries 0 5 [conv $method 5]\
$psize $opt $oaname $with_dup_dups
}
proc conv { m i } {
switch -- $m {
DB_HASH { return "-hash"}
"-hash" { return "-hash"}
DB_BTREE { return "-btree"}
"-btree" { return "-btree"}
DB_BOTH {
if { [expr $i % 2] == 0 } {
return "-hash";
} else {
return "-btree";
}
}
}
}
proc random_opts { } {
set j [berkdb random_int 0 1]
if { $j == 0 } {
return " -dup"
} else {
return " -dup -dupsort"
}
}
proc db_build { name nkeys ndups dup_interval method psize lopt oaname \
with_dup_dups } {
source ./include.tcl
# Get array of arg names (from two levels up the call stack)
upvar 2 $oaname oa
# Search for "RANDOMMIX" in $opt, and if present, replace
# with " -dup" or " -dup -dupsort" at random.
set i [lsearch $lopt RANDOMMIX]
if { $i != -1 } {
set lopt [lreplace $lopt $i $i [random_opts]]
}
# Save off db_open arguments for this database.
set opt [eval concat $lopt]
set oa($dup_interval) $opt
# Create the database and open the dictionary
set oflags "-create -truncate -mode 0644 $method\
-pagesize $psize"
set db [eval {berkdb_open} $oflags $opt $name]
error_check_good dbopen [is_valid_db $db] TRUE
set did [open $dict]
set count 0
puts -nonewline "\tBuilding $name: $nkeys keys "
puts -nonewline "with $ndups duplicates at interval of $dup_interval"
if { $with_dup_dups > 0 } {
puts ""
puts "\t\tand $with_dup_dups duplicate duplicates."
} else {
puts "."
}
for { set count 0 } { [gets $did str] != -1 && $count < $nkeys } {
incr count} {
set str $str$name
# We need to make sure that the dups are inserted in a
# random, or near random, order. Do this by generating
# them and putting each in a list, then sorting the list
# at random.
set duplist {}
for { set i 0 } { $i < $ndups } { incr i } {
set data [format "%04d" [expr $i * $dup_interval]]
lappend duplist $data
}
# randomize the list
for { set i 0 } { $i < $ndups } {incr i } {
# set j [berkdb random_int $i [expr $ndups - 1]]
set j [expr ($i % 2) + $i]
if { $j >= $ndups } { set j $i }
set dupi [lindex $duplist $i]
set dupj [lindex $duplist $j]
set duplist [lreplace $duplist $i $i $dupj]
set duplist [lreplace $duplist $j $j $dupi]
}
foreach data $duplist {
if { $with_dup_dups != 0 } {
for { set j 0 }\
{ $j < $with_dup_dups }\
{incr j} {
set ret [$db put $str $data]
error_check_good put$j $ret 0
}
} else {
set ret [$db put $str $data]
error_check_good put $ret 0
}
}
if { $ndups == 0 } {
set ret [$db put $str NODUP]
error_check_good put $ret 0
}
}
close $did
error_check_good close:$name [$db close] 0
}
proc do_join { primary dbs key oanm flags with_dup_dups {dbs2 ""} {key2 ""} } {
global testdir
source ./include.tcl
upvar $oanm oa
puts -nonewline "\tJoining: $dbs on $key"
if { $dbs2 == "" } {
puts ""
} else {
puts " with $dbs2 on $key2"
}
# Open all the databases
set p [berkdb_open -unknown $testdir/$primary]
error_check_good "primary open" [is_valid_db $p] TRUE
set dblist ""
set curslist ""
set ndx [llength $dbs]
foreach i [concat $dbs $dbs2] {
set opt $oa($i)
set db [eval {berkdb_open -unknown} $opt [n_to_name $i]]
error_check_good "[n_to_name $i] open" [is_valid_db $db] TRUE
set curs [$db cursor]
error_check_good "$db cursor" \
[is_substr $curs "$db.c"] 1
lappend dblist $db
lappend curslist $curs
if { $ndx > 0 } {
set realkey [concat $key[n_to_name $i]]
} else {
set realkey [concat $key2[n_to_name $i]]
}
set pair [$curs get -set $realkey]
error_check_good cursor_set:$realkey:$pair \
[llength [lindex $pair 0]] 2
incr ndx -1
}
set join_curs [eval {$p join} $curslist]
error_check_good join_cursor \
[is_substr $join_curs "$p.c"] 1
# Calculate how many dups we expect.
# We go through the list of indices. If we find a 0, then we
# expect 0 dups. For everything else, we look at pairs of numbers,
# if the are relatively prime, multiply them and figure out how
# many times that goes into 50. If they aren't relatively prime,
# take the number of times the larger goes into 50.
set expected 50
set last 1
foreach n [concat $dbs $dbs2] {
if { $n == 0 } {
set expected 0
break
}
if { $last == $n } {
continue
}
if { [expr $last % $n] == 0 || [expr $n % $last] == 0 } {
if { $n > $last } {
set last $n
set expected [expr 50 / $last]
}
} else {
set last [expr $n * $last / [gcd $n $last]]
set expected [expr 50 / $last]
}
}
# If $with_dup_dups is greater than zero, each datum has
# been inserted $with_dup_dups times. So we expect the number
# of dups to go up by a factor of ($with_dup_dups)^(number of databases)
if { $with_dup_dups > 0 } {
foreach n [concat $dbs $dbs2] {
set expected [expr $expected * $with_dup_dups]
}
}
set ndups 0
if { $flags == " -join_item"} {
set l 1
} else {
set flags ""
set l 2
}
for { set pair [eval {$join_curs get} $flags] } { \
[llength [lindex $pair 0]] == $l } {
set pair [eval {$join_curs get} $flags] } {
set k [lindex [lindex $pair 0] 0]
foreach i $dbs {
error_check_bad valid_dup:$i:$dbs $i 0
set kval [string trimleft $k 0]
if { [string length $kval] == 0 } {
set kval 0
}
error_check_good valid_dup:$i:$dbs [expr $kval % $i] 0
}
incr ndups
}
error_check_good number_of_dups:$dbs $ndups $expected
error_check_good close_primary [$p close] 0
foreach i $curslist {
error_check_good close_cursor:$i [$i close] 0
}
foreach i $dblist {
error_check_good close_index:$i [$i close] 0
}
}
proc n_to_name { n } {
global testdir
if { $n == 0 } {
return null.db;
} else {
return join$n.db;
}
}
proc gcd { a b } {
set g 1
for { set i 2 } { $i <= $a } { incr i } {
if { [expr $a % $i] == 0 && [expr $b % $i] == 0 } {
set g $i
}
}
return $g
}
|