#
# See the file LICENSE for redistribution information.
#
# Copyright (c) 2002, 2013 Oracle and/or its affiliates. All rights reserved.
#
# TEST rep067
# TEST Full election timeout test.
# TEST
# TEST Verify that elections use a separate "full election timeout" (if such
# TEST configuration is in use) instead of the normal timeout, when the
# TEST replication group is "cold-booted" (all sites starting with recovery).
# TEST
proc rep067 { method args } {
	# Entry point for the full election timeout test.
	#
	# method: access method name; only btree is run, anything else is
	#         reported as skipped.
	# args:   accepted but not used by this proc.
	#
	# Returns the list of supported methods when checking_valid_methods
	# is true (that variable is not set here; presumably it comes from
	# the test harness -- confirm).
	#
	source ./include.tcl

	set tnum "067"

	# The harness is only asking which methods apply: answer and stop.
	if { $checking_valid_methods } {
		return { btree }
	}

	# Any method other than btree is skipped.
	if { 0 == [is_btree $method] } {
		puts "Rep$tnum: Skipping for method $method."
		return
	}

	puts "Rep$tnum: Full election timeout test."

	# Three cases are covered.  Two of them differ only in whether one
	# client is down at cold boot, so they share rep067a_sub and are
	# selected by its second argument.  The third case (a mix of clients
	# that have and have not seen a master) lives in rep067b_sub.
	#
	foreach client_down { yes no } {
		rep067a_sub $tnum $client_down
	}
	rep067b_sub $tnum
}
# Cold boot the group.  Sites A and B come up just fine, but site C might not
# come up (depending on the client_down flag).  Hold an election.  (The amount
# of time it takes depends on whether site C is running.)  Then, if all three
# sites were running, shut down the newly elected master and hold another
# election between the two remaining sites.
#
proc rep067a_sub { tnum client_down } {
	# tnum:        test number, used only to label the output.
	# client_down: boolean.  When true, only two of the three sites are
	#              started and the single cold-boot election is expected
	#              to need the full election timeout.  When false, all
	#              three sites start, the first election is expected to
	#              finish quickly, and a second election is held after
	#              the elected master has been closed.
	#
	source ./include.tcl
	global rand_init
	error_check_good set_random_seed [berkdb srand $rand_init] 0
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	# Set up some arbitrary timeout values for this test.  The only
	# constraint is that they should be large enough, and different enough,
	# so as to allow for some amount of measurement imprecision introduced
	# by the overhead of the test mechanism.  Timeout values themselves
	# are expressed in microseconds, since they'll be passed to DB; leeway
	# values are in seconds, so that we can measure the result here in Tcl.
	#
	set elect_to 15000000
	set elect_secs_leeway 13
	set full_elect_to 30000000
	set full_secs_leeway 27

	puts -nonewline "Rep$tnum.a: Full election test, "
	if { $client_down } {
		puts "with one client missing"
		puts -nonewline "\tRep$tnum.b: First election"
		puts " expected to take [expr {$full_elect_to / 1000000}] seconds"
	} else {
		puts "with all clients initially present"
		puts "\tRep$tnum.b: First election expected to complete quickly"
	}

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	# Configure all three clients.  Use EID's starting at 2, because that's
	# what run_election expects.
	#
	set nsites 3
	foreach i { 0 1 2 } eid { 2 3 4 } p { 20 50 100 } {
		set clientdir($i) $testdir/CLIENTDIR.$i
		file mkdir $clientdir($i)

		repladd $eid
		set env_cmd($i) "berkdb_env_noerr -create \
		    -event $repmemargs -home $clientdir($i) \
		    -txn -rep_client $verbargs \
		    -errpfx CLIENT.$i -rep_transport \[list $eid replsend\]"

		set errcmd($i) "none"
		set crash($i) 0
		set pri($i) $p
	}
	set elect_timeout [list $elect_to $full_elect_to]

	# Start the clients, but perhaps not all of them.
	#
	set envlist {}
	if { $client_down } {
		set participants 2
	} else {
		set participants 3
	}
	for { set i 0 } { $i < $participants } { incr i } {
		set clientenv($i) [eval $env_cmd($i)]
		set eid [expr {$i + 2}]
		# Each entry is an {env eid} pair, built with list so that it
		# is a well-formed list element regardless of the handle text.
		lappend envlist [list $clientenv($i) $eid]
	}

	process_msgs $envlist

	# In this test, the expected winner is always the last one in the
	# array.  We made sure of that by arranging the priorities that way.
	# This is convenient so that we can remove the winner (master) in the
	# second phase, without leaving a hole in the arrays that the
	# run_election proc wouldn't cope with.
	#
	set winner [expr {$participants - 1}]
	set initiator 0
	set nvotes 2
	set reopen_flag 0
	run_election envlist errcmd pri crash \
	    $qdir "Rep$tnum.c" $initiator $nsites $nvotes $participants \
	    $winner $reopen_flag NULL 0 0 $elect_timeout
	set duration [rep067_max_duration $envlist]
	puts "\tRep$tnum.d: the election took about $duration seconds"

	if { $client_down } {
		# Case #2.
		#
		# Without full participation on a cold boot, the election should
		# take the full long timeout.  In any case it should be way more
		# than the "normal" timeout.
		#
		error_check_good duration1a \
		    [expr {$duration > $full_secs_leeway}] 1
	} else {
		# Case #1.
		#
		# With full participation, the election should complete "right
		# away".  At least it should be way less than the "normal"
		# election timeout.
		#
		error_check_good duration1b \
		    [expr {$duration < $elect_secs_leeway}] 1
	}

	process_msgs $envlist

	if { !$client_down } {
		# Shut down the master and hold another election between the
		# remaining two sites.
		#
		# Step labels in this phase run e, f, g so that every line of
		# output can be attributed to exactly one step.
		#
		puts "\tRep$tnum.e: Shut down elected master, and run another election"
		puts "\tRep$tnum.e: (expected to take [expr {$elect_to / 1000000}] seconds)"
		$clientenv($winner) close
		set envlist [lreplace $envlist $winner $winner]

		set winner 1
		set participants 2
		run_election envlist errcmd pri crash \
		    $qdir "Rep$tnum.f" $initiator $nsites $nvotes \
		    $participants $winner $reopen_flag NULL 0 0 $elect_timeout
		set duration [rep067_max_duration $envlist]

		# We don't have full participation, so the election can only be
		# won after a timeout.  But these clients have seen a master, so
		# we shouldn't have to wait for the full-election timeout.
		#
		puts "\tRep$tnum.g: the election took about $duration seconds"
		error_check_good duration2 \
		    [expr {$duration > $elect_secs_leeway && \
		    $duration < $full_secs_leeway}] 1
	}

	# Sites 0 and 1 are open in every case: the third site is either never
	# started (client_down) or was closed above as the elected master.
	$clientenv(0) close
	$clientenv(1) close

	replclose $qdir
}
# Run an election where one of the clients has seen a master, but the other has
# not.  Verify that the client which has not seen a master learns from the one
# which has that a master has been seen, and that the election is therefore
# allowed to complete after the normal timeout, rather than the full election
# timeout.
#
proc rep067b_sub { tnum } {
	# tnum: test number, used only to label the output.
	#
	# Builds a two-client group in which only one client has exchanged
	# messages with a master, runs one election, and checks that its
	# duration falls between the normal and the full election leeway.
	#
	source ./include.tcl
	global rand_init
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	error_check_good set_random_seed [berkdb srand $rand_init] 0

	# Timeouts are in microseconds (they are handed to run_election);
	# the leeway bounds are in seconds, for comparison with the measured
	# election duration.
	set elect_to 10000000
	set full_elect_to 180000000
	set elect_secs_leeway 10
	set full_secs_leeway 100

	puts "Rep$tnum.a: Mixed full election test"

	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	} else {
		set verbargs ""
	}

	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	} else {
		set repmemargs ""
	}

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	# Setup step: bring up a master (EID 1) together with one client
	# (EID 2), purely so that this client has heard from a master.
	#
	file mkdir $testdir/MASTERDIR
	set master_cmd "berkdb_env_noerr -create \
	    -event $repmemargs -home $testdir/MASTERDIR \
	    -txn -rep_master $verbargs \
	    -errpfx MASTER -rep_transport \[list 1 replsend\]"

	file mkdir $testdir/CLIENTDIR
	set client_cmd "berkdb_env_noerr -create \
	    -event $repmemargs -home $testdir/CLIENTDIR \
	    -txn -rep_client $verbargs \
	    -errpfx CLIENT.0 -rep_transport \[list 2 replsend\]"

	puts "\tRep$tnum.b: Start master and first client"
	repladd 1
	set master_env [eval $master_cmd]
	repladd 2
	set client_env [eval $client_cmd]
	process_msgs [list [list $master_env 1] [list $client_env 2]]

	puts "\tRep$tnum.c: Shut down master; start other client"
	$master_env close

	# Now arrange the election we actually care about.  The first
	# client's command is recorded as env_cmd(0), and a second client is
	# started as env_cmd(1).  That leaves a mixture of clients: one that
	# has seen a master and one that has not.
	#
	# Array index and EID are kept 2 apart (index 0 <-> EID 2, index 1 <->
	# EID 3), the offset run_election is understood to assume.
	#
	set env_cmd(0) $client_cmd

	repladd 3
	file mkdir $testdir/CLIENTDIR2
	set env_cmd(1) "berkdb_env_noerr -create \
	    -event $repmemargs -home $testdir/CLIENTDIR2 \
	    -txn -rep_client $verbargs \
	    -errpfx CLIENT.1 -rep_transport \[list 3 replsend\]"
	set client2_env [eval $env_cmd(1)]

	# Per-site election inputs, indexed 0 (first client) and 1 (second
	# client).  The first client gets the higher priority.
	array set errcmd { 0 none 1 none }
	array set crash { 0 0 1 0 }
	array set pri { 0 100 1 50 }
	set envlist [list [list $client_env 2] [list $client2_env 3]]

	set elect_timeout [list $elect_to $full_elect_to]

	# The group is sized for three sites although only two take part.
	set nsites 3
	set participants 2

	process_msgs $envlist

	set expected_secs [expr {$elect_to / 1000000}]
	puts "\tRep$tnum.d: Election expected to take $expected_secs seconds"

	set winner 0
	set initiator 0
	set nvotes 2
	set reopen_flag 0
	run_election envlist errcmd pri crash \
	    $qdir "Rep$tnum.e" $initiator $nsites $nvotes $participants \
	    $winner $reopen_flag NULL 0 0 $elect_timeout

	set elapsed [rep067_max_duration $envlist]
	puts "\tRep$tnum.f: the election took about $elapsed seconds"

	# With less than full participation the election can only be won
	# after a timeout.  Even so, one client having seen a master should
	# be enough to avoid waiting out the full-election timeout.
	#
	set within_bounds [expr {$elapsed > $elect_secs_leeway && \
	    $elapsed < $full_secs_leeway}]
	error_check_good duration3 $within_bounds 1

	$client_env close
	$client2_env close

	replclose $qdir
}
# Report the longest election time, in fractional seconds, among a set of
# environments.
#
# envlist: a list whose elements each carry an env handle as their first
#          item (the remaining items are ignored here).
#
# For every env the "Election seconds" and "Election usecs" fields of rep_stat
# are combined into a single floating-point number of seconds.  The largest
# such value is returned; an empty envlist yields 0.0.
#
proc rep067_max_duration { envlist } {
	set longest 0.0
	foreach entry $envlist {
		set handle [lindex $entry 0]
		set secs [stat_field $handle rep_stat "Election seconds"]
		set usecs [stat_field $handle rep_stat "Election usecs"]

		# Fold the microsecond part into the whole seconds.
		set elapsed [expr {$secs + $usecs / 1000000.0}]
		if { $elapsed > $longest } {
			set longest $elapsed
		}
	}
	return $longest
}
#