# See the file LICENSE for redistribution information.
#
# Copyright (c) 2009, 2013 Oracle and/or its affiliates. All rights reserved.
#
# TEST repmgr026
# TEST Test of "full election" timeouts.
# TEST 1. Cold boot with all sites present.
# TEST 2. Cold boot with some sites missing.
# TEST 3. Partial-participation election with one client having seen a master,
# TEST but another just starting up fresh.
# TEST 4. Partial participation, with all participants already having seen a
# TEST master.
# TEST
# Driver for the "full election" timeout test.  Runs repmgr026_sub once for
# each combination of leases (no/yes) and a missing fifth client (no/yes).
# The whole test is skipped when include.tcl reports a FreeBSD platform.
#
proc repmgr026 { { tnum 026 } } {
	source ./include.tcl

	if { $is_freebsd_test == 1 } {
		puts "Skipping replication manager test on FreeBSD platform."
		return
	}

	# Pairs of (leases, client down), listed in the order they are run:
	# leases vary slowest, client_down fastest.
	foreach { leases down } { no no  no yes  yes no  yes yes } {
		puts "Repmgr$tnum: Full election test, \
		    client_down: $down; leases: $leases"
		repmgr026_sub $tnum $down $leases
	}
}
# Run one pass of the full-election timeout test.
#
# Arguments:
#	tnum		test number, used only to label progress output
#	client_down	boolean; when true, site E is not restarted for the
#			cold boot, so the first election lacks one site
#	use_leases	boolean; when true, -rep_lease is added to the
#			arguments used to open every environment
#
# Outline:
#   1. Create a five-site group (A..E) under $testdir, then close all sites.
#   2. Cold boot A..D (and E unless client_down) and time the first election.
#      With a site missing it must take longer than full_secs_leeway seconds;
#      with all sites present it must take less.
#   3. Close A (and E, if it was running) and time the second election among
#      B, C and D.
#
# Failures are reported through error_check_good, or by the error raised from
# repmgr026_await_election_result when a wait limit is exceeded.
#
proc repmgr026_sub { tnum client_down use_leases } {
	global testdir
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	# Optional environment-open arguments driven by suite-wide settings.
	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	# Start from a clean test directory with one home per site.
	env_cleanup $testdir
	file mkdir [set dira $testdir/SITE_A]
	file mkdir [set dirb $testdir/SITE_B]
	file mkdir [set dirc $testdir/SITE_C]
	file mkdir [set dird $testdir/SITE_D]
	file mkdir [set dire $testdir/SITE_E]

	# foreach with an empty body is used purely as a multi-variable
	# assignment from the list of ports.
	foreach { porta portb portc portd porte } [available_ports 5] {}

	# First, just create/establish the group.
	puts -nonewline "Repmgr$tnum: Create a group of 5 sites: "
	set common "-create -txn $verbargs $repmemargs \
	    -rep -thread -event"
	if { $use_leases } {
		append common " -rep_lease {[list 3000000]} "
	}

	# The env-open commands are kept as strings so that each site can be
	# reopened later with exactly the same arguments.
	set cmda "berkdb_env_noerr $common -errpfx SITE_A -home $dira"
	set cmdb "berkdb_env_noerr $common -errpfx SITE_B -home $dirb"
	set cmdc "berkdb_env_noerr $common -errpfx SITE_C -home $dirc"
	set cmdd "berkdb_env_noerr $common -errpfx SITE_D -home $dird"
	set cmde "berkdb_env_noerr $common -errpfx SITE_E -home $dire"

	# repmgr arguments shared by every site.  NOTE(review): the timeout
	# values look like microseconds (full_election 60000000 lines up with
	# the "60 seconds" mentioned further down) -- confirm against the
	# repmgr documentation.
	set common_mgr " -start elect \
	    -timeout {connection_retry 5000000} \
	    -timeout {election_retry 2000000} \
	    -timeout {full_election 60000000} \
	    -timeout {election 5000000} -timeout {ack 3000000}"

	# Site A creates the group; B..E each join through A and wait for
	# start-up to finish before the next site is added.
	set enva [eval $cmda]
	eval $enva repmgr $common_mgr \
	    -local {[list 127.0.0.1 $porta creator]}
	puts -nonewline "." ; flush stdout

	set envb [eval $cmdb]
	eval $envb repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portb]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envb
	puts -nonewline "." ; flush stdout

	set envc [eval $cmdc]
	eval $envc repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portc]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envc
	puts -nonewline "." ; flush stdout

	set envd [eval $cmdd]
	eval $envd repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portd]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envd
	puts -nonewline "." ; flush stdout

	set enve [eval $cmde]
	eval $enve repmgr $common_mgr \
	    -local {[list 127.0.0.1 $porte]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $enve
	puts "."

	# Shut the whole group down so that it can be cold booted.
	$enve close
	$envd close
	$envc close
	$envb close
	$enva close

	# Cold boot the group (with or without site E), giving site A a
	# high priority.
	#
	# The wait limits are intended to be an amount that is way more than
	# the expected timeout, used for nothing more than preventing the test
	# from hanging forever.  The leeway amount should be enough less than
	# the timeout to allow for any imprecision introduced by the test
	# mechanism.
	#
	set elect_wait_limit 25
	set full_secs_leeway 59
	set full_wait_limit 85

	puts "\tRepmgr$tnum.a: Start first four sites."
	set enva [eval $cmda]
	eval $enva repmgr $common_mgr -pri 200 -local {[list 127.0.0.1 $porta]}

	set envb [eval $cmdb]
	eval $envb repmgr $common_mgr -pri 100 -local {[list 127.0.0.1 $portb]}

	set envc [eval $cmdc]
	eval $envc repmgr $common_mgr -pri 90 -local {[list 127.0.0.1 $portc]}

	set envd [eval $cmdd]
	eval $envd repmgr $common_mgr -pri 80 -local {[list 127.0.0.1 $portd]}

	if { $client_down } {
		# Sentinel meaning "site E is not running".
		set enve NONE
	} else {
		puts "\tRepmgr$tnum.b: Start fifth site."
		set enve [eval $cmde]
		eval $enve repmgr $common_mgr -pri 50 \
		    -local {[list 127.0.0.1 $porte]}
	}

	# wait for results, and make sure they're correct
	#
	# Site A comes first in the list because it is the expected winner.
	set envlist [list $enva $envb $envc $envd]
	if { $enve != "NONE" } {
		lappend envlist $enve
	}
	set limit $full_wait_limit
	puts "\tRepmgr$tnum.c: wait (up to $limit seconds) for first election."
	set t [repmgr026_await_election_result $envlist $limit]

	if { $client_down } {
		# With a site missing, the election is expected to last
		# longer than the leeway amount.
		error_check_good slow_election \
		    [expr {$t > $full_secs_leeway}] 1
	} else {
		# When all sites participate, the election should finish in way
		# less than 60 seconds.
		#
		error_check_good timely_election \
		    [expr {$t < $full_secs_leeway}] 1
	}
	puts "\tRepmgr$tnum.d: first election completed in $t seconds"

	# Wait for every client to finish start-up, and clear each site's
	# recorded events so the second election is detected from a clean
	# slate.
	puts "\tRepmgr$tnum.e: wait for start-up done"
	$enva event_info -clear
	await_startup_done $envb
	$envb event_info -clear
	await_startup_done $envc
	$envc event_info -clear
	await_startup_done $envd
	$envd event_info -clear
	if { $enve != "NONE" } {
		await_startup_done $enve
		$enve event_info -clear
	}

	# Shut down site A, in order to test elections with less than the whole
	# group voting.  However, normally repmgr's reaction to losing master
	# connection is to try a "fast election" (the n-1 trick).  So we must do
	# something to mitigate that (see below).
	#
	puts "\tRepmgr$tnum.f: shut down master site A"
	if { $client_down } {
		# The fifth site is already down, so now we'll have just B, C,
		# and D running.  Therefore, even with repmgr pulling its "fast
		# election" (n-1) trick, we don't have enough votes for a
		# full-participation short circuit; so this is a valid test of
		# the "normal" election timeout.
		#
		$enva close
	} else {
		# Here all sites are running, so if we just killed the master
		# repmgr would invoke its "fast election" trick, resulting in no
		# timeout.  Since the purpose of this test is to ensure the
		# correct use of timeouts, that's no good.  Instead, let's first
		# kill one more other site.
		$enve close
		$enva close
	}

	# wait for results, and check them
	#
	# Site B is listed first, as the expected winner among the survivors.
	set envlist [list $envb $envc $envd]
	set limit $elect_wait_limit
	puts "\tRepmgr$tnum.h: wait (up to $limit seconds) for second election."
	set t [repmgr026_await_election_result $envlist $limit]
	error_check_good normal_election [expr {$t < $full_secs_leeway}] 1
	puts "\tRepmgr$tnum.i: second election completed in $t seconds"

	$envd close
	$envc close
	$envb close
}
# Wait (a limited amount of time) for the election to finish.  The first env
# handle in the list is the expected winner, and the others are the remaining
# clients.
#
# Arguments:
#	envlist		env handles: expected winner first, then the clients
#	limit		maximum number of seconds to wait
#
# Returns the approximate amount of time (in whole seconds, as measured with
# [clock seconds]) that the election took.  Raises the error
# "FAIL: time limit exceeded" if the clock passes the deadline before
# repmgr026_is_ready reports success.
#
proc repmgr026_await_election_result { envlist limit } {
	set begin [clock seconds]
	# expr arguments are braced so that they are substituted only once.
	set deadline [expr {$begin + $limit}]

	while { true } {
		set t [clock seconds]

		# The deadline is checked before polling, so an election that
		# only becomes ready after the deadline still fails.
		if { $t > $deadline } {
			error "FAIL: time limit exceeded"
		}
		if { [repmgr026_is_ready $envlist] } {
			return [expr {$t - $begin}]
		}

		# Pause between polls.
		tclsleep 1
	}
}
# Report whether the election among the given sites has completed.
#
# The first handle in envlist is the expected winner and must satisfy
# is_elected; every remaining handle must have a newmaster event present.
# Returns true when all of those conditions hold, false otherwise.
#
proc repmgr026_is_ready { envlist } {
	if { [is_elected [lindex $envlist 0]] } {
		# The winner is in place; now every other site must have
		# noticed the new master.
		foreach site [lrange $envlist 1 end] {
			if { [is_event_present $site newmaster] } {
				continue
			}
			return false
		}
		return true
	}
	return false
}
|