File: repmgr026.tcl

package info (click to toggle)
db5.3 5.3.28%2Bdfsg2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 158,500 kB
  • sloc: ansic: 448,411; java: 111,824; tcl: 80,544; sh: 44,264; cs: 33,697; cpp: 21,604; perl: 14,557; xml: 10,799; makefile: 4,077; javascript: 1,998; yacc: 1,003; awk: 965; sql: 801; erlang: 342; python: 216; php: 24; asm: 14
file content (243 lines) | stat: -rw-r--r-- 7,495 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# See the file LICENSE for redistribution information.
#
# Copyright (c) 2009, 2013 Oracle and/or its affiliates.  All rights reserved.
#
# TEST	repmgr026
# TEST	Test of "full election" timeouts.
# TEST	1. Cold boot with all sites present.
# TEST	2. Cold boot with some sites missing.
# TEST	3. Partial-participation election with one client having seen a master,
# TEST	   but another just starting up fresh.
# TEST	4. Partial participation, with all participants already having seen a
# TEST	   master.
# TEST

proc repmgr026 { { tnum 026 } } {
	# Test driver: runs repmgr026_sub once for every combination of the
	# lease setting and the "client down" setting.  tnum is used to label
	# the output and is passed through to repmgr026_sub.
	source ./include.tcl

	# is_freebsd_test is not set in this proc; presumably it comes from
	# the include.tcl sourced above.
	if { $is_freebsd_test == 1 } {
		puts "Skipping replication manager test on FreeBSD platform."
		return
	}

	# Each pair below is (leases, client down), listed in the order:
	# leases off with the client up then down, followed by leases on
	# with the client up then down.
	foreach { leases down } { no no   no yes   yes no   yes yes } {
		puts "Repmgr$tnum: Full election test, \
		    client_down: $down; leases: $leases"
		repmgr026_sub $tnum $down $leases
	}
}

# Run one pass of the full-election timeout test.
#
# Arguments:
#	tnum		test number; used to label the progress output
#	client_down	boolean; when true, site E is not started for the
#			cold boot, so the first election has a site missing
#	use_leases	boolean; when true, every environment is opened with
#			the -rep_lease option
#
# The pass creates a five-site group (A through E), closes all the sites,
# cold boots the group and times the first election, then closes master
# site A (plus site E if it is running) and times a second election among
# sites B, C and D.  Failures are reported through error_check_good or by
# an error raised from repmgr026_await_election_result.
#
proc repmgr026_sub { tnum client_down use_leases } {
	global testdir
	global repfiles_in_memory
	global rep_verbose
	global verbose_type
	
	# Optional env-open arguments driven by the global test settings.
	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	# Start from a clean test directory with one home directory and one
	# port per site.
	env_cleanup $testdir
	file mkdir [set dira $testdir/SITE_A]
	file mkdir [set dirb $testdir/SITE_B]
	file mkdir [set dirc $testdir/SITE_C]
	file mkdir [set dird $testdir/SITE_D]
	file mkdir [set dire $testdir/SITE_E]
	foreach { porta portb portc portd porte } [available_ports 5] {}

	# First, just create/establish the group.
	puts -nonewline "Repmgr$tnum: Create a group of 5 sites: "
	set common "-create -txn $verbargs $repmemargs \
	    -rep -thread -event"
	if { $use_leases } {
		append common " -rep_lease {[list 3000000]} "
	}
	# The env-open commands are kept as strings so that each site can be
	# reopened later with exactly the same arguments.
	set cmda "berkdb_env_noerr $common -errpfx SITE_A -home $dira"
	set cmdb "berkdb_env_noerr $common -errpfx SITE_B -home $dirb"
	set cmdc "berkdb_env_noerr $common -errpfx SITE_C -home $dirc"
	set cmdd "berkdb_env_noerr $common -errpfx SITE_D -home $dird"
	set cmde "berkdb_env_noerr $common -errpfx SITE_E -home $dire"
	# repmgr arguments shared by every site.  The full_election timeout
	# (60 seconds, going by the comments further down) is the one under
	# test; the plain election timeout is much shorter.
	set common_mgr " -start elect \
	    -timeout {connection_retry 5000000} \
	    -timeout {election_retry 2000000} \
	    -timeout {full_election 60000000} \
	    -timeout {election 5000000} -timeout {ack 3000000}"
	# Site A creates the group; B through E then join one at a time via
	# site A, each waiting for start-up to finish before the next begins.
	set enva [eval $cmda]
	eval $enva repmgr $common_mgr  \
	    -local {[list 127.0.0.1 $porta creator]}
	puts -nonewline "." ; 	flush stdout
	set envb [eval $cmdb]
	eval $envb repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portb]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envb
	puts -nonewline "." ; 	flush stdout
	set envc [eval $cmdc]
	eval $envc repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portc]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envc
	puts -nonewline "." ; 	flush stdout
	set envd [eval $cmdd]
	eval $envd repmgr $common_mgr \
	    -local {[list 127.0.0.1 $portd]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $envd
	puts -nonewline "." ; 	flush stdout
	set enve [eval $cmde]
	eval $enve repmgr $common_mgr \
	    -local {[list 127.0.0.1 $porte]} -remote {[list 127.0.0.1 $porta]}
	await_startup_done $enve
	puts "."
	# Close every site so that the group can be cold booted below.
	$enve close
	$envd close
	$envc close
	$envb close
	$enva close

	# Cold boot the group (with or without site E), giving site A a
	# high priority.
	#

	# The *_wait_limit values are meant to be far larger than the
	# expected timeouts; they serve only to keep the test from hanging
	# forever.  The leeway amount should be enough less than the
	# full-election timeout to allow for any imprecision introduced by
	# the test mechanism.
	#
	set elect_wait_limit 25
	set full_secs_leeway 59
	set full_wait_limit 85

	puts "\tRepmgr$tnum.a: Start first four sites."
	set enva [eval $cmda]
	eval $enva repmgr $common_mgr -pri 200 -local {[list 127.0.0.1 $porta]}

	set envb [eval $cmdb]
	eval $envb repmgr $common_mgr -pri 100 -local {[list 127.0.0.1 $portb]}

	set envc [eval $cmdc]
	eval $envc repmgr $common_mgr -pri 90 -local {[list 127.0.0.1 $portc]}

	set envd [eval $cmdd]
	eval $envd repmgr $common_mgr -pri 80 -local {[list 127.0.0.1 $portd]}

	# The string NONE stands in for the site E handle when that site is
	# deliberately left down.
	if { $client_down } {
		set enve NONE
	} else {
		puts "\tRepmgr$tnum.b: Start fifth site."
		set enve [eval $cmde]
		eval $enve repmgr $common_mgr -pri 50 \
		    -local {[list 127.0.0.1 $porte]}
	}

	# wait for results, and make sure they're correct
	#
	# Site A goes first in the list because the helper treats the first
	# handle as the expected winner.
	set envlist [list $enva $envb $envc $envd]
	if { $enve != "NONE" } {
		lappend envlist $enve
	}
	set limit $full_wait_limit
	puts "\tRepmgr$tnum.c: wait (up to $limit seconds) for first election."
	set t [repmgr026_await_election_result $envlist $limit]
	if { $client_down } {
		# With a site missing, the election is expected to take
		# longer than the leeway threshold.
		error_check_good slow_election \
		    [expr {$t > $full_secs_leeway}] 1
	} else {
		# When all sites participate, the election should finish in way
		# less than 60 seconds.
		#
		error_check_good timely_election \
		    [expr {$t < $full_secs_leeway}] 1
	}
	puts "\tRepmgr$tnum.d: first election completed in $t seconds"

	# Clear the recorded events on every running site so that the second
	# election is judged only on events that arrive from here on.
	puts "\tRepmgr$tnum.e: wait for start-up done"
	$enva event_info -clear
	await_startup_done $envb
	$envb event_info -clear
	await_startup_done $envc
	$envc event_info -clear
	await_startup_done $envd
	$envd event_info -clear
	if { $enve != "NONE" } {
		await_startup_done $enve
		$enve event_info -clear
	}

	# Shut down site A, in order to test elections with less than the whole
	# group voting.  However, normally repmgr's reaction to losing master
	# connection is to try a "fast election" (the n-1 trick).  So we must do
	# something to mitigate that (see below).
	#
	puts "\tRepmgr$tnum.f: shut down master site A"
	if { $client_down } {
		# The fifth site is already down, so now we'll have just B, C,
		# and D running.  Therefore, even with repmgr pulling its "fast
		# election" (n-1) trick, we don't have enough votes for a
		# full-participation short circuit; so this is a valid test of
		# the "normal" election timeout.
		#
		$enva close
	} else {
		# Here all sites are running, so if we just killed the master
		# repmgr would invoke its "fast election" trick, resulting in no
		# timeout.  Since the purpose of this test is to ensure the
		# correct use of timeouts, that's no good.  Instead, let's first
		# kill one more other site.
		$enve close
		$enva close
	}

	# wait for results, and check them
	#
	# Site B is listed first, so it is the expected winner among the
	# three sites still running.
	set envlist [list $envb $envc $envd]
	set limit $elect_wait_limit
	puts "\tRepmgr$tnum.h: wait (up to $limit seconds) for second election."
	set t [repmgr026_await_election_result $envlist $limit]
	error_check_good normal_election [expr {$t < $full_secs_leeway}] 1
	puts "\tRepmgr$tnum.i: second election completed in $t seconds"

	$envd close
	$envc close
	$envb close
}

# Wait (a limited amount of time) for the election to finish.
#
# Arguments:
#	envlist		list of env handles; the first is the expected
#			winner and the others are the remaining clients
#	limit		maximum time to wait, in seconds
#
# Returns the approximate amount of time (in whole seconds) that the
# election took.  Raises the error "FAIL: time limit exceeded" once the
# clock has passed the start time plus the limit without the election
# having been seen to finish.
#
proc repmgr026_await_election_result { envlist limit } {
	set begin [clock seconds]
	# Braced expressions avoid a second round of substitution on the
	# operands.
	set deadline [expr {$begin + $limit}]
	while { true } {
		# Sample the clock before checking readiness; this sample is
		# used both for the deadline test and for the elapsed time
		# that is returned.
		set t [clock seconds]
		if { $t > $deadline } {
			error "FAIL: time limit exceeded"
		}

		if { [repmgr026_is_ready $envlist] } {
			return [expr {$t - $begin}]
		}

		# Pause between polls.
		tclsleep 1
	}
}

# Report whether the election has finished: the first env handle in
# envlist must satisfy is_elected, and every remaining handle must have a
# newmaster event present.  Returns true or false.
#
proc repmgr026_is_ready { envlist } {
	set expected_master [lindex $envlist 0]
	if { [is_elected $expected_master] } {
		# Walk the remaining handles by index; the first one that has
		# not yet seen a newmaster event settles the answer.
		set nsites [llength $envlist]
		for { set i 1 } { $i < $nsites } { incr i } {
			set site [lindex $envlist $i]
			if { ![is_event_present $site newmaster] } {
				return false
			}
		}
		return true
	}
	return false
}
}