File: repmgr034.tcl

package info (click to toggle)
db5.3 5.3.28%2Bdfsg2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 158,500 kB
  • sloc: ansic: 448,411; java: 111,824; tcl: 80,544; sh: 44,264; cs: 33,697; cpp: 21,604; perl: 14,557; xml: 10,799; makefile: 4,077; javascript: 1,998; yacc: 1,003; awk: 965; sql: 801; erlang: 342; python: 216; php: 24; asm: 14
file content (228 lines) | stat: -rw-r--r-- 7,602 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# See the file LICENSE for redistribution information.
#
# Copyright (c) 2011, 2013 Oracle and/or its affiliates.  All rights reserved.
#
# $Id$
#
# TEST	repmgr034
# TEST	Repmgr site removal and restart.
# TEST	
# TEST	Start two repmgr sites, master and client1.  The client1 removes
# TEST	itself and restarts, sometimes more than once.  Start a third 
# TEST	repmgr site client2 and make sure it can remove client1 from
# TEST	the group.
# TEST
# TEST	This test must use heartbeats to ensure that a client that has
# TEST	been removed and restarted without recovery can sync up.
# TEST
# repmgr034 --
#	Driver for the repmgr site removal and restart test.
#
# Arguments:
#	niter	number of times client1 removes itself and restarts in the
#		"restart" scenario (default 3)
#	tnum	test number used in progress messages (default "034")
#
# Runs repmgr034_sub once for every combination of iteration count and
# record count described below.
proc repmgr034 { {niter 3} {tnum "034"} } {
	source ./include.tcl

	# NOTE(review): is_freebsd_test is not set in this file; presumably
	# it comes from include.tcl -- confirm.
	if { $is_freebsd_test == 1 } {
		puts "Skipping replication manager test on FreeBSD platform."
		return
	}

	set method "btree"

	# Each element of iters is a number of site removal/restart
	# iterations.  With 0, the test starts master, client1 and client2,
	# and client2 removes client1.  With a non-zero value, client1 first
	# removes itself and restarts that many times before being removed
	# by client2.
	#
	# The zero-iteration scenario is always run.  The niter scenario is
	# added only when it is positive, so that a caller passing 0 does not
	# run the identical scenario twice.
	set iters [list 0]
	if { $niter > 0 } {
		lappend iters $niter
	}

	foreach iter $iters {
		# The nentries is the number of records inserted on the
		# master while client1 is removed from the group.
		# When nentries is 0, client1 does not need to sync up
		# with the master when it restarts its repmgr.  When it is
		# 100, client1 needs to sync up a bit.  When it is 1000,
		# it simulates client1 having been out of the group for a
		# while.
		foreach nentries {0 100 1000} {
			repmgr034_sub $method $tnum $iter $nentries
		}
	}
}

# repmgr034_sub --
#	Run one site removal/restart scenario.
#
# Arguments:
#	method		access method name, passed through convert_method
#	tnum		test number used in progress messages
#	niter		number of times client1 removes itself and restarts
#			before client2 removes it (0 skips that phase)
#	nentries	number of records written on the master in each
#			iteration while client1 is out of the group
#
# Steps: start master (a), start client1 (b), repeat self-removal and
# restart of client1 niter times (c), start client2 (d), have client2
# remove client1 (e), and close everything (f).
proc repmgr034_sub { method tnum niter nentries} {
	source ./include.tcl
	global databases_in_memory
	global env_private
	global testdir
	global repfiles_in_memory
	global rep_verbose
	global verbose_type

	if { $databases_in_memory } {
		set dbmemmsg "using in-memory databases "
		if { [is_queueext $method] } {
			puts -nonewline "Skipping repmgr$tnum for method "
			puts "$method with named in-memory databases."
			return
		}
		set dbname { "" "test.db" }
	} else {
		set dbmemmsg "using on-disk databases "
		set dbname "test.db"
	}

	if { $env_private == 1 } {
		set privargs " -private "
		set primsg "in private env "
	} else {
		set privargs ""
		set primsg ""
	}

	if { $niter > 0 } {
		set nitermsg "and restart for $niter time(s) "
	} else {
		set nitermsg ""
	}

	if { $repfiles_in_memory } {
		set repmemargs " -rep_inmem_files "
		set repmemmsg "and in-memory replication files "
	} else {
		set repmemargs ""
		set repmemmsg "and on-disk replication files "
	}

	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	} else {
		set verbargs ""
	}

	set omethod [convert_method $method]

	puts "Repmgr$tnum: Repmgr site removal, clean-up\
	    $nitermsg$dbmemmsg$repmemmsg$primsg\
	    with $nentries record(s)"

	# Unpack the three ports into port0, port1 and port2.
	foreach {port0 port1 port2} [available_ports 3] {}
	env_cleanup $testdir
	file mkdir [set masterdir $testdir/MASTERDIR]
	file mkdir [set clientdir1 $testdir/CLIENTDIR1]
	file mkdir [set clientdir2 $testdir/CLIENTDIR2]

	puts "\tRepmgr$tnum.a: Start master"
	set env0 [eval "berkdb_env -create -errpfx MASTER -home $masterdir \
	    -txn -rep -thread -recover $privargs $repmemargs $verbargs"]
	$env0 repmgr -timeout {heartbeat_send 500000}
	$env0 repmgr -local [list 127.0.0.1 $port0] -start master
	error_check_good nsites_A0 [$env0 rep_get_nsites] 1
	set db [berkdb_open_noerr -create -auto_commit -env $env0 \
	    $omethod $dbname]
	error_check_good env0_opendb [is_valid_db $db] TRUE

	puts "\tRepmgr$tnum.b: Start client1"
	set env1 [eval "berkdb_env_noerr -create -errpfx CLIENT1 \
	    -home $clientdir1 -txn -rep -thread -recover -event \
	    $privargs $repmemargs $verbargs"]
	$env1 repmgr -timeout {heartbeat_monitor 1100000}
	$env1 repmgr -local [list 127.0.0.1 $port1] \
	    -remote [list 127.0.0.1 $port0] -start client
	await_startup_done $env1
	error_check_good nsites_B0 [$env0 rep_get_nsites] 2
	error_check_good nsites_A1 [$env1 rep_get_nsites] 2
	set db1 [berkdb_open_noerr -create -auto_commit -env $env1 \
	    $omethod $dbname]
	error_check_good env1_opendb [is_valid_db $db1] TRUE


	if {$niter == 0} {
		puts "\tRepmgr$tnum.c: Skip site self-removal and restart"
	} else {
		puts "\tRepmgr$tnum.c: Site removal and restart for\
		    $niter time(s)"
	}
	for { set count 1 } { $count <= $niter } {incr count } {
		# Records are written on the master in every iteration, so
		# after this iteration it should hold this many keys in total.
		set expected_nkeys [expr {$nentries * $count}]

		$env1 event_info -clear
		puts "\t\tRepmgr$tnum.c.$count.(a): Client1 removes itself from\
		    the group: iter $count"
		$env1 repmgr -remove [list 127.0.0.1 $port1]
		await_event $env1 local_site_removed
		error_check_good nsites_C0 [$env0 rep_get_nsites] 1

		puts "\t\tRepmgr$tnum.c.$count.(b): The removed client1 is\
		    still read only"
		# The put is expected to fail.  Catch the put itself so that
		# ret holds its own error message.  If the put unexpectedly
		# succeeded, ret would hold its return value and the
		# substring check below would fail.
		catch { $db1 put "key" "data" } ret
		error_check_good readonly_failure \
		    [is_substr $ret "permission denied"] 1
		error_check_good db1_notfound [llength [$db1 get "none"]] 0

		if { $nentries > 0 } {
			puts "\t\tRepmgr$tnum.c.$count.(b1): Write $nentries\
			    record(s) on master, might/might not be synced\
			    up to client1"
			# Keys embed the iteration number so that every
			# iteration adds nentries new keys.
			for { set i 0 } { $i < $nentries } { incr i } {
				error_check_good db_put \
				    [$db put "key_${count}_${i}" "data"] 0
			}
			error_check_good nkeys \
			    [stat_field $db stat "Number of keys"] \
			    $expected_nkeys
		}

		puts "\t\tRepmgr$tnum.c.$count.(c): Restart client1"
		$env1 repmgr -timeout {heartbeat_monitor 1100000}
		# Allow a retry in case client1 didn't have time to fully
		# shut down.
		if {[catch {$env1 repmgr -local [list 127.0.0.1 $port1] \
		    -remote [list 127.0.0.1 $port0] -start client} result] && \
		    [string match "*REP_UNAVAIL*" $result]} {
			tclsleep 10
			$env1 repmgr -local [list 127.0.0.1 $port1] \
			    -remote [list 127.0.0.1 $port0] -start client
		}
		await_event $env1 connection_established
		error_check_good nsites_D0 [$env0 rep_get_nsites] 2
		error_check_good nsites_B1 [$env1 rep_get_nsites] 2

		if { $nentries > 0 } {
			puts "\t\tRepmgr$tnum.c.$count.(c1): Check client1 sync\
			    up with master"
			# Poll client1 until it has all the keys, allowing
			# more retries for larger record counts.
			set max_retries [expr {$nentries / 10}]
			for { set i 0 } { $i < $max_retries } { incr i } {
				if { [stat_field $db1 stat "Number of keys"] \
				    == $expected_nkeys } {
					break
				} else {
					tclsleep 2
				}
			}
			# The loop ran to completion without seeing the
			# expected key count.
			if { $i == $max_retries } {
				error "sync up duration is longer than expected"
			}
		}
	}

	puts "\tRepmgr$tnum.d: Start client2"
	set env2 [eval "berkdb_env -create -errpfx CLIENT2  -home $clientdir2 \
	    -txn -rep -thread -recover -event $privargs $repmemargs $verbargs"]
	# It is possible, especially when nentries=0, that we need a delay
	# before the recently restarted client1 can ack a new site addition.
	$env2 repmgr -timeout {heartbeat_monitor 1200000}
	if {[catch {$env2 repmgr -local [list 127.0.0.1 $port2] \
	    -remote [list 127.0.0.1 $port0] -start client} result] && \
	    [string match "*REP_UNAVAIL*" $result]} {
		tclsleep 3
		$env2 repmgr -local [list 127.0.0.1 $port2] \
		    -remote [list 127.0.0.1 $port0] -start client
	}
	await_startup_done $env2 100

	puts "\tRepmgr$tnum.e: Client2 removes client1 from the group"
	$env2 repmgr -remove [list 127.0.0.1 $port1]
	await_event $env1 local_site_removed
	# Master and client2 remain in the group.
	error_check_good nsites_F0 [$env0 rep_get_nsites] 2
	# As in step c, the put on the removed client1 is expected to fail.
	catch { $db1 put "key" "data" } ret
	error_check_good readonly_failure \
	    [is_substr $ret "permission denied"] 1

	puts "\tRepmgr$tnum.f: Close all"
	error_check_good db1_close [$db1 close] 0
	error_check_good db_close [$db close] 0
	error_check_good s_2_close [$env2 close] 0
	error_check_good s_1_close [$env1 close] 0
	error_check_good s_0_close [$env0 close] 0
}