File: rpl_semi_sync_shutdown_await_ack.test

package info (click to toggle)
mariadb 1%3A11.8.3-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 772,520 kB
  • sloc: ansic: 2,414,714; cpp: 1,791,394; asm: 381,336; perl: 62,905; sh: 49,647; pascal: 40,897; java: 39,363; python: 20,791; yacc: 20,432; sql: 17,907; xml: 12,344; ruby: 8,544; cs: 6,542; makefile: 6,145; ada: 1,879; lex: 1,193; javascript: 996; objc: 80; tcl: 73; awk: 46; php: 22
file content (295 lines) | stat: -rw-r--r-- 11,018 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
--source include/no_valgrind_without_big.inc

#
# Purpose:
#   This test validates that data is consistent between a primary and replica
# in semi-sync mode when the primary is issued `SHUTDOWN WAIT FOR ALL SLAVES`
# during an active communication. More specifically, the primary should not
# kill the Ack_thread until it is sure a replica has received all binlog
# data, i.e. once the primary receives the ACK. If a primary is issued a
# shutdown before receiving an ACK, it should wait until either 1) the ACK is
# received, or 2) the configured timeout (rpl_semi_sync_master_timeout) is
# reached.
#
# Methodology:
#   Using a topology consisting of one primary with two replicas, all in
# semi-sync mode, we use DEBUG_DBUG to simulate an error or delay on the
# replicas during an active communication while the primary is issued
# `SHUTDOWN WAIT FOR ALL SLAVES`. We create five test cases to ensure the
# Ack_thread is not prematurely killed due to the shutdown.
#   Test Case 1) If both replicas simulate a delay that is within the allowed
#                timeout, the primary should delay killing the Ack_thread
#                until an ACK is received.
#   Test Case 2) If both replicas simulate an error before sending an ACK, the
#                primary should delay killing the Ack_thread until the
#                timeout is reached.
#   Test Case 3) If one replica simulates a delay within the allowed timeout
#                and the other simulates an error before sending an ACK, the
#                primary should delay killing the Ack_thread until it
#                receives an ACK from the delayed slave.
#   Test Case 4) If a replica errors before sending an ACK, it will cause the
#                IO thread to stop and handle the error. During error handling,
#                if semi-sync is active, the replica will form a new connection
#                with the primary to kill the active connection. However, if
#                the primary is shutting down, it may kill the new connection,
#                thereby leaving the active semi-sync connection intact. The
#                slave should notice this, and not issue a `QUIT` command to
#                the primary, which would otherwise be sent to kill an active
#                connection. This test case validates that the slave does not
#                send a `QUIT` in this case.
#   Test Case 5) If a waiting-for-ACK user thread is killed (disconnected)
#                during SHUTDOWN WAIT FOR ALL SLAVES, ensure the primary will
#                still await the ACK from the replica before killing the
#                Ack_thread.
#
# References:
#   MDEV-11853: semisync thread can be killed after sync binlog but before ACK
#               in the sync state
#   MDEV-28114: Semi-sync Master ACK Receiver Thread Can Error on COM_QUIT
#

# Common setup shared by every test case below: build the replication
# topology, register expected-error suppressions, remember the settings that
# the cleanup section restores, and create/sync the test table.
--echo #############################
--echo # Common setup for all tests
--echo #############################

--echo # Note: Simulated slave delay is hardcoded to 800 milliseconds
--echo # Note: Simulated master shutdown delay is hardcoded to 500 milliseconds

--source include/have_innodb.inc
--source include/have_debug.inc
# Topology: server_1 replicates to both server_2 and server_3.
--let $rpl_topology=1->2, 1->3
--source include/rpl_init.inc

--connection server_1

--echo # Slaves which simulate an error will produce a timeout on the primary
call mtr.add_suppression("Timeout waiting");
call mtr.add_suppression("did not exit");
call mtr.add_suppression("Got an error reading communication packets");

# Remember the primary's semi-sync settings in mysqltest variables; they are
# written back with --eval in the cleanup section at the end of this file.
--let $sav_master_timeout= `SELECT @@GLOBAL.rpl_semi_sync_master_timeout`
--let $sav_enabled_master= `SELECT @@GLOBAL.rpl_semi_sync_master_enabled`

--echo # Suppress slave errors related to the simulated error
--connection server_2
call mtr.add_suppression("reply failed");
call mtr.add_suppression("Replication event checksum verification");
call mtr.add_suppression("Relay log write failure");
call mtr.add_suppression("Failed to kill the active semi-sync connection");
# Saved as user variables on the server_2 connection; the cleanup section
# restores both from these variables.
set @sav_enabled_server_2= @@GLOBAL.rpl_semi_sync_slave_enabled;
set @sav_server_2_dbug= @@GLOBAL.debug_dbug;

# Same suppressions and saved settings for the second replica.
--connection server_3
call mtr.add_suppression("reply failed");
call mtr.add_suppression("Replication event checksum verification");
call mtr.add_suppression("Relay log write failure");
call mtr.add_suppression("Failed to kill the active semi-sync connection");
set @sav_enabled_server_3= @@GLOBAL.rpl_semi_sync_slave_enabled;
set @sav_server_3_dbug= @@GLOBAL.debug_dbug;

# Create the table used by all test cases on the primary and record the
# primary's position so the replicas can be synced to it.
--connection server_1
CREATE TABLE t1 (a int) engine=innodb;
--save_master_pos

# Walk over the replicas server_2 .. server_$slave_last and wait for each one
# to reach the position saved above.
--let i= 2
--let slave_last= 3
while (`SELECT $i <= $slave_last`)
{
  --connection server_$i
  --sync_with_master
  --inc $i
}

# Set up the connection used to issue the shutdown
# NOTE(review): server_1_con2 is not referenced again in this file; presumably
# it is used inside rpl_semi_sync_shutdown_await_ack.inc -- confirm there.
--connect(server_1_con2, localhost, root,,)


# Test Cases 1-4 all run the same scenario from
# rpl_semi_sync_shutdown_await_ack.inc; only the --let parameters set before
# each --source differ. The include is not part of this file, so the parameter
# meanings below are inferred from their names -- confirm against the include:
#   server_N_sync_slave_reply  presumably 1 if server_N is expected to send
#                              its ACK, 0 if it errors before replying
#   server_N_dbug              presumably the debug_dbug value set on server_N
#   semisync_timeout           presumably the rpl_semi_sync_master_timeout
#                              used on the primary for the case
#   server_N_expect_row_count  presumably the row count expected in t1 on
#                              server_N after the case
--echo #############################
--echo # Test cases
--echo #############################

--echo #
--echo # Test Case 1) If both replicas simulate a delay that is within the
--echo # allowed timeout, the primary should delay killing the Ack_thread
--echo # until an ACK is received.
--echo #
# Both replicas delay their reply; the timeout of 1600 is larger than the
# 800 ms simulated slave delay announced in the common setup.
--let server_2_sync_slave_reply=1
--let server_2_dbug= "+d,simulate_delay_semisync_slave_reply"
--let server_3_sync_slave_reply=1
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
--let server_2_expect_row_count= 1
--let server_3_expect_row_count= 1
--source rpl_semi_sync_shutdown_await_ack.inc

--echo #
--echo # Test Case 2) If both replicas simulate an error before sending an ACK,
--echo # the primary should delay killing the Ack_thread until the
--echo # timeout is reached.
--echo #
# Both replicas use the error-injecting debug points and neither is expected
# to have the row, so the primary has to run into its timeout (500).
--let server_2_sync_slave_reply=0
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_sync_slave_reply=0
--let server_3_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let semisync_timeout= 500
--let server_2_expect_row_count= 0
--let server_3_expect_row_count= 0
--source rpl_semi_sync_shutdown_await_ack.inc

--echo #
--echo # Test Case 3) If one replica simulates a delay within the allowed
--echo # timeout and the other simulates an error before sending an ACK, the
--echo # primary should delay killing the Ack_thread until it receives an
--echo # ACK from the delayed slave.
--echo #
# Mixed case: server_2 errors (no row expected), server_3 only delays its
# reply (row expected).
--let server_2_sync_slave_reply=0
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_sync_slave_reply=1
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
--let server_2_expect_row_count= 0
--let server_3_expect_row_count= 1
--source rpl_semi_sync_shutdown_await_ack.inc

--echo #
--echo # Test Case 4) If a replica errors before sending an ACK, it will cause
--echo # the IO thread to stop and handle the error. During error handling, if
--echo # semi-sync is active, the replica will form a new connection with the
--echo # primary to kill the active connection. However, if the primary is
--echo # shutting down, it may kill the new connection, thereby leaving the
--echo # active semi-sync connection in-tact. The slave should notice this, and
--echo # not issue a `QUIT` command to the primary, which would otherwise be
--echo # sent to kill an active connection. This test case validates that the
--echo # slave does not send a `QUIT` in this case.
--echo #
# NOTE(review): the seven parameters below are identical to those of Test
# Case 3 in this file; confirm that running the same configuration twice is
# intended.
--let server_2_sync_slave_reply=0
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_sync_slave_reply=1
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
--let server_2_expect_row_count= 0
--let server_3_expect_row_count= 1
--source rpl_semi_sync_shutdown_await_ack.inc

#
# Added with MDEV-33551
#
# Unlike Test Cases 1-4, this case is written out inline instead of going
# through rpl_semi_sync_shutdown_await_ack.inc. The order of the steps below
# is significant: the transaction must be waiting for its ACK and the replica
# must be parked before its reply when the user connection is dropped and the
# shutdown is issued.
--echo #
--echo # Test Case 5) If a waiting-for-ACK user thread is killed (disconnected)
--echo # during SHUTDOWN WAIT FOR ALL SLAVES, ensure the primary will still
--echo # await the ACK from the replica before killing the Ack_receiver thread
--echo #
# Write a baseline row on the primary and record the GTID position.
--connection server_1
insert into t1 values (1);
--source include/save_master_gtid.inc

# Bring server_2 up to date, then restart its slave threads with semi-sync
# enabled (presumably the setting only takes effect when the IO thread
# reconnects -- confirm).
--connection server_2
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
SET GLOBAL rpl_semi_sync_slave_enabled= 1;
--source include/start_slave.inc

# Enable semi-sync on the primary and wait until exactly one semi-sync client
# is registered.
--connection server_1
SET GLOBAL rpl_semi_sync_master_enabled= 1;
SET GLOBAL rpl_semi_sync_master_timeout= 2000;

--let $status_var= Rpl_semi_sync_master_clients
--let $status_var_value= 1
source include/wait_for_status_var.inc;

show status like 'Rpl_semi_sync_master_status';
show status like 'Rpl_semi_sync_master_clients';

# Make server_2 delay its semi-sync reply.
# NOTE(review): @old_dbug is not read anywhere else in this file; the cleanup
# section restores debug_dbug from @sav_server_2_dbug instead.
--connection server_2
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,simulate_delay_semisync_slave_reply";

# Two extra client connections (no port given; presumably they go to the
# primary): con1 runs the transaction that waits for the ACK, con2 issues the
# SHUTDOWN.
--connect(con1, localhost, root,,)
--connect(con2, localhost, root,,)

# --send returns without waiting for the result, so the test can continue
# while the insert is blocked.
--connection con1
--send insert into t1 values (2)

--connection server_1
--echo # Wait for thd to begin semi-sync wait..
--let $wait_condition= SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE state = 'Waiting for semi-sync ACK from slave'
--source include/wait_condition.inc
--echo # ..done

# Block until the replica emits io_thd_at_slave_reply (presumably raised by
# the IO thread at the delayed reply point -- confirm in the server code).
--connection server_2
set debug_sync= "now wait_for io_thd_at_slave_reply";

# Drop the connection whose insert is still waiting for the ACK; this is the
# "killed (disconnected)" user thread named in the test case description.
--disconnect con1

# Write "wait" into the expect file of mysqld.1 before shutting it down
# (by mtr convention this keeps mtr from restarting the server on its own).
--connection default
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait
EOF

--connection con2
SHUTDOWN WAIT FOR ALL SLAVES;

# NOTE(review): "Waitng" below is a typo, but the text is echoed into the
# recorded result, so correcting it requires re-recording the .result file.
--echo # Waitng for shutdown to be delayed..
# Run the log search from a replica connection while the primary shuts down.
# $log_error_file and $n_logged_delayed_shutdown_notes are not assigned in
# this file; presumably rpl_semi_sync_shutdown_await_ack.inc sets them during
# Test Cases 1-4 -- confirm. SEARCH_WAIT presumably makes the include wait
# until the pattern has been found that many times.
--connection server_2
--inc $n_logged_delayed_shutdown_notes
let SEARCH_FILE= $log_error_file;
let SEARCH_PATTERN=Delaying shutdown to await semi-sync ACK;
let SEARCH_WAIT=FOUND $n_logged_delayed_shutdown_notes;
source include/search_pattern_in_file.inc;

# Wait for each of these three connections to be closed.
--connection default
--source include/wait_until_disconnected.inc

--connection server_1
--source include/wait_until_disconnected.inc

--connection con2
--source include/wait_until_disconnected.inc

# Send the io_thd_do_reply signal on the replica, then stop both replicas.
# $rpl_allow_error is set for the duration of the two stop_slave.inc calls
# and cleared afterwards.
--connection server_2
set debug_sync= "now signal io_thd_do_reply";
--let $rpl_allow_error= 1
source include/stop_slave.inc;
--connection server_3
source include/stop_slave.inc;
--let $rpl_allow_error=

# Append "restart" to the expect file (by mtr convention this requests a
# restart of mysqld.1) and wait for the default and server_1 connections to
# be connected again.
--connection default
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
restart
EOF
--enable_reconnect
--source include/wait_until_connected_again.inc

--connection server_1
--enable_reconnect
--source include/wait_until_connected_again.inc


# Cleanup: restore every setting saved in the common setup, restart the
# replicas that Test Case 5 stopped, and remove the test table.
--echo #############################
--echo # Cleanup
--echo #############################

# Restore the replica settings from the user variables saved on each
# replica connection, then start the slave threads again.
--connection server_2
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_2;
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
source include/start_slave.inc;

--connection server_3
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_3;
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
source include/start_slave.inc;

# Wait until the primary reports no semi-sync clients before its own
# semi-sync settings are restored.
--connection server_1
let $status_var= Rpl_semi_sync_master_clients;
let $status_var_value= 0;
source include/wait_for_status_var.inc;

# Query logging is switched off around the restore statements, presumably so
# the saved (environment-dependent) values do not appear in the result file.
--disable_query_log
--eval SET @@GLOBAL.rpl_semi_sync_master_timeout= $sav_master_timeout;
--eval SET @@GLOBAL.rpl_semi_sync_master_enabled= $sav_enabled_master;
--enable_query_log

drop table t1;

--source include/rpl_end.inc