File: gr_crash_recovery_server.test

package info (click to toggle)
mysql-8.0 8.0.43-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,273,924 kB
  • sloc: cpp: 4,684,605; ansic: 412,450; pascal: 108,398; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; sh: 24,181; python: 21,816; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,076; makefile: 2,194; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (238 lines) | stat: -rw-r--r-- 9,018 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
################################################################################
# This test verifies whether the group_replication works fine if the
# server is killed during its recovery phase.
#
# Test:
# 0. The test requires three servers: M1,M2 and M3.
# 1. Start GR on servers M1 and M2.
# 2. Execute some operations on the group through server M1
#    using procedure dml_operations.
# 3. Join M3 to the group and wait until the server is in Recovery.
# 4. Start executing DML operations on the group through server1 to
#    ensure that the cached transactions are present at the recovery
#    server when it gets killed.
# 5. Kill and restart the server M3 during phase-1 of recovery.
# 6. Join the restarted member to the group again.
# 7. Wait until the recovery phase-1 of the server M3 ends.
# 8. Start executing the DML transactions on the group through server1
#    so that the cached transaction will be added on the server M3.
# 9. Kill and restart the server M3 during phase-2 of recovery.
# 10.Wait until all the servers are online.
# 11.Verify that all the members have same data.
# 12.Cleanup
################################################################################

# This test crashes servers, hence we skip it on valgrind.
--source include/not_valgrind.inc
--source include/big_test.inc
--source include/force_restart.inc

--source include/set_privilege_checks_user_as_system_user.inc
--source include/have_group_replication_plugin.inc
--let $rpl_server_count= 3
--let $rpl_skip_group_replication_start= 1
--source include/group_replication.inc

# Step 1: start GR on two servers, M1 and M2.
#
# Table t1 is created on each server with sql_log_bin=0, i.e. the DDL is
# executed locally and is not written to that server's binary log. Binary
# logging is switched back on right after the CREATE TABLE.

# M1 creates t1 and bootstraps the group.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc

SET sql_log_bin=0;
CREATE TABLE t1(a int primary key);
SET sql_log_bin=1;

--source include/start_and_bootstrap_group_replication.inc

# M2 creates its own copy of t1 and joins the group.
--let $rpl_connection_name= server2
--source include/rpl_connection.inc

SET sql_log_bin=0;
CREATE TABLE t1(a int primary key);
SET sql_log_bin=1;

--source include/start_group_replication.inc

# Step 2: execute operations on the group through server1 so
# that when M3 joins it will be in recovery for some time.

--let $rpl_connection_name= server1
--source include/rpl_connection.inc

# Create the procedure to perform dml operations.
#
# dml_operations(p, q) loops x over p .. q-1 and, for every x:
#   1. inserts the row a=x into t1,
#   2. updates that row to a=x+400,
#   3. deletes every row with a<420.
# A row written for x therefore ends up with the value x+400 and survives
# the delete only when x+400 >= 420.
# The statement delimiter is switched to $$ for the definition so that the
# semicolons inside the procedure body do not terminate the CREATE statement.
delimiter $$;
CREATE PROCEDURE dml_operations(IN p INT,IN q INT)
     BEGIN
     declare x INT;
     set x=p;
     while x<q do
     insert into t1 values (x);
     update t1 set a=x+400 where a=x;
     delete from t1 where a<420;
     set x=x+1;
     end  while;
     end$$
delimiter ;$$
--echo

# First batch of transactions (x = 1 .. 99), executed synchronously on server1.
--echo ----call procedure----
call dml_operations(1,100);
--echo

# Step 3: JOIN M3 to the group and leave it in RECOVERING.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc

# Everything up to "SET sql_log_bin=1" runs with binary logging disabled for
# this session. The error-log suppressions are registered with the query log
# disabled, so the add_suppression calls themselves are not echoed to the
# test output.
set sql_log_bin=0;
--disable_query_log
call mtr.add_suppression(".*Replica SQL for channel 'group_replication_applier': ... The replica coordinator and worker threads are stopped, possibly leaving data in inconsistent state*");
call mtr.add_suppression("\\[Warning\\] \\[[^]]*\\] Database page corruption or a failed file read of page");
call mtr.add_suppression("There was an error when connecting to the donor server. Please check that group_replication_recovery channel credentials and all MEMBER_HOST column.*");
call mtr.add_suppression("Replica I/O for channel 'group_replication_recovery': Fatal error: Invalid (empty) username .*Connection attempt terminated. Error_code: MY-013117");
call mtr.add_suppression(".*For details please check performance_schema.replication_connection_status table and error log messages of Replica I/O for channel group_replication_recovery.");
--enable_query_log
CREATE TABLE t1(a int primary key);
SET sql_log_bin=1;
# Take a read lock on t1 from connection server3. It is held until the
# UNLOCK TABLES issued later on this same connection.
# NOTE(review): presumably the lock blocks replicated writes to t1 and thereby
# keeps M3 in the RECOVERING state - confirm.
LOCK TABLES t1 READ;

# Start GR from a second connection to M3 (server_3), since connection server3
# is the one holding the table lock.
# NOTE(review): $group_replication_start_member_state presumably tells
# start_group_replication.inc to wait for RECOVERING instead of ONLINE -
# verify against the include.
--let $rpl_connection_name= server_3
--source include/rpl_connection.inc
--let $group_replication_start_member_state= RECOVERING
--source include/start_group_replication.inc

# Step 4: generate load on the group while M3 is recovering.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc

# "send" dispatches the call without waiting for it to finish; its result is
# collected by the "reap" issued on connection server1 further down.
--echo ----call procedure----
send call dml_operations(100,200);
--echo

# Connection server1 is busy with the pending call, so a second connection to
# M1 (server_1) is used for the wait below.
--let $rpl_connection_name= server_1
--source include/rpl_connection.inc

# Wait to ensure that some cached transactions will be present at the time
# of kill of the recovery server.
# dml_operations leaves each row at x+400, so MAX(a) > 520 means the
# background call has already processed values of x above 120.
--let $wait_timeout= 200
--let $wait_condition= SELECT MAX(a) > 520 FROM t1
--source include/wait_condition.inc

# Release the read lock taken on t1 by connection server3.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
UNLOCK TABLES;

# Sleep for some time so that some transactions execute on server3.
sleep 2;

# Step 5: kill server3 during phase-1 of recovery and restart it.
#
# The GR settings currently in effect on server3 are read into test variables
# and passed back on the restart command line through $restart_parameters.
# --replace_result masks the concrete values with fixed placeholders in the
# output of the next command.
# NOTE(review): $group_replication_group_name is not assigned in this file;
# presumably it is set by include/group_replication.inc - confirm.
--echo # killing
--let $group_replication_local_address= `SELECT @@GLOBAL.group_replication_local_address`
--let $group_replication_group_seeds= `SELECT @@GLOBAL.group_replication_group_seeds`
--let $group_replication_comm_stack= `SELECT @@GLOBAL.group_replication_communication_stack`
--let $restart_parameters=restart:--group_replication_local_address=$group_replication_local_address --group_replication_group_seeds=$group_replication_group_seeds --group_replication_group_name=$group_replication_group_name --group_replication_communication_stack=$group_replication_comm_stack
--replace_result $group_replication_local_address GROUP_REPLICATION_LOCAL_ADDRESS $group_replication_group_seeds GROUP_REPLICATION_GROUP_SEEDS $group_replication_group_name GROUP_REPLICATION_GROUP_NAME $group_replication_comm_stack GROUP_REPLICATION_COMMUNICATION_STACK

--source include/kill_and_restart_mysqld.inc
--echo # restarting

# Needed as we are not using rpl_restart_server.inc
--let $rpl_server_number= 3
--source include/rpl_reconnect.inc

--let $rpl_connection_name= server2
--source include/rpl_connection.inc

# Wait until group has only 2 members, that is, server3's death is detected by
# the group.
--echo # check that there are 2 mebers in the group
--let $group_replication_number_of_members= 2
--source include/gr_wait_for_number_of_members.inc

# Collect the result of the dml_operations(100,200) call that was dispatched
# with "send" on connection server1.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
reap;

# Another synchronous batch (x = 200 .. 299), executed while M3 is out of the
# group.
--echo ----call procedure----
call dml_operations(200,300);
--echo

# Steps 6-8: start GR on server M3 again, generate load, and wait for the end
# of recovery phase-1.
#
# As in the first join, t1 is read-locked on connection server3 before GR is
# started from the second connection server_3.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
LOCK TABLES t1 READ;

--let $rpl_connection_name= server_3
--source include/rpl_connection.inc
start group_replication;

# Wait until M3 is reported as RECOVERING.
--let $group_replication_member_state= RECOVERING
--source include/gr_wait_for_member_state.inc

--let $rpl_connection_name= server1
--source include/rpl_connection.inc

# Dispatched asynchronously with "send"; the matching "reap" is issued on
# connection server1 after M3 has been killed again.
--echo ----call procedure----
send call dml_operations(300,400);
--echo

# Release the read lock on t1.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
UNLOCK TABLES;

# Wait until Recovery phase1 ends.(until the service_state of group_replication_recovery goes to OFF state.)
# NOTE(review): the connection is already server3 at this point, so this
# switch is redundant. It is kept because the include's output is presumably
# part of the recorded result file - confirm before removing.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc

--let $wait_timeout=300
--let $wait_condition= SELECT COUNT(*)=1 FROM performance_schema.replication_connection_status WHERE channel_name='group_replication_recovery' AND service_state='OFF'
--source include/wait_condition.inc

# Short pause before the kill that follows.
sleep 1;

# Step 9: kill the recovery server (M3) during phase-2 of recovery and
# restart it.
#
# $group_replication_local_address and $group_replication_group_seeds still
# hold the values read from server3 before the first kill. The communication
# stack is re-read from server3 here, and this must happen BEFORE
# $restart_parameters is built: mysqltest expands variables at assignment
# time, so a refresh placed after it would not reach the restart command line.
# --replace_result masks the concrete values with fixed placeholders in the
# output of the next command.
--echo # killing
--let $group_replication_comm_stack= `SELECT @@GLOBAL.group_replication_communication_stack`
--let $restart_parameters=restart:--group_replication_local_address=$group_replication_local_address --group_replication_group_seeds=$group_replication_group_seeds --group_replication_group_name=$group_replication_group_name --group_replication_communication_stack=$group_replication_comm_stack
--replace_result $group_replication_local_address GROUP_REPLICATION_LOCAL_ADDRESS $group_replication_group_seeds GROUP_REPLICATION_GROUP_SEEDS $group_replication_group_name GROUP_REPLICATION_GROUP_NAME $group_replication_comm_stack GROUP_REPLICATION_COMMUNICATION_STACK
--source include/kill_and_restart_mysqld.inc
--echo # restarting

# Needed as we are not using rpl_restart_server.inc
--let $rpl_server_number= 3
--source include/rpl_reconnect.inc

--let $rpl_connection_name= server2
--source include/rpl_connection.inc

# Wait until group has only 2 members, that is, recovery server's death is detected by
# the group.
--echo # check that there are 2 mebers in a group
--let $group_replication_number_of_members= 2
--source include/gr_wait_for_number_of_members.inc

# Collect the result of the dml_operations(300,400) call that was dispatched
# with "send" on connection server1.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
reap;

# Steps 10-12: start GR on the killed and restarted recovery server, verify
# the data, and clean up.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc

# Setting recovery user to avoid sporadic failures due to empty user.
# Warnings are disabled around the two statements so that any warning they
# raise is not written to the test output.
# NOTE(review): RESET SLAVE is the deprecated spelling of RESET REPLICA in
# MySQL 8.0. It is left unchanged here because statement text is echoed to
# the test output - update together with the recorded result if desired.
--disable_warnings
RESET SLAVE FOR CHANNEL 'group_replication_recovery';
CHANGE REPLICATION SOURCE TO SOURCE_USER= 'root' FOR CHANNEL 'group_replication_recovery';
--enable_warnings
--let $wait_timeout= 300
--source include/start_group_replication.inc

--source include/rpl_sync.inc

# Verify that table t1 has same data on all the servers.
--let $diff_tables=server1:t1, server2:t1, server3:t1
--source include/diff_tables.inc

# Clean up: drop the objects created by this test and end the GR test.
drop table t1;
drop procedure dml_operations;
--let $skip_restore_connection= 0
--source include/group_replication_end.inc