File: gr_clone_applier_stop.test

package info (click to toggle)
mysql-8.0 8.0.43-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,273,924 kB
  • sloc: cpp: 4,684,605; ansic: 412,450; pascal: 108,398; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; sh: 24,181; python: 21,816; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,076; makefile: 2,194; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (298 lines) | stat: -rw-r--r-- 12,532 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
################################################################################
# This test confirms that:
# 1: On applier failure clone does not start.
# 2: During clone group_replication_applier SQL thread is OFF.
# 3: If clone fails group_replication_applier is restarted.
#
# Test:
# 0. The test requires two servers: M1 and M2
# 1. Bootstrap M1 and install clone plugin.
# 2. Setup clone on M2.
# 3. Force clone. Force applier failure on M2.
#    Start GR on M2 fails. Applier fails before clone stops applier.
#    Assert clone never starts.
# 4. Reset debug points for applier failures.
#    Restart GR on M2.
#    Assert clone starts and group_replication_applier SQL thread is OFF.
# 5. Block applier on M2.
#    Uninstall clone on M1 so that clone fails.
#    Create some transactions on M1 to create applier backlog on M2.
# 6. Stop GR on M2 without committing the received transactions.
#    Create transactions on M1 for M2 to clone.
# 7. Start GR on M2.
#    Clone will fail and incremental recovery will start.
#    Applier will be OFF till clone failure is detected.
# 8. Cleanup.
################################################################################

# Prerequisites and common Group Replication test setup.
# NOTE(review): the have_*.inc includes presumably skip the test when the
# named feature is not available -- confirm against the include files.
--source include/big_test.inc
--source include/have_debug_sync.inc
--source include/have_mysqld_monitoring_process.inc
--source include/have_clone_plugin.inc
--source include/force_restart.inc
--source include/have_group_replication_plugin.inc
# Set right before group_replication.inc is sourced. Group Replication is
# started explicitly by this test (bootstrap in step 1, START GROUP_REPLICATION
# on M2 in steps 3, 4 and 7).
# NOTE(review): presumably this tells group_replication.inc not to start GR
# on its own -- confirm.
--let $rpl_skip_group_replication_start= 1
--source include/group_replication.inc

# Validate plugins
# NOTE(review): `plugins` is written without a `$` prefix, unlike every other
# --let in this file; presumably check_plugin_dir.inc consumes it in that
# form -- confirm before changing it.
--let plugins = CLONE_PLUGIN,GROUP_REPLICATION
--source include/check_plugin_dir.inc

--echo
--echo # 1. Bootstrap M1 and install clone plugin.
--echo
# Runs on the connection left current by group_replication.inc
# (NOTE(review): presumably server1, i.e. M1 -- confirm).
--source include/start_and_bootstrap_group_replication.inc
# The plugin library name is replaced by the fixed token CLONE_PLUGIN in the
# recorded output of the INSTALL PLUGIN statement below.
--replace_result $CLONE_PLUGIN CLONE_PLUGIN
--eval INSTALL PLUGIN clone SONAME '$CLONE_PLUGIN'

# Seed data: table t1 with rows 1 and 2. Later steps add rows 3..6 and the
# final check compares test.t1 between server1 and server2.
CREATE TABLE t1 (a INT PRIMARY KEY);
INSERT INTO t1 values (1);
INSERT INTO t1 values (2);

--echo
--echo # 2. Setup clone on M2.
--echo

--let $rpl_connection_name= server2
--source include/rpl_connection.inc

# Capture M2's identity and GR settings. $member2_uuid is used by the wait
# condition in step 5; the other three values are persisted below.
--let $member2_uuid= query_get_value(SELECT @@SERVER_UUID, @@SERVER_UUID, 1)
--let $_group_replication_local_address= `SELECT @@GLOBAL.group_replication_local_address`
--let $_group_replication_group_seeds= `SELECT @@GLOBAL.group_replication_group_seeds`
--let $_group_replication_comm_stack= `SELECT @@GLOBAL.group_replication_communication_stack`

# NOTE(review): presumably the monitoring process brings M2 back up after the
# clone in step 4 shuts it down, loading the plugins named in $plugin_list --
# confirm against spawn_monitoring_process.inc.
--let $plugin_list= $GROUP_REPLICATION
--source include/spawn_monitoring_process.inc

# Persist the GR configuration on M2 and enable start-on-boot. The query log
# is disabled so the environment-specific values are not written to the
# recorded result. These settings are undone with RESET PERSIST in step 8.
--disable_query_log
--eval SET PERSIST group_replication_group_name= "$group_replication_group_name"
--eval SET PERSIST group_replication_local_address= "$_group_replication_local_address"
--eval SET PERSIST group_replication_group_seeds= "$_group_replication_group_seeds"
--eval SET PERSIST group_replication_communication_stack= "$_group_replication_comm_stack"
SET PERSIST group_replication_start_on_boot= ON;
--enable_query_log
# Install the clone plugin on M2 as well; the library name is masked in the
# recorded output.
--replace_result $CLONE_PLUGIN CLONE_PLUGIN
--eval INSTALL PLUGIN clone SONAME '$CLONE_PLUGIN'

--echo
--echo # 3. Force clone. Force applier failure on M2.
--echo #    Start GR on M2 fails. Applier fails before clone stops applier.
--echo #    Assert clone never starts.
--echo

--let $rpl_connection_name= server2
--source include/rpl_connection.inc
# Remember M2's current clone threshold, then lower it to 1 so that the
# transactions already committed on M1 make M2 choose clone for recovery.
--let $_group_replication_threshold_save= `SELECT @@GLOBAL.group_replication_clone_threshold`
SET GLOBAL group_replication_clone_threshold= 1;
--replace_result $group_replication_group_name GROUP_NAME
--eval SET GLOBAL group_replication_group_name= "$group_replication_group_name"
# Two debug points are armed before the join:
#  - force_sql_thread_error: per the step description, makes the applier fail.
#  - gr_clone_before_applier_stop: NOTE(review): presumably holds the clone
#    path before it stops the applier until `applier_stopped` is signalled
#    below -- confirm against the plugin source.
SET @@GLOBAL.DEBUG= '+d,force_sql_thread_error';
SET @@GLOBAL.DEBUG='+d,gr_clone_before_applier_stop';

# Asynchronous start: the server2 connection stays busy until --reap below.
--send START GROUP_REPLICATION;

# server_2 is a different connection name from server2; it is used here
# because server2 is still waiting on the statement sent above.
# NOTE(review): presumably a second connection to the same server M2 --
# confirm against group_replication.inc.
--let $rpl_connection_name= server_2
--source include/rpl_connection.inc

# Wait until the applier channel reports OFF (i.e. the forced failure has
# taken effect) before letting the clone path proceed.
--let $wait_condition= SELECT COUNT(*) = 1 FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF"
--source include/wait_condition_or_abort.inc

SET DEBUG_SYNC = 'now SIGNAL applier_stopped';
SET @@GLOBAL.DEBUG='-d,gr_clone_before_applier_stop';

--let $rpl_connection_name= server2
--source include/rpl_connection.inc

# One of three things can happen:
# A. The applier fails after the join and the member leaves the group
#    but the START GROUP_REPLICATION command does not fail
# B. The applier fails after initialization and sets the state to ERROR,
#    making the group join fail that makes the START GROUP_REPLICATION
#    command fail with a generic ER_GROUP_REPLICATION_CONFIGURATION.
# C. The applier fails and its initialization processes catches the error.
#    During the start, it is known that the applier failed so the START
#    GROUP_REPLICATION fails with: ER_GROUP_REPLICATION_APPLIER_INIT_ERROR

--error 0, ER_GROUP_REPLICATION_CONFIGURATION, ER_GROUP_REPLICATION_APPLIER_INIT_ERROR
--reap

# Disarm the applier failure and clear all debug sync signals so that the
# restart in step 4 begins from a clean state.
SET @@GLOBAL.DEBUG= '-d,force_sql_thread_error';
SET DEBUG_SYNC= 'RESET';

# An empty performance_schema.clone_status means no clone was started.
# No trailing `;` on the condition: with the `--let` form the rest of the
# line is the value, so a delimiter would become part of the asserted
# expression (the other assert conditions in this test have none either).
--let $assert_text= Clone must not start.
--let $assert_cond= [SELECT COUNT(*) FROM performance_schema.clone_status] = 0
--source include/assert.inc

--echo
--echo # 4. Reset debug points for applier failures.
--echo #    Restart GR on M2.
--echo #    Assert clone starts and group_replication_applier SQL thread is OFF.
--echo

# NOTE(review): gr_clone_wait presumably makes the clone path emit
# `gr_clone_paused` and then wait for `gr_clone_continue` -- both signals are
# used below; confirm the exact pause location against the plugin source.
SET @@GLOBAL.DEBUG='+d,gr_clone_wait';

START GROUP_REPLICATION;

SET DEBUG_SYNC = 'now WAIT_FOR gr_clone_paused';
SET @@GLOBAL.DEBUG='-d,gr_clone_wait';

# While clone is paused, the applier channel must already be OFF.
--let $assert_text= group_replication_applier SQL Thread will be off.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF",count, 1] = 1
--source include/assert.inc

SET DEBUG_SYNC = 'now SIGNAL gr_clone_continue';

# The connection to M2 is expected to drop once clone proceeds
# (NOTE(review): presumably because a successful clone restarts the server,
# which the monitoring process from step 2 brings back -- confirm).
--source include/wait_until_disconnected.inc

--let $rpl_server_number= 2
--source include/rpl_reconnect.inc

# group_replication_start_on_boot was persisted in step 2, so M2 is expected
# to rejoin by itself and reach ONLINE.
--let $group_replication_member_state=ONLINE
--source include/gr_wait_for_member_state.inc

# No trailing `;` on the condition: with the `--let` form the rest of the
# line is the value, so a delimiter would become part of the asserted
# expression (the applier-state assert above has none either).
--let $assert_text= Clone must be completed
--let $assert_cond= [SELECT state="Completed" FROM performance_schema.clone_status] = 1
--source include/assert.inc

--echo
--echo # 5. Block applier on M2.
--echo #    Uninstall clone on M1 so that clone fails.
--echo #    Create some transactions on M1 to create applier backlog on M2.
--echo

# Issued on the connection left current by step 4 (per the step title this is
# M2). NOTE(review): confirm rpl_reconnect.inc does not switch connections.
SET @@GLOBAL.DEBUG='+d,block_applier_updates';

--let $rpl_connection_name= server1
--source include/rpl_connection.inc
# Without the clone plugin on M1, the clone attempted by M2 in step 7 fails.
UNINSTALL PLUGIN clone;
# Two transactions that M2 receives but, with its applier blocked, cannot apply.
INSERT INTO t1 values (3);
INSERT INTO t1 values (4);

--let $rpl_connection_name= server2
--source include/rpl_connection.inc

# Wait until exactly the two transactions above are queued in M2's applier.
--let $wait_condition= SELECT COUNT(*) = 1 FROM performance_schema.replication_group_member_stats WHERE member_id='$member2_uuid' AND count_transactions_remote_in_applier_queue = 2
--source include/wait_condition_or_abort.inc

--echo
--echo # 6. Stop GR on M2 without committing the received transactions.
--echo #    Create transactions on M1 for M2 to clone.
--echo

# Swap the debug points while the applier is parked: wait for it to report
# `applier_read_blocked`, remove the block, arm the forced SQL thread error,
# then release it with `resume_applier_read`. The member is then expected to
# reach ERROR, i.e. the queued transactions are not committed.
SET DEBUG_SYNC = "now WAIT_FOR applier_read_blocked";
SET @@GLOBAL.DEBUG='-d,block_applier_updates';
SET @@GLOBAL.DEBUG='+d,force_sql_thread_error';
SET DEBUG_SYNC = "now SIGNAL resume_applier_read";
--let $group_replication_member_state= ERROR
--source include/gr_wait_for_member_state.inc
# Disarm the forced error so it does not affect the restart in step 7.
SET @@GLOBAL.DEBUG='-d,force_sql_thread_error';

--source include/stop_group_replication.inc

--let $rpl_connection_name= server1
--source include/rpl_connection.inc
# Force clone.
# More transactions on M1 while M2 is out of the group; with the clone
# threshold set to 1 in step 7, the missing transactions make M2 pick clone.
INSERT INTO t1 values (5);
INSERT INTO t1 values (6);

--echo
--echo # 7. Start GR on M2.
--echo #    Clone will fail and incremental recovery will start.
--echo #    Applier will be OFF till clone failure is detected.
--echo

# Block the applier on M1. It is released in the "Allow the recovery to
# continue" section below.
# NOTE(review): presumably this keeps M2 in recovery long enough for the
# asserts below to run -- confirm.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SET @@GLOBAL.DEBUG='+d,block_applier_updates';

--let $rpl_connection_name= server2
--source include/rpl_connection.inc

# NOTE(review): gr_run_clone_query_fail_once presumably pauses at the clone
# query (signals `signal.run_clone_query_waiting`, waits for
# `signal.run_clone_query_continue`) and makes it fail once -- confirm
# against the plugin source.
SET @@GLOBAL.DEBUG='+d,gr_run_clone_query_fail_once';
# The threshold is set again here: the value set in step 3 is not relied on
# after M2 was restarted in step 4.
SET GLOBAL group_replication_clone_threshold= 1;
START GROUP_REPLICATION;

SET DEBUG_SYNC = 'now WAIT_FOR signal.run_clone_query_waiting';
SET @@GLOBAL.DEBUG='-d,gr_run_clone_query_fail_once';

# Clone is executing
# (the GR cloning stage is the current stage of exactly one thread)
--let $assert_text= "Clone is executing"
--let $assert_cond= [SELECT COUNT(*) FROM performance_schema.events_stages_current WHERE event_name LIKE "%stage/group_rpl/Group Replication Cloning%"] = 1
--source include/assert.inc

# While the clone query is pending, the applier channel must be OFF.
--let $assert_text= group_replication_applier SQL Thread will be OFF.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF",count, 1] = 1
--source include/assert.inc

SET DEBUG_SYNC = 'now SIGNAL signal.run_clone_query_continue';

# Clone will fail and will start channel group_replication_recovery
--let $wait_condition=SELECT COUNT(*)=1 FROM performance_schema.replication_connection_status WHERE CHANNEL_NAME="group_replication_recovery" AND SERVICE_STATE='ON'
--source include/wait_condition.inc

# Once recovery has fallen back to the recovery channel, the applier channel
# must have been started again.
--let $assert_text= group_replication_applier SQL Thread will be ON.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "ON",count, 1] = 1
--source include/assert.inc

# Allow the recovery to continue
# (release the applier block that was set on M1 at the start of this step)
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SET DEBUG_SYNC = "now WAIT_FOR applier_read_blocked";
SET @@GLOBAL.DEBUG='-d,block_applier_updates';
SET DEBUG_SYNC = "now SIGNAL resume_applier_read";

--let $rpl_connection_name= server2
--source include/rpl_connection.inc

--let $group_replication_member_state=ONLINE
--source include/gr_wait_for_member_state.inc

# Final consistency check: t1 must be identical on both members.
--let $diff_tables= server1:test.t1, server2:test.t1
--source include/diff_tables.inc


--echo
--echo # 8. Cleanup.
--echo

--let $rpl_connection_name= server1
--source include/rpl_connection.inc

DROP TABLE t1;

# Error-log suppressions for M1. Binary logging is switched off for the
# session while they are registered, so these statements are not written to
# the binary log.
set session sql_log_bin=0;
call mtr.add_suppression("Timeout while waiting for the group communication engine to exit!");
call mtr.add_suppression("The member has failed to gracefully leave the group.");
set session sql_log_bin=1;

--let $rpl_connection_name= server2
--source include/rpl_connection.inc
# Error-log suppressions for M2: messages produced by the forced applier
# failures (steps 3 and 6), by the clone runs (steps 4 and 7) and by the
# failed join attempts.
set session sql_log_bin=0;
call mtr.add_suppression("Replica SQL for channel 'group_replication_applier': Relay log read failure: *.*");
call mtr.add_suppression("The applier thread execution was aborted. *.*");
call mtr.add_suppression("Error running query, replica SQL thread aborted. *");
call mtr.add_suppression("Fatal error during execution on the Applier process of Group Replication. The server will now leave the group.");
call mtr.add_suppression("Skipping leave operation: concurrent attempt to leave the group is on-going.");
call mtr.add_suppression("Unable to confirm whether the server has left the group or not. Check performance_schema.replication_group_members to check group membership information.");
call mtr.add_suppression("The server was automatically set into read only mode after an error was detected.");
call mtr.add_suppression("Unable to initialize the Group Replication applier module.");
call mtr.add_suppression("There was a previous plugin error while the member joined the group. The member will now exit the group.");
call mtr.add_suppression("This member will start distributed recovery using clone. It is due to the number of missing transactions being higher than the configured threshold of 1.");
call mtr.add_suppression("Clone removing all user data for provisioning: *");
call mtr.add_suppression("Internal query: CLONE INSTANCE FROM *");
call mtr.add_suppression("There was an issue when cloning from another server: *");
call mtr.add_suppression("Due to some issue on the previous step distributed recovery is now executing: Incremental Recovery.");
call mtr.add_suppression("Timeout while waiting for the group communication engine to be ready!");
call mtr.add_suppression("The group communication engine is not ready for the member to join. .*");
call mtr.add_suppression("The member was unable to join the group.*");
call mtr.add_suppression("There was an issue when configuring the remote cloning process: The plugin was not able to stop the group_replication_applier channel.");
set session sql_log_bin=1;

# Put the clone threshold of M2 back to the value captured in step 3 in
# $_group_replication_threshold_save; step 7 left it at 1. The query log is
# disabled so the statement (and its numeric value) is not added to the
# recorded result.
--disable_query_log
--eval SET GLOBAL group_replication_clone_threshold= $_group_replication_threshold_save
--enable_query_log

# Undo the SET PERSIST statements from step 2.
RESET PERSIST group_replication_group_name;
RESET PERSIST group_replication_local_address;
RESET PERSIST group_replication_group_seeds;
RESET PERSIST group_replication_start_on_boot;
RESET PERSIST IF EXISTS group_replication_communication_stack;

--source include/clean_monitoring_process.inc
--source include/group_replication_end.inc