################################################################################
# This test confirms that:
# 1: On applier failure clone does not start.
# 2: During clone group_replication_applier SQL thread is OFF.
# 3: If clone fails group_replication_applier is restarted.
#
# Test:
# 0. The test requires two servers: M1 and M2
# 1. Bootstrap M1 and install clone plugin.
# 2. Setup clone on M2.
# 3. Force clone. Force applier failure on M2.
# Start GR on M2 fails. Applier fails before clone stops applier.
# Assert clone never starts.
# 4. Reset debug points for applier failures.
# Restart GR on M2.
# Assert clone starts and group_replication_applier SQL thread is OFF.
# 5. Block applier on M2.
# Uninstall clone on M1 so that clone fails.
# Create some transactions on M1 to create applier backlog on M2.
# 6. Stop GR on M2 without committing the received transactions.
# Create transactions on M1 for M2 to clone.
# 7. Start GR on M2.
# Clone will fail and incremental recovery will start.
# Applier will be OFF till clone failure is detected.
# 8. Cleanup.
################################################################################
--source include/big_test.inc
--source include/have_debug_sync.inc
--source include/have_mysqld_monitoring_process.inc
--source include/have_clone_plugin.inc
--source include/force_restart.inc
--source include/have_group_replication_plugin.inc
--let $rpl_skip_group_replication_start= 1
--source include/group_replication.inc
# Validate plugins
--let plugins = CLONE_PLUGIN,GROUP_REPLICATION
--source include/check_plugin_dir.inc
--echo
--echo # 1. Bootstrap M1 and install clone plugin.
--echo
--source include/start_and_bootstrap_group_replication.inc
--replace_result $CLONE_PLUGIN CLONE_PLUGIN
--eval INSTALL PLUGIN clone SONAME '$CLONE_PLUGIN'
CREATE TABLE t1 (a INT PRIMARY KEY);
INSERT INTO t1 values (1);
INSERT INTO t1 values (2);
--echo
--echo # 2. Setup clone on M2.
--echo
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
--let $member2_uuid= query_get_value(SELECT @@SERVER_UUID, @@SERVER_UUID, 1)
--let $_group_replication_local_address= `SELECT @@GLOBAL.group_replication_local_address`
--let $_group_replication_group_seeds= `SELECT @@GLOBAL.group_replication_group_seeds`
--let $_group_replication_comm_stack= `SELECT @@GLOBAL.group_replication_communication_stack`
--let $plugin_list= $GROUP_REPLICATION
--source include/spawn_monitoring_process.inc
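# Clone based recovery provisions M2 by replacing its data and restarting
# the server, so the group replication settings are persisted here
# (together with group_replication_start_on_boot= ON) to let M2 rejoin the
# group automatically after that restart. The monitoring process spawned
# above is what allows mysqld to be restarted outside of MTR's control.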
--disable_query_log
--eval SET PERSIST group_replication_group_name= "$group_replication_group_name"
--eval SET PERSIST group_replication_local_address= "$_group_replication_local_address"
--eval SET PERSIST group_replication_group_seeds= "$_group_replication_group_seeds"
--eval SET PERSIST group_replication_communication_stack= "$_group_replication_comm_stack"
SET PERSIST group_replication_start_on_boot= ON;
--enable_query_log
--replace_result $CLONE_PLUGIN CLONE_PLUGIN
--eval INSTALL PLUGIN clone SONAME '$CLONE_PLUGIN'
--echo
--echo # 3. Force clone. Force applier failure on M2.
--echo # Start GR on M2 fails. Applier fails before clone stops applier.
--echo # Assert clone never starts.
--echo
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
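# group_replication_clone_threshold= 1 forces clone based recovery: a joiner
# missing at least one transaction provisions itself through a remote clone
# instead of incremental (binary log based) recovery. Internally GR issues a
# query roughly of the form
#   CLONE INSTANCE FROM <recovery_user>@<donor_host>:<donor_port> ...
# (see the "Internal query: CLONE INSTANCE FROM" suppression in the cleanup
# section), which is why the clone plugin must be installed on both members.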
--let $_group_replication_threshold_save= `SELECT @@GLOBAL.group_replication_clone_threshold`
SET GLOBAL group_replication_clone_threshold= 1;
--replace_result $group_replication_group_name GROUP_NAME
--eval SET GLOBAL group_replication_group_name= "$group_replication_group_name"
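# force_sql_thread_error makes the group_replication_applier SQL thread fail
# as soon as it starts. gr_clone_before_applier_stop pauses the clone setup
# until the test emits the 'applier_stopped' debug sync signal (below), so
# the applier failure is observed before clone gets a chance to start.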
SET @@GLOBAL.DEBUG= '+d,force_sql_thread_error';
SET @@GLOBAL.DEBUG='+d,gr_clone_before_applier_stop';
--send START GROUP_REPLICATION;
--let $rpl_connection_name= server_2
--source include/rpl_connection.inc
--let $wait_condition= SELECT COUNT(*) = 1 FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF"
--source include/wait_condition_or_abort.inc
SET DEBUG_SYNC = 'now SIGNAL applier_stopped';
SET @@GLOBAL.DEBUG='-d,gr_clone_before_applier_stop';
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
# One of three things can happen:
# A. The applier fails after the join and the member leaves the group,
#    but the START GROUP_REPLICATION command does not fail.
# B. The applier fails after initialization and sets the state to ERROR,
#    making the group join fail, which in turn makes the START
#    GROUP_REPLICATION command fail with a generic
#    ER_GROUP_REPLICATION_CONFIGURATION error.
# C. The applier fails and its initialization process catches the error.
#    During the start it is already known that the applier failed, so
#    START GROUP_REPLICATION fails with ER_GROUP_REPLICATION_APPLIER_INIT_ERROR.
--error 0, ER_GROUP_REPLICATION_CONFIGURATION, ER_GROUP_REPLICATION_APPLIER_INIT_ERROR
--reap
SET @@GLOBAL.DEBUG= '-d,force_sql_thread_error';
SET DEBUG_SYNC= 'RESET';
--let $assert_text= Clone must not start.
--let $assert_cond= [SELECT COUNT(*) FROM performance_schema.clone_status] = 0;
--source include/assert.inc
--echo
--echo # 4. Reset debug points for applier failures.
--echo # Restart GR on M2.
--echo # Assert clone starts and group_replication_applier SQL thread is OFF.
--echo
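# The gr_clone_wait debug point pauses the remote clone once it begins: it
# emits the 'gr_clone_paused' debug sync signal and then waits for
# 'gr_clone_continue', giving the test a stable window to assert that the
# group_replication_applier SQL thread is stopped while clone is running.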
SET @@GLOBAL.DEBUG='+d,gr_clone_wait';
START GROUP_REPLICATION;
SET DEBUG_SYNC = 'now WAIT_FOR gr_clone_paused';
SET @@GLOBAL.DEBUG='-d,gr_clone_wait';
--let $assert_text= group_replication_applier SQL Thread will be OFF.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF",count, 1] = 1
--source include/assert.inc
SET DEBUG_SYNC = 'now SIGNAL gr_clone_continue';
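# Cloning a remote instance replaces all local data and ends with a server
# restart, so wait for M2 to go down and reconnect once the monitoring
# process brings it back up with the persisted GR settings.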
--source include/wait_until_disconnected.inc
--let $rpl_server_number= 2
--source include/rpl_reconnect.inc
--let $group_replication_member_state=ONLINE
--source include/gr_wait_for_member_state.inc
--let $assert_text= Clone must be completed
--let $assert_cond= [SELECT state="Completed" FROM performance_schema.clone_status] = 1;
--source include/assert.inc
--echo
--echo # 5. Block applier on M2.
--echo # Uninstall clone on M1 so that clone fails.
--echo # Create some transactions on M1 to create applier backlog on M2.
--echo
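# The block_applier_updates debug point blocks the group_replication_applier
# channel on M2: it signals 'applier_read_blocked' once the applier is
# blocked and waits for 'resume_applier_read' before letting it proceed, so
# the transactions created on M1 stay queued (but uncommitted) on M2.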
SET @@GLOBAL.DEBUG='+d,block_applier_updates';
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
UNINSTALL PLUGIN clone;
INSERT INTO t1 values (3);
INSERT INTO t1 values (4);
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
--let $wait_condition= SELECT COUNT(*) = 1 FROM performance_schema.replication_group_member_stats WHERE member_id='$member2_uuid' AND count_transactions_remote_in_applier_queue = 2
--source include/wait_condition_or_abort.inc
--echo
--echo # 6. Stop GR on M2 without committing the received transactions.
--echo # Create transactions on M1 for M2 to clone.
--echo
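# Once the applier is confirmed blocked, arm force_sql_thread_error so the
# applier errors out as soon as it resumes. The member moves to ERROR state
# and GR is stopped with the two received transactions still uncommitted,
# leaving M2 behind the group and in need of recovery on the next join.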
SET DEBUG_SYNC = "now WAIT_FOR applier_read_blocked";
SET @@GLOBAL.DEBUG='-d,block_applier_updates';
SET @@GLOBAL.DEBUG='+d,force_sql_thread_error';
SET DEBUG_SYNC = "now SIGNAL resume_applier_read";
--let $group_replication_member_state= ERROR
--source include/gr_wait_for_member_state.inc
SET @@GLOBAL.DEBUG='-d,force_sql_thread_error';
--source include/stop_group_replication.inc
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
# Force clone.
INSERT INTO t1 values (5);
INSERT INTO t1 values (6);
--echo
--echo # 7. Start GR on M2.
--echo # Clone will fail and incremental recovery will start.
--echo # Applier will be OFF till clone failure is detected.
--echo
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SET @@GLOBAL.DEBUG='+d,block_applier_updates';
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
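# The gr_run_clone_query_fail_once debug point pauses the internal CLONE
# query, signalling 'signal.run_clone_query_waiting' and waiting for
# 'signal.run_clone_query_continue', and then makes that first clone attempt
# fail. While it is paused the test can assert that clone is executing and
# that the applier is OFF; after the failure GR falls back to incremental
# recovery over the group_replication_recovery channel.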
SET @@GLOBAL.DEBUG='+d,gr_run_clone_query_fail_once';
SET GLOBAL group_replication_clone_threshold= 1;
START GROUP_REPLICATION;
SET DEBUG_SYNC = 'now WAIT_FOR signal.run_clone_query_waiting';
SET @@GLOBAL.DEBUG='-d,gr_run_clone_query_fail_once';
# Clone is executing
--let $assert_text= "Clone is executing"
--let $assert_cond= [SELECT COUNT(*) FROM performance_schema.events_stages_current WHERE event_name LIKE "%stage/group_rpl/Group Replication Cloning%"] = 1
--source include/assert.inc
--let $assert_text= group_replication_applier SQL Thread will be OFF.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF",count, 1] = 1
--source include/assert.inc
SET DEBUG_SYNC = 'now SIGNAL signal.run_clone_query_continue';
# Clone will fail and will start channel group_replication_recovery
--let $wait_condition=SELECT COUNT(*)=1 FROM performance_schema.replication_connection_status WHERE CHANNEL_NAME="group_replication_recovery" AND SERVICE_STATE='ON'
--source include/wait_condition.inc
--let $assert_text= group_replication_applier SQL Thread will be ON.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "ON",count, 1] = 1
--source include/assert.inc
# Allow the recovery to continue
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SET DEBUG_SYNC = "now WAIT_FOR applier_read_blocked";
SET @@GLOBAL.DEBUG='-d,block_applier_updates';
SET DEBUG_SYNC = "now SIGNAL resume_applier_read";
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
--let $group_replication_member_state=ONLINE
--source include/gr_wait_for_member_state.inc
--let $diff_tables= server1:test.t1, server2:test.t1
--source include/diff_tables.inc
--echo
--echo # 8. Cleanup.
--echo
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
DROP TABLE t1;
set session sql_log_bin=0;
call mtr.add_suppression("Timeout while waiting for the group communication engine to exit!");
call mtr.add_suppression("The member has failed to gracefully leave the group.");
set session sql_log_bin=1;
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
set session sql_log_bin=0;
call mtr.add_suppression("Replica SQL for channel 'group_replication_applier': Relay log read failure: *.*");
call mtr.add_suppression("The applier thread execution was aborted. *.*");
call mtr.add_suppression("Error running query, replica SQL thread aborted. *");
call mtr.add_suppression("Fatal error during execution on the Applier process of Group Replication. The server will now leave the group.");
call mtr.add_suppression("Skipping leave operation: concurrent attempt to leave the group is on-going.");
call mtr.add_suppression("Unable to confirm whether the server has left the group or not. Check performance_schema.replication_group_members to check group membership information.");
call mtr.add_suppression("The server was automatically set into read only mode after an error was detected.");
call mtr.add_suppression("Unable to initialize the Group Replication applier module.");
call mtr.add_suppression("There was a previous plugin error while the member joined the group. The member will now exit the group.");
call mtr.add_suppression("This member will start distributed recovery using clone. It is due to the number of missing transactions being higher than the configured threshold of 1.");
call mtr.add_suppression("Clone removing all user data for provisioning: *");
call mtr.add_suppression("Internal query: CLONE INSTANCE FROM *");
call mtr.add_suppression("There was an issue when cloning from another server: *");
call mtr.add_suppression("Due to some issue on the previous step distributed recovery is now executing: Incremental Recovery.");
call mtr.add_suppression("Timeout while waiting for the group communication engine to be ready!");
call mtr.add_suppression("The group communication engine is not ready for the member to join. .*");
call mtr.add_suppression("The member was unable to join the group.*");
call mtr.add_suppression("There was an issue when configuring the remote cloning process: The plugin was not able to stop the group_replication_applier channel.");
set session sql_log_bin=1;
RESET PERSIST group_replication_group_name;
RESET PERSIST group_replication_local_address;
RESET PERSIST group_replication_group_seeds;
RESET PERSIST group_replication_start_on_boot;
RESET PERSIST IF EXISTS group_replication_communication_stack;
--source include/clean_monitoring_process.inc
--source include/group_replication_end.inc