################################################################################
# This test confirms that:
# 1: On applier failure clone does not start.
# 2: During clone group_replication_applier SQL thread is OFF.
# 3: If clone fails group_replication_applier is restarted.
#
# Test:
# 0. The test requires two servers: M1 and M2
# 1. Bootstrap M1 and install clone plugin.
# 2. Setup clone on M2.
# 3. Force clone. Force applier failure on M2.
# Start GR on M2 fails. Applier fails before clone stops applier.
# Assert clone never starts.
# 4. Reset debug points for applier failures.
# Restart GR on M2.
# Assert clone starts and group_replication_applier SQL thread is OFF.
# 5. Block applier on M2.
# Uninstall clone on M1 so that clone fails.
# Create some transactions on M1 to create applier backlog on M2.
# 6. Stop GR on M2 without committing the received transactions.
# Create transactions on M1 for M2 to clone.
# 7. Start GR on M2.
# Clone will fail and incremental recovery will start.
# Applier will be OFF till clone failure is detected.
# 8. Cleanup.
################################################################################
--source include/big_test.inc
--source include/have_debug_sync.inc
--source include/have_mysqld_monitoring_process.inc
--source include/have_clone_plugin.inc
--source include/force_restart.inc
--source include/have_group_replication_plugin.inc
--let $rpl_skip_group_replication_start= 1
--source include/group_replication.inc
# Validate plugins
--let plugins = CLONE_PLUGIN,GROUP_REPLICATION
--source include/check_plugin_dir.inc
--echo
--echo # 1. Bootstrap M1 and install clone plugin.
--echo
--source include/start_and_bootstrap_group_replication.inc
--replace_result $CLONE_PLUGIN CLONE_PLUGIN
--eval INSTALL PLUGIN clone SONAME '$CLONE_PLUGIN'
CREATE TABLE t1 (a INT PRIMARY KEY);
INSERT INTO t1 values (1);
INSERT INTO t1 values (2);
--echo
--echo # 2. Setup clone on M2.
--echo
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
--let $member2_uuid= query_get_value(SELECT @@SERVER_UUID, @@SERVER_UUID, 1)
--let $_group_replication_local_address= `SELECT @@GLOBAL.group_replication_local_address`
--let $_group_replication_group_seeds= `SELECT @@GLOBAL.group_replication_group_seeds`
--let $_group_replication_comm_stack= `SELECT @@GLOBAL.group_replication_communication_stack`
--let $plugin_list= $GROUP_REPLICATION
--source include/spawn_monitoring_process.inc
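# Clone based recovery provisions M2 by replacing its data and restarting
# the server, so the group replication settings are persisted here
# (together with group_replication_start_on_boot= ON) to let M2 rejoin the
# group automatically after that restart. The monitoring process spawned
# above is what allows mysqld to be restarted outside of MTR's control.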
--disable_query_log
--eval SET PERSIST group_replication_group_name= "$group_replication_group_name"
--eval SET PERSIST group_replication_local_address= "$_group_replication_local_address"
--eval SET PERSIST group_replication_group_seeds= "$_group_replication_group_seeds"
--eval SET PERSIST group_replication_communication_stack= "$_group_replication_comm_stack"
SET PERSIST group_replication_start_on_boot= ON;
--enable_query_log
--replace_result $CLONE_PLUGIN CLONE_PLUGIN
--eval INSTALL PLUGIN clone SONAME '$CLONE_PLUGIN'
--echo
--echo # 3. Force clone. Force applier failure on M2.
--echo # Start GR on M2 fails. Applier fails before clone stops applier.
--echo # Assert clone never starts.
--echo
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
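# group_replication_clone_threshold= 1 forces clone based recovery: a joiner
# missing at least one transaction provisions itself through a remote clone
# instead of incremental (binary log based) recovery. Internally GR issues a
# query roughly of the form
#   CLONE INSTANCE FROM <recovery_user>@<donor_host>:<donor_port> ...
# (see the "Internal query: CLONE INSTANCE FROM" suppression in the cleanup
# section), which is why the clone plugin must be installed on both members.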
--let $_group_replication_threshold_save= `SELECT @@GLOBAL.group_replication_clone_threshold`
SET GLOBAL group_replication_clone_threshold= 1;
--replace_result $group_replication_group_name GROUP_NAME
--eval SET GLOBAL group_replication_group_name= "$group_replication_group_name"
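# force_sql_thread_error makes the group_replication_applier SQL thread fail
# as soon as it starts. gr_clone_before_applier_stop pauses the clone setup
# until the test emits the 'applier_stopped' debug sync signal (below), so
# the applier failure is observed before clone gets a chance to start.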
SET @@GLOBAL.DEBUG= '+d,force_sql_thread_error';
SET @@GLOBAL.DEBUG='+d,gr_clone_before_applier_stop';
--send START GROUP_REPLICATION;
--let $rpl_connection_name= server_2
--source include/rpl_connection.inc
--let $wait_condition= SELECT COUNT(*) = 1 FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF"
--source include/wait_condition_or_abort.inc
SET DEBUG_SYNC = 'now SIGNAL applier_stopped';
SET @@GLOBAL.DEBUG='-d,gr_clone_before_applier_stop';
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
# One of three things can happen:
# A. The applier fails after the join and the member leaves the group,
#    but the START GROUP_REPLICATION command does not fail.
# B. The applier fails after initialization and sets the state to ERROR,
#    making the group join fail, which in turn makes the START
#    GROUP_REPLICATION command fail with a generic
#    ER_GROUP_REPLICATION_CONFIGURATION error.
# C. The applier fails and its initialization process catches the error.
#    During the start it is already known that the applier failed, so
#    START GROUP_REPLICATION fails with ER_GROUP_REPLICATION_APPLIER_INIT_ERROR.
--error 0, ER_GROUP_REPLICATION_CONFIGURATION, ER_GROUP_REPLICATION_APPLIER_INIT_ERROR
--reap
SET @@GLOBAL.DEBUG= '-d,force_sql_thread_error';
SET DEBUG_SYNC= 'RESET';
--let $assert_text= Clone must not start.
--let $assert_cond= [SELECT COUNT(*) FROM performance_schema.clone_status] = 0;
--source include/assert.inc
--echo
--echo # 4. Reset debug points for applier failures.
--echo # Restart GR on M2.
--echo # Assert clone starts and group_replication_applier SQL thread is OFF.
--echo
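# The gr_clone_wait debug point pauses the remote clone once it begins: it
# emits the 'gr_clone_paused' debug sync signal and then waits for
# 'gr_clone_continue', giving the test a stable window to assert that the
# group_replication_applier SQL thread is stopped while clone is running.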
SET @@GLOBAL.DEBUG='+d,gr_clone_wait';
START GROUP_REPLICATION;
SET DEBUG_SYNC = 'now WAIT_FOR gr_clone_paused';
SET @@GLOBAL.DEBUG='-d,gr_clone_wait';
--let $assert_text= group_replication_applier SQL Thread will be OFF.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF",count, 1] = 1
--source include/assert.inc
SET DEBUG_SYNC = 'now SIGNAL gr_clone_continue';
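# Cloning a remote instance replaces all local data and ends with a server
# restart, so wait for M2 to go down and reconnect once the monitoring
# process brings it back up with the persisted GR settings.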
--source include/wait_until_disconnected.inc
--let $rpl_server_number= 2
--source include/rpl_reconnect.inc
--let $group_replication_member_state=ONLINE
--source include/gr_wait_for_member_state.inc
--let $assert_text= Clone must be completed
--let $assert_cond= [SELECT state="Completed" FROM performance_schema.clone_status] = 1;
--source include/assert.inc
--echo
--echo # 5. Block applier on M2.
--echo # Uninstall clone on M1 so that clone fails.
--echo # Create some transactions on M1 to create applier backlog on M2.
--echo
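# The block_applier_updates debug point blocks the group_replication_applier
# channel on M2: it signals 'applier_read_blocked' once the applier is
# blocked and waits for 'resume_applier_read' before letting it proceed, so
# the transactions created on M1 stay queued (but uncommitted) on M2.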
SET @@GLOBAL.DEBUG='+d,block_applier_updates';
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
UNINSTALL PLUGIN clone;
INSERT INTO t1 values (3);
INSERT INTO t1 values (4);
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
--let $wait_condition= SELECT COUNT(*) = 1 FROM performance_schema.replication_group_member_stats WHERE member_id='$member2_uuid' AND count_transactions_remote_in_applier_queue = 2
--source include/wait_condition_or_abort.inc
--echo
--echo # 6. Stop GR on M2 without committing the received transactions.
--echo # Create transactions on M1 for M2 to clone.
--echo
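# Once the applier is confirmed blocked, arm force_sql_thread_error so the
# applier errors out as soon as it resumes. The member moves to ERROR state
# and GR is stopped with the two received transactions still uncommitted,
# leaving M2 behind the group and in need of recovery on the next join.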
SET DEBUG_SYNC = "now WAIT_FOR applier_read_blocked";
SET @@GLOBAL.DEBUG='-d,block_applier_updates';
SET @@GLOBAL.DEBUG='+d,force_sql_thread_error';
SET DEBUG_SYNC = "now SIGNAL resume_applier_read";
--let $group_replication_member_state= ERROR
--source include/gr_wait_for_member_state.inc
SET @@GLOBAL.DEBUG='-d,force_sql_thread_error';
--source include/stop_group_replication.inc
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
# Force clone.
INSERT INTO t1 values (5);
INSERT INTO t1 values (6);
--echo
--echo # 7. Start GR on M2.
--echo # Clone will fail and incremental recovery will start.
--echo # Applier will be OFF till clone failure is detected.
--echo
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SET @@GLOBAL.DEBUG='+d,block_applier_updates';
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
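# The gr_run_clone_query_fail_once debug point pauses the internal CLONE
# query, signalling 'signal.run_clone_query_waiting' and waiting for
# 'signal.run_clone_query_continue', and then makes that first clone attempt
# fail. While it is paused the test can assert that clone is executing and
# that the applier is OFF; after the failure GR falls back to incremental
# recovery over the group_replication_recovery channel.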
SET @@GLOBAL.DEBUG='+d,gr_run_clone_query_fail_once';
SET GLOBAL group_replication_clone_threshold= 1;
START GROUP_REPLICATION;
SET DEBUG_SYNC = 'now WAIT_FOR signal.run_clone_query_waiting';
SET @@GLOBAL.DEBUG='-d,gr_run_clone_query_fail_once';
# Clone is executing
--let $assert_text= "Clone is executing"
--let $assert_cond= [SELECT COUNT(*) FROM performance_schema.events_stages_current WHERE event_name LIKE "%stage/group_rpl/Group Replication Cloning%"] = 1
--source include/assert.inc
--let $assert_text= group_replication_applier SQL Thread will be OFF.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "OFF",count, 1] = 1
--source include/assert.inc
SET DEBUG_SYNC = 'now SIGNAL signal.run_clone_query_continue';
# Clone will fail and will start channel group_replication_recovery
--let $wait_condition=SELECT COUNT(*)=1 FROM performance_schema.replication_connection_status WHERE CHANNEL_NAME="group_replication_recovery" AND SERVICE_STATE='ON'
--source include/wait_condition.inc
--let $assert_text= group_replication_applier SQL Thread will be ON.
--let $assert_cond= [SELECT COUNT(*) as count FROM performance_schema.replication_applier_status WHERE CHANNEL_NAME="group_replication_applier" AND SERVICE_STATE = "ON",count, 1] = 1
--source include/assert.inc
# Allow the recovery to continue
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SET DEBUG_SYNC = "now WAIT_FOR applier_read_blocked";
SET @@GLOBAL.DEBUG='-d,block_applier_updates';
SET DEBUG_SYNC = "now SIGNAL resume_applier_read";
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
--let $group_replication_member_state=ONLINE
--source include/gr_wait_for_member_state.inc
--let $diff_tables= server1:test.t1, server2:test.t1
--source include/diff_tables.inc
--echo
--echo # 8. Cleanup.
--echo
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
DROP TABLE t1;
set session sql_log_bin=0;
call mtr.add_suppression("Timeout while waiting for the group communication engine to exit!");
call mtr.add_suppression("The member has failed to gracefully leave the group.");
set session sql_log_bin=1;
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
set session sql_log_bin=0;
call mtr.add_suppression("Replica SQL for channel 'group_replication_applier': Relay log read failure: *.*");
call mtr.add_suppression("The applier thread execution was aborted. *.*");
call mtr.add_suppression("Error running query, replica SQL thread aborted. *");
call mtr.add_suppression("Fatal error during execution on the Applier process of Group Replication. The server will now leave the group.");
call mtr.add_suppression("Skipping leave operation: concurrent attempt to leave the group is on-going.");
call mtr.add_suppression("Unable to confirm whether the server has left the group or not. Check performance_schema.replication_group_members to check group membership information.");
call mtr.add_suppression("The server was automatically set into read only mode after an error was detected.");
call mtr.add_suppression("Unable to initialize the Group Replication applier module.");
call mtr.add_suppression("There was a previous plugin error while the member joined the group. The member will now exit the group.");
call mtr.add_suppression("This member will start distributed recovery using clone. It is due to the number of missing transactions being higher than the configured threshold of 1.");
call mtr.add_suppression("Clone removing all user data for provisioning: *");
call mtr.add_suppression("Internal query: CLONE INSTANCE FROM *");
call mtr.add_suppression("There was an issue when cloning from another server: *");
call mtr.add_suppression("Due to some issue on the previous step distributed recovery is now executing: Incremental Recovery.");
call mtr.add_suppression("Timeout while waiting for the group communication engine to be ready!");
call mtr.add_suppression("The group communication engine is not ready for the member to join. .*");
call mtr.add_suppression("The member was unable to join the group.*");
call mtr.add_suppression("There was an issue when configuring the remote cloning process: The plugin was not able to stop the group_replication_applier channel.");
set session sql_log_bin=1;
RESET PERSIST group_replication_group_name;
RESET PERSIST group_replication_local_address;
RESET PERSIST group_replication_group_seeds;
RESET PERSIST group_replication_start_on_boot;
RESET PERSIST IF EXISTS group_replication_communication_stack;
--source include/clean_monitoring_process.inc
--source include/group_replication_end.inc