File: gr_recovery_purged_donor_failover.test

package info (click to toggle)
mysql-8.0 8.0.43-3
links: PTS, VCS
area: main
in suites: sid
size: 1,273,924 kB
sloc: cpp: 4,684,605; ansic: 412,450; pascal: 108,398; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; sh: 24,181; python: 21,816; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,076; makefile: 2,194; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (179 lines) | stat: -rw-r--r-- 6,400 bytes
################################################################################
# This test evaluates that whenever the donor has purged GTIDs that causes
# errors in the joiner, a failover to another donor will happen.
#
# The test steps are:
#   0) The test requires three servers.
#   1) Bootstrap start group on server1. Add some data. Start GR on server2.
#   2) Purge some GTIDs on server1 by flushing and purging binary log.
#      Create a replication user for recovery.
#   3) Start server3 with the configured replication user and watch it fail as
#      1. Server1 has purged GTIDs
#      2. Server2 does not has the correct replication user
#   4) Create the recovery user on server2.
#      The joiner (server3) should now connect to server2 when failing over.
#   5) The joiner (server3) should become online. Validate data.
#   6) Clean up.
################################################################################
--source include/big_test.inc
--source include/have_group_replication_plugin.inc
--let $rpl_skip_group_replication_start= 1
--let $rpl_server_count= 3
--source include/group_replication.inc

--let $recovery_user= recovery_user
--let $recovery_password= recovery_password

--echo #
--echo # Setup a new 2 member group
--echo #

--connection server1
--source include/start_and_bootstrap_group_replication.inc

#insert some data
CREATE TABLE t1 (c1 INT NOT NULL PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);

--connection server2

SET SESSION sql_log_bin= 0;
--eval CREATE USER "$recovery_user" IDENTIFIED BY "$recovery_password"
--eval GRANT GROUP_REPLICATION_STREAM ON *.* TO "$recovery_user"@'%';
FLUSH PRIVILEGES;
SET SESSION sql_log_bin= 1;

--source include/start_group_replication.inc

--echo #
--echo # Purge member 1 binlog after a flush and create a replication user
--echo #

--connection server1

--let $server_binlog_file_prev= query_get_value(SHOW MASTER STATUS, File, 1)
FLUSH BINARY LOGS;
--let $server_binlog_file_cur= query_get_value(SHOW MASTER STATUS, File, 1)

#
# At this point the dump thread spawned to attend the
# recovery procedure for server2 must have stopped or
# is about to. We make sure that it is so we can purge
# the binary log successfully - note that
# start_group_replication.inc waits for the member to
# be ONLINE.
#
# Otherwise the test could fail since the purge
# command might not be able to remove the file because,
# the lingering dump thread could still clinging on
# to it.
#
--source include/stop_dump_threads.inc

#
# And just to make sure that the file is indeed not
# being used.
#
--let $wait_file_name=$server_binlog_file_prev
--source include/wait_for_file_closed.inc

--disable_query_log
--eval PURGE BINARY LOGS TO '$server_binlog_file_cur'
--enable_query_log

SET SESSION sql_log_bin= 0;
CALL mtr.add_suppression("Cannot replicate to server with server_uuid*");
--eval CREATE USER "$recovery_user" IDENTIFIED BY "$recovery_password"
--eval GRANT REPLICATION SLAVE ON *.* TO "$recovery_user"
--eval GRANT GROUP_REPLICATION_STREAM ON *.* TO "$recovery_user"@'%';
FLUSH PRIVILEGES;
SET SESSION sql_log_bin= 1;

--echo #
--echo # Start recovery and watch it fail for a bit as:
--echo # 1) Server 1 has purged GTIDs
--echo # 2) Server 2 does not has the correct replication user
--echo #

--connection server3

SET SESSION sql_log_bin= 0;
--eval CREATE USER "$recovery_user" IDENTIFIED BY "$recovery_password"
--eval GRANT GROUP_REPLICATION_STREAM ON *.* TO "$recovery_user"@'%';
FLUSH PRIVILEGES;

call mtr.add_suppression("There was an error when connecting to the donor*");
call mtr.add_suppression("For details please check performance_schema.replication_connection_status table and error log messages of Replica I/O for channel group_replication_recovery.");
call mtr.add_suppression("Replica I/O for channel 'group_replication_recovery': Got fatal error 1236*");
call mtr.add_suppression("Error while starting the group replication incremental recovery receiver/applier threads");
call mtr.add_suppression("Replica I/O for channel 'group_replication_recovery': Source command COM_REGISTER_REPLICA failed: Access denied for user 'recovery_user'.*");
call mtr.add_suppression("Replica I/O for channel 'group_replication_recovery': Source command COM_REGISTER_REPLICA failed: failed registering on source, reconnecting to try again.*");
call mtr.add_suppression("Replica I/O thread couldn't register on source");
call mtr.add_suppression("Error while creating the group replication recovery channel with donor.*");
call mtr.add_suppression("Error when configuring the asynchronous recovery channel connection to the donor.*");
SET SESSION sql_log_bin= 1;

--disable_warnings
--eval CHANGE REPLICATION SOURCE TO SOURCE_USER= '$recovery_user', SOURCE_PASSWORD= '$recovery_password' FOR CHANNEL 'group_replication_recovery'
--enable_warnings

SET @debug_save_rec_int= @@GLOBAL.group_replication_recovery_reconnect_interval;

--eval SET GLOBAL group_replication_recovery_reconnect_interval= 1 # seconds

--disable_query_log
--eval SET GLOBAL group_replication_group_name= "$group_replication_group_name";
--enable_query_log
--source include/start_group_replication_command.inc

#give it time to fail several times
--sleep 5

--let $group_replication_member_state= RECOVERING
--source include/gr_wait_for_member_state.inc

--echo #
--echo # Create the recovery user on server 2
--echo # The joiner should now be able to connect to server 2 when failing over.
--echo #

--connection server2
SET SESSION sql_log_bin= 0;
--eval GRANT REPLICATION SLAVE ON *.* TO "$recovery_user"
FLUSH PRIVILEGES;
SET SESSION sql_log_bin= 1;

--connection server3

--let $group_replication_member_state= ONLINE
--source include/gr_wait_for_member_state.inc

--let $assert_text= On the recovered member, the table should exist and have 1 elements
--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1;
--source include/assert.inc

--echo #
--echo # Cleaning up
--echo #

SET @@GLOBAL.group_replication_recovery_reconnect_interval= @debug_save_rec_int;

DROP TABLE t1;

SET SESSION sql_log_bin= 0;
--eval DROP USER "$recovery_user"
SET SESSION sql_log_bin= 1;

--connection server2

SET SESSION sql_log_bin= 0;
--eval DROP USER "$recovery_user"
SET SESSION sql_log_bin= 1;

--connection server1

SET SESSION sql_log_bin= 0;
--eval DROP USER "$recovery_user"
SET SESSION sql_log_bin= 1;

--source include/group_replication_end.inc