File: gr_autorejoin_while_applier_locked.test

package info (click to toggle)
mysql-8.0 8.0.43-3
links: PTS, VCS
area: main
in suites: sid
size: 1,273,924 kB
sloc: cpp: 4,684,605; ansic: 412,450; pascal: 108,398; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; sh: 24,181; python: 21,816; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,076; makefile: 2,194; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (223 lines) | stat: -rw-r--r-- 8,313 bytes
###############################################################################
#
# When a member has applier blocked it should not cause problems on rejoin,
# it shall fail on applier initialization.
#
# Test:
#   0. The test requires three servers
#   1. Create table t1 on server1 with binlog disabled and bootstrap the
#      group
#   2. Create table t1 on server2 with binlog disabled and join to the
#      group
#   3. Server3 will do three attempts of rejoin to validate that it does not
#      block, and reduce the components stop timeout so that the recovery
#      module stops faster
#   4. Server3 disable binlog and suppress warnings, create table t1 and
#      get pid. Pid will be used to expel member from group. Join group.
#   5. Enable super read only now, to avoid problems setting it after
#      FLUSH TABLES WITH READ LOCK. The lock will prevent the server from
#      finishing recovery
#   6. Insert data on group
#   7. Wait until group_replication_applier channel waits on table t1
#      read lock
#   8. Expel server3 from the group
#   9. Server3 should change its status to ERROR
#  10. Wait for two auto-rejoin attempts to happen; only the third attempt
#       will be successful. The first fails because recovery fails to stop.
#       The second fails to start the applier module because the channel
#       is still active
#  11. Wait until the message about the applier being blocked is logged
#  12. UNLOCK tables on server3
#  13. Confirm that group_replication_applier channel is stopped
#  14. Wait for member to be ONLINE after the third auto-rejoin attempt succeeds
#  15. Cleanup
#
###############################################################################

# Prerequisite checks. NOTE(review): linux.inc presumably limits the run to
# Linux, which matches the `kill` based process signalling used in step 8 --
# confirm against the include file.
--source include/linux.inc
--source include/big_test.inc
--source include/not_have_privilege_checks_user.inc
--source include/have_group_replication_plugin.inc
# The test uses three servers. Group Replication is started explicitly on
# each server further down (steps 1, 2 and 4), so group_replication.inc is
# asked -- presumably via rpl_skip_group_replication_start -- not to start it.
--let $rpl_server_count= 3
--let $rpl_skip_group_replication_start= 1
--source include/group_replication.inc

--echo
--echo # 1. Create table t1 on server1 with binlog disabled and bootstrap the
--echo #    group

# Runs on the connection left active by group_replication.inc (server1
# according to the step title). sql_log_bin is switched off only around the
# CREATE TABLE, so the DDL is kept out of the binary log; each member creates
# its own copy of t1 the same way in steps 2 and 4.
SET sql_log_bin=0;
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT);
SET sql_log_bin=1;

# Bootstrap the group from this server.
--source include/start_and_bootstrap_group_replication.inc

--echo
--echo # 2. Create table t1 on server2 with binlog disabled and join to the
--echo #    group

# Switch to connection server2.
--let $rpl_connection_name= server2
--source include/rpl_connection.inc

# Same table definition as on server1, again created with binary logging
# switched off for the session.
SET sql_log_bin=0;
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT);
SET sql_log_bin=1;

# Join the group (no bootstrap this time).
--source include/start_group_replication.inc

--echo
--echo # 3. Server3 will do three attempts of rejoin to validate that not block
--echo #    and reduce stop_component timeout to recovery module stop faster

# Switch to connection server3.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc

# Save the current global values in user variables of this connection; the
# cleanup step (15) restores them from the same connection.
SET @group_replication_autorejoin_tries_save = @@GLOBAL.group_replication_autorejoin_tries;
SET @group_replication_components_stop_timeout_save = @@GLOBAL.group_replication_components_stop_timeout;

# Three auto-rejoin tries: per the test description the first two are
# expected to fail and the third to succeed. The components stop timeout is
# set to 30 (unit presumably seconds -- confirm in the variable reference).
SET GLOBAL group_replication_autorejoin_tries = 3;
SET GLOBAL group_replication_components_stop_timeout = 30;

--echo
--echo # 4. Server3 disable binlog and suppress warnings, create table t1 and
--echo #    get pid. Pid will be used to expel member from group. Join group.

# Everything up to "SET SESSION sql_log_bin= 1" below (suppressions, the
# scratch pid table and t1) is executed with binary logging off for this
# session.
SET SESSION sql_log_bin= 0;

# Register the error-log messages this scenario is expected to produce, so
# they are not reported as unexpected warnings.
call mtr.add_suppression('On shutdown there was a timeout on the Group Replication applier termination.');
call mtr.add_suppression('Failed to stop the group replication applier thread.');
call mtr.add_suppression('Member was expelled from the group due to network failures, changing member status to ERROR.');
call mtr.add_suppression('The server was automatically set into read only mode after an error was detected.');
call mtr.add_suppression('The group_replication_applier channel is still running, most likely it is waiting for a database/table lock*');
call mtr.add_suppression('Unable to confirm whether the server has left the group or not.*');

# Obtain the server process id: load the contents of the server's pid file
# into a one-column scratch table and read it back into $server_pid.
# --replace_result substitutes the literal text "pid_file" for the real path
# in the recorded output of the LOAD DATA statement.
CREATE TABLE pid_table(pid_no INT PRIMARY KEY);
--let $pid_file=`SELECT @@pid_file`
--replace_result $pid_file pid_file
--eval LOAD DATA LOCAL INFILE '$pid_file' INTO TABLE pid_table
--let $server_pid=`SELECT pid_no FROM pid_table`
DROP TABLE pid_table;

# Same t1 definition as on server1 and server2.
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT);
SET SESSION sql_log_bin= 1;

# server_uuid of server3; used as the member id in the state waits of
# steps 9, 10 and 14.
--let $member3_uuid = `SELECT @@GLOBAL.server_uuid`


# Select connection server3 (it is already the active connection at this
# point) and join the group.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc

--source include/start_group_replication.inc

--echo
--echo # 5. Enable super read only not to avoid problem set it after LOCK
--echo #    tables READ. Table locked will prevent server to finish recovery

# Use connection server_3 for the lock. NOTE(review): server_3 is presumably
# a second connection to the same server as server3, which keeps server3
# free for the wait queries in later steps -- confirm in group_replication.inc.
--let $rpl_connection_name= server_3
--source include/rpl_connection.inc

# super_read_only is enabled first and the global read lock is taken second;
# the lock stays held by this connection until UNLOCK TABLES in step 12.
SET GLOBAL super_read_only= 1;
FLUSH TABLES WITH READ LOCK;


--echo
--echo # 6. Insert data on group

# Switch to connection server1.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc

# Two rows written through server1. Step 7 then checks that a replication
# thread on server3 is stuck waiting for the global read lock taken in step 5.
INSERT INTO test.t1 VALUES (1, 1);
INSERT INTO test.t1 VALUES (2, 1);


--echo
--echo # 7. Wait until group_replication_applier channel waits on table t1
--echo #    read lock

# On connection server3, poll the process list until exactly one thread owned
# by 'system user' is in state 'Waiting for global read lock'. The step title
# identifies that thread as the group_replication_applier channel.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
--let $wait_condition=SELECT COUNT(*)=1 FROM information_schema.processlist WHERE user='system user' AND state='Waiting for global read lock'
--source include/wait_condition.inc


--echo
--echo # 8. Expel server3 from the group

# Send signal SIGSTOP to server 3.
# $server_pid was read from server3's pid file in step 4. The signal is
# passed by name rather than by number: signal numbers differ between Linux
# architectures (19 and 18 are SIGSTOP and SIGCONT on x86/ARM, but other
# signals on e.g. MIPS, Alpha and SPARC), whereas the names are portable.
--exec kill -s STOP $server_pid

# Wait until the group settles on 2 members
# Checked from both remaining members (server1 and server2), so the paused
# server3 is confirmed to be out of the group before it is resumed.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
--let $group_replication_number_of_members= 2
--source include/gr_wait_for_number_of_members.inc

--let $rpl_connection_name= server2
--source include/rpl_connection.inc
--let $group_replication_number_of_members= 2
--source include/gr_wait_for_number_of_members.inc

# Send signal SIGCONT to server 3.
# The active connection is switched back to server3 first, so the following
# steps run their queries there once the process is running again.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
--exec kill -s CONT $server_pid

--echo
--echo # 9. Server3 should change is status to ERROR

# Wait, on connection server3 (selected at the end of step 8), until the
# member identified by server3's server_uuid reports state ERROR.
--let $group_replication_member_state= ERROR
--let $group_replication_member_id= $member3_uuid
--source include/gr_wait_for_member_state.inc

--echo
--echo # 10. Waiting for two auto-rejoin to happen, only the third attempt
--echo #     will be successful. First will fail due recovery fail to stop.
--echo #     Second fail to start applier module due channel being active

# Wait, with the timeout raised to 600, until the auto-rejoin stage in
# performance_schema.events_stages_current reports WORK_COMPLETED = 2.
# NOTE(review): WORK_COMPLETED presumably counts the rejoin attempts made so
# far -- confirm against the auto-rejoin stage instrumentation.
--let $wait_timeout= 600
--let $wait_condition= SELECT WORK_COMPLETED = 2 FROM performance_schema.events_stages_current WHERE EVENT_NAME LIKE '%auto-rejoin%'
--source include/wait_condition.inc

# Same check as in step 7: one 'system user' thread is still waiting for the
# global read lock, i.e. the lock from step 5 is still blocking it.
--let $wait_condition=SELECT COUNT(*)=1 FROM information_schema.processlist WHERE user='system user' AND state='Waiting for global read lock'
--source include/wait_condition.inc

# The member must still be in ERROR state at this point.
--let $group_replication_member_state= ERROR
--let $group_replication_member_id= $member3_uuid
--source include/gr_wait_for_member_state.inc


--echo
--echo # 11. Wait that message about applier being blocked was logged

# Poll performance_schema.error_log until exactly one entry with error code
# MY-013701 contains the "channel is still running" text. The same message is
# registered as an expected warning in step 4.
--let $wait_condition= SELECT COUNT(*)=1 FROM performance_schema.error_log WHERE error_code='MY-013701' AND data LIKE "%The group_replication_applier channel is still running, most likely it is waiting for a database/table lock, which is preventing the channel from stopping. Please check database/table locks, including the ones created by backup tools.%"
--source include/wait_condition.inc


--echo
--echo # 12. UNLOCK tables on server3

# The global read lock was taken on connection server_3 in step 5, so it has
# to be released from that same connection.
--let $rpl_connection_name= server_3
--source include/rpl_connection.inc
UNLOCK TABLES;

--echo
--echo # 13. Confirm that group_replication_applier channel is stopped

# Still on connection server_3 (selected in step 12): wait until
# performance_schema.replication_applier_status shows the
# group_replication_applier channel with service_state OFF.
--let $wait_condition=SELECT COUNT(*)=1 FROM performance_schema.replication_applier_status WHERE channel_name='group_replication_applier' AND service_state='OFF'
--source include/wait_condition.inc

--echo
--echo # 14. Wait for member to be ONLINE after autorejoin third attempt succeed

# Back on connection server3, wait (timeout raised to 600) until the member
# identified by server3's server_uuid reports state ONLINE.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
--let $wait_timeout= 600
--let $group_replication_member_state= ONLINE
--let $group_replication_member_id= $member3_uuid
--source include/gr_wait_for_member_state.inc

--echo
--echo # 15. Cleanup

# Runs on connection server3, the same connection that stored the @..._save
# user variables in step 3, so they are still available here.
# NOTE(review): t1 is dropped with binary logging enabled, so the DROP
# presumably reaches the other members through the group -- confirm.
DROP TABLE t1;
# Restore the two Group Replication variables changed in step 3.
SET GLOBAl group_replication_autorejoin_tries = @group_replication_autorejoin_tries_save;
SET GLOBAl group_replication_components_stop_timeout = @group_replication_components_stop_timeout_save;

--source include/group_replication_end.inc