File: gr_autorejoin_while_recovery_locked.test

package info (click to toggle)
mysql-8.0 8.0.43-3
links: PTS, VCS
area: main
in suites: sid
size: 1,273,924 kB
sloc: cpp: 4,684,605; ansic: 412,450; pascal: 108,398; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; sh: 24,181; python: 21,816; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,076; makefile: 2,194; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (226 lines) | stat: -rw-r--r-- 8,186 bytes
###############################################################################
#
# When a member has recovery blocked it should not cause problems on rejoin:
# it shall stop (or try to stop) the recovery module and attempt another rejoin.
#
# Test:
#   0. The test requires three servers
#   1. Create table t1 on server1 with binlog disabled and bootstrap the
#      group
#   2. Create table t1 on server2 with binlog disabled and join to the
#      group
#   3. Setup Server3.
#      Set the attempts of rejoin to 3 so we validate the process won't
#      block.
#      Reduce stop_component timeout so recovery module stops faster.
#   4. On server3, disable binlog and suppress warnings, create table t1 and
#      get the pid. The pid will be used to expel the member from the group
#   5. Enable super_read_only before taking FLUSH TABLES WITH READ LOCK, to
#      avoid problems setting it once the lock is held. The locked table
#      will prevent the server from finishing recovery
#   6. Insert data on group
#   7. Join server3 to group, it will stay on recovery due to transactions
#      on group that it can't apply as the table is locked
#   8. Wait until group_replication_recovery channel waits on table t1
#      commit lock
#   9. Expel server3 from the group
#   10. Server3 should change its status to ERROR
#   11. Waiting for two auto-rejoin attempts to happen, they will fail
#       due to recovery failing to stop
#   12. After the beginning of the second attempt server3 will stay in
#       ERROR state
#   13. group_replication_recovery channel still waits on table t1
#       commit lock
#   14. UNLOCK tables on server3 and the join shall succeed
#   15. Server3 shall be ONLINE
#   16. Cleanup
#
###############################################################################

# Preconditions and harness setup.
# NOTE(review): linux.inc presumably restricts the test to Linux; the test
# later signals the server process with `kill` through --exec.
--source include/linux.inc
--source include/big_test.inc
--source include/not_have_privilege_checks_user.inc
--source include/have_group_replication_plugin.inc
# Three servers are needed (see step 0 in the header above).
--let $rpl_server_count= 3
# NOTE(review): presumably keeps group_replication.inc from starting the
# plugin; this test starts it explicitly on each server in steps 1, 2 and 7.
--let $rpl_skip_group_replication_start= 1
--source include/group_replication.inc

--echo
--echo # 1. Create table t1 on server1 with binlog disabled and bootstrap the
--echo #    group

# Binary logging is switched off for this session only around the DDL, so
# the CREATE TABLE is not logged; servers 2 and 3 create t1 the same way in
# steps 2 and 4.
SET sql_log_bin=0;
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT);
SET sql_log_bin=1;

--source include/start_and_bootstrap_group_replication.inc

--echo
--echo # 2. Create table t1 on server2 with binlog disabled and join to the
--echo #    group

# Switch the active connection to server2.
--let $rpl_connection_name= server2
--source include/rpl_connection.inc

# Same unlogged local DDL as on server1, so t1 exists before joining.
SET sql_log_bin=0;
CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT);
SET sql_log_bin=1;

--source include/start_group_replication.inc

--echo
--echo # 3. Setup Server3.
--echo #    Set the attempts of rejoin to 3 so we validate the process wont
--echo #    block.
--echo #    Reduce stop_component timeout so recovery module stops faster.

# Switch the active connection to server3.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc

# Remember the current values in session user variables; the cleanup in
# step 16 restores them from these variables on this same connection.
SET @group_replication_autorejoin_tries_save = @@GLOBAL.group_replication_autorejoin_tries;
SET @group_replication_components_stop_timeout_save = @@GLOBAL.group_replication_components_stop_timeout;

# Test-specific values: three auto-rejoin tries and a components stop
# timeout of 30.
SET GLOBAL group_replication_autorejoin_tries = 3;
SET GLOBAL group_replication_components_stop_timeout = 30;

--echo
--echo # 4. Server3 disable binlog and supreess warnings, create table t1 and
--echo #    get pid. Pid will be used to expel member from group

# Everything up to the matching "sql_log_bin= 1" below runs without binary
# logging: the suppressions, the temporary pid_table and the local t1.
SET SESSION sql_log_bin= 0;

# Register the error-log messages this scenario is expected to produce
# (recovery stop timeout, expulsion, forced read-only mode, failure to stop
# the donor connection, unconfirmed group leave).
# NOTE(review): presumably these keep the harness from failing the test on
# those log lines — confirm against mtr.add_suppression.
call mtr.add_suppression('On shutdown there was a timeout on the Group Replication recovery module termination. Check the log for more details');
call mtr.add_suppression('Member was expelled from the group due to network failures, changing member status to ERROR.');
call mtr.add_suppression('The server was automatically set into read only mode after an error was detected.');
call mtr.add_suppression("Error when stopping the group replication incremental recovery's donor connection");
call mtr.add_suppression('Unable to confirm whether the server has left the group or not. Check performance_schema.replication_group_members to check group membership information.');

# Obtain the server3 process id: load the contents of the server's pid file
# into a scratch table, read the value into $server_pid, then drop the table.
# $server_pid is used by the kill commands in step 9.
CREATE TABLE pid_table(pid_no INT PRIMARY KEY);
--let $pid_file=`SELECT @@pid_file`
# Mask the machine-specific pid file path in the echoed LOAD DATA statement.
--replace_result $pid_file pid_file
--eval LOAD DATA LOCAL INFILE '$pid_file' INTO TABLE pid_table
--let $server_pid=`SELECT pid_no FROM pid_table`
DROP TABLE pid_table;

CREATE TABLE t1 (c1 INT NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 INT);
SET SESSION sql_log_bin= 1;

# server3's UUID identifies it in the member-state waits of steps 10, 12
# and 15.
--let $member3_uuid = `SELECT @@GLOBAL.server_uuid`

--echo
--echo # 5. Enable super read only, to avoid problem set it after FLUSH
--echo #    TABLES WITH READ LOCK. Table locked will prevent server from
--echo #    finishing recovery

# Note the connection name: server_3, not server3. The read lock is taken
# here and released with UNLOCK TABLES on this same connection in step 14,
# while the server3 connection stays free for the remaining steps.
# NOTE(review): server_3 is presumably a second connection to server3
# provided by the group replication includes — confirm.
--let $rpl_connection_name= server_3
--source include/rpl_connection.inc
# super_read_only is enabled before the global read lock is taken.
SET GLOBAL super_read_only= 1;
FLUSH TABLES WITH READ LOCK;

--echo
--echo # 6. Insert data on group

# Switch the active connection to server1.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc

# These rows are the transactions server3 will have to apply to t1 during
# recovery when it joins in step 7.
INSERT INTO test.t1 VALUES (1, 1);
INSERT INTO test.t1 VALUES (2, 1);

--echo
--echo # 7. Join server3 to group, it will stay on recovery due transactions
--echo #    on group that it can't apply as the table is locked

# Back to the server3 connection (the one that does not hold the read lock).
--let $rpl_connection_name= server3
--source include/rpl_connection.inc

# NOTE(review): presumably makes start_group_replication.inc wait for the
# RECOVERING state instead of ONLINE, since the member cannot finish
# recovery while the lock from step 5 is held — confirm against the include.
--let $group_replication_start_member_state= RECOVERING
--source include/start_group_replication.inc

--echo
--echo # 8. Wait until group_replication_recovery channel waits on table t1
--echo #    commit lock

# Poll until exactly one 'system user' thread is listed in the
# 'Waiting for commit lock' state, i.e. it is blocked by the read lock
# taken in step 5.
--let $wait_condition=SELECT COUNT(*)=1 FROM information_schema.processlist WHERE user='system user' AND state='Waiting for commit lock'
--source include/wait_condition.inc

--echo
--echo # 9. Expel server3 from the group

# Suspend the server3 process (pid read from its pid file in step 4) so it
# stops responding to the group. The signal is sent by name rather than by
# number: SIGSTOP is 19 only on some architectures, so `kill -19` can
# deliver a different signal elsewhere.
--exec kill -s STOP $server_pid

# Wait until the group settles on 2 members, as seen from both remaining
# members, before resuming server3.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
--let $group_replication_number_of_members= 2
--source include/gr_wait_for_number_of_members.inc

--let $rpl_connection_name= server2
--source include/rpl_connection.inc
--let $group_replication_number_of_members= 2
--source include/gr_wait_for_number_of_members.inc

# Resume the server3 process with SIGCONT, again by name for the same
# portability reason.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
--exec kill -s CONT $server_pid

--echo
--echo # 10. Server3 should change is status to ERROR

# Wait until the member identified by server3's UUID (captured in step 4)
# is reported in the ERROR state.
--let $group_replication_member_state= ERROR
--let $group_replication_member_id= $member3_uuid
--source include/gr_wait_for_member_state.inc

--echo
--echo # 11. Waiting for two auto-rejoin attempts to happen, they will fail
--echo #     due to recovery failing to stop

# Raised timeout for this wait.
# NOTE(review): presumably seconds, and presumably needed because each
# attempt can take up to the components stop timeout set in step 3 —
# confirm against wait_condition.inc.
--let $wait_timeout= 600
# Poll the current stage row whose name contains 'auto-rejoin' until its
# WORK_COMPLETED counter reads 2.
--let $wait_condition= SELECT WORK_COMPLETED = 2 FROM performance_schema.events_stages_current WHERE EVENT_NAME LIKE '%auto-rejoin%'
--source include/wait_condition.inc

--echo
--echo # 12. After the beginning of the second attempt server3 will stay in
--echo #     ERROR state

# Same check as in step 10: server3 is still reported in the ERROR state,
# i.e. the rejoin attempts so far have not brought it back.
--let $group_replication_member_state= ERROR
--let $group_replication_member_id= $member3_uuid
--source include/gr_wait_for_member_state.inc

--echo
--echo # 13. group_replication_recovery channel still waits on table t1
--echo #     commit lock

# Same condition as in step 8: exactly one 'system user' thread is still in
# the 'Waiting for commit lock' state.
--let $wait_condition=SELECT COUNT(*)=1 FROM information_schema.processlist WHERE user='system user' AND state='Waiting for commit lock'
--source include/wait_condition.inc

--echo
--echo # 14. UNLOCK tables on server3 and the join shall succeed

# Use the server_3 connection, which took FLUSH TABLES WITH READ LOCK in
# step 5, to release that lock.
--let $rpl_connection_name= server_3
--source include/rpl_connection.inc
UNLOCK TABLES;

--echo
--echo # 15. Server3 shall be ONLINE

--let $rpl_connection_name= server3
--source include/rpl_connection.inc
# The UNLOCK TABLES in step 14 races with the completion of the second
# auto-rejoin attempt, so the member may rejoin successfully on either the
# second or the third attempt; hence the large wait_timeout.
--let $wait_timeout= 600
--let $group_replication_member_state= ONLINE
--let $group_replication_member_id= $member3_uuid
--source include/gr_wait_for_member_state.inc

--echo
--echo # 16. Cleanup

# Runs on the server3 connection selected in step 15, with binary logging
# enabled (sql_log_bin was set back to 1 at the end of step 4).
# NOTE(review): presumably the DROP therefore propagates to the other group
# members, removing t1 everywhere — confirm.
DROP TABLE t1;

# Restore the two settings changed in step 3 from the user variables saved
# there on this same connection.
SET GLOBAl group_replication_autorejoin_tries = @group_replication_autorejoin_tries_save;
SET GLOBAl group_replication_components_stop_timeout = @group_replication_components_stop_timeout_save;

--source include/group_replication_end.inc