File: gr_recovery_purged_donor_failover.test

package info (click to toggle)
mysql-8.0 8.0.43-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,273,924 kB
  • sloc: cpp: 4,684,605; ansic: 412,450; pascal: 108,398; java: 83,641; perl: 30,221; cs: 27,067; sql: 26,594; sh: 24,181; python: 21,816; yacc: 17,169; php: 11,522; xml: 7,388; javascript: 7,076; makefile: 2,194; lex: 1,075; awk: 670; asm: 520; objc: 183; ruby: 97; lisp: 86
file content (179 lines) | stat: -rw-r--r-- 6,400 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
################################################################################
# This test evaluates that whenever the donor has purged GTIDs that causes
# errors in the joiner, a failover to another donor will happen.
#
# The test steps are:
#   0) The test requires three servers.
#   1) Bootstrap start group on server1. Add some data. Start GR on server2.
#   2) Purge some GTIDs on server1 by flushing and purging binary log.
#      Create a replication user for recovery.
#   3) Start server3 with the configured replication user and watch it fail as
#      1. Server1 has purged GTIDs
#      2. Server2 does not has the correct replication user
#   4) Create the recovery user on server2.
#      The joiner (server3) should now connect to server2 when failing over.
#   5) The joiner (server3) should become online. Validate data.
#   6) Clean up.
################################################################################
--source include/big_test.inc
--source include/have_group_replication_plugin.inc
--let $rpl_skip_group_replication_start= 1
--let $rpl_server_count= 3
--source include/group_replication.inc

--let $recovery_user= recovery_user
--let $recovery_password= recovery_password

--echo #
--echo # Setup a new 2 member group
--echo #

--connection server1
--source include/start_and_bootstrap_group_replication.inc

#insert some data
CREATE TABLE t1 (c1 INT NOT NULL PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);

--connection server2

SET SESSION sql_log_bin= 0;
--eval CREATE USER "$recovery_user" IDENTIFIED BY "$recovery_password"
--eval GRANT GROUP_REPLICATION_STREAM ON *.* TO "$recovery_user"@'%';
FLUSH PRIVILEGES;
SET SESSION sql_log_bin= 1;

--source include/start_group_replication.inc

--echo #
--echo # Purge member 1 binlog after a flush and create a replication user
--echo #

--connection server1

--let $server_binlog_file_prev= query_get_value(SHOW MASTER STATUS, File, 1)
FLUSH BINARY LOGS;
--let $server_binlog_file_cur= query_get_value(SHOW MASTER STATUS, File, 1)

#
# At this point the dump thread spawned to attend the
# recovery procedure for server2 must have stopped or
# is about to. We make sure that it is so we can purge
# the binary log successfully - note that
# start_group_replication.inc waits for the member to
# be ONLINE.
#
# Otherwise the test could fail since the purge
# command might not be able to remove the file because,
# the lingering dump thread could still clinging on
# to it.
#
--source include/stop_dump_threads.inc

#
# And just to make sure that the file is indeed not
# being used.
#
--let $wait_file_name=$server_binlog_file_prev
--source include/wait_for_file_closed.inc

--disable_query_log
--eval PURGE BINARY LOGS TO '$server_binlog_file_cur'
--enable_query_log

SET SESSION sql_log_bin= 0;
CALL mtr.add_suppression("Cannot replicate to server with server_uuid*");
--eval CREATE USER "$recovery_user" IDENTIFIED BY "$recovery_password"
--eval GRANT REPLICATION SLAVE ON *.* TO "$recovery_user"
--eval GRANT GROUP_REPLICATION_STREAM ON *.* TO "$recovery_user"@'%';
FLUSH PRIVILEGES;
SET SESSION sql_log_bin= 1;

--echo #
--echo # Start recovery and watch it fail for a bit as:
--echo # 1) Server 1 has purged GTIDs
--echo # 2) Server 2 does not has the correct replication user
--echo #

--connection server3

SET SESSION sql_log_bin= 0;
--eval CREATE USER "$recovery_user" IDENTIFIED BY "$recovery_password"
--eval GRANT GROUP_REPLICATION_STREAM ON *.* TO "$recovery_user"@'%';
FLUSH PRIVILEGES;

call mtr.add_suppression("There was an error when connecting to the donor*");
call mtr.add_suppression("For details please check performance_schema.replication_connection_status table and error log messages of Replica I/O for channel group_replication_recovery.");
call mtr.add_suppression("Replica I/O for channel 'group_replication_recovery': Got fatal error 1236*");
call mtr.add_suppression("Error while starting the group replication incremental recovery receiver/applier threads");
call mtr.add_suppression("Replica I/O for channel 'group_replication_recovery': Source command COM_REGISTER_REPLICA failed: Access denied for user 'recovery_user'.*");
call mtr.add_suppression("Replica I/O for channel 'group_replication_recovery': Source command COM_REGISTER_REPLICA failed: failed registering on source, reconnecting to try again.*");
call mtr.add_suppression("Replica I/O thread couldn't register on source");
call mtr.add_suppression("Error while creating the group replication recovery channel with donor.*");
call mtr.add_suppression("Error when configuring the asynchronous recovery channel connection to the donor.*");
SET SESSION sql_log_bin= 1;

--disable_warnings
--eval CHANGE REPLICATION SOURCE TO SOURCE_USER= '$recovery_user', SOURCE_PASSWORD= '$recovery_password' FOR CHANNEL 'group_replication_recovery'
--enable_warnings

SET @debug_save_rec_int= @@GLOBAL.group_replication_recovery_reconnect_interval;

--eval SET GLOBAL group_replication_recovery_reconnect_interval= 1 # seconds

--disable_query_log
--eval SET GLOBAL group_replication_group_name= "$group_replication_group_name";
--enable_query_log
--source include/start_group_replication_command.inc

#give it time to fail several times
--sleep 5

--let $group_replication_member_state= RECOVERING
--source include/gr_wait_for_member_state.inc

--echo #
--echo # Create the recovery user on server 2
--echo # The joiner should now be able to connect to server 2 when failing over.
--echo #

--connection server2
SET SESSION sql_log_bin= 0;
--eval GRANT REPLICATION SLAVE ON *.* TO "$recovery_user"
FLUSH PRIVILEGES;
SET SESSION sql_log_bin= 1;

--connection server3

--let $group_replication_member_state= ONLINE
--source include/gr_wait_for_member_state.inc

--let $assert_text= On the recovered member, the table should exist and have 1 elements
--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1;
--source include/assert.inc

--echo #
--echo # Cleaning up
--echo #

SET @@GLOBAL.group_replication_recovery_reconnect_interval= @debug_save_rec_int;

DROP TABLE t1;

SET SESSION sql_log_bin= 0;
--eval DROP USER "$recovery_user"
SET SESSION sql_log_bin= 1;

--connection server2

SET SESSION sql_log_bin= 0;
--eval DROP USER "$recovery_user"
SET SESSION sql_log_bin= 1;

--connection server1

SET SESSION sql_log_bin= 0;
--eval DROP USER "$recovery_user"
SET SESSION sql_log_bin= 1;

--source include/group_replication_end.inc