1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
|
# The following test aims at verifying MTS recovery.
# It can be invoked in a standard as well as Multi-source replication environment.
#
# Algorithm of test
# -----------------
#
# A number of concurrently executable transactions are prepared on the
# master. It's done with adding a "separator" group which is a DML in
# myisam table. Considering their sizes and a small value of
# max_relay_log_size they are relay-logged in a sequence of files
# possibly being broken into pieces so different pieces of a
# transaction be scattered across few logs.
# At applying a number of transactions is let to complete until a separator
# is found. At this point a slave local session locks random records in
# the data tables which leads to a timeout error.
# After the slave applier is stopped, the failure conditions are removed,
# and slave resumes with two phases. When the gaps are filled up, a next
# undisturbed round of applying follows.
# Gaps filling is done in two branches: an immediate restart in the
# "hot" server, and the slave server shutdown, restart and the slave
# service restart. In either case MTS recovery must be completed with
# positive data consistency check.
#------------------
# Parameters:
#
# Number of master connection. For Logical_clock it means
# parallelization window, for the Database scheduler it means a number
# of databases.
--let $conn_cnt=4
# Total number of failures that the slave is subject to by simulation
--let $total_fail_cnt=4
--let $fail_to_shutdown=2
# max number of simulataneous errors by Workers at a time
--let $simul_fail_cnt=4
# total number of transactions to be processed by slave
--let $total_trx_cnt=128
# give up waiting for lock in Innodb at error simulation
--let $innodb_wait_timeout=1
# small value for max relay-log-size
--let $relay_log_size= 4096
--connection slave
CALL mtr.add_suppression("The replica coordinator and worker threads are stopped");
SET @save.innodb_lock_wait_timeout=@@GLOBAL.innodb_lock_wait_timeout;
eval SET @@global.innodb_lock_wait_timeout=$innodb_wait_timeout;
SET @save.replica_transaction_retries= @@global.replica_transaction_retries;
SET @@global.replica_transaction_retries= 0;
SET @save_replica_preserve_commit_order= @@global.replica_preserve_commit_order;
SET @save.max_relay_log_size= @@global.max_relay_log_size;
--eval SET @@global.max_relay_log_size=$relay_log_size
--source include/stop_slave_sql.inc
SET GLOBAL replica_preserve_commit_order= OFF;
--source include/start_slave_sql.inc
--let $assert_text= the max size of relay log must be $relay_log_size
--let $assert_cond= [select @@global.max_relay_log_size] = $relay_log_size
--source include/assert.inc
--let $trx_max_size=`select 3*@@global.max_relay_log_size/2`
#
# End of parameters
# ------------------
--let $index=$conn_cnt
while ($index)
{
--connect(con$index, localhost, root,,)
eval CREATE DATABASE db$index;
eval CREATE TABLE db$index.t (a INT AUTO_INCREMENT PRIMARY KEY, b TEXT) ENGINE=innodb;
--dec $index
}
--connection master
# Inserts into myisam will be caught/blocked on the slave side
# to serve as synchronization tool by which the slave side
# becomes aware of execution/scheduling point in the replicated load.
# So each such insert is made to "trigger" a failure.
CREATE TABLE t1m (a INT) ENGINE=myisam;
if ($rpl_multi_source)
{
--let $rpl_channel_name= 'channel_1'
}
#--let $sync_slave_connection= server_2
--let $sync_slave_connection= slave
--source include/sync_slave_sql_with_master.inc
--connection slave
LOCK TABLES t1m WRITE;
--connection master
#
# Parallelizable load generator
#
--let $trans_no_failure=`SELECT FLOOR($total_trx_cnt / $total_fail_cnt )`
--let $iter=$total_trx_cnt
--disable_query_log
while ($iter)
{
--let $index=$conn_cnt
while ($index)
{
--connection con$index
BEGIN;
eval INSERT INTO db$index.t values (NULL, REPEAT('a', FLOOR(RAND()*$trx_max_size)));
--dec $index
}
--let $index=$conn_cnt
while ($index)
{
--connection con$index
COMMIT;
if (`SELECT ($total_trx_cnt-$iter) % $trans_no_failure = FLOOR($trans_no_failure/2)`)
{
--connection master
INSERT INTO t1m SET a=1;
}
--dec $index
--dec $iter # the outer loop counter is decremented on purpose here.
}
}
--enable_query_log
#
# The slave failures loop. Slave sql threads stop and restarted
# prescribed number of times. After being waited tables are unlocked
# the slave must always recover.
#
--disable_query_log
--disable_result_log
--let $iter=$total_fail_cnt
while ($iter)
{
--let $iter_err= $simul_fail_cnt
--connection slave1
BEGIN;
while ($iter_err)
{
--let $index=`SELECT 1 + FLOOR(RAND()*100) % $conn_cnt`
--eval SELECT * FROM db$index .t FOR UPDATE
--dec $iter_err
}
--connection slave
UNLOCK TABLES;
--let $slave_sql_errno=convert_error(ER_LOCK_WAIT_TIMEOUT)
--let $show_slave_sql_error= 0
--source include/wait_for_slave_sql_error.inc
--connection slave1
ROLLBACK;
# first few iterations with a "warm" restart
if ($iter > $fail_to_shutdown)
{
--connection slave
if ($rpl_multi_source)
{
START SLAVE UNTIL SQL_AFTER_MTS_GAPS for channel 'channel_1';
--let $rpl_channel_name= 'channel_1'
}
if (!$rpl_multi_source)
{
START SLAVE UNTIL SQL_AFTER_MTS_GAPS;
}
--source include/wait_for_slave_sql_to_stop.inc
}
if ($iter <= $fail_to_shutdown)
{
--let $rpl_server_number= 2
--let $rpl_skip_start_slave= 1
--source include/rpl_restart_server.inc
--connection slave
SET @save.innodb_lock_wait_timeout=@@GLOBAL.innodb_lock_wait_timeout;
eval SET @@global.innodb_lock_wait_timeout=$innodb_wait_timeout;
SET @save.replica_transaction_retries= @@global.replica_transaction_retries;
SET @@global.replica_transaction_retries= 0;
SET @save_replica_preserve_commit_order= @@global.replica_preserve_commit_order;
SET @@global.replica_preserve_commit_order=OFF;
SET @save.max_relay_log_size= @@global.max_relay_log_size;
SET @@global.max_relay_log_size=4096;
if ($rpl_multi_source)
{
START SLAVE UNTIL SQL_AFTER_MTS_GAPS for channel 'channel_1';
--let $rpl_channel_name= 'channel_1'
}
if (!$rpl_multi_source)
{
START SLAVE UNTIL SQL_AFTER_MTS_GAPS;
}
--source include/wait_for_slave_sql_to_stop.inc
}
LOCK TABLES t1m WRITE;
if ($rpl_multi_source)
{
--let $rpl_channel_name= 'channel_1'
}
--source include/start_slave_sql.inc
--dec $iter
} #eof of the slave error load
--enable_result_log
--enable_query_log
--connection slave
UNLOCK TABLES;
--connection master
if ($rpl_multi_source)
{
--let $rpl_channel_name= 'channel_1'
}
--let $sync_slave_connection= slave
--source include/sync_slave_sql_with_master.inc
#
# Cleanup
#
--connection master
DROP TABLE t1m;
--let $index=$conn_cnt
while ($index)
{
--eval DROP DATABASE db$index
--dec $index
}
--source include/sync_slave_sql_with_master.inc
SET @@global.innodb_lock_wait_timeout=@save.innodb_lock_wait_timeout;
SET @@global.replica_transaction_retries= @save.replica_transaction_retries;
SET @@global.max_relay_log_size= @save.max_relay_log_size;
--source include/stop_slave_sql.inc
SET @@global.replica_preserve_commit_order= @save_replica_preserve_commit_order;
--source include/start_slave_sql.inc
|