1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
|
# Prerequisites, as named by the include files: a debug build (the test sets
# debug_dbug later on), InnoDB, and statement-based binary logging.
--source include/have_debug.inc
--source include/have_innodb.inc
--source include/have_binlog_format_statement.inc
# Replication topology 1->2: server_1 acts as master and server_2 as slave
# throughout this test.
--let $rpl_topology=1->2
--source include/rpl_init.inc
# Test case: after the slave has applied a slow statement and caught up,
# Seconds_Behind_Master must read 0 while parallel replication is enabled.
--echo *** MDEV-5509: Incorrect value for Seconds_Behind_Master if parallel replication ***
--connection server_2
# Remember the slave's current parallel-replication settings; the clean-up
# at the end of the test restores them from these user variables.
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
set @old_parallel_mode= @@GLOBAL.slave_parallel_mode;
# Change the settings while the slave is stopped: 5 parallel threads,
# optimistic mode.
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=5;
set global slave_parallel_mode= optimistic;
--source include/start_slave.inc
--connection server_1
CREATE TABLE t1 (a INT PRIMARY KEY, b INT);
# The INSERT below uses sleep(), which triggers the "unsafe statement"
# warning quoted here under BINLOG_FORMAT = STATEMENT; suppress it.
CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave");
# Make sure the slave has applied the CREATE TABLE before the timed INSERT.
--save_master_pos
--connection server_2
--sync_with_master
--connection server_1
# A statement that calls sleep(2), so it takes a noticeable time to run.
INSERT INTO t1 VALUES (1,sleep(2));
--save_master_pos
--connection server_2
--sync_with_master
# The slave position (which --sync_with_master waits for) is updated just
# before the Seconds_Behind_Master. So we have to wait for the zero status
# to appear, otherwise there is a small window between --sync_with_master
# and SHOW SLAVE STATUS where we can see a non-zero value.
--let $slave_param= Seconds_Behind_Master
--let $slave_param_value= 0
--source include/wait_for_slave_param.inc
--echo Seconds_Behind_Master should be zero here because the slave is fully caught up and idle.
# Print only the Seconds_Behind_Master field of the slave status.
--let $status_items= Seconds_Behind_Master
--source include/show_slave_status.inc
# Test case: the parallel worker threads must go away when the slave SQL
# thread stops on an error, must not be started by resizing the pool while
# the slave is stopped, and must be spawned with the new size on restart.
--echo *** MDEV-8294: Inconsistent behavior of slave parallel threads at runtime ***
--connection server_1
INSERT INTO t1 VALUES (10,0);
# Force a duplicate key error on the slave: the DELETE is executed with
# sql_log_bin= 0, so it is not written to the binary log and the slave keeps
# the row a=10. The second, logged INSERT of a=10 then collides on the slave.
SET sql_log_bin= 0;
DELETE FROM t1 WHERE a=10;
SET sql_log_bin= 1;
INSERT INTO t1 VALUES (10,0);
# This saved position is the one the --sync_with_master further down waits
# for, after the slave has been restarted.
--save_master_pos
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
--connection server_2
# Wait for the slave SQL thread to stop with error 1062 (the duplicate key
# error provoked above).
--let $slave_sql_errno= 1062
--source include/wait_for_slave_sql_error.inc
# At this point, the worker threads should have stopped also.
--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.processlist WHERE User = "system user" AND State = "Waiting for work from SQL thread";
--source include/wait_condition.inc
# Check that the pool can still be resized, but remains inactive as no slave
# SQL thread is running.
SET GLOBAL slave_parallel_threads=8;
--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.processlist WHERE User = "system user" AND State = "Waiting for work from SQL thread";
--source include/wait_condition.inc
# Plain STOP SLAVE rather than include/stop_slave.inc, as the SQL thread has
# already stopped with an error.
# NOTE(review): presumably the include would fail on the recorded SQL error -- confirm.
STOP SLAVE;
# At this point, the worker threads should have stopped.
--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.processlist WHERE User = "system user" AND State = "Waiting for work from SQL thread";
--source include/wait_condition.inc
# Skip one event on restart, so that the duplicate INSERT that stopped the
# slave is not retried.
SET GLOBAL sql_slave_skip_counter= 1;
--source include/start_slave.inc
# At this point, the worker threads should have been spawned.
# Expect 8 idle workers, matching the slave_parallel_threads=8 set above.
--let $wait_condition= SELECT COUNT(*)=8 FROM information_schema.processlist WHERE User = "system user" AND State = "Waiting for work from SQL thread";
--source include/wait_condition.inc
--sync_with_master
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
# Test case: FLUSH TABLES WITH READ LOCK (FTWRL) issued on the slave while
# two replicated transactions from one group commit are in flight must not
# deadlock with the parallel worker threads.
--echo *** MDEV-7818: Deadlock occurring with parallel replication and FTWRL ***
--connection server_1
CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1,0), (2,0), (3,0);
--save_master_pos
--connection server_2
--sync_with_master
# Keep the slave stopped while the master generates the events below, so they
# are all applied together once the slave is started again.
--source include/stop_slave.inc
--connection server_1
# Create a group commit with two transactions, will be used to provoke the
# problematic thread interaction with FTWRL on the slave.
# NOTE(review): presumably the binlog_force_commit_id debug keyword makes
# both transactions below be logged with the commit id in @commit_id --
# confirm against the server source.
SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 4242;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
# First transaction (called T1 in the comments below): updates row a=2.
BEGIN;
UPDATE t2 SET b=b+1 WHERE a=2;
COMMIT;
# Second transaction (called T2 in the comments below): inserts row a=4.
BEGIN;
INSERT INTO t2 VALUES (4,10);
COMMIT;
SET SESSION debug_dbug= @old_dbug;
# Fifteen further single-row inserts (a=5..19) logged after the forced group.
INSERT INTO t2 VALUES (5,0);
INSERT INTO t2 VALUES (6,0);
INSERT INTO t2 VALUES (7,0);
INSERT INTO t2 VALUES (8,0);
INSERT INTO t2 VALUES (9,0);
INSERT INTO t2 VALUES (10,0);
INSERT INTO t2 VALUES (11,0);
INSERT INTO t2 VALUES (12,0);
INSERT INTO t2 VALUES (13,0);
INSERT INTO t2 VALUES (14,0);
INSERT INTO t2 VALUES (15,0);
INSERT INTO t2 VALUES (16,0);
INSERT INTO t2 VALUES (17,0);
INSERT INTO t2 VALUES (18,0);
INSERT INTO t2 VALUES (19,0);
--save_master_pos
--connection server_2
# s1 is an extra client connection to the slave (it uses $SLAVE_MYPORT).
--connect (s1, 127.0.0.1, root,, test, $SLAVE_MYPORT,)
# Block one transaction on a row lock.
# The open transaction on s1 locks row a=2, the row the replicated UPDATE
# (T1) needs.
BEGIN;
SELECT * FROM t2 WHERE a=2 FOR UPDATE;
--connection server_2
# Wait for slave thread of the other transaction to have the commit lock.
--source include/start_slave.inc
--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
--source include/wait_condition.inc
# s2 is a second extra connection to the slave; FTWRL is sent asynchronously
# on it and its result is collected later with reap.
--connect (s2, 127.0.0.1, root,, test, $SLAVE_MYPORT,)
send FLUSH TABLES WITH READ LOCK;
# The bug was that at this point we were deadlocked.
# The FTWRL command would wait forever for T2 to commit.
# T2 would wait for T1 to commit first, but T1 is waiting for
# the global read lock to be released.
--connection s1
# Release the lock that blocks T1 from replicating.
COMMIT;
# s1 is already the current connection, so this switch is a no-op.
# STOP SLAVE is sent asynchronously and reaped only after s2 has released
# the global read lock below.
--connection s1
send STOP SLAVE;
--connection s2
# Collect the FTWRL result; this returning at all shows there is no deadlock.
reap;
--connection server_1
SELECT * FROM t2 ORDER BY a;
--connection s2
UNLOCK TABLES;
SELECT "after UNLOCK TABLES" as state;
--connection s1
reap;
SELECT "after reap of STOP SLAVE" as state;
--connection server_2
# Once the slave has fully stopped, restart it and check that it applies
# everything up to the master position saved above.
--source include/wait_for_slave_to_stop.inc
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t2 ORDER BY a;
# Test case: two concurrent FTWRL statements on the master, the first of
# which is killed while waiting, must not trip the !pool->busy assertion.
--echo *** MDEV-8318: Assertion `!pool->busy' failed in pool_mark_busy(rpl_parallel_thread_pool*) on concurrent FTWRL ***
--connection server_1
# Hold a table write lock so that the FTWRL statements sent below have to
# wait.
LOCK TABLE t2 WRITE;
--connect (m1,localhost,root,,test)
--connection m1
# Remember m1's connection id so that server_1 can kill its query later.
--let $cid=`SELECT CONNECTION_ID()`
send FLUSH TABLES WITH READ LOCK;
--connect (m2,localhost,root,,test)
# We cannot force the race with DEBUG_SYNC, because the race does not
# exist after fixing the bug. At best we could force a debug sync to
# time out, which is effectively just a sleep.
# So just put a small sleep here; it is enough to trigger the bug in
# most runs before the bug fix, and the code should work correctly
# however the thread scheduling happens.
--sleep 0.1
# Second concurrent FTWRL, sent on m2 (it is reaped on m2 below).
send FLUSH TABLES WITH READ LOCK;
--connection server_1
# Mask the actual connection id as CID so the recorded output is stable.
--replace_result $cid CID
eval KILL QUERY $cid;
--connection m1
# The killed FTWRL on m1 must fail with ER_QUERY_INTERRUPTED.
--error ER_QUERY_INTERRUPTED
reap;
--connection server_1
# Release the table lock so the FTWRL pending on m2 can proceed.
UNLOCK TABLES;
--connection m2
reap;
UNLOCK TABLES;
# Clean up.
# Restore the slave's parallel-replication settings from the user variables
# saved at the start of the test; the slave is stopped while they change.
--connection server_2
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
set global slave_parallel_mode= @old_parallel_mode;
--source include/start_slave.inc
# Drop the test tables on the master, then end the replication test.
--connection server_1
DROP TABLE t1, t2;
--source include/rpl_end.inc
|