File: rpl_deadlock_show_slave_status.test

package info (click to toggle)
mariadb 1%3A11.8.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 765,428 kB
  • sloc: ansic: 2,382,827; cpp: 1,803,532; asm: 378,315; perl: 63,176; sh: 46,496; pascal: 40,776; java: 39,363; yacc: 20,428; python: 19,506; sql: 17,864; xml: 12,463; ruby: 8,544; makefile: 6,059; cs: 5,855; ada: 1,700; lex: 1,193; javascript: 1,039; objc: 80; tcl: 73; awk: 46; php: 22
file content (121 lines) | stat: -rw-r--r-- 4,023 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#
#   Verify that SHOW SLAVE STATUS will not cause deadlocks on the replica.
# A deadlock has been seen in do_gco_wait if the thread is killed, as it will
# hold the LOCK_parallel_entry, and during error reporting, try to grab the
# err_lock. Prior to MDEV-10653, SHOW SLAVE STATUS would grab these locks in
# the reverse order, as calling workers_idle() used to grab LOCK_parallel_entry
# with the err_lock already grabbed (though the MDEV-10653 patch changed the
# workles_idle() implementation to remove the need for locking the
# parallel_entry).
#
# References:
#   MDEV-10653: SHOW SLAVE STATUS Can Deadlock an Errored Slave
#

--source include/master-slave.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_binlog_format_row.inc

--echo #
--echo # Initialize test data
--connection master
create table t1 (a int) engine=innodb;
insert into t1 values (1);
--source include/save_master_gtid.inc

--connection slave
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc

call mtr.add_suppression("Connection was killed");
call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends");

set @save_parallel_threads= @@global.slave_parallel_threads;
set @save_parallel_mode= @@global.slave_parallel_mode;
set @save_transaction_retries= @@global.slave_transaction_retries;
set @save_innodb_lock_wait_timeout= @@global.innodb_lock_wait_timeout;

set @@global.slave_parallel_threads= 2;
set @@global.slave_parallel_mode= CONSERVATIVE;
set @@global.slave_transaction_retries= 0;
set @@global.innodb_lock_wait_timeout= 10;

--echo # Grabbing lock on innodb row to force future replication transaction to wait (and eventually timeout)
BEGIN;
select * from t1 where a=1 for update;

--connection master

set @old_dbug= @@session.debug_dbug;
set @@session.debug_dbug="+d,binlog_force_commit_id";


# GCO 1
SET @commit_id= 10000;
# T1
update t1 set a=2 where a=1;

# GCO 2
SET @commit_id= 10001;
# T2
insert into t1 values (3);

set @@session.debug_dbug= @old_dbug;

--connection slave
start slave;

--echo # Waiting for first transaction to start (and be held at innodb row lock)..
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(%)%' and  command LIKE 'Slave_worker';
--source include/wait_condition.inc

--echo # Waiting for next transaction to start and hold at do_gco_wait()..
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to start commit%' and  command LIKE 'Slave_worker';
--source include/wait_condition.inc

--connection slave1
set @@session.debug_dbug="+d,hold_sss_with_err_lock";
--send show slave status

--connection slave
set debug_sync="now wait_for sss_got_err_lock";

--let $t2_tid= `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE 'Waiting for prior transaction to start commit%'`
--replace_result $t2_tid "<TID of worker in do_gco_wait>"
--eval kill $t2_tid
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE command LIKE 'Killed';
--source include/wait_condition.inc

set debug_sync="now signal sss_continue";

--connection slave1
--echo # Waiting for SHOW SLAVE STATUS to complete..
--disable_result_log
--reap
--enable_result_log
--echo # ..done

--connection slave
ROLLBACK;
--let $slave_sql_errno= 1927
--source include/wait_for_slave_sql_error.inc


--echo #
--echo # Cleanup
--connection master
drop table t1;
--source include/save_master_gtid.inc

--connection slave
set debug_sync= "RESET";
set @@global.slave_parallel_threads= @save_parallel_threads;
set @@global.slave_parallel_mode= @save_parallel_mode;
set @@global.slave_transaction_retries= @save_transaction_retries;
set @@global.innodb_lock_wait_timeout= @save_innodb_lock_wait_timeout;
start slave sql_thread;
--source include/sync_with_master_gtid.inc

--source include/rpl_end.inc
--echo # End of rpl_deadlock_show_slave_status.test