1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
|
--source include/not_group_replication_plugin.inc
# This test assumes 4 workers are used
--let $option_name = replica_parallel_workers
--let $option_value = 4
--source include/only_with_option.inc
--source include/only_mts_replica_parallel_type_logical_clock.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/have_binlog_format_row.inc
--source include/master-slave.inc
################################################################################
# BUG#19641963 RPL.RPL_SEMI_SYNC_GROUP_COMMIT_DEADLOCK FAILS WITH ASSERT IN
# MTS CODE
#
# The assertion failure happened in below case:
# Coord Worker1 Worker2
# ===== ======== ========
# applying ts 3 applying ts 4
# min_waited_timestamp=3
# waiting for the signal
# from a worker
# mark ts 3 finished mark ts 4 finished
#
# slave_worker_ends_group slave_worker_ends_group
# store store
# min_waited_timestamp(3) min_waited_timestamp(3)
# into a local variable into a local variable
#
# find the local variable
# is not SEQ_UNINIT, so
# update lwm to 4
# (get_lwm_timestamp)
# signal coordinator lwm
# is changed
#
# get the signal from Worker2
# set min_waited_timestamp=0
# dispatch ts 5
# applying ts 5
#
# dispatch ts 7(have a gap,
# ts 6 is not in the binlog)
# waiting for all workers
# to finish
#
# mta_checkpoint_routine
# remove ts 3,4 from GAQ
# lwm = 5. GAQ head is ts 7
# find the local variable
# is not SEQ_UNINIT, so it
# calls get_lwm_timestamp to
# update lwm and
# assertion failure happens
# in get_lwm_timestamp()
# assert lwm(5)+1 == GAQ head ts(7)
#
# The test verifys that the assertion failure doesn't happen.
################################################################################
--source include/rpl_connection_slave.inc
SET @saved_innodb_lock_wait_timeout = @@GLOBAL.innodb_lock_wait_timeout;
SET GLOBAL innodb_lock_wait_timeout = 1000;
--source include/restart_slave_sql.inc
--source include/rpl_connection_master.inc
CREATE TABLE t1(c1 INT PRIMARY KEY);
CREATE TABLE t2(c1 INT PRIMARY KEY);
--source include/sync_slave_sql_with_master.inc
#
# Below two transactions will pause two workers and make coordinator
# in the status that it is waiting for a timestamp to be applied.
#
# Blocking the worker applying the same statement
BEGIN;
INSERT INTO t1 VALUES(1);
# Blocking the worker applying the same statement
--source include/rpl_connection_slave1.inc
BEGIN;
INSERT INTO t2 VALUES(1);
--source include/rpl_connection_master.inc
# Two transactions have same commit parent, so they can be applied parallel
SET debug = '+d,set_commit_parent_100';
INSERT INTO t1 VALUES(1);
INSERT INTO t2 VALUES(1);
SET debug = '-d,set_commit_parent_100';
# Its commit parent is the sequence_number of INSERT INTO t2 VALUES(1);
# So coordinator has to wait for the INSERT to be applied.
INSERT INTO t1 VALUES(2);
# Increments timestamp twice to generate a timestamp gap.
SET debug = '+d,logical_clock_step_2';
INSERT INTO t1 VALUES(3);
SET debug = '-d,logical_clock_step_2';
--source include/save_master_pos.inc
# Make sure coordinator is waiting for a commit parent and
# guaratee min_waited_timestamp is set to a valid timestamp
--source include/rpl_connection_slave.inc
--let $wait_condition = SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state = 'Waiting for dependent transaction to commit'
--source include/wait_condition.inc
# It will pause the worker after it read min_waited_timestamp
SET GLOBAL debug = '+d,replica_worker_ends_group_before_signal_lwm';
ROLLBACK;
# Make sure the worker read min_waited_timestamp
--let $wait_condition = SELECT count(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state = 'debug sync point: now'
--source include/wait_condition.inc
# clear the debug point so other workers are not paused
SET GLOBAL debug = '-d,replica_worker_ends_group_before_signal_lwm';
# It will pause coordinator after it finds that all workers finished.
SET GLOBAL debug = '+d,wait_for_workers_to_finish_after_wait';
--source include/rpl_connection_slave1.inc
# Resume the worker applying INSERT INTO t2 VALUES(1)
ROLLBACK;
#--let $wait_condition = SELECT count(*) = 3 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state = 'Waiting for an event from Coordinator'
#--source include/wait_condition.inc
# Make sure coordinator is handling the gap and
--let $wait_condition = SELECT count(*) = 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state = 'debug sync point: now'
--source include/wait_condition.inc
SET debug_sync='now SIGNAL worker_continue';
# Make sure the worker applying INSERT INTO t1 VALUES(1) finished
--let $wait_condition = SELECT count(*) = 4 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE state = 'Waiting for an event from Coordinator'
--source include/wait_condition.inc
#
# Cleanup
#
SET debug_sync='now SIGNAL coordinator_continue';
--source include/sync_slave_sql.inc
SET GLOBAL debug = '-d,wait_for_workers_to_finish_after_wait';
--source include/rpl_connection_slave.inc
SET GLOBAL innodb_lock_wait_timeout = @saved_innodb_lock_wait_timeout;
--source include/rpl_connection_master.inc
DROP TABLE t1, t2;
--source include/rpl_end.inc
|