1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
|
#!/usr/bin/env perl
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use PerconaTest;
use Sandbox;
require "$trunk/bin/pt-slave-restart";
# These tests are skipped on sandboxes older than MySQL 5.6.
# NOTE(review): "lt" compares the versions as strings, so a two-digit major
# version such as '10.0' sorts before '5.6' and would also be skipped --
# confirm that this is acceptable for the sandboxes in use.
if ( $sandbox_version lt '5.6' ) {
   plan skip_all => "Requires MySQL 5.6";
}

# Restart the sandbox with GTID=1 and SAKILA=0 in the environment (presumably
# enabling GTID replication and skipping the sakila sample data -- see
# sandbox/test-env).  The command's output is echoed as test diagnostics.
diag('Restarting the sandbox');
diag(`SAKILA=0 GTID=1 $trunk/sandbox/test-env restart`);
diag("Sandbox restarted");

# Direct method calls rather than indirect object syntax ("new Class(...)"),
# which perl can mis-parse when a sub named like the class or method exists.
my $dp = DSNParser->new(opts => $dsn_opts);
my $sb = Sandbox->new(basedir => '/tmp', DSNParser => $dp);

# Every scenario below needs the master and both slaves.
my $master_dbh = $sb->get_dbh_for('master');
my $slave1_dbh = $sb->get_dbh_for('slave1');
my $slave2_dbh = $sb->get_dbh_for('slave2');

if ( !$master_dbh ) {
   plan skip_all => 'Cannot connect to sandbox master';
}
elsif ( !$slave1_dbh ) {
   plan skip_all => 'Cannot connect to sandbox slave1';
}
elsif ( !$slave2_dbh ) {
   plan skip_all => 'Cannot connect to sandbox slave2';
}

my $slave1_dsn = $sb->dsn_for("slave1");
my $slave2_dsn = $sb->dsn_for("slave2");

# The pid and log file names include this test's process id ($PID).
my $pid_file = "/tmp/pt-slave-restart-test-$PID.pid";
my $log_file = "/tmp/pt-slave-restart-test-$PID.log";

# Base command line shared by every scenario; start() appends the
# scenario-specific options and the DSN.
my $cmd = "$trunk/bin/pt-slave-restart --daemonize --run-time 5 --max-sleep 0.25 --pid $pid_file --log $log_file";
# Launch pt-slave-restart with the shared $cmd followed by the caller's
# extra options/DSN string.
#
# An instance that is still running is stopped first; if stop() reports
# failure the sub returns early with an empty/undef value.  Otherwise the
# command is run through the shell and the sub returns whatever
# PerconaTest::wait_for_files() returns for the pid file (callers use that
# value as a success flag -- presumably true once the pid file exists).
sub start {
   my ( $extra_args ) = @_;

   return unless stop();

   system("$cmd $extra_args");
   return PerconaTest::wait_for_files($pid_file);
}
# Stop a running pt-slave-restart instance, if there is one.
#
# Returns 1 when is_running() reports nothing running (either up front or
# after the stop attempt) and 0 when the tool still appears to be running.
#
# Declared without a prototype: the former empty prototype "()" validated
# nothing, and because start() calls stop() before this definition is
# compiled, perl reports "called too early to check prototype" for it.
sub stop {
   return 1 if !is_running();

   # Ask any running instance to stop.  The command is backgrounded and its
   # output discarded, so diag() normally has nothing to print here.
   diag(`$trunk/bin/pt-slave-restart --stop -q >/dev/null 2>&1 &`);

   # Poll until the pid file is gone.  0.3 and 2 are passed straight to
   # wait_until (presumably poll interval and timeout, in seconds).
   wait_until(sub { !-f $pid_file }, 0.3, 2);

   # Remove the sentinel file (presumably left behind by --stop) so that it
   # cannot affect the next instance that is started.
   diag(`rm -f /tmp/pt-slave-restart-sentinel`);

   return is_running() ? 0 : 1;
}
# Report whether the pt-slave-restart instance launched via $cmd is alive.
#
# The process table is searched for the exact base command line.  Returns 1
# when a matching process is listed and 0 otherwise; in the latter case a
# leftover pid file is deleted on the way out.
sub is_running {
   my $matching_procs = `ps -eaf | grep -v grep | grep '$cmd'`;
   chomp $matching_procs;

   return 1 if $matching_procs;

   # No process found: a pid file that still exists is stale, so remove it.
   diag(`rm -f $pid_file`) if -f $pid_file;
   return 0;
}
# Wait for replication on the given slave handle to report an SQL error.
#
# The check reads last_sql_errno from SHOW SLAVE STATUS and is handed to
# wait_until with no further arguments; the result of wait_until is returned
# to the caller.
# NOTE(review): relies on the handle returning lower-cased column names.
sub wait_repl_broke {
   my ( $dbh ) = @_;

   my $sql_error_reported = sub {
      my $status = $dbh->selectrow_hashref('show slave status');
      return $status->{last_sql_errno};
   };

   return wait_until($sql_error_reported);
}
# Wait for replication on the given slave handle to be error-free again.
#
# The check is true once last_sql_errno in SHOW SLAVE STATUS equals 0.  It is
# passed to wait_until together with 0.30 and 5 (presumably the poll interval
# and the timeout, in seconds); the result of wait_until is returned.
# NOTE(review): relies on the handle returning lower-cased column names.
sub wait_repl_ok {
   my ( $dbh ) = @_;

   my $sql_error_cleared = sub {
      my $status = $dbh->selectrow_hashref('show slave status');
      return $status->{last_sql_errno} == 0;
   };

   return wait_until($sql_error_cleared, 0.30, 5);
}
# #############################################################################
# Basic test to see if restart works with GTID.
# #############################################################################
# Recreate test.t on the master; wait_for_slaves is expected to block until
# the slaves have applied these statements, so the table exists everywhere.
$master_dbh->do('DROP DATABASE IF EXISTS test');
$master_dbh->do('CREATE DATABASE test');
$master_dbh->do('CREATE TABLE test.t (a INT)');
$sb->wait_for_slaves;
# Break replication: the table is dropped on slave1 only, so the INSERT
# coming from the master has no table to apply to there.
$slave1_dbh->do('DROP TABLE test.t');
$master_dbh->do('INSERT INTO test.t SELECT 1');
wait_repl_broke($slave1_dbh) or die "Failed to break replication";
# Confirm that replication stopped for the expected reason.
my $r = $slave1_dbh->selectrow_hashref('show slave status');
like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slave: Replication broke');
# Start pt-slave-restart and wait up to 5s for it to fix replication
# (it should take < 1s but tests can be really slow sometimes).
start("$slave1_dsn") or die "Failed to start pt-slave-restart";
wait_repl_ok($slave1_dbh);
# Check if replication is fixed: last_errno must be exactly 0 again.
# Later scenarios depend on working replication, so bail out if it is not.
$r = $slave1_dbh->selectrow_hashref('show slave status');
like(
$r->{last_errno},
qr/^0$/,
'Event is skipped',
) or BAIL_OUT("Replication is broken");
# Stop pt-slave-restart so the next scenario can start its own instance.
stop() or die "Failed to stop pt-slave-restart";
# #############################################################################
# Test slave2, skipping an event that originated on the master.
# #############################################################################
# Recreate test.t on the master; wait_for_slaves is expected to block until
# the slaves have applied these statements.
$master_dbh->do('DROP DATABASE IF EXISTS test');
$master_dbh->do('CREATE DATABASE test');
$master_dbh->do('CREATE TABLE test.t (a INT)');
$sb->wait_for_slaves;

# Break replication: the table is dropped on slave2 only, so the INSERT
# coming from the master has no table to apply to there.
$slave2_dbh->do('DROP TABLE test.t');
$master_dbh->do('INSERT INTO test.t SELECT 1');
wait_repl_broke($slave2_dbh) or die "Failed to break replication";

# Fetch the master's server UUID: the event to skip originated on the master,
# so that UUID is what gets passed to --master-uuid below.
$r = $master_dbh->selectrow_hashref('select @@GLOBAL.server_uuid as uuid');
my $uuid = $r->{uuid};

# Confirm that replication stopped for the expected reason.
$r = $slave2_dbh->selectrow_hashref('show slave status');
like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslave: Replication broke');

# Start an instance against slave2.  The failure message matches the one used
# by the first scenario instead of a bare "Died".
start("--master-uuid=$uuid $slave2_dsn") or die "Failed to start pt-slave-restart";
wait_repl_ok($slave2_dbh);

# Replication is fixed once last_errno is exactly 0 again; later scenarios
# depend on working replication, so bail out if it is not.
$r = $slave2_dbh->selectrow_hashref('show slave status');
like(
   $r->{last_errno},
   qr/^0$/,
   'Skips event from master on slave2'
) or BAIL_OUT("Replication is broken");

stop() or die "Failed to stop pt-slave-restart";
# #############################################################################
# Test skipping 2 events in a row.
# #############################################################################
# Recreate test.t on the master; wait_for_slaves is expected to block until
# the slaves have applied these statements.
$master_dbh->do('DROP DATABASE IF EXISTS test');
$master_dbh->do('CREATE DATABASE test');
$master_dbh->do('CREATE TABLE test.t (a INT)');
$sb->wait_for_slaves;

# Break replication: the table is dropped on slave2 only, and two INSERTs are
# issued on the master so that two consecutive events fail there.
$slave2_dbh->do('DROP TABLE test.t');
$master_dbh->do('INSERT INTO test.t SELECT 1');
$master_dbh->do('INSERT INTO test.t SELECT 1');
wait_repl_broke($slave2_dbh) or die "Failed to break replication";

# Fetch the master's server UUID: the events to skip originated on the
# master, so that UUID is what gets passed to --master-uuid below.
$r = $master_dbh->selectrow_hashref('select @@GLOBAL.server_uuid as uuid');
$uuid = $r->{uuid};

# Confirm that replication stopped for the expected reason.
$r = $slave2_dbh->selectrow_hashref('show slave status');
like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslaveskip2: Replication broke');

# Start an instance that skips two events per restart.  The failure message
# matches the one used by the first scenario instead of a bare "Died".
start("--skip-count=2 --master-uuid=$uuid $slave2_dsn") or die "Failed to start pt-slave-restart";
wait_repl_ok($slave2_dbh);

# Replication is fixed once last_errno is exactly 0 again.
$r = $slave2_dbh->selectrow_hashref('show slave status');
like(
   $r->{last_errno},
   qr/^0$/,
   'Skips multiple events'
) or BAIL_OUT("Replication is broken");

stop() or die "Failed to stop pt-slave-restart";
# #############################################################################
# Done.
# #############################################################################
# Remove the pid and log files created for this test run.
diag(`rm -f $pid_file $log_file >/dev/null`);
# Restart the sandbox again, this time without the GTID=1/SAKILA=0 settings
# used at the top of this file (presumably restoring the default setup for
# the tests that run after this one).
diag(`$trunk/sandbox/test-env restart`);
# Final sanity check: abort the whole test run if this file left the sandbox
# in a bad state.
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;
|