Skip to content

Commit 5a9162c

Browse files
author
Venkatesh Duggirala
committed
Bug#18731211 Duplicate slave server_uuid in replication
forum gives no specific error message Problem & Analysis: =================== When a new slave joins to a replication forum with uuid which is exactly same as one of the existing slave's uuid, then master is confused between old slave and the newly joined slave. When com_binlog_send request comes from newly joined slave, it thinks that old slave's dump thread is zombie and kills it. After some time, old slave tries to reconnect to the master and sends com_binlog_send request to Master. Then Master thinks that newly joined slave's dump thread is zombie and kills it. This ping-pong reconnections between Master->old_slave and Master->new_slave happens continously. This is known expected behaviour but the error message in old_slave and new_slave's error log file is not clear enough to identify this situation. Information in error log file was: "received end packet from server, apparent master shutdown" which does not say anything about server_uuid misconfiguration in the replication forum. Fix: Changing the error message which clearly mentions all the possible causes which resulted this disconnection. New information in error log file is: "Slave: received end packet from server due to dump thread being killed on master. Dump threads are killed for example during master shutdown, explicitly by a user, or when the master receives a binlog send request from a duplicate server UUID <server_uuid>. " And also a NOTE is added on master's error log file with the following information when log_warnings is greater than 1. "While initializing dump thread for slave with UUID <slave_uuid>, found a zombie dump thread with the same UUID. Master is killing the zombie dump thread."
1 parent 80629ae commit 5a9162c

6 files changed

Lines changed: 105 additions & 5 deletions

File tree

mysql-test/suite/rpl/r/rpl_server_uuid.result

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Note #### Storing MySQL user name or password information in the master info rep
66
call mtr.add_suppression("Slave I/O thread .* register on master");
77
call mtr.add_suppression("Slave I/O: Master command COM_REGISTER_SLAVE failed: .*");
88
CALL mtr.add_suppression(".*master and slave have equal MySQL server UUIDs.*");
9-
CALL mtr.add_suppression("Master's UUID has changed, its old UUID is.*");
9+
CALL mtr.add_suppression("Master's UUID has changed, although this should not happen unless you have changed it manually");
1010
CALL mtr.add_suppression("Slave I/O: SET @master_heartbeat_period to master failed with error: Lost connection to MySQL server during query");
1111
CALL mtr.add_suppression("Notifying master by SET @master_binlog_checksum= @@global.binlog_checksum failed with error");
1212
include/sync_slave_sql_with_master.inc
@@ -117,4 +117,25 @@ include/check_slave_param.inc [Slave_IO_Running]
117117
CHANGE MASTER TO
118118
MASTER_LOG_FILE= 'MASTER_LOG_FILE', MASTER_LOG_POS= MASTER_POS;
119119
include/start_slave.inc
120+
121+
# Case 6:
122+
# In an existing master-slave replication forum (M->S1), if another
123+
# slave (S2) with the same UUID as S1 joins the forum and connects
124+
# to Master(M), then there will be ping-pong reconnections happens
125+
# between M->S2, M->S1, M->S2, M->S1,.... And both slave1 and slave2
126+
# will be informed about this UUID misconfiguration in their error
127+
# log file. And Master log will be populated with information that
128+
# it found a zombie dump thread and it is killing it(only when
129+
# log_warnings is greater than 1)
130+
-----------------------------------------------------------------------------
131+
include/rpl_restart_server.inc [server_number=3]
132+
CREATE TABLE t1(i int);
133+
DROP TABLE t1;
134+
include/sync_slave_sql_with_master.inc
135+
include/sync_slave_sql_with_master.inc
136+
Pattern "found a zombie dump thread with the same UUID" found
137+
Pattern "master receives a binlog send request from a duplicate server UUID" found
138+
Pattern "master receives a binlog send request from a duplicate server UUID" found
139+
include/rpl_restart_server.inc [server_number=3]
140+
include/stop_slave.inc
120141
include/rpl_end.inc

mysql-test/suite/rpl/t/rpl_server_uuid.cnf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
11
!include ../my.cnf
22

3+
[mysqld.1]
4+
log-warnings=2
5+
log-error=../../tmp/rpl_server_uuid_test.mysqld.1.err
6+
7+
[mysqld.2]
8+
log-error=../../tmp/rpl_server_uuid_test.mysqld.2.err
9+
310
[mysqld.3]
411
replicate-same-server-id=1
512
gtid-mode=off
13+
log-error=../../tmp/rpl_server_uuid_test.mysqld.3.err
614

715
[ENV]
816
SERVER_MYPORT_3= @mysqld.3.port

mysql-test/suite/rpl/t/rpl_server_uuid.test

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
call mtr.add_suppression("Slave I/O thread .* register on master");
1919
call mtr.add_suppression("Slave I/O: Master command COM_REGISTER_SLAVE failed: .*");
2020
CALL mtr.add_suppression(".*master and slave have equal MySQL server UUIDs.*");
21-
CALL mtr.add_suppression("Master's UUID has changed, its old UUID is.*");
21+
CALL mtr.add_suppression("Master's UUID has changed, although this should not happen unless you have changed it manually");
2222
CALL mtr.add_suppression("Slave I/O: SET @master_heartbeat_period to master failed with error: Lost connection to MySQL server during query");
2323
CALL mtr.add_suppression("Notifying master by SET @master_binlog_checksum= @@global.binlog_checksum failed with error");
2424

@@ -236,4 +236,66 @@ RESET SLAVE;
236236
eval CHANGE MASTER TO
237237
MASTER_LOG_FILE= '$old_master_log_file', MASTER_LOG_POS= $old_master_log_pos;
238238
--source include/start_slave.inc
239+
240+
--echo
241+
--echo # Case 6:
242+
--echo # In an existing master-slave replication forum (M->S1), if another
243+
--echo # slave (S2) with the same UUID as S1 joins the forum and connects
244+
--echo # to Master(M), then there will be ping-pong reconnections happens
245+
--echo # between M->S2, M->S1, M->S2, M->S1,.... And both slave1 and slave2
246+
--echo # will be informed about this UUID misconfiguration in their error
247+
--echo # log file. And Master log will be populated with information that
248+
--echo # it found a zombie dump thread and it is killing it(only when
249+
--echo # log_warnings is greater than 1)
250+
--echo -----------------------------------------------------------------------------
251+
252+
# Step-1: Copy server 2(S1)'s auto.cnf into server 3 (S2)'s data directory.
253+
--connection server_2
254+
--let $datadir2=`select @@datadir`
255+
-- connection server_3
256+
--let $datadir3=`select @@datadir`
257+
--copy_file $datadir3/auto.cnf $datadir3/auto_backup.cnf
258+
--remove_file $datadir3/auto.cnf
259+
--copy_file $datadir2/auto.cnf $datadir3/auto.cnf
260+
261+
# Step-2: Restart the server(two servers with the same UUID)
262+
--let $rpl_server_number= 3
263+
--source include/rpl_restart_server.inc
264+
265+
# Step-3: Just do some command on server_1
266+
--connection server_1
267+
CREATE TABLE t1(i int);
268+
DROP TABLE t1;
269+
270+
# Step-4: Sync the data on both the slaves and observe that
271+
# sync will be success at both slaves.
272+
--let $sync_slave_connection= server_2
273+
--source include/sync_slave_sql_with_master.inc
274+
275+
--connection server_1
276+
--let $sync_slave_connection= server_3
277+
--source include/sync_slave_sql_with_master.inc
278+
279+
# Step-5: Observe that misconfiguration messages are in their respective
280+
# error log files.
281+
282+
--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/rpl_server_uuid_test.mysqld.1.err
283+
--let SEARCH_PATTERN=found a zombie dump thread with the same UUID
284+
--source include/search_pattern.inc
285+
286+
--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/rpl_server_uuid_test.mysqld.2.err
287+
--let SEARCH_PATTERN=master receives a binlog send request from a duplicate server UUID
288+
--source include/search_pattern.inc
289+
290+
--let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/rpl_server_uuid_test.mysqld.3.err
291+
--let SEARCH_PATTERN=master receives a binlog send request from a duplicate server UUID
292+
--source include/search_pattern.inc
293+
294+
# Cleanup (restore the server 3's auto.cnf back to proper one and restart)
295+
--remove_file $datadir3/auto.cnf
296+
--copy_file $datadir3/auto_backup.cnf $datadir3/auto.cnf
297+
--remove_file $datadir3/auto_backup.cnf
298+
--let $rpl_server_number= 3
299+
--source include/rpl_restart_server.inc
300+
--source include/stop_slave.inc
239301
--source include/rpl_end.inc

scripts/mysqlaccess.conf

100644100755
File mode changed.

sql/rpl_master.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,6 +1951,11 @@ void kill_zombie_dump_threads(String *slave_uuid)
19511951
it will be slow because it will iterate through the list
19521952
again. We just to do kill the thread ourselves.
19531953
*/
1954+
if (log_warnings > 1)
1955+
sql_print_information("While initializing dump thread for slave with "
1956+
"UUID <%s>, found a zombie dump thread with "
1957+
"the same UUID. Master is killing the zombie dump "
1958+
"thread.", slave_uuid->c_ptr());
19541959
tmp->awake(THD::KILL_QUERY);
19551960
mysql_mutex_unlock(&tmp->LOCK_thd_data);
19561961
}

sql/rpl_slave.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3275,9 +3275,13 @@ static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings)
32753275
/* Check if eof packet */
32763276
if (len < 8 && mysql->net.read_pos[0] == 254)
32773277
{
3278-
sql_print_information("Slave: received end packet from server, apparent "
3279-
"master shutdown: %s",
3280-
mysql_error(mysql));
3278+
sql_print_information("Slave: received end packet from server due to dump "
3279+
"thread being killed on master. Dump threads are "
3280+
"killed for example during master shutdown, "
3281+
"explicitly by a user, or when the master receives "
3282+
"a binlog send request from a duplicate server "
3283+
"UUID <%s> : Error %s", ::server_uuid,
3284+
mysql_error(mysql));
32813285
DBUG_RETURN(packet_error);
32823286
}
32833287

0 commit comments

Comments
 (0)