Skip to content

Commit 3b6b4bf

Browse files
author
Venkatesh Duggirala
committed
BUG#20938915 2PC SUCCEEDS EVEN THOUGH BINLOG FLUSH/SYNC FAILS
Problem: When flushing cache to binary log fails, the error is not getting caught by group commit logic. Analysis: Leader of a group in group commit logic tries to flush cache to binary log file. If it fails (which is very rarely happens due to some low level errors like disk failure or disk full), error is thrown to the higher level. This error is not checked properly and the transaction is getting committed in storage engine as the error is not checked and not propagated to the higher level. No error to client and no information in binary logs is making users to get confused when they see inconsistencies between Master and Slave data. Fix: Using binlog_error_action variable, Server will decide the action to do if this situation happens. If binlog_error_action == ABORT_SERVER, then it will abort the server after informing the client with 'ER_BINLOGGING_IMPOSSIBLE' error. If binlog_error_action == 'IGNORE_ERROR', then it will ignore the error and disable the binlogging further until server is restarted again. The same will be mentioned in the error log file.
1 parent e0e1f02 commit 3b6b4bf

4 files changed

Lines changed: 85 additions & 3 deletions

File tree

mysql-test/suite/binlog/r/binlog_error_action.result

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ call mtr.add_suppression("Could not open .*");
66
call mtr.add_suppression("Could not use .*");
77
call mtr.add_suppression("Can't generate a unique log-filename master-bin");
88
call mtr.add_suppression("The server was unable to create a new log file *");
9+
call mtr.add_suppression("An error occured during flushing cache to file.");
10+
call mtr.add_suppression("Either disk is full or file system is read only");
911
RESET MASTER;
1012
Test case1
1113
SET GLOBAL binlog_error_action= ABORT_SERVER;
@@ -59,4 +61,17 @@ Tables_in_test
5961
t2
6062
DROP TABLE t2;
6163
SET SESSION debug="-d,fault_injection_init_name";
64+
Test case9
65+
CREATE TABLE t1(i INT);
66+
SET SESSION debug="+d,simulate_error_during_flush_cache_to_file";
67+
SET GLOBAL binlog_error_action= ABORT_SERVER;
68+
INSERT INTO t1 VALUES (1);
69+
ERROR HY000: Binary logging not possible. Message: An error occured during flushing cache to file. 'binlog_error_action' is set to 'ABORT_SERVER'. Hence aborting the server
70+
include/assert.inc [Count of elements in t1 should be 0.]
71+
SET SESSION debug="+d,simulate_error_during_flush_cache_to_file";
72+
SET GLOBAL binlog_error_action= IGNORE_ERROR;
73+
INSERT INTO t1 VALUES (2);
74+
include/assert.inc [Count of elements in t1 should be 1.]
75+
DROP table t1;
76+
SET SESSION debug="-d,simulate_error_during_flush_cache_to_file";
6277
SET SESSION debug="";

mysql-test/suite/binlog/t/binlog_error_action.test

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ call mtr.add_suppression("Could not open .*");
3333
call mtr.add_suppression("Could not use .*");
3434
call mtr.add_suppression("Can't generate a unique log-filename master-bin");
3535
call mtr.add_suppression("The server was unable to create a new log file *");
36-
36+
call mtr.add_suppression("An error occured during flushing cache to file.");
37+
call mtr.add_suppression("Either disk is full or file system is read only");
3738
let $old=`select @@debug`;
3839

3940
RESET MASTER;
@@ -135,5 +136,34 @@ DROP TABLE t2;
135136
SET SESSION debug="-d,fault_injection_init_name";
136137
--source include/restart_mysqld.inc
137138

139+
--echo Test case9
140+
CREATE TABLE t1(i INT);
141+
# Simulate error during flushing cache to file and test the behaviour
142+
# when binlog_error_action is set to ABORT_SERVER/IGNORE_ERROR.
143+
SET SESSION debug="+d,simulate_error_during_flush_cache_to_file";
144+
SET GLOBAL binlog_error_action= ABORT_SERVER;
145+
--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
146+
--error ER_BINLOG_LOGGING_IMPOSSIBLE
147+
INSERT INTO t1 VALUES (1);
148+
--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
149+
--enable_reconnect
150+
--source include/wait_until_connected_again.inc
151+
152+
--let $assert_cond= COUNT(*) = 0 FROM t1;
153+
--let $assert_text= Count of elements in t1 should be 0.
154+
--source include/assert.inc
155+
156+
SET SESSION debug="+d,simulate_error_during_flush_cache_to_file";
157+
SET GLOBAL binlog_error_action= IGNORE_ERROR;
158+
INSERT INTO t1 VALUES (2);
159+
160+
--let $assert_cond= COUNT(*) = 1 FROM t1;
161+
--let $assert_text= Count of elements in t1 should be 1.
162+
--source include/assert.inc
163+
164+
DROP table t1;
165+
SET SESSION debug="-d,simulate_error_during_flush_cache_to_file";
166+
--source include/restart_mysqld.inc
167+
138168
# Cleanup
139169
eval SET SESSION debug="$old";

sql/binlog.cc

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3359,6 +3359,8 @@ bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
33593359
my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), "Either disk is full or "
33603360
"file system is read only while opening the binlog. Aborting the "
33613361
"server");
3362+
sql_print_error("Either disk is full or file system is read only while "
3363+
"opening the binlog. Aborting the server");
33623364
thd->protocol->end_statement();
33633365
_exit(EXIT_FAILURE);
33643366
}
@@ -5000,6 +5002,8 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_even
50005002
my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), "Either disk is full or "
50015003
"file system is read only while rotating the binlog. Aborting "
50025004
"the server");
5005+
sql_print_error("Either disk is full or file system is read only while "
5006+
"rotating the binlog. Aborting the server");
50035007
thd->protocol->end_statement();
50045008
_exit(EXIT_FAILURE);
50055009
}
@@ -6775,8 +6779,39 @@ MYSQL_BIN_LOG::change_stage(THD *thd,
67756779
int
67766780
MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
67776781
{
6778-
if (flush_io_cache(&log_file))
6779-
return ER_ERROR_ON_WRITE;
6782+
if (DBUG_EVALUATE_IF("simulate_error_during_flush_cache_to_file", 1,
6783+
flush_io_cache(&log_file)))
6784+
{
6785+
if (binlog_error_action == ABORT_SERVER)
6786+
{
6787+
THD *thd= current_thd;
6788+
/*
6789+
On fatal error when code enters here we should forcefully clear the
6790+
previous errors so that a new critical error message can be pushed
6791+
to the client side.
6792+
*/
6793+
thd->clear_error();
6794+
my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), "An error occured during "
6795+
"flushing cache to file. 'binlog_error_action' is set to "
6796+
"'ABORT_SERVER'. Hence aborting the server");
6797+
sql_print_error("An error occured during flushing cache to file. "
6798+
"'binlog_error_action' is set to 'ABORT_SERVER'. "
6799+
"Hence aborting the server");
6800+
thd->protocol->end_statement();
6801+
_exit(EXIT_FAILURE);
6802+
}
6803+
else
6804+
{
6805+
sql_print_error("An error occured during flushing cache to file. "
6806+
"'binlog_error_action' is set to 'IGNORE_ERROR'. "
6807+
"Hence turning logging off for the whole duration "
6808+
"of the MySQL server process. To turn it on "
6809+
"again: fix the cause, shutdown the MySQL "
6810+
"server and restart it.");
6811+
close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
6812+
return ER_ERROR_ON_WRITE;
6813+
}
6814+
}
67806815
*end_pos_var= my_b_tell(&log_file);
67816816
return 0;
67826817
}

sql/log.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,8 @@ bool MYSQL_LOG::open(
16171617
my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), "Either disk is full or "
16181618
"file system is read only while opening the binlog. Aborting the "
16191619
"server");
1620+
sql_print_error("Either disk is full or file system is read only while "
1621+
"opening the binlog. Aborting the server");
16201622
thd->protocol->end_statement();
16211623
_exit(EXIT_FAILURE);
16221624
}

0 commit comments

Comments
 (0)