Skip to content

Commit ae6801e

Browse files
author
Davi Arnaut
committed
Bug#37780: Make KILL reliable (main.kill fails randomly)
- A prerequisite cleanup patch for making KILL reliable. The test case main.kill did not work reliably. The following problems have been identified: 1. A kill signal could get lost if it arrived shortly before a thread started reading on the client connection. 2. A kill signal could get lost if it arrived shortly before a thread started waiting on a condition variable. These problems have been solved as follows. Please also see the added code comments for more details. 1. There is no safe way to detect when a thread enters the blocking state of a read(2) or recv(2) system call, where it can be interrupted by a signal. Hence it is not possible to wait for the right moment to send a kill signal. It has been decided not to fix it in the code. Instead, the test case repeats the KILL statement until the connection terminates. 2. Before waiting on a condition variable, we register it together with a synchronizing mutex in THD::mysys_var. After this, we need to test THD::killed again. In some places we only tested it in a loop condition before the registration. When THD::killed had been set between this test and the registration, we entered waiting without noticing the killed flag. Additional checks have been introduced where required. In addition to the above, a re-write of the main.kill test case has been done. All sleeps have been replaced by Debug Sync Facility synchronization. A couple of sync points have been added to the server code. To avoid further problems, if the test case fails in spite of the fixes, the test case has been added to the "experimental" list for now. - Most of the work on this patch is authored by Ingo Struewing
1 parent 26e7ee2 commit ae6801e

File tree

10 files changed

+502
-369
lines changed

10 files changed

+502
-369
lines changed

mysql-test/r/kill.result

Lines changed: 154 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1,143 +1,178 @@
1-
set @old_concurrent_insert= @@global.concurrent_insert;
2-
set @@global.concurrent_insert= 0;
3-
drop table if exists t1, t2, t3;
4-
create table t1 (kill_id int);
5-
insert into t1 values(connection_id());
6-
select ((@id := kill_id) - kill_id) from t1;
7-
((@id := kill_id) - kill_id)
8-
0
9-
kill @id;
10-
select ((@id := kill_id) - kill_id) from t1;
11-
((@id := kill_id) - kill_id)
12-
0
13-
select @id != connection_id();
14-
@id != connection_id()
1+
SET DEBUG_SYNC = 'RESET';
2+
DROP TABLE IF EXISTS t1, t2, t3;
3+
DROP FUNCTION IF EXISTS MY_KILL;
4+
CREATE FUNCTION MY_KILL(tid INT) RETURNS INT
5+
BEGIN
6+
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
7+
KILL tid;
8+
RETURN (SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE ID = tid);
9+
END|
10+
SET DEBUG_SYNC= 'thread_end SIGNAL con1_end';
11+
SET DEBUG_SYNC= 'before_do_command_net_read SIGNAL con1_read';
12+
SET DEBUG_SYNC='now WAIT_FOR con1_read';
13+
SET DEBUG_SYNC= 'now WAIT_FOR con1_end';
14+
SET DEBUG_SYNC = 'RESET';
15+
SELECT 1;
16+
Got one of the listed errors
17+
SELECT 1;
18+
1
1519
1
16-
select 4;
20+
SELECT @id != CONNECTION_ID();
21+
@id != CONNECTION_ID()
22+
1
23+
SELECT 4;
1724
4
1825
4
19-
drop table t1;
20-
kill (select count(*) from mysql.user);
26+
KILL (SELECT COUNT(*) FROM mysql.user);
2127
ERROR 42000: This version of MySQL doesn't yet support 'Usage of subqueries or stored function calls as part of this statement'
22-
create table t1 (id int primary key);
23-
create table t2 (id int unsigned not null);
24-
insert into t2 select id from t1;
25-
create table t3 (kill_id int);
26-
insert into t3 values(connection_id());
27-
select id from t1 where id in (select distinct a.id from t2 a, t2 b, t2 c, t2 d group by a.id, b.id, c.id, d.id having a.id between 10 and 20);
28-
select ((@id := kill_id) - kill_id) from t3;
29-
((@id := kill_id) - kill_id)
30-
0
31-
kill @id;
28+
SET DEBUG_SYNC= 'thread_end SIGNAL con1_end';
29+
SET DEBUG_SYNC= 'before_do_command_net_read SIGNAL con1_read WAIT_FOR kill';
30+
SET DEBUG_SYNC= 'now WAIT_FOR con1_read';
31+
SET DEBUG_SYNC= 'now WAIT_FOR con1_end';
32+
SET DEBUG_SYNC = 'RESET';
33+
SELECT 1;
3234
Got one of the listed errors
33-
drop table t1, t2, t3;
34-
select get_lock("a", 10);
35-
get_lock("a", 10)
36-
1
37-
select get_lock("a", 10);
38-
get_lock("a", 10)
39-
NULL
40-
select 1;
41-
1
42-
1
43-
select RELEASE_LOCK("a");
44-
RELEASE_LOCK("a")
45-
1
46-
create table t1(f1 int);
47-
create function bug27563() returns int(11)
48-
deterministic
49-
begin
50-
declare continue handler for sqlstate '70100' set @a:= 'killed';
51-
declare continue handler for sqlexception set @a:= 'exception';
52-
set @a= get_lock("lock27563", 10);
53-
return 1;
54-
end|
55-
select get_lock("lock27563",10);
56-
get_lock("lock27563",10)
57-
1
58-
insert into t1 values (bug27563());
59-
ERROR 70100: Query execution was interrupted
60-
select @a;
61-
@a
62-
NULL
63-
select * from t1;
35+
SELECT 1;
36+
1
37+
1
38+
SELECT @id != CONNECTION_ID();
39+
@id != CONNECTION_ID()
40+
1
41+
SELECT 4;
42+
4
43+
4
44+
CREATE TABLE t1 (id INT PRIMARY KEY AUTO_INCREMENT);
45+
CREATE TABLE t2 (id INT UNSIGNED NOT NULL);
46+
INSERT INTO t1 VALUES
47+
(0),(0),(0),(0),(0),(0),(0),(0), (0),(0),(0),(0),(0),(0),(0),(0),
48+
(0),(0),(0),(0),(0),(0),(0),(0), (0),(0),(0),(0),(0),(0),(0),(0),
49+
(0),(0),(0),(0),(0),(0),(0),(0), (0),(0),(0),(0),(0),(0),(0),(0),
50+
(0),(0),(0),(0),(0),(0),(0),(0), (0),(0),(0),(0),(0),(0),(0),(0);
51+
INSERT t1 SELECT 0 FROM t1 AS a1, t1 AS a2 LIMIT 4032;
52+
INSERT INTO t2 SELECT id FROM t1;
53+
SET DEBUG_SYNC= 'thread_end SIGNAL con1_end';
54+
SET DEBUG_SYNC= 'before_acos_function SIGNAL in_sync';
55+
SELECT id FROM t1 WHERE id IN
56+
(SELECT DISTINCT a.id FROM t2 a, t2 b, t2 c, t2 d
57+
GROUP BY ACOS(1/a.id), b.id, c.id, d.id
58+
HAVING a.id BETWEEN 10 AND 20);
59+
SET DEBUG_SYNC= 'now WAIT_FOR in_sync';
60+
KILL @id;
61+
SET DEBUG_SYNC= 'now WAIT_FOR con1_end';
62+
Got one of the listed errors
63+
SELECT 1;
64+
1
65+
1
66+
SET DEBUG_SYNC = 'RESET';
67+
DROP TABLE t1, t2;
68+
SET DEBUG_SYNC= 'before_acos_function SIGNAL in_sync WAIT_FOR kill';
69+
SELECT ACOS(0);
70+
SET DEBUG_SYNC= 'now WAIT_FOR in_sync';
71+
KILL QUERY @id;
72+
ACOS(0)
73+
1.5707963267948966
74+
SELECT 1;
75+
1
76+
1
77+
SELECT @id = CONNECTION_ID();
78+
@id = CONNECTION_ID()
79+
1
80+
SET DEBUG_SYNC = 'RESET';
81+
CREATE TABLE t1 (f1 INT);
82+
CREATE FUNCTION bug27563() RETURNS INT(11)
83+
DETERMINISTIC
84+
BEGIN
85+
DECLARE CONTINUE HANDLER FOR SQLSTATE '70100' SET @a:= 'killed';
86+
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION SET @a:= 'exception';
87+
SET DEBUG_SYNC= 'now SIGNAL in_sync WAIT_FOR kill';
88+
RETURN 1;
89+
END|
90+
INSERT INTO t1 VALUES (bug27563());
91+
SET DEBUG_SYNC= 'now WAIT_FOR in_sync';
92+
KILL QUERY @id;
93+
ERROR 70100: Query execution was interrupted
94+
SELECT * FROM t1;
6495
f1
65-
insert into t1 values(0);
66-
update t1 set f1= bug27563();
96+
SET DEBUG_SYNC = 'RESET';
97+
INSERT INTO t1 VALUES(0);
98+
UPDATE t1 SET f1= bug27563();
99+
SET DEBUG_SYNC= 'now WAIT_FOR in_sync';
100+
KILL QUERY @id;
67101
ERROR 70100: Query execution was interrupted
68-
select @a;
69-
@a
70-
NULL
71-
select * from t1;
102+
SELECT * FROM t1;
72103
f1
73104
0
74-
insert into t1 values(1);
75-
delete from t1 where bug27563() is null;
105+
SET DEBUG_SYNC = 'RESET';
106+
INSERT INTO t1 VALUES(1);
107+
DELETE FROM t1 WHERE bug27563() IS NULL;
108+
SET DEBUG_SYNC= 'now WAIT_FOR in_sync';
109+
KILL QUERY @id;
76110
ERROR 70100: Query execution was interrupted
77-
select @a;
78-
@a
79-
NULL
80-
select * from t1;
111+
SELECT * FROM t1;
81112
f1
82113
0
83114
1
84-
select * from t1 where f1= bug27563();
85-
ERROR 70100: Query execution was interrupted
86-
select @a;
87-
@a
88-
NULL
89-
create procedure proc27563()
90-
begin
91-
declare continue handler for sqlstate '70100' set @a:= 'killed';
92-
declare continue handler for sqlexception set @a:= 'exception';
93-
select get_lock("lock27563",10);
94-
select "shouldn't be selected";
95-
end|
96-
call proc27563();
97-
get_lock("lock27563",10)
98-
NULL
99-
ERROR 70100: Query execution was interrupted
100-
select @a;
101-
@a
102-
NULL
103-
create table t2 (f2 int);
104-
create trigger trg27563 before insert on t1 for each row
105-
begin
106-
declare continue handler for sqlstate '70100' set @a:= 'killed';
107-
declare continue handler for sqlexception set @a:= 'exception';
108-
set @a:= get_lock("lock27563",10);
109-
insert into t2 values(1);
110-
end|
111-
insert into t1 values(2),(3);
112-
ERROR 70100: Query execution was interrupted
113-
select @a;
114-
@a
115-
NULL
116-
select * from t1;
115+
SET DEBUG_SYNC = 'RESET';
116+
SELECT * FROM t1 WHERE f1= bug27563();
117+
SET DEBUG_SYNC= 'now WAIT_FOR in_sync';
118+
KILL QUERY @id;
119+
ERROR 70100: Query execution was interrupted
120+
SELECT * FROM t1;
117121
f1
118122
0
119123
1
120-
select * from t2;
121-
f2
122-
select release_lock("lock27563");
123-
release_lock("lock27563")
124+
SET DEBUG_SYNC = 'RESET';
125+
DROP FUNCTION bug27563;
126+
CREATE TABLE t2 (f2 INT);
127+
CREATE TRIGGER trg27563 BEFORE INSERT ON t1 FOR EACH ROW
128+
BEGIN
129+
DECLARE CONTINUE HANDLER FOR SQLSTATE '70100' SET @a:= 'killed';
130+
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION SET @a:= 'exception';
131+
INSERT INTO t2 VALUES(0);
132+
SET DEBUG_SYNC= 'now SIGNAL in_sync WAIT_FOR kill';
133+
INSERT INTO t2 VALUES(1);
134+
END|
135+
INSERT INTO t1 VALUES(2),(3);
136+
SET DEBUG_SYNC= 'now WAIT_FOR in_sync';
137+
KILL QUERY @id;
138+
ERROR 70100: Query execution was interrupted
139+
SELECT * FROM t1;
140+
f1
141+
0
124142
1
125-
drop table t1, t2;
126-
drop function bug27563;
127-
drop procedure proc27563;
143+
SELECT * FROM t2;
144+
f2
145+
0
146+
SET DEBUG_SYNC = 'RESET';
147+
DROP TABLE t1, t2;
148+
SET DEBUG_SYNC= 'before_join_optimize SIGNAL in_sync';
128149
PREPARE stmt FROM 'EXPLAIN SELECT * FROM t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21,t22,t23,t24,t25,t26,t27,t28,t29,t30,t31,t32,t33,t34,t35,t36,t37,t38,t39,t40 WHERE a1=a2 AND a2=a3 AND a3=a4 AND a4=a5 AND a5=a6 AND a6=a7 AND a7=a8 AND a8=a9 AND a9=a10 AND a10=a11 AND a11=a12 AND a12=a13 AND a13=a14 AND a14=a15 AND a15=a16 AND a16=a17 AND a17=a18 AND a18=a19 AND a19=a20 AND a20=a21 AND a21=a22 AND a22=a23 AND a23=a24 AND a24=a25 AND a25=a26 AND a26=a27 AND a27=a28 AND a28=a29 AND a29=a30 AND a30=a31 AND a31=a32 AND a32=a33 AND a33=a34 AND a34=a35 AND a35=a36 AND a36=a37 AND a37=a38 AND a38=a39 AND a39=a40 ';
129150
EXECUTE stmt;
151+
SET DEBUG_SYNC= 'now WAIT_FOR in_sync';
152+
KILL QUERY @id;
153+
ERROR 70100: Query execution was interrupted
154+
SET DEBUG_SYNC = 'RESET';
130155
#
131156
# Bug#19723: kill of active connection yields different error code
132157
# depending on platform.
133158
#
134159

135-
# Connection: con2.
136-
KILL CONNECTION_ID();
137-
# CR_SERVER_LOST, CR_SERVER_GONE_ERROR, depending on the timing
138-
# of close of the connection socket
160+
# Connection: con1.
161+
SET DEBUG_SYNC= 'thread_end SIGNAL con1_end';
162+
KILL @id;
163+
ERROR 70100: Query execution was interrupted
164+
SET DEBUG_SYNC= 'now WAIT_FOR con1_end';
165+
# ER_SERVER_SHUTDOWN, CR_SERVER_GONE_ERROR, CR_SERVER_LOST,
166+
# depending on the timing of close of the connection socket
139167
SELECT 1;
140168
Got one of the listed errors
169+
SELECT 1;
170+
1
171+
1
172+
SELECT @id != CONNECTION_ID();
173+
@id != CONNECTION_ID()
174+
1
175+
SET DEBUG_SYNC = 'RESET';
141176
#
142177
# Additional test for WL#3726 "DDL locking for all metadata objects"
143178
# Check that DDL and DML statements waiting for metadata locks can
@@ -208,13 +243,11 @@ ERROR 70100: Query execution was interrupted
208243
# Test for DML waiting for meta-data lock
209244
# Switching to connection 'blocker'
210245
unlock tables;
211-
drop table t2;
212-
create table t2 (k int);
213246
lock tables t1 read;
214247
# Switching to connection 'ddl'
215-
rename tables t1 to t3, t2 to t1;
248+
truncate table t1;
216249
# Switching to connection 'dml'
217-
insert into t2 values (1);
250+
insert into t1 values (1);
218251
# Switching to connection 'default'
219252
kill query ID2;
220253
# Switching to connection 'dml'
@@ -239,6 +272,7 @@ unlock tables;
239272
# Switching to connection 'ddl'
240273
# Cleanup.
241274
# Switching to connection 'default'
242-
drop table t3;
243275
drop table t1;
244-
set @@global.concurrent_insert= @old_concurrent_insert;
276+
drop table t2;
277+
SET DEBUG_SYNC = 'RESET';
278+
DROP FUNCTION MY_KILL;

mysql-test/t/disabled.def

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
# Do not use any TAB characters for whitespace.
1010
#
1111
##############################################################################
12-
kill : Bug#37780 2008-12-03 HHunger need some changes to be robust enough for pushbuild.
1312
lowercase_table3 : Bug#54845 2010-06-30 alik main.lowercase_table3 on Mac OSX
1413
mysqlhotcopy_myisam : Bug#54129 2010-08-31 alik mysqlhotcopy* fails
1514
mysqlhotcopy_archive : Bug#54129 2010-08-31 alik mysqlhotcopy* fails

0 commit comments

Comments
 (0)