Skip to content

Commit 97657db

Browse files
author
Neeraj Bisht
committed
Bug#16691598 - ORDER BY LOWER(COLUMN) PRODUCES OUT-OF-ORDER RESULTS
Problem: We have created a table with the UTF8_BIN collation. When a query has an ORDER BY clause over a function call, the result is returned in incorrect order. Note: the bug is not present in 5.5. Analysis: In 5.5, for UTF16_BIN, the minimum and maximum multi-byte lengths are 2 and 4 respectively. In make_sortkey(), for a 2-byte character we assume that the resulting length will be 2 bytes per character. But when we use my_strnxfrm_unicode_full_bin(), we store sorting weights using 3 bytes per character. This results in a truncated sort key. The same thing happens for UTF8MB4, where the minimum multi-byte length is 1 byte and the maximum is 4 bytes: we assume the resulting data takes 1 byte per character, which again results in a truncated sort key. Solution: Use strnxfrm for sorting (that is, use the MY_CS_STRNXFRM macro), in which case the resulting length does not depend on the source length.
1 parent d6893cd commit 97657db

7 files changed

Lines changed: 74 additions & 17 deletions

File tree

mysql-test/include/ctype_filesort2.inc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,12 @@ SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
1414
ALTER TABLE t1 ADD KEY(a);
1515
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
1616
DROP TABLE IF EXISTS t1;
17+
--echo #
18+
--echo # BUG#16691598 - ORDER BY LOWER(COLUMN) PRODUCES
19+
--echo # OUT-OF-ORDER RESULTS
20+
--echo #
21+
CREATE TABLE t1 SELECT ('a a') as n;
22+
INSERT INTO t1 VALUES('a b');
23+
SELECT * FROM t1 ORDER BY LOWER(n) ASC;
24+
SELECT * FROM t1 ORDER BY LOWER(n) DESC;
25+
DROP TABLE t1;

mysql-test/r/ctype_utf16.result

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,21 @@ FF9D EFBE9D
636636
D800DF84 F0908E84
637637
DBC0DC00 F4808080
638638
DROP TABLE IF EXISTS t1;
639+
#
640+
# BUG#16691598 - ORDER BY LOWER(COLUMN) PRODUCES
641+
# OUT-OF-ORDER RESULTS
642+
#
643+
CREATE TABLE t1 SELECT ('a a') as n;
644+
INSERT INTO t1 VALUES('a b');
645+
SELECT * FROM t1 ORDER BY LOWER(n) ASC;
646+
n
647+
a a
648+
a b
649+
SELECT * FROM t1 ORDER BY LOWER(n) DESC;
650+
n
651+
a b
652+
a a
653+
DROP TABLE t1;
639654
select @@collation_connection;
640655
@@collation_connection
641656
utf16_bin

mysql-test/r/ctype_utf32.result

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,21 @@ HEX(a) HEX(CONVERT(a USING utf8mb4))
635635
00010384 F0908E84
636636
00100000 F4808080
637637
DROP TABLE IF EXISTS t1;
638+
#
639+
# BUG#16691598 - ORDER BY LOWER(COLUMN) PRODUCES
640+
# OUT-OF-ORDER RESULTS
641+
#
642+
CREATE TABLE t1 SELECT ('a a') as n;
643+
INSERT INTO t1 VALUES('a b');
644+
SELECT * FROM t1 ORDER BY LOWER(n) ASC;
645+
n
646+
a a
647+
a b
648+
SELECT * FROM t1 ORDER BY LOWER(n) DESC;
649+
n
650+
a b
651+
a a
652+
DROP TABLE t1;
638653
select @@collation_connection;
639654
@@collation_connection
640655
utf32_bin

mysql-test/r/ctype_utf8mb4.result

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,6 +1012,21 @@ EFBE9D EFBE9D
10121012
F0908E84 F0908E84
10131013
F4808080 F4808080
10141014
DROP TABLE IF EXISTS t1;
1015+
#
1016+
# BUG#16691598 - ORDER BY LOWER(COLUMN) PRODUCES
1017+
# OUT-OF-ORDER RESULTS
1018+
#
1019+
CREATE TABLE t1 SELECT ('a a') as n;
1020+
INSERT INTO t1 VALUES('a b');
1021+
SELECT * FROM t1 ORDER BY LOWER(n) ASC;
1022+
n
1023+
a a
1024+
a b
1025+
SELECT * FROM t1 ORDER BY LOWER(n) DESC;
1026+
n
1027+
a b
1028+
a a
1029+
DROP TABLE t1;
10151030
select @@collation_connection;
10161031
@@collation_connection
10171032
utf8mb4_bin

sql/filesort.cc

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -813,8 +813,6 @@ static void make_sortkey(register SORTPARAM *param,
813813
{
814814
CHARSET_INFO *cs=item->collation.collation;
815815
char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');
816-
int diff;
817-
uint sort_field_length;
818816

819817
if (maybe_null)
820818
*to++=1;
@@ -842,25 +840,13 @@ static void make_sortkey(register SORTPARAM *param,
842840
break;
843841
}
844842
length= res->length();
845-
sort_field_length= sort_field->length - sort_field->suffix_length;
846-
diff=(int) (sort_field_length - length);
847-
if (diff < 0)
848-
{
849-
diff=0;
850-
length= sort_field_length;
851-
}
852-
if (sort_field->suffix_length)
853-
{
854-
/* Store length last in result_string */
855-
store_length(to + sort_field_length, length,
856-
sort_field->suffix_length);
857-
}
858843
if (sort_field->need_strxnfrm)
859844
{
860845
char *from=(char*) res->ptr();
861846
uint tmp_length;
862847
if ((uchar*) from == to)
863848
{
849+
DBUG_ASSERT(sort_field->length >= length);
864850
set_if_smaller(length,sort_field->length);
865851
memcpy(param->tmp_buffer,from,length);
866852
from=param->tmp_buffer;
@@ -871,6 +857,22 @@ static void make_sortkey(register SORTPARAM *param,
871857
}
872858
else
873859
{
860+
uint diff;
861+
uint sort_field_length= sort_field->length -
862+
sort_field->suffix_length;
863+
if (sort_field_length < length)
864+
{
865+
diff= 0;
866+
length= sort_field_length;
867+
}
868+
else
869+
diff= sort_field_length - length;
870+
if (sort_field->suffix_length)
871+
{
872+
/* Store length last in result_string */
873+
store_length(to + sort_field_length, length,
874+
sort_field->suffix_length);
875+
}
874876
my_strnxfrm(cs,(uchar*)to,length,(const uchar*)res->ptr(),length);
875877
cs->cset->fill(cs, (char *)to+length,diff,fill_char);
876878
}

strings/ctype-ucs2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1664,7 +1664,7 @@ CHARSET_INFO my_charset_utf16_general_ci=
16641664
CHARSET_INFO my_charset_utf16_bin=
16651665
{
16661666
55,0,0, /* number */
1667-
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
1667+
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
16681668
"utf16", /* cs name */
16691669
"utf16_bin", /* name */
16701670
"UTF-16 Unicode", /* comment */

strings/ctype-utf8.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5435,7 +5435,8 @@ CHARSET_INFO my_charset_utf8mb4_general_ci=
54355435
CHARSET_INFO my_charset_utf8mb4_bin=
54365436
{
54375437
46,0,0, /* number */
5438-
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */
5438+
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|
5439+
MY_CS_UNICODE_SUPPLEMENT, /* state */
54395440
MY_UTF8MB4, /* cs name */
54405441
MY_UTF8MB4_BIN, /* name */
54415442
"UTF-8 Unicode", /* comment */

0 commit comments

Comments
 (0)