Skip to content

Commit 9878c8d

Browse files
author
[email protected]/bar.intranet.mysql.r18.ru
committed
Bug#20854 XML functions: wrong result in ExtractValue
1 parent db89fb3 commit 9878c8d

File tree

4 files changed

+152
-16
lines changed

4 files changed

+152
-16
lines changed

mysql-test/r/xml.result

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@ select extractvalue('<a>a<b>B</b></a>','a|/b');
570570
extractvalue('<a>a<b>B</b></a>','a|/b')
571571
a
572572
select extractvalue('<a>A</a>','/<a>');
573-
ERROR HY000: XPATH syntax error: '<a>'
573+
ERROR HY000: XPATH error: comparison of two nodesets is not supported: '<a>'
574574
select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!');
575575
ERROR HY000: XPATH syntax error: '!'
576576
select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*');
@@ -710,3 +710,29 @@ Data
710710
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
711711
extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something')
712712
Otherdata
713+
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
714+
ERROR HY000: XPATH syntax error: '02'
715+
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
716+
extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*')
717+
NULL
718+
Warnings:
719+
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 13: unknown token unexpected (ident or '/' wanted)'
720+
select extractValue('<.>test</.>','//*');
721+
extractValue('<.>test</.>','//*')
722+
NULL
723+
Warnings:
724+
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
725+
select extractValue('<->test</->','//*');
726+
extractValue('<->test</->','//*')
727+
NULL
728+
Warnings:
729+
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
730+
select extractValue('<:>test</:>','//*');
731+
extractValue('<:>test</:>','//*')
732+
test
733+
select extractValue('<_>test</_>','//*');
734+
extractValue('<_>test</_>','//*')
735+
test
736+
select extractValue('<x.-_:>test</x.-_:>','//*');
737+
extractValue('<x.-_:>test</x.-_:>','//*')
738+
test

mysql-test/t/xml.test

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,3 +360,19 @@ select extractValue('<ns:element xmlns:ns="myns">a</ns:element>','/ns:element/@x
360360
#
361361
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar');
362362
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
363+
364+
#
365+
# Bug#20854 XML functions: wrong result in ExtractValue
366+
#
367+
--error 1105
368+
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
369+
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
370+
# dot and dash are bad identifier start characters
371+
select extractValue('<.>test</.>','//*');
372+
select extractValue('<->test</->','//*');
373+
# colon is a good identifier start character
374+
select extractValue('<:>test</:>','//*');
375+
# underscore is a good identifier start character
376+
select extractValue('<_>test</_>','//*');
377+
# dot, dash, underscore and colon are good identifier middle characters
378+
select extractValue('<x.-_:>test</x.-_:>','//*');

sql/item_xmlfunc.cc

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ typedef struct my_xpath_st
105105
String *context_cache; /* last context provider */
106106
String *pxml; /* Parsed XML, an array of MY_XML_NODE */
107107
CHARSET_INFO *cs; /* character set/collation string comparison */
108+
int error;
108109
} MY_XPATH;
109110

110111

@@ -913,7 +914,9 @@ static Item *eq_func_reverse(int oper, Item *a, Item *b)
913914
RETURN
914915
The newly created item.
915916
*/
916-
static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b)
917+
static Item *create_comparator(MY_XPATH *xpath,
918+
int oper, MY_XPATH_LEX *context,
919+
Item *a, Item *b)
917920
{
918921
if (a->type() != Item::XPATH_NODESET &&
919922
b->type() != Item::XPATH_NODESET)
@@ -923,6 +926,13 @@ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b)
923926
else if (a->type() == Item::XPATH_NODESET &&
924927
b->type() == Item::XPATH_NODESET)
925928
{
929+
uint len= context->end - context->beg;
930+
set_if_bigger(len, 32);
931+
my_printf_error(ER_UNKNOWN_ERROR,
932+
"XPATH error: "
933+
"comparison of two nodesets is not supported: '%.*s'",
934+
MYF(0), len, context->beg);
935+
926936
return 0; // TODO: Comparison of two nodesets
927937
}
928938
else
@@ -1430,7 +1440,7 @@ my_xpath_lex_scan(MY_XPATH *xpath,
14301440
static int
14311441
my_xpath_parse_term(MY_XPATH *xpath, int term)
14321442
{
1433-
if (xpath->lasttok.term == term)
1443+
if (xpath->lasttok.term == term && !xpath->error)
14341444
{
14351445
xpath->prevtok= xpath->lasttok;
14361446
my_xpath_lex_scan(xpath, &xpath->lasttok,
@@ -1558,8 +1568,9 @@ static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath)
15581568
return my_xpath_parse_RelativeLocationPath(xpath);
15591569
}
15601570

1561-
return my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF) ||
1562-
my_xpath_parse_RelativeLocationPath(xpath);
1571+
my_xpath_parse_RelativeLocationPath(xpath);
1572+
1573+
return (xpath->error == 0);
15631574
}
15641575

15651576

@@ -1596,7 +1607,10 @@ static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath)
15961607
"*", 1,
15971608
xpath->pxml, 1);
15981609
if (!my_xpath_parse_Step(xpath))
1610+
{
1611+
xpath->error= 1;
15991612
return 0;
1613+
}
16001614
}
16011615
return 1;
16021616
}
@@ -1633,10 +1647,16 @@ my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath)
16331647
xpath->context_cache= context_cache;
16341648

16351649
if(!my_xpath_parse_PredicateExpr(xpath))
1650+
{
1651+
xpath->error= 1;
16361652
return 0;
1653+
}
16371654

16381655
if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB))
1656+
{
1657+
xpath->error= 1;
16391658
return 0;
1659+
}
16401660

16411661
xpath->item= nodeset2bool(xpath, xpath->item);
16421662

@@ -1893,7 +1913,10 @@ static int my_xpath_parse_UnionExpr(MY_XPATH *xpath)
18931913

18941914
if (!my_xpath_parse_PathExpr(xpath)
18951915
|| xpath->item->type() != Item::XPATH_NODESET)
1916+
{
1917+
xpath->error= 1;
18961918
return 0;
1919+
}
18971920
xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml);
18981921
}
18991922
return 1;
@@ -1929,6 +1952,7 @@ static int my_xpath_parse_PathExpr(MY_XPATH *xpath)
19291952
{
19301953
return my_xpath_parse_LocationPath(xpath) ||
19311954
my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath);
1955+
19321956
}
19331957

19341958

@@ -1975,7 +1999,10 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath)
19751999
{
19762000
Item *prev= xpath->item;
19772001
if (!my_xpath_parse_AndExpr(xpath))
2002+
{
19782003
return 0;
2004+
xpath->error= 1;
2005+
}
19792006
xpath->item= new Item_cond_or(nodeset2bool(xpath, prev),
19802007
nodeset2bool(xpath, xpath->item));
19812008
}
@@ -2003,7 +2030,10 @@ static int my_xpath_parse_AndExpr(MY_XPATH *xpath)
20032030
{
20042031
Item *prev= xpath->item;
20052032
if (!my_xpath_parse_EqualityExpr(xpath))
2033+
{
2034+
xpath->error= 1;
20062035
return 0;
2036+
}
20072037

20082038
xpath->item= new Item_cond_and(nodeset2bool(xpath,prev),
20092039
nodeset2bool(xpath,xpath->item));
@@ -2057,17 +2087,26 @@ static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath)
20572087
}
20582088
static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath)
20592089
{
2090+
MY_XPATH_LEX operator_context;
20602091
if (!my_xpath_parse_RelationalExpr(xpath))
20612092
return 0;
2093+
2094+
operator_context= xpath->lasttok;
20622095
while (my_xpath_parse_EqualityOperator(xpath))
20632096
{
20642097
Item *prev= xpath->item;
20652098
int oper= xpath->extra;
20662099
if (!my_xpath_parse_RelationalExpr(xpath))
2100+
{
2101+
xpath->error= 1;
20672102
return 0;
2103+
}
20682104

2069-
if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item)))
2105+
if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
2106+
prev, xpath->item)))
20702107
return 0;
2108+
2109+
operator_context= xpath->lasttok;
20712110
}
20722111
return 1;
20732112
}
@@ -2109,18 +2148,25 @@ static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath)
21092148
}
21102149
static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath)
21112150
{
2151+
MY_XPATH_LEX operator_context;
21122152
if (!my_xpath_parse_AdditiveExpr(xpath))
21132153
return 0;
2154+
operator_context= xpath->lasttok;
21142155
while (my_xpath_parse_RelationalOperator(xpath))
21152156
{
21162157
Item *prev= xpath->item;
21172158
int oper= xpath->extra;
21182159

21192160
if (!my_xpath_parse_AdditiveExpr(xpath))
2161+
{
2162+
xpath->error= 1;
21202163
return 0;
2164+
}
21212165

2122-
if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item)))
2166+
if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
2167+
prev, xpath->item)))
21232168
return 0;
2169+
operator_context= xpath->lasttok;
21242170
}
21252171
return 1;
21262172
}
@@ -2153,7 +2199,10 @@ static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath)
21532199
int oper= xpath->prevtok.term;
21542200
Item *prev= xpath->item;
21552201
if (!my_xpath_parse_MultiplicativeExpr(xpath))
2202+
{
2203+
xpath->error= 1;
21562204
return 0;
2205+
}
21572206

21582207
if (oper == MY_XPATH_LEX_PLUS)
21592208
xpath->item= new Item_func_plus(prev, xpath->item);
@@ -2198,7 +2247,10 @@ static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath)
21982247
int oper= xpath->prevtok.term;
21992248
Item *prev= xpath->item;
22002249
if (!my_xpath_parse_UnaryExpr(xpath))
2250+
{
2251+
xpath->error= 1;
22012252
return 0;
2253+
}
22022254
switch (oper)
22032255
{
22042256
case MY_XPATH_LEX_ASTERISK:

strings/xml.c

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "my_xml.h"
2020

2121

22+
#define MY_XML_UNKNOWN 'U'
2223
#define MY_XML_EOF 'E'
2324
#define MY_XML_STRING 'S'
2425
#define MY_XML_IDENT 'I'
@@ -39,6 +40,46 @@ typedef struct xml_attr_st
3940
} MY_XML_ATTR;
4041

4142

43+
/*
44+
XML ctype:
45+
*/
46+
#define MY_XML_ID0 0x01 /* Identifier initial character */
47+
#define MY_XML_ID1 0x02 /* Identifier medial character */
48+
#define MY_XML_SPC 0x08 /* Spacing character */
49+
50+
51+
/*
52+
http://www.w3.org/TR/REC-xml/
53+
[4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
54+
CombiningChar | Extender
55+
[5] Name ::= (Letter | '_' | ':') (NameChar)*
56+
*/
57+
58+
static char my_xml_ctype[256]=
59+
{
60+
/*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
61+
/*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
62+
/*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */
63+
/*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */
64+
/*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */
65+
/*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */
66+
/*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */
67+
/*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */
68+
/*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
69+
/*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
70+
/*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
71+
/*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
72+
/*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
73+
/*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
74+
/*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
75+
/*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
76+
};
77+
78+
#define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
79+
#define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
80+
#define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
81+
82+
4283
static const char *lex2str(int lex)
4384
{
4485
switch(lex)
@@ -56,21 +97,21 @@ static const char *lex2str(int lex)
5697
case MY_XML_QUESTION: return "'?'";
5798
case MY_XML_EXCLAM: return "'!'";
5899
}
59-
return "UNKNOWN";
100+
return "unknown token";
60101
}
61102

62103
static void my_xml_norm_text(MY_XML_ATTR *a)
63104
{
64-
for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ );
65-
for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- );
105+
for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ );
106+
for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- );
66107
}
67108

68109

69110
static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
70111
{
71112
int lex;
72113

73-
for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++);
114+
for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++);
74115

75116
if (p->cur >= p->end)
76117
{
@@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
124165
my_xml_norm_text(a);
125166
lex=MY_XML_STRING;
126167
}
127-
else
168+
else if (my_xml_is_id0(p->cur[0]))
128169
{
129-
for(;
130-
(p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]);
131-
p->cur++)
132-
{}
170+
p->cur++;
171+
while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
172+
p->cur++;
133173
a->end=p->cur;
134174
my_xml_norm_text(a);
135175
lex=MY_XML_IDENT;
136176
}
177+
else
178+
lex= MY_XML_UNKNOWN;
137179

138180
#if 0
139181
printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);

0 commit comments

Comments
 (0)