Skip to content

Commit 2302e93

Browse files
author
Shaohua Wang
committed
Commit Message:
BUG#18233051 - FTS: FAILING ASSERTION: NUM_TOKEN < MAX_PROXIMITY_ITEM Analysis: We don't check NUM_TOKEN against MAX_PROXIMITY_ITEM before the assertion is reached, so the assertion fails. Solution: 1. Return an error if the number of tokens in a phrase or proximity search exceeds MAX_PROXIMITY_ITEM; 2. Fix a related problem in fts_proximity_get_positions: the number of possible combinations containing all the words in a proximity search can be greater than MAX_PROXIMITY_ITEM. rb://4686 approved by Jimmy.Yang
1 parent 1bcea7e commit 2302e93

7 files changed

Lines changed: 28 additions & 17 deletions

File tree

include/my_base.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,8 @@ is the global server default. */
483483
#define HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT 188 /* FTS query exceeds result cache limit */
484484
#define HA_ERR_TEMP_FILE_WRITE_FAILURE 189 /* Temporary file write failure */
485485
#define HA_ERR_INNODB_FORCED_RECOVERY 190 /* Innodb is in force recovery mode */
486-
#define HA_ERR_LAST 190 /* Copy of last error nr */
486+
#define HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE 191 /* Too many words in a phrase */
487+
#define HA_ERR_LAST 191 /* Copy of last error nr */
487488

488489
/* Number of different errors */
489490
#define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1)

mysys/my_handler_errors.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ static const char *handler_error_messages[]=
9393
"InnoDB is in read only mode",
9494
"FTS query exceeds result cache memory limit",
9595
"Temporary file write failure",
96-
"Operation not allowed when innodb_forced_recovery > 0"
96+
"Operation not allowed when innodb_forced_recovery > 0",
97+
"Too many words in a FTS phrase or proximity search"
9798
};
9899

99100
extern void my_handler_error_register(void);

sql/handler.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,7 @@ int ha_init_errors(void)
557557
SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT, "FTS query exceeds result cache limit");
558558
SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE, ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
559559
SETMSG(HA_ERR_INNODB_FORCED_RECOVERY, ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
560+
SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");
560561
/* Register the error messages for use with my_error(). */
561562
return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
562563
}

storage/innobase/fts/fts0que.cc

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
4747
#define RANK_DOWNGRADE (-1.0F)
4848
#define RANK_UPGRADE (1.0F)
4949

50-
/* Maximum number of words supported in a proximity search.
51-
FIXME, this limitation can be removed easily. Need to see
52-
if we want to enforce such limitation */
50+
/* Maximum number of words supported in a phrase or proximity search. */
5351
#define MAX_PROXIMITY_ITEM 128
5452

5553
/* Memory used by rbt itself for create and node add */
@@ -183,6 +181,8 @@ struct fts_select_t {
183181
the FTS index */
184182
};
185183

184+
typedef std::vector<ulint> pos_vector_t;
185+
186186
/** structure defines a set of ranges for original documents, each of which
187187
has a minimum position and maximum position. Text in such range should
188188
contain all words in the proximity search. We will need to count the
@@ -192,9 +192,9 @@ struct fts_proximity_t {
192192
ulint n_pos; /*!< number of position set, defines
193193
a range (min to max) containing all
194194
matching words */
195-
ulint* min_pos; /*!< the minimum position (in bytes)
195+
pos_vector_t min_pos; /*!< the minimum position (in bytes)
196196
of the range */
197-
ulint* max_pos; /*!< the maximum position (in bytes)
197+
pos_vector_t max_pos; /*!< the maximum position (in bytes)
198198
of the range */
199199
};
200200

@@ -1705,6 +1705,9 @@ fts_proximity_is_word_in_range(
17051705
{
17061706
fts_proximity_t* proximity_pos = phrase->proximity_pos;
17071707

1708+
ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size());
1709+
ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size());
1710+
17081711
/* Search each matched position pair (with min and max positions)
17091712
and count the number of words in the range */
17101713
for (ulint i = 0; i < proximity_pos->n_pos; i++) {
@@ -2588,6 +2591,11 @@ fts_query_phrase_search(
25882591
}
25892592

25902593
num_token = ib_vector_size(tokens);
2594+
if (num_token > MAX_PROXIMITY_ITEM) {
2595+
query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE;
2596+
goto func_exit;
2597+
}
2598+
25912599
ut_ad(ib_vector_size(orig_tokens) >= num_token);
25922600

25932601
/* Ignore empty strings. */
@@ -2613,7 +2621,7 @@ fts_query_phrase_search(
26132621
heap_alloc, sizeof(fts_match_t),
26142622
64);
26152623
} else {
2616-
ut_a(num_token < MAX_PROXIMITY_ITEM);
2624+
ut_a(num_token <= MAX_PROXIMITY_ITEM);
26172625
query->match_array =
26182626
(ib_vector_t**) mem_heap_alloc(
26192627
heap,
@@ -4236,10 +4244,6 @@ fts_phrase_or_proximity_search(
42364244
ulint j;
42374245
ulint k = 0;
42384246
fts_proximity_t qualified_pos;
4239-
ulint qualified_pos_buf[MAX_PROXIMITY_ITEM * 2];
4240-
4241-
qualified_pos.min_pos = &qualified_pos_buf[0];
4242-
qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM];
42434247

42444248
match[0] = static_cast<fts_match_t*>(
42454249
ib_vector_get(query->match_array[0], i));
@@ -4371,7 +4375,7 @@ fts_proximity_get_positions(
43714375

43724376
qualified_pos->n_pos = 0;
43734377

4374-
ut_a(num_match < MAX_PROXIMITY_ITEM);
4378+
ut_a(num_match <= MAX_PROXIMITY_ITEM);
43754379

43764380
/* Each word could appear multiple times in a doc. So
43774381
we need to walk through each word's position list, and find
@@ -4426,8 +4430,8 @@ fts_proximity_get_positions(
44264430
length encoding, record the min_pos and
44274431
max_pos, we will need to verify the actual
44284432
number of characters */
4429-
qualified_pos->min_pos[qualified_pos->n_pos] = min_pos;
4430-
qualified_pos->max_pos[qualified_pos->n_pos] = max_pos;
4433+
qualified_pos->min_pos.push_back(min_pos);
4434+
qualified_pos->max_pos.push_back(max_pos);
44314435
qualified_pos->n_pos++;
44324436
}
44334437

@@ -4436,7 +4440,5 @@ fts_proximity_get_positions(
44364440
idx[min_idx]++;
44374441
}
44384442

4439-
ut_ad(qualified_pos->n_pos <= MAX_PROXIMITY_ITEM);
4440-
44414443
return(qualified_pos->n_pos != 0);
44424444
}

storage/innobase/handler/ha_innodb.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,6 +1530,8 @@ convert_error_code_to_mysql(
15301530
return(HA_ERR_TABLESPACE_EXISTS);
15311531
case DB_IDENTIFIER_TOO_LONG:
15321532
return(HA_ERR_INTERNAL_ERROR);
1533+
case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
1534+
return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);
15331535
}
15341536
}
15351537

storage/innobase/include/db0err.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ enum dberr_t {
128128
DB_FTS_EXCEED_RESULT_CACHE_LIMIT, /*!< FTS query memory
129129
exceeds result cache limit */
130130
DB_TEMP_FILE_WRITE_FAILURE, /*!< Temp file write failure */
131+
DB_FTS_TOO_MANY_WORDS_IN_PHRASE,
132+
/*!< Too many words in a phrase */
131133

132134
/* The following are partial failure codes */
133135
DB_FAIL = 1000,

storage/innobase/ut/ut0ut.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,8 @@ ut_strerr(
820820
return("FTS query exceeds result cache limit");
821821
case DB_TEMP_FILE_WRITE_FAILURE:
822822
return("Temp file write failure");
823+
case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
824+
return("Too many words in a FTS phrase or proximity search");
823825

824826
/* do not add default: in order to produce a warning if new code
825827
is added to the enum but not added here */

0 commit comments

Comments
 (0)