@@ -47,9 +47,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
4747#define RANK_DOWNGRADE (-1 .0F )
4848#define RANK_UPGRADE (1 .0F )
4949
50- /* Maximum number of words supported in a proximity search.
51- FIXME, this limitation can be removed easily. Need to see
52- if we want to enforce such limitation */
50+ /* Maximum number of words supported in a phrase or proximity search. */
5351#define MAX_PROXIMITY_ITEM 128
5452
5553/* Memory used by rbt itself for create and node add */
@@ -183,6 +181,8 @@ struct fts_select_t {
183181 the FTS index */
184182};
185183
184+ typedef std::vector<ulint> pos_vector_t ;
185+
186186/* * structure defines a set of ranges for original documents, each of which
187187has a minimum position and maximum position. Text in such range should
188188contain all words in the proximity search. We will need to count the
@@ -192,9 +192,9 @@ struct fts_proximity_t {
192192 ulint n_pos; /* !< number of position set, defines
193193 a range (min to max) containing all
194194 matching words */
195- ulint* min_pos; /* !< the minimum position (in bytes)
195+ pos_vector_t min_pos; /* !< the minimum position (in bytes)
196196 of the range */
197- ulint* max_pos; /* !< the maximum position (in bytes)
197+ pos_vector_t max_pos; /* !< the maximum position (in bytes)
198198 of the range */
199199};
200200
@@ -1705,6 +1705,9 @@ fts_proximity_is_word_in_range(
17051705{
17061706 fts_proximity_t * proximity_pos = phrase->proximity_pos ;
17071707
1708+ ut_ad (proximity_pos->n_pos == proximity_pos->min_pos .size ());
1709+ ut_ad (proximity_pos->n_pos == proximity_pos->max_pos .size ());
1710+
17081711 /* Search each matched position pair (with min and max positions)
17091712 and count the number of words in the range */
17101713 for (ulint i = 0 ; i < proximity_pos->n_pos ; i++) {
@@ -2588,6 +2591,11 @@ fts_query_phrase_search(
25882591 }
25892592
25902593 num_token = ib_vector_size (tokens);
2594+ if (num_token > MAX_PROXIMITY_ITEM) {
2595+ query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE;
2596+ goto func_exit;
2597+ }
2598+
25912599 ut_ad (ib_vector_size (orig_tokens) >= num_token);
25922600
25932601 /* Ignore empty strings. */
@@ -2613,7 +2621,7 @@ fts_query_phrase_search(
26132621 heap_alloc, sizeof (fts_match_t ),
26142622 64 );
26152623 } else {
2616- ut_a (num_token < MAX_PROXIMITY_ITEM);
2624+ ut_a (num_token <= MAX_PROXIMITY_ITEM);
26172625 query->match_array =
26182626 (ib_vector_t **) mem_heap_alloc (
26192627 heap,
@@ -4236,10 +4244,6 @@ fts_phrase_or_proximity_search(
42364244 ulint j;
42374245 ulint k = 0 ;
42384246 fts_proximity_t qualified_pos;
4239- ulint qualified_pos_buf[MAX_PROXIMITY_ITEM * 2 ];
4240-
4241- qualified_pos.min_pos = &qualified_pos_buf[0 ];
4242- qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM];
42434247
42444248 match[0 ] = static_cast <fts_match_t *>(
42454249 ib_vector_get (query->match_array [0 ], i));
@@ -4371,7 +4375,7 @@ fts_proximity_get_positions(
43714375
43724376 qualified_pos->n_pos = 0 ;
43734377
4374- ut_a (num_match < MAX_PROXIMITY_ITEM);
4378+ ut_a (num_match <= MAX_PROXIMITY_ITEM);
43754379
43764380 /* Each word could appear multiple times in a doc. So
43774381 we need to walk through each word's position list, and find
@@ -4426,8 +4430,8 @@ fts_proximity_get_positions(
44264430 length encoding, record the min_pos and
44274431 max_pos, we will need to verify the actual
44284432 number of characters */
4429- qualified_pos->min_pos [qualified_pos-> n_pos ] = min_pos;
4430- qualified_pos->max_pos [qualified_pos-> n_pos ] = max_pos;
4433+ qualified_pos->min_pos . push_back ( min_pos) ;
4434+ qualified_pos->max_pos . push_back ( max_pos) ;
44314435 qualified_pos->n_pos ++;
44324436 }
44334437
@@ -4436,7 +4440,5 @@ fts_proximity_get_positions(
44364440 idx[min_idx]++;
44374441 }
44384442
4439- ut_ad (qualified_pos->n_pos <= MAX_PROXIMITY_ITEM);
4440-
44414443 return (qualified_pos->n_pos != 0 );
44424444}
0 commit comments