Commit Message:

Shaohua Wang · Shaohua Wang · commit 2302e93bcae8 · 2014-02-19T16:41:14.000+08:00
BUG#18233051 - FTS: FAILING ASSERTION: NUM_TOKEN < MAX_PROXIMITY_ITEM

Analysis:
We never validate that NUM_TOKEN < MAX_PROXIMITY_ITEM before asserting it, so a
query with too many words makes the assertion fail.

Solution:
1. Return an error if the number of tokens in a phrase or proximity search
   exceeds MAX_PROXIMITY_ITEM;
2. Fix a related problem in fts_proximity_get_positions:
   the number of possible combinations containing all the words in a
   proximity search can be larger than MAX_PROXIMITY_ITEM.

rb://4686 approved by Jimmy.Yang
diff --git a/include/my_base.h b/include/my_base.h
@@ -483,7 +483,8 @@ is the global server default. */
 #define HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT  188 /* FTS query exceeds result cache limit */
 #define HA_ERR_TEMP_FILE_WRITE_FAILURE	189	/* Temporary file write failure */
 #define HA_ERR_INNODB_FORCED_RECOVERY 190	/* Innodb is in force recovery mode */
-#define HA_ERR_LAST 190    /* Copy of last error nr */
+#define HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE	191 /* Too many words in a phrase */
+#define HA_ERR_LAST               191    /* Copy of last error nr */
 
 /* Number of different errors */
 #define HA_ERR_ERRORS            (HA_ERR_LAST - HA_ERR_FIRST + 1)
diff --git a/mysys/my_handler_errors.h b/mysys/my_handler_errors.h
@@ -93,7 +93,8 @@ static const char *handler_error_messages[]=
   "InnoDB is in read only mode",
   "FTS query exceeds result cache memory limit",
   "Temporary file write failure",
-  "Operation not allowed when innodb_forced_recovery > 0"
+  "Operation not allowed when innodb_forced_recovery > 0",
+  "Too many words in a FTS phrase or proximity search"
 };
 
 extern void my_handler_error_register(void);
diff --git a/sql/handler.cc b/sql/handler.cc
@@ -557,6 +557,7 @@ int ha_init_errors(void)
   SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT,  "FTS query exceeds result cache limit");
   SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE,	ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));
   SETMSG(HA_ERR_INNODB_FORCED_RECOVERY,	ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));
+  SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE,  "Too many words in a FTS phrase or proximity search");
   /* Register the error messages for use with my_error(). */
   return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
 }
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
@@ -47,9 +47,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
 #define RANK_DOWNGRADE		(-1.0F)
 #define RANK_UPGRADE		(1.0F)
 
-/* Maximum number of words supported in a proximity search.
-FIXME, this limitation can be removed easily. Need to see
-if we want to enforce such limitation */
+/* Maximum number of words supported in a phrase or proximity search. */
 #define MAX_PROXIMITY_ITEM	128
 
 /* Memory used by rbt itself for create and node add */
@@ -183,6 +181,8 @@ struct fts_select_t {
 					the FTS index */
 };
 
+typedef std::vector<ulint>       pos_vector_t;
+
 /** structure defines a set of ranges for original documents, each of which
 has a minimum position and maximum position. Text in such range should
 contain all words in the proximity search. We will need to count the
@@ -192,9 +192,9 @@ struct fts_proximity_t {
 	ulint		n_pos;		/*!< number of position set, defines
 					a range (min to max) containing all
 					matching words */
-	ulint*		min_pos;	/*!< the minimum position (in bytes)
+	pos_vector_t	min_pos;	/*!< the minimum position (in bytes)
 					of the range */
-	ulint*		max_pos;	/*!< the maximum position (in bytes)
+	pos_vector_t	max_pos;	/*!< the maximum position (in bytes)
 					of the range */
 };
 
@@ -1705,6 +1705,9 @@ fts_proximity_is_word_in_range(
 {
 	fts_proximity_t*	proximity_pos = phrase->proximity_pos;
 
+	ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size());
+	ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size());
+
 	/* Search each matched position pair (with min and max positions)
 	and count the number of words in the range */
 	for (ulint i = 0; i < proximity_pos->n_pos; i++) {
@@ -2588,6 +2591,11 @@ fts_query_phrase_search(
 	}
 
 	num_token = ib_vector_size(tokens);
+	if (num_token > MAX_PROXIMITY_ITEM) {
+		query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE;
+		goto func_exit;
+	}
+
 	ut_ad(ib_vector_size(orig_tokens) >= num_token);
 
 	/* Ignore empty strings. */
@@ -2613,7 +2621,7 @@ fts_query_phrase_search(
 					heap_alloc, sizeof(fts_match_t),
 					64);
 			} else {
-				ut_a(num_token < MAX_PROXIMITY_ITEM);
+				ut_a(num_token <= MAX_PROXIMITY_ITEM);
 				query->match_array =
 					(ib_vector_t**) mem_heap_alloc(
 						heap,
@@ -4236,10 +4244,6 @@ fts_phrase_or_proximity_search(
 		ulint		j;
 		ulint		k = 0;
 		fts_proximity_t	qualified_pos;
-		ulint		qualified_pos_buf[MAX_PROXIMITY_ITEM * 2];
-
-		qualified_pos.min_pos = &qualified_pos_buf[0];
-		qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM];
 
 		match[0] = static_cast<fts_match_t*>(
 			ib_vector_get(query->match_array[0], i));
@@ -4371,7 +4375,7 @@ fts_proximity_get_positions(
 
 	qualified_pos->n_pos = 0;
 
-	ut_a(num_match < MAX_PROXIMITY_ITEM);
+	ut_a(num_match <= MAX_PROXIMITY_ITEM);
 
 	/* Each word could appear multiple times in a doc. So
 	we need to walk through each word's position list, and find
@@ -4426,8 +4430,8 @@ fts_proximity_get_positions(
 			length encoding, record the min_pos and
 			max_pos, we will need to verify the actual
 			number of characters */
-			qualified_pos->min_pos[qualified_pos->n_pos] = min_pos;
-			qualified_pos->max_pos[qualified_pos->n_pos] = max_pos;
+			qualified_pos->min_pos.push_back(min_pos);
+			qualified_pos->max_pos.push_back(max_pos);
 			qualified_pos->n_pos++;
 		}
 
@@ -4436,7 +4440,5 @@ fts_proximity_get_positions(
 		idx[min_idx]++;
 	}
 
-	ut_ad(qualified_pos->n_pos <= MAX_PROXIMITY_ITEM);
-
 	return(qualified_pos->n_pos != 0);
 }
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
@@ -1530,6 +1530,8 @@ convert_error_code_to_mysql(
 		return(HA_ERR_TABLESPACE_EXISTS);
 	case DB_IDENTIFIER_TOO_LONG:
 		return(HA_ERR_INTERNAL_ERROR);
+	case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
+		return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);
 	}
 }
 
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
@@ -128,6 +128,8 @@ enum dberr_t {
 	DB_FTS_EXCEED_RESULT_CACHE_LIMIT,	/*!< FTS query memory
 					exceeds result cache limit */
 	DB_TEMP_FILE_WRITE_FAILURE,	/*!< Temp file write failure */
+	DB_FTS_TOO_MANY_WORDS_IN_PHRASE,
+					/*< Too many words in a phrase */
 
 	/* The following are partial failure codes */
 	DB_FAIL = 1000,
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
@@ -820,6 +820,8 @@ ut_strerr(
 		return("FTS query exceeds result cache limit");
 	case DB_TEMP_FILE_WRITE_FAILURE:
 		return("Temp file write failure");
+	case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
+		return("Too many words in a FTS phrase or proximity search");
 
 	/* do not add default: in order to produce a warning if new code
 	is added to the enum but not added here */

Original file line number	Diff line number	Diff line change
`@@ -557,6 +557,7 @@ int ha_init_errors(void)`
`557`	`557`	`SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT, "FTS query exceeds result cache limit");`
`558`	`558`	`SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE, ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE));`
`559`	`559`	`SETMSG(HA_ERR_INNODB_FORCED_RECOVERY, ER_DEFAULT(ER_INNODB_FORCED_RECOVERY));`
	`560`	`+ SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");`
`560`	`561`	`/* Register the error messages for use with my_error(). */`
`561`	`562`	`return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);`
`562`	`563`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1530,6 +1530,8 @@ convert_error_code_to_mysql(`
`1530`	`1530`	`return(HA_ERR_TABLESPACE_EXISTS);`
`1531`	`1531`	`case DB_IDENTIFIER_TOO_LONG:`
`1532`	`1532`	`return(HA_ERR_INTERNAL_ERROR);`
	`1533`	`+ case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:`
	`1534`	`+ return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);`
`1533`	`1535`	`}`
`1534`	`1536`	`}`
`1535`	`1537`