Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java
===================================================================
--- lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java (revision 0)
+++ lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java (revision 0)
@@ -0,0 +1,105 @@
+package org.apache.lucene.search.spell;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.English;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests the suggestions that {@link DirectSpellChecker} produces when it is
+ * queried against an {@link IndexReader}.
+ */
+public class TestDirectSpellChecker extends LuceneTestCase {
+
+  /**
+   * Indexes the English names of 0..19 in the "numbers" field, checks the top
+   * suggestion for several misspellings of "five", then indexes 1000..1099 and
+   * checks that a freshly obtained reader suggests "thousand" for "tousand".
+   */
+  public void testSimpleExamples() throws Exception {
+    DirectSpellChecker spellChecker = new DirectSpellChecker();
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
+        new MockAnalyzer(MockTokenizer.SIMPLE, true));
+
+    for (int i = 0; i < 20; i++) {
+      Document doc = new Document();
+      doc.add(newField("numbers", English.intToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
+      writer.addDocument(doc);
+    }
+
+    IndexReader ir = writer.getReader();
+
+    // two letters transposed
+    BytesRef[] similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, false);
+    assertTrue(similar.length > 0);
+    assertEquals("five", similar[0].utf8ToString());
+
+    similar = spellChecker.suggestSimilar(new Term("numbers", "five"), 2, ir, false);
+    if (similar.length > 0) {
+      assertFalse(similar[0].utf8ToString().equals("five")); // don't suggest a word for itself
+    }
+
+    similar = spellChecker.suggestSimilar(new Term("numbers", "fvie"), 2, ir, false);
+    assertTrue(similar.length > 0);
+    assertEquals("five", similar[0].utf8ToString());
+
+    // trailing letter missing
+    similar = spellChecker.suggestSimilar(new Term("numbers", "fiv"), 2, ir, false);
+    assertTrue(similar.length > 0);
+    assertEquals("five", similar[0].utf8ToString());
+
+    // extra trailing letter
+    similar = spellChecker.suggestSimilar(new Term("numbers", "fives"), 2, ir, false);
+    assertTrue(similar.length > 0);
+    assertEquals("five", similar[0].utf8ToString());
+
+    // inner letter missing
+    similar = spellChecker.suggestSimilar(new Term("numbers", "fie"), 2, ir, false);
+    assertTrue(similar.length > 0);
+    assertEquals("five", similar[0].utf8ToString());
+
+    // add some more documents
+    for (int i = 1000; i < 1100; i++) {
+      Document doc = new Document();
+      doc.add(newField("numbers", English.intToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
+      writer.addDocument(doc);
+    }
+
+    ir.close();
+    ir = writer.getReader();
+
+    // look ma, no spellcheck index rebuild
+    similar = spellChecker.suggestSimilar(new Term("numbers", "tousand"), 10, ir, false);
+    assertTrue(similar.length > 0);
+    assertEquals("thousand", similar[0].utf8ToString());
+
+    ir.close();
+    writer.close();
+    dir.close();
+  }
+}

Property changes on: lucene\contrib\spellchecker\src\test\org\apache\lucene\search\spell\TestDirectSpellChecker.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
===================================================================
--- lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (revision 0)
+++ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (revision 0)
@@ -0,0 +1,146 @@
+package org.apache.lucene.search.spell;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.PriorityQueue; + +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.FuzzyTermsEnum; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.LevenshteinAutomata; + +/** + * Simple automaton-based spellchecker. + *
+ * Candidates are presented directly from the term dictionary, based on
+ * Levenshtein distance.
+ */
+public class DirectSpellChecker {
+ /** Maximum edit distance for candidate terms; initialised to LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE. */
+ private int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
+
+ /** Returns the maximum edit distance currently configured for candidate terms. */
+ public int getMaxEdits() {
+   return this.maxEdits;
+ }
+
+ /** Sets the max edit distance; must lie in 1..LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE. */
+ public void setMaxEdits(int maxEdits) {
+   final boolean withinRange = maxEdits >= 1 && maxEdits <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
+   if (!withinRange) { throw new UnsupportedOperationException("Invalid maxEdits"); }
+   this.maxEdits = maxEdits;
+ }
+
+ public BytesRef[] suggestSimilar(Term term, int numSug, IndexReader ir,
+ boolean morePopular) throws IOException {
+
+ if (invalidOrEmptyField(term.field(), ir)) {
+ return new BytesRef[0];
+ }
+
+ int docfreq = morePopular ? ir.docFreq(term) : 0;
+
+ PriorityQueue