/// <summary>
/// Verifies that the score attached to every suggestion equals the value produced by
/// the spell checker's own <c>Distance</c> implementation, in both argument orders.
/// </summary>
public void TestInternalLevenshteinDistance()
{
    DirectSpellChecker spellchecker = new DirectSpellChecker();

    // The last term ends in U+1044D, a code point outside the Basic Multilingual Plane.
    // It is written as an escape so the literal survives encoding round-trips
    // (it had been turned into mojibake in this file).
    string[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoi\U0001044D" };
    const string misspelled = "metanoix";

    // using-statements guarantee reader, writer and directory are released
    // (in that order) even when one of the assertions below throws.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
        new MockAnalyzer(Random(), MockTokenizer.KEYWORD, true), Similarity, TimeZone))
    {
        foreach (string term in termsToAdd)
        {
            Document doc = new Document();
            doc.Add(NewTextField("repentance", term, Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.Reader)
        {
            SuggestWord[] similar = spellchecker.SuggestSimilar(
                new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(4, similar.Length);

            IStringDistance sd = spellchecker.Distance;
            assertTrue(sd is LuceneLevenshteinDistance);

            foreach (SuggestWord word in similar)
            {
                // Exact equality is intended: the score is expected to be the very
                // value the distance implementation returns, whichever way round
                // the two strings are passed.
                // LUCENENET TODO: Perhaps change word.String to word.ToString()?
                assertTrue(word.Score == sd.GetDistance(word.String, misspelled));
                assertTrue(word.Score == sd.GetDistance(misspelled, word.String));
            }
        }
    }
}
/// <summary>
/// Indexes the English words for 0..19 and checks that the misspelling
/// "seevntene" yields exactly one suggestion, "seventeen", when two are requested.
/// </summary>
public void TestTransposition2()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();

    // using-statements release reader, writer and directory (in that order)
    // even if an assertion fails part-way through the test.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
        new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone))
    {
        for (int i = 0; i < 20; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.Reader)
        {
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("numbers", "seevntene"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);

            assertEquals(1, similar.Length);
            assertEquals("seventeen", similar[0].String);
        }
    }
}
/// <summary>
/// Indexes the English words for 0..19 and checks that the misspelling
/// "fvie" is corrected to "five" when a single suggestion is requested.
/// </summary>
public void TestTransposition()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();

    // using-statements release reader, writer and directory (in that order)
    // even if an assertion fails part-way through the test.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(
        // Preprocessor directives must sit on their own lines; the extra leading
        // argument is only passed when the symbol below is defined.
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true)))
    {
        for (int i = 0; i < 20; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.GetReader())
        {
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("numbers", "fvie"), 1, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);

            assertEquals(1, similar.Length);
            assertEquals("five", similar[0].String);
        }
    }
}
/// <summary>
/// Checks that asking for suggestions against a field that was never indexed
/// ("bogusFieldBogusField") returns an empty result.
/// </summary>
public void TestBogusField()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();

    // using-statements release reader, writer and directory (in that order)
    // even if the assertion fails.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(Random, dir,
        new MockAnalyzer(Random, MockTokenizer.SIMPLE, true)))
    {
        // Only the "numbers" field is populated.
        for (int i = 0; i < 20; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.GetReader())
        {
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("bogusFieldBogusField", "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);

            assertEquals(0, similar.Length);
        }
    }
}
/// <summary>
/// Runs a series of simple misspellings of "five" against an index of the English
/// words for 0..19, then adds the words for 1000..1099 and checks that a fresh
/// reader picks up "thousand" without any separate spell-check index being built.
/// </summary>
public void TestSimpleExamples()
{
    DirectSpellChecker spellChecker = new DirectSpellChecker();
    spellChecker.MinQueryLength = 0;

    // using-statements release each reader, the writer and the directory
    // even if an assertion fails part-way through the test.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
        new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone))
    {
        for (int i = 0; i < 20; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.Reader)
        {
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("numbers", "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(
                new Term("numbers", "five"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            if (similar.Length > 0)
            {
                // don't suggest a word for itself
                assertFalse(string.Equals(similar[0].String, "five"));
            }

            similar = spellChecker.SuggestSimilar(
                new Term("numbers", "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(
                new Term("numbers", "fiv"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(
                new Term("numbers", "fives"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            // The non-empty check must follow the query it guards; previously it ran
            // before the "fie" lookup and therefore re-tested the "fives" result.
            similar = spellChecker.SuggestSimilar(
                new Term("numbers", "fie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);
        }

        // add some more documents
        for (int i = 1000; i < 1100; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
            writer.AddDocument(doc);
        }

        // A new reader is requested so the documents added above are visible.
        using (IndexReader ir = writer.Reader)
        {
            // look ma, no spellcheck index rebuild
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("numbers", "tousand"), 10, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("thousand", similar[0].String);
        }
    }
}
/// <summary>
/// Exercises the tunable properties of <see cref="DirectSpellChecker"/>
/// (MaxQueryFrequency, MinQueryLength, MaxEdits, Accuracy, MinPrefix) one at a time,
/// each on a freshly constructed checker, and asserts the resulting suggestion count.
/// </summary>
public void TestOptions()
{
    // using-statements release reader, writer and directory (in that order)
    // even if an assertion fails part-way through the test.
    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
        new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone))
    {
        // The same Document instance is re-added after each extra field, so every
        // later document also carries all the values added before it. The suggestion
        // counts asserted below were written against exactly this index content.
        Document doc = new Document();
        doc.Add(NewTextField("text", "foobar", Field.Store.NO));
        writer.AddDocument(doc);
        doc.Add(NewTextField("text", "foobar", Field.Store.NO));
        writer.AddDocument(doc);
        doc.Add(NewTextField("text", "foobaz", Field.Store.NO));
        writer.AddDocument(doc);
        doc.Add(NewTextField("text", "fobar", Field.Store.NO));
        writer.AddDocument(doc);

        using (IndexReader ir = writer.Reader)
        {
            // MaxQueryFrequency = 0: no suggestions expected for the indexed term "fobar".
            DirectSpellChecker spellChecker = new DirectSpellChecker();
            spellChecker.MaxQueryFrequency = 0F;
            SuggestWord[] similar = spellChecker.SuggestSimilar(
                new Term("text", "fobar"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // MinQueryLength = 5: the four-character query "foba" gets no suggestions.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MinQueryLength = 5;
            similar = spellChecker.SuggestSimilar(
                new Term("text", "foba"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // MaxEdits = 1: "foobazzz" gets no suggestions.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MaxEdits = 1;
            similar = spellChecker.SuggestSimilar(
                new Term("text", "foobazzz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // Accuracy = 0.9: "foobazzz" gets no suggestions.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.Accuracy = 0.9F;
            similar = spellChecker.SuggestSimilar(
                new Term("text", "foobazzz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // MinPrefix = 0: "roobaz" yields one suggestion.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MinPrefix = 0;
            similar = spellChecker.SuggestSimilar(
                new Term("text", "roobaz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(1, similar.Length);

            // The query is issued a second time on the same checker; its result used to
            // be discarded, so assert that repeating the lookup gives the same count.
            similar = spellChecker.SuggestSimilar(
                new Term("text", "roobaz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(1, similar.Length);

            // MinPrefix = 1: the same query now yields nothing.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MinPrefix = 1;
            similar = spellChecker.SuggestSimilar(
                new Term("text", "roobaz"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            // MaxEdits = 2 with SUGGEST_ALWAYS: "fobar" yields two suggestions.
            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MaxEdits = 2;
            similar = spellChecker.SuggestSimilar(
                new Term("text", "fobar"), 2, ir, SuggestMode.SUGGEST_ALWAYS);
            assertEquals(2, similar.Length);
        }
    }
}