Beispiel #1
0
        public void TestInternalLevenshteinDistance()
        {
            DirectSpellChecker spellchecker = new DirectSpellChecker();
            Directory          dir          = NewDirectory();
            RandomIndexWriter  writer       = new RandomIndexWriter(Random(), dir,
                                                                    new MockAnalyzer(Random(), MockTokenizer.KEYWORD, true), Similarity, TimeZone);

            String[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoi𐑍" };
            for (int i = 0; i < termsToAdd.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("repentance", termsToAdd[i], Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir         = writer.Reader;
            String      misspelled = "metanoix";

            SuggestWord[] similar = spellchecker.SuggestSimilar(new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length == 4);

            IStringDistance sd = spellchecker.Distance;

            assertTrue(sd is LuceneLevenshteinDistance);
            foreach (SuggestWord word in similar)
            {
                assertTrue(word.Score == sd.GetDistance(word.String, misspelled));
                assertTrue(word.Score == sd.GetDistance(misspelled, word.String)); // LUCNENET TODO: Perhaps change this to word.ToString()?
            }

            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Beispiel #2
0
        public void TestTransposition2()
        {
            DirectSpellChecker spellChecker = new DirectSpellChecker();
            Directory          dir          = NewDirectory();
            RandomIndexWriter  writer       = new RandomIndexWriter(Random(), dir,
                                                                    new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone);

            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.Reader;

            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term(
                                                                    "numbers", "seevntene"), 2, ir,
                                                                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(1, similar.Length);
            assertEquals("seventeen", similar[0].String);
            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
        public void TestTransposition()
        {
            DirectSpellChecker spellChecker = new DirectSpellChecker();
            Directory          dir          = NewDirectory();
            RandomIndexWriter  writer       = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir,
                new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));

            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.GetReader();

            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term(
                                                                    "numbers", "fvie"), 1, ir,
                                                                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(1, similar.Length);
            assertEquals("five", similar[0].String);
            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Beispiel #4
0
        public void TestBogusField()
        {
            DirectSpellChecker spellChecker = new DirectSpellChecker();
            Directory          dir          = NewDirectory();
            RandomIndexWriter  writer       = new RandomIndexWriter(Random, dir,
                                                                    new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));

            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.Int32ToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.GetReader();

            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term(
                                                                    "bogusFieldBogusField", "fvie"), 2, ir,
                                                                SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals(0, similar.Length);
            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Beispiel #5
0
        public void TestSimpleExamples()
        {
            DirectSpellChecker spellChecker = new DirectSpellChecker();

            spellChecker.MinQueryLength = (0);
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
                                                             new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone);

            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.Reader;

            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term("numbers",
                                                                         "fvie"), 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "five"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            if (similar.Length > 0)
            {
                assertFalse(similar[0].String.equals("five")); // don't suggest a word for itself
            }

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fvie"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fiv"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            similar = spellChecker.SuggestSimilar(new Term("numbers", "fives"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("five", similar[0].String);

            assertTrue(similar.Length > 0);
            similar = spellChecker.SuggestSimilar(new Term("numbers", "fie"), 2, ir,
                                                  SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertEquals("five", similar[0].String);

            // add some more documents
            for (int i = 1000; i < 1100; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("numbers", English.IntToEnglish(i), Field.Store.NO));
                writer.AddDocument(doc);
            }

            ir.Dispose();
            ir = writer.Reader;

            // look ma, no spellcheck index rebuild
            similar = spellChecker.SuggestSimilar(new Term("numbers", "tousand"), 10,
                                                  ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length > 0);
            assertEquals("thousand", similar[0].String);

            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Beispiel #6
0
        public void TestOptions()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
                                                             new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true), Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(NewTextField("text", "foobar", Field.Store.NO));
            writer.AddDocument(doc);
            doc.Add(NewTextField("text", "foobar", Field.Store.NO));
            writer.AddDocument(doc);
            doc.Add(NewTextField("text", "foobaz", Field.Store.NO));
            writer.AddDocument(doc);
            doc.Add(NewTextField("text", "fobar", Field.Store.NO));
            writer.AddDocument(doc);

            IndexReader ir = writer.Reader;

            DirectSpellChecker spellChecker = new DirectSpellChecker();

            spellChecker.MaxQueryFrequency = (0F);
            SuggestWord[] similar = spellChecker.SuggestSimilar(new Term("text",
                                                                         "fobar"), 1, ir, SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker = new DirectSpellChecker(); // reset defaults
            spellChecker.MinQueryLength = (5);
            similar = spellChecker.SuggestSimilar(new Term("text", "foba"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker          = new DirectSpellChecker(); // reset defaults
            spellChecker.MaxEdits = (1);
            similar = spellChecker.SuggestSimilar(new Term("text", "foobazzz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker          = new DirectSpellChecker(); // reset defaults
            spellChecker.Accuracy = (0.9F);
            similar = spellChecker.SuggestSimilar(new Term("text", "foobazzz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker           = new DirectSpellChecker(); // reset defaults
            spellChecker.MinPrefix = (0);
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(1, similar.Length);
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);

            spellChecker           = new DirectSpellChecker(); // reset defaults
            spellChecker.MinPrefix = (1);
            similar = spellChecker.SuggestSimilar(new Term("text", "roobaz"), 1, ir,
                                                  SuggestMode.SUGGEST_MORE_POPULAR);
            assertEquals(0, similar.Length);

            spellChecker          = new DirectSpellChecker(); // reset defaults
            spellChecker.MaxEdits = (2);
            similar = spellChecker.SuggestSimilar(new Term("text", "fobar"), 2, ir,
                                                  SuggestMode.SUGGEST_ALWAYS);
            assertEquals(2, similar.Length);

            ir.Dispose();
            writer.Dispose();
            dir.Dispose();
        }