예제 #1
0
            internal SuggestWordArrayWrapper(WordBreakSpellChecker outerInstance, SuggestWord[] suggestWords)
            {
                this.outerInstance = outerInstance;
                this.suggestWords  = suggestWords;
                int aFreqSum = 0;
                int aFreqMax = 0;

                foreach (SuggestWord sw in suggestWords)
                {
                    aFreqSum += sw.Freq;
                    aFreqMax  = Math.Max(aFreqMax, sw.Freq);
                }
                this.freqSum = aFreqSum;
                this.freqMax = aFreqMax;
            }
예제 #2
0
        public void TestCombiningWords()
        {
            IndexReader ir = null;

            try
            {
                ir = DirectoryReader.Open(dir);
                WordBreakSpellChecker wbsp = new WordBreakSpellChecker();

                {
                    Term[] terms =
                    {
                        new Term("numbers", "one"),
                        new Term("numbers", "hun"),
                        new Term("numbers", "dred"),
                        new Term("numbers", "eight"),
                        new Term("numbers", "y"),
                        new Term("numbers", "eight"),
                    };
                    wbsp.MaxChanges             = (3);
                    wbsp.MaxCombineWordLength   = (20);
                    wbsp.MinSuggestionFrequency = (1);
                    CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms, 10, ir, SuggestMode.SUGGEST_ALWAYS);
                    assertTrue(cs.Length == 5);

                    assertTrue(cs[0].OriginalTermIndexes.Length == 2);
                    assertTrue(cs[0].OriginalTermIndexes[0] == 1);
                    assertTrue(cs[0].OriginalTermIndexes[1] == 2);
                    assertTrue(cs[0].Suggestion.String.Equals("hundred", StringComparison.Ordinal));
                    assertTrue(cs[0].Suggestion.Score == 1);

                    assertTrue(cs[1].OriginalTermIndexes.Length == 2);
                    assertTrue(cs[1].OriginalTermIndexes[0] == 3);
                    assertTrue(cs[1].OriginalTermIndexes[1] == 4);
                    assertTrue(cs[1].Suggestion.String.Equals("eighty", StringComparison.Ordinal));
                    assertTrue(cs[1].Suggestion.Score == 1);

                    assertTrue(cs[2].OriginalTermIndexes.Length == 2);
                    assertTrue(cs[2].OriginalTermIndexes[0] == 4);
                    assertTrue(cs[2].OriginalTermIndexes[1] == 5);
                    assertTrue(cs[2].Suggestion.String.Equals("yeight", StringComparison.Ordinal));
                    assertTrue(cs[2].Suggestion.Score == 1);

                    for (int i = 3; i < 5; i++)
                    {
                        assertTrue(cs[i].OriginalTermIndexes.Length == 3);
                        assertTrue(cs[i].Suggestion.Score == 2);
                        assertTrue(
                            (cs[i].OriginalTermIndexes[0] == 1 &&
                             cs[i].OriginalTermIndexes[1] == 2 &&
                             cs[i].OriginalTermIndexes[2] == 3 &&
                             cs[i].Suggestion.String.Equals("hundredeight", StringComparison.Ordinal)) ||
                            (cs[i].OriginalTermIndexes[0] == 3 &&
                             cs[i].OriginalTermIndexes[1] == 4 &&
                             cs[i].OriginalTermIndexes[2] == 5 &&
                             cs[i].Suggestion.String.Equals("eightyeight", StringComparison.Ordinal))
                            );
                    }

                    cs = wbsp.SuggestWordCombinations(terms, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
                    assertTrue(cs.Length == 2);
                    assertTrue(cs[0].OriginalTermIndexes.Length == 2);
                    assertTrue(cs[0].Suggestion.Score == 1);
                    assertTrue(cs[0].OriginalTermIndexes[0] == 1);
                    assertTrue(cs[0].OriginalTermIndexes[1] == 2);
                    assertTrue(cs[0].Suggestion.String.Equals("hundred", StringComparison.Ordinal));
                    assertTrue(cs[0].Suggestion.Score == 1);

                    assertTrue(cs[1].OriginalTermIndexes.Length == 3);
                    assertTrue(cs[1].Suggestion.Score == 2);
                    assertTrue(cs[1].OriginalTermIndexes[0] == 1);
                    assertTrue(cs[1].OriginalTermIndexes[1] == 2);
                    assertTrue(cs[1].OriginalTermIndexes[2] == 3);
                    assertTrue(cs[1].Suggestion.String.Equals("hundredeight", StringComparison.Ordinal));
                }
            }
            //catch (Exception e) // LUCENENET: Senseless to catch and rethrow here
            //{
            //    throw e;
            //}
            finally
            {
                try { ir.Dispose(); } catch (Exception /*e1*/) { }
            }
        }
예제 #3
0
        public void TestRandom()
        {
            int numDocs = TestUtil.NextInt32(Random, (10 * RandomMultiplier),
                                             (100 * RandomMultiplier));
            Directory         dir    = null;
            RandomIndexWriter writer = null;
            IndexReader       ir     = null;

            try
            {
                dir    = NewDirectory();
                writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, dir, new MockAnalyzer(Random,
                                                  MockTokenizer.WHITESPACE, false));
                int              maxLength = TestUtil.NextInt32(Random, 5, 50);
                IList <string>   originals = new JCG.List <string>(numDocs);
                IList <string[]> breaks    = new JCG.List <string[]>(numDocs);
                for (int i = 0; i < numDocs; i++)
                {
                    string orig = "";
                    if (Random.nextBoolean())
                    {
                        while (!GoodTestString(orig))
                        {
                            orig = TestUtil.RandomSimpleString(Random, maxLength);
                        }
                    }
                    else
                    {
                        while (!GoodTestString(orig))
                        {
                            orig = TestUtil.RandomUnicodeString(Random, maxLength);
                        }
                    }
                    originals.Add(orig);
                    int totalLength = orig.CodePointCount(0, orig.Length);
                    int breakAt     = orig.OffsetByCodePoints(0,
                                                              TestUtil.NextInt32(Random, 1, totalLength - 1));
                    string[] broken = new string[2];
                    broken[0] = orig.Substring(0, breakAt - 0);
                    broken[1] = orig.Substring(breakAt);
                    breaks.Add(broken);
                    Document doc = new Document();
                    doc.Add(NewTextField("random_break", broken[0] + " " + broken[1],
                                         Field.Store.NO));
                    doc.Add(NewTextField("random_combine", orig, Field.Store.NO));
                    writer.AddDocument(doc);
                }
                writer.Commit();
                writer.Dispose();

                ir = DirectoryReader.Open(dir);
                WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
                wbsp.MaxChanges             = (1);
                wbsp.MinBreakWordLength     = (1);
                wbsp.MinSuggestionFrequency = (1);
                wbsp.MaxCombineWordLength   = (maxLength);
                for (int i = 0; i < originals.size(); i++)
                {
                    string orig  = originals[i];
                    string left  = breaks[i][0];
                    string right = breaks[i][1];
                    {
                        Term term = new Term("random_break", orig);

                        SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, originals.size(),
                                                                    ir, SuggestMode.SUGGEST_ALWAYS,
                                                                    WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                        bool failed = true;
                        foreach (SuggestWord[] sw1 in sw)
                        {
                            assertTrue(sw1.Length == 2);
                            if (sw1[0].String.Equals(left, StringComparison.Ordinal) && sw1[1].String.Equals(right, StringComparison.Ordinal))
                            {
                                failed = false;
                            }
                        }
                        assertFalse("Failed getting break suggestions\n >Original: "
                                    + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
                    }
                    {
                        Term[] terms = { new Term("random_combine", left),
                                         new Term("random_combine", right) };
                        CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms,
                                                                              originals.size(), ir, SuggestMode.SUGGEST_ALWAYS);
                        bool failed = true;
                        foreach (CombineSuggestion cs1 in cs)
                        {
                            assertTrue(cs1.OriginalTermIndexes.Length == 2);
                            if (cs1.Suggestion.String.Equals(left + right, StringComparison.Ordinal))
                            {
                                failed = false;
                            }
                        }
                        assertFalse("Failed getting combine suggestions\n >Original: "
                                    + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
                    }
                }
            }
            //catch (Exception e) when (e.IsException()) // LUCENENET: Senseless to catch and rethrow here
            //{
            //    throw e;
            //}
            finally
            {
                try
                {
                    ir.Dispose();
                }
                catch (Exception e1) when(e1.IsException())
                {
                }
                try
                {
                    writer.Dispose();
                }
                catch (Exception e1) when(e1.IsException())
                {
                }
                try
                {
                    dir.Dispose();
                }
                catch (Exception e1) when(e1.IsException())
                {
                }
            }
        }
예제 #4
0
        public void TestBreakingWords()
        {
            IndexReader ir = null;

            try
            {
                ir = DirectoryReader.Open(dir);
                WordBreakSpellChecker wbsp = new WordBreakSpellChecker();

                {
                    Term term = new Term("numbers", "ninetynine");
                    wbsp.MaxChanges             = (1);
                    wbsp.MinBreakWordLength     = (1);
                    wbsp.MinSuggestionFrequency = (1);
                    SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 2);
                    assertTrue(sw[0][0].String.Equals("ninety", StringComparison.Ordinal));
                    assertTrue(sw[0][1].String.Equals("nine", StringComparison.Ordinal));
                    assertTrue(sw[0][0].Score == 1);
                    assertTrue(sw[0][1].Score == 1);
                }
                {
                    Term term = new Term("numbers", "onethousand");
                    wbsp.MaxChanges             = (1);
                    wbsp.MinBreakWordLength     = (1);
                    wbsp.MinSuggestionFrequency = (1);
                    SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 2);
                    assertTrue(sw[0][0].String.Equals("one", StringComparison.Ordinal));
                    assertTrue(sw[0][1].String.Equals("thousand", StringComparison.Ordinal));
                    assertTrue(sw[0][0].Score == 1);
                    assertTrue(sw[0][1].Score == 1);

                    wbsp.MaxChanges             = (2);
                    wbsp.MinSuggestionFrequency = (1);
                    sw = wbsp.SuggestWordBreaks(term, 1, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 2);

                    wbsp.MaxChanges             = (2);
                    wbsp.MinSuggestionFrequency = (2);
                    sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 2);

                    wbsp.MaxChanges             = (2);
                    wbsp.MinSuggestionFrequency = (1);
                    sw = wbsp.SuggestWordBreaks(term, 2, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 2);
                    assertTrue(sw[0].Length == 2);
                    assertTrue(sw[0][0].String.Equals("one", StringComparison.Ordinal));
                    assertTrue(sw[0][1].String.Equals("thousand", StringComparison.Ordinal));
                    assertTrue(sw[0][0].Score == 1);
                    assertTrue(sw[0][1].Score == 1);
                    assertTrue(sw[0][1].Freq > 1);
                    assertTrue(sw[0][0].Freq > sw[0][1].Freq);
                    assertTrue(sw[1].Length == 3);
                    assertTrue(sw[1][0].String.Equals("one", StringComparison.Ordinal));
                    assertTrue(sw[1][1].String.Equals("thou", StringComparison.Ordinal));
                    assertTrue(sw[1][2].String.Equals("sand", StringComparison.Ordinal));
                    assertTrue(sw[1][0].Score == 2);
                    assertTrue(sw[1][1].Score == 2);
                    assertTrue(sw[1][2].Score == 2);
                    assertTrue(sw[1][0].Freq > 1);
                    assertTrue(sw[1][1].Freq == 1);
                    assertTrue(sw[1][2].Freq == 1);
                }
                {
                    Term term = new Term("numbers", "onethousandonehundredeleven");
                    wbsp.MaxChanges             = (3);
                    wbsp.MinBreakWordLength     = (1);
                    wbsp.MinSuggestionFrequency = (1);
                    SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 0);

                    wbsp.MaxChanges = (4);
                    sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 1);
                    assertTrue(sw[0].Length == 5);

                    wbsp.MaxChanges = (5);
                    sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 2);
                    assertTrue(sw[0].Length == 5);
                    assertTrue(sw[0][1].String.Equals("thousand", StringComparison.Ordinal));
                    assertTrue(sw[1].Length == 6);
                    assertTrue(sw[1][1].String.Equals("thou", StringComparison.Ordinal));
                    assertTrue(sw[1][2].String.Equals("sand", StringComparison.Ordinal));
                }
                {
                    //make sure we can handle 2-char codepoints
                    Term term = new Term("numbers", "\uD864\uDC79");
                    wbsp.MaxChanges             = (1);
                    wbsp.MinBreakWordLength     = (1);
                    wbsp.MinSuggestionFrequency = (1);
                    SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, 5, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                    assertTrue(sw.Length == 0);
                }
            }
            //catch (Exception e) // LUCENENET: Senseless to catch and rethrow here
            //{
            //    throw e;
            //}
            finally
            {
                try { ir.Dispose(); } catch (Exception /*e1*/) { }
            }
        }
예제 #5
0
 internal CombineSuggestionWrapper(WordBreakSpellChecker outerInstance, CombineSuggestion combineSuggestion, int numCombinations)
 {
     this.outerInstance     = outerInstance;
     this.combineSuggestion = combineSuggestion;
     this.numCombinations   = numCombinations;
 }
예제 #6
0
 public CombinationsThenFreqComparer(WordBreakSpellChecker outerInstance)
 {
     this.outerInstance = outerInstance;
 }
예제 #7
0
 public LengthThenSumFreqComparer(WordBreakSpellChecker outerInstance)
 {
     this.outerInstance = outerInstance;
 }
예제 #8
0
 public LengthThenMaxFreqComparator(WordBreakSpellChecker outerInstance)
 {
     this.outerInstance = outerInstance;
 }
예제 #9
0
        public void GRandom()
        {
            int numDocs = TestUtil.NextInt(Random(), (10 * RANDOM_MULTIPLIER),
                                           (100 * RANDOM_MULTIPLIER));
            Directory         dir    = null;
            RandomIndexWriter writer = null;
            IndexReader       ir     = null;

            try
            {
                dir    = NewDirectory();
                writer = new RandomIndexWriter(Random(), dir, new MockAnalyzer(Random(),
                                                                               MockTokenizer.WHITESPACE, false), Similarity, TimeZone);
                int             maxLength = TestUtil.NextInt(Random(), 5, 50);
                List <string>   originals = new List <string>(numDocs);
                List <string[]> breaks    = new List <string[]>(numDocs);
                for (int i = 0; i < numDocs; i++)
                {
                    string orig = "";
                    if (Random().nextBoolean())
                    {
                        while (!GoodTestString(orig))
                        {
                            orig = TestUtil.RandomSimpleString(Random(), maxLength);
                        }
                    }
                    else
                    {
                        while (!GoodTestString(orig))
                        {
                            orig = TestUtil.RandomUnicodeString(Random(), maxLength);
                        }
                    }
                    originals.Add(orig);
                    int totalLength = orig.CodePointCount(0, orig.Length);
                    int breakAt     = orig.OffsetByCodePoints(0,
                                                              TestUtil.NextInt(Random(), 1, totalLength - 1));
                    string[] broken = new string[2];
                    broken[0] = orig.Substring(0, breakAt - 0);
                    broken[1] = orig.Substring(breakAt);
                    breaks.Add(broken);
                    Document doc = new Document();
                    doc.Add(NewTextField("random_break", broken[0] + " " + broken[1],
                                         Field.Store.NO));
                    doc.Add(NewTextField("random_combine", orig, Field.Store.NO));
                    writer.AddDocument(doc);
                }
                writer.Commit();
                writer.Dispose();

                ir = DirectoryReader.Open(dir);
                WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
                wbsp.MaxChanges             = (1);
                wbsp.MinBreakWordLength     = (1);
                wbsp.MinSuggestionFrequency = (1);
                wbsp.MaxCombineWordLength   = (maxLength);
                for (int i = 0; i < originals.size(); i++)
                {
                    string orig  = originals[i];
                    string left  = breaks[i][0];
                    string right = breaks[i][1];
                    {
                        Term term = new Term("random_break", orig);

                        SuggestWord[][] sw = wbsp.SuggestWordBreaks(term, originals.size(),
                                                                    ir, SuggestMode.SUGGEST_ALWAYS,
                                                                    WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
                        bool failed = true;
                        foreach (SuggestWord[] sw1 in sw)
                        {
                            assertTrue(sw1.Length == 2);
                            if (sw1[0].String.equals(left) && sw1[1].String.equals(right))
                            {
                                failed = false;
                            }
                        }
                        assertFalse("Failed getting break suggestions\n >Original: "
                                    + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
                    }
                    {
                        Term[] terms = { new Term("random_combine", left),
                                         new Term("random_combine", right) };
                        CombineSuggestion[] cs = wbsp.SuggestWordCombinations(terms,
                                                                              originals.size(), ir, SuggestMode.SUGGEST_ALWAYS);
                        bool failed = true;
                        foreach (CombineSuggestion cs1 in cs)
                        {
                            assertTrue(cs1.OriginalTermIndexes.Length == 2);
                            if (cs1.Suggestion.String.equals(left + right))
                            {
                                failed = false;
                            }
                        }
                        assertFalse("Failed getting combine suggestions\n >Original: "
                                    + orig + "\n >Left: " + left + "\n >Right: " + right, failed);
                    }
                }
            }
            catch (Exception e)
            {
                throw e;
            }
            finally
            {
                try
                {
                    ir.Dispose();
                }
                catch (Exception /*e1*/) { }
                try
                {
                    writer.Dispose();
                }
                catch (Exception /*e1*/) { }
                try
                {
                    dir.Dispose();
                }
                catch (Exception /*e1*/) { }
            }
        }