A suggestion generated by combining one or more original query terms

Esempio n. 1
0
        /// <summary>
        /// <para>
        /// Generate suggestions by combining one or more of the passed-in terms into
        /// single words. The returned <see cref="CombineSuggestion"/> contains both a
        /// <see cref="SuggestWord"/> and also an array detailing which passed-in terms were
        /// involved in creating this combination. The scores returned are equal to the
        /// number of word combinations needed, also one less than the length of the
        /// array <see cref="CombineSuggestion.OriginalTermIndexes"/>. Generally, a
        /// suggestion with a lower score is preferred over a higher score.
        /// </para>
        /// <para>
        /// To prevent two adjacent terms from being combined (for instance, if one is
        /// mandatory and the other is prohibited), separate the two terms with
        /// <see cref="WordBreakSpellChecker.SEPARATOR_TERM"/>
        /// </para>
        /// <para>
        /// When suggestMode equals <see cref="SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX"/>, each
        /// suggestion will include at least one term not in the index.
        /// </para>
        /// <para>
        /// When suggestMode equals <see cref="SuggestMode.SUGGEST_MORE_POPULAR"/>, each
        /// suggestion will have the same, or better frequency than the most-popular
        /// included term.
        /// </para>
        /// </summary>
        /// <returns> an array of words generated by combining original terms </returns>
        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
        public virtual CombineSuggestion[] SuggestWordCombinations(Term[] terms, int maxSuggestions,
                                                                   IndexReader ir, SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)
        {
            if (maxSuggestions < 1)
            {
                return(Arrays.Empty <CombineSuggestion>());
            }

            int[] origFreqs = null;
            if (suggestMode != SuggestMode.SUGGEST_ALWAYS)
            {
                origFreqs = new int[terms.Length];
                for (int i = 0; i < terms.Length; i++)
                {
                    origFreqs[i] = ir.DocFreq(terms[i]);
                }
            }

            int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
            IComparer <CombineSuggestionWrapper> queueComparer = new CombinationsThenFreqComparer();

            JCG.PriorityQueue <CombineSuggestionWrapper> suggestions = new JCG.PriorityQueue <CombineSuggestionWrapper>(queueInitialCapacity, queueComparer);

            int thisTimeEvaluations = 0;

            for (int i = 0; i < terms.Length - 1; i++)
            {
                if (terms[i].Equals(SEPARATOR_TERM))
                {
                    continue;
                }
                string leftTermText   = terms[i].Text;
                int    leftTermLength = leftTermText.CodePointCount(0, leftTermText.Length);
                if (leftTermLength > maxCombineWordLength)
                {
                    continue;
                }
                int maxFreq = 0;
                int minFreq = int.MaxValue;
                if (origFreqs != null)
                {
                    maxFreq = origFreqs[i];
                    minFreq = origFreqs[i];
                }
                string combinedTermText = leftTermText;
                int    combinedLength   = leftTermLength;
                for (int j = i + 1; j < terms.Length && j - i <= maxChanges; j++)
                {
                    if (terms[j].Equals(SEPARATOR_TERM))
                    {
                        break;
                    }
                    string rightTermText   = terms[j].Text;
                    int    rightTermLength = rightTermText.CodePointCount(0, rightTermText.Length);
                    combinedTermText += rightTermText;
                    combinedLength   += rightTermLength;
                    if (combinedLength > maxCombineWordLength)
                    {
                        break;
                    }

                    if (origFreqs != null)
                    {
                        maxFreq = Math.Max(maxFreq, origFreqs[j]);
                        minFreq = Math.Min(minFreq, origFreqs[j]);
                    }

                    Term combinedTerm     = new Term(terms[0].Field, combinedTermText);
                    int  combinedTermFreq = ir.DocFreq(combinedTerm);

                    if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR || combinedTermFreq >= maxFreq)
                    {
                        if (suggestMode != SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX || minFreq == 0)
                        {
                            if (combinedTermFreq >= minSuggestionFrequency)
                            {
                                int[] origIndexes = new int[j - i + 1];
                                origIndexes[0] = i;
                                for (int k = 1; k < origIndexes.Length; k++)
                                {
                                    origIndexes[k] = i + k;
                                }
                                SuggestWord word = new SuggestWord();
                                word.Freq   = combinedTermFreq;
                                word.Score  = origIndexes.Length - 1;
                                word.String = combinedTerm.Text;
                                CombineSuggestionWrapper suggestion = new CombineSuggestionWrapper(new CombineSuggestion(word, origIndexes), (origIndexes.Length - 1));
                                suggestions.Enqueue(suggestion);
                                if (suggestions.Count > maxSuggestions)
                                {
                                    suggestions.TryDequeue(out CombineSuggestionWrapper _);
                                }
                            }
                        }
                    }
                    thisTimeEvaluations++;
                    if (thisTimeEvaluations == maxEvaluations)
                    {
                        break;
                    }
                }
            }
            CombineSuggestion[] combineSuggestions = new CombineSuggestion[suggestions.Count];
            for (int i = suggestions.Count - 1; i >= 0; i--)
            {
                combineSuggestions[i] = suggestions.Dequeue().CombineSuggestion;
            }
            return(combineSuggestions);
        }
Esempio n. 2
0
 internal CombineSuggestionWrapper(WordBreakSpellChecker outerInstance, CombineSuggestion combineSuggestion, int numCombinations)
 {
     this.outerInstance     = outerInstance;
     this.combineSuggestion = combineSuggestion;
     this.numCombinations   = numCombinations;
 }