/// <summary>
/// <para>
/// Generate suggestions by combining one or more of the passed-in terms into
/// single words. The returned <see cref="CombineSuggestion"/> contains both a
/// <see cref="SuggestWord"/> and also an array detailing which passed-in terms were
/// involved in creating this combination. The scores returned are equal to the
/// number of word combinations needed, also one less than the length of the
/// array <see cref="CombineSuggestion.OriginalTermIndexes"/>. Generally, a
/// suggestion with a lower score is preferred over a higher score.
/// </para>
/// <para>
/// To prevent two adjacent terms from being combined (for instance, if one is
/// mandatory and the other is prohibited), separate the two terms with
/// <see cref="WordBreakSpellChecker.SEPARATOR_TERM"/>
/// </para>
/// <para>
/// When suggestMode equals <see cref="SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX"/>, each
/// suggestion will include at least one term not in the index.
/// </para>
/// <para>
/// When suggestMode equals <see cref="SuggestMode.SUGGEST_MORE_POPULAR"/>, each
/// suggestion will have the same, or better frequency than the most-popular
/// included term.
/// </para>
/// <para>
/// Evaluation of candidate combinations stops completely once the number of
/// evaluated combinations reaches the configured maximum number of evaluations.
/// </para>
/// </summary>
/// <param name="terms"> the terms to try to combine; adjacent terms are concatenated left to right </param>
/// <param name="maxSuggestions"> the maximum number of suggestions to return; values below 1 yield an empty array </param>
/// <param name="ir"> the reader used to look up document frequencies of the original and combined terms </param>
/// <param name="suggestMode"> controls which combinations qualify as suggestions (see above) </param>
/// <returns> an array of words generated by combining original terms </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public virtual CombineSuggestion[] SuggestWordCombinations(Term[] terms, int maxSuggestions, IndexReader ir, SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)
{
    if (maxSuggestions < 1)
    {
        return Arrays.Empty<CombineSuggestion>();
    }

    // Frequencies of the original terms are only needed by the modes that
    // compare the combined term against them.
    int[] origFreqs = null;
    if (suggestMode != SuggestMode.SUGGEST_ALWAYS)
    {
        origFreqs = new int[terms.Length];
        for (int i = 0; i < terms.Length; i++)
        {
            origFreqs[i] = ir.DocFreq(terms[i]);
        }
    }

    int queueInitialCapacity = Math.Min(maxSuggestions, 10);
    IComparer<CombineSuggestionWrapper> queueComparer = new CombinationsThenFreqComparer();
    JCG.PriorityQueue<CombineSuggestionWrapper> suggestions =
        new JCG.PriorityQueue<CombineSuggestionWrapper>(queueInitialCapacity, queueComparer);

    int thisTimeEvaluations = 0;

    // Set once the evaluation budget is used up. Breaking out of the inner loop
    // alone is not enough: the outer loop would keep going, the counter would
    // move past maxEvaluations and the limit would never apply again.
    bool evaluationLimitReached = false;

    for (int i = 0; i < terms.Length - 1 && !evaluationLimitReached; i++)
    {
        if (terms[i].Equals(SEPARATOR_TERM))
        {
            continue;
        }

        string leftTermText = terms[i].Text;
        int leftTermLength = leftTermText.CodePointCount(0, leftTermText.Length);
        if (leftTermLength > maxCombineWordLength)
        {
            continue;
        }

        int maxFreq = 0;
        int minFreq = int.MaxValue;
        if (origFreqs != null)
        {
            maxFreq = origFreqs[i];
            minFreq = origFreqs[i];
        }

        string combinedTermText = leftTermText;
        int combinedLength = leftTermLength;

        // Extend the combination one term to the right at a time, joining at
        // most maxChanges additional terms onto terms[i].
        for (int j = i + 1; j < terms.Length && j - i <= maxChanges; j++)
        {
            // A separator forbids combining across it.
            if (terms[j].Equals(SEPARATOR_TERM))
            {
                break;
            }

            string rightTermText = terms[j].Text;
            int rightTermLength = rightTermText.CodePointCount(0, rightTermText.Length);
            combinedTermText += rightTermText;
            combinedLength += rightTermLength;
            if (combinedLength > maxCombineWordLength)
            {
                // Adding further terms can only make the word longer.
                break;
            }

            if (origFreqs != null)
            {
                maxFreq = Math.Max(maxFreq, origFreqs[j]);
                minFreq = Math.Min(minFreq, origFreqs[j]);
            }

            // NOTE(review): the field is taken from terms[0], not terms[i]; if the
            // first element can be SEPARATOR_TERM this looks up the wrong field - confirm.
            Term combinedTerm = new Term(terms[0].Field, combinedTermText);
            int combinedTermFreq = ir.DocFreq(combinedTerm);

            bool passesPopularityCheck = suggestMode != SuggestMode.SUGGEST_MORE_POPULAR
                || combinedTermFreq >= maxFreq;
            bool passesNotInIndexCheck = suggestMode != SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX
                || minFreq == 0;

            if (passesPopularityCheck && passesNotInIndexCheck && combinedTermFreq >= minSuggestionFrequency)
            {
                // Indexes of the original terms i..j that make up this combination.
                int[] origIndexes = new int[j - i + 1];
                for (int k = 0; k < origIndexes.Length; k++)
                {
                    origIndexes[k] = i + k;
                }

                SuggestWord word = new SuggestWord();
                word.Freq = combinedTermFreq;
                word.Score = origIndexes.Length - 1;
                word.String = combinedTerm.Text;

                CombineSuggestionWrapper suggestion = new CombineSuggestionWrapper(new CombineSuggestion(word, origIndexes), (origIndexes.Length - 1));
                suggestions.Enqueue(suggestion);

                // Keep at most maxSuggestions entries by evicting the head of the queue.
                if (suggestions.Count > maxSuggestions)
                {
                    suggestions.TryDequeue(out CombineSuggestionWrapper _);
                }
            }

            thisTimeEvaluations++;
            if (thisTimeEvaluations == maxEvaluations)
            {
                evaluationLimitReached = true;
                break;
            }
        }
    }

    // Drain the queue back-to-front so the element dequeued last ends up first.
    CombineSuggestion[] combineSuggestions = new CombineSuggestion[suggestions.Count];
    for (int i = combineSuggestions.Length - 1; i >= 0; i--)
    {
        combineSuggestions[i] = suggestions.Dequeue().CombineSuggestion;
    }

    return combineSuggestions;
}
/// <summary>
/// Creates a wrapper that stores a <see cref="CombineSuggestion"/> together with
/// the number of combinations associated with it.
/// </summary>
/// <param name="outerInstance"> the <see cref="WordBreakSpellChecker"/> stored as the outer instance </param>
/// <param name="combineSuggestion"> the suggestion being wrapped </param>
/// <param name="numCombinations"> the number of combinations stored alongside the suggestion </param>
// NOTE(review): the call in SuggestWordCombinations passes only
// (combineSuggestion, numCombinations); confirm a matching two-argument
// overload exists elsewhere in the class.
internal CombineSuggestionWrapper(WordBreakSpellChecker outerInstance, CombineSuggestion combineSuggestion, int numCombinations)
{
    this.numCombinations = numCombinations;
    this.combineSuggestion = combineSuggestion;
    this.outerInstance = outerInstance;
}