/// <summary>
/// Recursively splits the text of <paramref name="term"/> into a left and a right part at
/// every permitted code point position and queues each split whose two parts both have a
/// <c>Freq</c> of at least <paramref name="useMinSuggestionFrequency"/>.
/// </summary>
/// <param name="term">Term whose text is split; its field is reused for every generated word.</param>
/// <param name="ir">Reader handed to <c>GenerateSuggestWord</c>.</param>
/// <param name="numberBreaks">Break count of the current recursion level; recursion continues
/// only while <c>numberBreaks + 1</c> does not exceed <c>maxChanges</c>.</param>
/// <param name="maxSuggestions">Upper bound on the size of <paramref name="suggestions"/>;
/// one entry is dequeued whenever the bound is exceeded.</param>
/// <param name="useMinSuggestionFrequency">Minimum <c>Freq</c> a word needs to be accepted.</param>
/// <param name="prefix">Words already split off by the enclosing recursion levels.</param>
/// <param name="suggestions">Queue that collects the accepted suggestions.</param>
/// <param name="totalEvaluations">Evaluations already performed before this call.</param>
/// <param name="sortMethod">Passed through unchanged to nested calls.</param>
/// <returns>
/// The number of evaluations performed by this call, including those performed by its
/// nested calls, so that a caller can add the result to its own running total.
/// </returns>
private int GenerateBreakUpSuggestions(Term term, IndexReader ir, int numberBreaks, int maxSuggestions, int useMinSuggestionFrequency, SuggestWord[] prefix, JCG.PriorityQueue<SuggestWordArrayWrapper> suggestions, int totalEvaluations, BreakSuggestionSortMethod sortMethod)
{
    string termText = term.Text;
    int termLength = termText.CodePointCount(0, termText.Length);

    // A configured minimum below 1 is treated as 1 so neither part can be empty.
    int useMinBreakWordLength = minBreakWordLength;
    if (useMinBreakWordLength < 1)
    {
        useMinBreakWordLength = 1;
    }

    // The term is too short to yield two parts of the minimum length.
    if (termLength < (useMinBreakWordLength * 2))
    {
        return 0;
    }

    int thisTimeEvaluations = 0;
    for (int i = useMinBreakWordLength; i <= (termLength - useMinBreakWordLength); i++)
    {
        // Split positions are counted in code points, so convert to a char offset first.
        int end = termText.OffsetByCodePoints(0, i);
        string leftText = termText.Substring(0, end);
        string rightText = termText.Substring(end);

        SuggestWord leftWord = GenerateSuggestWord(ir, term.Field, leftText);
        if (leftWord.Freq >= useMinSuggestionFrequency)
        {
            SuggestWord rightWord = GenerateSuggestWord(ir, term.Field, rightText);
            if (rightWord.Freq >= useMinSuggestionFrequency)
            {
                SuggestWordArrayWrapper suggestion = new SuggestWordArrayWrapper(NewSuggestion(prefix, leftWord, rightWord));
                suggestions.Enqueue(suggestion);
                if (suggestions.Count > maxSuggestions)
                {
                    suggestions.Dequeue();
                }
            }

            // Try to break the right part further, with the left part appended to the prefix.
            int newNumberBreaks = numberBreaks + 1;
            if (newNumberBreaks <= maxChanges)
            {
                int evaluations = GenerateBreakUpSuggestions(new Term(term.Field, rightWord.String), ir, newNumberBreaks, maxSuggestions, useMinSuggestionFrequency, NewPrefix(prefix, leftWord), suggestions, totalEvaluations, sortMethod);

                // Nested work must be part of the returned count as well; otherwise the
                // caller of this level never sees it and the maxEvaluations budget is
                // under-counted for recursion deeper than one level.
                thisTimeEvaluations += evaluations;
                totalEvaluations += evaluations;
            }
        }

        thisTimeEvaluations++;
        totalEvaluations++;
        if (totalEvaluations >= maxEvaluations)
        {
            break;
        }
    }

    return thisTimeEvaluations;
}
/// <summary>
/// Generate suggestions by breaking the passed-in term into multiple words.
/// The scores returned are equal to the number of word breaks needed so a
/// lower score is generally preferred over a higher score.
/// </summary>
/// <param name="term"> the term to break up </param>
/// <param name="maxSuggestions"> the maximum number of suggestions to return; values below 1 yield an empty result </param>
/// <param name="ir"> the reader used to look up the document frequency of <paramref name="term"/> and passed on to the break-up search </param>
/// <param name="suggestMode">
/// - default = <see cref="SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX"/>. With that mode an empty result is
/// returned when the term has a document frequency above zero. With
/// <see cref="SuggestMode.SUGGEST_MORE_POPULAR"/> the minimum suggestion frequency becomes the
/// term's own document frequency (or 1 when that is zero). </param>
/// <param name="sortMethod">
/// - default = <see cref="BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY"/>; any other value
/// selects the sum-of-frequencies comparer </param>
/// <returns> one or more arrays of words formed by breaking up the original term </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public virtual SuggestWord[][] SuggestWordBreaks(Term term, int maxSuggestions, IndexReader ir, SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, BreakSuggestionSortMethod sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY)
{
    if (maxSuggestions < 1)
    {
        return Arrays.Empty<SuggestWord[]>();
    }

    // The queue starts with at most ten slots.
    int initialCapacity = maxSuggestions <= 10 ? maxSuggestions : 10;

    IComparer<SuggestWordArrayWrapper> ordering;
    if (sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY)
    {
        ordering = new LengthThenMaxFreqComparer();
    }
    else
    {
        ordering = new LengthThenSumFreqComparer();
    }

    var collected = new JCG.PriorityQueue<SuggestWordArrayWrapper>(initialCapacity, ordering);

    int termFrequency = ir.DocFreq(term);
    bool termIsIndexed = termFrequency > 0;
    if (termIsIndexed && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)
    {
        return Arrays.Empty<SuggestWord[]>();
    }

    int requiredFrequency = minSuggestionFrequency;
    if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
    {
        // Suggestions must be at least as frequent as the original term, and never below 1.
        if (termFrequency == 0)
        {
            requiredFrequency = 1;
        }
        else
        {
            requiredFrequency = termFrequency;
        }
    }

    GenerateBreakUpSuggestions(term, ir, 1, maxSuggestions, requiredFrequency, Arrays.Empty<SuggestWord>(), collected, 0, sortMethod);

    // Drain the queue back to front: the first element dequeued lands in the last slot.
    SuggestWord[][] result = new SuggestWord[collected.Count][];
    int slot = result.Length;
    while (slot > 0)
    {
        slot--;
        result[slot] = collected.Dequeue().SuggestWords;
    }

    return result;
}
// NOTE(review): this method has the same parameter types as the SuggestWordBreaks overload
// that declares optional parameters. If both are members of the same class, C# rejects the
// duplicate signature and one of the two must be removed - confirm before merging.
/// <summary>
/// <para>
/// Generate suggestions by breaking the passed-in term into multiple words.
/// The scores returned are equal to the number of word breaks needed so a
/// lower score is generally preferred over a higher score.
/// </para>
/// </summary>
/// <param name="term"> the term to break up </param>
/// <param name="maxSuggestions"> the maximum number of suggestions to return; values below 1 yield an empty result </param>
/// <param name="ir"> the reader used to look up the document frequency of <paramref name="term"/> and passed on to the break-up search </param>
/// <param name="suggestMode">
/// with <see cref="SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX"/> an empty result is returned when the
/// term has a document frequency above zero; with <see cref="SuggestMode.SUGGEST_MORE_POPULAR"/>
/// the minimum suggestion frequency becomes the term's own document frequency (or 1 when that is zero) </param>
/// <param name="sortMethod">
/// <see cref="BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY"/> selects the max-frequency
/// comparator; any other value selects the sum-of-frequencies comparator </param>
/// <returns> one or more arrays of words formed by breaking up the original term </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public virtual SuggestWord[][] SuggestWordBreaks(Term term, int maxSuggestions, IndexReader ir, SuggestMode suggestMode, BreakSuggestionSortMethod sortMethod)
{
    if (maxSuggestions < 1)
    {
        return new SuggestWord[0][];
    }

    // No null defaulting here: suggestMode and sortMethod are enum values and can never be
    // null, so the null checks carried over from the Java original were dead code.

    int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;

    // The cast gives both branches of the conditional a common type.
    IComparer<SuggestWordArrayWrapper> queueComparator = sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY
        ? (IComparer<SuggestWordArrayWrapper>)new LengthThenMaxFreqComparator(this)
        : new LengthThenSumFreqComparator(this);

    // Must be the priority queue type that GenerateBreakUpSuggestions accepts; the previous
    // LinkedList declaration could neither hold the queue nor be passed on.
    JCG.PriorityQueue<SuggestWordArrayWrapper> suggestions = new JCG.PriorityQueue<SuggestWordArrayWrapper>(queueInitialCapacity, queueComparator);

    int origFreq = ir.DocFreq(term);
    if (origFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)
    {
        return new SuggestWord[0][];
    }

    int useMinSuggestionFrequency = minSuggestionFrequency;
    if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
    {
        useMinSuggestionFrequency = (origFreq == 0 ? 1 : origFreq);
    }

    GenerateBreakUpSuggestions(term, ir, 1, maxSuggestions, useMinSuggestionFrequency, new SuggestWord[0], suggestions, 0, sortMethod);

    // Fill the result from the last slot to the first, dequeuing one wrapper per slot.
    SuggestWord[][] suggestionArray = new SuggestWord[suggestions.Count][];
    for (int i = suggestions.Count - 1; i >= 0; i--)
    {
        suggestionArray[i] = suggestions.Dequeue().SuggestWords;
    }

    return suggestionArray;
}