SuggestWord, used in suggestSimilar method in SpellChecker class.

Default sort is first by score, then by frequency.

 private static SuggestWord[] NewPrefix(SuggestWord[] oldPrefix, SuggestWord append)
 {
     SuggestWord[] newPrefix = new SuggestWord[oldPrefix.Length + 1];
     Array.Copy(oldPrefix, 0, newPrefix, 0, oldPrefix.Length);
     newPrefix[newPrefix.Length - 1] = append;
     return(newPrefix);
 }
        private int GenerateBreakUpSuggestions(Term term, IndexReader ir,
                                               int numberBreaks, int maxSuggestions, int useMinSuggestionFrequency,
                                               SuggestWord[] prefix, JCG.PriorityQueue <SuggestWordArrayWrapper> suggestions,
                                               int totalEvaluations, BreakSuggestionSortMethod sortMethod)
        {
            string termText              = term.Text;
            int    termLength            = termText.CodePointCount(0, termText.Length);
            int    useMinBreakWordLength = minBreakWordLength;

            if (useMinBreakWordLength < 1)
            {
                useMinBreakWordLength = 1;
            }
            if (termLength < (useMinBreakWordLength * 2))
            {
                return(0);
            }

            int thisTimeEvaluations = 0;

            for (int i = useMinBreakWordLength; i <= (termLength - useMinBreakWordLength); i++)
            {
                int         end       = termText.OffsetByCodePoints(0, i);
                string      leftText  = termText.Substring(0, end);
                string      rightText = termText.Substring(end);
                SuggestWord leftWord  = GenerateSuggestWord(ir, term.Field, leftText);

                if (leftWord.Freq >= useMinSuggestionFrequency)
                {
                    SuggestWord rightWord = GenerateSuggestWord(ir, term.Field, rightText);
                    if (rightWord.Freq >= useMinSuggestionFrequency)
                    {
                        SuggestWordArrayWrapper suggestion = new SuggestWordArrayWrapper(NewSuggestion(prefix, leftWord, rightWord));
                        suggestions.Enqueue(suggestion);
                        if (suggestions.Count > maxSuggestions)
                        {
                            suggestions.Dequeue();
                        }
                    }
                    int newNumberBreaks = numberBreaks + 1;
                    if (newNumberBreaks <= maxChanges)
                    {
                        int evaluations = GenerateBreakUpSuggestions(new Term(term.Field, rightWord.String),
                                                                     ir, newNumberBreaks, maxSuggestions,
                                                                     useMinSuggestionFrequency, NewPrefix(prefix, leftWord),
                                                                     suggestions, totalEvaluations, sortMethod);
                        totalEvaluations += evaluations;
                    }
                }

                thisTimeEvaluations++;
                totalEvaluations++;
                if (totalEvaluations >= maxEvaluations)
                {
                    break;
                }
            }
            return(thisTimeEvaluations);
        }
        private static SuggestWord GenerateSuggestWord(IndexReader ir, string fieldname, string text) // LUCENENET: CA1822: Mark members as static
        {
            Term        term = new Term(fieldname, text);
            int         freq = ir.DocFreq(term);
            SuggestWord word = new SuggestWord();

            word.Freq   = freq;
            word.Score  = 1;
            word.String = text;
            return(word);
        }
Beispiel #4
0
        private SuggestWord GenerateSuggestWord(IndexReader ir, string fieldname, string text)
        {
            Term        term = new Term(fieldname, text);
            int         freq = ir.DocFreq(term);
            SuggestWord word = new SuggestWord();

            word.Freq   = freq;
            word.Score  = 1;
            word.String = text;
            return(word);
        }
        private static SuggestWord[] NewSuggestion(SuggestWord[] prefix, SuggestWord append1, SuggestWord append2)
        {
            SuggestWord[] newSuggestion = new SuggestWord[prefix.Length + 2];
            int           score         = prefix.Length + 1;

            for (int i = 0; i < prefix.Length; i++)
            {
                SuggestWord word = new SuggestWord();
                word.String      = prefix[i].String;
                word.Freq        = prefix[i].Freq;
                word.Score       = score;
                newSuggestion[i] = word;
            }
            append1.Score = score;
            append2.Score = score;
            newSuggestion[newSuggestion.Length - 2] = append1;
            newSuggestion[newSuggestion.Length - 1] = append2;
            return(newSuggestion);
        }
        /// <summary>
        /// <para>
        /// Generate suggestions by breaking the passed-in term into multiple words.
        /// The scores returned are equal to the number of word breaks needed so a
        /// lower score is generally preferred over a higher score.
        /// </para>
        /// </summary>
        /// <param name="suggestMode">
        ///          - default = <seealso cref="SuggestMode#SUGGEST_WHEN_NOT_IN_INDEX"/> </param>
        /// <param name="sortMethod">
        ///          - default =
        ///          <seealso cref="BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY"/> </param>
        /// <returns> one or more arrays of words formed by breaking up the original term </returns>
        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
        public virtual SuggestWord[][] SuggestWordBreaks(Term term, int maxSuggestions, IndexReader ir, SuggestMode suggestMode, BreakSuggestionSortMethod sortMethod)
        {
            if (maxSuggestions < 1)
            {
                return(new SuggestWord[0][]);
            }
            if (suggestMode == null)
            {
                suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
            }
            if (sortMethod == null)
            {
                sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
            }

            int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
            IComparer <SuggestWordArrayWrapper>  queueComparator = sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY ? new LengthThenMaxFreqComparator(this) : new LengthThenSumFreqComparator(this);
            LinkedList <SuggestWordArrayWrapper> suggestions     = new PriorityQueue <SuggestWordArrayWrapper>(queueInitialCapacity, queueComparator);

            int origFreq = ir.DocFreq(term);

            if (origFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)
            {
                return(new SuggestWord[0][]);
            }

            int useMinSuggestionFrequency = minSuggestionFrequency;

            if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
            {
                useMinSuggestionFrequency = (origFreq == 0 ? 1 : origFreq);
            }

            GenerateBreakUpSuggestions(term, ir, 1, maxSuggestions, useMinSuggestionFrequency, new SuggestWord[0], suggestions, 0, sortMethod);

            SuggestWord[][] suggestionArray = new SuggestWord[suggestions.Count][];
            for (int i = suggestions.Count - 1; i >= 0; i--)
            {
                suggestionArray[i] = suggestions.RemoveFirst().SuggestWords;
            }

            return(suggestionArray);
        }
        /// <summary>
        /// Generate suggestions by breaking the passed-in term into multiple words.
        /// The scores returned are equal to the number of word breaks needed so a
        /// lower score is generally preferred over a higher score.
        /// </summary>
        /// <param name="suggestMode">
        ///          - default = <see cref="SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX"/> </param>
        /// <param name="sortMethod">
        ///          - default = <see cref="BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY"/> </param>
        /// <returns> one or more arrays of words formed by breaking up the original term </returns>
        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
        public virtual SuggestWord[][] SuggestWordBreaks(Term term, int maxSuggestions, IndexReader ir,
                                                         SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
                                                         BreakSuggestionSortMethod sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY)
        {
            if (maxSuggestions < 1)
            {
                return(Arrays.Empty <SuggestWord[]>());
            }

            int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
            IComparer <SuggestWordArrayWrapper> queueComparer = sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY
                ? (IComparer <SuggestWordArrayWrapper>) new LengthThenMaxFreqComparer()
                : new LengthThenSumFreqComparer();

            JCG.PriorityQueue <SuggestWordArrayWrapper> suggestions = new JCG.PriorityQueue <SuggestWordArrayWrapper>(queueInitialCapacity, queueComparer);

            int origFreq = ir.DocFreq(term);

            if (origFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)
            {
                return(Arrays.Empty <SuggestWord[]>());
            }

            int useMinSuggestionFrequency = minSuggestionFrequency;

            if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
            {
                useMinSuggestionFrequency = (origFreq == 0 ? 1 : origFreq);
            }

            GenerateBreakUpSuggestions(term, ir, 1, maxSuggestions, useMinSuggestionFrequency, Arrays.Empty <SuggestWord>(), suggestions, 0, sortMethod);

            SuggestWord[][] suggestionArray = new SuggestWord[suggestions.Count][];
            for (int i = suggestions.Count - 1; i >= 0; i--)
            {
                suggestionArray[i] = suggestions.Dequeue().SuggestWords;
            }

            return(suggestionArray);
        }
Beispiel #8
0
        /// <summary>
        /// Suggest similar words.
        ///
        /// <para>
        /// Unlike <see cref="SpellChecker"/>, the similarity used to fetch the most
        /// relevant terms is an edit distance, therefore typically a low value
        /// for numSug will work very well.
        /// </para>
        /// </summary>
        /// <param name="term"> Term you want to spell check on </param>
        /// <param name="numSug"> the maximum number of suggested words </param>
        /// <param name="ir"> IndexReader to find terms from </param>
        /// <param name="suggestMode"> specifies when to return suggested words </param>
        /// <param name="accuracy"> return only suggested words that match with this similarity </param>
        /// <returns> sorted list of the suggested words according to the comparer </returns>
        /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
        public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir,
                                                    SuggestMode suggestMode, float accuracy)
        {
            CharsRef spare = new CharsRef();
            string   text  = term.Text();

            if (minQueryLength > 0 && text.CodePointCount(0, text.Length) < minQueryLength)
            {
                return(new SuggestWord[0]);
            }

            if (lowerCaseTerms)
            {
                term = new Term(term.Field, text.ToLower());
            }

            int docfreq = ir.DocFreq(term);

            if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && docfreq > 0)
            {
                return(new SuggestWord[0]);
            }

            int maxDoc = ir.MaxDoc;

            if (maxQueryFrequency >= 1f && docfreq > maxQueryFrequency)
            {
                return(new SuggestWord[0]);
            }
            else if (docfreq > (int)Math.Ceiling(maxQueryFrequency * maxDoc))
            {
                return(new SuggestWord[0]);
            }

            if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR)
            {
                docfreq = 0;
            }

            if (thresholdFrequency >= 1f)
            {
                docfreq = Math.Max(docfreq, (int)thresholdFrequency);
            }
            else if (thresholdFrequency > 0f)
            {
                docfreq = Math.Max(docfreq, (int)(thresholdFrequency * maxDoc) - 1);
            }

            IEnumerable <ScoreTerm> terms = null;
            int inspections = numSug * maxInspections;

            // try ed=1 first, in case we get lucky
            terms = SuggestSimilar(term, inspections, ir, docfreq, 1, accuracy, spare);
            if (maxEdits > 1 && terms.Count() < inspections)
            {
                var moreTerms = new HashSet <ScoreTerm>();
                moreTerms.AddAll(terms);
                moreTerms.AddAll(SuggestSimilar(term, inspections, ir, docfreq, maxEdits, accuracy, spare));
                terms = moreTerms;
            }

            // create the suggestword response, sort it, and trim it to size.

            var suggestions = new SuggestWord[terms.Count()];
            int index       = suggestions.Length - 1;

            foreach (ScoreTerm s in terms)
            {
                SuggestWord suggestion = new SuggestWord();
                if (s.TermAsString == null)
                {
                    UnicodeUtil.UTF8toUTF16(s.Term, spare);
                    s.TermAsString = spare.ToString();
                }
                suggestion.String    = s.TermAsString;
                suggestion.Score     = s.Score;
                suggestion.Freq      = s.Docfreq;
                suggestions[index--] = suggestion;
            }

            ArrayUtil.TimSort(suggestions, Collections.ReverseOrder(comparer));
            if (numSug < suggestions.Length)
            {
                SuggestWord[] trimmed = new SuggestWord[numSug];
                Array.Copy(suggestions, 0, trimmed, 0, numSug);
                suggestions = trimmed;
            }
            return(suggestions);
        }
        /// <summary>
        /// <para>
        /// Generate suggestions by combining one or more of the passed-in terms into
        /// single words. The returned <see cref="CombineSuggestion"/> contains both a
        /// <see cref="SuggestWord"/> and also an array detailing which passed-in terms were
        /// involved in creating this combination. The scores returned are equal to the
        /// number of word combinations needed, also one less than the length of the
        /// array <see cref="CombineSuggestion.OriginalTermIndexes"/>. Generally, a
        /// suggestion with a lower score is preferred over a higher score.
        /// </para>
        /// <para>
        /// To prevent two adjacent terms from being combined (for instance, if one is
        /// mandatory and the other is prohibited), separate the two terms with
        /// <see cref="WordBreakSpellChecker.SEPARATOR_TERM"/>
        /// </para>
        /// <para>
        /// When suggestMode equals <see cref="SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX"/>, each
        /// suggestion will include at least one term not in the index.
        /// </para>
        /// <para>
        /// When suggestMode equals <see cref="SuggestMode.SUGGEST_MORE_POPULAR"/>, each
        /// suggestion will have the same, or better frequency than the most-popular
        /// included term.
        /// </para>
        /// </summary>
        /// <returns> an array of words generated by combining original terms </returns>
        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
        public virtual CombineSuggestion[] SuggestWordCombinations(Term[] terms, int maxSuggestions,
                                                                   IndexReader ir, SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)
        {
            if (maxSuggestions < 1)
            {
                return(Arrays.Empty <CombineSuggestion>());
            }

            int[] origFreqs = null;
            if (suggestMode != SuggestMode.SUGGEST_ALWAYS)
            {
                origFreqs = new int[terms.Length];
                for (int i = 0; i < terms.Length; i++)
                {
                    origFreqs[i] = ir.DocFreq(terms[i]);
                }
            }

            int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
            IComparer <CombineSuggestionWrapper> queueComparer = new CombinationsThenFreqComparer();

            JCG.PriorityQueue <CombineSuggestionWrapper> suggestions = new JCG.PriorityQueue <CombineSuggestionWrapper>(queueInitialCapacity, queueComparer);

            int thisTimeEvaluations = 0;

            for (int i = 0; i < terms.Length - 1; i++)
            {
                if (terms[i].Equals(SEPARATOR_TERM))
                {
                    continue;
                }
                string leftTermText   = terms[i].Text;
                int    leftTermLength = leftTermText.CodePointCount(0, leftTermText.Length);
                if (leftTermLength > maxCombineWordLength)
                {
                    continue;
                }
                int maxFreq = 0;
                int minFreq = int.MaxValue;
                if (origFreqs != null)
                {
                    maxFreq = origFreqs[i];
                    minFreq = origFreqs[i];
                }
                string combinedTermText = leftTermText;
                int    combinedLength   = leftTermLength;
                for (int j = i + 1; j < terms.Length && j - i <= maxChanges; j++)
                {
                    if (terms[j].Equals(SEPARATOR_TERM))
                    {
                        break;
                    }
                    string rightTermText   = terms[j].Text;
                    int    rightTermLength = rightTermText.CodePointCount(0, rightTermText.Length);
                    combinedTermText += rightTermText;
                    combinedLength   += rightTermLength;
                    if (combinedLength > maxCombineWordLength)
                    {
                        break;
                    }

                    if (origFreqs != null)
                    {
                        maxFreq = Math.Max(maxFreq, origFreqs[j]);
                        minFreq = Math.Min(minFreq, origFreqs[j]);
                    }

                    Term combinedTerm     = new Term(terms[0].Field, combinedTermText);
                    int  combinedTermFreq = ir.DocFreq(combinedTerm);

                    if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR || combinedTermFreq >= maxFreq)
                    {
                        if (suggestMode != SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX || minFreq == 0)
                        {
                            if (combinedTermFreq >= minSuggestionFrequency)
                            {
                                int[] origIndexes = new int[j - i + 1];
                                origIndexes[0] = i;
                                for (int k = 1; k < origIndexes.Length; k++)
                                {
                                    origIndexes[k] = i + k;
                                }
                                SuggestWord word = new SuggestWord();
                                word.Freq   = combinedTermFreq;
                                word.Score  = origIndexes.Length - 1;
                                word.String = combinedTerm.Text;
                                CombineSuggestionWrapper suggestion = new CombineSuggestionWrapper(new CombineSuggestion(word, origIndexes), (origIndexes.Length - 1));
                                suggestions.Enqueue(suggestion);
                                if (suggestions.Count > maxSuggestions)
                                {
                                    suggestions.TryDequeue(out CombineSuggestionWrapper _);
                                }
                            }
                        }
                    }
                    thisTimeEvaluations++;
                    if (thisTimeEvaluations == maxEvaluations)
                    {
                        break;
                    }
                }
            }
            CombineSuggestion[] combineSuggestions = new CombineSuggestion[suggestions.Count];
            for (int i = suggestions.Count - 1; i >= 0; i--)
            {
                combineSuggestions[i] = suggestions.Dequeue().CombineSuggestion;
            }
            return(combineSuggestions);
        }
Beispiel #10
0
 /// <summary>
 /// Creates a new CombineSuggestion from a <code>suggestion</code> and
 /// an array of term ids (referencing the indexes to the original terms that
 /// form this combined suggestion)
 /// </summary>
 public CombineSuggestion(SuggestWord suggestion, int[] originalTermIndexes)
 {
     this.suggestion          = suggestion;
     this.originalTermIndexes = originalTermIndexes;
 }
Beispiel #11
0
        /// <summary>
        /// Suggest similar words (optionally restricted to a field of an index).
        ///
        /// <para>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
        /// is not the same as the edit distance strategy used to calculate the best
        /// matching spell-checked word from the hits that Lucene found, one usually has
        /// to retrieve a couple of numSug's in order to get the true best match.
        ///
        /// </para>
        /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one.
        /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
        ///
        /// </para>
        /// </summary>
        /// <param name="word"> the word you want a spell check done on </param>
        /// <param name="numSug"> the number of suggested words </param>
        /// <param name="ir"> the indexReader of the user index (can be null see field param) </param>
        /// <param name="field"> the field of the user index: if field is not null, the suggested
        /// words are restricted to the words present in this field. </param>
        /// <param name="suggestMode">
        /// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param>
        /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param>
        /// <exception cref="IOException"> if the underlying index throws an <seealso cref="IOException"/> </exception>
        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
        /// <returns> String[] the sorted list of the suggest words with these 2 criteria:
        /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
        /// of the suggest words in the field of the user index
        ///  </returns>
        public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy)
        {
            // obtainSearcher calls ensureOpen
            IndexSearcher indexSearcher = ObtainSearcher();

            try
            {
                if (ir == null || field == null)
                {
                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
                }
                if (suggestMode == SuggestMode.SUGGEST_ALWAYS)
                {
                    ir    = null;
                    field = null;
                }

                int lengthWord = word.Length;

                int freq     = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
                int goalFreq = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
                // if the word exists in the real index and we don't care for word frequency, return the word itself
                if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0)
                {
                    return(new string[] { word });
                }

                BooleanQuery query = new BooleanQuery();
                string[]     grams;
                string       key;

                for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
                {
                    key = "gram" + ng;           // form key

                    grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

                    if (grams.Length == 0)
                    {
                        continue; // hmm
                    }

                    if (bStart > 0)                                            // should we boost prefixes?
                    {
                        Add(query, "start" + ng, grams[0], bStart);            // matches start of word
                    }
                    if (bEnd > 0)                                              // should we boost suffixes
                    {
                        Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
                    }
                    for (int i = 0; i < grams.Length; i++)
                    {
                        Add(query, key, grams[i]);
                    }
                }

                int maxHits = 10 * numSug;

                //    System.out.println("Q: " + query);
                ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
                //    System.out.println("HITS: " + hits.length());
                SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);

                // go thru more than 'maxr' matches in case the distance filter triggers
                int         stop    = Math.Min(hits.Length, maxHits);
                SuggestWord sugWord = new SuggestWord();
                for (int i = 0; i < stop; i++)
                {
                    sugWord.@string = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word

                    // don't suggest a word for itself, that would be silly
                    if ([email protected](word))
                    {
                        continue;
                    }

                    // edit distance
                    sugWord.score = sd.GetDistance(word, sugWord.@string);
                    if (sugWord.score < accuracy)
                    {
                        continue;
                    }

                    if (ir != null && field != null)                                 // use the user index
                    {
                        sugWord.freq = ir.DocFreq(new Term(field, sugWord.@string)); // freq in the index
                        // don't suggest a word that is not present in the field
                        if ((suggestMode == SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.freq) || sugWord.freq < 1)
                        {
                            continue;
                        }
                    }
                    sugQueue.InsertWithOverflow(sugWord);
                    if (sugQueue.Size() == numSug)
                    {
                        // if queue full, maintain the minScore score
                        accuracy = sugQueue.Top().score;
                    }
                    sugWord = new SuggestWord();
                }

                // convert to array string
                string[] list = new string[sugQueue.Size()];
                for (int i = sugQueue.Size() - 1; i >= 0; i--)
                {
                    list[i] = sugQueue.Pop().@string;
                }

                return(list);
            }
            finally
            {
                ReleaseSearcher(indexSearcher);
            }
        }
Beispiel #12
0
        /// <summary>
        /// Suggest similar words (optionally restricted to a field of an index).
        /// 
        /// <para>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
        /// is not the same as the edit distance strategy used to calculate the best
        /// matching spell-checked word from the hits that Lucene found, one usually has
        /// to retrieve a couple of numSug's in order to get the true best match.
        /// 
        /// </para>
        /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one.
        /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
        /// 
        /// </para>
        /// </summary>
        /// <param name="word"> the word you want a spell check done on </param>
        /// <param name="numSug"> the number of suggested words </param>
        /// <param name="ir"> the indexReader of the user index (can be null see field param) </param>
        /// <param name="field"> the field of the user index: if field is not null, the suggested
        /// words are restricted to the words present in this field. </param>
        /// <param name="suggestMode"> 
        /// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param>
        /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param>
        /// <exception cref="IOException"> if the underlying index throws an <seealso cref="IOException"/> </exception>
        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
        /// <returns> String[] the sorted list of the suggest words with these 2 criteria:
        /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
        /// of the suggest words in the field of the user index
        ///  </returns>
        public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy)
        {
            // obtainSearcher calls ensureOpen
            IndexSearcher indexSearcher = ObtainSearcher();
            try
            {
                if (ir == null || field == null)
                {
                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
                }
                if (suggestMode == SuggestMode.SUGGEST_ALWAYS)
                {
                    ir = null;
                    field = null;
                }

                int lengthWord = word.Length;

                int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
                int goalFreq = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
                // if the word exists in the real index and we don't care for word frequency, return the word itself
                if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0)
                {
                    return new string[] { word };
                }

                BooleanQuery query = new BooleanQuery();
                string[] grams;
                string key;

                for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
                {

                    key = "gram" + ng; // form key

                    grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

                    if (grams.Length == 0)
                    {
                        continue; // hmm
                    }

                    if (bStart > 0) // should we boost prefixes?
                    {
                        Add(query, "start" + ng, grams[0], bStart); // matches start of word

                    }
                    if (bEnd > 0) // should we boost suffixes
                    {
                        Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word

                    }
                    for (int i = 0; i < grams.Length; i++)
                    {
                        Add(query, key, grams[i]);
                    }
                }

                int maxHits = 10 * numSug;

                //    System.out.println("Q: " + query);
                ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
                //    System.out.println("HITS: " + hits.length());
                SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);

                // go thru more than 'maxr' matches in case the distance filter triggers
                int stop = Math.Min(hits.Length, maxHits);
                SuggestWord sugWord = new SuggestWord();
                for (int i = 0; i < stop; i++)
                {

                    sugWord.@string = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word

                    // don't suggest a word for itself, that would be silly
                    if ([email protected](word))
                    {
                        continue;
                    }

                    // edit distance
                    sugWord.score = sd.GetDistance(word, sugWord.@string);
                    if (sugWord.score < accuracy)
                    {
                        continue;
                    }

                    if (ir != null && field != null) // use the user index
                    {
                        sugWord.freq = ir.DocFreq(new Term(field, sugWord.@string)); // freq in the index
                        // don't suggest a word that is not present in the field
                        if ((suggestMode == SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.freq) || sugWord.freq < 1)
                        {
                            continue;
                        }
                    }
                    sugQueue.InsertWithOverflow(sugWord);
                    if (sugQueue.Size() == numSug)
                    {
                        // if queue full, maintain the minScore score
                        accuracy = sugQueue.Top().score;
                    }
                    sugWord = new SuggestWord();
                }

                // convert to array string
                string[] list = new string[sugQueue.Size()];
                for (int i = sugQueue.Size() - 1; i >= 0; i--)
                {
                    list[i] = sugQueue.Pop().@string;
                }

                return list;
            }
            finally
            {
                ReleaseSearcher(indexSearcher);
            }
        }