/// <summary> Suggest similar words (restricted or not to a field of a user index)</summary>
/// <param name="word">the word you want a spell check done on
/// </param>
/// <param name="num_sug">the maximum number of suggested words to return
/// </param>
/// <param name="ir">the IndexReader of the user index (can be null, see the field param)
/// </param>
/// <param name="field">the field of the user index: if both ir and field are not null, the suggested
/// words are restricted to the words present in this field.
/// </param>
/// <param name="morePopular">if true, suggestions that are less frequent than the searched word are dropped
/// (only in restricted mode, i.e. ir != null and field != null)
/// </param>
/// <throws> IOException </throws>
/// <returns> the list of suggested words, ordered by the suggestion queue with these 2 criteria:
/// first criteria: the edit distance, second criteria (only in restricted mode): the popularity
/// of the suggested words in the field of the user index. If morePopular is false and the word
/// itself is present in the field of the user index, the result contains only the word itself.
/// </returns>
public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular)
{
    float min = this.minScore;
    TRStringDistance sd = new TRStringDistance(word);
    int lengthWord = word.Length;

    // Restricted mode needs both the reader and the field; a Term cannot be built on a null field.
    bool restricted = ir != null && field != null;

    // The frequency of the word itself is looked up regardless of morePopular, otherwise the
    // "word already exists" shortcut below could never trigger.
    int freq = restricted ? ir.DocFreq(new Term(field, word)) : 0;
    int goalFreq = morePopular ? freq : 0;
    if (!morePopular && freq > 0)
    {
        // the word exists in the index and the caller does not want a more popular word
        return new System.String[] { word };
    }

    BooleanQuery query = new BooleanQuery();
    System.String[] grams;
    System.String key;
    for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
    {
        key = "gram" + ng; // form key
        grams = FormGrams(word, ng); // form word into ngrams (allow dups too)
        if (grams.Length == 0)
        {
            continue; // no grams of this size were produced
        }
        if (bStart > 0)
        {
            // boost prefixes: matches start of word
            Add(query, "start" + ng, grams[0], bStart);
        }
        if (bEnd > 0)
        {
            // boost suffixes: matches end of word
            Add(query, "end" + ng, grams[grams.Length - 1], bEnd);
        }
        for (int i = 0; i < grams.Length; i++)
        {
            Add(query, key, grams[i]);
        }
    }

    IndexSearcher searcher = new IndexSearcher(this.spellindex);
    try
    {
        Hits hits = searcher.Search(query);
        SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);

        // go through more than num_sug matches in case the distance filter triggers
        int stop = System.Math.Min(hits.Length(), 10 * num_sug);
        SuggestWord sugword = new SuggestWord();
        for (int i = 0; i < stop; i++)
        {
            sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get orig word
            if (sugword.string_Renamed.Equals(word))
            {
                continue; // don't suggest a word for itself, that would be silly
            }

            // edit distance, normalized with the min word length
            sugword.score = 1.0f - ((float) sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord));
            if (sugword.score < min)
            {
                continue;
            }

            if (restricted)
            {
                // use the user index
                sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index
                if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1)
                {
                    // less popular than the searched word, or not present in the field at all
                    continue;
                }
            }

            sugqueue.Insert(sugword);
            if (sugqueue.Size() == num_sug)
            {
                // if queue full, maintain the min score
                min = ((SuggestWord) sugqueue.Top()).score;
            }
            // the queue now holds the inserted instance, so start a fresh one for the next hit
            sugword = new SuggestWord();
        }

        // convert to array string, filling from the last slot down to the first as entries are popped
        System.String[] list = new System.String[sugqueue.Size()];
        for (int i = sugqueue.Size() - 1; i >= 0; i--)
        {
            list[i] = ((SuggestWord) sugqueue.Pop()).string_Renamed;
        }
        return list;
    }
    finally
    {
        // release the searcher even when the search or a document lookup throws
        searcher.Close();
    }
}
/// <summary> Suggest similar words (restricted or not to a field of a user index)</summary>
/// <param name="word">the word you want a spell check done on
/// </param>
/// <param name="num_sug">the maximum number of suggested words to return
/// </param>
/// <param name="ir">the IndexReader of the user index (can be null, see the field param)
/// </param>
/// <param name="field">the field of the user index: if both ir and field are not null, the suggested
/// words are restricted to the words present in this field.
/// </param>
/// <param name="morePopular">if true, suggestions that are less frequent than the searched word are dropped
/// (only in restricted mode, i.e. ir != null and field != null)
/// </param>
/// <throws> IOException </throws>
/// <returns> the list of suggested words, ordered by the suggestion queue with these 2 criteria:
/// first criteria: the edit distance, second criteria (only in restricted mode): the popularity
/// of the suggested words in the field of the user index. If morePopular is false and the word
/// itself is present in the field of the user index, the result contains only the word itself.
/// </returns>
public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular)
{
    float min = this.minScore;
    TRStringDistance sd = new TRStringDistance(word);
    int lengthWord = word.Length;

    // The user index is only consulted when both the reader and the field are supplied;
    // building a Term on a null field is not meaningful.
    bool useUserIndex = (ir != null) && (field != null);

    // Look up how often the word itself occurs, independently of morePopular. Deriving this
    // value only when morePopular is true would make the shortcut below unreachable.
    int wordFreq = 0;
    if (useUserIndex)
    {
        wordFreq = ir.DocFreq(new Term(field, word));
    }
    int goalFreq = morePopular ? wordFreq : 0;

    if (!morePopular && wordFreq > 0)
    {
        // return the word if it exists in the index and a more popular word is not wanted
        return new System.String[] { word };
    }

    BooleanQuery query = new BooleanQuery();
    System.String[] grams;
    System.String key;
    for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
    {
        key = "gram" + ng; // form key
        grams = FormGrams(word, ng); // form word into ngrams (allow dups too)
        if (grams.Length == 0)
        {
            continue; // no grams of this size were produced
        }
        if (bStart > 0)
        {
            // should we boost prefixes? matches start of word
            Add(query, "start" + ng, grams[0], bStart);
        }
        if (bEnd > 0)
        {
            // should we boost suffixes? matches end of word
            Add(query, "end" + ng, grams[grams.Length - 1], bEnd);
        }
        for (int i = 0; i < grams.Length; i++)
        {
            Add(query, key, grams[i]);
        }
    }

    IndexSearcher searcher = new IndexSearcher(this.spellindex);
    try
    {
        Hits hits = searcher.Search(query);
        SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);

        // inspect up to 10 times num_sug hits, since the filters below may reject many of them
        int stop = System.Math.Min(hits.Length(), 10 * num_sug);
        SuggestWord sugword = new SuggestWord();
        for (int i = 0; i < stop; i++)
        {
            sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get orig word
            if (sugword.string_Renamed.Equals(word))
            {
                continue; // don't suggest a word for itself, that would be silly
            }

            // edit distance, normalized with the min word length
            sugword.score = 1.0f - ((float) sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord));
            if (sugword.score < min)
            {
                continue;
            }

            if (useUserIndex)
            {
                sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index
                if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1)
                {
                    // don't suggest a word that is less popular or not present in the field
                    continue;
                }
            }

            sugqueue.Insert(sugword);
            if (sugqueue.Size() == num_sug)
            {
                // if queue full, maintain the min score
                min = ((SuggestWord) sugqueue.Top()).score;
            }
            // the inserted instance is now owned by the queue; use a new one for the next hit
            sugword = new SuggestWord();
        }

        // convert to array string; slots are filled from the end towards the start as entries are popped
        System.String[] list = new System.String[sugqueue.Size()];
        for (int i = sugqueue.Size() - 1; i >= 0; i--)
        {
            list[i] = ((SuggestWord) sugqueue.Pop()).string_Renamed;
        }
        return list;
    }
    finally
    {
        // always close the searcher, including when an exception escapes the block above
        searcher.Close();
    }
}