/// <summary>
/// Scans every term of <paramref name="fieldToSearch"/> in the index and collects
/// the terms that can stand in for the first and last word of a "match anywhere" phrase:
/// terms that END with <paramref name="firstTerm"/> and terms that START with
/// <paramref name="lastTerm"/>.
/// </summary>
/// <param name="indexReader">Reader whose term dictionary is enumerated.</param>
/// <param name="fieldToSearch">Only terms belonging to this field are considered.</param>
/// <param name="firstTermMatches">Receives the terms ending with <paramref name="firstTerm"/>.</param>
/// <param name="lastTermMatches">Receives the terms starting with <paramref name="lastTerm"/>.</param>
/// <param name="firstTerm">First word of the search phrase.</param>
/// <param name="lastTerm">Last word of the search phrase.</param>
/// <remarks>
/// A term consisting of a single punctuation character (as decided by
/// <c>LuceneHelper.IsPunctuation</c>) is never expanded; nothing is added to the
/// corresponding list in that case.
/// </remarks>
private static void CollectFirstAndLastTermMatches(
    IndexReader indexReader,
    string fieldToSearch,
    List<Term> firstTermMatches,
    List<Term> lastTermMatches,
    string firstTerm,
    string lastTerm)
{
    bool isFirstTermPunctuation = (firstTerm.Length == 1 && LuceneHelper.IsPunctuation(firstTerm.First()));
    bool isLastTermPunctuation = (lastTerm.Length == 1 && LuceneHelper.IsPunctuation(lastTerm.First()));

    // Punctuation characters are always indexed by themselves, so there is no need
    // to look for terms starting/ending with one.
    if (isFirstTermPunctuation && isLastTermPunctuation)
    {
        return;
    }

    HashSet<string> addedFirstTerms = new HashSet<string>();
    HashSet<string> addedLastTerms = new HashSet<string>();

    // The per-term work is composed from delegates up front so that the string
    // comparison for a punctuation term (which can never match) is not executed for
    // each of the potentially thousands of terms in the index.
    // Comparisons are ordinal: index terms are exact character sequences, and the
    // culture-sensitive default of StartsWith/EndsWith could treat some characters
    // as ignorable or vary with the current locale.
    Action<Term> addFirstTermAction = (curTerm) =>
    {
        string text = curTerm.Text;
        // HashSet.Add returns false for a text that was already collected.
        if (text.EndsWith(firstTerm, StringComparison.Ordinal) && addedFirstTerms.Add(text))
        {
            firstTermMatches.Add(curTerm);
        }
    };

    Action<Term> addLastTermAction = (curTerm) =>
    {
        string text = curTerm.Text;
        if (text.StartsWith(lastTerm, StringComparison.Ordinal) && addedLastTerms.Add(text))
        {
            lastTermMatches.Add(curTerm);
        }
    };

    // At least one of the two actions is always attached here, because the
    // "both are punctuation" case has already returned above.
    Action<Term> collectTermsAction = null;
    if (!isFirstTermPunctuation)
    {
        collectTermsAction += addFirstTermAction;
    }
    if (!isLastTermPunctuation)
    {
        collectTermsAction += addLastTermAction;
    }

    // !! reader.Terms(new Term(..., ...)) does not work as expected, so check every term !!
    // The enumerator is disposed when the scan finishes (or throws) so that it does
    // not stay open for the lifetime of the reader.
    using (var termEnum = indexReader.Terms())
    {
        while (termEnum.Next())
        {
            var curTerm = termEnum.Term;

            // Skip terms that belong to other fields.
            if (curTerm.Field != fieldToSearch)
            {
                continue;
            }

            collectTermsAction(curTerm);
        }
    }
}
/// <summary>
/// Builds a query that finds <paramref name="expandedSearchString"/> anywhere in the
/// content, i.e. the first word may be the tail of a longer indexed term and the last
/// word may be the head of a longer indexed term.
/// </summary>
/// <param name="indexReader">Reader used to look up the indexed terms that end with the first word / start with the last word.</param>
/// <param name="expandedSearchString">The search text; words are separated by single space characters.</param>
/// <param name="matchCase">
/// When <c>false</c>, the search string is lower-cased and the
/// <c>Constants.IndexFields.ContentCaseInsensitive</c> field is queried instead of
/// <c>Constants.IndexFields.Content</c>.
/// </param>
/// <returns>
/// A <see cref="TermQuery"/> for a single punctuation character, a
/// <see cref="WildcardQuery"/> (<c>*word*</c>) for any other single word, and a
/// <see cref="MultiPhraseQuery"/> for several words. An empty or space-only search
/// string yields an empty <see cref="MultiPhraseQuery"/>, which matches no documents.
/// </returns>
private Query BuildMatchAnywhereQuery(IndexReader indexReader, string expandedSearchString, bool matchCase)
{
    // Treat null like an empty search string instead of failing on ToLower()/Split().
    string adjustedSearchString = expandedSearchString ?? string.Empty;
    string fieldToSearch = Constants.IndexFields.Content;

    if (!matchCase)
    {
        fieldToSearch = Constants.IndexFields.ContentCaseInsensitive;
        // NOTE(review): ToLower() is culture-sensitive; it is kept as is because it
        // presumably has to agree with how the case-insensitive field was indexed - confirm.
        adjustedSearchString = adjustedSearchString.ToLower();
    }

    List<string> searchTerms = adjustedSearchString
        .Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .ToList();

    // Nothing to search for: without this guard the first/last term would be null
    // and the multi-term branch below would throw a NullReferenceException.
    // A phrase query without any terms matches nothing.
    if (searchTerms.Count == 0)
    {
        return new MultiPhraseQuery();
    }

    string firstTerm = searchTerms[0];
    string lastTerm = searchTerms[searchTerms.Count - 1];

    if (searchTerms.Count == 1)
    {
        bool isFirstTermPunctuation = (firstTerm.Length == 1 && LuceneHelper.IsPunctuation(firstTerm.First()));

        // A single punctuation character is looked up exactly; any other single
        // word may occur in the middle of a longer indexed term.
        if (isFirstTermPunctuation)
        {
            return new TermQuery(new Term(fieldToSearch, firstTerm));
        }

        return new WildcardQuery(new Term(fieldToSearch, "*" + firstTerm + "*"));
    }

    MultiPhraseQuery phraseQuery = new MultiPhraseQuery();
    List<Term> firstTermMatches = new List<Term>();
    List<Term> lastTermMatches = new List<Term>();

    CollectFirstAndLastTermMatches(indexReader, fieldToSearch, firstTermMatches, lastTermMatches, firstTerm, lastTerm);

    // Phrase position 0: every indexed term ending with the first word.
    if (firstTermMatches.Count > 0)
    {
        phraseQuery.Add(firstTermMatches.ToArray());
    }

    // When no expansion was collected for the first/last word (e.g. it is a single
    // punctuation character), the word itself is added literally together with the
    // inner words; otherwise it is already represented by its expansion list.
    bool includeFirstTerm = firstTermMatches.Count == 0;
    bool includeLastTerm = lastTermMatches.Count == 0;
    int startIndex = includeFirstTerm ? 0 : 1;
    int endIndex = searchTerms.Count - (includeLastTerm ? 0 : 1);

    for (int i = startIndex; i < endIndex; i++)
    {
        phraseQuery.Add(new Term(fieldToSearch, searchTerms[i]));
    }

    // Last phrase position: every indexed term starting with the last word.
    if (lastTermMatches.Count > 0)
    {
        phraseQuery.Add(lastTermMatches.ToArray());
    }

    return phraseQuery;
}