Пример #1
0
        /// <summary>
        /// Walks every term of the index and collects, for the field <paramref name="fieldToSearch"/>,
        /// the terms that can stand in for the first and the last word of a multi-word search.
        /// </summary>
        /// <remarks>
        /// A term is a first-term match when its text ends with <paramref name="firstTerm"/>, and a
        /// last-term match when its text starts with <paramref name="lastTerm"/>. A search term that is a
        /// single punctuation character (according to <c>LuceneHelper.IsPunctuation</c>) is not expanded,
        /// so the corresponding list is left untouched. Comparisons are ordinal (exact character match).
        /// </remarks>
        /// <param name="indexReader">Reader whose term dictionary is enumerated.</param>
        /// <param name="fieldToSearch">Only terms belonging to this field are considered.</param>
        /// <param name="firstTermMatches">Receives the terms whose text ends with <paramref name="firstTerm"/>.</param>
        /// <param name="lastTermMatches">Receives the terms whose text starts with <paramref name="lastTerm"/>.</param>
        /// <param name="firstTerm">First word of the search string; must not be null.</param>
        /// <param name="lastTerm">Last word of the search string; must not be null.</param>
        private static void CollectFirstAndLastTermMatches(
            IndexReader indexReader,
            string fieldToSearch,
            List<Term> firstTermMatches,
            List<Term> lastTermMatches,
            string firstTerm,
            string lastTerm)
        {
            bool isFirstTermPunctuation = (firstTerm.Length == 1 && LuceneHelper.IsPunctuation(firstTerm.First()));
            bool isLastTermPunctuation  = (lastTerm.Length == 1 && LuceneHelper.IsPunctuation(lastTerm.First()));

            // Punctuation characters are always terms by themselves, so there is nothing to expand
            // when both ends of the phrase are punctuation.
            if (isFirstTermPunctuation && isLastTermPunctuation)
            {
                return;
            }

            // Decide once, outside the loop, which ends need expanding: the index may hold thousands of
            // terms and running string checks that can never match would waste time.
            bool collectFirst = !isFirstTermPunctuation;
            bool collectLast  = !isLastTermPunctuation;

            // Guard against adding the same term text twice to a result list.
            HashSet<string> addedFirstTerms = new HashSet<string>();
            HashSet<string> addedLastTerms  = new HashSet<string>();

            // Every term is checked (instead of seeking with reader.Terms(new Term(..., ...))).
            // The enumerator holds index resources, so it is released deterministically.
            using (var termEnum = indexReader.Terms())
            {
                while (termEnum.Next())
                {
                    var curTerm = termEnum.Term;

                    // Skip terms that belong to other fields.
                    if (curTerm.Field != fieldToSearch)
                    {
                        continue;
                    }

                    string termText = curTerm.Text;

                    // Ordinal comparison: index terms are exact character sequences, so culture-aware
                    // matching (which may ignore or fold characters) would yield wrong candidates.
                    if (collectFirst
                        && termText.EndsWith(firstTerm, StringComparison.Ordinal)
                        && addedFirstTerms.Add(termText))
                    {
                        firstTermMatches.Add(curTerm);
                    }

                    if (collectLast
                        && termText.StartsWith(lastTerm, StringComparison.Ordinal)
                        && addedLastTerms.Add(termText))
                    {
                        lastTermMatches.Add(curTerm);
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Builds a query that finds <paramref name="expandedSearchString"/> anywhere in the content,
        /// including when the first word is only the end of an indexed term or the last word is only
        /// the beginning of one.
        /// </summary>
        /// <remarks>
        /// The search string is split on single spaces. A single word becomes a <c>TermQuery</c> when it
        /// is one punctuation character, otherwise a <c>*word*</c> <c>WildcardQuery</c>. Several words
        /// become a <c>MultiPhraseQuery</c>: the first and last positions are expanded to all index terms
        /// collected by <c>CollectFirstAndLastTermMatches</c> (when any were found), and the remaining
        /// words are added as exact terms.
        /// </remarks>
        /// <param name="indexReader">Reader used to look up candidate terms for the phrase ends.</param>
        /// <param name="expandedSearchString">Search text with words separated by spaces; must not be null.</param>
        /// <param name="matchCase">
        /// When false, the search string is lower-cased and the case-insensitive content field is queried.
        /// </param>
        /// <returns>
        /// The content query. If the search string contains no words, an empty <c>MultiPhraseQuery</c>
        /// is returned, which has no phrase positions and therefore matches no documents.
        /// </returns>
        private Query BuildMatchAnywhereQuery(IndexReader indexReader, string expandedSearchString, bool matchCase)
        {
            string adjustedSearchString = expandedSearchString;
            string fieldToSearch        = Constants.IndexFields.Content;

            if (!matchCase)
            {
                fieldToSearch        = Constants.IndexFields.ContentCaseInsensitive;
                // NOTE(review): ToLower() uses the current culture; presumably this mirrors how the
                // case-insensitive field was indexed - confirm before switching to ToLowerInvariant().
                adjustedSearchString = adjustedSearchString.ToLower();
            }

            List<string> searchTerms = adjustedSearchString
                .Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                .ToList();

            // An empty or whitespace-only search string has no terms. Previously this fell through to
            // the phrase branch with null first/last terms and failed with a NullReferenceException.
            if (searchTerms.Count == 0)
            {
                return new MultiPhraseQuery();
            }

            string firstTerm = searchTerms[0];
            string lastTerm  = searchTerms[searchTerms.Count - 1];

            if (searchTerms.Count == 1)
            {
                bool isFirstTermPunctuation = (firstTerm.Length == 1 && LuceneHelper.IsPunctuation(firstTerm.First()));

                // Punctuation is indexed as a stand-alone term, so an exact lookup is sufficient;
                // any other word may occur inside a longer term and needs the wildcard search.
                if (isFirstTermPunctuation)
                {
                    return new TermQuery(new Term(fieldToSearch, firstTerm));
                }

                return new WildcardQuery(new Term(fieldToSearch, "*" + firstTerm + "*"));
            }

            MultiPhraseQuery phraseQuery = new MultiPhraseQuery();

            List<Term> firstTermMatches = new List<Term>();
            List<Term> lastTermMatches  = new List<Term>();
            CollectFirstAndLastTermMatches(indexReader, fieldToSearch, firstTermMatches, lastTermMatches, firstTerm, lastTerm);

            bool hasFirstTermMatches = firstTermMatches.Count > 0;
            bool hasLastTermMatches  = lastTermMatches.Count > 0;

            // First phrase position: every index term that ends with the first word.
            if (hasFirstTermMatches)
            {
                phraseQuery.Add(firstTermMatches.ToArray());
            }

            // Words without expanded candidates (including the first/last word when nothing was
            // collected for them) are added as exact terms, keeping their position in the phrase.
            int startIndex = hasFirstTermMatches ? 1 : 0;
            int endIndex   = hasLastTermMatches ? searchTerms.Count - 1 : searchTerms.Count;

            for (int i = startIndex; i < endIndex; i++)
            {
                phraseQuery.Add(new Term(fieldToSearch, searchTerms[i]));
            }

            // Last phrase position: every index term that starts with the last word.
            if (hasLastTermMatches)
            {
                phraseQuery.Add(lastTermMatches.ToArray());
            }

            return phraseQuery;
        }