Beispiel #1
0
        /// <summary>
        /// Performs a search in the index.
        /// </summary>
        /// <remarks>
        /// The query is lower-cased using the invariant culture and split on space characters (empty entries are dropped).
        /// Each query word is resolved through <paramref name="fetcher"/>; words for which the fetcher reports no match are ignored.
        /// Every occurrence of a resolved word adds a match to the result of the document it occurs in and contributes its
        /// relative relevance both to that result and to the running total. The results are then purged according to
        /// <paramref name="options"/>, the relevance reported as removed by the purge is subtracted from the total, and the
        /// remaining total is passed to <c>Relevance.Finalize</c> on each surviving result.
        /// </remarks>
        /// <param name="query">The search query.</param>
        /// <param name="documentTypeTags">The document type tags to include in the search.</param>
        /// <param name="filterDocumentType"><c>true</c> to apply the filter on the document type.</param>
        /// <param name="options">The search options.</param>
        /// <param name="fetcher">An object that is able to fetch words.</param>
        /// <returns>The results.</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="query"/> or <paramref name="fetcher"/> are <c>null</c>.</exception>
        /// <exception cref="ArgumentException">If <paramref name="query"/> is empty.</exception>
        /// <exception cref="ArgumentNullException">If <paramref name="filterDocumentType"/> is <c>true</c> and <paramref name="documentTypeTags"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">If <paramref name="filterDocumentType"/> is <c>true</c> and <paramref name="documentTypeTags"/> is empty.</exception>
        /// <exception cref="InvalidOperationException">If <paramref name="options"/> is not one of the supported <c>SearchOptions</c> values.</exception>
        public static SearchResultCollection SearchInternal(string query, string[] documentTypeTags, bool filterDocumentType, SearchOptions options, IWordFetcher fetcher)
        {
            if (query == null)
            {
                throw new ArgumentNullException("query");
            }
            if (query.Length == 0)
            {
                throw new ArgumentException("Query cannot be empty", "query");
            }

            if (filterDocumentType && documentTypeTags == null)
            {
                throw new ArgumentNullException("documentTypeTags");
            }
            if (filterDocumentType && documentTypeTags.Length == 0)
            {
                throw new ArgumentException("documentTypeTags cannot be empty", "documentTypeTags");
            }

            if (fetcher == null)
            {
                throw new ArgumentNullException("fetcher");
            }

            // Fail fast: reject unsupported options before any index lookup is performed.
            // Argument exceptions above keep precedence, exactly as before.
            if (options != SearchOptions.AllWords &&
                options != SearchOptions.ExactPhrase &&
                options != SearchOptions.AtLeastOneWord)
            {
                throw new InvalidOperationException("Unsupported SearchOptions");
            }

            SearchResultCollection results = new SearchResultCollection();

            string[] queryWords = query.ToLowerInvariant().Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // Sum of the relative relevance of every occurrence processed below
            float totalRelevance = 0;

            foreach (string q in queryWords)
            {
                Word word;
                if (!fetcher.TryGetWord(q, out word))
                {
                    // The fetcher does not know this word: it contributes nothing
                    continue;
                }

                foreach (IDocument doc in word.Occurrences.Keys)
                {
                    // Skip documents with excluded tags
                    if (filterDocumentType &&
                        !IsDocumentTypeTagIncluded(doc.TypeTag, documentTypeTags))
                    {
                        continue;
                    }

                    // The document is fixed for the whole inner loop, so the existing result (if any)
                    // is looked up once per document instead of once per occurrence
                    SearchResult res = results.GetSearchResult(doc);

                    foreach (BasicWordInfo info in word.Occurrences[doc])
                    {
                        WordInfo mi = new WordInfo(q, info.FirstCharIndex, info.WordIndex, info.Location);

                        if (res == null)
                        {
                            // First occurrence seen for this document: create its search result
                            res = new SearchResult(doc);
                            res.Relevance.SetValue(info.Location.RelativeRelevance);
                            res.Matches.Add(mi);
                            results.Add(res);
                        }
                        else
                        {
                            // Avoid adding duplicate matches (happens when query contains the same word multiple times)
                            if (!res.Matches.ContainsOccurrence(mi.Text, mi.FirstCharIndex))
                            {
                                res.Matches.Add(mi);
                            }

                            // NOTE: the relevance is accumulated even when the match was a duplicate;
                            // the total below is increased by the same amount
                            res.Relevance.SetValue(res.Relevance.Value + info.Location.RelativeRelevance);
                        }

                        totalRelevance += info.Location.RelativeRelevance;
                    }
                }
            }

            // Drop the results that do not satisfy the requested options and
            // take the relevance reported as removed out of the total
            if (options == SearchOptions.AllWords)
            {
                totalRelevance -= PurgeResultsForAllWords(results, queryWords);
            }
            else if (options == SearchOptions.ExactPhrase)
            {
                totalRelevance -= PurgeResultsForExactPhrase(results, queryWords);
            }
            // SearchOptions.AtLeastOneWord: nothing to purge

            // Finalize relevance values
            for (int i = 0; i < results.Count; i++)
            {
                results[i].Relevance.Finalize(totalRelevance);
            }

            return results;
        }
Beispiel #2
0
        /// <summary>
        /// Purges the invalid results when SearchOptions is ExactPhrase.
        /// </summary>
        /// <remarks>
        /// A result is kept only if some run of consecutive entries of its <c>Matches</c> collection has the same
        /// texts as <paramref name="queryWords"/> (match texts are lower-cased before the comparison, query words are
        /// compared as given), in the same order, with word indices that increase by exactly one from entry to entry.
        /// An empty <paramref name="queryWords"/> array is matched vacuously by every result, including results that
        /// have no matches at all, so nothing is removed in that case.
        /// </remarks>
        /// <param name="results">The results to purge. Results that do not contain the phrase are removed from this collection.</param>
        /// <param name="queryWords">The query words.</param>
        /// <returns>The sum of the relevance values of the removed results.</returns>
        public static float PurgeResultsForExactPhrase(SearchResultCollection results, string[] queryWords)
        {
            float relevanceToRemove = 0;

            // Results are collected first and removed afterwards,
            // so the collection is not modified while it is being enumerated
            List<SearchResult> toRemove = new List<SearchResult>();

            foreach (SearchResult r in results)
            {
                // Verify that all matches are in the same order as in the query
                // and that their indices make up contiguous words,
                // re-iterating from every word in the result, for example:
                // query = 'repeated content', result = 'content repeated content'
                // result must be tested with 'content repeated' (failing) and with 'repeated content' (succeeding)

                // Last start position at which the whole phrase still fits in the matches.
                // It is negative when the result has fewer matches than the query has words,
                // in which case the loop below does not run and the result is removed.
                int maxTestShift = r.Matches.Count - queryWords.Length;

                bool sequenceFound = false;

                for (int shift = 0; shift <= maxTestShift && !sequenceFound; shift++)
                {
                    bool allOk = true;

                    // Matches is only indexed inside this loop, so an empty query
                    // never touches a (possibly empty) Matches collection
                    for (int i = 0; i < queryWords.Length; i++)
                    {
                        if (queryWords[i] != r.Matches[i + shift].Text.ToLowerInvariant() ||
                            r.Matches[i + shift].WordIndex != r.Matches[shift].WordIndex + i)
                        {
                            allOk = false;
                            break;
                        }
                    }

                    sequenceFound = allOk;
                }

                if (!sequenceFound)
                {
                    toRemove.Add(r);
                }
            }

            foreach (SearchResult r in toRemove)
            {
                results.Remove(r);
                relevanceToRemove += r.Relevance.Value;
            }

            return relevanceToRemove;
        }