/// <summary>
        /// Helper method to create an ISearchCriteria for searching all fields in a <see cref="BaseLuceneSearcher"/>
        /// </summary>
        /// <param name="searchText">The text to search for; it is split on spaces and every non-empty part becomes a search value</param>
        /// <param name="useWildcards">
        /// When true every value is wrapped as a multiple character wildcard value, otherwise the values are searched as they are
        /// </param>
        /// <param name="searcher">The searcher that creates the criteria and supplies the fields to search</param>
        /// <returns>The compiled criteria: a grouped OR of the values over the searcher's search fields</returns>
        /// <exception cref="MissingMethodException">
        /// Thrown when the GetSearchFields method cannot be found on <see cref="BaseLuceneSearcher"/> via reflection
        /// </exception>
        /// <remarks>
        /// This is here because some of this stuff is internal in Examine
        /// </remarks>
        private ISearchCriteria SearchAllFields(string searchText, bool useWildcards, BaseLuceneSearcher searcher)
        {
            var sc = searcher.CreateSearchCriteria();

            if (_examineGetSearchFields == null)
            {
                //get the GetSearchFields method from BaseLuceneSearcher
                var getSearchFields = typeof(BaseLuceneSearcher).GetMethod("GetSearchFields", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static);
                if (getSearchFields == null)
                {
                    //fail with a message that names the missing member instead of a NullReferenceException on Invoke below
                    throw new MissingMethodException(typeof(BaseLuceneSearcher).FullName, "GetSearchFields");
                }

                //cache the lookup so the reflection only happens once
                _examineGetSearchFields = getSearchFields;
            }

            //get the results of searcher.GetSearchFields() using ugly reflection since it's not public
            var searchFields = (IEnumerable <string>)_examineGetSearchFields.Invoke(searcher, null);

            //this is what Examine does internally to create ISearchCriteria for searching all fields
            var strArray = searchText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            var grouped = useWildcards
                ? sc.GroupedOr(searchFields, strArray.Select(x => new CustomExamineValue(Examineness.ComplexWildcard, x.MultipleCharacterWildcard().Value)).ToArray <IExamineValue>())
                : sc.GroupedOr(searchFields, strArray);

            return grouped.Compile();
        }
        /// <summary>
        /// Builds a raw Lucene query from <paramref name="searchTerm"/>, runs it against the searcher and
        /// returns the hits ordered by descending score.
        /// </summary>
        /// <param name="searchTerm">The text to search for; leading and trailing whitespace is ignored</param>
        /// <param name="isFuzzy">
        /// When true every word is searched as a fuzzy term (all words must match) and only hits scoring
        /// above 0.05 are returned; when false the whole text is searched as a phrase with a proximity of one
        /// </param>
        /// <returns>The matching results, best score first; empty when there is nothing to search for</returns>
        public IEnumerable <SearchResult> GetRawResults(string searchTerm, bool isFuzzy)
        {
            //nothing to search for: a lone "~" or an empty phrase is not a usable query
            if (string.IsNullOrWhiteSpace(searchTerm))
            {
                return Enumerable.Empty <SearchResult>();
            }

            searchTerm = searchTerm.Trim(); //sanitise input
            bool   isPhrase = searchTerm.Contains(' ');
            string luceneRawQuery;

            if (isFuzzy && !isPhrase)//fuzzy single word
            {
                luceneRawQuery = searchTerm + '~';
            }
            else if (isFuzzy)//fuzzy multiple words
            {
                //More info about grouping https://lucene.apache.org/core/2_9_4/queryparsersyntax.html
                //drop empty entries: "hello, world" must not produce an empty word, which would end up as a bare "~"
                var searchedTerms = searchTerm.Split(new[] { ' ', ',', '.' }, StringSplitOptions.RemoveEmptyEntries);
                if (searchedTerms.Length == 0)
                {
                    //the text consisted of separators only
                    return Enumerable.Empty <SearchResult>();
                }

                luceneRawQuery = string.Join(" AND ", searchedTerms.Select(word => word + "~").ToArray());
            }
            else //not fuzzy, exact word or phrase
            {
                luceneRawQuery = "\"" + searchTerm + "\"" + "~1"; //one word proximity/distance
            }

            var searchCriteria = _searcher.CreateSearchCriteria(BooleanOperation.Or);
            var query          = searchCriteria.RawQuery(luceneRawQuery);
            var results        = _searcher.Search(query).OrderByDescending(x => x.Score);

            //results are sorted best first, so TakeWhile stops at the first hit that scores 0.05 or lower
            return(isFuzzy ? results.TakeWhile(x => x.Score > 0.05f) : results);
        }
// Example #3
// 0
        /// <summary>
        /// Generates the search criteria that we want to search on
        /// </summary>
        /// <param name="searcher">The searcher that creates the criteria and is handed to every filter</param>
        /// <returns>
        /// The criteria to execute, or <c>null</c> when no node type alias, term or filter has been supplied
        /// </returns>
        /// <remarks>
        /// As a side effect <c>Term</c> is overwritten with its cleaned up value.
        /// </remarks>
        private ISearchCriteria GetSearchCriteria(BaseLuceneSearcher searcher)
        {
            var criteria = (LuceneSearchCriteria)searcher.CreateSearchCriteria();

            //check if there's anything to process
            if (NodeTypeAlias.IsNullOrWhiteSpace() && Term.IsNullOrWhiteSpace() && !Filters.Any())
            {
                return(null);
            }

            var sb = new StringBuilder();

            if (string.IsNullOrEmpty(NodeTypeAlias) == false)
            {
                sb.AppendFormat("+nodeTypeAlias: {0} ", NodeTypeAlias);
            }

            // The major version to filter by is:
            // * MajorDocsVersion when it is supplied
            // * otherwise the current major version from the app settings
            // Searching across all versions at once is currently not implemented
            var currentMajorVersions = new string[] { "6", "7", "8", "9" };

            string versionToFilterBy = MajorDocsVersion == null
                ? ConfigurationManager.AppSettings[Constants.AppSettings.DocumentationCurrentMajorVersion]
                : MajorDocsVersion.ToString();

            if (NodeTypeAlias.InvariantEquals("documentation"))
            {
                //for documentation we filter by this version by requiring it
                foreach (var versionToFind in currentMajorVersions.Where(f => f == versionToFilterBy))
                {
                    sb.AppendFormat("+majorVersion:{0} ", versionToFind);
                }
            }
            else
            {
                //for everything else we filter by this version by excluding the other major versions in lucene
                foreach (var versionToNegate in currentMajorVersions.Where(f => f != versionToFilterBy))
                {
                    sb.AppendFormat("-majorVersion:{0} ", versionToNegate);
                }
            }

            if (!string.IsNullOrEmpty(Term))
            {
                sb.Append("+(");
                // Cleanup the term so there are no errors
                // NOTE(review): only quotes, colons, backslashes and leading/trailing asterisks are removed;
                // other query syntax characters such as ( ) [ ] ^ ~ are passed through as they are
                Term = Term
                       .Replace("\"", string.Empty)
                       .Replace(":", string.Empty)
                       .Replace("\\", string.Empty)
                       .Trim('*');
                // Replace OR's with case insensitive matching
                Term = Regex.Replace(Term, @" OR ", " ", RegexOptions.IgnoreCase);
                // Replace double whitespaces with single space as they were giving errors
                Term = Regex.Replace(Term, @"\s{2,}", " ");

                // Do an exact phrase match with boost
                sb.AppendFormat("nodeName:\"{0}\"^20000 body:\"{0}\"^5000 ", Term);

                // SEARCH YAML stuff
                sb.AppendFormat("tags:\"{0}\"^20000 ", Term);
                sb.AppendFormat("keywords:\"{0}\"^20000 ", Term);

                // Now we need to split the phrase into individual terms so the query parser can understand
                var split = Term.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                //now de-duplicate the terms and remove the stop words and don't include single chars
                var deduped = new List <string>();
                foreach (var s in split)
                {
                    //the stop word lookup is done in lower case so "The" is treated like "the", in line with the
                    //case insensitive de-duplication; this also keeps upper case AND / OR / NOT out of the
                    //per-term clauses below, where the query parser would read them as operators
                    if (s.Length > 1 &&
                        !deduped.Contains(s, StringComparer.InvariantCultureIgnoreCase) &&
                        !StandardAnalyzer.STOP_WORDS_SET.Contains(s.ToLowerInvariant()))
                    {
                        deduped.Add(s);
                    }
                }

                if (deduped.Count > 20)
                {
                    //truncate, we don't want to search on all of these individually
                    deduped = deduped.Take(20).ToList();
                }

                if (deduped.Count > 0)
                {
                    //do standard match with boost on each term
                    foreach (var s in deduped)
                    {
                        sb.AppendFormat("nodeName:{0}^10000 body:{0}^50 ", s);
                    }

                    //do suffix with wildcards
                    foreach (var s in deduped)
                    {
                        sb.AppendFormat("nodeName:{0}*^1000 body:{0}* ", s);
                    }

                    //do fuzzy (close match 0.9)
                    foreach (var s in deduped)
                    {
                        sb.AppendFormat("nodeName:{0}~0.9^0.1 body:{0}~0.9^0.1 ", s);
                    }
                }
                sb.Append(")");
            }

            //only add a raw query when something was built above
            if (sb.Length > 0)
            {
                //render out the raw query that was constructed above
                criteria = (LuceneSearchCriteria)criteria.RawQuery(sb.ToString());
            }

            //Now we can apply any filters, this is done by using native Lucene query objects
            if (Filters.Any())
            {
                //If there is a filter applied to the entire result then add it here, this is a MUST sub query
                foreach (var filter in Filters)
                {
                    filter.ProcessLuceneAddFilters(searcher, criteria);
                }

                //need to process the excludes after - since that is how lucene works, you can only exclude after you've included
                foreach (var filter in Filters)
                {
                    filter.ProcessLuceneExcludeFilters(searcher, criteria);
                }
            }

            if (string.IsNullOrEmpty(OrderBy) == false)
            {
                criteria.OrderByDescending(OrderBy);
            }

            return(criteria);
        }
// Example #4
// 0
        /// <summary>
        /// Generates the search criteria that we want to search on
        /// </summary>
        /// <param name="searcher">The searcher that creates the criteria and is handed to every filter</param>
        /// <returns>
        /// The criteria to execute, or <c>null</c> when no node type alias, term or filter has been supplied
        /// </returns>
        /// <remarks>
        /// As a side effect <c>Term</c> is overwritten with its cleaned up value.
        /// </remarks>
        public ISearchCriteria GetSearchCriteria(BaseLuceneSearcher searcher)
        {
            var criteria = (LuceneSearchCriteria)searcher.CreateSearchCriteria();

            //check if there's anything to process
            if (NodeTypeAlias.IsNullOrWhiteSpace() && Term.IsNullOrWhiteSpace() && !Filters.Any())
            {
                return(null);
            }

            if (string.IsNullOrEmpty(NodeTypeAlias) == false)
            {
                criteria.Field("nodeTypeAlias", NodeTypeAlias);
            }

            var sb = new StringBuilder();

            if (!string.IsNullOrEmpty(Term))
            {
                //Cleanup the term so there are no errors
                //NOTE(review): only quotes, colons, backslashes and leading/trailing asterisks are removed;
                //other query syntax characters such as ( ) [ ] ^ ~ are passed through as they are
                Term = Term.Replace("\"", string.Empty)
                       .Replace(":", string.Empty)
                       .Replace("\\", string.Empty).Trim('*');
                //replace OR's with case insensitive matching
                Term = Regex.Replace(Term, @" OR ", " ", RegexOptions.IgnoreCase);
                // Replace double whitespaces with single space as they were giving errors
                Term = Regex.Replace(Term, @"\s{2,}", " ");

                //now we need to split the phrase into individual terms so the query parser can understand
                var split = Term.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                if (split.Length > 1)
                {
                    //do an exact phrase match with boost
                    sb.AppendFormat("nodeName:\"{0}\"^20000 body:\"{0}\"^5000 ", Term);
                }

                //a bare upper case AND / OR / NOT is an operator to the Lucene query parser; written as a field
                //term (e.g. nodeName:AND^10000) it does not parse, so these words are left out of the per-term
                //clauses - they are still part of the quoted phrase above
                var terms = split.Where(s => s != "AND" && s != "OR" && s != "NOT").ToArray();

                //one format per kind of match, applied in this order to every term:
                //standard match with boost, suffix with wildcards, fuzzy (close match 0.9)
                var perTermFormats = new[]
                {
                    "nodeName:{0}^10000 body:{0}^50 ",
                    "nodeName:{0}*^1000 body:{0}* ",
                    "nodeName:{0}~0.9^0.1 body:{0}~0.9^0.1 "
                };

                foreach (var format in perTermFormats)
                {
                    foreach (var s in terms)
                    {
                        sb.AppendFormat(format, s);
                    }
                }
            }

            //only add a raw query when something was built above
            if (sb.Length > 0)
            {
                //render out the raw query that was constructed above
                criteria = (LuceneSearchCriteria)criteria.RawQuery(sb.ToString());
            }

            //Now we can apply any filters, this is done by using native Lucene query objects
            if (Filters.Any())
            {
                //If there is a filter applied to the entire result then add it here, this is a MUST sub query
                foreach (var filter in Filters)
                {
                    filter.ProcessLuceneAddFilters(searcher, criteria);
                }

                //need to process the excludes after - since that is how lucene works, you can only exclude after you've included
                foreach (var filter in Filters)
                {
                    filter.ProcessLuceneExcludeFilters(searcher, criteria);
                }
            }

            if (string.IsNullOrEmpty(OrderBy) == false)
            {
                criteria.OrderByDescending(OrderBy);
            }

            return(criteria);
        }